def main(rolls, write_csv, write_chart, plot_chart):
    """Run the dice mode once and report the outcome.

    The result of ``run(rolls)`` is turned into a DataFrame through
    ``helpers.get_df`` and printed without its index. Each truthy flag then
    triggers the matching helper.

    Args:
        rolls: Number of rolls; passed to ``run`` and forwarded to the
            CSV/chart writers.
        write_csv: When truthy, call ``helpers.write_csv``.
        write_chart: When truthy, call ``helpers.write_chart``.
        plot_chart: When truthy, call ``helpers.plot_chart``.
    """
    print(f"Running dice mode with {rolls} rolls.\n")

    outcome = run(rolls)
    table = helpers.get_df(outcome)
    print(table.to_string(index=False))

    # Optional outputs, in the same order as the flags appear in the signature.
    if write_csv:
        helpers.write_csv(table, 'dice', rolls)

    if write_chart:
        helpers.write_chart(table, 'dice', rolls)

    if plot_chart:
        helpers.plot_chart(table)
# NOTE(review): this chunk opens mid-method. The `def` header and the
# enclosing class statement are outside the visible source, so the first
# three statements are carried over exactly as found. Indentation is
# reconstructed on the assumption that these are methods of the `VAE` class
# instantiated at the bottom of this section -- TODO confirm against the file.
        # Treating `logvar` as a log-variance, exp(0.5 * logvar) is the
        # standard deviation; standard-normal noise of the same shape is
        # scaled by it and shifted by `mu`.
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        """Map a latent sample ``z`` to an output with values in (0, 1).

        Applies ``fc3`` followed by ReLU, then ``fc4`` followed by a sigmoid.
        """
        h3 = F.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        """Encode ``x``, draw a latent sample, and decode it.

        Returns:
            A ``(decoded, mu, logvar)`` tuple.
        """
        # `input_size` is the module-level name assigned below, not an
        # attribute of `self`; `x` is flattened to rows of that width.
        mu, logvar = self.encode(x.view(-1, input_size))
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar


# ---- module-level setup: data, loaders, model and optimizer ----

batch_size = 1

df = h.get_df(fn='./data/3_years_of_data.csv')
num_cols = df.shape[1]
# scaled = h.sk_scale(df)

# NOTE(review): presumably train_pct=0.3 puts 30% of the rows in the training
# split -- confirm against h.train_test.
train_df, test_df = h.train_test(df, train_pct=0.3)
train = h.DfPastGames(train_df)
test = h.DfPastGames(test_df)

# Read by VAE.forward (as a global) and passed to the VAE constructor below.
input_size = train.data_shape

# Both loaders iterate in dataset order (shuffle=False).
train_loader = DataLoader(dataset=train, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset=test, batch_size=batch_size, shuffle=False)

# `LR` is not assigned in this section; it must be defined elsewhere in the file.
autoencoder = VAE(input_size)
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=LR)
def main(spec):
    """Evaluate the saved per-fold models and print mean/STD of their reports.

    For every fold in the folds file, the checkpoints ``ce_fold{i}.model``,
    ``fe_fold{i}.model`` and ``cl_fold{i}.model`` are loaded from the model
    directory and run on that fold's test tables via ``predict``. The
    per-fold reports are converted with ``get_df`` and their element-wise
    mean and standard deviation across folds are printed as tables.

    Args:
        spec: Nested configuration mapping. Keys read here:
            ``seed``, ``threads``, ``device``, ``senc_dim``,
            ``infersent_model``, ``w2v_path``, ``vocab_size``;
            ``spec['cl']``: ``input_file``, ``folds``, ``model_path``,
            ``num_classes``;
            ``spec['ce']``: ``encdim``, ``window``;
            ``spec['fe']``: ``fdim``, ``enc_dim``.
    """
    np.random.seed(spec['seed'])
    torch.manual_seed(spec['seed'])

    # NOTE(review): numeric libraries usually read these variables when they
    # initialise; numpy/torch are already imported when main() runs, so
    # confirm the limits actually take effect.
    nthreads = str(spec['threads'])
    for var in (
        "OMP_NUM_THREADS",
        "OPENBLAS_NUM_THREADS",
        "MKL_NUM_THREADS",
        "VECLIB_MAXIMUM_THREADS",
        "NUMEXPR_NUM_THREADS",
    ):
        os.environ[var] = nthreads

    input_file = spec['cl']['input_file']
    folds_path = spec['cl']['folds']
    device = spec['device']
    models_path = spec['cl']['model_path']
    ce_dim = spec['ce']['encdim']
    senc_dim = spec['senc_dim']
    window = spec['ce']['window']
    f_dim = spec['fe']['fdim']
    fenc_dim = spec['fe']['enc_dim']
    n_classes = spec['cl']['num_classes']
    infersent_model = spec['infersent_model']
    w2v_path = spec['w2v_path']
    vocab_size = spec['vocab_size']

    if device != 'cpu':
        torch.cuda.set_device(device)

    senc = SentEnc(infersent_model, w2v_path, vocab_size,
                   device=device, hp=False)
    prep = Preprocess()

    # One JSON document per line of the gzipped input.
    with gzip.open(input_file) as infile:
        tables = np.array([json.loads(line) for line in infile])
    for table in tables:
        table['table_array'] = np.array(
            prep.clean_table_array(table['table_array']))

    # Close the folds file deterministically instead of leaking the handle.
    with open(folds_path) as folds_file:
        folds = json.load(folds_file)

    # Initialize the sentence encodings: collect every distinct cell text,
    # then let the encoder cache them in one call.
    sentences = set()
    with tqdm(total=len(tables)) as pbar:
        pbar.set_description('initialize sent encodings:')
        for table in tables:
            for row in table['table_array']:
                sentences.update(row)
            pbar.update(1)
    senc.cache_sentences(list(sentences))

    reports = []
    for fi, fold in enumerate(folds):
        # Only the test split is needed for evaluation.
        _, _, test_tables = split_train_test(tables, fold, 1)

        ce_model = CEModel(senc_dim, ce_dim // 2, window * 4).to(device)
        fe_model = FeatEnc(f_dim, fenc_dim).to(device)
        cl_model = ClassificationModel(ce_dim + fenc_dim, n_classes).to(device)

        ce_model.load_state_dict(
            torch.load(models_path + f'/ce_fold{fi}.model',
                       map_location=device))
        fe_model.load_state_dict(
            torch.load(models_path + f'/fe_fold{fi}.model',
                       map_location=device))
        cl_model.load_state_dict(
            torch.load(models_path + f'/cl_fold{fi}.model',
                       map_location=device))

        # `label2ind` is not defined in this function; it is resolved from
        # the enclosing module scope.
        f1macro, report, _, _, _ = predict(
            test_tables, cl_model, ce_model, fe_model, senc, label2ind,
            device=device)
        reports.append(report)
        print(f'fold {fi} test f1-macro = {f1macro}')

    dfs = [get_df(r) for r in reports]
    mean_res, std_res = _mean_and_std(dfs)

    print('mean:')
    print(tabulate.tabulate(mean_res, headers='keys', tablefmt='psql'))
    print('STD:')
    print(tabulate.tabulate(std_res, headers='keys', tablefmt='psql'))


def _sum_frames(frames):
    """Element-wise sum of DataFrames; a cell absent from one frame counts as 0."""
    return reduce(lambda acc, frame: acc.add(frame, fill_value=0), frames)


def _mean_and_std(frames):
    """Return the element-wise mean and population standard deviation.

    The previous code summed the raw frames a second time instead of the
    squared deviations and divided by the fold count after taking the root.
    The deviation here is ``sqrt(sum((x - mean) ** 2) / n)``.
    """
    count = len(frames)
    mean = _sum_frames(frames) / count
    variance = _sum_frames([(frame - mean) ** 2 for frame in frames]) / count
    return mean, variance.pow(0.5)
def forward(self, tensors):
    """Return row-wise log-softmax scores for ``tensors``.

    The input is viewed as ``(len(tensors), 1, -1)`` before it is passed to
    ``self.lstm``; the LSTM output is flattened back to one row per input
    row, projected by ``self.hidden2target`` and normalised with
    ``log_softmax`` over dimension 1.

    NOTE(review): takes ``self`` and reads ``self.lstm`` /
    ``self.hidden2target``, so it is presumably a method of the model class
    (``LSTMTagger``?) whose header is not visible in this section.
    """
    n_rows = len(tensors)
    as_sequence = tensors.view(n_rows, 1, -1)
    lstm_out, _ = self.lstm(as_sequence)
    scores = self.hidden2target(lstm_out.view(n_rows, -1))
    return F.log_softmax(scores, dim=1)


if __name__ == "__main__":
    batch_size = 128

    df = h.get_df()
    train = h.Df(df)

    # Probe a single sample to derive the flattened input/output sizes.
    item = train[500]
    input_size = h.num_flat_features(item[0])
    output_size = h.num_flat_features(item[1])
    hidden_size = (input_size + output_size) // 2

    # NOTE(review): the trailing shape remarks are carried over from the
    # original; "(1, 50, 99)" does not obviously match 5 * 99 -- confirm.
    TARGET_SIZE = 99  # (1, 5, 99)
    EMBEDDING_DIM = 5 * 99  # (1, 50, 99)
    HIDDEN_DIM = 5 * 99  # (1, 50, 99)

    model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, TARGET_SIZE)
    # NOTE(review): forward() emits log-probabilities, yet the loss is MSE --
    # confirm this pairing is intended.
    loss_function = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=0.1)
# NOTE(review): this chunk opens mid-function. The `def` header that binds
# `raw` is outside the visible source, so the statements up to the `return`
# are carried over exactly as found; indentation is reconstructed.
    # Drop rows containing missing values, then one-hot encode the
    # categorical team/league/sport columns.
    raw = raw.dropna()
    raw = pd.get_dummies(data=raw, columns=[ 'a_team', 'h_team', 'league', 'sport'])
    # raw = pd.get_dummies(data=raw, columns=[ 'a_team', 'h_team',])
    # raw = raw.drop(['game_id', 'lms_date', 'lms_time'], axis=1)
    raw = raw.drop(['game_id'], axis=1)
    print(raw.columns)
    # raw = raw.astype(np.float32)
    # raw = raw.sort_values('cur_time', axis=0)
    # Return a copy of the processed frame.
    return raw.copy()


# ---- module-level setup: hyper-parameters and train/test datasets ----

EPOCH = 1
LR = 0.005
N_TEST_IMG = 5

tmp_df = h.get_df('./data/nba2.csv')
tmp_df = h.select_dtypes(tmp_df)
print(tmp_df.dtypes)

train_df, test_df = h.train_test(tmp_df)
# NOTE(review): each split goes through h.sk_scale separately; if that helper
# fits a new scaler per call, train and test are scaled with different
# statistics -- confirm this is intended.
scaled_train = h.sk_scale(train_df)
scaled_test = h.sk_scale(test_df)

# Each dataset is built from the scaled frame plus the unscaled values.
train = h.Df(scaled_train, train_df.values)
test = h.Df(scaled_test, test_df.values)

batch_size = 1
num_cols = tmp_df.shape[1]
# The model input width equals the number of columns after dtype selection.
input_size = num_cols