def stream_data_store(args):
    """Yield per-timestep evaluation records for several link-inference models.

    For each random graph and each walk count in ``args.nwalks``, samples
    censored random walks, fits an INVITE (CensoredRW) model plus Markov and
    cosine baselines, scores them all against the true adjacency with
    ``test_multi``, and yields one flat dict per (model, threshold) point —
    suitable for building a tidy DataFrame.

    Parameters
    ----------
    args : namespace with attributes ``graphs``, ``nodes`` (iterable splatted
        into ``gen_random_graphs``), ``readin``, ``nwalks`` (iterable of walk
        counts), ``length`` (observations per walk).

    Yields
    ------
    dict
        One row per model per threshold position, keyed by graph id, model
        name, precision/recall/F metrics and experiment sizes.
    """
    # Hoisted out of the loops: post-processing is loop-invariant.
    # Parameter renamed to avoid shadowing the adjacency matrix `A` below.
    def prep(mat):
        return minmax(symmetrize(mat))

    graphs = gen_random_graphs(args.graphs, *args.nodes, read_in=args.readin)
    for n, G in tqdm(enumerate(graphs)):
        A = nx.to_numpy_array(G)       # ground-truth adjacency
        N = G.number_of_nodes()
        for nwalks in args.nwalks:
            # Censored random-walk observations used as training data.
            M = sample_censored(G, n_walks=nwalks, n_obsv=args.length,
                                steps=100, demo=False, cuda=False)
            model = CensoredRW(N, sym=True)
            train(model, np.array(M), batch_size=None, compare_true=A,
                  epochs=50, callback=False, lr=0.1)
            res = model.P.detach().cpu().numpy()

            # Baselines: first/second-order Markov chains and cosine similarity.
            mkv1 = markov_model(M, k=1)
            mkv2 = markov_model(M, k=2)
            cos = cosine_model(M)

            models = dict(
                INVITE=prep(res),
                Cosine=prep(cos),
                MC1=prep(mkv1),
                MC2=prep(mkv2),
            )
            logs = test_multi(A, **models)
            for name, log in logs.items():
                for pos, t in enumerate(log['t']):
                    yield dict(
                        graph=f'G{n}',
                        model=name,
                        aps=log['aps'],
                        f_opt=log['f'][log['opt_pos']],
                        t_opt=log['opt_pos'],
                        t=t,
                        r=log['r'][pos],
                        p=log['p'][pos],
                        f=log['f'][pos],
                        nwalks=nwalks,
                        nodes=N,
                    )
def main(seqpath, epochs, threshold, cuda_id, use_cuda, save_dir, fwd, rev,
         min_count, multi, reg_epochs):
    """Train ``multi`` CNN_PHMM_VAE models on one SELEX round and save them.

    Builds dataloaders from the sequences at ``seqpath`` (trimming the given
    forward/reverse adapters), then trains ``multi`` independently-initialized
    models with identical settings, saving each under ``save_dir``.

    Parameters
    ----------
    seqpath : path to the sequence file for the round.
    epochs, threshold, reg_epochs : training-loop settings forwarded to
        ``models.train`` (``reg_epochs`` drives the force-matching phase).
    cuda_id, use_cuda : GPU selection; falls back to CPU when CUDA is absent.
    save_dir : output directory (created if missing).
    fwd, rev : adapter sequences stripped by ``SingleRound``.
    min_count : minimum read count for a sequence to enter the dataloaders.
    multi : number of independent models to train.
    """
    logger = logging.getLogger(__name__)
    logger.info(f"saving to {save_dir}")
    save_dir = Path(save_dir)
    save_dir.mkdir(exist_ok=True, parents=True)

    experiment = SingleRound(path=seqpath,
                             forward_adapter=fwd,
                             reverse_adapter=rev)

    # training setup
    train_loader, test_loader = experiment.get_dataloader(
        min_count=min_count, use_cuda=use_cuda)
    device = torch.device(f"cuda:{cuda_id}" if (
        use_cuda and torch.cuda.is_available()) else "cpu")
    train_kwargs = {
        "epochs": epochs,
        "threshold": threshold,
        "device": device,
        "train_loader": train_loader,
        "test_loader": test_loader,
        "save_dir": save_dir,
        "beta_schedule": True,
        "force_matching": True,
        "force_epochs": reg_epochs,
    }

    # train the model(s)
    target_len = experiment.random_region_length
    for i in range(multi):
        model = CNN_PHMM_VAE(motif_len=target_len, embed_size=2)
        # Class name directly, instead of parsing str(type(model)) — same
        # result ("cnn_phmm_vae") without depending on the repr format.
        model_str = type(model).__name__.lower()
        if multi > 1:
            model_str += f"_{i}"
        model_str += ".mdl"
        logger.info(f"training {model_str}")
        optimizer = optim.Adam(model.parameters())
        model = model.to(device)
        train_kwargs.update({
            "model": model,
            "model_str": model_str,
            "optimizer": optimizer
        })
        models.train(**train_kwargs)
        # Release cached GPU memory between independent runs.
        torch.cuda.empty_cache()
def run(self):
    """Load one training session, preprocess it, and train/evaluate a model.

    Reads ``train1.npz`` from ``self.data_path``, applies model-specific
    filtering, optional cropping, and preprocessing, then trains
    ``self.model_name`` and logs the cross-validation accuracy.
    Training failures are logged with a traceback and abort the run.
    """
    filepath = f'{self.data_path}/train/train1.npz'
    X, y, self.fs, self.ch_names = load_session(filepath,
                                                start=self.start,
                                                end=self.end)
    logging.info(f'Shape: X_full {X.shape} - y {y.shape}')

    # Pre-processing - filtering (CSP needs band-pass filtered input)
    if self.model_name == 'CSP':
        X = filtering(X, self.fs, f_order=5, f_low=7, f_high=35,
                      f_type='cheby')

    # Cropping (data augmentation: split trials into overlapping crops)
    if self.n_crops > 1:
        X, y = cropping(X, y, self.fs,
                        n_crops=self.n_crops,
                        crop_len=self.crop_len)

    # Preprocessing
    X = preprocessing(X, self.fs,
                      rereference=self.should_reref,
                      filt=self.should_filter,
                      standardize=self.should_standardize)

    try:
        trained_model, cv_mean, cv_std, train_time = train(
            self.model_name, X, y, self.train_mode, n_iters=self.n_iters)
    except Exception:
        logging.info(f'Training failed - {traceback.format_exc()}')
        # Bail out: the success log below would raise NameError on the
        # unbound result variables otherwise.
        return

    logging.info(f'Trained successfully in {train_time:.0f}s \n'
                 f'Accuracy: {cv_mean:.2f}+-{cv_std:.2f} \n'
                 f'{trained_model}')
else: plot_ivt(G, pos, M[:4], model, txtpos=.5, withlabels=lab) ps(f'inviteRW') N = struct.shape[0] cuda = False # if N > 50 else True approx = CensoredRW(N, sym=True, cuda=cuda) learning_rate = 0.1 train(approx, np.array(M), compare_true=A, batch_size=min(500, len(M)), epochs=50, callback=True, lr=learning_rate) res = fill_diagonal(approx.P.detach().cpu().numpy(), 0) def prep(A): return labeled_adj(struct.columns, minmax(symmetrize(A))) mkv1 = markov_model(M, k=1) mkv2 = markov_model(M, k=2) # mkv2 = hidden_markov(M, max(map(len, M)), N, n_jobs=4) cos = cosine_model(M) models = dict(
sys = subsys[lab.get_text()] lab.set_color(color_dict[sys]) plt.tight_layout() sns.despine() ps('tag_multinomial') # ################## Calculate INVITE Distances ################ # logging.info('training...') N = voc_nodes.shape[0] # uses pytorch + ADAM model = CensoredRW(N, cuda=False, sym=False) learning_rate = 0.1 train(model, np.array(m), batch_size=min(500, len(m)), epochs=50, callback=True, lr=learning_rate) logging.info('Done!') # ########## COMPARISONS ############# # def prep(A): return labeled_adj(voc_nodes, tools.minmax(tools.symmetrize(A))) res = tools.fill_diagonal(model.P.detach().cpu().numpy(), 0) mkv1 = tools.markov_model(m, k=1) # order 1, via `pomegranate` mkv2 = tools.markov_model(m, k=2) # order 2, via `pomegranate`
def main(n_seq, seed, epochs, threshold, cuda_id, use_cuda, save_dir,
         reg_epochs, multi, only_cnn):
    """Generate synthetic paired-motif sequences and benchmark VAE variants.

    Samples ``n_seq`` reads with planted motifs, writes the reads and motifs
    to ``save_dir``, then trains each architecture in the evaluation set
    ``multi`` times with identical settings.

    Parameters
    ----------
    n_seq : number of synthetic reads to generate.
    seed : RNG seed for the sequence generator.
    epochs, threshold, reg_epochs : training-loop settings forwarded to
        ``models.train``.
    cuda_id, use_cuda : GPU selection; falls back to CPU when CUDA is absent.
    save_dir : output directory (created if missing).
    multi : number of independent repetitions per architecture.
    only_cnn : when True, skip the LSTM and CNN+LSTM variants.

    Returns
    -------
    dict mapping model filename -> return value of ``models.train``.
    """
    logger = logging.getLogger(__name__)
    logger.info(f"saving to {save_dir}")
    save_dir = Path(save_dir)
    save_dir.mkdir(exist_ok=True, parents=True)

    # generate sequences
    fwd_adapter = "AAAAA"
    rev_adapter = "GGGGG"
    generator = SequenceGenerator(num_motifs=1,
                                  seed=seed,
                                  fix_random_region_length=True,
                                  error_rate=0,
                                  generate_motifs=True,
                                  add_primer=True,
                                  forward_primer=fwd_adapter,
                                  reverse_primer=rev_adapter,
                                  middle_insert_range=[2, 6],
                                  one_side_proba=0.5,
                                  paired=True)
    reads, motif_indices, paired_indices = generator.sample(n_seq)

    # NOTE(review): "seqences.txt" looks like a typo for "sequences.txt",
    # kept as-is because downstream tooling may read this exact path.
    with open(save_dir / "seqences.txt", "w") as f:
        for index, read in zip(motif_indices, reads):
            f.write(f"{index}, {read}\n")
    with open(save_dir / "motifs.txt", "w") as f:
        for motif in generator.motifs:
            f.write(f"{motif}\n")

    experiment = SingleRound(reads,
                             forward_adapter=fwd_adapter,
                             reverse_adapter=rev_adapter)

    # training setup
    train_loader, test_loader = experiment.get_dataloader(use_cuda=use_cuda)
    device = torch.device(f"cuda:{cuda_id}" if (
        use_cuda and torch.cuda.is_available()) else "cpu")
    train_kwargs = {
        "epochs": epochs,
        "threshold": threshold,
        "device": device,
        "train_loader": train_loader,
        "test_loader": test_loader,
        "save_dir": save_dir,
        "beta_schedule": True,
        "force_matching": True,
        "force_epochs": reg_epochs,
    }

    # evaluate models
    target_len = experiment.random_region_length
    results = dict()
    for i in range(multi):
        eval_models = [
            CNN_Mul_VAE(target_len=target_len, embed_size=2),
            CNN_AR_VAE(embed_size=2),
            CNN_PHMM_VAE(motif_len=target_len, embed_size=2)
        ]
        if not only_cnn:
            eval_models.extend([
                LSTM_Mul_VAE(target_len=target_len, embed_size=2),
                LSTM_AR_VAE(embed_size=2),
                LSTM_PHMM_VAE(motif_len=target_len, embed_size=2),
                CNNLSTM_Mul_VAE(target_len=target_len, embed_size=2),
                CNNLSTM_AR_VAE(embed_size=2),
                CNNLSTM_PHMM_VAE(motif_len=target_len, embed_size=2)
            ])
        for model in eval_models:
            # Class name directly, instead of parsing str(type(model)) —
            # same result without depending on the repr format.
            model_str = type(model).__name__.lower()
            if multi > 1:
                model_str += f"_{i}"
            model_str += ".mdl"
            # logger instead of print(): consistent with the rest of the file.
            logger.info(f"training {model_str}")
            optimizer = optim.Adam(model.parameters())
            model = model.to(device)
            train_kwargs.update({
                "model": model,
                "model_str": model_str,
                "optimizer": optimizer
            })
            results[model_str] = models.train(**train_kwargs)
            # Release cached GPU memory between independent runs.
            torch.cuda.empty_cache()
    # Previously computed and discarded; returning it is backward compatible.
    return results