def fit(self):
    it = range(self.n_iter)
    if self.verbose:
        it = ipb(it)
    for e in it:
        self.__update__(e)
    self.decomposed = True
    return self
def engine(
    ckpt_loc: str = 'ckpt-default',
    device_id: int = 1,
    batch_size: int = 128,
    use_cuda: bool = True,
    num_embeddings: int = 8,
    casual_hidden_sizes: t.Iterable = [16, 32],
    num_botnec_feat: int = 72,
    num_k_feat: int = 24,
    num_dense_layers: int = 20,
    num_out_feat: int = 268,
    num_z_feat: int = 10,
    activation: str = 'elu',
    LR: float = 1e-3,
    final_lr: float = 0.1,
    init_beta: float = 0.,
    final_beta: float = 1.,
    num_annealing_steps: int = 2000,
    # beta: float=0.25,
    grad_clip=3.0,
    num_epochs: int = 5,
    num_p=1
):
    # Linear KL annealing: beta is ramped from init_beta towards final_beta.
    beta_step_len = (final_beta - init_beta) / num_annealing_steps

    model = GraphInf(
        num_in_feat=43,
        num_c_feat=8,
        num_embeddings=num_embeddings,
        casual_hidden_sizes=casual_hidden_sizes,
        num_botnec_feat=num_botnec_feat,  # 16 x 4
        num_k_feat=num_k_feat,  # 16
        num_dense_layers=num_dense_layers,
        num_out_feat=num_out_feat,
        num_z_feat=num_z_feat,
        activation=activation,
        use_cuda=use_cuda
    )
    optim = adabound.AdaBound(
        model.parameters(),
        lr=LR,
        final_lr=final_lr
    )

    device = torch.device(f'cuda:{device_id}')
    model = model.to(device)
    model.train()

    save_loc = path.join(path.dirname(__file__), 'ckpt', ckpt_loc)
    events_loc = path.join(save_loc, 'events')
    if not path.exists(events_loc):
        makedirs(events_loc)

    try:
        with SummaryWriter(events_loc) as writer:
            step = 0
            has_nan_or_inf = False
            train_loader = ComLoader(
                original_scaffolds_file='data-center/train.smi',
                num_workers=num_p,
                batch_size=batch_size
            )
            test_loader = ComLoader(
                original_scaffolds_file='data-center/test.smi',
                num_workers=num_p,
                batch_size=batch_size
            )
            for epoch in ipb(range(num_epochs), desc='epochs'):
                iter_train = iter(train_loader)
                iter_test = iter(test_loader)
                try:
                    if has_nan_or_inf:
                        break
                    # One test batch is interleaved after every 200 training steps.
                    for i in ipb(
                        range(
                            train_loader.num_id_block +
                            train_loader.num_id_block // 200
                        ),
                        desc='iteration'
                    ):
                        if step > 0 and step % 200 == 0:
                            batch = next(iter_test)
                        else:
                            batch = next(iter_train)
                        (
                            block,
                            nums_nodes,
                            nums_edges,
                            seg_ids,
                            bond_info_all,
                            nodes_o,
                            nodes_c
                        ) = batch

                        beta = min(init_beta + beta_step_len * step, 1)

                        num_N = sum(nums_nodes)
                        num_E = sum(nums_edges)

                        values = torch.ones(num_E)
                        s_adj = torch.sparse_coo_tensor(
                            bond_info_all.T,
                            values,
                            torch.Size([num_N, num_N])
                        ).to(device)
                        s_nfeat = torch.from_numpy(nodes_o).to(device)
                        c_nfeat = torch.from_numpy(nodes_c).to(device)

                        x_recon, mu1, logvar1, mu2, logvar2 = (
                            model(s_nfeat, c_nfeat, s_adj)
                        )
                        seg_ids = torch.from_numpy(seg_ids)
                        optim.zero_grad()
                        MSE, KL = loss_func(
                            x_recon, s_nfeat,
                            mu1, logvar1,
                            mu2, logvar2,
                            seg_ids
                        )
                        loss = MSE + beta * KL

                        if not (step > 0 and step % 200 == 0):
                            loss.backward()
                            torch.nn.utils.clip_grad_norm_(
                                model.parameters(),
                                grad_clip
                            )
                            optim.step()

                            # debug for NaN/Inf in the gradients
                            has_nan_or_inf = torch.cat(
                                [
                                    torch.stack(
                                        (
                                            torch.isnan(params.grad).any(),
                                            torch.isinf(params.grad).any()
                                        ),
                                        dim=-1
                                    )
                                    for params in model.parameters()
                                ],
                                dim=-1
                            ).any()

                            if has_nan_or_inf:
                                # Dump the model and the offending batch for post-mortem.
                                torch.save(
                                    model,
                                    path.join(save_loc, f'broken_{epoch}.ckpt')
                                )
                                torch.save(
                                    s_nfeat,
                                    path.join(save_loc, f's_nfeat_{epoch}.pt')
                                )
                                torch.save(
                                    c_nfeat,
                                    path.join(save_loc, f'c_nfeat_{epoch}.pt')
                                )
                                torch.save(
                                    s_adj.to_dense(),
                                    path.join(save_loc, f's_adj_{epoch}.pt')
                                )
                                torch.save(
                                    seg_ids,
                                    path.join(save_loc, f'seg_ids_{epoch}.pt')
                                )
                                with open(
                                    path.join(save_loc, 'batch.smi'), 'w'
                                ) as f:
                                    for smiles in block:
                                        f.write(smiles + '\n')
                                break

                        if not (step > 0 and step % 200 == 0):
                            writer.add_scalar('loss', loss.cpu().item(), step)
                            writer.add_scalar('recon_loss', MSE.cpu().item(), step)
                            writer.add_scalar('KL', KL.cpu().item(), step)
                        else:
                            writer.add_scalar('test_loss', loss.cpu().item(), step)
                            writer.add_scalar(
                                'test_recon_loss', MSE.cpu().item(), step
                            )
                            writer.add_scalar('test_KL', KL.cpu().item(), step)

                        step += 1

                    torch.save(
                        model,
                        path.join(save_loc, f'model_{epoch}.ckpt')
                    )
                except StopIteration:
                    continue
    except KeyboardInterrupt:
        torch.save(
            model,
            path.join(save_loc, f'model_{epoch}.ckpt')
        )
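# The training loop above anneals the KL weight `beta` linearly from `init_beta`
# towards `final_beta` over `num_annealing_steps` optimizer steps and then holds
# it constant. A minimal, self-contained sketch of that schedule, assuming the
# inline expression `min(init_beta + beta_step_len * step, 1)` is meant as a
# linear warm-up (the helper name `beta_at_step` is hypothetical):

def beta_at_step(step, init_beta=0.0, final_beta=1.0, num_annealing_steps=2000):
    """Linear KL warm-up: ramp beta from init_beta to final_beta, then plateau."""
    beta_step_len = (final_beta - init_beta) / num_annealing_steps
    return min(init_beta + beta_step_len * step, final_beta)


# e.g. beta_at_step(0) == 0.0, beta_at_step(1000) == 0.5, beta_at_step(5000) == 1.0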
def epochs_to_task_completions(runs, name=None, ignore_runs=None,
                               samples=500, ipb_desc=None):
    if name is None:
        name = 'epochs_to_completion'
    results_per_run = []
    last_tasks_per_run = []
    for run in ipb(runs, desc=ipb_desc):
        # if i > 0 and i % 10 == 0:
        #     print(run.name, i)
        # else:
        #     print(run.name)
        if ignore_runs is not None and run.name in ignore_runs:
            continue
        df = run.history(pandas=True, samples=samples)
        # Some runs log an all-NaN first row; offset the indices by one in that case.
        first_row_blank = int(np.isnan(df['Test Accuracy'][0]))
        first_task_finished = (
            df['Test Accuracy, Query #2'].first_valid_index() - first_row_blank
        )
        task_finishes = [first_task_finished]
        last_tasks = [1]
        for current_task in range(2, 11):
            current_task_start = df[
                f'Test Accuracy, Query #{current_task}'].first_valid_index()
            if current_task == 10:
                current_task_end = df.shape[0]
            else:
                current_task_end = df[
                    f'Test Accuracy, Query #{current_task + 1}'
                ].first_valid_index()
            task_finishes.append(current_task_end)
            sub_df = df[[
                f'Test Accuracy, Query #{i + 1}' for i in range(current_task)
            ]]
            # The task with the lowest accuracy just before the next task starts
            # is taken to be the last one that finished training.
            min_column_name = sub_df.iloc[current_task_end - 2].idxmin()
            last_task_to_finish = int(
                min_column_name[min_column_name.find('#') + 1:])
            last_tasks.append(last_task_to_finish)
        results_per_run.append(task_finishes)
        last_tasks_per_run.append(last_tasks)
    results_per_run = np.array(results_per_run)
    trials_per_epoch = np.array(
        [epoch_to_trials(name, e) for e in np.arange(1, 11)])
    trials_per_run = results_per_run * trials_per_epoch
    return (
        ResultSet(name=name,
                  mean=np.nanmean(results_per_run, axis=0),
                  std=np.nanstd(results_per_run, axis=0)),
        ResultSet(name=name,
                  mean=np.nanmean(np.log(results_per_run), axis=0),
                  std=np.nanstd(np.log(results_per_run), axis=0)),
        ResultSet(name=name,
                  mean=np.nanmean(trials_per_run, axis=0),
                  std=np.nanstd(trials_per_run, axis=0)),
        ResultSet(name=name,
                  mean=np.nanmean(np.log(trials_per_run), axis=0),
                  std=np.nanstd(np.log(trials_per_run), axis=0)),
        last_tasks_per_run
    )
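# `epochs_to_task_completions` infers when each task was added by finding the
# first row of the (presumably Weights & Biases) run history where that task's
# accuracy column stops being NaN, offset by one if the first logged row is
# blank. A toy illustration of the two pandas calls it relies on; the column
# names mirror the keys used above, the numbers are made up:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    'Test Accuracy':           [np.nan, 0.50, 0.60, 0.70],
    'Test Accuracy, Query #2': [np.nan, np.nan, 0.40, 0.55],
})
first_row_blank = int(np.isnan(df['Test Accuracy'][0]))   # 1: first logged row is blank
first_task_finished = (
    df['Test Accuracy, Query #2'].first_valid_index() - first_row_blank
)
print(first_task_finished)   # 1 -> task 1 completed after one logged epoch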
def engine(
    config_id='naive3',
    device_id=3,
    model_idx=4,
    scaffolds_file='data-center/test.smi',
    batch_size=500,
    np=mp.cpu_count(),
):
    device = torch.device(f'cuda:{device_id}')
    model_ckpt = path.join(path.dirname(__file__), 'ckpt', config_id,
                           f'model_{model_idx}_ckpt.ckpt')
    model_dic_loc = path.join(path.dirname(__file__), 'ckpt', config_id,
                              'modle_dic.json')
    # Fall back to the default hyper-parameters when no config JSON is found.
    if path.exists(model_dic_loc):
        with open(model_dic_loc) as f:
            model_dic = json.load(f)
    else:
        model_dic = dict(
            num_in_feat=43,
            num_c_feat=8,
            num_embeddings=8,
            casual_hidden_sizes=[16, 32],
            num_botnec_feat=72,  # 16 x 4
            num_k_feat=24,  # 16
            num_dense_layers=20,
            num_out_feat=268,
            num_z_feat=10,
            activation='elu',
            use_cuda=True
        )
    # print(model_ckpt)
    model = GraphInf(**model_dic)
    model.load_state_dict(torch.load(model_ckpt))
    print(device_id)
    model.to(device)
    model.eval()
    dataloader = ComLoader(
        original_scaffolds_file=scaffolds_file,
        batch_size=batch_size,
        num_workers=1
    )
    all_num_valid = 0
    all_num_recon = 0
    events_loc = f'eval_configs/{config_id}/'
    if not path.exists(events_loc):
        makedirs(events_loc)
    with SummaryWriter(events_loc) as writer:
        step = 0
        with open(f'eval_configs/{config_id}_records.txt', 'w') as f:
            for batch in ipb(dataloader, desc="step",
                             total=dataloader.num_id_block):
                (block, nums_nodes, nums_edges, seg_ids, bond_info_all,
                 nodes_o, nodes_c) = batch
                num_N = sum(nums_nodes)
                num_E = sum(nums_edges)
                values = torch.ones(num_E)
                s_adj = torch.sparse_coo_tensor(
                    bond_info_all.T, values,
                    torch.Size([num_N, num_N])
                ).to(device)
                s_nfeat = torch.from_numpy(nodes_o).to(device)
                c_nfeat = torch.from_numpy(nodes_c).to(device)
                x_inf, mu2, var2 = model.inf(c_nfeat, s_adj)
                x_inf, mu2, var2 = (x_inf.cpu().detach(),
                                    mu2.cpu().detach(),
                                    var2.cpu().detach())
                x_recon, mu1, var1 = model.reconstrcut(s_nfeat, c_nfeat, s_adj)
                x_recon, mu1, var1 = (x_recon.cpu().detach(),
                                      mu1.cpu().detach(),
                                      var1.cpu().detach())
                seg_ids = torch.from_numpy(seg_ids)
                MSE, KL = loss_func(x_recon, s_nfeat, mu1, var1, mu2, var2,
                                    seg_ids)
                loss = MSE + KL
                writer.add_scalar('loss', loss.cpu().item(), step)
                writer.add_scalar('recon_loss', MSE.cpu().item(), step)
                writer.add_scalar('KL', KL.cpu().item(), step)
                # Split the flat node tensors back into per-molecule chunks
                # and decode them in parallel.
                ls_x_inf = torch.split(x_inf, nums_nodes)
                ls_x_recon = torch.split(x_recon, nums_nodes)
                ls_mols_inf = Parallel(n_jobs=np, backend='multiprocessing')(
                    delayed(get_mol_from_array)(ls_x_inf[i], block[i],
                                                True, False)
                    for i in range(len(block)))
                ls_mols_recon = Parallel(n_jobs=np, backend='multiprocessing')(
                    delayed(get_mol_from_array)(ls_x_recon[i], block[i],
                                                True, True)
                    for i in range(len(block)))
                num_valid = sum(x is not None for x in ls_mols_inf)
                num_recon = sum(ls_mols_recon[i] == block[i]
                                for i in range(len(block)))
                all_num_valid += num_valid
                all_num_recon += num_recon
                f.write(str(num_valid) + '\t' +
                        str(num_recon) + '\t' +
                        str(len(ls_mols_inf)) + '\n')
                f.flush()
                step += 1
    with open(f'eval_configs/{config_id}.txt', 'w') as f:
        f.write(str(all_num_valid) + '\t')
        f.write(str(all_num_recon))
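# Both evaluation engines decode molecules with joblib's Parallel/delayed
# pattern: one `get_mol_from_array` call per graph in the batch, dispatched to
# worker processes (`n_jobs` comes from the `np` parameter above). A minimal,
# self-contained sketch of the same pattern with a stand-in function
# (`square` is hypothetical, used only to keep the example runnable):

from joblib import Parallel, delayed


def square(x):
    return x * x


if __name__ == '__main__':
    results = Parallel(n_jobs=2, backend='multiprocessing')(
        delayed(square)(i) for i in range(4)
    )
    print(results)   # [0, 1, 4, 9]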
def engine(
    config_id='naive3',
    device_id=2,
    model_idx=4,
    scaffolds_file='data-center/test.smi',
    batch_size=500,
    np=mp.cpu_count(),
):
    device = torch.device(f'cuda:{device_id}')
    model_ckpt = path.join(
        path.dirname(__file__), 'ckpt', config_id, f'model_{model_idx}.ckpt'
    )
    # print(model_ckpt)
    model = torch.load(model_ckpt).to(device)
    model.eval()
    dataloader = ComLoader(
        original_scaffolds_file=scaffolds_file,
        batch_size=batch_size,
        num_workers=1
    )
    all_num_valid = 0
    all_num_recon = 0
    with open(f'eval_configs/{config_id}_records.txt', 'w') as f:
        for batch in ipb(
            dataloader, desc="step", total=dataloader.num_id_block
        ):
            (
                block,
                nums_nodes,
                nums_edges,
                seg_ids,
                bond_info_all,
                nodes_o,
                nodes_c
            ) = batch
            num_N = sum(nums_nodes)
            num_E = sum(nums_edges)
            values = torch.ones(num_E)
            s_adj = torch.sparse_coo_tensor(
                bond_info_all.T,
                values,
                torch.Size([num_N, num_N])
            ).to(device)
            s_nfeat = torch.from_numpy(nodes_o).to(device)
            c_nfeat = torch.from_numpy(nodes_c).to(device)
            x_inf = model.inf(c_nfeat, s_adj).cpu().detach()
            x_recon = model.reconstrcut(s_nfeat, s_adj).cpu().detach()
            ls_x_inf = torch.split(x_inf, nums_nodes)
            ls_x_recon = torch.split(x_recon, nums_nodes)
            ls_mols_inf = Parallel(n_jobs=np, backend='multiprocessing')(
                delayed(get_mol_from_array)(
                    ls_x_inf[i], block[i], True, False
                )
                for i in range(len(block))
            )
            ls_mols_recon = Parallel(n_jobs=np, backend='multiprocessing')(
                delayed(get_mol_from_array)(
                    ls_x_recon[i], block[i], True, True
                )
                for i in range(len(block))
            )
            num_valid = sum(x is not None for x in ls_mols_inf)
            num_recon = sum(
                ls_mols_recon[i] == block[i] for i in range(len(block))
            )
            all_num_valid += num_valid
            all_num_recon += num_recon
            f.write(
                str(num_valid) + '\t' +
                str(num_recon) + '\t' +
                str(len(ls_mols_inf)) + '\n'
            )
            f.flush()
    with open(f'eval_configs/{config_id}.txt', 'w') as f:
        f.write(str(all_num_valid) + '\t')
        f.write(str(all_num_recon))
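# All three engines pack a batch of graphs into one block-diagonal sparse
# adjacency: `bond_info_all` holds (source, target) node index pairs, presumably
# already offset per graph so they index into the whole batch, and
# `torch.sparse_coo_tensor` turns them into a single num_N x num_N matrix.
# A minimal sketch with made-up numbers (a 3-node batch with two directed
# edges); the variable names mirror the code above:

import torch

bond_info_all = torch.tensor([[0, 1], [1, 2]])   # edges 0 -> 1 and 1 -> 2
num_N, num_E = 3, 2                              # sum(nums_nodes), sum(nums_edges)
values = torch.ones(num_E)
s_adj = torch.sparse_coo_tensor(
    bond_info_all.T, values, torch.Size([num_N, num_N])
)
print(s_adj.to_dense())
# tensor([[0., 1., 0.],
#         [0., 0., 1.],
#         [0., 0., 0.]])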