def evaluate(model, problem):
    preds, acts = [], []
    for ids, targets, _ in problem.iterate(mode='val', shuffle=False):
        preds.append(to_numpy(model(ids, problem.adj, train=True)))
        acts.append(to_numpy(targets))
    
    return problem.metric_fn(np.vstack(acts), np.vstack(preds))
def eval_epoch(self, dataloaders, mode='val', num_batches=np.inf):
    loader = dataloaders[mode]
    if loader is None:
        return None
    
    gen = enumerate(loader)
    if self.verbose:
        gen = tqdm(gen, total=len(loader), desc='eval_epoch:%s' % mode)
    
    total_loss = 0.0
    correct, total, loss_hist = 0, 0, []
    for batch_idx, (data, target) in gen:
        output, loss = self.eval_batch(data, target)
        loss_hist.append(loss)
        
        correct += (to_numpy(output.float()).argmax(axis=1) == to_numpy(target)).sum()
        total += data.shape[0]
        total_loss += loss * data.shape[0]  # accumulate loss weighted by batch size
        
        if batch_idx > num_batches:
            break
        
        if self.verbose:
            gen.set_postfix(acc=correct / total)
    
    return {
        "acc": correct / total,
        "loss": total_loss / total,
    }
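# --
# Hypothetical usage sketch for eval_epoch above: `trainer` stands in for an
# instance of the class defining it, and we assume the dict returned by
# make_dataloaders (used elsewhere in this repo) carries a 'val' split.
dataloaders = make_dataloaders(root='../data', train_size=0.9)
stats = trainer.eval_epoch(dataloaders, mode='val')
if stats is not None:
    print('acc=%f | loss=%f' % (stats['acc'], stats['loss']))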
def evaluate(model, problem, mode='val'):
    assert mode in ['test', 'val']
    preds, acts = [], []
    for (ids, targets, _) in problem.iterate(mode=mode, shuffle=False):
        preds.append(to_numpy(model(ids, problem.feats, train=False)))
        acts.append(to_numpy(targets))
    
    return problem.metric_fn(np.vstack(acts), np.vstack(preds))
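# --
# Minimal sketch of how the evaluate API above is called; `model` and `problem`
# are placeholders for objects constructed elsewhere in this repo.
val_metric  = evaluate(model, problem, mode='val')
test_metric = evaluate(model, problem, mode='test')
print({"val": val_metric, "test": test_metric})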
def sample_actions(self, states):
    states = Variable(torch.from_numpy(states))
    if self._cuda:
        states = states.cuda()
    
    policy, value_predictions = self(states)
    probs = F.softmax(policy, dim=1)
    action = probs.multinomial(1)  # one sampled action index per row; newer PyTorch requires an explicit num_samples
    return to_numpy(action), to_numpy(value_predictions)
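# --
# Hypothetical usage of sample_actions: `agent` stands in for the policy/value
# module defining it, and the (batch, state_dim) shape is illustrative only.
import numpy as np
states = np.random.randn(8, 4).astype(np.float32)
actions, values = agent.sample_actions(states)
print(actions.shape, values.shape)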
def evaluate(model, problem, batch_size, loss_fn, mode='val'):
    assert mode in ['test', 'val']
    preds, acts = [], []
    loss = 0
    for (ids, targets, _) in problem.iterate(mode=mode, shuffle=False, batch_size=batch_size):
        pred, _ = model(ids, train=False)
        loss += loss_fn(pred, targets.squeeze()).item()
        preds.append(to_numpy(pred))
        acts.append(to_numpy(targets))
    
    return loss, problem.metric_fn(np.vstack(acts), np.vstack(preds))
def evaluate(model, problem, batch_size, loss_fn, coff, mode='val'):
    assert mode in ['test', 'val']
    preds, acts = [], []
    loss = 0
    for (ids, targets, _) in problem.iterate(mode=mode, shuffle=False, batch_size=batch_size):
        pred = model(problem.feats, feat2=None, msk=None, samp_bias1=None, samp_bias2=None, get_embed='pred')
        loss += loss_fn(pred[ids], targets.squeeze()).item()
        preds.append(to_numpy(pred[ids]))
        acts.append(to_numpy(targets))
    
    return loss, problem.metric_fn(np.vstack(acts), np.vstack(preds))
def _do_random_morph(model, morph_factories, assert_eye=True, attempts=10, block_model=False):
    if not block_model:
        block = model
    else:
        block_name = np.random.choice(list(model._sea_blocks.keys()))
        print(colstring.yellow("_do_random_morph: %s" % block_name), file=sys.stderr)
        block = model._sea_blocks[block_name][0]
    
    # --
    # Sample valid morph
    
    new_block = copy.deepcopy(block)
    morph_factory = np.random.choice(morph_factories)
    morph_function = morph_factory(new_block)
    while morph_function is None:
        morph_factory = np.random.choice(morph_factories)
        morph_function = morph_factory(new_block)
        attempts -= 1
        if attempts == 0:
            raise Exception('!! _do_random_morph: hit max attempts')
    
    # --
    # Apply morph
    
    new_model = copy.deepcopy(model)
    if not block_model:
        new_model = morph_function(new_model)
    else:
        for block in new_model._sea_blocks[block_name]:
            block = morph_function(block)
    
    new_model = new_model.eval()
    
    # --
    # Check idempotence
    
    if assert_eye:
        is_eye = np.allclose(to_numpy(new_model()), to_numpy(model()))
        if not is_eye:
            print('---- error ----', file=sys.stderr)
            print(model)
            print('**')
            print(new_model)
            raise Exception('_do_random_morph: assert_eye failed')
    
    return new_model
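# --
# Sketch of driving _do_random_morph in a search loop; `morph_factories` is
# assumed to be a list of callables that return either a morph function or
# None when no valid morph exists for the sampled block, as the retry loop
# above implies.
for _ in range(5):
    model = _do_random_morph(model, morph_factories, assert_eye=True)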
def test_morph(model, morph, morph_args, test_step=False):
    orig_scores = model()
    morph(model, morph_args)
    new_scores = model()
    # a valid morph must be function-preserving
    assert np.allclose(to_numpy(orig_scores), to_numpy(new_scores))
    
    if test_step:
        opt = torch.optim.Adam(model.parameters(), lr=0.1)
        # cross_entropy expects integer class targets, so draw random labels
        # (the original passed torch.randn, which is a float tensor and would error)
        targets = Variable(torch.LongTensor(new_scores.size(0)).random_(new_scores.size(1)))
        loss = F.cross_entropy(new_scores, targets)
        loss.backward()
        opt.step()
    
    return model
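# --
# Hypothetical exercise of test_morph: `net` and `wider_morph` stand in for a
# SeaNet-style model and one of this repo's morph functions, and the
# morph_args dict is illustrative only.
net = test_morph(net, wider_morph, morph_args={'idx': 0}, test_step=True)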
def embed_ppnp_supervised(*, ppr_array, y, idx_train, hidden_dim, lr, epochs, batch_size):
    """
        Minimize cross entropy between a linear transform of the node embeddings and the labels.
        Each node embedding is the PPR-weighted average of the other node embeddings.
    """
    # !! Could benefit a lot from early stopping
    # !! Could benefit a lot from features
    
    n_nodes = ppr_array.shape[0]
    
    # --
    # Train embedding
    
    # np.fill_diagonal(ppr_array, 0)
    
    model = SupervisedEmbeddingPPNP(
        ppr        = PrecomputedPPR(ppr=ppr_array),
        n_nodes    = n_nodes,
        hidden_dim = hidden_dim,
        n_classes  = len(set(y)),
    )
    
    model = model.cuda()
    model = model.train()
    
    loss_hist = train_supervised(model, y, idx_train, lr=lr, epochs=epochs, batch_size=batch_size)
    
    # extract embeddings in chunks to bound memory
    idx_chunks = np.array_split(np.arange(n_nodes), n_nodes // batch_size)
    with torch.no_grad():
        X_hat = np.row_stack([to_numpy(model(idx_chunk)) for idx_chunk in idx_chunks])
    
    return X_hat
def __call__(self, ids, n_samples=128):
    assert n_samples > 0, 'SparseUniformNeighborSampler: n_samples must be set explicitly'
    
    is_cuda = ids.is_cuda
    ids = to_numpy(ids)
    
    tmp = self.adj[ids]
    
    # sample column offsets uniformly, then wrap by each node's degree
    sel = np.random.choice(self.adj.shape[1], (ids.shape[0], n_samples))
    sel = sel % self.degrees[ids].reshape(-1, 1)
    
    tmp = tmp[
        np.arange(ids.shape[0]).repeat(n_samples).reshape(-1),
        np.array(sel).reshape(-1)
    ]
    tmp = np.asarray(tmp).squeeze()
    
    tmp = Variable(torch.LongTensor(tmp))
    if is_cuda:
        tmp = tmp.cuda()
    
    return tmp
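# --
# Usage sketch, assuming `adj` is an (n_nodes x max_degree) neighbor table and
# `degrees` the per-node degree vector that the indexing above implies; the
# constructor signature is assumed, not taken from this file.
sampler = SparseUniformNeighborSampler(adj=adj, degrees=degrees)
ids = Variable(torch.LongTensor([0, 1, 2]))
neibs = sampler(ids, n_samples=16)  # flat LongTensor of 3 * 16 sampled neighbor ids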
def embed_ppnp(*, ppr_array, hidden_dim, lr, epochs, batch_size):
    """ Minimize L2 distance between each node embedding and the PPR-weighted average of the other node embeddings. """
    
    n_nodes = ppr_array.shape[0]
    np.fill_diagonal(ppr_array, 0)
    
    model = EmbeddingPPNP(
        ppr        = PrecomputedPPR(ppr=ppr_array),
        n_nodes    = n_nodes,
        hidden_dim = hidden_dim,
    )
    
    model = model.cuda()
    model = model.train()
    
    loss_hist = train_unsupervised(model, lr=lr, epochs=epochs, batch_size=batch_size)
    
    idx_chunks = np.array_split(np.arange(n_nodes), n_nodes // batch_size)
    with torch.no_grad():
        X_hat = np.row_stack([to_numpy(model(idx_chunk)[1]) for idx_chunk in idx_chunks])
    
    return X_hat
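# --
# Toy invocation of embed_ppnp; EmbeddingPPNP, PrecomputedPPR and
# train_unsupervised come from this repo, and a CUDA device is assumed,
# as in the function body.
import numpy as np
ppr = np.random.rand(100, 100).astype(np.float32)
ppr /= ppr.sum(axis=-1, keepdims=True)  # row-normalize; illustrative only
X_hat = embed_ppnp(ppr_array=ppr, hidden_dim=32, lr=1e-2, epochs=5, batch_size=10)
print(X_hat.shape)  # expected (100, 32), assuming model(idx)[1] returns the embeddings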
if not args.qm_files_in:
    raise ValueError("Missing input QM files")

if isinstance(args.qm_files_in, str):  # was `type(...) == "str"`, which always evaluated False
    infiles = [args.qm_files_in]
else:
    infiles = args.qm_files_in

# rewrite e.g. "[0] - [1]" into "dms[0][i,j] - dms[1][i,j]" for eval() below
operation = re.sub(r"\[", "dms[", args.operation)
operation = re.sub(r"\]", "][i,j]", operation)

# get density matrices
molecules = []
for filename in infiles:
    print(filename)
    molecules.append(horton.IOData.from_file(filename))

dms = []
for mol in molecules:
    dms.append(to_numpy(mol.get_dm_full()))

reference = molecules[0]
dm = reference.get_dm_full()
for i in range(reference.obasis.nbasis):
    for j in range(reference.obasis.nbasis):
        dm.set_element(i, j, eval(operation))

# get cube points
xyzgrid, origin, npts, spacings = get_cubic_grid(reference, args.cube_buffer, args.cube_density)

# evaluate the real-space density on the cubic grid, using the transformed dm in the basis of the first molecule
data = reference.obasis.compute_grid_density_dm(dm, xyzgrid)

# write the transformed real-space density to a cube file
write_cube(args.outfile, reference, data, origin, npts, spacings)
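# --
# Runnable micro-check of the operation rewrite in the script above: the two
# regex substitutions turn a user-supplied string of density-matrix slots into
# an element-wise expression over `dms`, evaluated for every basis pair (i, j).
import re
op = "[0] - [1]"
op = re.sub(r"\[", "dms[", op)
op = re.sub(r"\]", "][i,j]", op)
assert op == "dms[0][i,j] - dms[1][i,j]"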
set_seeds(args.seed ** 2)

for epoch in range(args.epochs):
    # Train
    _ = model.train()
    for ids, targets, progress in problem.iterate(mode='train', shuffle=True, batch_size=args.batch_size):
        preds = model.train_step(
            ids=ids,
            adj=problem.adj,
            targets=targets,
            loss_fn=problem.loss_fn,
        )
        sys.stderr.write("\repoch=%d | progress=%f" % (epoch, progress))
        sys.stderr.flush()
    
    # Evaluate
    _ = model.eval()
    print()
    print({
        "epoch": epoch,
        "train_metric": problem.metric_fn(to_numpy(targets), to_numpy(preds)),
        "val_metric": evaluate(model, problem),
    })
    print()
import argparse
import numpy as np
from torch.autograd import Variable

from trainer import make_dataloaders, train
from helpers import to_numpy
from seanet import SeaNet  # module path assumed; SeaNet is defined elsewhere in this repo

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--inpath', type=str)
    parser.add_argument('--outpath', type=str)
    return parser.parse_args()

# --
# Run

if __name__ == "__main__":
    args = parse_args()
    print('%s -> %s' % (args.inpath, args.outpath))
    
    model = SeaNet.load(args.inpath).cuda().eval()
    dataloader = make_dataloaders(root='../data', train_size=1.0)['test']
    
    all_outputs = []
    for batch_idx, (data, targets) in enumerate(dataloader):
        data = Variable(data.cuda(), volatile=True)  # volatile: no gradients (pre-0.4 PyTorch idiom)
        all_outputs.append(to_numpy(model(data)))
    
    all_outputs = np.vstack(all_outputs)
    np.save(args.outpath, all_outputs)
if args.lr_schedule == 'cosine':
    lr = lr_scheduler(Tcur + epoch_progress, epochs=Ti)
    LRSchedule.set_lr(optimizer, lr)
    print('learning rate:{}'.format(lr))
else:
    # set_progress(optimizer, lr_scheduler, (epoch + epoch_progress) / args.epochs)
    pass

loss, preds = train_step(
    model=model,
    optimizer=optimizer,
    ids=ids,
    targets=targets,
    loss_fn=problem.loss_fn,
)
train_loss += loss.item()
train_metric = problem.metric_fn(to_numpy(targets), to_numpy(preds))

# print(json.dumps({
#     "epoch": epoch,
#     "epoch_progress": epoch_progress,
#     "train_metric": train_metric,
#     "time": time() - start_time,
# }, double_precision=5))
# sys.stdout.flush()

print(json.dumps({
    "epoch": epoch,
    "time": time() - start_time,
    "train_loss": train_loss,
}, double_precision=5))
sys.stdout.flush()
if args.lr_schedule == 'cosine':
    lr = lr_scheduler(Tcur + epoch_progress, epochs=Ti)
    LRSchedule.set_lr(optimizer, lr)
    print('learning rate:{}'.format(lr))
else:
    set_progress(optimizer, lr_scheduler, (epoch + epoch_progress) / args.epochs)

loss, preds = train_step(
    model=model,
    optimizer=optimizer,
    ids=ids,
    targets=targets,
    loss_fn=problem.loss_fn,
)
train_loss += loss.item()
train_metric = problem.metric_fn(to_numpy(targets), to_numpy(preds))

# print(json.dumps({
#     "epoch": epoch,
#     "epoch_progress": epoch_progress,
#     "train_metric": train_metric,
#     "time": time() - start_time,
# }, double_precision=5))
# sys.stdout.flush()

print(json.dumps({
    "epoch": epoch,
    "time": time() - start_time,
    "train_loss": train_loss,
}, double_precision=5))  # double_precision is a ujson-style kwarg; stdlib json.dumps does not accept it
sys.stdout.flush()
def sample_action(self, state):
    state = Variable(torch.from_numpy(state).unsqueeze(0))
    action_mean, action_log_std = self._forward(state)
    action = torch.normal(action_mean, torch.exp(action_log_std))
    return to_numpy(action).squeeze(axis=0)
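# --
# Hypothetical usage of sample_action: `policy` stands in for the Gaussian
# policy module defining it; the 4-dimensional state is illustrative only.
import numpy as np
state = np.random.randn(4).astype(np.float32)
action = policy.sample_action(state)  # 1D numpy array, one action vector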
# --
# IO

# Get metadata
log_fs = np.array(sorted(glob('../results/logs/run-n12-m2-s20/*')))
meta = list(map(lambda x: json.load(open(x)), log_fs))
meta = pd.DataFrame([{
    "model_name" : os.path.basename(x['log_path']),
    "timestamp"  : x['timestamp'],
} for x in meta])
meta = meta.sort_values('timestamp').reset_index(drop=True)

# Get labels
dataloader = make_dataloaders(root='../data', train_size=1.0)['test']
targets = np.hstack([to_numpy(targets) for _, targets in dataloader])

# Get predictions
fs = np.array(sorted(glob('../results/predictions/run-n12-m2-s20/*')))
all_preds = []
for f in tqdm(fs):
    all_preds.append(np.load(f))

all_preds = np.array(all_preds)

# Compute accuracy
accs = np.array([(all_preds[i].argmax(axis=1) == targets).mean() for i in range(all_preds.shape[0])])

# sel = accs > 0.8
# fs, all_preds, accs = fs[sel], all_preds[sel], accs[sel]