def generate_mask(start, end):
    """Return a boolean mask that is True on ``[start, end)`` and False elsewhere.

    The mask is built by passing ``count`` to ``torch.BoolTensor``; ``count`` is
    not a parameter and must be resolvable from the enclosing scope.
    """
    # BoolTensor(count) is uninitialized memory, so clear it before use.
    mask = torch.BoolTensor(count).fill_(False)
    mask[start:end] = True
    return mask
def main(args):
    """Train a GCN with neighbor sampling and print test accuracy every epoch.

    ``args`` supplies the dataset selection and the hyper-parameters read
    below (``self_loop``, ``dataset``, ``gpu``, ``n_hidden``, ``n_layers``,
    ``dropout``, ``lr``, ``weight_decay``, ``n_epochs``, ``batch_size``,
    ``test_batch_size``, ``num_neighbors``).
    """
    # load and preprocess dataset
    data = load_data(args)

    needs_self_loops = args.self_loop and not args.dataset.startswith('reddit')
    if needs_self_loops:
        data.graph.add_edges_from([(i, i) for i in range(len(data.graph))])

    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)
    test_nid = np.nonzero(data.test_mask)[0].astype(np.int64)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)

    # Older torch releases have no BoolTensor; fall back to ByteTensor there.
    mask_type = torch.BoolTensor if hasattr(torch, 'BoolTensor') else torch.ByteTensor
    train_mask = mask_type(data.train_mask)
    val_mask = mask_type(data.val_mask)
    test_mask = mask_type(data.test_mask)

    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.sum().item()
    n_val_samples = val_mask.sum().item()
    n_test_samples = test_mask.sum().item()

    stats = (n_edges, n_classes, n_train_samples, n_val_samples, n_test_samples)
    print("""----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % stats)

    # create GCN model
    g = DGLGraph(data.graph, readonly=True)
    norm = 1. / g.in_degrees().float().unsqueeze(1)

    # A negative gpu index means "stay on the CPU".
    cuda = not (args.gpu < 0)
    if cuda:
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        norm = norm.cuda()

    g.ndata['features'] = features
    g.ndata['norm'] = norm

    model = GCNSampling(in_feats, args.n_hidden, n_classes, args.n_layers,
                        F.relu, args.dropout)
    if cuda:
        model.cuda()

    loss_fcn = nn.CrossEntropyLoss()

    infer_model = GCNInfer(in_feats, args.n_hidden, n_classes, args.n_layers,
                           F.relu)
    if cuda:
        infer_model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    n_hops = args.n_layers + 1
    for epoch in range(args.n_epochs):
        # ---- training pass over sampled node flows ----
        train_sampler = dgl.contrib.sampling.NeighborSampler(
            g, args.batch_size, args.num_neighbors,
            neighbor_type='in', shuffle=True, num_workers=32,
            num_hops=n_hops, seed_nodes=train_nid)
        for nf in train_sampler:
            nf.copy_from_parent()
            model.train()
            # forward
            pred = model(nf)
            seed_ids = nf.layer_parent_nid(-1).to(device=pred.device,
                                                  dtype=torch.long)
            loss = loss_fcn(pred, labels[seed_ids])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Mirror the trained weights into the inference model.
        for dst, src in zip(infer_model.parameters(), model.parameters()):
            dst.data.copy_(src.data)

        # ---- evaluation pass: the expansion argument is the node count ----
        num_acc = 0.
        test_sampler = dgl.contrib.sampling.NeighborSampler(
            g, args.test_batch_size, g.number_of_nodes(),
            neighbor_type='in', num_workers=32,
            num_hops=n_hops, seed_nodes=test_nid)
        for nf in test_sampler:
            nf.copy_from_parent()
            infer_model.eval()
            with torch.no_grad():
                pred = infer_model(nf)
                seed_ids = nf.layer_parent_nid(-1).to(device=pred.device,
                                                      dtype=torch.long)
                batch_labels = labels[seed_ids]
                hits = pred.argmax(dim=1) == batch_labels
                num_acc += hits.sum().cpu().item()

        print("Test Accuracy {:.4f}".format(num_acc / n_test_samples))
def _add_constraints(self, mask, known, y_train_labeled, y_train_unlabeled_truth): """ Add additional constraints to the equivalence matrix. :param mask: Binary matrix with value 0 in entry (i,j) if it is known whether i and j belong to the same class and 1 else :param known: Binary matrix with value 1 in entry (i,j) if it is known that i and j belong to the same class and 0 else :param y_train_labeled: Labels for the labeled subset of the batch :param y_train_unlabeled_truth: True (generally unknown) labels for the unlabeled subset of the batch :return: mask: Binary matrix with value 0 in entry (i,j) if it is known whether i and j belong to the same class and 1 else :return: known: Binary matrix with value 1 in entry (i,j) if it is known that i and j belong to the same class and 0 else """ if self.params.add_constraints_method == 'random': n = len(mask) nl = len(y_train_labeled) mask = (torch.BoolTensor(n, n).zero_() + 1) y_labeled_one_hot = opt_utils.one_hot_embedding( y_train_labeled, self.params.nclasses) mask = mask.cpu().numpy() idxs = np.random.choice([0, 1], size=(n, n), p=[ 1 - self.params.add_constraints_frac, self.params.add_constraints_frac ]) idxs = np.triu(idxs, k=1) idxs = idxs + idxs.T idxs = idxs.astype('bool') mask = mask * (~idxs) true_y = opt_utils.one_hot_embedding( torch.cat((y_train_labeled, y_train_unlabeled_truth.to(defaults.device))), self.params.nclasses) true_m = true_y.mm(true_y.t()) known = true_m * torch.Tensor(idxs).to(defaults.device) mask = torch.from_numpy(mask).to(defaults.device) known[:nl, :nl] = y_labeled_one_hot.mm(y_labeled_one_hot.t()) torch.diagonal(known).fill_(1) mask[:nl, :nl] = 0 torch.diagonal(mask).fill_(0) # Remove 1's among (labeled, unlabeled) pairs bad_idxs = known[:nl, nl:] == 1 known[:nl, nl:][bad_idxs] = 0 mask[:nl, nl:][bad_idxs] = 1 bad_idxs = known[nl:, :nl] == 1 known[nl:, :nl][bad_idxs] = 0 mask[nl:, :nl][bad_idxs] = 1 elif self.params.add_constraints_method == 'specific': mask = mask.cpu().numpy() nl = 
len(y_train_labeled) idxs_unlabeled = np.isin(y_train_unlabeled_truth.cpu(), self.params.add_constraints_classes) idxs_labeled = np.isin(y_train_labeled.cpu(), self.params.add_constraints_classes) mask[:nl, nl:][np.ix_(~idxs_labeled, idxs_unlabeled)] = 0 mask[nl:, :nl][np.ix_(idxs_unlabeled, ~idxs_labeled)] = 0 mask[:nl, nl:][np.ix_(idxs_labeled, ~idxs_unlabeled)] = 0 mask[nl:, :nl][np.ix_(~idxs_unlabeled, idxs_labeled)] = 0 mask[nl:, nl:][np.ix_(idxs_unlabeled, ~idxs_unlabeled)] = 0 mask[nl:, nl:][np.ix_(~idxs_unlabeled, idxs_unlabeled)] = 0 mask = torch.from_numpy(mask).to(defaults.device) return mask, known
def run(args, device, data):
    """Train a GraphSAGE model with neighbor sampling and log speed/timing.

    Args:
        args: Namespace providing ``fan_out`` (comma-separated ints),
            ``batch_size``, ``num_workers``, ``num_hidden``, ``num_layers``,
            ``dropout``, ``lr``, ``num_epochs``, ``log_every`` and
            ``eval_every``.
        device: Device the model, loss and mini-batches are moved to.
        data: Tuple ``(train_mask, val_mask, in_feats, labels, n_classes, g)``.

    The average epoch time excludes the first ``warmup_epochs`` epochs. It is
    computed from an explicit count of timed epochs so that short runs
    (``num_epochs <= 5``) no longer divide by zero or by a negative number.
    """
    # Epochs skipped when averaging the epoch time.
    warmup_epochs = 5

    # Unpack data
    train_mask, val_mask, in_feats, labels, n_classes, g = data
    train_nid = th.LongTensor(np.nonzero(train_mask)[0])
    val_mask = th.BoolTensor(val_mask)

    # Create sampler
    sampler = NeighborSampler(
        g, [int(fanout) for fanout in args.fan_out.split(',')])

    # Create PyTorch DataLoader for constructing blocks
    dataloader = DataLoader(dataset=train_nid.numpy(),
                            batch_size=args.batch_size,
                            collate_fn=sampler.sample_blocks,
                            shuffle=True,
                            drop_last=False,
                            num_workers=args.num_workers)

    # Define model and optimizer
    model = SAGE(in_feats, args.num_hidden, n_classes, args.num_layers,
                 F.relu, args.dropout)
    model = model.to(device)
    loss_fcn = nn.BCEWithLogitsLoss()
    loss_fcn = loss_fcn.to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Training loop
    avg = 0
    timed_epochs = 0
    iter_tput = []
    for epoch in range(args.num_epochs):
        tic = time.time()

        # Loop over the dataloader to sample the computation dependency graph
        # as a list of blocks.
        for step, blocks in enumerate(dataloader):
            tic_step = time.time()

            # The nodes for input lies at the LHS side of the first block.
            # The nodes for output lies at the RHS side of the last block.
            input_nodes = blocks[0].srcdata[dgl.NID]
            seeds = blocks[-1].dstdata[dgl.NID]

            # Load the input features as well as output labels
            batch_inputs, batch_labels = load_subtensor(
                g, labels, seeds, input_nodes, device)

            # Compute loss and prediction
            batch_pred = model(blocks, batch_inputs)
            loss = loss_fcn(batch_pred, batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            iter_tput.append(len(seeds) / (time.time() - tic_step))
            if step % args.log_every == 0:
                acc = compute_f1(batch_pred, batch_labels)
                gpu_mem_alloc = th.cuda.max_memory_allocated(
                ) / 1000000 if th.cuda.is_available() else 0
                # The first three throughput samples are excluded from the mean.
                print(
                    'Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MiB'
                    .format(epoch, step, loss.item(), acc.item(),
                            np.mean(iter_tput[3:]), gpu_mem_alloc))

        toc = time.time()
        print('Epoch Time(s): {:.4f}'.format(toc - tic))
        if epoch >= warmup_epochs:
            avg += toc - tic
            timed_epochs += 1
        if epoch % args.eval_every == 0 and epoch != 0:
            eval_acc = evaluate(model, g, g.ndata['features'], labels,
                                val_mask, args.batch_size, device)
            print('Eval Acc {:.4f}'.format(eval_acc))

    if timed_epochs:
        print('Avg epoch time: {}'.format(avg / timed_epochs))
    else:
        print('Avg epoch time: n/a (no epochs beyond the {} warm-up epochs)'
              .format(warmup_epochs))
requires_grad=False) # Dilations & padding self._set_dilations(seq_len) # Channel combinations (multivariate) if c_in > 1: self._set_channel_combinations(c_in) # Bias for i in range(self.num_dilations): self.register_buffer( f'biases_{i}', torch.empty( (self.num_kernels, self.num_features_per_dilation[i]))) self.register_buffer('prefit', torch.BoolTensor([False])) def fit(self, X, chunksize=None): num_samples = X.shape[0] if chunksize is None: chunksize = min(num_samples, self.num_dilations * self.num_kernels) else: chunksize = min(num_samples, chunksize) np.random.seed(self.random_state) idxs = np.random.choice(num_samples, chunksize, False) self.fitting = True self(X[idxs]) self.fitting = False def forward(self, x): _features = []
def map_nominal(genotype_df, variant_df, phenotype_df, phenotype_pos_df, prefix,
                covariates_df=None, interaction_s=None, maf_threshold_interaction=0.05,
                group_s=None, window=1000000, run_eigenmt=False,
                output_dir='.', write_top=True, write_stats=True, logger=None, verbose=True):
    """
    cis-QTL mapping: nominal associations for all variant-phenotype pairs

    Association results for each chromosome are written to parquet files
    in the format <output_dir>/<prefix>.cis_qtl_pairs.<chr>.parquet
    (only when write_stats is True).

    If interaction_s is provided, the top association per phenotype is
    written to <output_dir>/<prefix>.cis_qtl_top_assoc.txt.gz unless
    write_top is set to False, in which case it is returned as a DataFrame

    Parameters, as used by the code below:
      genotype_df:      columns are matched by name against phenotype_df.columns
      variant_df:       indexed by variant ID, must have a 'pos' column
      phenotype_df:     shape[0] is logged as phenotypes, shape[1] as samples
      phenotype_pos_df: passed to genotypeio.InputGeneratorCis
      prefix:           prefix of the output file names
      covariates_df:    optional; its index must equal phenotype_df.columns
      interaction_s:    optional; its index must equal phenotype_df.columns
      maf_threshold_interaction: if > 0, forwarded to filter_maf_interaction
                        together with a mask of samples at or above the
                        median of interaction_s
      group_s:          optional phenotype grouping; switches to the group loop
      window:           passed to genotypeio.InputGeneratorCis
      run_eigenmt:      add eigenMT test counts and adjusted p-values to the
                        top associations (interaction mode only)
      logger:           object with a write() method; SimpleLogger() if None

    Returns the top-association DataFrame only when interaction_s is given,
    at least one top association exists and write_top is False; otherwise
    returns None.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if logger is None:
        logger = SimpleLogger()
    if group_s is not None:
        # NOTE(review): group_dict is not referenced again in this function.
        group_dict = group_s.to_dict()

    logger.write('cis-QTL mapping: nominal associations for all variant-phenotype pairs')
    logger.write(' * {} samples'.format(phenotype_df.shape[1]))
    logger.write(' * {} phenotypes'.format(phenotype_df.shape[0]))
    if covariates_df is not None:
        assert np.all(phenotype_df.columns==covariates_df.index)
        logger.write(' * {} covariates'.format(covariates_df.shape[1]))
        residualizer = Residualizer(torch.tensor(covariates_df.values, dtype=torch.float32).to(device))
        # degrees of freedom: samples - 2 - number of covariates
        dof = phenotype_df.shape[1] - 2 - covariates_df.shape[1]
    else:
        residualizer = None
        dof = phenotype_df.shape[1] - 2
    logger.write(' * {} variants'.format(variant_df.shape[0]))
    if interaction_s is not None:
        assert np.all(interaction_s.index==phenotype_df.columns)
        logger.write(' * including interaction term')
        if maf_threshold_interaction>0:
            logger.write(' * using {:.2f} MAF threshold'.format(maf_threshold_interaction))

    # positions of the phenotype samples within the genotype columns,
    # used to reorder/subset the genotype columns to the phenotype sample order
    genotype_ix = np.array([genotype_df.columns.tolist().index(i) for i in phenotype_df.columns])
    genotype_ix_t = torch.from_numpy(genotype_ix).to(device)
    if interaction_s is not None:
        # the interaction model consumes two more degrees of freedom
        dof -= 2
        interaction_t = torch.tensor(interaction_s.values.reshape(1,-1), dtype=torch.float32).to(device)
        if maf_threshold_interaction > 0:
            # samples whose interaction value is at or above the median
            interaction_mask_t = torch.BoolTensor(interaction_s >= interaction_s.median()).to(device)
        else:
            interaction_mask_t = None

    igc = genotypeio.InputGeneratorCis(genotype_df, variant_df, phenotype_df, phenotype_pos_df, group_s=group_s, window=window)
    # iterate over chromosomes
    best_assoc = []  # one pd.Series per phenotype/group (interaction mode only)
    start_time = time.time()
    k = 0  # running item counter; carried across chromosomes via enumerate(..., k+1)
    logger.write(' * Computing associations')
    for chrom in igc.chrs:
        logger.write(' Mapping chromosome {}'.format(chrom))
        # allocate arrays
        # n = upper bound on the number of variant-phenotype pairs on this
        # chromosome: the sum of the cis-window sizes
        n = 0
        if group_s is None:
            for i in igc.phenotype_pos_df[igc.phenotype_pos_df['chr']==chrom].index:
                j = igc.cis_ranges[i]
                n += j[1] - j[0] + 1
        else:
            # one window per group (first phenotype of each group after drop_duplicates)
            for i in igc.group_s[igc.phenotype_pos_df['chr']==chrom].drop_duplicates().index:
                j = igc.cis_ranges[i]
                n += j[1] - j[0] + 1

        # The key order matters: chr_res.keys() is reused below as the index
        # of the per-phenotype top-association Series.
        chr_res = OrderedDict()
        chr_res['phenotype_id'] = []
        chr_res['variant_id'] = []
        chr_res['tss_distance'] = np.empty(n, dtype=np.int32)
        chr_res['maf'] =          np.empty(n, dtype=np.float32)
        chr_res['ma_samples'] =   np.empty(n, dtype=np.int32)
        chr_res['ma_count'] =     np.empty(n, dtype=np.int32)
        # NOTE: the 'pval_*' columns temporarily hold t-statistics; they are
        # converted to two-sided p-values after the chromosome is processed.
        if interaction_s is None:
            chr_res['pval_nominal'] = np.empty(n, dtype=np.float64)
            chr_res['slope'] =        np.empty(n, dtype=np.float32)
            chr_res['slope_se'] =     np.empty(n, dtype=np.float32)
        else:
            chr_res['pval_g'] =  np.empty(n, dtype=np.float64)
            chr_res['b_g'] =     np.empty(n, dtype=np.float32)
            chr_res['b_g_se'] =  np.empty(n, dtype=np.float32)
            chr_res['pval_i'] =  np.empty(n, dtype=np.float64)
            chr_res['b_i'] =     np.empty(n, dtype=np.float32)
            chr_res['b_i_se'] =  np.empty(n, dtype=np.float32)
            chr_res['pval_gi'] = np.empty(n, dtype=np.float64)
            chr_res['b_gi'] =    np.empty(n, dtype=np.float32)
            chr_res['b_gi_se'] = np.empty(n, dtype=np.float32)

        start = 0  # write pointer into the preallocated arrays
        if group_s is None:
            for k, (phenotype, genotypes, genotype_range, phenotype_id) in enumerate(igc.generate_data(chrom=chrom, verbose=verbose), k+1):
                # copy genotypes to GPU
                phenotype_t = torch.tensor(phenotype, dtype=torch.float).to(device)
                genotypes_t = torch.tensor(genotypes, dtype=torch.float).to(device)
                genotypes_t = genotypes_t[:,genotype_ix_t]
                impute_mean(genotypes_t)

                variant_ids = variant_df.index[genotype_range[0]:genotype_range[-1]+1]
                tss_distance = np.int32(variant_df['pos'].values[genotype_range[0]:genotype_range[-1]+1] - igc.phenotype_tss[phenotype_id])

                if interaction_s is None:
                    res = calculate_cis_nominal(genotypes_t, phenotype_t, residualizer=residualizer)
                    tstat, slope, slope_se, maf, ma_samples, ma_count = [i.cpu().numpy() for i in res]
                    n = len(variant_ids)
                else:
                    genotypes_t, mask_t = filter_maf_interaction(genotypes_t, interaction_mask_t=interaction_mask_t,
                                                                 maf_threshold_interaction=maf_threshold_interaction)
                    if genotypes_t.shape[0]>0:
                        res = calculate_interaction_nominal(genotypes_t, phenotype_t.unsqueeze(0), interaction_t,
                                                            residualizer=residualizer, return_sparse=False)
                        tstat, b, b_se, maf, ma_samples, ma_count = [i.cpu().numpy() for i in res]
                        # keep only the variants that passed the MAF filter
                        mask = mask_t.cpu().numpy()
                        variant_ids = variant_ids[mask]
                        tss_distance = tss_distance[mask]
                        n = len(variant_ids)

                        # top association
                        # (largest absolute value in column 2, stored as 'pval_gi')
                        ix = np.nanargmax(np.abs(tstat[:,2]))
                        top_s = pd.Series([phenotype_id, variant_ids[ix], tss_distance[ix], maf[ix], ma_samples[ix], ma_count[ix],
                                           tstat[ix,0], b[ix,0], b_se[ix,0],
                                           tstat[ix,1], b[ix,1], b_se[ix,1],
                                           tstat[ix,2], b[ix,2], b_se[ix,2]], index=chr_res.keys())
                        if run_eigenmt:  # compute eigenMT correction
                            top_s['tests_emt'] = eigenmt.compute_tests(genotypes_t, var_thresh=0.99, variant_window=200)
                        best_assoc.append(top_s)
                    else:  # all genotypes in window were filtered out
                        n = 0

                if n > 0:
                    chr_res['phenotype_id'].extend([phenotype_id]*n)
                    chr_res['variant_id'].extend(variant_ids)
                    chr_res['tss_distance'][start:start+n] = tss_distance
                    chr_res['maf'][start:start+n] = maf
                    chr_res['ma_samples'][start:start+n] = ma_samples
                    chr_res['ma_count'][start:start+n] = ma_count
                    if interaction_s is None:
                        chr_res['pval_nominal'][start:start+n] = tstat
                        chr_res['slope'][start:start+n] = slope
                        chr_res['slope_se'][start:start+n] = slope_se
                    else:
                        # columns 0/1/2 are stored as the g, i and gi terms
                        chr_res['pval_g'][start:start+n]  = tstat[:,0]
                        chr_res['b_g'][start:start+n]     = b[:,0]
                        chr_res['b_g_se'][start:start+n]  = b_se[:,0]
                        chr_res['pval_i'][start:start+n]  = tstat[:,1]
                        chr_res['b_i'][start:start+n]     = b[:,1]
                        chr_res['b_i_se'][start:start+n]  = b_se[:,1]
                        chr_res['pval_gi'][start:start+n] = tstat[:,2]
                        chr_res['b_gi'][start:start+n]    = b[:,2]
                        chr_res['b_gi_se'][start:start+n] = b_se[:,2]
                start += n  # update pointer
        else:  # groups
            for k, (phenotypes, genotypes, genotype_range, phenotype_ids, group_id) in enumerate(igc.generate_data(chrom=chrom, verbose=verbose), k+1):

                # copy genotypes to GPU
                genotypes_t = torch.tensor(genotypes, dtype=torch.float).to(device)
                genotypes_t = genotypes_t[:,genotype_ix_t]
                impute_mean(genotypes_t)

                variant_ids = variant_df.index[genotype_range[0]:genotype_range[-1]+1]
                # assuming that the TSS for all grouped phenotypes is the same
                tss_distance = np.int32(variant_df['pos'].values[genotype_range[0]:genotype_range[-1]+1] - igc.phenotype_tss[phenotype_ids[0]])

                if interaction_s is not None:
                    genotypes_t, mask_t = filter_maf_interaction(genotypes_t, interaction_mask_t=interaction_mask_t,
                                                                 maf_threshold_interaction=maf_threshold_interaction)
                    mask = mask_t.cpu().numpy()
                    variant_ids = variant_ids[mask]
                    tss_distance = tss_distance[mask]

                n = len(variant_ids)

                if genotypes_t.shape[0]>0:
                    # process first phenotype in group
                    phenotype_id = phenotype_ids[0]
                    phenotype_t = torch.tensor(phenotypes[0], dtype=torch.float).to(device)
                    if interaction_s is None:
                        res = calculate_cis_nominal(genotypes_t, phenotype_t, residualizer=residualizer)
                        tstat, slope, slope_se, maf, ma_samples, ma_count = [i.cpu().numpy() for i in res]
                    else:
                        res = calculate_interaction_nominal(genotypes_t, phenotype_t.unsqueeze(0), interaction_t,
                                                            residualizer=residualizer, return_sparse=False)
                        tstat, b, b_se, maf, ma_samples, ma_count = [i.cpu().numpy() for i in res]
                    # px[j] = phenotype currently holding the strongest statistic for variant j
                    px = [phenotype_id]*n

                    # iterate over remaining phenotypes in group
                    for phenotype, phenotype_id in zip(phenotypes[1:], phenotype_ids[1:]):
                        phenotype_t = torch.tensor(phenotype, dtype=torch.float).to(device)
                        if interaction_s is None:
                            res = calculate_cis_nominal(genotypes_t, phenotype_t, residualizer=residualizer)
                            tstat0, slope0, slope_se0, maf, ma_samples, ma_count = [i.cpu().numpy() for i in res]
                        else:
                            res = calculate_interaction_nominal(genotypes_t, phenotype_t.unsqueeze(0), interaction_t,
                                                                residualizer=residualizer, return_sparse=False)
                            tstat0, b0, b_se0, maf, ma_samples, ma_count = [i.cpu().numpy() for i in res]

                        # find associations that are stronger for current phenotype
                        if interaction_s is None:
                            ix = np.where(np.abs(tstat0) > np.abs(tstat))[0]
                        else:
                            ix = np.where(np.abs(tstat0[:,2]) > np.abs(tstat[:,2]))[0]

                        # update relevant positions
                        for j in ix:
                            px[j] = phenotype_id
                        if interaction_s is None:
                            tstat[ix] = tstat0[ix]
                            slope[ix] = slope0[ix]
                            slope_se[ix] = slope_se0[ix]
                        else:
                            tstat[ix] = tstat0[ix]
                            b[ix] = b0[ix]
                            b_se[ix] = b_se0[ix]

                    chr_res['phenotype_id'].extend(px)
                    chr_res['variant_id'].extend(variant_ids)
                    chr_res['tss_distance'][start:start+n] = tss_distance
                    chr_res['maf'][start:start+n] = maf
                    chr_res['ma_samples'][start:start+n] = ma_samples
                    chr_res['ma_count'][start:start+n] = ma_count
                    if interaction_s is None:
                        chr_res['pval_nominal'][start:start+n] = tstat
                        chr_res['slope'][start:start+n] = slope
                        chr_res['slope_se'][start:start+n] = slope_se
                    else:
                        chr_res['pval_g'][start:start+n]  = tstat[:,0]
                        chr_res['b_g'][start:start+n]     = b[:,0]
                        chr_res['b_g_se'][start:start+n]  = b_se[:,0]
                        chr_res['pval_i'][start:start+n]  = tstat[:,1]
                        chr_res['b_i'][start:start+n]     = b[:,1]
                        chr_res['b_i_se'][start:start+n]  = b_se[:,1]
                        chr_res['pval_gi'][start:start+n] = tstat[:,2]
                        chr_res['b_gi'][start:start+n]    = b[:,2]
                        chr_res['b_gi_se'][start:start+n] = b_se[:,2]

                    # top association for the group
                    if interaction_s is not None:
                        ix = np.nanargmax(np.abs(tstat[:,2]))
                        # the phenotype ID is read back from the entries just appended for this group
                        top_s = pd.Series([chr_res['phenotype_id'][start:start+n][ix], variant_ids[ix], tss_distance[ix], maf[ix], ma_samples[ix], ma_count[ix],
                                           tstat[ix,0], b[ix,0], b_se[ix,0],
                                           tstat[ix,1], b[ix,1], b_se[ix,1],
                                           tstat[ix,2], b[ix,2], b_se[ix,2]], index=chr_res.keys())
                        top_s['num_phenotypes'] = len(phenotype_ids)
                        if run_eigenmt:  # compute eigenMT correction
                            top_s['tests_emt'] = eigenmt.compute_tests(genotypes_t, var_thresh=0.99, variant_window=200)
                        best_assoc.append(top_s)

                start += n  # update pointer

        logger.write(' time elapsed: {:.2f} min'.format((time.time()-start_time)/60))

        # convert to dataframe, compute p-values and write current chromosome
        # trim the preallocated arrays when fewer pairs were written than allocated
        if start < len(chr_res['maf']):
            for x in chr_res:
                chr_res[x] = chr_res[x][:start]

        if write_stats:
            chr_res_df = pd.DataFrame(chr_res)
            # convert the stored t-statistics to two-sided p-values: 2 * CDF_t(-|t|, dof)
            if interaction_s is None:
                m = chr_res_df['pval_nominal'].notnull()
                chr_res_df.loc[m, 'pval_nominal'] = 2*stats.t.cdf(-chr_res_df.loc[m, 'pval_nominal'].abs(), dof)
            else:
                # the non-null mask of 'pval_gi' is applied to all three columns
                m = chr_res_df['pval_gi'].notnull()
                chr_res_df.loc[m, 'pval_g'] =  2*stats.t.cdf(-chr_res_df.loc[m, 'pval_g'].abs(), dof)
                chr_res_df.loc[m, 'pval_i'] =  2*stats.t.cdf(-chr_res_df.loc[m, 'pval_i'].abs(), dof)
                chr_res_df.loc[m, 'pval_gi'] = 2*stats.t.cdf(-chr_res_df.loc[m, 'pval_gi'].abs(), dof)
            print(' * writing output')
            chr_res_df.to_parquet(os.path.join(output_dir, '{}.cis_qtl_pairs.{}.parquet'.format(prefix, chrom)))

    if interaction_s is not None and len(best_assoc) > 0:
        # one row per phenotype (or group), indexed by phenotype_id
        best_assoc = pd.concat(best_assoc, axis=1, sort=False).T.set_index('phenotype_id').infer_objects()
        # here the non-null mask of 'pval_g' is applied to all three columns
        m = best_assoc['pval_g'].notnull()
        best_assoc.loc[m, 'pval_g'] =  2*stats.t.cdf(-best_assoc.loc[m, 'pval_g'].abs(), dof)
        best_assoc.loc[m, 'pval_i'] =  2*stats.t.cdf(-best_assoc.loc[m, 'pval_i'].abs(), dof)
        best_assoc.loc[m, 'pval_gi'] = 2*stats.t.cdf(-best_assoc.loc[m, 'pval_gi'].abs(), dof)
        if run_eigenmt:
            # p-value scaled by the eigenMT test count (and by the number of
            # phenotypes in the group, in group mode), capped at 1
            if group_s is None:
                best_assoc['pval_emt'] = np.minimum(best_assoc['tests_emt']*best_assoc['pval_gi'], 1)
            else:
                best_assoc['pval_emt'] = np.minimum(best_assoc['num_phenotypes']*best_assoc['tests_emt']*best_assoc['pval_gi'], 1)
            best_assoc['pval_adj_bh'] = eigenmt.padjust_bh(best_assoc['pval_emt'])
        if write_top:
            best_assoc.to_csv(os.path.join(output_dir, '{}.cis_qtl_top_assoc.txt.gz'.format(prefix)), sep='\t', float_format='%.6g')
        else:
            # NOTE: returning here skips the final 'done.' log message
            return best_assoc
    logger.write('done.')
def main():
    """Run slot-filling and intent inference on a test file and print a report.

    Command-line arguments give the training set (used only to rebuild the
    vocabularies), the test set, the model directory and the model sizes.
    For every test sentence the slot tags and the intent are predicted and
    printed, followed by the misclassified intents, the micro-averaged slot F1
    and the intent accuracy.

    Each test line is expected to be ``<tokens>\\t<slot tags> <intent>``; the
    first and last token and the first slot tag are dropped, and only the
    first of several ``#``-joined intents is kept.
    """
    # Praise argparser!
    parser = argparse.ArgumentParser(
        description=
        "Inference script for performing joint tasks on ATIS datasets.")
    parser.add_argument("--train_path",
                        type=str,
                        help="path of train dataset.")
    parser.add_argument("--test_path", type=str, help="path of test dataset.")
    parser.add_argument("--model_dir",
                        type=str,
                        default="./models/",
                        help='path for saved trained models.')
    parser.add_argument('--max_length',
                        type=int,
                        default=60,
                        help='max sequence length')
    parser.add_argument('--embedding_size',
                        type=int,
                        default=100,
                        help='dimension of word embedding vectors')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=50,
                        help='dimension of lstm hidden states')
    args = parser.parse_args()

    # Load data
    print("Loading data...")
    _, word2index, tag2index, intent2index = preprocessing(
        args.train_path, args.max_length)
    index2tag = {v: k for k, v in tag2index.items()}
    index2intent = {v: k for k, v in intent2index.items()}

    # Load model
    print("Loading model...")
    encoder = Encoder(len(word2index), args.embedding_size, args.hidden_size)
    decoder = Decoder(len(tag2index), len(intent2index),
                      len(tag2index) // 3, args.hidden_size * 2)
    map_location = None if USE_CUDA else "cpu"
    encoder.load_state_dict(
        torch.load(os.path.join(args.model_dir, 'jointnlu-encoder.pkl'),
                   map_location=map_location))
    decoder.load_state_dict(
        torch.load(os.path.join(args.model_dir, 'jointnlu-decoder.pkl'),
                   map_location=map_location))
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # Switch to evaluation mode
    encoder.eval()
    decoder.eval()

    # Preprocess test data
    # The context manager closes the file (it used to be left open).
    with open(args.test_path, "r") as test_file:
        raw_lines = test_file.readlines()

    test = []
    for raw_line in raw_lines:
        # Strip only the newline, so a last line without a trailing newline
        # does not lose its final character.
        fields = raw_line.rstrip("\n").split("\t")
        words = fields[0].split(" ")
        annotations = fields[1].split(" ")
        # Note here I split embedded multiple labels into separate labels and
        # get the first one. This could lower error rate.
        intent = annotations[-1].split("#")[0]
        test.append([words[1:-1], annotations[1:-1], intent])

    slot_f1 = []
    intent_err = []

    # Test cases.
    for test_raw, slot_gt, intent_gt in test:
        test_in = prepare_sequence(test_raw, word2index).to("cpu")
        # Mask is True where the input index is 0.
        pad_mask = Variable(
            torch.BoolTensor(tuple(map(lambda s: s == 0, test_in.data))))
        # NOTE(review): the CUDA path keeps the mask 1-D while the CPU path
        # reshapes it to (1, -1); kept as-is because the shapes expected by
        # Encoder/Decoder are not visible here - confirm.
        test_mask = pad_mask.cuda() if USE_CUDA else pad_mask.view(1, -1)

        start_decode = Variable(torch.LongTensor([[word2index['<SOS>']] * 1]))
        if USE_CUDA:
            start_decode = start_decode.cuda()
        start_decode = start_decode.transpose(1, 0)

        output, hidden_c = encoder(test_in.unsqueeze(0),
                                   test_mask.unsqueeze(0))
        tag_score, intent_score = decoder(start_decode, hidden_c, output,
                                          test_mask)

        _, tag_idx = torch.max(tag_score, 1)
        slot_pred = [index2tag[ii] for ii in tag_idx.data.tolist()]
        # Calculate f1_micro with sklearn. Pretty handy.
        slot_f1.append(f1_score(slot_gt, slot_pred, average="micro"))

        _, intent_idx = torch.max(intent_score, 1)
        intent_pred = index2intent[intent_idx.data.tolist()[0]]
        if intent_pred != intent_gt:
            intent_err.append([test_raw, intent_gt, intent_pred])

        # Print our results.
        print("Input Sentence\t: ", *test_raw)
        print("Truth\t\t: ", *slot_gt)
        print("Prediction\t: ", *slot_pred)
        print("Truth\t\t: ", intent_gt)
        print("Prediction\t: ", intent_pred)
        print()

    # Print out everything I need to finish my report.
    print("Got intent err ", len(intent_err))
    print("--- BEGIN ERR PRINT ---")
    for case in intent_err:
        print("Input : ", *case[0])
        print("Truth : ", case[1])
        print("Predict: ", case[2])
        print()
    print("--- ENDOF ERR PRINT ---")
    print("Total ", len(test))

    # An empty test file yields NaN metrics instead of a ZeroDivisionError.
    slot_f1_avg = np.average(slot_f1) if slot_f1 else float("nan")
    intent_acc = 1 - len(intent_err) / len(test) if test else float("nan")
    print("Slot f1_micro avg %f" % slot_f1_avg)
    print("Intent acc %f" % intent_acc)
def run_experiment(p, csv_path, out_dir, data_cols='_mri_vol'):
    """
    Train an RNN-VAE on longitudinal MRI data and save model, plots and metrics.

    p: dict with every hyperparameter needed by the run ("seed", "h_size",
       "hidden", "n_layers", "z_dim", "clip", "n_epochs", "batch_size",
       "learning_rate"). "x_size" and "ntp" are written into it here.
       We assume that all the parameters needed are present in p!!
    csv_path: path of the CSV handed to open_MRI_data_var.
    out_dir: output directory (created if missing).
    data_cols: column selector forwarded to open_MRI_data_var.

    Returns a dict with the reconstruction errors on train/test, the error of
    the one-step-ahead prediction and the last training losses.

    NOTE: "mse_train"/"mse_test" are computed with mean_absolute_error; the
    key names and printed labels are kept for compatibility with consumers.
    """
    os.makedirs(out_dir, exist_ok=True)

    #Seed
    torch.manual_seed(p["seed"])
    np.random.seed(p["seed"])

    #Redirect output to the out dir
    # sys.stdout = open(out_dir + 'output.out', 'w')

    #save parameters to the out dir
    # os.path.join works whether or not out_dir ends with a separator.
    with open(os.path.join(out_dir, "params.txt"), "w") as f:
        f.write(str(p))

    # DEVICE
    ## Deciding on the device.
    DEVICE_ID = 0
    DEVICE = torch.device(
        'cuda:' + str(DEVICE_ID) if torch.cuda.is_available() else 'cpu')
    if torch.cuda.is_available():
        torch.cuda.set_device(DEVICE_ID)

    # LOAD DATA
    X_train, X_test, Y_train, Y_test, mri_col = open_MRI_data_var(
        csv_path,
        train_set=0.9,
        normalize=True,
        return_covariates=True,
        data_cols=data_cols)

    #TEMPORAL
    #Combine test and train Y for later
    Y = {}
    for k in Y_train.keys():
        Y[k] = Y_train[k] + Y_test[k]

    # List of (nt, nfeatures) numpy objects
    p["x_size"] = X_train[0].shape[1]
    print(p["x_size"])

    # Apply padding to both X_train and X_val
    # REMOVE LAST POINT OF EACH INDIVIDUAL
    # (the last training point is held out for the prediction test below)
    X_train_tensor = [torch.FloatTensor(t[:-1, :]) for t in X_train]
    X_train_pad = nn.utils.rnn.pad_sequence(X_train_tensor,
                                            batch_first=False,
                                            padding_value=np.nan)
    X_test_tensor = [torch.FloatTensor(t) for t in X_test]
    X_test_pad = nn.utils.rnn.pad_sequence(X_test_tensor,
                                           batch_first=False,
                                           padding_value=np.nan)

    p["ntp"] = max(X_train_pad.size(0), X_test_pad.size(0))

    # Those datasets are of size [Tmax, Batch_size, nfeatures]
    # Save mask to unpad later when testing
    # NaN was used as padding value, so non-NaN entries are real data.
    mask_train = ~torch.isnan(X_train_pad)
    mask_test = ~torch.isnan(X_test_pad)

    # convert to tensor
    mask_train_tensor = torch.BoolTensor(mask_train)
    mask_test_tensor = torch.BoolTensor(mask_test)

    #convert those NaN to zeros
    X_train_pad[torch.isnan(X_train_pad)] = 0
    X_test_pad[torch.isnan(X_test_pad)] = 0

    # Define model and optimizer
    model = rnnvae.ModelRNNVAE(p["x_size"], p["h_size"], p["hidden"],
                               p["n_layers"], p["hidden"], p["n_layers"],
                               p["hidden"], p["n_layers"], p["z_dim"],
                               p["hidden"], p["n_layers"], p["clip"],
                               p["n_epochs"], p["batch_size"], DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=p["learning_rate"])
    model.optimizer = optimizer

    model = model.to(DEVICE)
    # Fit the model
    model.fit(X_train_pad.to(DEVICE), X_test_pad.to(DEVICE),
              mask_train_tensor.to(DEVICE), mask_test_tensor.to(DEVICE))

    ### After training, save the model!
    model.save(out_dir, 'model.pt')

    # Predict the reconstructions from X_val and X_train
    X_test_fwd = model.predict(X_test_pad.to(DEVICE))
    X_train_fwd = model.predict(X_train_pad.to(DEVICE))

    #Reformulate things
    # swap the first two axes so that the subject axis comes first
    for fwd in (X_train_fwd, X_test_fwd):
        fwd['xnext'] = np.array(fwd['xnext']).swapaxes(0, 1)
        fwd['z'] = np.array(fwd['z']).swapaxes(0, 1)

    X_test_hat = X_test_fwd["xnext"]
    X_train_hat = X_train_fwd["xnext"]

    # Unpad using the masks
    #after masking, need to reshape to (nt, nfeat)
    X_test_hat = [
        X[mask_test[:, i, :]].reshape((-1, p["x_size"]))
        for (i, X) in enumerate(X_test_hat)
    ]
    X_train_hat = [
        X[mask_train[:, i, :]].reshape((-1, p["x_size"]))
        for (i, X) in enumerate(X_train_hat)
    ]

    #Compute mean absolute error over all sequences
    mse_train = np.mean([
        mean_absolute_error(xval[:-1, :], xhat)
        for (xval, xhat) in zip(X_train, X_train_hat)
    ])
    print('MSE over the train set: ' + str(mse_train))

    #Compute mean absolute error over all sequences
    mse_test = np.mean([
        mean_absolute_error(xval, xhat)
        for (xval, xhat) in zip(X_test, X_test_hat)
    ])
    print('MSE over the test set: ' + str(mse_test))

    #plot validation and training losses
    plot_total_loss(model.loss['total'], model.val_loss['total'], "Total loss",
                    out_dir, "total_loss.png")
    plot_total_loss(model.loss['kl'], model.val_loss['kl'], "kl_loss", out_dir,
                    "kl_loss.png")
    plot_total_loss(model.loss['ll'], model.val_loss['ll'], "ll_loss", out_dir,
                    "ll_loss.png")  #Negative to see downward curve

    # Visualization of trajectories (currently disabled):
    # subj = 6
    # feature = 12
    # # For train
    # plot_trajectory(X_train, X_train_hat, subj, 'all', out_dir, f'traj_train_s_{subj}_f_all') # testing for a given subject
    # plot_trajectory(X_train, X_train_hat, subj, feature, out_dir, f'traj_train_s_{subj}_f_{feature}') # testing for a given feature
    # # For test
    # plot_trajectory(X_test, X_test_hat, subj, 'all', out_dir, f'traj_test_s_{subj}_f_all') # testing for a given subject
    # plot_trajectory(X_test, X_test_hat, subj, feature, out_dir, f'traj_test_s_{subj}_f_{feature}') # testing for a given feature

    z_train = X_train_fwd['z']
    z_test = X_test_fwd['z']

    # select only the existing time points
    # Repeat the mask for each latent features, as we can have variable features, need to treat the mask
    #Use np.tile to repeat it as many times as p["z_dim"], and transpose it
    z_test = [
        X[np.tile(mask_test[:, i, 0], (p["z_dim"], 1)).T].reshape(
            (-1, p["z_dim"])) for (i, X) in enumerate(z_test)
    ]
    z_train = [
        X[np.tile(mask_train[:, i, 0], (p["z_dim"], 1)).T].reshape(
            (-1, p["z_dim"])) for (i, X) in enumerate(z_train)
    ]

    z = z_train + z_test

    # Latent-space projections: every unordered pair of latent dimensions is
    # plotted once per colouring (none, diagnosis, age), each in its own dir.
    dim_pairs = [(dim0, dim1)
                 for dim0 in range(p["z_dim"])
                 for dim1 in range(dim0 + 1, p["z_dim"])]
    projections = (
        ('z_proj/', {}),
        ('z_proj_dx/', {'c': 'DX', 'Y': Y}),
        ('z_proj_age/', {'c': 'AGE', 'Y': Y}),
    )
    for subdir, colour_kwargs in projections:
        # subdir keeps its trailing slash, so the joined path also ends in one
        proj_dir = os.path.join(out_dir, subdir)
        os.makedirs(proj_dir, exist_ok=True)
        for dim0, dim1 in dim_pairs:
            plot_z_time_2d(z,
                           p["ntp"], [dim0, dim1],
                           proj_dir,
                           out_name=f'z_d{dim0}_d{dim1}',
                           **colour_kwargs)

    # Compute MSE
    # Predict for max+1 and select only the positions that I am interested in
    #this sequence predict DO NOT work well
    # Targets and predictions are collected together: a subject with no
    # history (tp == 0) is skipped on BOTH sides, so the two lists always
    # have the same length when handed to mean_squared_error.
    Y_true = []
    Y_pred = []
    for seq in X_train:
        x = torch.FloatTensor(seq[:-1, :])
        x = x.unsqueeze(1)
        tp = x.size(0)  # max time points (and timepoint to predict)
        if tp == 0:
            continue
        X_fwd = model.sequence_predict(x.to(DEVICE), tp + 1)
        X_hat = X_fwd['xnext']
        Y_true.append(seq[-1, :])
        Y_pred.append(X_hat[tp, 0, :])  #get predicted point

    #For each patient in X_hat, save only the timepoint that we want
    #Compute mse
    mse_predict = (mean_squared_error(Y_true, Y_pred)
                   if Y_pred else float('nan'))
    print('MSE over a future timepoint prediction: ' + str(mse_predict))

    # TODO: THIS SAMPLING PROCEDURE NEEDS TO BE UPDATED (currently disabled):
    # nt = len(X_train_pad)
    # nsamples = 1000
    # X_sample = model.sample_latent(nsamples, nt)
    # #Get the samples
    # X_sample['xnext'] = np.array(X_sample['xnext']).swapaxes(0,1)
    # X_sample['z'] = np.array(X_sample['z']).swapaxes(0,1)
    # # Dir for projections
    # sampling_path = 'z_proj_sampling/'
    # if not os.path.exists(out_dir + sampling_path):
    #     os.makedirs(out_dir + sampling_path)
    # #plot latent space
    # for dim0 in range(p["z_dim"]):
    #     for dim1 in range(dim0, p["z_dim"]):
    #         if dim0 == dim1: continue # very dirty
    #         plot_z_time_2d(X_sample['z'], p["ntp"], [dim0, dim1], out_dir + 'z_proj_sampling/', out_name=f'z_d{dim0}_d{dim1}')

    loss = {
        "mse_train": mse_train,
        "mse_test": mse_test,
        "mse_predict": mse_predict,
        "loss_total": model.loss['total'][-1],
        "loss_kl": model.loss['kl'][-1],
        "loss_ll": model.loss['ll'][-1]
    }

    return loss
def main(args):
    """Train a GAT on the Reddit dataset for ``args.runs`` independent runs.

    Every run re-initialises the model and a fresh Adam optimizer, then does
    full-graph training for ``args.epochs`` epochs.  Epochs from the third
    one onward are timed and the running mean is printed.  When ``args.eval``
    is set, accuracies are computed after every epoch, handed to the logger
    and summarised per run and over all runs.
    """
    device = torch.device(
        f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu')

    dataset = Reddit(osp.join('dataset', 'Reddit'))
    data = dataset[0]

    features = data.x.to(device)
    labels = data.y.to(device)
    train_mask, val_mask, test_mask = (
        torch.BoolTensor(mask).to(device)
        for mask in (data.train_mask, data.val_mask, data.test_mask))

    # Strip whatever self-loops the data already has before adding new ones.
    edge_index, _ = remove_self_loops(data.edge_index.to(device))
    edge_index, _ = add_self_loops(edge_index, num_nodes=features.size(0))

    model = GAT(num_layers=args.num_layers,
                in_feats=features.size(-1),
                num_hidden=args.num_hidden,
                num_classes=dataset.num_classes,
                heads=[1, 1, 1],
                dropout=args.dropout).to(device)
    loss_fcn = nn.CrossEntropyLoss()
    logger = Logger(args.runs, args)

    # Durations accumulate across runs, not per run.
    epoch_times = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)

        for epoch in range(1, args.epochs + 1):
            model.train()

            # The first two epochs are treated as warm-up and not timed.
            timed = epoch >= 3
            if timed:
                tick = time.time()

            # forward
            logits = model(features, edge_index)
            loss = loss_fcn(logits[train_mask], labels[train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if timed:
                epoch_times.append(time.time() - tick)
                print('Training time/epoch {}'.format(np.mean(epoch_times)))

            if args.eval:
                train_acc, val_acc, test_acc = evaluate(
                    model, features, edge_index, labels, train_mask, val_mask,
                    test_mask)
                logger.add_result(run, (train_acc, val_acc, test_acc))
                print("Run {:02d} | Epoch {:05d} | Loss {:.4f} | Train {:.4f} | Val {:.4f} | Test {:.4f}".format(
                    run, epoch, loss.item(), train_acc, val_acc, test_acc))

        if args.eval:
            logger.print_statistics(run)

    if args.eval:
        logger.print_statistics()
def _set_batch_skip_search(self, valid_exs: List[Message], batch: Batch) -> Batch:
    """Attach a per-example ``skip_search`` flag tensor to ``batch``.

    For every example in ``valid_exs`` the value stored under the key named
    by ``self.opt['skip_search_key']`` is read (``False`` when the key is
    absent).  The flags are stored, in order, as a ``torch.BoolTensor`` on
    ``batch.skip_search`` and the same ``batch`` object is returned.
    """
    flags = []
    for example in valid_exs:
        flags.append(example.get(self.opt['skip_search_key'], False))
    batch.skip_search = torch.BoolTensor(flags)
    return batch
def bfixed(self, *cmp):
    """Return the flags in ``cmp`` as one boolean row repeated per batch item.

    The positional arguments form a single ``(1, len(cmp))`` boolean row on
    ``self.device``; that row is tiled ``self.batch_size`` times along the
    first dimension, giving a ``(batch_size, len(cmp))`` tensor.
    """
    row = torch.BoolTensor([list(cmp)])
    row = row.to(self.device)
    return row.repeat(self.batch_size, 1)
def __init__(self, opt, dicts, positional_encoder, encoder_type):
    """Configure the encoder from ``opt`` and build its sub-modules.

    Args:
        opt: option object; the attributes read here are ``model_size``,
            ``n_heads``, ``inner_size``, ``encoder_layers`` (optional),
            ``layers``, ``dropout``, ``word_dropout``, ``attn_dropout``,
            ``emb_dropout``, ``time``, ``version``, ``limit_rhs_steps`` and
            ``freeze_encoder``.
        dicts: for ``encoder_type == "text"`` an object whose ``size()``
            gives the embedding vocabulary size; otherwise it is passed
            directly as the input width of a linear projection.
        positional_encoder: module stored as ``self.positional_encoder`` and,
            when ``opt.time == 'positional_encoding'``, also used as the
            time transformer.  Its ``len_max`` is read when building the
            right-hand-side mask.
        encoder_type: ``"text"`` selects the embedding input path; any other
            value selects the linear (audio) input path.
    """
    super(TransformerEncoder, self).__init__()
    self.model_size = opt.model_size
    self.n_heads = opt.n_heads
    self.inner_size = opt.inner_size
    # An explicit encoder depth (anything but -1) overrides the shared
    # ``opt.layers`` setting.
    if hasattr(opt, 'encoder_layers') and opt.encoder_layers != -1:
        self.layers = opt.encoder_layers
    else:
        self.layers = opt.layers
    self.dropout = opt.dropout
    self.word_dropout = opt.word_dropout
    self.attn_dropout = opt.attn_dropout
    self.emb_dropout = opt.emb_dropout
    self.time = opt.time
    self.version = opt.version
    self.input_type = encoder_type
    # input lookup table
    if encoder_type != "text":
        # Non-text input: project ``dicts``-wide features to the model size.
        self.audio_trans = nn.Linear(dicts, self.model_size)
    else:
        self.word_lut = nn.Embedding(dicts.size(), self.model_size, padding_idx=onmt.Constants.PAD)
    # NOTE(review): ``self.time_transformer`` is left unset when ``opt.time``
    # is none of the three values handled below.
    if opt.time == 'positional_encoding':
        self.time_transformer = positional_encoder
    elif opt.time == 'gru':
        self.time_transformer = nn.GRU(self.model_size, self.model_size, 1, batch_first=True)
    elif opt.time == 'lstm':
        self.time_transformer = nn.LSTM(self.model_size, self.model_size, 1, batch_first=True)
    self.preprocess_layer = PrePostProcessing(self.model_size, self.emb_dropout, sequence='d', static=False)
    self.postprocess_layer = PrePostProcessing(self.model_size, 0, sequence='n')
    self.positional_encoder = positional_encoder
    self.limit_rhs_steps = opt.limit_rhs_steps
    self.build_modules(limit_rhs_steps=opt.limit_rhs_steps)
    if self.limit_rhs_steps is not None:
        # Square mask that is True where the column index exceeds the row
        # index by more than ``limit_rhs_steps`` (np.triu with
        # k = 1 + limit_rhs_steps).  Presumably these are the positions to
        # hide from attention -- confirm against the code that reads
        # ``rhs_mask``.
        largest_rhs_mask = positional_encoder.len_max + self.limit_rhs_steps
        rhs_mask = torch.BoolTensor(
            np.triu(np.ones((largest_rhs_mask, largest_rhs_mask)), k=1 + self.limit_rhs_steps).astype('uint8'))
        self.register_buffer('rhs_mask', rhs_mask)
    if opt.freeze_encoder:
        for p in self.parameters():
            p.requires_grad = False
            # NOTE(review): prints ``False`` once per parameter; looks like
            # leftover debug output.
            print(p.requires_grad)
def predict(
    self,
    audio_path: str,
    signal: np.ndarray,
    top_db: int = 48,
    vad: bool = False,
    batch_size: int = 1,
) -> dict:
    """Transcribe ``signal`` and return the recognition results.

    Audio longer than 50 seconds is cut into speech intervals, either with
    ``self.vad_model`` (when ``vad`` is true) or with ``self._split_audio``
    and ``top_db``; the intervals are batched and decoded batch by batch.
    Shorter audio is decoded in a single pass.

    Args:
        audio_path: path of the audio file; copied into the result as-is.
        signal: the audio samples as an array.
        top_db: threshold forwarded to ``self._split_audio`` when ``vad`` is
            false.
        vad: use the voice-activity-detection model to find speech intervals
            instead of ``self._split_audio``.
        batch_size: batch size forwarded to ``self._create_batches``.

    Returns:
        A dict with ``"audio"`` (the given path), ``"duration"`` (string form
        of a ``timedelta``) and ``"results"``: a list of dicts with
        ``"speech_section"`` ("start ~ end"), ``"length_ms"`` and
        ``"speech"``.  Sections whose post-processed text is empty are left
        out.
    """
    result_dict = dict()

    # Pass the samples by keyword: librosa >= 0.10 makes ``y`` keyword-only,
    # and older releases accept the keyword form as well.
    duration = librosa.get_duration(y=signal, sr=self.SAMPLE_RATE)
    batch_inference = duration > 50.0

    result_dict["audio"] = audio_path
    result_dict["duration"] = str(datetime.timedelta(seconds=duration))
    # Filled below with one ``hypo_dict`` per recognised section.
    result_dict["results"] = list()

    if batch_inference:
        if vad:
            speech_intervals = self.vad_model(
                signal,
                sample_rate=self.SAMPLE_RATE,
            )
        else:
            speech_intervals = self._split_audio(signal, top_db)

        # Parallel lists indexed by [batch_idx][hypo_idx]: each speech
        # section carries "start"/"end" times, each duration is in seconds.
        batches, total_speech_sections, total_durations = self._create_batches(
            speech_intervals,
            batch_size,
        )

        for batch_idx, batch in enumerate(batches):
            net_input, sample = dict(), dict()

            net_input["padding_mask"] = get_mask_from_lengths(
                inputs=batch["inputs"],
                seq_lengths=batch["input_lengths"],
            ).to(self.device)
            net_input["source"] = batch["inputs"].to(self.device)
            sample["net_input"] = net_input

            # yapf: disable
            # Skip batches too short to decode.
            if sample["net_input"]["source"].size(1) < self.MINIMUM_INPUT_LENGTH:
                continue
            # yapf: enable

            # One entry per section in the batch; each entry is a list whose
            # first element holds the decoded "tokens".
            hypos = self.generator.generate(
                self.model,
                sample,
                prefix_tokens=None,
            )

            for hypo_idx, hypo in enumerate(hypos):
                hypo_dict = dict()
                # Token ids -> string of space-separated pieces.
                hyp_pieces = self.target_dict.string(
                    hypo[0]["tokens"].int().cpu())

                speech_section = total_speech_sections[batch_idx][hypo_idx]

                # Section boundaries rounded to whole seconds.
                speech_start_time = str(
                    datetime.timedelta(
                        seconds=int(round(
                            speech_section["start"],
                            0,
                        ))))
                speech_end_time = str(
                    datetime.timedelta(
                        seconds=int(round(
                            speech_section["end"],
                            0,
                        ))))

                # yapf: disable
                hypo_dict["speech_section"] = f"{speech_start_time} ~ {speech_end_time}"
                hypo_dict["length_ms"] = total_durations[batch_idx][hypo_idx] * 1000
                hypo_dict["speech"] = self._text_postprocess(hyp_pieces)
                # yapf: enable

                # Drop sections that decoded to empty text.
                if hypo_dict["speech"]:
                    result_dict["results"].append(hypo_dict)

            # Release per-batch tensors before building the next batch.
            del hypos, net_input, sample

    else:
        net_input, sample, hypo_dict = dict(), dict(), dict()

        # From here on ``duration`` is the value (in seconds) reported by
        # ``_parse_audio``.
        feature, duration = self._parse_audio(signal)

        # Add a leading batch dimension of size 1.
        net_input["source"] = feature.unsqueeze(0).to(self.device)

        # All-False padding mask with the same (1, length) shape as the
        # source: there is a single sequence, so nothing is padded.
        padding_mask = torch.BoolTensor(
            net_input["source"].size(1)).fill_(False)
        net_input["padding_mask"] = padding_mask.unsqueeze(0).to(
            self.device)

        sample["net_input"] = net_input

        hypo = self.generator.generate(
            self.model,
            sample,
            prefix_tokens=None,
        )
        hyp_pieces = self.target_dict.string(
            hypo[0][0]["tokens"].int().cpu())

        # The whole clip is one section starting at zero.
        speech_start_time = str(datetime.timedelta(seconds=0))
        speech_end_time = str(
            datetime.timedelta(seconds=int(round(duration, 0))))

        hypo_dict[
            "speech_section"] = f"{speech_start_time} ~ {speech_end_time}"
        hypo_dict["length_ms"] = duration * 1000
        hypo_dict["speech"] = self._text_postprocess(hyp_pieces)

        if hypo_dict["speech"]:
            result_dict["results"].append(hypo_dict)

    return result_dict
def apply_model(self, ner_model, features):
    """
    Encode a batch of sentences, run the LM-LSTM-CRF model and decode it.

    args:
        ner_model: sequence labeling model
        features (list): list of words list

    returns:
        whatever ``self.decoder.decode`` produces for the model scores and
        the word-level mask
    """
    char_features = encode2char_safe(features, self.c_map)

    # Optionally lower-case every word before the word-index lookup.
    if self.caseless:
        tokens = [[word.lower() for word in sentence] for sentence in features]
    else:
        tokens = features
    word_features = encode_safe(tokens, self.f_map, self.f_map['<unk>'])

    # Per word: number of characters plus one.
    fea_len = [[len(chars) + 1 for chars in sentence]
               for sentence in char_features]
    forw_features = concatChar(char_features, self.c_map)

    # Target lengths that every sequence in the batch is padded to.
    word_len = max(len(words) + 1 for words in word_features)
    char_len = max(
        len(chars) + word_len - len(words)
        for chars, words in zip(forw_features, word_features))

    padded_chars = [
        chars + [self.pad_char] * (char_len - len(chars))
        for chars in forw_features
    ]
    back_t = torch.LongTensor([chars[::-1] for chars in padded_chars])
    forw_t = torch.LongTensor(padded_chars)

    # Running sums of the padded per-word lengths, and the matching
    # positions counted from the other end of the character sequence.
    forw_p = torch.LongTensor([
        list(itertools.accumulate(lens + [1] * (word_len - len(lens))))
        for lens in fea_len
    ])
    back_p = torch.LongTensor([
        [char_len - 1] + [char_len - 1 - pos for pos in row[:-1]]
        for row in forw_p
    ])

    # 1 for the real words plus one extra slot, 0 for the padding.
    masks = torch.BoolTensor([
        [1] * (len(words) + 1) + [0] * (word_len - len(words) - 1)
        for words in word_features
    ])
    word_t = torch.LongTensor([
        words + [self.pad_word] * (word_len - len(words))
        for words in word_features
    ])

    def _as_input(tensor):
        # Swap the first two dimensions, wrap, and move to GPU when enabled.
        wrapped = autograd.Variable(tensor.transpose(0, 1))
        return wrapped.cuda() if self.if_cuda else wrapped

    f_f = _as_input(forw_t)
    f_p = _as_input(forw_p)
    b_f = _as_input(back_t)
    b_p = _as_input(back_p)
    w_f = _as_input(word_t)
    mask_v = masks.transpose(0, 1)
    if self.if_cuda:
        mask_v = mask_v.cuda()

    scores = ner_model(f_f, f_p, b_f, b_p, w_f)
    return self.decoder.decode(scores.data, mask_v)
def main(args):
    """Train GIC embeddings and a node classifier on them, then report accuracy.

    Steps, as performed below:
      1. Load the dataset named by ``args.dataset``.
      2. Restrict the graph to its largest connected component and rebuild
         it with self-loops.
      3. For two random train/val/test splits: train GIC with early stopping
         on its loss, reload the best checkpoint, train a classifier on the
         frozen, normalised embeddings and record the test accuracy.
      4. Print mean and standard deviation of the accuracies and the elapsed
         minutes.

    Raises:
        ValueError: if the split configuration is neither "imbalanced" nor
            "balanced".
    """
    torch.manual_seed(1234)

    if args.dataset == 'cora' or args.dataset == 'citeseer' or args.dataset == 'pubmed':
        data = load_data(args)
        features = torch.FloatTensor(data.features)
        labels = torch.LongTensor(data.labels)
        in_feats = features.shape[1]
        g = data.graph
        if args.dataset == 'cora':
            g.remove_edges_from(nx.selfloop_edges(g))
            g.add_edges_from(zip(g.nodes(), g.nodes()))
        g = DGLGraph(g)
        attr_matrix = data.features
        labels = data.labels
    else:
        if args.dataset == 'physics':
            data = Coauthor('physics')
        if args.dataset == 'cs':
            data = Coauthor('cs')
        if args.dataset == 'computers':
            data = AmazonCoBuy('computers')
        if args.dataset == 'photo':
            data = AmazonCoBuy('photo')
        g = data
        g = data[0]
        attr_matrix = g.ndata['feat']
        labels = g.ndata['label']
        features = torch.FloatTensor(g.ndata['feat'])

    ### LCC of the graph
    n_components = 1
    sparse_graph = g.adjacency_matrix_scipy(return_edge_ids=False)
    _, component_indices = sp.csgraph.connected_components(sparse_graph)
    component_sizes = np.bincount(component_indices)
    components_to_keep = np.argsort(
        component_sizes
    )[::-1][:n_components]  # reverse order to sort descending
    nodes_to_keep = [
        idx for (idx, component) in enumerate(component_indices)
        if component in components_to_keep
    ]
    adj_matrix = sparse_graph[nodes_to_keep][:, nodes_to_keep]
    num_nodes = len(nodes_to_keep)

    # Rebuild the graph from the component's adjacency matrix and replace
    # its self-loops.
    g = adj_matrix
    g = DGLGraph(g)
    g = remove_self_loop(g)
    g = add_self_loop(g)
    g = DGLGraph(g)

    g.ndata['feat'] = attr_matrix[nodes_to_keep]
    features = torch.FloatTensor(g.ndata['feat'].float())
    if args.dataset == 'cora' or args.dataset == 'pubmed':
        # Row-normalise features to (almost) unit L2 norm.
        features = features / (features.norm(dim=1) + 1e-8)[:, None]
    g.ndata['label'] = labels[nodes_to_keep]
    labels = torch.LongTensor(g.ndata['label'])
    in_feats = features.shape[1]

    unique_l = np.unique(labels, return_counts=False)
    n_classes = len(unique_l)
    n_nodes = g.number_of_nodes()
    n_edges = g.number_of_edges()
    print('Number of nodes', n_nodes, 'Number of edges', n_edges)

    # One-hot labels are what the split helper consumes.
    enc = OneHotEncoder()
    enc.fit(labels.reshape(-1, 1))
    ylabels = enc.transform(labels.reshape(-1, 1)).toarray()

    for beta in [args.beta]:
        for K in [args.num_clusters]:
            for alpha in [args.alpha]:
                accs = []
                t_st = time.time()
                sets = "imbalanced"

                for k in range(2):  # number of different trainings
                    random_state = np.random.RandomState()
                    if sets == "imbalanced":
                        train_idx, val_idx, test_idx = get_train_val_test_split(
                            random_state,
                            ylabels,
                            train_examples_per_class=None,
                            val_examples_per_class=None,
                            test_examples_per_class=None,
                            train_size=20 * n_classes,
                            val_size=30 * n_classes,
                            test_size=None)
                    elif sets == "balanced":
                        train_idx, val_idx, test_idx = get_train_val_test_split(
                            random_state,
                            ylabels,
                            train_examples_per_class=20,
                            val_examples_per_class=30,
                            test_examples_per_class=None,
                            train_size=None,
                            val_size=None,
                            test_size=None)
                    else:
                        # Previously a bare string expression, i.e. a no-op
                        # that fell through with no (or a stale) split.
                        raise ValueError(
                            "No such set configuration (imbalanced/balanced)")

                    n_nodes = len(nodes_to_keep)
                    train_mask = np.zeros(n_nodes)
                    train_mask[train_idx] = 1
                    val_mask = np.zeros(n_nodes)
                    val_mask[val_idx] = 1
                    test_mask = np.zeros(n_nodes)
                    test_mask[test_idx] = 1
                    train_mask = torch.BoolTensor(train_mask)
                    val_mask = torch.BoolTensor(val_mask)
                    test_mask = torch.BoolTensor(test_mask)
                    # The random split above is used in place of the
                    # Planetoid masks (data.train_mask / val_mask / test_mask)
                    # for Cora, CiteSeer and PubMed.

                    if args.gpu < 0:
                        cuda = False
                    else:
                        cuda = True
                        torch.cuda.set_device(args.gpu)
                        features = features.cuda()
                        labels = labels.cuda()
                        train_mask = train_mask.cuda()
                        val_mask = val_mask.cuda()
                        test_mask = test_mask.cuda()

                    gic = GIC(g, in_feats, args.n_hidden, args.n_layers,
                              nn.PReLU(args.n_hidden), args.dropout, K, beta,
                              alpha)
                    if cuda:
                        gic.cuda()

                    gic_optimizer = torch.optim.Adam(
                        gic.parameters(),
                        lr=args.gic_lr,
                        weight_decay=args.weight_decay)

                    # train GIC
                    cnt_wait = 0
                    best = 1e9
                    best_t = 0
                    dur = []
                    for epoch in range(args.n_gic_epochs):
                        gic.train()
                        if epoch >= 3:
                            t0 = time.time()

                        gic_optimizer.zero_grad()
                        loss = gic(features)
                        loss.backward()
                        gic_optimizer.step()

                        # Early stopping on the training loss: checkpoint on
                        # improvement, otherwise count towards the patience.
                        if loss < best:
                            best = loss
                            best_t = epoch
                            cnt_wait = 0
                            torch.save(gic.state_dict(), 'best_gic.pkl')
                        else:
                            cnt_wait += 1

                        if cnt_wait == args.patience:
                            break

                        if epoch >= 3:
                            dur.append(time.time() - t0)

                    # train classifier on the best GIC checkpoint
                    gic.load_state_dict(torch.load('best_gic.pkl'))
                    embeds = gic.encoder(features, corrupt=False)
                    embeds = embeds / (embeds + 1e-8).norm(dim=1)[:, None]
                    embeds = embeds.detach()

                    # create classifier model
                    classifier = Classifier(args.n_hidden, n_classes)
                    if cuda:
                        classifier.cuda()

                    classifier_optimizer = torch.optim.Adam(
                        classifier.parameters(),
                        lr=args.classifier_lr,
                        weight_decay=args.weight_decay)

                    dur = []
                    best_a = 0
                    cnt_wait = 0
                    for epoch in range(args.n_classifier_epochs):
                        classifier.train()
                        if epoch >= 3:
                            t0 = time.time()

                        classifier_optimizer.zero_grad()
                        preds = classifier(embeds)
                        loss = F.nll_loss(preds[train_mask],
                                          labels[train_mask])
                        loss.backward()
                        classifier_optimizer.step()

                        if epoch >= 3:
                            dur.append(time.time() - t0)

                        acc = evaluate(classifier, embeds, labels, val_mask)
                        # NOTE(review): 'best_class.pkl' is written here but
                        # never reloaded before the test evaluation below.
                        if acc > best_a and epoch > 100:
                            best_a = acc
                            best_t = epoch
                            torch.save(classifier.state_dict(),
                                       'best_class.pkl')

                    acc = evaluate(classifier, embeds, labels, test_mask)
                    accs.append(acc)

                print('=================== ', ' alpha', alpha, ' beta ', beta,
                      'K', K)
                print(args.dataset, ' Acc (mean)', mean(accs), ' (std)',
                      stdev(accs))
                print('=================== time', int(
                    (time.time() - t_st) / 60))
def main():
    """Train a DQN agent on Quoridor by epsilon-greedy self-play.

    Both players are driven by the same network.  For player 1 the Q-values
    are flipped along the action dimension before an action is chosen, and
    the chosen action index is mirrored back when the training target is
    written.  After every move the agent is updated towards a one-step
    target; win/draw/loss counts (from player 0's reward) are printed every
    100 episodes and the agent is saved every 500 episodes.
    """
    game = pyspiel.load_game(
        f"quoridor(ansi_color_output=true,board_size={BOARD_SIZE},wall_count={WALL_COUNT})"
    )
    board_diam = 2 * BOARD_SIZE - 1
    agent = DQNAgent(board_diam, game.num_distinct_actions())
    epsilon = EPSILON_START
    wins, draws, loses = (0, 0, 0)

    for episode in range(EPISODES):
        # Start game/episode
        state = game.new_initial_state()

        # Loop inside one game episode
        while not state.is_terminal():
            pl = state.current_player()

            # Encode the position once and reuse it for the forward pass.
            nn_input = get_nn_input(game, state)
            state_action_q_values = agent.forward(nn_input)
            if pl == 0:
                rotated_state_action_q_values = state_action_q_values
            else:
                rotated_state_action_q_values = torch.flip(
                    state_action_q_values.clone(), [1])

            if random.random() <= epsilon:
                # Explore: uniformly random legal action.
                actual_action = random.choice(state.legal_actions())
            else:
                # Exploit: shift Q-values so they are all >= 1, zero out the
                # illegal actions with the mask, then take the argmax.
                actual_action = torch.argmax(
                    (rotated_state_action_q_values -
                     torch.min(rotated_state_action_q_values) + 1) *
                    torch.tensor(state.legal_actions_mask())).item()

            # Index of the chosen action in the network's un-flipped output.
            if pl == 0:
                rotated_action = actual_action
            else:
                rotated_action = (board_diam**2 - 1) - actual_action

            state.apply_action(actual_action)
            rewards = state.rewards()

            if state.is_terminal():
                # Terminal move: the target is just the mover's reward.
                with torch.no_grad():
                    state_action_q_values_target = state_action_q_values.clone(
                    ).detach()
                    state_action_q_values_target[0][rotated_action] = rewards[
                        pl]
                agent.backward(state_action_q_values,
                               state_action_q_values_target)
            else:
                with torch.no_grad():
                    next_state_action_q_values = agent.forward(
                        get_nn_input(game, state))
                    if state.current_player() == 1:
                        next_state_action_q_values = torch.flip(
                            next_state_action_q_values, [1])
                    state_action_q_values_target = state_action_q_values.clone(
                    ).detach()
                    next_mask = torch.BoolTensor(state.legal_actions_mask())
                    next_legal_q_values = torch.masked_select(
                        next_state_action_q_values, next_mask)
                    # The next move belongs to the opponent, so its best
                    # value is subtracted rather than added.
                    state_action_q_values_target[0][rotated_action] = rewards[
                        pl] - GAMMA * torch.max(next_legal_q_values)
                agent.backward(state_action_q_values,
                               state_action_q_values_target)

        # Tally the finished game from player 0's point of view.
        if rewards[0] == 1:
            wins += 1
        if rewards[0] == 0:
            draws += 1
        if rewards[0] == -1:
            loses += 1

        if episode % 100 == 0:
            print("Episode: ", episode, epsilon)
            print(f"W:{wins}, D:{draws}, L:{loses}")
            wins, draws, loses = (0, 0, 0)

        if epsilon > EPSILON_END:
            epsilon -= EPSILON_DECAY

        if episode % 500 == 0:
            torch.save(
                agent,
                f"/mnt/QuoridorAI/Agents/SelfLearned{BOARD_SIZE}x{BOARD_SIZE}-{episode}"
            )
def main():
    """Command-line entry point: train GraphSAGE on a Planetoid dataset.

    Parses the options, loads the dataset named by ``--dataset`` with
    row-normalised features, and performs ``--runs`` independent full-graph
    training runs of ``--epochs`` epochs each.  Epochs from the third one
    onward are timed.  With ``--eval``, accuracies are computed after every
    epoch and summarised per run and over all runs.
    """
    parser = argparse.ArgumentParser(description='GraphSAGE')
    # Required: without it the dataset path below cannot be built and the
    # script would fail later with an opaque TypeError from ``osp.join``.
    parser.add_argument("--dataset", type=str, required=True,
                        help="name of the Planetoid dataset to load")
    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--dropout", type=float, default=0.5,
                        help="dropout probability")
    parser.add_argument("--lr", type=float, default=1e-2,
                        help="learning rate")
    parser.add_argument("--epochs", type=int, default=200,
                        help="number of training epochs")
    parser.add_argument("--n-hidden", type=int, default=16,
                        help="number of hidden gcn units")
    parser.add_argument("--aggr", type=str, choices=['sum', 'mean'],
                        default='mean', help='Aggregation for messages')
    parser.add_argument("--weight-decay", type=float, default=5e-4,
                        help="Weight for L2 loss")
    parser.add_argument("--eval", action='store_true',
                        help='If not set, we will only do the training part.')
    parser.add_argument("--runs", type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    path = osp.join('dataset', args.dataset)
    dataset = Planetoid(path, args.dataset, transform=T.NormalizeFeatures())
    data = dataset[0]

    features = data.x.to(device)
    labels = data.y.to(device)
    edge_index = data.edge_index.to(device)
    # Sparse adjacency built from the (row, col) edge list.
    adj = SparseTensor(row=edge_index[0], col=edge_index[1])
    train_mask = torch.BoolTensor(data.train_mask).to(device)
    val_mask = torch.BoolTensor(data.val_mask).to(device)
    test_mask = torch.BoolTensor(data.test_mask).to(device)

    model = GraphSAGE(dataset.num_features, args.n_hidden,
                      dataset.num_classes, args.aggr, F.relu,
                      args.dropout).to(device)
    loss_fcn = nn.CrossEntropyLoss()
    logger = Logger(args.runs, args)

    # Epoch durations accumulate across runs.
    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)

        for epoch in range(1, args.epochs + 1):
            model.train()
            # The first two epochs are treated as warm-up and not timed.
            if epoch >= 3:
                t0 = time.time()

            # forward
            logits = model(features, adj)
            loss = loss_fcn(logits[train_mask], labels[train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))

            if not args.eval:
                continue

            train_acc, val_acc, test_acc = evaluate(model, features, adj,
                                                    labels, train_mask,
                                                    val_mask, test_mask)
            logger.add_result(run, (train_acc, val_acc, test_acc))
            print(
                "Run {:02d} | Epoch {:05d} | Loss {:.4f} | Train {:.4f} | Val {:.4f} | Test {:.4f}"
                .format(run, epoch, loss.item(), train_acc, val_acc,
                        test_acc))

        if args.eval:
            logger.print_statistics(run)

    if args.eval:
        logger.print_statistics()
def run(proc_id, n_gpus, args, devices, data):
    """Training worker for one process/GPU.

    Args:
        proc_id: index of this worker; also its rank in the process group
            and its index into ``devices``.
        n_gpus: number of workers; distributed setup is used when > 1.
        args: options; reads ``fan_out``, ``batch_size``, ``num_workers``,
            ``num_hidden``, ``num_layers``, ``dropout``, ``lr``,
            ``num_epochs``, ``log_every`` and ``eval_every``.
        devices: list of device ids, one per worker.
        data: tuple ``(train_mask, val_mask, in_feats, labels, n_classes, g)``.

    Worker 0 prints per-step and per-epoch statistics, runs validation every
    ``args.eval_every`` epochs, and finally prints the mean epoch time over
    the epochs after a five-epoch warm-up.
    """
    # Start up distributed training, if enabled.
    dev_id = devices[proc_id]
    if n_gpus > 1:
        dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
            master_ip='127.0.0.1', master_port='12345')
        world_size = n_gpus
        th.distributed.init_process_group(backend="nccl",
                                          init_method=dist_init_method,
                                          world_size=world_size,
                                          rank=proc_id)
    th.cuda.set_device(dev_id)

    # Unpack data
    train_mask, val_mask, in_feats, labels, n_classes, g = data
    train_nid = th.LongTensor(np.nonzero(train_mask)[0])
    val_nid = th.LongTensor(np.nonzero(val_mask)[0])
    train_mask = th.BoolTensor(train_mask)
    val_mask = th.BoolTensor(val_mask)

    # Split train_nid so that each worker trains on its own chunk.
    train_nid = th.split(train_nid, len(train_nid) // n_gpus)[proc_id]

    # Create PyTorch DataLoader for constructing blocks
    sampler = dgl.sampling.MultiLayerNeighborSampler(
        [int(fanout) for fanout in args.fan_out.split(',')])
    dataloader = dgl.sampling.NodeDataLoader(g,
                                             train_nid,
                                             sampler,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             drop_last=False,
                                             num_workers=args.num_workers)

    # Define model and optimizer
    model = SAGE(in_feats, args.num_hidden, n_classes, args.num_layers,
                 F.relu, args.dropout)
    model = model.to(dev_id)
    if n_gpus > 1:
        model = DistributedDataParallel(model,
                                        device_ids=[dev_id],
                                        output_device=dev_id)
    loss_fcn = nn.CrossEntropyLoss()
    loss_fcn = loss_fcn.to(dev_id)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Epochs before this index are warm-up and excluded from the average.
    warmup_epochs = 5

    # Training loop
    avg = 0
    iter_tput = []
    for epoch in range(args.num_epochs):
        tic = time.time()

        # Loop over the dataloader to sample the computation dependency graph
        # as a list of blocks.
        for step, (input_nodes, seeds, blocks) in enumerate(dataloader):
            if proc_id == 0:
                tic_step = time.time()

            # Load the input features as well as output labels
            batch_inputs, batch_labels = load_subtensor(
                g, labels, seeds, input_nodes, dev_id)

            # Compute loss and prediction
            batch_pred = model(blocks, batch_inputs)
            loss = loss_fcn(batch_pred, batch_labels)
            optimizer.zero_grad()
            loss.backward()

            if n_gpus > 1:
                # Sum each gradient across workers, then divide by the
                # worker count to get the mean.
                for param in model.parameters():
                    if param.requires_grad and param.grad is not None:
                        th.distributed.all_reduce(
                            param.grad.data, op=th.distributed.ReduceOp.SUM)
                        param.grad.data /= n_gpus

            optimizer.step()

            if proc_id == 0:
                # Throughput in seeds/second, scaled by the worker count.
                iter_tput.append(
                    len(seeds) * n_gpus / (time.time() - tic_step))
            if step % args.log_every == 0 and proc_id == 0:
                acc = compute_acc(batch_pred, batch_labels)
                print(
                    'Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MiB'
                    .format(epoch, step, loss.item(), acc.item(),
                            np.mean(iter_tput[3:]),
                            th.cuda.max_memory_allocated() / 1000000))

        if n_gpus > 1:
            th.distributed.barrier()

        toc = time.time()
        if proc_id == 0:
            print('Epoch Time(s): {:.4f}'.format(toc - tic))
            if epoch >= warmup_epochs:
                avg += toc - tic
            if epoch % args.eval_every == 0 and epoch != 0:
                # Under DistributedDataParallel the bare model is ``.module``.
                if n_gpus == 1:
                    eval_acc = evaluate(model, g, g.ndata['features'], labels,
                                        val_mask, args.batch_size, devices[0])
                else:
                    eval_acc = evaluate(model.module, g, g.ndata['features'],
                                        labels, val_mask, args.batch_size,
                                        devices[0])
                print('Eval Acc {:.4f}'.format(eval_acc))

    if n_gpus > 1:
        th.distributed.barrier()
    if proc_id == 0:
        # Same divisor as the former ``epoch - 4`` whenever at least one
        # epoch was timed, but safe for runs of five epochs or fewer (which
        # used to divide by zero, by a negative number, or hit an unbound
        # ``epoch``).
        timed_epochs = args.num_epochs - warmup_epochs
        if timed_epochs > 0:
            print('Avg epoch time: {}'.format(avg / timed_epochs))
        else:
            print('Avg epoch time: n/a (no epochs after warm-up)')
def main(args):
    """Train GraphSAGE with cluster-based mini-batches and report F1 scores.

    Seeds all random number generators from ``args.rnd_seed``, loads the
    dataset, optionally standardises the features (statistics fitted on the
    training nodes only), and trains on the sub-graphs yielded by
    ``ClusterIter``.  Validation runs every ``args.val_every`` epochs and the
    best model by micro-F1 is saved under the log directory; it is reloaded
    before testing when ``args.use_val`` is set.
    """
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Datasets listed here are trained with a multi-label loss.
    multitask_data = set(['ppi'])
    multitask = args.dataset in multitask_data

    # load and preprocess dataset
    data = load_data(args)

    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)

    # Normalize features
    if args.normalize:
        train_feats = data.features[train_nid]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats)
        features = scaler.transform(data.features)
    else:
        features = data.features

    features = torch.FloatTensor(features)
    if not multitask:
        labels = torch.LongTensor(data.labels)
    else:
        labels = torch.FloatTensor(data.labels)
    # Older torch builds without BoolTensor fall back to ByteTensor masks.
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.int().sum().item()
    n_val_samples = val_mask.int().sum().item()
    n_test_samples = test_mask.int().sum().item()

    print("""----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" %
          (n_edges, n_classes,
           n_train_samples,
           n_val_samples,
           n_test_samples))

    # create GCN model
    g = data.graph
    if args.self_loop and not args.dataset.startswith('reddit'):
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
        print("adding self-loop edges")
    g = DGLGraph(g, readonly=True)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        print(torch.cuda.get_device_name(0))

    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    print('labels shape:', labels.shape)

    cluster_iterator = ClusterIter(
        args.dataset, g, args.psize, args.batch_size, train_nid,
        use_pp=args.use_pp)

    print("features shape, ", features.shape)

    model = GraphSAGE(in_feats,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      F.relu,
                      args.dropout,
                      args.use_pp)

    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    writer = SummaryWriter(log_dir)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
        print("current memory after model before training",
              torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)

    start_time = time.time()
    best_f1 = -1

    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(cluster_iterator):
            # sync with upper level training graph
            cluster.copy_from_parent()
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['labels']
            batch_train_mask = cluster.ndata['train_mask']
            # Only training nodes of the cluster contribute to the loss.
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # in PPI case, `log_every` is chosen to log one time per epoch.
            # Choose your log freq dynamically when you want more info within
            # one epoch
            if j % args.log_every == 0:
                print(f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                      f"{len(cluster_iterator)}:training loss", loss.item())
                writer.add_scalar('train/loss', loss.item(),
                                  global_step=j + epoch * len(cluster_iterator))

        # CUDA memory statistics are only queried when training on a GPU;
        # on CPU ``pred.device`` is not a CUDA device.
        if cuda:
            print("current memory:",
                  torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate
        if epoch % args.val_every == 0:
            val_f1_mic, val_f1_mac = evaluate(
                model, g, labels, val_mask, multitask)
            print(
                "Val F1-mic{:.4f}, Val F1-mac{:.4f}". format(val_f1_mic, val_f1_mac))
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(), os.path.join(
                    log_dir, 'best_model.pkl'))
            writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
            writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)

    end_time = time.time()
    # Elapsed seconds; the operands used to be swapped, which printed a
    # negative duration.
    print(f'training using time {end_time-start_time}')

    # test
    if args.use_val:
        model.load_state_dict(torch.load(os.path.join(
            log_dir, 'best_model.pkl')))
    test_f1_mic, test_f1_mac = evaluate(
        model, g, labels, test_mask, multitask)
    print("Test F1-mic{:.4f}, Test F1-mac{:.4f}". format(test_f1_mic, test_f1_mac))
    writer.add_scalar('test/f1-mic', test_f1_mic)
    writer.add_scalar('test/f1-mac', test_f1_mac)
def main(args):
    """Train a sampled GCN on NodeFlows received from remote samplers.

    The dataset is loaded, normalised neighbour-feature sums ('preprocess')
    and zero-initialised history embeddings ('h_<i>') are stored on the graph,
    and then every epoch consumes the NodeFlows produced by a
    ``SamplerReceiver``, takes one optimizer step per NodeFlow, copies the
    trained weights into the inference model and prints the test accuracy.

    Args:
        args: Parsed options. The attributes read here are ``self_loop``,
            ``dataset``, ``gpu``, ``num_neighbors``, ``n_layers``,
            ``n_hidden``, ``dropout``, ``lr``, ``weight_decay``, ``ip``,
            ``num_sampler``, ``n_epochs`` and ``test_batch_size``.
    """
    # ---- dataset -----------------------------------------------------------
    data = load_data(args)

    wants_self_loops = args.self_loop and not args.dataset.startswith('reddit')
    if wants_self_loops:
        data.graph.add_edges_from([(v, v) for v in range(len(data.graph))])

    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)
    test_nid = np.nonzero(data.test_mask)[0].astype(np.int64)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)

    # Older torch builds have no BoolTensor; fall back to ByteTensor there.
    if hasattr(torch, 'BoolTensor'):
        mask_ctor = torch.BoolTensor
    else:
        mask_ctor = torch.ByteTensor
    train_mask = mask_ctor(data.train_mask)
    val_mask = mask_ctor(data.val_mask)
    test_mask = mask_ctor(data.test_mask)

    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.int().sum().item()
    n_val_samples = val_mask.int().sum().item()
    n_test_samples = test_mask.int().sum().item()

    statistics = (n_edges, n_classes, n_train_samples, n_val_samples,
                  n_test_samples)
    print("""----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % statistics)

    # ---- graph and per-node state ------------------------------------------
    g = DGLGraph(data.graph, readonly=True)
    in_degrees = g.in_degrees().float().unsqueeze(1)
    norm = 1. / in_degrees

    cuda = not (args.gpu < 0)
    if cuda:
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        norm = norm.cuda()

    g.ndata['features'] = features

    num_neighbors = args.num_neighbors
    n_layers = args.n_layers

    g.ndata['norm'] = norm

    # 'preprocess' = (sum of in-neighbour features) * norm, computed once.
    g.update_all(
        fn.copy_src(src='features', out='m'),
        fn.sum(msg='m', out='preprocess'),
        lambda node: {'preprocess': node.data['preprocess'] * node.data['norm']})

    # History buffers start at zero; the last one is twice as wide.
    n_nodes = features.shape[0]
    for layer in range(n_layers):
        g.ndata['h_{}'.format(layer)] = torch.zeros(
            n_nodes, args.n_hidden).to(device=features.device)
    g.ndata['h_{}'.format(n_layers - 1)] = torch.zeros(
        n_nodes, 2 * args.n_hidden).to(device=features.device)

    # ---- models ------------------------------------------------------------
    model = GCNSampling(in_feats, args.n_hidden, n_classes, n_layers,
                        F.relu, args.dropout)
    loss_fcn = nn.CrossEntropyLoss()
    infer_model = GCNInfer(in_feats, args.n_hidden, n_classes, n_layers,
                           F.relu)

    if cuda:
        model.cuda()
        infer_model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # Create sampler receiver
    sampler = dgl.contrib.sampling.SamplerReceiver(
        graph=g, addr=args.ip, num_sender=args.num_sampler)

    for epoch in range(args.n_epochs):
        # ---- training on received NodeFlows --------------------------------
        for nf in sampler:
            # Refresh the aggregated history of every layer for the nodes
            # this NodeFlow touches.
            for layer in range(n_layers):
                agg_history_str = 'agg_h_{}'.format(layer)
                g.pull(
                    nf.layer_parent_nid(layer + 1).long(),
                    fn.copy_src(src='h_{}'.format(layer), out='m'),
                    fn.sum(msg='m', out=agg_history_str),
                    lambda node: {
                        agg_history_str:
                        node.data[agg_history_str] * node.data['norm']
                    })

            pulled_names = [['preprocess', 'h_0']]
            pulled_names.extend(
                ['h_{}'.format(layer), 'agg_h_{}'.format(layer - 1)]
                for layer in range(1, n_layers))
            pulled_names.append(['agg_h_{}'.format(n_layers - 1)])
            nf.copy_from_parent(node_embed_names=pulled_names)

            model.train()
            # forward
            pred = model(nf)
            batch_nids = nf.layer_parent_nid(-1).to(device=pred.device).long()
            loss = loss_fcn(pred, labels[batch_nids])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Write the fresh hidden states back as history; the output layer
            # has nothing to push.
            pushed_names = [['h_{}'.format(layer)] for layer in range(n_layers)]
            pushed_names.append([])
            nf.copy_to_parent(node_embed_names=pushed_names)

        # ---- sync weights into the inference model -------------------------
        for infer_param, param in zip(infer_model.parameters(),
                                      model.parameters()):
            infer_param.data.copy_(param.data)

        # ---- test -----------------------------------------------------------
        num_acc = 0.
        test_sampler = dgl.contrib.sampling.NeighborSampler(
            g, args.test_batch_size, g.number_of_nodes(),
            neighbor_type='in',
            num_workers=32,
            num_hops=n_layers,
            seed_nodes=test_nid)
        for nf in test_sampler:
            test_names = [['preprocess']]
            test_names.extend(['norm'] for _ in range(n_layers))
            nf.copy_from_parent(node_embed_names=test_names)

            infer_model.eval()
            with torch.no_grad():
                pred = infer_model(nf)
                batch_nids = nf.layer_parent_nid(-1).to(
                    device=pred.device).long()
                hits = pred.argmax(dim=1) == labels[batch_nids]
                num_acc += hits.sum().cpu().item()

        print("Test Accuracy {:.4f}".format(num_acc / n_test_samples))
def visualize_subgraph(self, node_idx: Optional[int], edge_index: Tensor,
                       edge_mask: Tensor, y: Optional[Tensor] = None,
                       threshold: Optional[float] = None,
                       edge_y: Optional[Tensor] = None,
                       node_alpha: Optional[Tensor] = None, seed: int = 10,
                       **kwargs):
    r"""Visualizes the subgraph given an edge mask :attr:`edge_mask`.

    Args:
        node_idx (int): The node id to explain.
            Set to :obj:`None` (or a negative value) to explain a graph.
        edge_index (LongTensor): The edge indices.
        edge_mask (Tensor): The edge mask.
        y (Tensor, optional): The ground-truth node-prediction labels used
            as node colorings. All nodes will have the same color if
            :attr:`node_idx` is :obj:`None` or negative.
            (default: :obj:`None`)
        threshold (float, optional): Sets a threshold for visualizing
            important edges. If set to :obj:`None`, will visualize all
            edges with transparency indicating the importance of edges.
            (default: :obj:`None`)
        edge_y (Tensor, optional): The edge labels used as edge colorings.
        node_alpha (Tensor, optional): Tensor of floats (0 - 1) indicating
            transparency of each node.
        seed (int, optional): Random seed of the :obj:`networkx` node
            placement algorithm. (default: :obj:`10`)
        **kwargs (optional): Additional arguments passed to
            :func:`nx.draw`.

    :rtype: :class:`matplotlib.axes.Axes`, :class:`networkx.DiGraph`
    """
    import matplotlib.pyplot as plt
    import networkx as nx

    assert edge_mask.size(0) == edge_index.size(1)

    if node_idx is None or node_idx < 0:
        # Graph-level explanation: keep every edge. A factory function is
        # used because the legacy ``torch.BoolTensor(..., device=...)``
        # constructor rejects non-CPU devices.
        hard_edge_mask = torch.ones(edge_index.size(1), dtype=torch.bool,
                                    device=edge_mask.device)
        subset = torch.arange(edge_index.max().item() + 1,
                              device=edge_index.device)
        y = None
    else:
        # Only operate on a k-hop subgraph around `node_idx`.
        subset, edge_index, _, hard_edge_mask = k_hop_subgraph(
            node_idx, self.num_hops, edge_index, relabel_nodes=True,
            num_nodes=None, flow=self._flow())

    edge_mask = edge_mask[hard_edge_mask]

    if threshold is not None:
        edge_mask = (edge_mask >= threshold).to(torch.float)

    if y is None:
        y = torch.zeros(edge_index.max().item() + 1,
                        device=edge_index.device)
    else:
        # Scale labels by the global maximum; skip the division when that
        # maximum is 0, which would otherwise turn every colour into NaN.
        y_max = y.max().item()
        y = y[subset].to(torch.float)
        if y_max != 0:
            y = y / y_max

    if edge_y is None:
        edge_color = ['black'] * edge_index.size(1)
    else:
        colors = list(plt.rcParams['axes.prop_cycle'])
        edge_color = [
            colors[i % len(colors)]['color']
            for i in edge_y[hard_edge_mask]
        ]

    data = Data(edge_index=edge_index, att=edge_mask,
                edge_color=edge_color, y=y, num_nodes=y.size(0)).to('cpu')
    G = to_networkx(data, node_attrs=['y'],
                    edge_attrs=['att', 'edge_color'])
    # Map the relabelled (0..k-1) node ids back to the original ids.
    mapping = dict(enumerate(subset.tolist()))
    G = nx.relabel_nodes(G, mapping)

    # Forward only the keyword arguments each networkx drawer accepts.
    node_args = set(signature(nx.draw_networkx_nodes).parameters.keys())
    node_kwargs = {k: v for k, v in kwargs.items() if k in node_args}
    node_kwargs['node_size'] = kwargs.get('node_size') or 800
    node_kwargs['cmap'] = kwargs.get('cmap') or 'cool'

    label_args = set(signature(nx.draw_networkx_labels).parameters.keys())
    label_kwargs = {k: v for k, v in kwargs.items() if k in label_args}
    label_kwargs['font_size'] = kwargs.get('font_size') or 10

    pos = nx.spring_layout(G, seed=seed)
    ax = plt.gca()
    shrink = sqrt(node_kwargs['node_size']) / 2.0
    for source, target, attrs in G.edges(data=True):
        ax.annotate(
            '', xy=pos[target], xycoords='data', xytext=pos[source],
            textcoords='data', arrowprops=dict(
                arrowstyle="->",
                # Keep even unimportant edges faintly visible.
                alpha=max(attrs['att'], 0.1),
                color=attrs['edge_color'],
                shrinkA=shrink,
                shrinkB=shrink,
                connectionstyle="arc3,rad=0.1",
            ))

    if node_alpha is None:
        nx.draw_networkx_nodes(G, pos, node_color=y.tolist(),
                               **node_kwargs)
    else:
        node_alpha_subset = node_alpha[subset]
        assert ((node_alpha_subset >= 0) & (node_alpha_subset <= 1)).all()
        nx.draw_networkx_nodes(G, pos, alpha=node_alpha_subset.tolist(),
                               node_color=y.tolist(), **node_kwargs)

    nx.draw_networkx_labels(G, pos, **label_kwargs)

    return ax, G
def run(args, device, data):
    """Train a SAGE model with mini-batch neighbour sampling and evaluate it.

    Two optimizers are used: Adam for the model parameters (every step) and
    SGD for the two loss coefficients ``coeffs`` (roughly twice per epoch).
    After training, ``evaluate_test`` is called on ``g`` and on ``ind_g``.

    Args:
        args: Options; the attributes read here are ``fan_out``,
            ``batch_size``, ``num_workers``, ``num_hidden``, ``num_layers``,
            ``dropout``, ``lr``, ``num_epochs``, ``log_every`` and
            ``eval_every``.
        device: Device the model, the coefficients and the batches live on.
        data: 10-tuple ``(train_mask, val_mask, test_mask, in_feats, labels,
            ind_labels, n_classes, g, ind_g, lp_dict)``.
    """
    # Unpack data
    (train_mask, val_mask, test_mask, in_feats, labels, ind_labels,
     n_classes, g, ind_g, lp_dict) = data

    train_nid = th.LongTensor(np.nonzero(train_mask)[0])
    val_mask = th.BoolTensor(val_mask)
    test_mask = th.BoolTensor(test_mask)

    # Create sampler
    sampler = NeighborSampler(
        g, [int(fanout) for fanout in args.fan_out.split(',')])

    # Create PyTorch DataLoader for constructing blocks
    dataloader = DataLoader(dataset=train_nid.numpy(),
                            batch_size=args.batch_size,
                            collate_fn=sampler.sample_blocks,
                            shuffle=True,
                            drop_last=False,
                            num_workers=args.num_workers)

    # Define model and optimizer
    model = SAGE(in_feats, args.num_hidden, n_classes, args.num_layers,
                 F.relu, args.dropout)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Learnable loss coefficients, reported below as tanh(coeffs[0]) and
    # exp(coeffs[1]).
    coeffs = th.tensor([1., 3.0], dtype=th.float32, device=device,
                       requires_grad=True)
    coeffs_optimizer = optim.SGD([coeffs], lr=1e-1, momentum=0.0)

    # Training loop
    warmup_epochs = 5  # epochs excluded from the average epoch time
    timed_total = 0.0
    timed_epochs = 0
    steps_per_epoch = len(dataloader)
    # Update the coefficients about twice per epoch. Clamp to 1 so that an
    # epoch with a single batch does not trigger a modulo-by-zero.
    coeff_every = max(1, steps_per_epoch // 2)

    for epoch in range(args.num_epochs):
        tic = time.time()

        # Loop over the dataloader to sample the computation dependency graph
        # as a list of blocks.
        for step, blocks in enumerate(dataloader):
            # The nodes for input lies at the LHS side of the first block.
            # The nodes for output lies at the RHS side of the last block.
            input_nodes = blocks[0].srcdata[dgl.NID]
            seeds = blocks[-1].dstdata[dgl.NID]

            # Load the input features as well as output labels
            batch_inputs, batch_labels = load_subtensor(
                g, labels, seeds, input_nodes, device)

            # Compute loss and prediction
            model.train()
            batch_pred = model(blocks, batch_inputs)
            loss = loss_fcn(batch_pred.squeeze(), batch_labels.squeeze(),
                            seeds, lp_dict['adj'], coeffs, device, False)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (step + 1) % coeff_every == 0:
                # Second pass on the same batch, this time stepping only the
                # coefficient optimizer.
                model.train()
                batch_pred = model(blocks, batch_inputs)
                loss = loss_fcn(batch_pred.squeeze(), batch_labels.squeeze(),
                                seeds, lp_dict['adj'], coeffs, device, True)
                coeffs_optimizer.zero_grad()
                loss.backward()
                coeffs_optimizer.step()

            if step % args.log_every == 0:
                r2 = compute_r2(batch_pred, batch_labels)
                print(
                    'Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train R2 {:.4f} | alpha: {:.4f} | beta: {:.4f}'
                    .format(epoch, step, loss.item(), r2.item(),
                            th.tanh(coeffs[0]).item(),
                            th.exp(coeffs[1]).item()))

        toc = time.time()
        print('Epoch Time(s): {:.4f}'.format(toc - tic))
        if epoch >= warmup_epochs:
            timed_total += toc - tic
            timed_epochs += 1

        if epoch % args.eval_every == 0 and epoch != 0:
            eval_r2 = evaluate(model, g, g.ndata['features'], labels,
                               val_mask, args.batch_size, device)
            print('Eval R2: {:.4f}'.format(eval_r2))

    evaluate_test(model, g, g.ndata['features'], labels, test_mask,
                  args.batch_size, device, lp_dict, coeffs, "2012")
    evaluate_test(model, ind_g, ind_g.ndata['features'], ind_labels,
                  test_mask, args.batch_size, device, lp_dict, coeffs,
                  "2016")

    # Count the timed epochs explicitly: deriving the divisor from the last
    # epoch index breaks (zero/negative/undefined) for short runs.
    avg_epoch_time = (timed_total / timed_epochs
                      if timed_epochs else float('nan'))
    print('Avg epoch time: {}'.format(avg_epoch_time))
def __getitem__(self, idx):
    """Encode example ``idx`` as fixed-length tensors for token classification.

    Every whitespace-separated word is tokenized; its first sub-token carries
    the word's label (label mask 1) and the remaining sub-tokens carry the
    'X' label (label mask 0). The sequence is wrapped in '[CLS]' / '[SEP]',
    truncated so that it fits ``self.max_len``, and padded up to
    ``self.max_len``.

    Returns:
        Tuple ``(input_ids, label_ids, attention_mask, sentence_id,
        label_mask)``; the first four are ``LongTensor`` and the last one is
        a ``BoolTensor``, each of length ``self.max_len``.
    """
    example = self.data_list[idx]
    words = example.text.split()
    word_labels = example.label

    to_id = self.tokenizer.convert_tokens_to_ids
    x_label_id = self.label_map['X']

    word_tokens = ['[CLS]']
    label_list = ['[CLS]']
    label_mask = [0]  # value in (0, 1) - 0 signifies invalid token
    input_ids = [to_id('[CLS]')]
    label_ids = [self.label_map['[CLS]']]

    # iterate over individual tokens and their labels
    for word, word_label in zip(words, word_labels):
        pieces = self.tokenizer.tokenize(word)
        for piece in pieces:
            word_tokens.append(piece)
            input_ids.append(to_id(piece))

        # The first sub-token is the one that carries the word's tag.
        label_list.append(word_label)
        label_ids.append(self.label_map[word_label])
        label_mask.append(1)

        # len(pieces) > 1 only if the tokenizer split the word, in which case
        # the remaining sub-tokens get assigned X.
        n_continuations = len(range(1, len(pieces)))
        label_list.extend(['X'] * n_continuations)
        label_ids.extend([x_label_id] * n_continuations)
        label_mask.extend([0] * n_continuations)

    assert len(word_tokens) == len(label_list) == len(input_ids) == len(
        label_ids) == len(label_mask)

    # Leave room for the closing '[SEP]'.
    if len(word_tokens) >= self.max_len:
        keep = self.max_len - 1
        word_tokens = word_tokens[:keep]
        label_list = label_list[:keep]
        input_ids = input_ids[:keep]
        label_ids = label_ids[:keep]
        label_mask = label_mask[:keep]

    assert len(word_tokens) < self.max_len, len(word_tokens)

    word_tokens.append('[SEP]')
    label_list.append('[SEP]')
    input_ids.append(to_id('[SEP]'))
    label_ids.append(self.label_map['[SEP]'])
    label_mask.append(0)

    assert len(word_tokens) == len(label_list) == len(input_ids) == len(
        label_ids) == len(label_mask)

    n_real = len(input_ids)
    sentence_id = [0] * n_real
    attention_mask = [1] * n_real

    # Pad every model input up to max_len (a non-positive count pads nothing).
    n_pad = self.max_len - n_real
    input_ids.extend([0] * n_pad)
    label_ids.extend([x_label_id] * n_pad)
    attention_mask.extend([0] * n_pad)
    sentence_id.extend([0] * n_pad)
    label_mask.extend([0] * n_pad)

    assert len(word_tokens) == len(label_list)
    assert len(input_ids) == len(label_ids) == len(attention_mask) == len(
        sentence_id) == len(label_mask) == self.max_len, len(input_ids)

    return (torch.LongTensor(input_ids), torch.LongTensor(label_ids),
            torch.LongTensor(attention_mask), torch.LongTensor(sentence_id),
            torch.BoolTensor(label_mask))
def __getitem__(self, index):
    """Load one optical/thermal image pair and return it as a dict of tensors.

    Depending on ``self.config`` the images are randomly cropped, augmented
    photometrically and/or homographically, and returned either as a single
    randomly chosen image (``single_image``) or as an ``'optical'`` /
    ``'thermal'`` pair of sub-dicts.

    Args:
        index: Position in ``self.memberslist`` of the sample to load.

    Returns:
        dict: In single-image mode the keys are ``'image'``,
        ``'valid_mask'``, ``'is_optical'`` and, if keypoints are available,
        ``'keypoints'``. In pair mode ``'optical'`` and ``'thermal'`` each
        hold those keys plus ``'homography'`` when homographic augmentation
        is enabled. ``'name'`` is added when ``return_name`` is set.

    Raises:
        ValueError: If the two images differ in shape, or the requested crop
            is larger than the image.
    """
    # Read everything into memory inside the context manager so the HDF5
    # handle is closed again instead of leaking one handle per sample.
    with h5py.File(self.config['filename'], 'r', swmr=True) as h5_file:
        sample = h5_file[self.memberslist[index]]
        optical = sample['optical'][...]
        if self.config['raw_thermal']:
            thermal = sample['thermal_raw'][...]
        else:
            thermal = sample['thermal'][...]

    if thermal.shape != optical.shape:
        raise ValueError(
            'ImagePairDataset: The optical and thermal image must have the same shape'
        )

    if self.config['keypoints_filename'] is not None:
        with h5py.File(self.config['keypoints_filename'], 'r',
                       swmr=True) as keypoints_file:
            keypoints = np.array(
                keypoints_file[self.memberslist[index]]['keypoints'])
    else:
        keypoints = None

    # subsample images if requested
    if self.config['height'] > 0 or self.config['width'] > 0:
        if self.config['height'] > 0:
            h = self.config['height']
        else:
            h = thermal.shape[0]

        if self.config['width'] > 0:
            w = self.config['width']
        else:
            w = thermal.shape[1]

        if w > thermal.shape[1] or h > thermal.shape[0]:
            raise ValueError(
                'ImagePairDataset: Requested height/width exceeds original image size'
            )

        # pick a random crop origin and apply the same crop to both images
        i_h = random.randint(0, thermal.shape[0] - h)
        i_w = random.randint(0, thermal.shape[1] - w)

        optical = optical[i_h:i_h + h, i_w:i_w + w]
        thermal = thermal[i_h:i_h + h, i_w:i_w + w]

        if keypoints is not None:
            # shift keypoints into the crop's coordinate frame
            keypoints = keypoints - np.array([[i_h, i_w]])

            # drop the ones that fell outside the crop
            keypoints = keypoints[np.logical_and(
                np.logical_and(keypoints[:, 0] >= 0, keypoints[:, 0] < h),
                np.logical_and(keypoints[:, 1] >= 0, keypoints[:, 1] < w))]
    else:
        h = thermal.shape[0]
        w = thermal.shape[1]

    out = {}
    if self.config['single_image']:
        is_optical = bool(random.randint(0, 1))
        if is_optical:
            image = optical
        else:
            image = thermal

        # augmentation
        if self.config['augmentation']['photometric']['enable']:
            image = augmentation.photometric_augmentation(
                image, **self.config['augmentation']['photometric'])

        if self.config['augmentation']['homographic']['enable']:
            image, keypoints, valid_mask = augmentation.homographic_augmentation(
                image, keypoints,
                **self.config['augmentation']['homographic'])
        else:
            valid_mask = augmentation.dummy_valid_mask(image.shape)

        # add channel information to image and mask
        image = np.expand_dims(image, 0)
        valid_mask = np.expand_dims(valid_mask, 0)

        # add to output dict (builtin bool: the np.bool alias was removed
        # from NumPy)
        out['image'] = torch.from_numpy(image.astype(np.float32))
        out['valid_mask'] = torch.from_numpy(valid_mask.astype(bool))
        out['is_optical'] = torch.BoolTensor([is_optical])
        if keypoints is not None:
            keypoints = utils.generate_keypoint_map(keypoints, (h, w))
            out['keypoints'] = torch.from_numpy(keypoints.astype(bool))
    else:
        # initialize the images
        out['optical'] = {}
        out['thermal'] = {}

        optical_is_optical = True
        thermal_is_optical = False
        if self.config['random_pairs']:
            # Each slot may independently be replaced by the other modality.
            tmp_optical = optical
            tmp_thermal = thermal
            if bool(random.randint(0, 1)):
                optical = tmp_thermal
                optical_is_optical = False
            if bool(random.randint(0, 1)):
                thermal = tmp_optical
                thermal_is_optical = True

        # augmentation
        if self.config['augmentation']['photometric']['enable']:
            optical = augmentation.photometric_augmentation(
                optical, **self.config['augmentation']['photometric'])
            thermal = augmentation.photometric_augmentation(
                thermal, **self.config['augmentation']['photometric'])

        if self.config['augmentation']['homographic']['enable']:
            # randomly pick one image to warp; the other keeps the identity
            if bool(random.randint(0, 1)):
                valid_mask_thermal = augmentation.dummy_valid_mask(
                    thermal.shape)
                keypoints_thermal = keypoints
                optical, keypoints_optical, valid_mask_optical, H = augmentation.homographic_augmentation(
                    optical, keypoints, return_homography=True,
                    **self.config['augmentation']['homographic'])
                out['optical']['homography'] = torch.from_numpy(
                    H.astype(np.float32))
                out['thermal']['homography'] = torch.eye(
                    3, dtype=torch.float32)
            else:
                valid_mask_optical = augmentation.dummy_valid_mask(
                    optical.shape)
                keypoints_optical = keypoints
                thermal, keypoints_thermal, valid_mask_thermal, H = augmentation.homographic_augmentation(
                    thermal, keypoints, return_homography=True,
                    **self.config['augmentation']['homographic'])
                out['thermal']['homography'] = torch.from_numpy(
                    H.astype(np.float32))
                out['optical']['homography'] = torch.eye(
                    3, dtype=torch.float32)
        else:
            keypoints_optical = keypoints
            keypoints_thermal = keypoints
            valid_mask_optical = valid_mask_thermal = augmentation.dummy_valid_mask(
                optical.shape)

        # add channel information to image and mask
        optical = np.expand_dims(optical, 0)
        thermal = np.expand_dims(thermal, 0)
        valid_mask_optical = np.expand_dims(valid_mask_optical, 0)
        valid_mask_thermal = np.expand_dims(valid_mask_thermal, 0)

        out['optical']['image'] = torch.from_numpy(
            optical.astype(np.float32))
        out['optical']['valid_mask'] = torch.from_numpy(
            valid_mask_optical.astype(bool))
        out['optical']['is_optical'] = torch.BoolTensor(
            [optical_is_optical])
        if keypoints_optical is not None:
            keypoints_optical = utils.generate_keypoint_map(
                keypoints_optical, (h, w))
            out['optical']['keypoints'] = torch.from_numpy(
                keypoints_optical.astype(bool))

        out['thermal']['image'] = torch.from_numpy(
            thermal.astype(np.float32))
        out['thermal']['valid_mask'] = torch.from_numpy(
            valid_mask_thermal.astype(bool))
        out['thermal']['is_optical'] = torch.BoolTensor(
            [thermal_is_optical])
        # Guard on the thermal keypoints themselves (the original tested
        # keypoints_optical here).
        if keypoints_thermal is not None:
            keypoints_thermal = utils.generate_keypoint_map(
                keypoints_thermal, (h, w))
            out['thermal']['keypoints'] = torch.from_numpy(
                keypoints_thermal.astype(bool))

    if self.config['return_name']:
        out['name'] = self.memberslist[index]

    return out
def main(args):
    """Train an unsupervised GraphSAGE model on Reddit and report F1 scores.

    The graph, features, labels and node-type split are read from the
    ``reddit_self_loop`` directory. The model is trained with
    ``Unsuper_Cross_Entropy`` on positive/negative graphs produced by the
    sampler, and evaluated by fitting a logistic regression on the
    standardized inferred embeddings of the training nodes.

    Args:
        args: Options; the attributes read here are ``gpu``, ``fan_out``,
            ``num_neg``, ``batch_size``, ``num_workers``, ``num_hidden``,
            ``num_layers``, ``dropout``, ``agg``, ``bias``, ``norm``, ``lr``,
            ``num_epochs``, ``log_every`` and ``eval_every``.
    """
    # graph
    coo_adj = sp.load_npz("reddit_self_loop/reddit_self_loop_graph.npz")
    graph = DGLGraph(coo_adj, readonly=True)

    # features and labels
    reddit_data = np.load("reddit_self_loop/reddit_data.npz")
    features = reddit_data["feature"]
    labels = reddit_data["label"]
    num_labels = 41

    # train/val/test split, encoded as node type 1/2/3
    node_types = reddit_data["node_types"]
    train_mask = (node_types == 1)
    val_mask = (node_types == 2)
    test_mask = (node_types == 3)
    graph.ndata['train_mask'] = train_mask
    graph.ndata['val_mask'] = val_mask
    graph.ndata['test_mask'] = test_mask
    graph.ndata['feat'] = features
    graph.ndata['label'] = labels

    features = torch.Tensor(features)
    in_feats = features.shape[1]
    labels = torch.LongTensor(labels)
    train_nid = torch.LongTensor(np.nonzero(train_mask)[0])
    train_mask = torch.BoolTensor(train_mask)
    val_nid = torch.LongTensor(np.nonzero(val_mask)[0])
    val_mask = torch.BoolTensor(val_mask)
    test_nid = torch.LongTensor(np.nonzero(test_mask)[0])
    test_mask = torch.BoolTensor(test_mask)

    g = dgl.graph(graph.all_edges())  # rebuild through dgl.graph (heterograph API)
    g.ndata['features'] = features

    gpu = args.gpu
    use_cuda = gpu >= 0 and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(gpu)
        # NOTE(review): the return value is discarded; confirm that the
        # installed DGL version moves the graph in place.
        g.to(torch.device('cuda:{}'.format(gpu)))
        labels = labels.cuda()

    fanouts = list(map(int, args.fan_out.split(',')))
    sampler = Sample(g, fanouts, args.num_neg)

    # Shuffle the training nodes and split them into batches; each batch is
    # collated by `sampler.obtain_Bs`.
    batch_size = args.batch_size
    num_workers = args.num_workers
    dataloader = DataLoader(dataset=train_nid.numpy(),
                            batch_size=batch_size,
                            collate_fn=sampler.obtain_Bs,
                            shuffle=True,
                            drop_last=False,
                            num_workers=num_workers)

    # set up the model
    num_hid = args.num_hidden
    ks = args.num_layers
    dropout_r = args.dropout
    agg = args.agg
    bias = args.bias
    norm = args.norm
    model = GraphSAGE(in_feats, num_hid, num_labels, ks, bias=bias,
                      aggregator=agg, activation=F.relu, norm=norm,
                      dropout=dropout_r, use_cuda=use_cuda)
    if use_cuda:
        model.cuda()
    loss_fcn = Unsuper_Cross_Entropy()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    def compute_acc(logits, labels, train_nids, val_nids, test_nids):
        """Fit a logistic regression on the embeddings; return (val, test) micro-F1."""
        logits = logits.cpu().numpy()
        labels = labels.cpu().numpy()
        train_nids = train_nids.cpu().numpy()
        val_nids = val_nids.cpu().numpy()
        test_nids = test_nids.cpu().numpy()

        # standardize the outputs per dimension
        logits = (logits - logits.mean(0)) / logits.std(0)

        clf = LogisticRegression(multi_class='multinomial', max_iter=10000)
        # Use this function's own `train_nids` (the original indexed the
        # labels with the enclosing scope's `train_nid` tensor by mistake).
        clf.fit(logits[train_nids], labels[train_nids])
        pred = clf.predict(logits)

        f1_micro_eval = metrics.f1_score(labels[val_nids], pred[val_nids],
                                         average='micro')
        f1_micro_test = metrics.f1_score(labels[test_nids], pred[test_nids],
                                         average='micro')
        return f1_micro_eval, f1_micro_test

    def evaluation(model, g, labels, train_nids, val_nids, test_nids,
                   batch_size):
        """Infer embeddings for the whole graph without gradients and score them."""
        model.eval()
        with torch.no_grad():
            logits = model.infer(g, batch_size)
        model.train()
        return compute_acc(logits, labels, train_nids, val_nids, test_nids)

    def evaluate_and_report(best_eval, best_test):
        """Run one evaluation, print it, and return the updated best scores."""
        print('\n')
        print('Eval-ing...')
        time_ev_0 = time.time()
        eval_acc, test_acc = evaluation(model, g, labels, train_nid, val_nid,
                                        test_nid, batch_size)
        if eval_acc > best_eval:
            best_eval = eval_acc
            best_test = test_acc
        time_ev_1 = time.time()
        print('Eval Acc {:.4f} | Eval Time(s): {:.4f}'.format(
            eval_acc, time_ev_1 - time_ev_0))
        print('Best Eval Acc {:.4f} | Best Test Acc {:.4f}'.format(
            best_eval, best_test))
        return best_eval, best_test

    # training, validation and testing
    n_epochs = args.num_epochs
    log_every = args.log_every
    eval_every = args.eval_every

    iter_pos = []
    iter_neg = []
    iter_d = []
    iter_t = []
    best_eval_acc = 0
    best_test_acc = 0
    for epoch in range(n_epochs):
        time_epoch_0 = time.time()
        time_step = time.time()
        for step, (pos_graph, neg_graph, blocks) in enumerate(dataloader):
            input_nodes = blocks[0].srcdata[dgl.NID]
            batch_inputs = g.ndata['features'][input_nodes]
            if use_cuda:
                batch_inputs = batch_inputs.cuda()
            time_load = time.time()

            batch_pred = model(batch_inputs, blocks)
            loss = loss_fcn(batch_pred, pos_graph, neg_graph, use_cuda)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            time_train = time.time()

            edge_pos = pos_graph.number_of_edges()
            edge_neg = neg_graph.number_of_edges()
            iter_pos.append(edge_pos / (time_train - time_step))
            iter_neg.append(edge_neg / (time_train - time_step))
            iter_d.append(time_load - time_step)
            iter_t.append(time_train - time_load)

            if step % log_every == 0:
                # At the first step of an epoch average over everything;
                # otherwise skip the first three (warm-up) measurements.
                skip = 0 if step == 0 else 3
                print(
                    'Epoch {:05d} | Step {:05d} | Loss {:.4f} | '
                    'Speed (samples/sec) {:.4f} & {:.4f} | Load Time(sec) {:.4f} | Train Time(sec) {:.4f}'
                    .format(epoch, step, loss.item(),
                            np.mean(iter_pos[skip:]),
                            np.mean(iter_neg[skip:]),
                            np.mean(iter_d[skip:]),
                            np.mean(iter_t[skip:])))
            time_step = time.time()

        if epoch % eval_every == 0:
            best_eval_acc, best_test_acc = evaluate_and_report(
                best_eval_acc, best_test_acc)
            time_step = time.time()

        time_epoch_1 = time.time()
        print('Epoch Time(s): {:.4f}'.format(time_epoch_1 - time_epoch_0))

    # Final evaluation when evaluation did not already run every epoch.
    if eval_every != 1:
        best_eval_acc, best_test_acc = evaluate_and_report(
            best_eval_acc, best_test_acc)

    print('\n')
    print('Finish!')
def __init__(self):
    """Start with empty ``state``, ``state_`` (next state) and ``done`` tensors."""
    # Two separate empty tensors of the default dtype: current / next state.
    for attr in ('state', 'state_'):
        setattr(self, attr, torch.tensor([]))
    # Empty boolean tensor.
    self.done = torch.tensor([], dtype=torch.bool)
def main(args):
    """Train a full-graph GCN, print per-epoch validation stats and the test accuracy.

    Args:
        args: Options; the attributes read here are ``gpu``, ``self_loop``,
            ``n_hidden``, ``n_layers``, ``dropout``, ``lr``,
            ``weight_decay`` and ``n_epochs`` (plus whatever ``load_data``
            reads from it).
    """
    # ---- dataset -----------------------------------------------------------
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)

    # Older torch builds have no BoolTensor; fall back to ByteTensor there.
    if hasattr(torch, 'BoolTensor'):
        as_mask = torch.BoolTensor
    else:
        as_mask = torch.ByteTensor
    train_mask = as_mask(data.train_mask)
    val_mask = as_mask(data.val_mask)
    test_mask = as_mask(data.test_mask)

    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    summary = (n_edges, n_classes,
               train_mask.int().sum().item(),
               val_mask.int().sum().item(),
               test_mask.int().sum().item())
    print("""----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % summary)

    cuda = not (args.gpu < 0)
    if cuda:
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # ---- graph preprocessing and normalization factor ------------------------
    nx_graph = data.graph
    if args.self_loop:
        # Drop existing self loops first so each node ends up with exactly one.
        nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
        nx_graph.add_edges_from(zip(nx_graph.nodes(), nx_graph.nodes()))
    g = DGLGraph(nx_graph)
    n_edges = g.number_of_edges()

    # norm = in-degree ** -0.5, with the infinities of zero-degree nodes
    # replaced by 0
    in_degrees = g.in_degrees().float()
    norm = torch.pow(in_degrees, -0.5)
    infinite = torch.isinf(norm)
    norm[infinite] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # ---- model, loss, optimizer ----------------------------------------------
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                F.relu, args.dropout)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # ---- training --------------------------------------------------------------
    dur = []  # per-epoch training durations, recorded from epoch 3 onwards
    for epoch in range(args.n_epochs):
        timed = epoch >= 3
        model.train()
        if timed:
            t0 = time.time()

        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if timed:
            dur.append(time.time() - t0)

        acc = evaluate(model, features, labels, val_mask)
        report = ("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                  "ETputs(KTEPS) {:.2f}")
        print(report.format(epoch, np.mean(dur), loss.item(), acc,
                            n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
def eval_step(self, get_loss, test=False, prefix=""):
    """Run one pass over ``self.val_data_iter`` without gradients.

    For every batch this runs the classification step, passes the resulting
    ``z`` through ``self.deb``, computes reconstruction statistics through
    ``self.enc_dec`` and generates texts through ``self.generate``. The
    collected texts are handed to ``self.end_eval`` at the end.

    Args:
        get_loss: Forwarded unchanged to ``self.classif_step``.
        test: When truthy, an (empty) ``pre_train_scores`` dict is returned
            alongside the statistics.
        prefix: Not read anywhere in this method.

    Returns:
        ``total_stats`` (one ``stats`` object per batch), or
        ``(total_stats, {})`` when ``test`` is truthy.
    """
    # eval mode
    self.deb.eval()
    self.model.eval()
    self.pre_trainer.encoder.eval()
    self.pre_trainer.decoder.eval()

    total_stats = []
    # Accumulators keyed by the text kinds in KEYS, plus original/predicted labels.
    text_z_prime = {
        KEYS["input"]: [],
        KEYS["gen"]: [],
        KEYS["deb"]: [],
        "origin_labels": [],
        "pred_label": []
    }
    references = []
    hypothesis = []
    hypothesis2 = []
    with torch.no_grad():
        for batch in tqdm(self.val_data_iter, desc='val'):
            # Counters are reset for every batch, so the reconstruction
            # stats below are per-batch values.
            n_words, xe_loss, n_valid = 0, 0, 0
            (x, lengths, langs), y1, y2, weight_out = batch
            # `flag` is always True here; the only code that could change it
            # lives inside the string literal below, which is never executed.
            flag = True
            """ # only on negative example #negative_examples = ~(y2.squeeze() < self.params.threshold) negative_examples = y2.squeeze() > self.params.threshold batch, flag = select_with_mask(batch, mask = negative_examples) (x, lengths, langs), y1, y2, weight_out = batch #"""
            if flag:
                # Version 3 uses y2 as target, every other version uses y1.
                y = y2 if self.params.version == 3 else y1
                x, y, lengths, langs = to_cuda(x, y, lengths, langs)
                #langs = langs if self.params.n_langs > 1 else None
                #langs = None
                batch = (x, lengths, langs), y1, y2, weight_out
                _, _, z, _, stats, y_hat = self.classif_step(
                    get_loss, y, batch)
                z = z.transpose(0, 1)  # (bs-ϵ, seq_len, dim)
                bs = z.size(0)

                z_prime = self.deb('fwd', x=z, lengths=lengths, causal=False)
                z_prime = z_prime.transpose(0, 1)  # (bs-ϵ, seq_len, dim)

                # All-True mask with one entry per sequence in the batch.
                non_mask_deb = torch.BoolTensor([True] * bs)
                loss_rec, word_scores, y_ = self.enc_dec(
                    x, lengths, langs, z, non_mask_deb, bs)
                # update stats
                n_words += y_.size(0)
                xe_loss += loss_rec.item() * len(y_)
                n_valid += (word_scores.max(1)[1] == y_).sum().item()
                # compute perplexity and prediction accuracy
                # `eps` comes from the enclosing module; it keeps the
                # divisions below away from zero.
                n_words = n_words + eps
                stats['rec_ppl'] = np.exp(xe_loss / n_words)
                stats['rec_acc'] = 100. * n_valid / n_words

                texts = self.generate(x, lengths, langs, z, z_prime=z_prime,
                                      log=False)
                for k, v in texts.items():
                    text_z_prime[k].append(v)
                references.extend(texts[KEYS["input"]])
                hypothesis.extend(texts[KEYS["gen"]])
                hypothesis2.extend(texts[KEYS["deb"]])
                text_z_prime["origin_labels"].append(y.cpu().numpy())
                text_z_prime["pred_label"].append(y_hat.cpu().numpy())

                total_stats.append(stats)

    self.end_eval(text_z_prime, references, hypothesis, hypothesis2)

    if test:
        pre_train_scores = {}
        return total_stats, pre_train_scores

    return total_stats
def _optimize_classifier_evaluate(self, only_labeled=False,
                                  only_unlabeled=False, update_lam=False):
    """
    Optimize the classifier on the full dataset and then evaluate the model.

    Three modes are selected by the flags:

    * both flags False: labeled and unlabeled training features are used,
      the unlabeled ones being labeled through ``opt_utils.nearest_neighbor``;
    * ``only_unlabeled`` False (and ``only_labeled`` not False): only the
      labeled training features are used;
    * otherwise: labels for the test features are estimated with
      ``label_utils.optimize_labels`` and only the test accuracy is recorded.

    The model is put in eval mode on entry and back in train mode on exit.

    :param only_labeled: Whether to only use the labeled data
    :param only_unlabeled: Whether the data is only unlabeled
    :param update_lam: Whether to update the regularization parameter lambda
    """
    self.model.eval()
    if self.params.ckn:
        self.model.model = opt_utils.compute_normalizations(
            self.model.model)

    if only_labeled is False and only_unlabeled is False:
        # --- labeled + unlabeled training data ---
        all_features = opt_utils.compute_all_features(
            self.data.train_labeled_loader,
            self.data.train_unlabeled_loader,
            self.data.valid_loader,
            self.data.test_loader,
            self.model,
            normalize=self.params.normalize,
            standardize=self.params.standardize,
            augment=self.params.augment)
        y_labeled_one_hot = opt_utils.one_hot_embedding(
            all_features['train_labeled']['y'],
            self.params.nclasses).to(defaults.device)
        # Labels for the unlabeled points are derived from the labeled ones.
        y_unlabeled = opt_utils.nearest_neighbor(
            all_features['train_labeled']['x'],
            all_features['train_unlabeled']['x'],
            all_features['train_labeled']['y'],
            self.params.nn)
        # NOTE(review): unlike y_labeled_one_hot this is not moved to
        # defaults.device here - confirm one_hot_embedding already places it
        # on the right device before the torch.cat calls below.
        y_unlabeled_one_hot = opt_utils.one_hot_embedding(
            y_unlabeled, self.params.nclasses)
        x_train = torch.cat(
            (all_features['train_labeled']['x'],
             all_features['train_unlabeled']['x'])).to(defaults.device)
        if not update_lam:
            # Keep the current lambda and solve for the last layer directly.
            with torch.autograd.no_grad():
                _, w_last, b_last = ulr_utils.ulr_square_loss_y(
                    x_train,
                    torch.cat((y_labeled_one_hot, y_unlabeled_one_hot)),
                    self.params.lam)
        else:
            # Re-train the classifier and adopt the lambda it reports as best.
            y_train = torch.argmax(
                torch.cat((y_labeled_one_hot, y_unlabeled_one_hot)), 1)
            test_acc, valid_acc, train_acc, test_loss, train_loss, w, best_lambda = train_classifier.train(
                (x_train, y_train),
                (all_features['valid']['x'], all_features['valid']['y']),
                (all_features['test']['x'], all_features['test']['y']),
                self.model,
                self.params.nclasses,
                self.params.maxiter_wlast_full,
                w_init=None,
                normalize=True,
                standardize=False,
                loss_name='square',
                lambdas=None,
                input_features=True)
            self.params.lam = best_lambda
            # Row 0 of w is used as the bias, the remaining rows as weights.
            w_last = w[1:, :]
            b_last = w[0, :]
    elif only_unlabeled is False:
        # --- labeled training data only ---
        all_features = opt_utils.compute_all_features(
            self.data.train_labeled_loader,
            self.data.train_unlabeled_loader,
            self.data.valid_loader,
            self.data.test_loader,
            self.model,
            normalize=self.params.normalize,
            standardize=self.params.standardize,
            augment=self.params.augment)
        if self.iteration != 0 and not update_lam:
            y_labeled_one_hot = opt_utils.one_hot_embedding(
                all_features['train_labeled']['y'], self.params.nclasses)
            _, w_last, b_last = ulr_utils.ulr_square_loss_y(
                all_features['train_labeled']['x'].to(defaults.device),
                y_labeled_one_hot,
                self.params.lam)
        else:
            # First iteration, or an explicit lambda update: full training.
            test_acc, valid_acc, train_acc, test_loss, train_loss, w, best_lambda = train_classifier.train(
                (all_features['train_labeled']['x'],
                 all_features['train_labeled']['y']),
                (all_features['valid']['x'], all_features['valid']['y']),
                (all_features['test']['x'], all_features['test']['y']),
                self.model,
                self.params.nclasses,
                self.params.maxiter_wlast_full,
                w_init=None,
                normalize=True,
                standardize=False,
                loss_name='square',
                lambdas=None,
                input_features=True)
            self.params.lam = best_lambda
            # Only this branch also stores the full matrix on the instance.
            self.w_last = w
            w_last = w[1:, :]
            b_last = w[0, :]
    else:
        # --- unlabeled only: estimate labels for the test features ---
        all_features = opt_utils.compute_all_features(
            None, None, None,
            self.data.test_loader,
            self.model,
            normalize=self.params.normalize,
            standardize=self.params.standardize,
            augment=self.params.augment)
        with torch.autograd.no_grad():
            n = len(all_features['test']['x'])
            # n x n matrices: `mask` is 1 everywhere except 0 on the
            # diagonal, `known` is 1 on the diagonal and 0 elsewhere.
            mask = (torch.BoolTensor(n, n).zero_() + 1).to(defaults.device)
            known = torch.zeros(n, n).to(defaults.device)
            torch.diagonal(known).fill_(1)
            torch.diagonal(mask).fill_(0)
            x_test = all_features['test']['x'].to(defaults.device)
            M, eigengap = label_utils.optimize_labels(
                x_test,
                self.params.nclasses,
                self.params.lam,
                mask=mask,
                known_values=known,
                nmin=self.params.min_frac_points_class * n,
                nmax=self.params.max_frac_points_class * n,
                eigenvalues=False)
            M = M.type(torch.get_default_dtype())
            y_test = all_features['test']['y'].to(defaults.device)
            yhat_test = torch.LongTensor(
                label_utils.get_estimated_labels(M, y_test,
                                                 self.params.nclasses))
            if self.params.labeling_method == 'pseudo labeling':
                yhat_one_hot = opt_utils.one_hot_embedding(
                    yhat_test, self.params.nclasses)
                obj, self.w_last, self.b_last = ulr_utils.ulr_square_loss_y(
                    x_test, yhat_one_hot, self.params.lam, 0)
            # yhat_test is a CPU tensor, so compare against the CPU copy of
            # y_test.
            test_accuracy = torch.mean((y_test.cpu() == yhat_test).float())
        if self.iteration == 0:
            print('Iteration \t Test accuracy')
        print(self.iteration, '\t\t',
              '{:06.4f}'.format(test_accuracy.item()))
        results = {'test_accuracy': test_accuracy}
        self.results.update(self.iteration, **results)

    if not only_unlabeled:
        # w_last / b_last are only defined by the two branches above that
        # handle labeled data, hence this guard.
        results = opt_utils.evaluate_features(self.params, w_last, b_last,
                                              all_features)
        if not only_labeled:
            # Print the table header only on the first iteration.
            if self.iteration == 0:
                opt_utils.print_results(self.iteration, results, header=True)
            else:
                opt_utils.print_results(self.iteration, results,
                                        header=False)
        self.results.update(self.iteration, **results)

    if self.params.ckn:
        for layer_num in range(len(self.model.model.layers)):
            self.model.model.layers[layer_num].store_normalization = False
    self.model.train()
def forward(self, feat, right, wrong, probs, fake=None, fake_diff_mask=None):
    """Compute the class-weighted pairwise ranking loss for one batch.

    Each negative is scored against the positive with a dot product, the
    margin ``right_score - wrong_score`` is passed through a log-sigmoid,
    and the resulting log-likelihood is weighted by a per-negative weight
    chosen from ``self.alphaC`` / ``self.alphaE`` / ``self.alphaN``
    according to the arg-max class of ``probs``.

    Args:
        feat: Query features; reshaped to ``(batch, self.ninp, 1)``.
        right: Positive embeddings; reshaped to ``(batch, 1, self.ninp)``.
        wrong: Negative embeddings, ``(batch, num_neg, self.ninp)``.
        probs: Per-negative 3-class probabilities, ``(batch, num_neg, 3)``.
            Class 0 is only kept when its probability exceeds
            ``self.contra_thresh``. (The three classes are presumably
            contradiction / entailment / neutral, judging by the
            ``alphaC`` / ``alphaE`` / ``alphaN`` names -- confirm.)
            This tensor is not modified; thresholding is applied to a copy.
        fake: Unused; kept for interface compatibility.
        fake_diff_mask: Unused; kept for interface compatibility.

    Returns:
        A tuple ``(loss, dist_summary, smooth_dist_summary)`` where
        ``loss`` is a scalar tensor (ranking loss plus
        ``self.alpha_norm`` times the embedding norms, divided by the batch
        size), ``dist_summary`` holds the per-class counts of hard
        (arg-max) assignments after thresholding, and
        ``smooth_dist_summary`` holds the per-class sums of the raw input
        ``probs``.
    """
    # NOTE: this sets NumPy's global print precision on every call; left in
    # place so the process-wide setting stays the same as before.
    np.set_printoptions(precision=4)
    batch_size = feat.size(0)

    # Soft class totals are taken from the raw probabilities, before any
    # thresholding is applied below.
    smooth_dist_summary = torch.sum(torch.sum(probs, dim=1), dim=0)

    feat = feat.view(-1, self.ninp, 1)
    right_dis = torch.bmm(right.view(-1, 1, self.ninp), feat)  # b x 1 x 1
    wrong_dis = torch.bmm(wrong, feat)  # b x neg x 1

    # Zero out class-0 probabilities that do not clear the threshold so that
    # arg-max falls back to one of the other two classes. The mask is built
    # on the same device as `probs` instead of being forced onto CUDA.
    thresh_mask = torch.gt(probs, self.contra_thresh)
    contra_mask = torch.zeros_like(probs, dtype=torch.bool)
    contra_mask[:, :, 0] = True
    decrease_contra_mask = contra_mask & torch.logical_not(thresh_mask)
    # Out-of-place fill: the caller's `probs` tensor is left untouched.
    masked_probs = probs.masked_fill(decrease_contra_mask, 0.)

    one_hot_probs = torch.nn.functional.one_hot(
        masked_probs.argmax(dim=2), 3).double()
    dist_summary = torch.sum(torch.sum(one_hot_probs, dim=1), dim=0)

    # Squeeze only the trailing singleton axis. A bare squeeze() would also
    # drop the batch or negatives axis when it has size 1, and the result
    # would then broadcast incorrectly against the (b x neg) weights.
    pair_wise_score_diff = (right_dis - wrong_dis).squeeze(2)  # b x neg

    if self.debug and self.iter % self.log_iter == 0:
        print('----------------Probabilities------------------')
        print(masked_probs.cpu().detach().numpy())
        print('----------------One hot------------------------')
        print(one_hot_probs.cpu().detach().numpy())
        print('----------------dist_summary-------------------')
        print(dist_summary.cpu().detach().numpy())
        print('----------------smooth_dist_summary------------')
        print(smooth_dist_summary.cpu().detach().numpy())
        pause()

    # Per-negative weight selected by the hard class assignment.
    w = (one_hot_probs[:, :, 0] * self.alphaC
         + one_hot_probs[:, :, 1] * self.alphaE
         + one_hot_probs[:, :, 2] * self.alphaN)  # b x neg

    # log(sigmoid(x)) computed stably; log(1 / (1 + exp(-x))) overflows to
    # -inf for large negative margins.
    log_likelihood_expanded = torch.nn.functional.logsigmoid(
        self.sigma * pair_wise_score_diff)  # b x neg
    weighted_log_likelihood = log_likelihood_expanded * w
    loss_dis = -torch.sum(torch.sum(weighted_log_likelihood, dim=1))
    loss_norm = right.norm() + feat.norm() + wrong.norm()

    # NOTE: a margin-based term on `fake` / `fake_diff_mask` was previously
    # sketched here but is disabled, so those arguments are ignored.
    loss = (loss_dis + self.alpha_norm * loss_norm) / batch_size
    return loss, dist_summary, smooth_dist_summary