Example #1
def generate_mask(start, end):
    # `count` (the mask length) is assumed to be defined in the enclosing scope.
    mask = torch.BoolTensor(count)  # uninitialized bool tensor of length `count`
    mask[:] = False
    mask[start:end] = True
    return mask
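Note that torch.BoolTensor(count) allocates an uninitialized bool tensor, which is why the snippet immediately overwrites it with False. A minimal sketch of an equivalent mask built with the factory functions preferred in current PyTorch (the function name here is illustrative):

import torch

def generate_mask_v2(count, start, end):
    # torch.zeros(..., dtype=torch.bool) is all-False from the start,
    # so no explicit initialization pass is needed.
    mask = torch.zeros(count, dtype=torch.bool)
    mask[start:end] = True
    return mask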
Example #2
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    if args.self_loop and not args.dataset.startswith('reddit'):
        data.graph.add_edges_from([(i, i) for i in range(len(data.graph))])

    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)
    test_nid = np.nonzero(data.test_mask)[0].astype(np.int64)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.sum().item()
    n_val_samples = val_mask.sum().item()
    n_test_samples = test_mask.sum().item()

    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples, n_test_samples))

    # create GCN model
    g = DGLGraph(data.graph, readonly=True)
    norm = 1. / g.in_degrees().float().unsqueeze(1)

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        norm = norm.cuda()

    g.ndata['features'] = features

    num_neighbors = args.num_neighbors

    g.ndata['norm'] = norm

    model = GCNSampling(in_feats, args.n_hidden, n_classes, args.n_layers,
                        F.relu, args.dropout)

    if cuda:
        model.cuda()

    loss_fcn = nn.CrossEntropyLoss()

    infer_model = GCNInfer(in_feats, args.n_hidden, n_classes, args.n_layers,
                           F.relu)

    if cuda:
        infer_model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        for nf in dgl.contrib.sampling.NeighborSampler(g,
                                                       args.batch_size,
                                                       args.num_neighbors,
                                                       neighbor_type='in',
                                                       shuffle=True,
                                                       num_workers=32,
                                                       num_hops=args.n_layers + 1,
                                                       seed_nodes=train_nid):
            nf.copy_from_parent()
            model.train()
            # forward
            pred = model(nf)
            batch_nids = nf.layer_parent_nid(-1).to(device=pred.device,
                                                    dtype=torch.long)
            batch_labels = labels[batch_nids]
            loss = loss_fcn(pred, batch_labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        for infer_param, param in zip(infer_model.parameters(),
                                      model.parameters()):
            infer_param.data.copy_(param.data)

        num_acc = 0.

        for nf in dgl.contrib.sampling.NeighborSampler(g,
                                                       args.test_batch_size,
                                                       g.number_of_nodes(),
                                                       neighbor_type='in',
                                                       num_workers=32,
                                                       num_hops=args.n_layers + 1,
                                                       seed_nodes=test_nid):
            nf.copy_from_parent()
            infer_model.eval()
            with torch.no_grad():
                pred = infer_model(nf)
                batch_nids = nf.layer_parent_nid(-1).to(device=pred.device,
                                                        dtype=torch.long)
                batch_labels = labels[batch_nids]
                num_acc += (pred.argmax(
                    dim=1) == batch_labels).sum().cpu().item()

        print("Test Accuracy {:.4f}".format(num_acc / n_test_samples))
Example #3
    def _add_constraints(self, mask, known, y_train_labeled,
                         y_train_unlabeled_truth):
        """
        Add additional constraints to the equivalence matrix.

        :param mask: Binary matrix with value 0 in entry (i,j) if it is known whether i and j belong to the same class,
                     and 1 otherwise
        :param known: Binary matrix with value 1 in entry (i,j) if it is known that i and j belong to the same class,
                      and 0 otherwise
        :param y_train_labeled: Labels for the labeled subset of the batch
        :param y_train_unlabeled_truth: True (generally unknown) labels for the unlabeled subset of the batch
        :return: mask: Binary matrix with value 0 in entry (i,j) if it is known whether i and j belong to the same class,
                       and 1 otherwise
        :return: known: Binary matrix with value 1 in entry (i,j) if it is known that i and j belong to the same class,
                        and 0 otherwise
        """
        if self.params.add_constraints_method == 'random':
            n = len(mask)
            nl = len(y_train_labeled)
            mask = (torch.BoolTensor(n, n).zero_() + 1)  # start from an all-ones matrix (every pair unknown)
            y_labeled_one_hot = opt_utils.one_hot_embedding(
                y_train_labeled, self.params.nclasses)

            mask = mask.cpu().numpy()
            idxs = np.random.choice([0, 1],
                                    size=(n, n),
                                    p=[
                                        1 - self.params.add_constraints_frac,
                                        self.params.add_constraints_frac
                                    ])
            idxs = np.triu(idxs, k=1)
            idxs = idxs + idxs.T
            idxs = idxs.astype('bool')
            mask = mask * (~idxs)
            true_y = opt_utils.one_hot_embedding(
                torch.cat((y_train_labeled,
                           y_train_unlabeled_truth.to(defaults.device))),
                self.params.nclasses)
            true_m = true_y.mm(true_y.t())

            known = true_m * torch.Tensor(idxs).to(defaults.device)
            mask = torch.from_numpy(mask).to(defaults.device)

            known[:nl, :nl] = y_labeled_one_hot.mm(y_labeled_one_hot.t())
            torch.diagonal(known).fill_(1)
            mask[:nl, :nl] = 0
            torch.diagonal(mask).fill_(0)

            # Remove 1's among (labeled, unlabeled) pairs
            bad_idxs = known[:nl, nl:] == 1
            known[:nl, nl:][bad_idxs] = 0
            mask[:nl, nl:][bad_idxs] = 1

            bad_idxs = known[nl:, :nl] == 1
            known[nl:, :nl][bad_idxs] = 0
            mask[nl:, :nl][bad_idxs] = 1

        elif self.params.add_constraints_method == 'specific':
            mask = mask.cpu().numpy()
            nl = len(y_train_labeled)
            idxs_unlabeled = np.isin(y_train_unlabeled_truth.cpu(),
                                     self.params.add_constraints_classes)
            idxs_labeled = np.isin(y_train_labeled.cpu(),
                                   self.params.add_constraints_classes)
            mask[:nl, nl:][np.ix_(~idxs_labeled, idxs_unlabeled)] = 0
            mask[nl:, :nl][np.ix_(idxs_unlabeled, ~idxs_labeled)] = 0
            mask[:nl, nl:][np.ix_(idxs_labeled, ~idxs_unlabeled)] = 0
            mask[nl:, :nl][np.ix_(~idxs_unlabeled, idxs_labeled)] = 0
            mask[nl:, nl:][np.ix_(idxs_unlabeled, ~idxs_unlabeled)] = 0
            mask[nl:, nl:][np.ix_(~idxs_unlabeled, idxs_unlabeled)] = 0
            mask = torch.from_numpy(mask).to(defaults.device)

        return mask, known
Example #4
def run(args, device, data):
    # Unpack data
    train_mask, val_mask, in_feats, labels, n_classes, g = data
    train_nid = th.LongTensor(np.nonzero(train_mask)[0])
    val_nid = th.LongTensor(np.nonzero(val_mask)[0])
    train_mask = th.BoolTensor(train_mask)
    val_mask = th.BoolTensor(val_mask)

    # Create sampler
    sampler = NeighborSampler(
        g, [int(fanout) for fanout in args.fan_out.split(',')])

    # Create PyTorch DataLoader for constructing blocks
    dataloader = DataLoader(dataset=train_nid.numpy(),
                            batch_size=args.batch_size,
                            collate_fn=sampler.sample_blocks,
                            shuffle=True,
                            drop_last=False,
                            num_workers=args.num_workers)

    # Define model and optimizer
    model = SAGE(in_feats, args.num_hidden, n_classes, args.num_layers, F.relu,
                 args.dropout)
    model = model.to(device)
    loss_fcn = nn.BCEWithLogitsLoss()
    loss_fcn = loss_fcn.to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Training loop
    avg = 0
    iter_tput = []
    for epoch in range(args.num_epochs):
        tic = time.time()

        # Loop over the dataloader to sample the computation dependency graph as a list of
        # blocks.
        for step, blocks in enumerate(dataloader):
            tic_step = time.time()

            # The input nodes lie on the LHS of the first block.
            # The output nodes lie on the RHS of the last block.
            input_nodes = blocks[0].srcdata[dgl.NID]
            seeds = blocks[-1].dstdata[dgl.NID]

            # Load the input features as well as output labels
            batch_inputs, batch_labels = load_subtensor(
                g, labels, seeds, input_nodes, device)

            # Compute loss and prediction
            batch_pred = model(blocks, batch_inputs)
            loss = loss_fcn(batch_pred, batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            iter_tput.append(len(seeds) / (time.time() - tic_step))
            if step % args.log_every == 0:
                acc = compute_f1(batch_pred, batch_labels)
                gpu_mem_alloc = (th.cuda.max_memory_allocated() / 1000000
                                 if th.cuda.is_available() else 0)
                print(
                    'Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MB'
                    .format(epoch, step, loss.item(), acc.item(),
                            np.mean(iter_tput[3:]), gpu_mem_alloc))

        toc = time.time()
        print('Epoch Time(s): {:.4f}'.format(toc - tic))
        if epoch >= 5:
            avg += toc - tic
        if epoch % args.eval_every == 0 and epoch != 0:
            eval_acc = evaluate(model, g, g.ndata['features'], labels,
                                val_mask, args.batch_size, device)
            print('Eval Acc {:.4f}'.format(eval_acc))

    print('Avg epoch time: {}'.format(avg / (epoch - 4)))
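As a small self-contained sketch of the mask handling at the top of run(): a boolean numpy mask is turned both into a LongTensor of node ids (used as sampler seeds) and into a BoolTensor mask (used for indexing). Values below are illustrative:

import numpy as np
import torch as th

train_mask = np.array([True, False, True, True, False])
train_nid = th.LongTensor(np.nonzero(train_mask)[0])  # node ids: 0, 2, 3
train_mask_t = th.BoolTensor(train_mask)              # boolean mask tensor of length 5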
Example #5
                                    requires_grad=False)

        # Dilations & padding
        self._set_dilations(seq_len)

        # Channel combinations (multivariate)
        if c_in > 1:
            self._set_channel_combinations(c_in)

        # Bias
        for i in range(self.num_dilations):
            self.register_buffer(
                f'biases_{i}',
                torch.empty(
                    (self.num_kernels, self.num_features_per_dilation[i])))
        self.register_buffer('prefit', torch.BoolTensor([False]))

    def fit(self, X, chunksize=None):
        num_samples = X.shape[0]
        if chunksize is None:
            chunksize = min(num_samples, self.num_dilations * self.num_kernels)
        else:
            chunksize = min(num_samples, chunksize)
        np.random.seed(self.random_state)
        idxs = np.random.choice(num_samples, chunksize, False)
        self.fitting = True
        self(X[idxs])
        self.fitting = False

    def forward(self, x):
        _features = []
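The register_buffer('prefit', torch.BoolTensor([False])) call above registers a one-element bool flag on the module, so it is saved in the state_dict and moved along with .to()/.cuda(). A minimal self-contained sketch of this pattern (module and method bodies are illustrative, not from the original source):

import torch
import torch.nn as nn

class FlaggedModule(nn.Module):
    def __init__(self):
        super().__init__()
        # One-element bool buffer: persisted in state_dict, moved by .to()/.cuda().
        self.register_buffer('prefit', torch.BoolTensor([False]))

    def fit(self, x):
        # ... estimate data-dependent parameters here ...
        self.prefit.fill_(True)  # mark the module as fitted, in place

    def forward(self, x):
        if not self.prefit.item():
            raise RuntimeError('call fit() before forward()')
        return x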
Example #6
def map_nominal(genotype_df, variant_df, phenotype_df, phenotype_pos_df, prefix,
                covariates_df=None, interaction_s=None, maf_threshold_interaction=0.05,
                group_s=None, window=1000000, run_eigenmt=False,
                output_dir='.', write_top=True, write_stats=True, logger=None, verbose=True):
    """
    cis-QTL mapping: nominal associations for all variant-phenotype pairs

    Association results for each chromosome are written to parquet files
    in the format <output_dir>/<prefix>.cis_qtl_pairs.<chr>.parquet

    If interaction_s is provided, the top association per phenotype is
    written to <output_dir>/<prefix>.cis_qtl_top_assoc.txt.gz unless
    write_top is set to False, in which case it is returned as a DataFrame
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if logger is None:
        logger = SimpleLogger()
    if group_s is not None:
        group_dict = group_s.to_dict()

    logger.write('cis-QTL mapping: nominal associations for all variant-phenotype pairs')
    logger.write('  * {} samples'.format(phenotype_df.shape[1]))
    logger.write('  * {} phenotypes'.format(phenotype_df.shape[0]))
    if covariates_df is not None:
        assert np.all(phenotype_df.columns==covariates_df.index)
        logger.write('  * {} covariates'.format(covariates_df.shape[1]))
        residualizer = Residualizer(torch.tensor(covariates_df.values, dtype=torch.float32).to(device))
        dof = phenotype_df.shape[1] - 2 - covariates_df.shape[1]
    else:
        residualizer = None
        dof = phenotype_df.shape[1] - 2
    logger.write('  * {} variants'.format(variant_df.shape[0]))
    if interaction_s is not None:
        assert np.all(interaction_s.index==phenotype_df.columns)
        logger.write('  * including interaction term')
        if maf_threshold_interaction>0:
            logger.write('    * using {:.2f} MAF threshold'.format(maf_threshold_interaction))

    genotype_ix = np.array([genotype_df.columns.tolist().index(i) for i in phenotype_df.columns])
    genotype_ix_t = torch.from_numpy(genotype_ix).to(device)
    if interaction_s is not None:
        dof -= 2
        interaction_t = torch.tensor(interaction_s.values.reshape(1,-1), dtype=torch.float32).to(device)
        if maf_threshold_interaction > 0:
            interaction_mask_t = torch.BoolTensor(interaction_s >= interaction_s.median()).to(device)
        else:
            interaction_mask_t = None

    igc = genotypeio.InputGeneratorCis(genotype_df, variant_df, phenotype_df, phenotype_pos_df, group_s=group_s, window=window)
    # iterate over chromosomes
    best_assoc = []
    start_time = time.time()
    k = 0
    logger.write('  * Computing associations')
    for chrom in igc.chrs:
        logger.write('    Mapping chromosome {}'.format(chrom))
        # allocate arrays
        n = 0
        if group_s is None:
            for i in igc.phenotype_pos_df[igc.phenotype_pos_df['chr']==chrom].index:
                j = igc.cis_ranges[i]
                n += j[1] - j[0] + 1
        else:
            for i in igc.group_s[igc.phenotype_pos_df['chr']==chrom].drop_duplicates().index:
                j = igc.cis_ranges[i]
                n += j[1] - j[0] + 1

        chr_res = OrderedDict()
        chr_res['phenotype_id'] = []
        chr_res['variant_id'] = []
        chr_res['tss_distance'] = np.empty(n, dtype=np.int32)
        chr_res['maf'] =          np.empty(n, dtype=np.float32)
        chr_res['ma_samples'] =   np.empty(n, dtype=np.int32)
        chr_res['ma_count'] =     np.empty(n, dtype=np.int32)
        if interaction_s is None:
            chr_res['pval_nominal'] = np.empty(n, dtype=np.float64)
            chr_res['slope'] =        np.empty(n, dtype=np.float32)
            chr_res['slope_se'] =     np.empty(n, dtype=np.float32)
        else:
            chr_res['pval_g'] =  np.empty(n, dtype=np.float64)
            chr_res['b_g'] =     np.empty(n, dtype=np.float32)
            chr_res['b_g_se'] =  np.empty(n, dtype=np.float32)
            chr_res['pval_i'] =  np.empty(n, dtype=np.float64)
            chr_res['b_i'] =     np.empty(n, dtype=np.float32)
            chr_res['b_i_se'] =  np.empty(n, dtype=np.float32)
            chr_res['pval_gi'] = np.empty(n, dtype=np.float64)
            chr_res['b_gi'] =    np.empty(n, dtype=np.float32)
            chr_res['b_gi_se'] = np.empty(n, dtype=np.float32)

        start = 0
        if group_s is None:
            for k, (phenotype, genotypes, genotype_range, phenotype_id) in enumerate(igc.generate_data(chrom=chrom, verbose=verbose), k+1):
                # copy genotypes to GPU
                phenotype_t = torch.tensor(phenotype, dtype=torch.float).to(device)
                genotypes_t = torch.tensor(genotypes, dtype=torch.float).to(device)
                genotypes_t = genotypes_t[:,genotype_ix_t]
                impute_mean(genotypes_t)

                variant_ids = variant_df.index[genotype_range[0]:genotype_range[-1]+1]
                tss_distance = np.int32(variant_df['pos'].values[genotype_range[0]:genotype_range[-1]+1] - igc.phenotype_tss[phenotype_id])

                if interaction_s is None:
                    res = calculate_cis_nominal(genotypes_t, phenotype_t, residualizer=residualizer)
                    tstat, slope, slope_se, maf, ma_samples, ma_count = [i.cpu().numpy() for i in res]
                    n = len(variant_ids)
                else:
                    genotypes_t, mask_t = filter_maf_interaction(genotypes_t, interaction_mask_t=interaction_mask_t,
                                                                 maf_threshold_interaction=maf_threshold_interaction)
                    if genotypes_t.shape[0]>0:
                        res = calculate_interaction_nominal(genotypes_t, phenotype_t.unsqueeze(0), interaction_t,
                                                            residualizer=residualizer, return_sparse=False)
                        tstat, b, b_se, maf, ma_samples, ma_count = [i.cpu().numpy() for i in res]
                        mask = mask_t.cpu().numpy()
                        variant_ids = variant_ids[mask]
                        tss_distance = tss_distance[mask]
                        n = len(variant_ids)

                        # top association
                        ix = np.nanargmax(np.abs(tstat[:,2]))
                        top_s = pd.Series([phenotype_id, variant_ids[ix], tss_distance[ix], maf[ix], ma_samples[ix], ma_count[ix],
                                           tstat[ix,0], b[ix,0], b_se[ix,0],
                                           tstat[ix,1], b[ix,1], b_se[ix,1],
                                           tstat[ix,2], b[ix,2], b_se[ix,2]], index=chr_res.keys())
                        if run_eigenmt:  # compute eigenMT correction
                            top_s['tests_emt'] = eigenmt.compute_tests(genotypes_t, var_thresh=0.99, variant_window=200)
                        best_assoc.append(top_s)
                    else:  # all genotypes in window were filtered out
                        n = 0

                if n > 0:
                    chr_res['phenotype_id'].extend([phenotype_id]*n)
                    chr_res['variant_id'].extend(variant_ids)
                    chr_res['tss_distance'][start:start+n] = tss_distance
                    chr_res['maf'][start:start+n] = maf
                    chr_res['ma_samples'][start:start+n] = ma_samples
                    chr_res['ma_count'][start:start+n] = ma_count
                    if interaction_s is None:
                        chr_res['pval_nominal'][start:start+n] = tstat
                        chr_res['slope'][start:start+n] = slope
                        chr_res['slope_se'][start:start+n] = slope_se
                    else:
                        chr_res['pval_g'][start:start+n]  = tstat[:,0]
                        chr_res['b_g'][start:start+n]     = b[:,0]
                        chr_res['b_g_se'][start:start+n]  = b_se[:,0]
                        chr_res['pval_i'][start:start+n]  = tstat[:,1]
                        chr_res['b_i'][start:start+n]     = b[:,1]
                        chr_res['b_i_se'][start:start+n]  = b_se[:,1]
                        chr_res['pval_gi'][start:start+n] = tstat[:,2]
                        chr_res['b_gi'][start:start+n]    = b[:,2]
                        chr_res['b_gi_se'][start:start+n] = b_se[:,2]
                start += n  # update pointer
        else:  # groups
            for k, (phenotypes, genotypes, genotype_range, phenotype_ids, group_id) in enumerate(igc.generate_data(chrom=chrom, verbose=verbose), k+1):

                # copy genotypes to GPU
                genotypes_t = torch.tensor(genotypes, dtype=torch.float).to(device)
                genotypes_t = genotypes_t[:,genotype_ix_t]
                impute_mean(genotypes_t)

                variant_ids = variant_df.index[genotype_range[0]:genotype_range[-1]+1]
                # assuming that the TSS for all grouped phenotypes is the same
                tss_distance = np.int32(variant_df['pos'].values[genotype_range[0]:genotype_range[-1]+1] - igc.phenotype_tss[phenotype_ids[0]])

                if interaction_s is not None:
                    genotypes_t, mask_t = filter_maf_interaction(genotypes_t, interaction_mask_t=interaction_mask_t,
                                                                 maf_threshold_interaction=maf_threshold_interaction)
                    mask = mask_t.cpu().numpy()
                    variant_ids = variant_ids[mask]
                    tss_distance = tss_distance[mask]

                n = len(variant_ids)

                if genotypes_t.shape[0]>0:
                    # process first phenotype in group
                    phenotype_id = phenotype_ids[0]
                    phenotype_t = torch.tensor(phenotypes[0], dtype=torch.float).to(device)

                    if interaction_s is None:
                        res = calculate_cis_nominal(genotypes_t, phenotype_t, residualizer=residualizer)
                        tstat, slope, slope_se, maf, ma_samples, ma_count = [i.cpu().numpy() for i in res]
                    else:
                        res = calculate_interaction_nominal(genotypes_t, phenotype_t.unsqueeze(0), interaction_t,
                                                            residualizer=residualizer, return_sparse=False)
                        tstat, b, b_se, maf, ma_samples, ma_count = [i.cpu().numpy() for i in res]
                    px = [phenotype_id]*n

                    # iterate over remaining phenotypes in group
                    for phenotype, phenotype_id in zip(phenotypes[1:], phenotype_ids[1:]):
                        phenotype_t = torch.tensor(phenotype, dtype=torch.float).to(device)
                        if interaction_s is None:
                            res = calculate_cis_nominal(genotypes_t, phenotype_t, residualizer=residualizer)
                            tstat0, slope0, slope_se0, maf, ma_samples, ma_count = [i.cpu().numpy() for i in res]
                        else:
                            res = calculate_interaction_nominal(genotypes_t, phenotype_t.unsqueeze(0), interaction_t,
                                                                residualizer=residualizer, return_sparse=False)
                            tstat0, b0, b_se0, maf, ma_samples, ma_count = [i.cpu().numpy() for i in res]

                        # find associations that are stronger for current phenotype
                        if interaction_s is None:
                            ix = np.where(np.abs(tstat0) > np.abs(tstat))[0]
                        else:
                            ix = np.where(np.abs(tstat0[:,2]) > np.abs(tstat[:,2]))[0]

                        # update relevant positions
                        for j in ix:
                            px[j] = phenotype_id
                        if interaction_s is None:
                            tstat[ix] = tstat0[ix]
                            slope[ix] = slope0[ix]
                            slope_se[ix] = slope_se0[ix]
                        else:
                            tstat[ix] = tstat0[ix]
                            b[ix] = b0[ix]
                            b_se[ix] = b_se0[ix]

                    chr_res['phenotype_id'].extend(px)
                    chr_res['variant_id'].extend(variant_ids)
                    chr_res['tss_distance'][start:start+n] = tss_distance
                    chr_res['maf'][start:start+n] = maf
                    chr_res['ma_samples'][start:start+n] = ma_samples
                    chr_res['ma_count'][start:start+n] = ma_count
                    if interaction_s is None:
                        chr_res['pval_nominal'][start:start+n] = tstat
                        chr_res['slope'][start:start+n] = slope
                        chr_res['slope_se'][start:start+n] = slope_se
                    else:
                        chr_res['pval_g'][start:start+n]  = tstat[:,0]
                        chr_res['b_g'][start:start+n]     = b[:,0]
                        chr_res['b_g_se'][start:start+n]  = b_se[:,0]
                        chr_res['pval_i'][start:start+n]  = tstat[:,1]
                        chr_res['b_i'][start:start+n]     = b[:,1]
                        chr_res['b_i_se'][start:start+n]  = b_se[:,1]
                        chr_res['pval_gi'][start:start+n] = tstat[:,2]
                        chr_res['b_gi'][start:start+n]    = b[:,2]
                        chr_res['b_gi_se'][start:start+n] = b_se[:,2]

                    # top association for the group
                    if interaction_s is not None:
                        ix = np.nanargmax(np.abs(tstat[:,2]))
                        top_s = pd.Series([chr_res['phenotype_id'][start:start+n][ix], variant_ids[ix], tss_distance[ix], maf[ix], ma_samples[ix], ma_count[ix],
                                           tstat[ix,0], b[ix,0], b_se[ix,0],
                                           tstat[ix,1], b[ix,1], b_se[ix,1],
                                           tstat[ix,2], b[ix,2], b_se[ix,2]], index=chr_res.keys())
                        top_s['num_phenotypes'] = len(phenotype_ids)
                        if run_eigenmt:  # compute eigenMT correction
                            top_s['tests_emt'] = eigenmt.compute_tests(genotypes_t, var_thresh=0.99, variant_window=200)
                        best_assoc.append(top_s)

                start += n  # update pointer

        logger.write('    time elapsed: {:.2f} min'.format((time.time()-start_time)/60))

        # convert to dataframe, compute p-values and write current chromosome
        if start < len(chr_res['maf']):
            for x in chr_res:
                chr_res[x] = chr_res[x][:start]

        if write_stats:
            chr_res_df = pd.DataFrame(chr_res)
            if interaction_s is None:
                m = chr_res_df['pval_nominal'].notnull()
                chr_res_df.loc[m, 'pval_nominal'] = 2*stats.t.cdf(-chr_res_df.loc[m, 'pval_nominal'].abs(), dof)
            else:
                m = chr_res_df['pval_gi'].notnull()
                chr_res_df.loc[m, 'pval_g'] =  2*stats.t.cdf(-chr_res_df.loc[m, 'pval_g'].abs(), dof)
                chr_res_df.loc[m, 'pval_i'] =  2*stats.t.cdf(-chr_res_df.loc[m, 'pval_i'].abs(), dof)
                chr_res_df.loc[m, 'pval_gi'] = 2*stats.t.cdf(-chr_res_df.loc[m, 'pval_gi'].abs(), dof)
            print('    * writing output')
            chr_res_df.to_parquet(os.path.join(output_dir, '{}.cis_qtl_pairs.{}.parquet'.format(prefix, chrom)))

    if interaction_s is not None and len(best_assoc) > 0:
        best_assoc = pd.concat(best_assoc, axis=1, sort=False).T.set_index('phenotype_id').infer_objects()
        m = best_assoc['pval_g'].notnull()
        best_assoc.loc[m, 'pval_g'] =  2*stats.t.cdf(-best_assoc.loc[m, 'pval_g'].abs(), dof)
        best_assoc.loc[m, 'pval_i'] =  2*stats.t.cdf(-best_assoc.loc[m, 'pval_i'].abs(), dof)
        best_assoc.loc[m, 'pval_gi'] = 2*stats.t.cdf(-best_assoc.loc[m, 'pval_gi'].abs(), dof)
        if run_eigenmt:
            if group_s is None:
                best_assoc['pval_emt'] = np.minimum(best_assoc['tests_emt']*best_assoc['pval_gi'], 1)
            else:
                best_assoc['pval_emt'] = np.minimum(best_assoc['num_phenotypes']*best_assoc['tests_emt']*best_assoc['pval_gi'], 1)
            best_assoc['pval_adj_bh'] = eigenmt.padjust_bh(best_assoc['pval_emt'])
        if write_top:
            best_assoc.to_csv(os.path.join(output_dir, '{}.cis_qtl_top_assoc.txt.gz'.format(prefix)),
                              sep='\t', float_format='%.6g')
        else:
            return best_assoc
    logger.write('done.')
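The interaction_mask_t line above builds a bool tensor from a pandas comparison. A tiny sketch of that conversion in isolation (here going through .values; the numbers are illustrative):

import pandas as pd
import torch

interaction_s = pd.Series([0.1, 0.5, 0.9, 0.3])
mask_t = torch.BoolTensor((interaction_s >= interaction_s.median()).values)  # -> False, True, True, False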
Example #7
def main():
    # Praise argparser!
    parser = argparse.ArgumentParser(
        description=
        "Inference script for performing joint tasks on ATIS datasets.")
    parser.add_argument("--train_path",
                        type=str,
                        help="path of train dataset.")
    parser.add_argument("--test_path", type=str, help="path of test dataset.")
    parser.add_argument("--model_dir",
                        type=str,
                        default="./models/",
                        help='path for saved trained models.')

    parser.add_argument('--max_length',
                        type=int,
                        default=60,
                        help='max sequence length')
    parser.add_argument('--embedding_size',
                        type=int,
                        default=100,
                        help='dimension of word embedding vectors')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=50,
                        help='dimension of lstm hidden states')

    args = parser.parse_args()

    # Load data
    print("Loading data...")
    _, word2index, tag2index, intent2index = preprocessing(
        args.train_path, args.max_length)
    index2tag = {v: k for k, v in tag2index.items()}
    index2intent = {v: k for k, v in intent2index.items()}

    # Load model
    print("Loading model...")
    encoder = Encoder(len(word2index), args.embedding_size, args.hidden_size)
    decoder = Decoder(len(tag2index), len(intent2index),
                      len(tag2index) // 3, args.hidden_size * 2)
    encoder.load_state_dict(
        torch.load(os.path.join(args.model_dir, 'jointnlu-encoder.pkl'),
                   map_location=None if USE_CUDA else "cpu"))
    decoder.load_state_dict(
        torch.load(os.path.join(args.model_dir, 'jointnlu-decoder.pkl'),
                   map_location=None if USE_CUDA else "cpu"))

    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # Switch to evaluation mode
    encoder.eval()
    decoder.eval()

    # Preprocess test data
    test = open(args.test_path, "r").readlines()
    test = [t[:-1] for t in test]
    test = [[
        t.split("\t")[0].split(" "),
        t.split("\t")[1].split(" ")[:-1],
        t.split("\t")[1].split(" ")[-1]
    ] for t in test]
    test = [
        [t[0][1:-1], t[1][1:], t[2].split("#")[0]] for t in test
    ]  # Note: intents joined with '#' are split and only the first label is kept.
    # This could lower the error rate.

    slot_f1 = []
    intent_err = []

    # Test cases.
    for index in range(len(test)):
        test_raw = test[index][0]
        test_in = prepare_sequence(test_raw, word2index).to("cpu")
        mask_values = tuple(map(lambda s: s == 0, test_in.data))
        if USE_CUDA:
            test_mask = Variable(torch.BoolTensor(mask_values)).cuda()
        else:
            test_mask = Variable(torch.BoolTensor(mask_values)).view(1, -1)

        if USE_CUDA:
            start_decode = Variable(
                torch.LongTensor([[word2index['<SOS>']] * 1
                                  ])).cuda().transpose(1, 0)
        else:
            start_decode = Variable(
                torch.LongTensor([[word2index['<SOS>']] * 1])).transpose(1, 0)

        output, hidden_c = encoder(test_in.unsqueeze(0),
                                   test_mask.unsqueeze(0))
        tag_score, intent_score = decoder(start_decode, hidden_c, output,
                                          test_mask)

        v, i = torch.max(tag_score, 1)
        slot_pred = list(map(lambda ii: index2tag[ii], i.data.tolist()))
        slot_gt = test[index][1]
        # Calculate f1_micro with sklearn. Pretty handy.
        slot_f1.append(f1_score(slot_gt, slot_pred, average="micro"))

        v, i = torch.max(intent_score, 1)
        intent_pred = index2intent[i.data.tolist()[0]]
        intent_gt = test[index][2]
        if intent_pred != intent_gt:
            intent_err.append([test[index][0], intent_gt, intent_pred])

        # Print our results.
        print("Input Sentence\t: ", *test[index][0])

        print("Truth\t\t: ", *slot_gt)
        print("Prediction\t: ", *slot_pred)

        print("Truth\t\t: ", intent_gt)
        print("Prediction\t: ", intent_pred)

        print()

    # Print out everything I need to finish my report.

    # print("Got slot err ", len(slot_err[0]))
    # print(*slot_err, sep="\n")
    print("Got intent err ", len(intent_err))
    print("--- BEGIN ERR PRINT ---")
    for case in intent_err:
        print("Input  : ", *case[0])
        print("Truth  : ", case[1])
        print("Predict: ", case[2])
        print()
    print("--- ENDOF ERR PRINT ---")
    print("Total ", len(test))
    print("Slot f1_micro avg %f" % np.average(slot_f1))
    print("Intent acc %f" % (1 - len(intent_err) / len(test)))
Example #8
def run_experiment(p, csv_path, out_dir, data_cols='_mri_vol'):
    """
    Function to run the experiments.
    p contain all the hyperparameters needed to run the experiments
    We assume that all the parameters needed are present in p!!
    out_dir is the out directory
    #hyperparameters
    """

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    #Seed
    torch.manual_seed(p["seed"])
    np.random.seed(p["seed"])

    #Redirect output to the out dir
    # sys.stdout = open(out_dir + 'output.out', 'w')

    #save parameters to the out dir
    with open(out_dir + "params.txt", "w") as f:
        f.write(str(p))

    # DEVICE
    ## Deciding on the device.
    DEVICE_ID = 0
    DEVICE = torch.device(
        'cuda:' + str(DEVICE_ID) if torch.cuda.is_available() else 'cpu')
    if torch.cuda.is_available():
        torch.cuda.set_device(DEVICE_ID)

    # LOAD DATA
    X_train, X_test, Y_train, Y_test, mri_col = open_MRI_data_var(
        csv_path,
        train_set=0.9,
        normalize=True,
        return_covariates=True,
        data_cols=data_cols)
    # TEMPORARY

    #Combine test and train Y for later
    Y = {}
    for k in Y_train.keys():
        Y[k] = Y_train[k] + Y_test[k]

    # List of (nt, nfeatures) numpy objects
    p["x_size"] = X_train[0].shape[1]
    print(p["x_size"])

    # Apply padding to both X_train and X_val
    # REMOVE LAST POINT OF EACH INDIVIDUAL
    X_train_tensor = [torch.FloatTensor(t[:-1, :]) for t in X_train]
    X_train_pad = nn.utils.rnn.pad_sequence(X_train_tensor,
                                            batch_first=False,
                                            padding_value=np.nan)
    X_test_tensor = [torch.FloatTensor(t) for t in X_test]
    X_test_pad = nn.utils.rnn.pad_sequence(X_test_tensor,
                                           batch_first=False,
                                           padding_value=np.nan)

    p["ntp"] = max(X_train_pad.size(0), X_test_pad.size(0))

    # Those datasets are of size [Tmax, Batch_size, nfeatures]
    # Save mask to unpad later when testing
    mask_train = ~torch.isnan(X_train_pad)
    mask_test = ~torch.isnan(X_test_pad)

    # convert to tensor
    mask_train_tensor = torch.BoolTensor(mask_train)
    mask_test_tensor = torch.BoolTensor(mask_test)

    #convert those NaN to zeros
    X_train_pad[torch.isnan(X_train_pad)] = 0
    X_test_pad[torch.isnan(X_test_pad)] = 0

    # Define model and optimizer
    model = rnnvae.ModelRNNVAE(p["x_size"], p["h_size"], p["hidden"],
                               p["n_layers"], p["hidden"], p["n_layers"],
                               p["hidden"], p["n_layers"], p["z_dim"],
                               p["hidden"], p["n_layers"], p["clip"],
                               p["n_epochs"], p["batch_size"], DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=p["learning_rate"])
    model.optimizer = optimizer

    model = model.to(DEVICE)
    # Fit the model
    model.fit(X_train_pad.to(DEVICE), X_test_pad.to(DEVICE),
              mask_train_tensor.to(DEVICE), mask_test_tensor.to(DEVICE))

    ### After training, save the model!
    model.save(out_dir, 'model.pt')

    # Predict the reconstructions from X_val and X_train
    X_test_fwd = model.predict(X_test_pad.to(DEVICE))
    X_train_fwd = model.predict(X_train_pad.to(DEVICE))

    #Reformulate things
    X_train_fwd['xnext'] = np.array(X_train_fwd['xnext']).swapaxes(0, 1)
    X_train_fwd['z'] = np.array(X_train_fwd['z']).swapaxes(0, 1)
    X_test_fwd['xnext'] = np.array(X_test_fwd['xnext']).swapaxes(0, 1)
    X_test_fwd['z'] = np.array(X_test_fwd['z']).swapaxes(0, 1)

    X_test_hat = X_test_fwd["xnext"]
    X_train_hat = X_train_fwd["xnext"]

    # Unpad using the masks
    #after masking, need to reshape to (nt, nfeat)
    X_test_hat = [
        X[mask_test[:, i, :]].reshape((-1, p["x_size"]))
        for (i, X) in enumerate(X_test_hat)
    ]
    X_train_hat = [
        X[mask_train[:, i, :]].reshape((-1, p["x_size"]))
        for (i, X) in enumerate(X_train_hat)
    ]

    #Compute mean absolute error over all sequences
    mse_train = np.mean([
        mean_absolute_error(xval[:-1, :], xhat)
        for (xval, xhat) in zip(X_train, X_train_hat)
    ])
    print('MSE over the train set: ' + str(mse_train))

    #Compute mean absolute error over all sequences
    mse_test = np.mean([
        mean_absolute_error(xval, xhat)
        for (xval, xhat) in zip(X_test, X_test_hat)
    ])
    print('MSE over the test set: ' + str(mse_test))

    #plot training and validation losses
    plot_total_loss(model.loss['total'], model.val_loss['total'], "Total loss",
                    out_dir, "total_loss.png")
    plot_total_loss(model.loss['kl'], model.val_loss['kl'], "kl_loss", out_dir,
                    "kl_loss.png")
    plot_total_loss(model.loss['ll'], model.val_loss['ll'], "ll_loss", out_dir,
                    "ll_loss.png")  #Negative to see downard curve

    # Visualization of trajectories
    """
    subj = 6
    feature = 12

    # For train
    plot_trajectory(X_train, X_train_hat, subj, 'all', out_dir, f'traj_train_s_{subj}_f_all') # testing for a given subject
    plot_trajectory(X_train, X_train_hat, subj, feature, out_dir, f'traj_train_s_{subj}_f_{feature}') # testing for a given feature

    # For test
    plot_trajectory(X_test, X_test_hat, subj, 'all', out_dir, f'traj_test_s_{subj}_f_all') # testing for a given subject
    plot_trajectory(X_test, X_test_hat, subj, feature, out_dir, f'traj_test_s_{subj}_f_{feature}') # testing for a given feature
    """

    z_train = X_train_fwd['z']
    z_test = X_test_fwd['z']

    # select only the existing time points
    # Repeat the mask for each latent feature, since the number of features can vary
    # Use np.tile to repeat it p["z_dim"] times and transpose it
    z_test = [
        X[np.tile(mask_test[:, i, 0], (p["z_dim"], 1)).T].reshape(
            (-1, p["z_dim"])) for (i, X) in enumerate(z_test)
    ]
    z_train = [
        X[np.tile(mask_train[:, i, 0], (p["z_dim"], 1)).T].reshape(
            (-1, p["z_dim"])) for (i, X) in enumerate(z_train)
    ]
    z = z_train + z_test

    # Dir for projections
    proj_path = 'z_proj/'
    if not os.path.exists(out_dir + proj_path):
        os.makedirs(out_dir + proj_path)

    #plot latent space
    for dim0 in range(p["z_dim"]):
        for dim1 in range(dim0, p["z_dim"]):
            if dim0 == dim1: continue  # very dirty
            plot_z_time_2d(z,
                           p["ntp"], [dim0, dim1],
                           out_dir + proj_path,
                           out_name=f'z_d{dim0}_d{dim1}')

    # Dir for projections
    sampling_path = 'z_proj_dx/'
    if not os.path.exists(out_dir + sampling_path):
        os.makedirs(out_dir + sampling_path)

    #plot latent space
    for dim0 in range(p["z_dim"]):
        for dim1 in range(dim0, p["z_dim"]):
            if dim0 == dim1: continue  # very dirty
            plot_z_time_2d(z,
                           p["ntp"], [dim0, dim1],
                           out_dir + sampling_path,
                           c='DX',
                           Y=Y,
                           out_name=f'z_d{dim0}_d{dim1}')

    # Dir for projections
    sampling_path = 'z_proj_age/'
    if not os.path.exists(out_dir + sampling_path):
        os.makedirs(out_dir + sampling_path)

    #plot latent space
    for dim0 in range(p["z_dim"]):
        for dim1 in range(dim0, p["z_dim"]):
            if dim0 == dim1: continue  # very dirty
            plot_z_time_2d(z,
                           p["ntp"], [dim0, dim1],
                           out_dir + sampling_path,
                           c='AGE',
                           Y=Y,
                           out_name=f'z_d{dim0}_d{dim1}')

    # Compute MSE
    # Predict for max+1 and select only the positions that I am interested in
    # this sequence prediction does NOT work well
    Y_true = [x_seq[-1, :] for x_seq in X_train]  # use x_seq, not p, to avoid shadowing the hyperparameter dict
    Y_pred = []

    for i in range(X_train_pad.size(1)):
        x = torch.FloatTensor(X_train[i][:-1, :])
        x = x.unsqueeze(1)
        tp = x.size(0)  # max time points (and timepoint to predict)
        if tp == 0:
            continue
        X_fwd = model.sequence_predict(x.to(DEVICE), tp + 1)
        X_hat = X_fwd['xnext']
        Y_pred.append(X_hat[tp, 0, :])  #get predicted point

    #For each patient in X_hat, save only the timepoint that we want
    #Compute mse
    mse_predict = mean_squared_error(Y_true, Y_pred)
    print('MSE over a future timepoint prediction: ' + str(mse_predict))

    # TODO: THIS SAMPLING PROCEDURE NEEDS TO BE UPDATED
    """
    nt = len(X_train_pad)
    nsamples = 1000
    X_sample = model.sample_latent(nsamples, nt)

    #Get the samples
    X_sample['xnext'] = np.array(X_sample['xnext']).swapaxes(0,1)
    X_sample['z'] = np.array(X_sample['z']).swapaxes(0,1)

    # Dir for projections
    sampling_path = 'z_proj_sampling/'
    if not os.path.exists(out_dir + sampling_path):
        os.makedirs(out_dir + sampling_path)

    #plot latent space
    for dim0 in range(p["z_dim"]):
        for dim1 in range(dim0, p["z_dim"]):
            if dim0 == dim1: continue   # very dirty
            plot_z_time_2d(X_sample['z'], p["ntp"], [dim0, dim1], out_dir + 'z_proj_sampling/', out_name=f'z_d{dim0}_d{dim1}')
    """

    loss = {
        "mse_train": mse_train,
        "mse_test": mse_test,
        "mse_predict": mse_predict,
        "loss_total": model.loss['total'][-1],
        "loss_kl": model.loss['kl'][-1],
        "loss_ll": model.loss['ll'][-1]
    }

    return loss
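A minimal sketch of the padding/mask pattern used above: variable-length sequences are padded with NaN, the mask marks the observed entries, and the NaNs are then zeroed out (shapes below are illustrative):

import torch
import torch.nn as nn

seqs = [torch.randn(3, 2), torch.randn(5, 2)]                   # (nt, nfeat) sequences
padded = nn.utils.rnn.pad_sequence(seqs, batch_first=False,
                                    padding_value=float('nan'))  # (Tmax, batch, nfeat)
mask = ~torch.isnan(padded)                                      # True where data is observed
padded[torch.isnan(padded)] = 0                                  # replace padding with zeros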
Example #9
def main(args):
    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    path = osp.join('dataset', 'Reddit')
    dataset = Reddit(path)
    data = dataset[0]

    features = data.x.to(device)
    labels = data.y.to(device)
    train_mask = torch.BoolTensor(data.train_mask).to(device)
    val_mask = torch.BoolTensor(data.val_mask).to(device)
    test_mask = torch.BoolTensor(data.test_mask).to(device)

    edge_index = data.edge_index.to(device)
    edge_index, _ = remove_self_loops(edge_index)
    edge_index, _ = add_self_loops(edge_index, num_nodes=features.size(0))

    model = GAT(num_layers=args.num_layers,
                in_feats=features.size(-1),
                num_hidden=args.num_hidden,
                num_classes=dataset.num_classes,
                heads=[1, 1, 1],
                dropout=args.dropout).to(device)

    loss_fcn = nn.CrossEntropyLoss()

    logger = Logger(args.runs, args)
    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        for epoch in range(1, 1 + args.epochs):
            model.train()
            if epoch >= 3:
                t0 = time.time()
            # forward
            logits = model(features, edge_index)
            loss = loss_fcn(logits[train_mask], labels[train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))

            if not args.eval:
                continue

            train_acc, val_acc, test_acc = evaluate(model, features, edge_index, labels, train_mask, val_mask, test_mask)
            logger.add_result(run, (train_acc, val_acc, test_acc))

            print("Run {:02d} | Epoch {:05d} | Loss {:.4f} | Train {:.4f} | Val {:.4f} | Test {:.4f}".format(run, epoch, loss.item(), train_acc, val_acc, test_acc))

        if args.eval:
            logger.print_statistics(run)

    if args.eval:
        logger.print_statistics()
Example #10
    def _set_batch_skip_search(self, valid_exs: List[Message], batch: Batch) -> Batch:
        skip_search = [ex.get(self.opt['skip_search_key'], False) for ex in valid_exs]
        batch.skip_search = torch.BoolTensor(skip_search)
        return batch
Example #11
    def bfixed(self, *cmp):
        return torch.BoolTensor([list(cmp)]).to(self.device).repeat(self.batch_size, 1)
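As a usage sketch, bfixed builds a 1 x len(cmp) bool row from its arguments and tiles it along the batch dimension; the standalone equivalent below assumes a batch size of 4 on CPU:

import torch

batch_size = 4  # assumed value for illustration
mask = torch.BoolTensor([[True, False, True]]).repeat(batch_size, 1)
print(mask.shape)  # torch.Size([4, 3])
print(mask[0])     # first row: True, False, True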
Example #12
    def __init__(self, opt, dicts, positional_encoder, encoder_type):

        super(TransformerEncoder, self).__init__()

        self.model_size = opt.model_size
        self.n_heads = opt.n_heads
        self.inner_size = opt.inner_size
        if hasattr(opt, 'encoder_layers') and opt.encoder_layers != -1:
            self.layers = opt.encoder_layers
        else:
            self.layers = opt.layers
        self.dropout = opt.dropout
        self.word_dropout = opt.word_dropout
        self.attn_dropout = opt.attn_dropout
        self.emb_dropout = opt.emb_dropout
        self.time = opt.time
        self.version = opt.version
        self.input_type = encoder_type

        # input lookup table
        if encoder_type != "text":
            self.audio_trans = nn.Linear(dicts, self.model_size)
        else:
            self.word_lut = nn.Embedding(dicts.size(),
                                         self.model_size,
                                         padding_idx=onmt.Constants.PAD)

        if opt.time == 'positional_encoding':
            self.time_transformer = positional_encoder
        elif opt.time == 'gru':
            self.time_transformer = nn.GRU(self.model_size,
                                           self.model_size,
                                           1,
                                           batch_first=True)
        elif opt.time == 'lstm':
            self.time_transformer = nn.LSTM(self.model_size,
                                            self.model_size,
                                            1,
                                            batch_first=True)

        self.preprocess_layer = PrePostProcessing(self.model_size,
                                                  self.emb_dropout,
                                                  sequence='d',
                                                  static=False)

        self.postprocess_layer = PrePostProcessing(self.model_size,
                                                   0,
                                                   sequence='n')

        self.positional_encoder = positional_encoder

        self.limit_rhs_steps = opt.limit_rhs_steps

        self.build_modules(limit_rhs_steps=opt.limit_rhs_steps)
        if self.limit_rhs_steps is not None:
            largest_rhs_mask = positional_encoder.len_max + self.limit_rhs_steps
            rhs_mask = torch.BoolTensor(
                np.triu(np.ones((largest_rhs_mask, largest_rhs_mask)),
                        k=1 + self.limit_rhs_steps).astype('uint8'))
            self.register_buffer('rhs_mask', rhs_mask)

        if opt.freeze_encoder:
            for p in self.parameters():
                p.requires_grad = False
                print(p.requires_grad)
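The rhs_mask above is a strictly upper-triangular boolean mask offset by limit_rhs_steps, built in numpy and converted with torch.BoolTensor. A minimal sketch of the same mask built directly in PyTorch (sizes are illustrative):

import torch

len_max, limit_rhs_steps = 6, 1
size = len_max + limit_rhs_steps
# True strictly above the (1 + limit_rhs_steps)-th diagonal
rhs_mask = torch.triu(torch.ones(size, size), diagonal=1 + limit_rhs_steps).bool()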
Example #13
    def predict(                                                                # called from automatic_speech_recognition.py l217     # builds and returns result_dict (containing “audio”, “duration”, “results”)
        self,
        audio_path: str,
        signal: np.ndarray,                                                         # 'signal' = audio as ndarray
        top_db: int = 48,
        vad: bool = False,                                                          # vad = False first
        batch_size: int = 1,
    ) -> dict:
        result_dict = dict()

        duration = librosa.get_duration(signal, sr=self.SAMPLE_RATE)                # get overall duration (in seconds) from entire audio file
        batch_inference = duration > 50.0                                           # True if duration > 50s: long audio is split into intervals (via VAD or dB-based splitting) and recognized in batches; otherwise it is recognized in a single pass

        result_dict["audio"] = audio_path                                           # audio path--does NOT go through model
        result_dict["duration"] = str(datetime.timedelta(seconds=duration))         # str(duration of audio found by subtracting)--does NOT go through model
        result_dict["results"] = list()
        """ building up empty "results" list (by building dict 'hypo_dict' and appending to list 'results') """
        if batch_inference:                                                         # if duration > 50s:
            if vad:                                                                     # if vad:
                speech_intervals = self.vad_model(                                          # vad.py __call__ (l177): 'speech_intervals' = model output (of VoiceActivityDetection, which uses ConvVADModel to get the probability of the labels); list of lists of frequencies over interval frames (VoiceActivityDetection; vad.py); since call on object, goes to __call__ (vad.py l178)
                    signal,
                    sample_rate=self.SAMPLE_RATE,
                )
            else:                                                                       # else:
                speech_intervals = self._split_audio(signal, top_db)                        # rule-based: get list of non-silent intervals (==splices from 'signal', an ndarray ver of audio) from audio
            # either way, we get a list 'speech_intervals' of splices (of non-silent intervals) of the original 'signal' ndarray
            batches, total_speech_sections, total_durations = self._create_batches(      # return lists: 'batches' (of 'batch': tensors), 'total_speech_sections' (list of list 'speech_sections (for 1 batch)' of dicts 'speech_section' for 1 interval, shape {"start": START_TIME, "end": END_TIME}), 'total_durations' (of 'duration')
                speech_intervals,
                batch_size,
            )

            for batch_idx, batch in enumerate(batches):                                 # for each batch (dict of 'inputs' tensor (non-silent interval of 'signal'; all the batches add up to 211833) and 'input_lengths' int)
                net_input, sample = dict(), dict()

                net_input["padding_mask"] = get_mask_from_lengths(                          # net_input["padding_mask"] = tensor same size as batch["inputs"] filled w/ False
                    inputs=batch["inputs"],
                    seq_lengths=batch["input_lengths"],
                ).to(self.device)
                net_input["source"] = batch["inputs"].to(self.device)                       # net_input["source"] = batch["inputs"] (==the tensor w/ frequencies of a section from overall signal)
                sample["net_input"] = net_input                                             # sample = dict containing net_input (dict; "source": tensor w/ frequencies of a section, "padding_mask": tensor w/ same size as "source" filled w/ False)

                # yapf: disable
                if sample["net_input"]["source"].size(1) < self.MINIMUM_INPUT_LENGTH:       # skip this iteration if section is too short
                    continue
                # yapf: enable
                                                                                            # hypos: list of list of dict [[{'tokens': tensor of ints, 'score: 0}; tensor([ 8, 11, 14, 11, 10,  5,  8, 48, 10, 32,  6, 37,  7, 11, 10,  5, 32, 12, 26, 22,  6, 18, 27,  8, 13,  5]), "score": 0}]]
                hypos = self.generator.generate(      # W2lViterbiDecoder -> W2lDecoder.generate   # Generate a batch of inferences (for each batch (section), generate tensor using wav2vec model
                    self.model,                                                             # model = BrainWav2VecCtc.build_model (built w/ pretrained weights from model_path: (/home/kris/.pororo/misc/wav2vec.ko.pt))
                    sample,

                    prefix_tokens=None,
                )

                for hypo_idx, hypo in enumerate(hypos):                                     # TODO: does 'hypos' ever have more than 1 hypo?  # For each inference (i.e. for 1 section):       # hypo_idx: 0; hypo: [{'tokens': tensor([ 8, 11, 14, 11, 10,  5,  8, 48, 10, 32,  6, 37,  7, 11, 10,  5, 32, 12, 26, 22,  6, 18, 27,  8, 13,  5]), 'score': 0}]
                    hypo_dict = dict()
                    hyp_pieces = self.target_dict.string(                                       # convert tensor of ints to string (letters) using dict --> hyp_pieces: all tokens (letter by letter), "ᄀ ᅳ ᄂ ᅳ ᆫ | ᄀ ᅫ ᆫ ᄎ ᅡ ᆭ ᄋ ᅳ ᆫ | ᄎ ᅥ ᆨ ᄒ ᅡ ᄅ ᅧ ᄀ ᅩ |"
                        hypo[0]["tokens"].int().cpu())                                          # dict = target dict loaded from FB;
                    speech_section = total_speech_sections[batch_idx][hypo_idx]                 # get dict 'speech_section' (for this section); {"start": START_TIME, "end": END_TIME}

                    speech_start_time = str(                                                    # get rounded str ver of start time (e.g. 0:00:00)
                        datetime.timedelta(
                            seconds=int(round(
                                speech_section["start"],
                                0,
                            ))))
                    speech_end_time = str(                                                      # get rounded str ver of end time
                        datetime.timedelta(
                            seconds=int(round(
                                speech_section["end"],
                                0,
                            ))))

                    # yapf: disable                                                         # hypo_dict: dict printed out when asr is run (inside dict 'results')
                    hypo_dict["speech_section"] = f"{speech_start_time} ~ {speech_end_time}"    # time stamps for segment
                    hypo_dict["length_ms"] = total_durations[batch_idx][hypo_idx] * 1000        # 'total_durations': list (of 'duration'); getting ith duration
                    hypo_dict["speech"] = self._text_postprocess(hyp_pieces)                    # puts individual letters together to make proper sentence  # "그는 괜찮은 척하려고"
                    # yapf: enable

                    if hypo_dict["speech"]:                                                     # if the text is not empty: (remove empty sections)
                        result_dict["results"].append(hypo_dict)                                    # append this dict to overall 'result_dict'

                del hypos, net_input, sample                                                # 'hypos': batch of inferences, 'net_input': ?, 'sample': input?

        else:                                                                           # if duration <= 50s (i.e. batch_inference = False):
            net_input, sample, hypo_dict = dict(), dict(), dict()

            feature, duration = self._parse_audio(signal)                                   # feature (tensor ver of signal; [211883]) and duration (in sec)    # duration: 13.2426875

            net_input["source"] = feature.unsqueeze(0).to(self.device)                      # add a dimension of 1 in index 0 to feature (change to 2D)        # TODO: figure out math

            padding_mask = torch.BoolTensor(                                                # ? # will be passed onto Wav2Vec2Model as input (will have to check that code later)
                net_input["source"].size(1)).fill_(False)
            net_input["padding_mask"] = padding_mask.unsqueeze(0).to(                       # net_input["source"].shape: torch.Size([1, 211883]), net_input["padding_mask"].shape: torch.Size([1, 211883]), filled with False
                self.device)

            sample["net_input"] = net_input                                                 # add dict 'net_input' to dict 'sample'

            hypo = self.generator.generate(                                                 # Generate a batch of inferences using wav2vec model (W2lViterbiDecoder)
                self.model,                                                                 # self.model = BrainWav2VecCtc.build_model
                sample,                                                                     # 'hypo': [[{'tokens': tensor([ 8, 11, 14, 11, 10,  5,  8, 48, 10, 32,  6, 37,  7, 11, 10,  5, 32, 12, 26,  5, 22,  6, 18, 27,  8, 13,  5,  7, 23,  5, 49, 11, 14, 11, 10,  5, 8, 12,  5,  8,  6, 46,  7,  6, 29, 15,  6,  5,  8, 11, 14, 27,  7, 20, 5, 19,  6, 18,  6, 25,  7, 11, 17,  5,  7, 12, 59,  8,  9,  5,  7, 56, 22, 23,  5,  7, 23,  5, 49, 12, 29, 16,  9, 21,  6, 10,  5, 22, 12, 43, 49, 11,  8, 13,  7, 27, 29, 15,  6,  5,  7, 45, 25, 15, 13, 10,  7, 11, 17,  5,  7,  6, 33, 27, 29, 49, 12, 18,  6,  5]), 'score': 0}]]
                prefix_tokens=None,
            )
            hyp_pieces = self.target_dict.string(                                           # 'hyp_pieces': string version of tensor of token indices, converted by using 'target_dict' (.pororo/misc/ko.ltr.txt)
                hypo[0][0]["tokens"].int().cpu())                                           # hypo[0][0] (Cf. hypo[0])      # hyp_pieces: ᄀ ᅳ ᄂ ᅳ ᆫ | ᄀ ᅫ ᆫ ᄎ ᅡ ᆭ ᄋ ᅳ ᆫ | ᄎ ᅥ ᆨ | ᄒ ᅡ ᄅ ᅧ ᄀ ᅩ | ᄋ ᅢ | ᄊ ᅳ ᄂ ᅳ ᆫ | ᄀ ᅥ | ᄀ ᅡ ᇀ ᄋ ᅡ ᆻ ᄃ ᅡ | ᄀ ᅳ ᄂ ᅧ ᄋ ᅦ | ᄉ ᅡ ᄅ ᅡ ᆼ ᄋ ᅳ ᆯ | ᄋ ᅥ ᆮ ᄀ ᅵ | ᄋ ᅱ ᄒ ᅢ | ᄋ ᅢ | ᄊ ᅥ ᆻ ᄌ ᅵ ᄆ ᅡ ᆫ | ᄒ ᅥ ᆺ ᄊ ᅳ ᄀ ᅩ ᄋ ᅧ ᆻ ᄃ ᅡ | ᄋ ᅭ ᆼ ᄃ ᅩ ᆫ ᄋ ᅳ ᆯ | ᄋ ᅡ ᄁ ᅧ ᆻ ᄊ ᅥ ᄅ ᅡ |, len(hyp_pieces): 239

            speech_start_time = str(datetime.timedelta(seconds=0))                          # start_time set to 0
            speech_end_time = str(
                datetime.timedelta(seconds=int(round(duration, 0))))                        # end_time

            hypo_dict[
                "speech_section"] = f"{speech_start_time} ~ {speech_end_time}"              # fill up 'hypo_dict' (dict inside 'results')
            hypo_dict["length_ms"] = duration * 1000                                        # total_durations[batch_idx][hypo_idx] * 1000
            hypo_dict["speech"] = self._text_postprocess(hyp_pieces)                        # 그는 괜찮은 척 하려고 애 쓰는 거 같았다 그녀에 사랑을 얻기 위해 애 썼지만 헛쓰고였다 용돈을 아꼈써라

            if hypo_dict["speech"]:
                result_dict["results"].append(hypo_dict)

        return result_dict
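
A note on the mask above: the single-utterance branch builds an all-False padding mask so the model treats every frame as real audio. Below is a minimal, self-contained sketch of that idiom; the waveform is random and purely illustrative.

import torch

signal = torch.randn(211883)                                   # stand-in for a parsed waveform
source = signal.unsqueeze(0)                                   # [1, T]: add a batch dimension
padding_mask = torch.BoolTensor(source.size(1)).fill_(False)   # [T], all False = no padding
padding_mask = padding_mask.unsqueeze(0)                       # [1, T], same shape as `source`
sample = {"net_input": {"source": source, "padding_mask": padding_mask}}
assert sample["net_input"]["padding_mask"].shape == sample["net_input"]["source"].shape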
Example #14
0
    def apply_model(self, ner_model, features):
        """
        apply_model function for LM-LSTM-CRF

        args:
            ner_model: sequence labeling model
            features (list): list of word lists
        """
        char_features = encode2char_safe(features, self.c_map)

        if self.caseless:
            word_features = encode_safe(
                list(map(lambda t: list(map(lambda x: x.lower(), t)),
                         features)), self.f_map, self.f_map['<unk>'])
        else:
            word_features = encode_safe(features, self.f_map,
                                        self.f_map['<unk>'])

        fea_len = [list(map(lambda t: len(t) + 1, f)) for f in char_features]
        forw_features = concatChar(char_features, self.c_map)

        word_len = max(map(lambda t: len(t) + 1, word_features))
        char_len = max(
            map(lambda t: len(t[0]) + word_len - len(t[1]),
                zip(forw_features, word_features)))
        forw_t = list(
            map(lambda t: t + [self.pad_char] * (char_len - len(t)),
                forw_features))
        back_t = torch.LongTensor(list(map(lambda t: t[::-1], forw_t)))
        forw_t = torch.LongTensor(forw_t)
        forw_p = torch.LongTensor(
            list(
                map(
                    lambda t: list(
                        itertools.accumulate(t + [1] * (word_len - len(t)))),
                    fea_len)))
        back_p = torch.LongTensor(
            list(
                map(
                    lambda t: [char_len - 1] +
                    [char_len - 1 - tup for tup in t[:-1]], forw_p)))

        masks = torch.BoolTensor(
            list(
                map(
                    lambda t: [1] * (len(t) + 1) + [0] *
                    (word_len - len(t) - 1), word_features)))
        word_t = torch.LongTensor(
            list(
                map(lambda t: t + [self.pad_word] * (word_len - len(t)),
                    word_features)))

        if self.if_cuda:
            f_f = autograd.Variable(forw_t.transpose(0, 1)).cuda()
            f_p = autograd.Variable(forw_p.transpose(0, 1)).cuda()
            b_f = autograd.Variable(back_t.transpose(0, 1)).cuda()
            b_p = autograd.Variable(back_p.transpose(0, 1)).cuda()
            w_f = autograd.Variable(word_t.transpose(0, 1)).cuda()
            mask_v = masks.transpose(0, 1).cuda()
        else:
            f_f = autograd.Variable(forw_t.transpose(0, 1))
            f_p = autograd.Variable(forw_p.transpose(0, 1))
            b_f = autograd.Variable(back_t.transpose(0, 1))
            b_p = autograd.Variable(back_p.transpose(0, 1))
            w_f = autograd.Variable(word_t.transpose(0, 1))
            mask_v = masks.transpose(0, 1)

        scores = ner_model(f_f, f_p, b_f, b_p, w_f)
        decoded = self.decoder.decode(scores.data, mask_v)

        return decoded
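
apply_model above builds `masks` from Python 1/0 lists padded to a common word length. A minimal sketch of that construction with made-up word features:

import torch

word_features = [[4, 7, 2], [9, 1, 5, 5, 3]]                   # two encoded sentences
word_len = max(len(t) for t in word_features) + 1
masks = torch.BoolTensor(
    [[1] * (len(t) + 1) + [0] * (word_len - len(t) - 1) for t in word_features])
print(masks)             # True for each real word plus one end slot, False for padding
print(masks.sum(dim=1))  # tensor([4, 6])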
Example #15
0
def main(args):

    torch.manual_seed(1234)

    if args.dataset == 'cora' or args.dataset == 'citeseer' or args.dataset == 'pubmed':
        data = load_data(args)
        features = torch.FloatTensor(data.features)

        labels = torch.LongTensor(data.labels)
        in_feats = features.shape[1]
        g = data.graph
        if args.dataset == 'cora':
            g.remove_edges_from(nx.selfloop_edges(g))
            g.add_edges_from(zip(g.nodes(), g.nodes()))
        g = DGLGraph(g)
        attr_matrix = data.features
        labels = data.labels

    else:
        if args.dataset == 'physics':
            data = Coauthor('physics')
        if args.dataset == 'cs':
            data = Coauthor('cs')
        if args.dataset == 'computers':
            data = AmazonCoBuy('computers')
        if args.dataset == 'photo':
            data = AmazonCoBuy('photo')

        g = data
        g = data[0]
        attr_matrix = g.ndata['feat']
        labels = g.ndata['label']

        features = torch.FloatTensor(g.ndata['feat'])

    ### LCC of the graph
    n_components = 1
    sparse_graph = g.adjacency_matrix_scipy(return_edge_ids=False)
    _, component_indices = sp.csgraph.connected_components(sparse_graph)
    component_sizes = np.bincount(component_indices)
    components_to_keep = np.argsort(
        component_sizes
    )[::-1][:n_components]  # reverse order to sort descending
    nodes_to_keep = [
        idx for (idx, component) in enumerate(component_indices)
        if component in components_to_keep
    ]

    adj_matrix = sparse_graph[nodes_to_keep][:, nodes_to_keep]
    num_nodes = len(nodes_to_keep)
    g = adj_matrix
    g = DGLGraph(g)
    g = remove_self_loop(g)
    g = add_self_loop(g)
    g = DGLGraph(g)

    g.ndata['feat'] = attr_matrix[nodes_to_keep]
    features = torch.FloatTensor(g.ndata['feat'].float())
    if args.dataset == 'cora' or args.dataset == 'pubmed':
        features = features / (features.norm(dim=1) + 1e-8)[:, None]
    g.ndata['label'] = labels[nodes_to_keep]
    labels = torch.LongTensor(g.ndata['label'])

    in_feats = features.shape[1]

    unique_l = np.unique(labels, return_counts=False)
    n_classes = len(unique_l)
    n_nodes = g.number_of_nodes()
    n_edges = g.number_of_edges()

    print('Number of nodes', n_nodes, 'Number of edges', n_edges)

    enc = OneHotEncoder()
    enc.fit(labels.reshape(-1, 1))
    ylabels = enc.transform(labels.reshape(-1, 1)).toarray()

    for beta in [args.beta]:
        for K in [args.num_clusters]:
            for alpha in [args.alpha]:
                accs = []
                t_st = time.time()

                sets = "imbalanced"

                for k in range(2):  # number of different training runs
                    #print(k)

                    random_state = np.random.RandomState()
                    if sets == "imbalanced":
                        train_idx, val_idx, test_idx = get_train_val_test_split(
                            random_state,
                            ylabels,
                            train_examples_per_class=None,
                            val_examples_per_class=None,
                            test_examples_per_class=None,
                            train_size=20 * n_classes,
                            val_size=30 * n_classes,
                            test_size=None)
                    elif sets == "balanced":
                        train_idx, val_idx, test_idx = get_train_val_test_split(
                            random_state,
                            ylabels,
                            train_examples_per_class=20,
                            val_examples_per_class=30,
                            test_examples_per_class=None,
                            train_size=None,
                            val_size=None,
                            test_size=None)
                    else:
                        raise ValueError(
                            "No such set configuration (imbalanced/balanced)")

                    n_nodes = len(nodes_to_keep)
                    train_mask = np.zeros(n_nodes)
                    train_mask[train_idx] = 1
                    val_mask = np.zeros(n_nodes)
                    val_mask[val_idx] = 1
                    test_mask = np.zeros(n_nodes)
                    test_mask[test_idx] = 1
                    train_mask = torch.BoolTensor(train_mask)
                    val_mask = torch.BoolTensor(val_mask)
                    test_mask = torch.BoolTensor(test_mask)
                    """
                    Planetoid Split for CORA, CiteSeer, PubMed
                    train_mask = torch.BoolTensor(data.train_mask)
                    val_mask = torch.BoolTensor(data.val_mask)
                    test_mask = torch.BoolTensor(data.test_mask)
                    train_mask2 = torch.BoolTensor(data.train_mask)
                    val_mask2 = torch.BoolTensor(data.val_mask)
                    test_mask2 = torch.BoolTensor(data.test_mask)
                    """

                    if args.gpu < 0:
                        cuda = False

                    else:
                        cuda = True
                        torch.cuda.set_device(args.gpu)
                        features = features.cuda()
                        labels = labels.cuda()
                        train_mask = train_mask.cuda()
                        val_mask = val_mask.cuda()
                        test_mask = test_mask.cuda()

                    gic = GIC(g, in_feats, args.n_hidden, args.n_layers,
                              nn.PReLU(args.n_hidden), args.dropout, K, beta,
                              alpha)

                    if cuda:
                        gic.cuda()

                    gic_optimizer = torch.optim.Adam(
                        gic.parameters(),
                        lr=args.gic_lr,
                        weight_decay=args.weight_decay)

                    # train GIC
                    cnt_wait = 0
                    best = 1e9
                    best_t = 0
                    dur = []

                    for epoch in range(args.n_gic_epochs):
                        gic.train()
                        if epoch >= 3:
                            t0 = time.time()

                        gic_optimizer.zero_grad()
                        loss = gic(features)
                        #print(loss)
                        loss.backward()
                        gic_optimizer.step()

                        if loss < best:
                            best = loss
                            best_t = epoch
                            cnt_wait = 0
                            torch.save(gic.state_dict(), 'best_gic.pkl')
                        else:
                            cnt_wait += 1

                        if cnt_wait == args.patience:
                            #print('Early stopping!')
                            break

                        if epoch >= 3:
                            dur.append(time.time() - t0)

                        #print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | "
                        #"ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                        #n_edges / np.mean(dur) / 1000))

                    # train classifier
                    #print('Loading {}th epoch'.format(best_t))
                    gic.load_state_dict(torch.load('best_gic.pkl'))
                    embeds = gic.encoder(features, corrupt=False)
                    embeds = embeds / (embeds + 1e-8).norm(dim=1)[:, None]
                    embeds = embeds.detach()

                    # create classifier model
                    classifier = Classifier(args.n_hidden, n_classes)
                    if cuda:
                        classifier.cuda()

                    classifier_optimizer = torch.optim.Adam(
                        classifier.parameters(),
                        lr=args.classifier_lr,
                        weight_decay=args.weight_decay)

                    dur = []
                    best_a = 0
                    cnt_wait = 0
                    for epoch in range(args.n_classifier_epochs):
                        classifier.train()
                        if epoch >= 3:
                            t0 = time.time()

                        classifier_optimizer.zero_grad()
                        preds = classifier(embeds)
                        loss = F.nll_loss(preds[train_mask],
                                          labels[train_mask])
                        loss.backward()
                        classifier_optimizer.step()

                        if epoch >= 3:
                            dur.append(time.time() - t0)

                        acc = evaluate(
                            classifier, embeds, labels, val_mask
                        )  #+ evaluate(classifier, embeds, labels, train_mask)

                        if acc > best_a and epoch > 100:
                            best_a = acc
                            best_t = epoch

                            torch.save(classifier.state_dict(),
                                       'best_class.pkl')

                        #print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                        #"ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                        #acc, n_edges / np.mean(dur) / 1000))

                    acc = evaluate(classifier, embeds, labels, test_mask)
                    accs.append(acc)

                print('=================== ', ' alpha', alpha, ' beta ', beta,
                      'K', K)
                print(args.dataset, ' Acc (mean)', mean(accs), ' (std)',
                      stdev(accs))
                print('=================== time', int(
                    (time.time() - t_st) / 60))
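
The split handling above turns index arrays into 0/1 numpy vectors and then into boolean masks used for indexing. A minimal sketch of that pattern over a hypothetical 10-node graph:

import numpy as np
import torch

n_nodes = 10
train_idx = np.array([0, 1, 2])             # hypothetical split indices
train_mask = np.zeros(n_nodes)
train_mask[train_idx] = 1
train_mask = torch.BoolTensor(train_mask)   # 0/1 vector -> boolean mask
labels = torch.randint(0, 3, (n_nodes,))
print(labels[train_mask])                   # only the training labels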
Example #16
0
def main():
    game = pyspiel.load_game(
        f"quoridor(ansi_color_output=true,board_size={BOARD_SIZE},wall_count={WALL_COUNT})"
    )
    board_diam = 2 * BOARD_SIZE - 1
    agent = DQNAgent(2 * BOARD_SIZE - 1, game.num_distinct_actions())
    epsilon = EPSILON_START
    results = []
    wins, draws, loses = (0, 0, 0)
    for episode in range(EPISODES):
        #Start game/episode
        state = game.new_initial_state()

        #Loop inside one game episode
        while not state.is_terminal():
            pl = state.current_player()
            nn_input = get_nn_input(game, state)

            state_action_q_values = agent.forward(get_nn_input(game, state))
            rotated_state_action_q_values = state_action_q_values if state.current_player(
            ) == 0 else torch.flip(state_action_q_values.clone(), [1])
            if random.random() <= epsilon:
                actual_action = random.choice(state.legal_actions())
                rotated_action = actual_action if state.current_player(
                ) == 0 else (board_diam**2 - 1) - actual_action
            else:
                actual_action = torch.argmax(
                    (rotated_state_action_q_values -
                     torch.min(rotated_state_action_q_values) + 1) *
                    torch.tensor(state.legal_actions_mask())).item()
                rotated_action = actual_action if state.current_player(
                ) == 0 else (board_diam**2 - 1) - actual_action
            state.apply_action(actual_action)

            rewards = state.rewards()
            if state.is_terminal():
                with torch.no_grad():
                    state_action_q_values_target = state_action_q_values.clone(
                    ).detach()
                    state_action_q_values_target[0][rotated_action] = rewards[
                        pl]
                agent.backward(state_action_q_values,
                               state_action_q_values_target)
            else:
                with torch.no_grad():
                    next_state_action_q_values = agent.forward(
                        get_nn_input(game, state))
                    if (state.current_player() == 1):
                        next_state_action_q_values = torch.flip(
                            next_state_action_q_values, [1])
                    state_action_q_values_target = state_action_q_values.clone(
                    ).detach()
                    next_mask = torch.BoolTensor(state.legal_actions_mask())
                    next_legal_q_values = torch.masked_select(
                        next_state_action_q_values, next_mask)
                    state_action_q_values_target[0][rotated_action] = rewards[
                        pl] - GAMMA * torch.max(next_legal_q_values)
                agent.backward(state_action_q_values,
                               state_action_q_values_target)
        if (rewards[0] == 1): wins += 1
        if (rewards[0] == 0): draws += 1
        if (rewards[0] == -1): loses += 1
        if (episode % 100 == 0):
            print("Episode: ", episode, epsilon)
            print(f"W:{wins}, D:{draws}, L:{loses}")
            wins, draws, loses = (0, 0, 0)
        if epsilon > EPSILON_END:
            epsilon -= EPSILON_DECAY
        if (episode % 500 == 0):
            torch.save(
                agent,
                f"/mnt/QuoridorAI/Agents/SelfLearned{BOARD_SIZE}x{BOARD_SIZE}-{episode}"
            )
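
The Q-learning loop above uses a BoolTensor of legal actions to mask the next-state Q-values before bootstrapping. A minimal sketch of that masking step; the Q-values and legal-action vector are made up:

import torch

q_values = torch.tensor([[0.2, -1.3, 0.7, 0.1]])       # one row of action values
legal_mask = torch.BoolTensor([1, 0, 1, 1])            # 1 = legal action, 0 = illegal
legal_q = torch.masked_select(q_values, legal_mask)    # tensor([0.2000, 0.7000, 0.1000])
target = 1.0 - 0.99 * torch.max(legal_q)               # bootstrapped target, mirroring the loop above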
Example #17
0
def main():
    parser = argparse.ArgumentParser(description='GraphSAGE')
    parser.add_argument("--dataset", type=str)
    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--dropout",
                        type=float,
                        default=0.5,
                        help="dropout probability")
    parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
    parser.add_argument("--epochs",
                        type=int,
                        default=200,
                        help="number of training epochs")
    parser.add_argument("--n-hidden",
                        type=int,
                        default=16,
                        help="number of hidden gcn units")
    parser.add_argument("--aggr",
                        type=str,
                        choices=['sum', 'mean'],
                        default='mean',
                        help='Aggregation for messages')
    parser.add_argument("--weight-decay",
                        type=float,
                        default=5e-4,
                        help="Weight for L2 loss")
    parser.add_argument("--eval",
                        action='store_true',
                        help='If not set, we will only do the training part.')
    parser.add_argument("--runs", type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    path = osp.join('dataset', args.dataset)
    dataset = Planetoid(path, args.dataset, transform=T.NormalizeFeatures())
    data = dataset[0]

    features = data.x.to(device)
    labels = data.y.to(device)
    edge_index = data.edge_index.to(device)
    adj = SparseTensor(row=edge_index[0], col=edge_index[1])
    train_mask = torch.BoolTensor(data.train_mask).to(device)
    val_mask = torch.BoolTensor(data.val_mask).to(device)
    test_mask = torch.BoolTensor(data.test_mask).to(device)

    model = GraphSAGE(dataset.num_features, args.n_hidden, dataset.num_classes,
                      args.aggr, F.relu, args.dropout).to(device)

    loss_fcn = nn.CrossEntropyLoss()

    logger = Logger(args.runs, args)
    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
        for epoch in range(1, args.epochs + 1):
            model.train()
            if epoch >= 3:
                t0 = time.time()
            # forward
            logits = model(features, adj)
            loss = loss_fcn(logits[train_mask], labels[train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))

            if not args.eval:
                continue

            train_acc, val_acc, test_acc = evaluate(model, features, adj,
                                                    labels, train_mask,
                                                    val_mask, test_mask)
            logger.add_result(run, (train_acc, val_acc, test_acc))

            print(
                "Run {:02d} | Epoch {:05d} | Loss {:.4f} | Train {:.4f} | Val {:.4f} | Test {:.4f}"
                .format(run, epoch, loss.item(), train_acc, val_acc, test_acc))

        if args.eval:
            logger.print_statistics(run)

    if args.eval:
        logger.print_statistics()
Example #18
0
def run(proc_id, n_gpus, args, devices, data):
    # Start up distributed training, if enabled.
    dev_id = devices[proc_id]
    if n_gpus > 1:
        dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
            master_ip='127.0.0.1', master_port='12345')
        world_size = n_gpus
        th.distributed.init_process_group(backend="nccl",
                                          init_method=dist_init_method,
                                          world_size=world_size,
                                          rank=proc_id)
    th.cuda.set_device(dev_id)

    # Unpack data
    train_mask, val_mask, in_feats, labels, n_classes, g = data
    train_nid = th.LongTensor(np.nonzero(train_mask)[0])
    val_nid = th.LongTensor(np.nonzero(val_mask)[0])
    train_mask = th.BoolTensor(train_mask)
    val_mask = th.BoolTensor(val_mask)

    # Split train_nid
    train_nid = th.split(train_nid, len(train_nid) // n_gpus)[proc_id]

    # Create PyTorch DataLoader for constructing blocks
    sampler = dgl.sampling.MultiLayerNeighborSampler(
        [int(fanout) for fanout in args.fan_out.split(',')])
    dataloader = dgl.sampling.NodeDataLoader(g,
                                             train_nid,
                                             sampler,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             drop_last=False,
                                             num_workers=args.num_workers)

    # Define model and optimizer
    model = SAGE(in_feats, args.num_hidden, n_classes, args.num_layers, F.relu,
                 args.dropout)
    model = model.to(dev_id)
    if n_gpus > 1:
        model = DistributedDataParallel(model,
                                        device_ids=[dev_id],
                                        output_device=dev_id)
    loss_fcn = nn.CrossEntropyLoss()
    loss_fcn = loss_fcn.to(dev_id)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Training loop
    avg = 0
    iter_tput = []
    for epoch in range(args.num_epochs):
        tic = time.time()

        # Loop over the dataloader to sample the computation dependency graph as a list of
        # blocks.
        for step, (input_nodes, seeds, blocks) in enumerate(dataloader):
            if proc_id == 0:
                tic_step = time.time()

            # Load the input features as well as output labels
            batch_inputs, batch_labels = load_subtensor(
                g, labels, seeds, input_nodes, dev_id)

            # Compute loss and prediction
            batch_pred = model(blocks, batch_inputs)
            loss = loss_fcn(batch_pred, batch_labels)
            optimizer.zero_grad()
            loss.backward()

            if n_gpus > 1:
                for param in model.parameters():
                    if param.requires_grad and param.grad is not None:
                        th.distributed.all_reduce(
                            param.grad.data, op=th.distributed.ReduceOp.SUM)
                        param.grad.data /= n_gpus
            optimizer.step()

            if proc_id == 0:
                iter_tput.append(
                    len(seeds) * n_gpus / (time.time() - tic_step))
            if step % args.log_every == 0 and proc_id == 0:
                acc = compute_acc(batch_pred, batch_labels)
                print(
                    'Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MiB'
                    .format(epoch, step, loss.item(), acc.item(),
                            np.mean(iter_tput[3:]),
                            th.cuda.max_memory_allocated() / 1000000))

        if n_gpus > 1:
            th.distributed.barrier()

        toc = time.time()
        if proc_id == 0:
            print('Epoch Time(s): {:.4f}'.format(toc - tic))
            if epoch >= 5:
                avg += toc - tic
            if epoch % args.eval_every == 0 and epoch != 0:
                if n_gpus == 1:
                    eval_acc = evaluate(model, g, g.ndata['features'], labels,
                                        val_mask, args.batch_size, devices[0])
                else:
                    eval_acc = evaluate(model.module, g, g.ndata['features'],
                                        labels, val_mask, args.batch_size,
                                        devices[0])
                print('Eval Acc {:.4f}'.format(eval_acc))

    if n_gpus > 1:
        th.distributed.barrier()
    if proc_id == 0:
        print('Avg epoch time: {}'.format(avg / (epoch - 4)))
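
The unpacking step above derives both training node ids and a boolean node mask from the same 0/1 array. A minimal sketch of those two views; the mask values are made up:

import numpy as np
import torch as th

train_mask = np.array([1, 0, 1, 1, 0])                  # hypothetical 0/1 node mask
train_nid = th.LongTensor(np.nonzero(train_mask)[0])    # node ids: tensor([0, 2, 3])
train_mask = th.BoolTensor(train_mask)                  # boolean mask over all nodes
print(train_nid, train_mask)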
Example #19
0
def main(args):
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    multitask_data = set(['ppi'])
    multitask = args.dataset in multitask_data

    # load and preprocess dataset
    data = load_data(args)

    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)

    # Normalize features
    if args.normalize:
        train_feats = data.features[train_nid]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats)
        features = scaler.transform(data.features)
    else:
        features = data.features

    features = torch.FloatTensor(features)
    if not multitask:
        labels = torch.LongTensor(data.labels)
    else:
        labels = torch.FloatTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.int().sum().item()
    n_val_samples = val_mask.int().sum().item()
    n_test_samples = test_mask.int().sum().item()

    print("""----Data statistics------'
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
            (n_edges, n_classes,
            n_train_samples,
            n_val_samples,
            n_test_samples))
    # create GCN model
    g = data.graph
    if args.self_loop and not args.dataset.startswith('reddit'):
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
        print("adding self-loop edges")
    g = DGLGraph(g, readonly=True)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    print(torch.cuda.get_device_name(0))

    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    print('labels shape:', labels.shape)

    cluster_iterator = ClusterIter(
        args.dataset, g, args.psize, args.batch_size, train_nid, use_pp=args.use_pp)

    print("features shape, ", features.shape)

    model = GraphSAGE(in_feats,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      F.relu,
                      args.dropout,
                      args.use_pp)

    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    writer = SummaryWriter(log_dir)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
    print("current memory after model before training",
          torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)
    start_time = time.time()
    best_f1 = -1

    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(cluster_iterator):
            # sync with upper level training graph
            cluster.copy_from_parent()
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['labels']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # In the PPI case, `log_every` is chosen to log once per epoch.
            # Choose the log frequency dynamically if you want more detail within one epoch.
            if j % args.log_every == 0:
                print(f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                      f"{len(cluster_iterator)}:training loss", loss.item())
                writer.add_scalar('train/loss', loss.item(),
                                  global_step=j + epoch * len(cluster_iterator))
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate
        if epoch % args.val_every == 0:
            val_f1_mic, val_f1_mac = evaluate(
                model, g, labels, val_mask, multitask)
            print(
                "Val F1-mic{:.4f}, Val F1-mac{:.4f}". format(val_f1_mic, val_f1_mac))
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(), os.path.join(
                    log_dir, 'best_model.pkl'))
            writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
            writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)

    end_time = time.time()
    print(f'training used time {end_time - start_time}')

    # test
    if args.use_val:
        model.load_state_dict(torch.load(os.path.join(
            log_dir, 'best_model.pkl')))
    test_f1_mic, test_f1_mac = evaluate(
        model, g, labels, test_mask, multitask)
    print("Test F1-mic{:.4f}, Test F1-mac{:.4f}". format(test_f1_mic, test_f1_mac))
    writer.add_scalar('test/f1-mic', test_f1_mic)
    writer.add_scalar('test/f1-mac', test_f1_mac)
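
Several of these examples guard the mask construction with hasattr, since torch.BoolTensor only exists in newer PyTorch releases. A minimal sketch of that fallback; the mask data is illustrative:

import numpy as np
import torch

data_train_mask = np.array([1, 1, 0, 0, 1])      # hypothetical 0/1 mask from a dataset

if hasattr(torch, 'BoolTensor'):                 # PyTorch >= 1.2 has native bool tensors
    train_mask = torch.BoolTensor(data_train_mask)
else:                                            # older releases: fall back to uint8 masks
    train_mask = torch.ByteTensor(data_train_mask)

n_train_samples = train_mask.int().sum().item()  # 3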
Example #20
0
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    if args.self_loop and not args.dataset.startswith('reddit'):
        data.graph.add_edges_from([(i, i) for i in range(len(data.graph))])

    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)
    test_nid = np.nonzero(data.test_mask)[0].astype(np.int64)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.int().sum().item()
    n_val_samples = val_mask.int().sum().item()
    n_test_samples = test_mask.int().sum().item()

    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples, n_test_samples))

    # create GCN model
    g = DGLGraph(data.graph, readonly=True)
    norm = 1. / g.in_degrees().float().unsqueeze(1)

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        norm = norm.cuda()

    g.ndata['features'] = features

    num_neighbors = args.num_neighbors
    n_layers = args.n_layers

    g.ndata['norm'] = norm

    g.update_all(
        fn.copy_src(src='features',
                    out='m'), fn.sum(msg='m', out='preprocess'), lambda node:
        {'preprocess': node.data['preprocess'] * node.data['norm']})

    for i in range(n_layers):
        g.ndata['h_{}'.format(i)] = torch.zeros(
            features.shape[0], args.n_hidden).to(device=features.device)

    g.ndata['h_{}'.format(n_layers - 1)] = torch.zeros(
        features.shape[0], 2 * args.n_hidden).to(device=features.device)

    model = GCNSampling(in_feats, args.n_hidden, n_classes, n_layers, F.relu,
                        args.dropout)

    loss_fcn = nn.CrossEntropyLoss()

    infer_model = GCNInfer(in_feats, args.n_hidden, n_classes, n_layers,
                           F.relu)

    if cuda:
        model.cuda()
        infer_model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # Create sampler receiver
    sampler = dgl.contrib.sampling.SamplerReceiver(graph=g,
                                                   addr=args.ip,
                                                   num_sender=args.num_sampler)

    for epoch in range(args.n_epochs):
        for nf in sampler:
            for i in range(n_layers):
                agg_history_str = 'agg_h_{}'.format(i)
                g.pull(
                    nf.layer_parent_nid(i + 1).long(),
                    fn.copy_src(src='h_{}'.format(i), out='m'),
                    fn.sum(msg='m', out=agg_history_str), lambda node: {
                        agg_history_str:
                        node.data[agg_history_str] * node.data['norm']
                    })

            node_embed_names = [['preprocess', 'h_0']]
            for i in range(1, n_layers):
                node_embed_names.append(
                    ['h_{}'.format(i), 'agg_h_{}'.format(i - 1)])
            node_embed_names.append(['agg_h_{}'.format(n_layers - 1)])
            nf.copy_from_parent(node_embed_names=node_embed_names)

            model.train()
            # forward
            pred = model(nf)
            batch_nids = nf.layer_parent_nid(-1).to(device=pred.device).long()
            batch_labels = labels[batch_nids]
            loss = loss_fcn(pred, batch_labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            node_embed_names = [['h_{}'.format(i)] for i in range(n_layers)]
            node_embed_names.append([])
            nf.copy_to_parent(node_embed_names=node_embed_names)

        for infer_param, param in zip(infer_model.parameters(),
                                      model.parameters()):
            infer_param.data.copy_(param.data)

        num_acc = 0.

        for nf in dgl.contrib.sampling.NeighborSampler(g,
                                                       args.test_batch_size,
                                                       g.number_of_nodes(),
                                                       neighbor_type='in',
                                                       num_workers=32,
                                                       num_hops=n_layers,
                                                       seed_nodes=test_nid):
            node_embed_names = [['preprocess']]
            for i in range(n_layers):
                node_embed_names.append(['norm'])
            nf.copy_from_parent(node_embed_names=node_embed_names)

            infer_model.eval()
            with torch.no_grad():
                pred = infer_model(nf)
                batch_nids = nf.layer_parent_nid(-1).to(
                    device=pred.device).long()
                batch_labels = labels[batch_nids]
                num_acc += (pred.argmax(
                    dim=1) == batch_labels).sum().cpu().item()

        print("Test Accuracy {:.4f}".format(num_acc / n_test_samples))
Example #21
0
    def visualize_subgraph(self,
                           node_idx: Optional[int],
                           edge_index: Tensor,
                           edge_mask: Tensor,
                           y: Optional[Tensor] = None,
                           threshold: Optional[int] = None,
                           edge_y: Optional[Tensor] = None,
                           node_alpha: Optional[Tensor] = None,
                           seed: int = 10,
                           **kwargs):
        r"""Visualizes the subgraph given an edge mask :attr:`edge_mask`.

        Args:
            node_idx (int): The node id to explain.
                Set to :obj:`None` to explain a graph.
            edge_index (LongTensor): The edge indices.
            edge_mask (Tensor): The edge mask.
            y (Tensor, optional): The ground-truth node-prediction labels used
                as node colorings. All nodes will have the same color
                if :attr:`node_idx` is :obj:`-1`. (default: :obj:`None`)
            threshold (float, optional): Sets a threshold for visualizing
                important edges. If set to :obj:`None`, will visualize all
                edges with transparency indicating the importance of edges.
                (default: :obj:`None`)
            edge_y (Tensor, optional): The edge labels used as edge colorings.
            node_alpha (Tensor, optional): Tensor of floats (0 - 1) indicating
                transparency of each node.
            seed (int, optional): Random seed of the :obj:`networkx` node
                placement algorithm. (default: :obj:`10`)
            **kwargs (optional): Additional arguments passed to
                :func:`nx.draw`.

        :rtype: :class:`matplotlib.axes.Axes`, :class:`networkx.DiGraph`
        """
        import matplotlib.pyplot as plt
        import networkx as nx

        assert edge_mask.size(0) == edge_index.size(1)

        if node_idx is None or node_idx < 0:
            hard_edge_mask = torch.BoolTensor([True] * edge_index.size(1),
                                              device=edge_mask.device)
            subset = torch.arange(edge_index.max().item() + 1,
                                  device=edge_index.device)
            y = None

        else:
            # Only operate on a k-hop subgraph around `node_idx`.
            subset, edge_index, _, hard_edge_mask = k_hop_subgraph(
                node_idx,
                self.num_hops,
                edge_index,
                relabel_nodes=True,
                num_nodes=None,
                flow=self._flow())

        edge_mask = edge_mask[hard_edge_mask]

        if threshold is not None:
            edge_mask = (edge_mask >= threshold).to(torch.float)

        if y is None:
            y = torch.zeros(edge_index.max().item() + 1,
                            device=edge_index.device)
        else:
            y = y[subset].to(torch.float) / y.max().item()

        if edge_y is None:
            edge_color = ['black'] * edge_index.size(1)
        else:
            colors = list(plt.rcParams['axes.prop_cycle'])
            edge_color = [
                colors[i % len(colors)]['color']
                for i in edge_y[hard_edge_mask]
            ]

        data = Data(edge_index=edge_index,
                    att=edge_mask,
                    edge_color=edge_color,
                    y=y,
                    num_nodes=y.size(0)).to('cpu')
        G = to_networkx(data,
                        node_attrs=['y'],
                        edge_attrs=['att', 'edge_color'])
        mapping = {k: i for k, i in enumerate(subset.tolist())}
        G = nx.relabel_nodes(G, mapping)

        node_args = set(signature(nx.draw_networkx_nodes).parameters.keys())
        node_kwargs = {k: v for k, v in kwargs.items() if k in node_args}
        node_kwargs['node_size'] = kwargs.get('node_size') or 800
        node_kwargs['cmap'] = kwargs.get('cmap') or 'cool'

        label_args = set(signature(nx.draw_networkx_labels).parameters.keys())
        label_kwargs = {k: v for k, v in kwargs.items() if k in label_args}
        label_kwargs['font_size'] = kwargs.get('font_size') or 10

        pos = nx.spring_layout(G, seed=seed)
        ax = plt.gca()
        for source, target, data in G.edges(data=True):
            ax.annotate('',
                        xy=pos[target],
                        xycoords='data',
                        xytext=pos[source],
                        textcoords='data',
                        arrowprops=dict(
                            arrowstyle="->",
                            alpha=max(data['att'], 0.1),
                            color=data['edge_color'],
                            shrinkA=sqrt(node_kwargs['node_size']) / 2.0,
                            shrinkB=sqrt(node_kwargs['node_size']) / 2.0,
                            connectionstyle="arc3,rad=0.1",
                        ))

        if node_alpha is None:
            nx.draw_networkx_nodes(G,
                                   pos,
                                   node_color=y.tolist(),
                                   **node_kwargs)
        else:
            node_alpha_subset = node_alpha[subset]
            assert ((node_alpha_subset >= 0) & (node_alpha_subset <= 1)).all()
            nx.draw_networkx_nodes(G,
                                   pos,
                                   alpha=node_alpha_subset.tolist(),
                                   node_color=y.tolist(),
                                   **node_kwargs)

        nx.draw_networkx_labels(G, pos, **label_kwargs)

        return ax, G
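
visualize_subgraph above starts from a boolean hard_edge_mask and can binarize the soft edge mask against a threshold. A minimal sketch of those two steps with a made-up edge mask:

import torch

edge_mask = torch.tensor([0.9, 0.2, 0.6, 0.05])                 # learned edge importances
hard_edge_mask = torch.BoolTensor([True] * edge_mask.size(0))   # graph-level case: keep every edge
kept = edge_mask[hard_edge_mask]
kept = (kept >= 0.5).to(torch.float)                            # threshold as in the method above
print(kept)                                                     # tensor([1., 0., 1., 0.])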
Example #22
0
def run(args, device, data):
    # Unpack data
    train_mask, val_mask, test_mask, in_feats, labels, ind_labels, n_classes, g, ind_g, lp_dict = data

    train_nid = th.LongTensor(np.nonzero(train_mask)[0])
    val_nid = th.LongTensor(np.nonzero(val_mask)[0])
    train_mask = th.BoolTensor(train_mask)
    val_mask = th.BoolTensor(val_mask)
    test_mask = th.BoolTensor(test_mask)

    # Create sampler
    sampler = NeighborSampler(
        g, [int(fanout) for fanout in args.fan_out.split(',')])

    # Create PyTorch DataLoader for constructing blocks
    dataloader = DataLoader(dataset=train_nid.numpy(),
                            batch_size=args.batch_size,
                            collate_fn=sampler.sample_blocks,
                            shuffle=True,
                            drop_last=False,
                            num_workers=args.num_workers)

    # Define model and optimizer
    model = SAGE(in_feats, args.num_hidden, n_classes, args.num_layers, F.relu,
                 args.dropout)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    coeffs = Variable(torch.FloatTensor([1., 3.0]).to(device),
                      requires_grad=True)
    coeffs_optimizer = optim.SGD([coeffs], lr=1e-1, momentum=0.0)

    # Training loop
    avg = 0
    iter_tput = []
    steps_per_epoch = len(dataloader)
    for epoch in range(args.num_epochs):
        tic = time.time()

        # Loop over the dataloader to sample the computation dependency graph as a list of
        # blocks.
        for step, blocks in enumerate(dataloader):
            tic_step = time.time()

            # The nodes for input lies at the LHS side of the first block.
            # The nodes for output lies at the RHS side of the last block.
            input_nodes = blocks[0].srcdata[dgl.NID]
            seeds = blocks[-1].dstdata[dgl.NID]

            # Load the input features as well as output labels
            batch_inputs, batch_labels = load_subtensor(
                g, labels, seeds, input_nodes, device)
            # Compute loss and prediction
            model.train()
            batch_pred = model(blocks, batch_inputs)
            loss = loss_fcn(batch_pred.squeeze(), batch_labels.squeeze(),
                            seeds, lp_dict['adj'], coeffs, device, False)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (step + 1) % (steps_per_epoch // 2) == 0:
                model.train()
                batch_pred = model(blocks, batch_inputs)
                loss = loss_fcn(batch_pred.squeeze(), batch_labels.squeeze(),
                                seeds, lp_dict['adj'], coeffs, device, True)
                coeffs_optimizer.zero_grad()
                loss.backward()
                coeffs_optimizer.step()

            iter_tput.append(len(seeds) / (time.time() - tic_step))
            if step % args.log_every == 0:
                r2 = compute_r2(batch_pred, batch_labels)
                gpu_mem_alloc = th.cuda.max_memory_allocated(
                ) / 1000000 if th.cuda.is_available() else 0
                #print('Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train R2 {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MiB'.format(epoch, step, loss.item(), r2.item(), np.mean(iter_tput[3:]), gpu_mem_alloc))
                print(
                    'Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train R2 {:.4f} | alpha: {:.4f} | beta: {:.4f}'
                    .format(epoch, step, loss.item(), r2.item(),
                            th.tanh(coeffs[0]).item(),
                            th.exp(coeffs[1]).item()))

        toc = time.time()
        print('Epoch Time(s): {:.4f}'.format(toc - tic))
        if epoch >= 5:
            avg += toc - tic
        if epoch % args.eval_every == 0 and epoch != 0:
            eval_r2 = evaluate(model, g, g.ndata['features'], labels, val_mask,
                               args.batch_size, device)
            print('Eval R2: {:.4f}'.format(eval_r2))

    evaluate_test(model, g, g.ndata['features'], labels, test_mask,
                  args.batch_size, device, lp_dict, coeffs, "2012")
    evaluate_test(model, ind_g, ind_g.ndata['features'], ind_labels, test_mask,
                  args.batch_size, device, lp_dict, coeffs, "2016")

    print('Avg epoch time: {}'.format(avg / (epoch - 4)))
Example #23
0
    def __getitem__(self, idx):
        input_example = self.data_list[idx]
        text = input_example.text
        label = input_example.label
        word_tokens = ['[CLS]']
        label_list = ['[CLS]']
        label_mask = [0]  # value in (0, 1) - 0 signifies invalid token

        input_ids = [self.tokenizer.convert_tokens_to_ids('[CLS]')]
        label_ids = [self.label_map['[CLS]']]

        # iterate over individual tokens and their labels
        for word, label in zip(text.split(), label):
            tokenized_word = self.tokenizer.tokenize(word)

            for token in tokenized_word:
                word_tokens.append(token)
                input_ids.append(self.tokenizer.convert_tokens_to_ids(token))

            label_list.append(label)
            label_ids.append(self.label_map[label])
            label_mask.append(1)
            # len(tokenized_word) > 1 only if it splits word in between, in which case
            # the first token gets assigned NER tag and the remaining ones get assigned
            # X
            for i in range(1, len(tokenized_word)):
                label_list.append('X')
                label_ids.append(self.label_map['X'])
                label_mask.append(0)

        assert len(word_tokens) == len(label_list) == len(input_ids) == len(
            label_ids) == len(label_mask)

        if len(word_tokens) >= self.max_len:
            word_tokens = word_tokens[:(self.max_len - 1)]
            label_list = label_list[:(self.max_len - 1)]
            input_ids = input_ids[:(self.max_len - 1)]
            label_ids = label_ids[:(self.max_len - 1)]
            label_mask = label_mask[:(self.max_len - 1)]

        assert len(word_tokens) < self.max_len, len(word_tokens)

        word_tokens.append('[SEP]')
        label_list.append('[SEP]')
        input_ids.append(self.tokenizer.convert_tokens_to_ids('[SEP]'))
        label_ids.append(self.label_map['[SEP]'])
        label_mask.append(0)

        assert len(word_tokens) == len(label_list) == len(input_ids) == len(
            label_ids) == len(label_mask)

        sentence_id = [0 for _ in input_ids]
        attention_mask = [1 for _ in input_ids]

        while len(input_ids) < self.max_len:
            input_ids.append(0)
            label_ids.append(self.label_map['X'])
            attention_mask.append(0)
            sentence_id.append(0)
            label_mask.append(0)

        assert len(word_tokens) == len(label_list)
        assert len(input_ids) == len(label_ids) == len(attention_mask) == len(
            sentence_id) == len(label_mask) == self.max_len, len(input_ids)
        # return word_tokens, label_list,
        return torch.LongTensor(input_ids), torch.LongTensor(
            label_ids), torch.LongTensor(attention_mask), torch.LongTensor(
                sentence_id), torch.BoolTensor(label_mask)
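
The BoolTensor label_mask returned above marks which sub-token positions carry a real NER label. A minimal sketch of how such a mask picks out the scored positions; the ids are made up:

import torch

label_mask = torch.BoolTensor([0, 1, 1, 0, 1, 0])   # 1 = first sub-token of a word, 0 = ignore
label_ids = torch.LongTensor([5, 3, 7, 5, 2, 5])
print(label_ids[label_mask])                        # tensor([3, 7, 2]) -- only scored positions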
Example #24
0
    def __getitem__(self, index):
        h5_file = h5py.File(self.config['filename'], 'r', swmr=True)
        sample = h5_file[self.memberslist[index]]

        optical = sample['optical'][...]
        if self.config['raw_thermal']:
            thermal = sample['thermal_raw'][...]
        else:
            thermal = sample['thermal'][...]

        if thermal.shape != optical.shape:
            raise ValueError(
                'ImagePairDataset: The optical and thermal image must have the same shape'
            )

        if self.config['keypoints_filename'] is not None:
            with h5py.File(self.config['keypoints_filename'], 'r',
                           swmr=True) as keypoints_file:
                keypoints = np.array(
                    keypoints_file[self.memberslist[index]]['keypoints'])
        else:
            keypoints = None

        # subsample images if requested
        if self.config['height'] > 0 or self.config['width'] > 0:
            if self.config['height'] > 0:
                h = self.config['height']
            else:
                h = thermal.shape[0]

            if self.config['width'] > 0:
                w = self.config['width']
            else:
                w = thermal.shape[1]

            if w > thermal.shape[1] or h > thermal.shape[0]:
                raise ValueError(
                    'ImagePairDataset: Requested height/width exceeds original image size'
                )

            # subsample the image
            i_h = random.randint(0, thermal.shape[0] - h)
            i_w = random.randint(0, thermal.shape[1] - w)

            optical = optical[i_h:i_h + h, i_w:i_w + w]
            thermal = thermal[i_h:i_h + h, i_w:i_w + w]

            if keypoints is not None:
                # shift keypoints
                keypoints = keypoints - np.array([[i_h, i_w]])

                # filter out bad ones
                keypoints = keypoints[np.logical_and(
                    np.logical_and(keypoints[:, 0] >= 0, keypoints[:, 0] < h),
                    np.logical_and(keypoints[:, 1] >= 0, keypoints[:, 1] < w))]

        else:
            h = thermal.shape[0]
            w = thermal.shape[1]

        out = {}

        if self.config['single_image']:
            is_optical = bool(random.randint(0, 1))

            if is_optical:
                image = optical
            else:
                image = thermal

            # augmentation
            if self.config['augmentation']['photometric']['enable']:
                image = augmentation.photometric_augmentation(
                    image, **self.config['augmentation']['photometric'])

            if self.config['augmentation']['homographic']['enable']:
                image, keypoints, valid_mask = augmentation.homographic_augmentation(
                    image, keypoints,
                    **self.config['augmentation']['homographic'])
            else:
                valid_mask = augmentation.dummy_valid_mask(image.shape)

            # add channel information to image and mask
            image = np.expand_dims(image, 0)
            valid_mask = np.expand_dims(valid_mask, 0)

            # add to output dict
            out['image'] = torch.from_numpy(image.astype(np.float32))
            out['valid_mask'] = torch.from_numpy(valid_mask.astype(bool))
            out['is_optical'] = torch.BoolTensor([is_optical])
            if keypoints is not None:
                keypoints = utils.generate_keypoint_map(keypoints, (h, w))
                out['keypoints'] = torch.from_numpy(keypoints.astype(bool))

        else:
            # initialize the images
            out['optical'] = {}
            out['thermal'] = {}

            optical_is_optical = True
            thermal_is_optical = False
            if self.config['random_pairs']:
                tmp_optical = optical
                tmp_thermal = thermal
                if bool(random.randint(0, 1)):
                    optical = tmp_thermal
                    optical_is_optical = False
                if bool(random.randint(0, 1)):
                    thermal = tmp_optical
                    thermal_is_optical = True

            # augmentation
            if self.config['augmentation']['photometric']['enable']:
                optical = augmentation.photometric_augmentation(
                    optical, **self.config['augmentation']['photometric'])
                thermal = augmentation.photometric_augmentation(
                    thermal, **self.config['augmentation']['photometric'])

            if self.config['augmentation']['homographic']['enable']:
                # randomly pick one image to warp
                if bool(random.randint(0, 1)):
                    valid_mask_thermal = augmentation.dummy_valid_mask(
                        thermal.shape)
                    keypoints_thermal = keypoints
                    optical, keypoints_optical, valid_mask_optical, H = augmentation.homographic_augmentation(
                        optical,
                        keypoints,
                        return_homography=True,
                        **self.config['augmentation']['homographic'])
                    out['optical']['homography'] = torch.from_numpy(
                        H.astype(np.float32))
                    out['thermal']['homography'] = torch.eye(
                        3, dtype=torch.float32)
                else:
                    valid_mask_optical = augmentation.dummy_valid_mask(
                        optical.shape)
                    keypoints_optical = keypoints
                    thermal, keypoints_thermal, valid_mask_thermal, H = augmentation.homographic_augmentation(
                        thermal,
                        keypoints,
                        return_homography=True,
                        **self.config['augmentation']['homographic'])
                    out['thermal']['homography'] = torch.from_numpy(
                        H.astype(np.float32))
                    out['optical']['homography'] = torch.eye(
                        3, dtype=torch.float32)
            else:
                keypoints_optical = keypoints
                keypoints_thermal = keypoints
                valid_mask_optical = valid_mask_thermal = augmentation.dummy_valid_mask(
                    optical.shape)

            # add channel information to image and mask
            optical = np.expand_dims(optical, 0)
            thermal = np.expand_dims(thermal, 0)
            valid_mask_optical = np.expand_dims(valid_mask_optical, 0)
            valid_mask_thermal = np.expand_dims(valid_mask_thermal, 0)

            out['optical']['image'] = torch.from_numpy(
                optical.astype(np.float32))
            out['optical']['valid_mask'] = torch.from_numpy(
                valid_mask_optical.astype(bool))
            out['optical']['is_optical'] = torch.BoolTensor(
                [optical_is_optical])
            if keypoints_optical is not None:
                keypoints_optical = utils.generate_keypoint_map(
                    keypoints_optical, (h, w))
                out['optical']['keypoints'] = torch.from_numpy(
                    keypoints_optical.astype(bool))

            out['thermal']['image'] = torch.from_numpy(
                thermal.astype(np.float32))
            out['thermal']['valid_mask'] = torch.from_numpy(
                valid_mask_thermal.astype(bool))
            out['thermal']['is_optical'] = torch.BoolTensor(
                [thermal_is_optical])
            if keypoints_thermal is not None:
                keypoints_thermal = utils.generate_keypoint_map(
                    keypoints_thermal, (h, w))
                out['thermal']['keypoints'] = torch.from_numpy(
                    keypoints_thermal.astype(bool))

        if self.config['return_name']:
            out['name'] = self.memberslist[index]

        return out
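A minimal, self-contained sketch (not part of the dataset class above) of the boolean pattern it relies on: keypoint maps and per-sample flags become torch.bool tensors via torch.from_numpy and torch.BoolTensor. The keypoint-map construction here is a hypothetical stand-in for utils.generate_keypoint_map.

import numpy as np
import torch

h, w = 4, 6
keypoints = np.array([[1, 2], [3, 5]])             # (row, col) keypoint coordinates
keypoint_map = np.zeros((h, w), dtype=bool)        # hypothetical stand-in for utils.generate_keypoint_map
keypoint_map[keypoints[:, 0], keypoints[:, 1]] = True

mask = torch.from_numpy(keypoint_map)              # dtype is torch.bool
flag = torch.BoolTensor([True])                    # single flag, as in out['is_optical']
print(mask.dtype, int(mask.sum()), flag)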
Example #25
0
def main(args):
    # graph
    coo_adj = sp.load_npz("reddit_self_loop/reddit_self_loop_graph.npz")
    graph = DGLGraph(coo_adj, readonly=True)
    # features and labels
    reddit_data = np.load("reddit_self_loop/reddit_data.npz")
    features = reddit_data["feature"]
    labels = reddit_data["label"]
    num_labels = 41
    # train/val/test indices
    node_ids = reddit_data["node_ids"]
    node_types = reddit_data["node_types"]
    train_mask = (node_types == 1)
    val_mask = (node_types == 2)
    test_mask = (node_types == 3)
    graph.ndata['train_mask'] = train_mask
    graph.ndata['val_mask'] = val_mask
    graph.ndata['test_mask'] = test_mask
    graph.ndata['feat'] = features
    graph.ndata['label'] = labels
    features = torch.Tensor(features)
    in_feats = features.shape[1]
    labels = torch.LongTensor(labels)
    train_nid = torch.LongTensor(np.where(train_mask == True)[0])
    train_mask = torch.BoolTensor(train_mask)
    val_nid = torch.LongTensor(np.where(val_mask == True)[0])
    val_mask = torch.BoolTensor(val_mask)
    test_nid = torch.LongTensor(np.where(test_mask == True)[0])
    test_mask = torch.BoolTensor(test_mask)

    g = dgl.graph(graph.all_edges())  # convert to a heterograph
    g.ndata['features'] = features

    gpu = args.gpu
    use_cuda = gpu >= 0 and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(gpu)
        g.to(torch.device('cuda:{}'.format(gpu)))
        labels = labels.cuda()

    fanouts = list(map(int, args.fan_out.split(',')))
    sampler = Sample(g, fanouts, args.num_neg)
    # shuffle the dataset and split it into batches; each batch samples two blocks (Bs)
    batch_size = args.batch_size
    num_workers = args.num_workers
    # train_ids = torch.LongTensor(np.arange(g.number_of_edges()))
    dataloader = DataLoader(dataset=train_nid.numpy(),
                            batch_size=batch_size,
                            collate_fn=sampler.obtain_Bs,
                            shuffle=True,
                            drop_last=False,
                            num_workers=num_workers)
    #print('Loading...')
    #t0 = time.time()
    #DLoaders = []
    #for step, (pos_graph, neg_graph, blocks) in enumerate(dataloader):
    #DLoaders.append((step, (pos_graph, neg_graph, blocks)))
    #t1 = time.time()
    #print('Step {} | {} s'.format(step, t1-t0))
    #t0 = time.time()

    # set up the model
    num_hid = args.num_hidden
    ks = args.num_layers
    dropout_r = args.dropout
    agg = args.agg
    bias = args.bias
    norm = args.norm
    model = GraphSAGE(in_feats,
                      num_hid,
                      num_labels,
                      ks,
                      bias=bias,
                      aggregator=agg,
                      activation=F.relu,
                      norm=norm,
                      dropout=dropout_r,
                      use_cuda=use_cuda)
    if use_cuda:
        model.cuda()
    loss_fcn = Unsuper_Cross_Entropy()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # acc
    def compute_acc(logits, labels, train_nids, val_nids, test_nids):
        logits = logits.cpu().numpy()
        labels = labels.cpu().numpy()
        train_nids = train_nids.cpu().numpy()
        val_nids = val_nids.cpu().numpy()
        test_nids = test_nids.cpu().numpy()

        # standardize the outputs
        logits = (logits - logits.mean(0)) / logits.std(0)

        clf = LogisticRegression(multi_class='multinomial', max_iter=10000)
        clf.fit(logits[train_nids], labels[train_nids])

        pred = clf.predict(logits)
        '''
        pred = torch.argmax(logits, dim=1)
        f1_micro_eval = ((pred[val_nids] == labels[val_nids]).float().sum() / pred[val_nids].shape[0]).item()
        f1_micro_test = ((pred[test_nids] == labels[test_nids]).float().sum() / pred[test_nids].shape[0]).item()
        '''
        f1_micro_eval = metrics.f1_score(labels[val_nids],
                                         pred[val_nids],
                                         average='micro')
        f1_micro_test = metrics.f1_score(labels[test_nids],
                                         pred[test_nids],
                                         average='micro')
        return f1_micro_eval, f1_micro_test

    # eval
    def evaluation(model, g, labels, train_nids, val_nids, test_nids,
                   batch_size):
        model.eval()
        with torch.no_grad():
            logits = model.infer(g, batch_size)
        model.train()
        return compute_acc(logits, labels, train_nids, val_nids, test_nids)

    # training, validation and testing
    n_epochs = args.num_epochs
    log_every = args.log_every
    eval_every = args.eval_every
    iter_pos = []
    iter_neg = []
    iter_d = []
    iter_t = []
    best_eval_acc = 0
    best_test_acc = 0
    for epoch in range(n_epochs):
        time_epoch_0 = time.time()
        time_step = time.time()
        for step, (pos_graph, neg_graph, blocks) in enumerate(dataloader):
            #for (step, (pos_graph, neg_graph, blocks)) in DLoaders:
            input_nodes = blocks[0].srcdata[dgl.NID]
            batch_inputs = g.ndata['features'][input_nodes]
            if use_cuda:
                batch_inputs = batch_inputs.cuda()
            time_load = time.time()

            batch_pred = model(batch_inputs, blocks)
            loss = loss_fcn(batch_pred, pos_graph, neg_graph, use_cuda)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            time_train = time.time()

            edge_pos = pos_graph.number_of_edges()
            edge_neg = neg_graph.number_of_edges()
            iter_pos.append(edge_pos / (time_train - time_step))
            iter_neg.append(edge_neg / (time_train - time_step))
            iter_d.append(time_load - time_step)
            iter_t.append(time_train - time_load)

            if step % log_every == 0:
                if step == 0:
                    print(
                        'Epoch {:05d} | Step {:05d} | Loss {:.4f} | '
                        'Speed (samples/sec) {:.4f} & {:.4f} | Load Time(sec) {:.4f} | Train Time(sec) {:.4f}'
                        .format(epoch, step, loss.item(), np.mean(iter_pos),
                                np.mean(iter_neg), np.mean(iter_d),
                                np.mean(iter_t)))
                else:
                    print(
                        'Epoch {:05d} | Step {:05d} | Loss {:.4f} | '
                        'Speed (samples/sec) {:.4f} & {:.4f} | Load Time(sec) {:.4f} | Train Time(sec) {:.4f}'
                        .format(epoch, step, loss.item(),
                                np.mean(iter_pos[3:]), np.mean(iter_neg[3:]),
                                np.mean(iter_d[3:]), np.mean(iter_t[3:])))

            time_step = time.time()
            #if step == 2:
            #break

        if epoch % eval_every == 0:
            print('\n')
            print('Eval-ing...')
            time_ev_0 = time.time()
            eval_acc, test_acc = evaluation(model, g, labels, train_nid,
                                            val_nid, test_nid, batch_size)
            if eval_acc > best_eval_acc:
                best_eval_acc = eval_acc
                best_test_acc = test_acc
            time_ev_1 = time.time()
            print('Eval Acc {:.4f} | Eval Time(s): {:.4f}'.format(
                eval_acc, time_ev_1 - time_ev_0))
            print('Best Eval Acc {:.4f} | Best Test Acc {:.4f}'.format(
                best_eval_acc, best_test_acc))
            time_step = time.time()
            #if epoch == 1:
            #break

        time_epoch_1 = time.time()
        print('Epoch Time(s): {:.4f}'.format(time_epoch_1 - time_epoch_0))
    if eval_every != 1:
        print('\n')
        print('Eval-ing...')
        time_ev_0 = time.time()
        eval_acc, test_acc = evaluation(model, g, labels, train_nid, val_nid,
                                        test_nid, batch_size)
        if eval_acc > best_eval_acc:
            best_eval_acc = eval_acc
            best_test_acc = test_acc
        time_ev_1 = time.time()
        print('Eval Acc {:.4f} | Eval Time(s): {:.4f}'.format(
            eval_acc, time_ev_1 - time_ev_0))
        print('Best Eval Acc {:.4f} | Best Test Acc {:.4f}'.format(
            best_eval_acc, best_test_acc))
    print('\n')
    print('Finish!')
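A minimal sketch, assuming a toy node_types array in place of the Reddit files above, of how the boolean train/val/test masks and node-id lists are derived:

import numpy as np
import torch

node_types = np.array([1, 2, 3, 1, 1, 2])          # toy stand-in for reddit_data["node_types"]
train_mask = torch.BoolTensor(node_types == 1)
val_mask = torch.BoolTensor(node_types == 2)
test_mask = torch.BoolTensor(node_types == 3)
train_nid = torch.nonzero(train_mask, as_tuple=True)[0]   # same ids as np.where(node_types == 1)[0]
print(train_mask, train_nid)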
Example #26
0
 def __init__(self):
     self.state = torch.tensor([])
     self.state_ = torch.tensor([])  # next_state
     self.done = torch.BoolTensor([])
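For context, a short sketch (assuming transitions are appended one batch at a time) of how such an empty BoolTensor buffer of done flags can grow and be negated:

import torch

done = torch.BoolTensor([])                        # empty flag buffer, as in the constructor above
done = torch.cat([done, torch.BoolTensor([False, False, True])])
not_done = ~done                                   # logical negation of a bool tensor
print(done, not_done)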
Example #27
0
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if args.self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()
    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers, F.relu,
                args.dropout)

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, features, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
Example #28
0
    def eval_step(self, get_loss, test=False, prefix=""):
        # eval mode
        self.deb.eval()
        self.model.eval()
        self.pre_trainer.encoder.eval()
        self.pre_trainer.decoder.eval()

        total_stats = []
        text_z_prime = {
            KEYS["input"]: [],
            KEYS["gen"]: [],
            KEYS["deb"]: [],
            "origin_labels": [],
            "pred_label": []
        }
        references = []
        hypothesis = []
        hypothesis2 = []
        with torch.no_grad():
            for batch in tqdm(self.val_data_iter, desc='val'):
                n_words, xe_loss, n_valid = 0, 0, 0
                (x, lengths, langs), y1, y2, weight_out = batch
                flag = True
                """
                # only on negative example
                #negative_examples = ~(y2.squeeze() < self.params.threshold)
                negative_examples = y2.squeeze() > self.params.threshold
                batch, flag = select_with_mask(batch, mask = negative_examples)
                (x, lengths, langs), y1, y2, weight_out = batch
                #"""
                if flag:
                    y = y2 if self.params.version == 3 else y1
                    x, y, lengths, langs = to_cuda(x, y, lengths, langs)
                    #langs = langs if self.params.n_langs > 1 else None
                    #langs = None
                    batch = (x, lengths, langs), y1, y2, weight_out
                    _, _, z, _, stats, y_hat = self.classif_step(
                        get_loss, y, batch)
                    z = z.transpose(0, 1)  # (bs-ϵ, seq_len, dim)
                    bs = z.size(0)

                    z_prime = self.deb('fwd',
                                       x=z,
                                       lengths=lengths,
                                       causal=False)
                    z_prime = z_prime.transpose(0, 1)  # (bs-ϵ, seq_len, dim)

                    non_mask_deb = torch.BoolTensor([True] * bs)
                    loss_rec, word_scores, y_ = self.enc_dec(
                        x, lengths, langs, z, non_mask_deb, bs)
                    # update stats
                    n_words += y_.size(0)
                    xe_loss += loss_rec.item() * len(y_)
                    n_valid += (word_scores.max(1)[1] == y_).sum().item()
                    # compute perplexity and prediction accuracy
                    n_words = n_words + eps
                    stats['rec_ppl'] = np.exp(xe_loss / n_words)
                    stats['rec_acc'] = 100. * n_valid / n_words

                    texts = self.generate(x,
                                          lengths,
                                          langs,
                                          z,
                                          z_prime=z_prime,
                                          log=False)
                    for k, v in texts.items():
                        text_z_prime[k].append(v)
                    references.extend(texts[KEYS["input"]])
                    hypothesis.extend(texts[KEYS["gen"]])
                    hypothesis2.extend(texts[KEYS["deb"]])
                    text_z_prime["origin_labels"].append(y.cpu().numpy())
                    text_z_prime["pred_label"].append(y_hat.cpu().numpy())

                    total_stats.append(stats)

        self.end_eval(text_z_prime, references, hypothesis, hypothesis2)

        if test:
            pre_train_scores = {}
            return total_stats, pre_train_scores

        return total_stats
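A small sketch of what the non_mask_deb BoolTensor does in the step above, assuming a toy batch of hidden states in place of z:

import torch

bs, seq_len, dim = 3, 5, 8
z = torch.randn(bs, seq_len, dim)
non_mask_deb = torch.BoolTensor([True] * bs)       # keep every sentence in the batch
non_mask_deb[1] = False                            # drop one item to see the effect
kept = z[non_mask_deb]                             # boolean indexing along the batch dimension
print(kept.shape)                                  # torch.Size([2, 5, 8])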
Example #29
0
    def _optimize_classifier_evaluate(self,
                                      only_labeled=False,
                                      only_unlabeled=False,
                                      update_lam=False):
        """
        Optimize the classifier on the full dataset and then evaluate the model.

        :param only_labeled: Whether to only use the labeled data
        :param only_unlabeled: Whether the data is only unlabeled
        :param update_lam: Whether to update the regularization parameter lambda
        """
        self.model.eval()
        if self.params.ckn:
            self.model.model = opt_utils.compute_normalizations(
                self.model.model)

        if only_labeled is False and only_unlabeled is False:
            all_features = opt_utils.compute_all_features(
                self.data.train_labeled_loader,
                self.data.train_unlabeled_loader,
                self.data.valid_loader,
                self.data.test_loader,
                self.model,
                normalize=self.params.normalize,
                standardize=self.params.standardize,
                augment=self.params.augment)
            y_labeled_one_hot = opt_utils.one_hot_embedding(
                all_features['train_labeled']['y'],
                self.params.nclasses).to(defaults.device)
            y_unlabeled = opt_utils.nearest_neighbor(
                all_features['train_labeled']['x'],
                all_features['train_unlabeled']['x'],
                all_features['train_labeled']['y'], self.params.nn)
            y_unlabeled_one_hot = opt_utils.one_hot_embedding(
                y_unlabeled, self.params.nclasses)
            x_train = torch.cat(
                (all_features['train_labeled']['x'],
                 all_features['train_unlabeled']['x'])).to(defaults.device)
            if not update_lam:
                with torch.autograd.no_grad():
                    _, w_last, b_last = ulr_utils.ulr_square_loss_y(
                        x_train,
                        torch.cat((y_labeled_one_hot, y_unlabeled_one_hot)),
                        self.params.lam)
            else:
                y_train = torch.argmax(
                    torch.cat((y_labeled_one_hot, y_unlabeled_one_hot)), 1)
                test_acc, valid_acc, train_acc, test_loss, train_loss, w, best_lambda = train_classifier.train(
                    (x_train, y_train),
                    (all_features['valid']['x'], all_features['valid']['y']),
                    (all_features['test']['x'], all_features['test']['y']),
                    self.model,
                    self.params.nclasses,
                    self.params.maxiter_wlast_full,
                    w_init=None,
                    normalize=True,
                    standardize=False,
                    loss_name='square',
                    lambdas=None,
                    input_features=True)
                self.params.lam = best_lambda
                w_last = w[1:, :]
                b_last = w[0, :]
        elif only_unlabeled is False:
            all_features = opt_utils.compute_all_features(
                self.data.train_labeled_loader,
                self.data.train_unlabeled_loader,
                self.data.valid_loader,
                self.data.test_loader,
                self.model,
                normalize=self.params.normalize,
                standardize=self.params.standardize,
                augment=self.params.augment)
            if self.iteration != 0 and not update_lam:
                y_labeled_one_hot = opt_utils.one_hot_embedding(
                    all_features['train_labeled']['y'], self.params.nclasses)
                _, w_last, b_last = ulr_utils.ulr_square_loss_y(
                    all_features['train_labeled']['x'].to(defaults.device),
                    y_labeled_one_hot, self.params.lam)
            else:
                test_acc, valid_acc, train_acc, test_loss, train_loss, w, best_lambda = train_classifier.train(
                    (all_features['train_labeled']['x'],
                     all_features['train_labeled']['y']),
                    (all_features['valid']['x'], all_features['valid']['y']),
                    (all_features['test']['x'], all_features['test']['y']),
                    self.model,
                    self.params.nclasses,
                    self.params.maxiter_wlast_full,
                    w_init=None,
                    normalize=True,
                    standardize=False,
                    loss_name='square',
                    lambdas=None,
                    input_features=True)
                self.params.lam = best_lambda
                self.w_last = w
                w_last = w[1:, :]
                b_last = w[0, :]
        else:
            all_features = opt_utils.compute_all_features(
                None,
                None,
                None,
                self.data.test_loader,
                self.model,
                normalize=self.params.normalize,
                standardize=self.params.standardize,
                augment=self.params.augment)
            with torch.autograd.no_grad():
                n = len(all_features['test']['x'])
                mask = (torch.BoolTensor(n, n).zero_() + 1).to(defaults.device)
                known = torch.zeros(n, n).to(defaults.device)
                torch.diagonal(known).fill_(1)
                torch.diagonal(mask).fill_(0)

                x_test = all_features['test']['x'].to(defaults.device)
                M, eigengap = label_utils.optimize_labels(
                    x_test,
                    self.params.nclasses,
                    self.params.lam,
                    mask=mask,
                    known_values=known,
                    nmin=self.params.min_frac_points_class * n,
                    nmax=self.params.max_frac_points_class * n,
                    eigenvalues=False)
                M = M.type(torch.get_default_dtype())
                y_test = all_features['test']['y'].to(defaults.device)
                yhat_test = torch.LongTensor(
                    label_utils.get_estimated_labels(M, y_test,
                                                     self.params.nclasses))
                if self.params.labeling_method == 'pseudo labeling':
                    yhat_one_hot = opt_utils.one_hot_embedding(
                        yhat_test, self.params.nclasses)
                    obj, self.w_last, self.b_last = ulr_utils.ulr_square_loss_y(
                        x_test, yhat_one_hot, self.params.lam, 0)

                test_accuracy = torch.mean((y_test.cpu() == yhat_test).float())
                if self.iteration == 0:
                    print('Iteration \t Test accuracy')
                print(self.iteration, '\t\t',
                      '{:06.4f}'.format(test_accuracy.item()))
                results = {'test_accuracy': test_accuracy}
                self.results.update(self.iteration, **results)

        if not only_unlabeled:
            results = opt_utils.evaluate_features(self.params, w_last, b_last,
                                                  all_features)
            if not only_labeled:
                if self.iteration == 0:
                    opt_utils.print_results(self.iteration,
                                            results,
                                            header=True)
                else:
                    opt_utils.print_results(self.iteration,
                                            results,
                                            header=False)
            self.results.update(self.iteration, **results)

        if self.params.ckn:
            for layer_num in range(len(self.model.model.layers)):
                self.model.model.layers[layer_num].store_normalization = False
        self.model.train()
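A brief sketch of the same intent as the mask/known construction above (all entries selected except a known diagonal), using torch.ones with dtype=torch.bool instead of (torch.BoolTensor(n, n).zero_() + 1); the dtype differs but the selection pattern is the same:

import torch

n = 4
mask = torch.ones(n, n, dtype=torch.bool)          # all entries selected ...
mask.fill_diagonal_(False)                         # ... except the known diagonal
known = torch.eye(n)                               # diagonal of known values, as above
print(mask)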
Example #30
0
    def forward(self,
                feat,
                right,
                wrong,
                probs,
                fake=None,
                fake_diff_mask=None):
        np.set_printoptions(precision=4)
        num_wrong = wrong.size(1)
        batch_size = feat.size(0)

        smooth_dist_summary = torch.sum(torch.sum(probs, dim=1), dim=0)

        feat = feat.view(-1, self.ninp, 1)
        right_dis = torch.bmm(right.view(-1, 1, self.ninp), feat)
        wrong_dis = torch.bmm(wrong, feat)

        thresh_mask = torch.gt(probs, self.contra_thresh)
        contra_mask = torch.BoolTensor(probs.size()).cuda()
        contra_mask[:, :, :] = False
        contra_mask[:, :, 0] = True

        decrease_contra_mask = contra_mask * torch.logical_not(thresh_mask)

        probs[decrease_contra_mask] = 0.
        one_hot_probs = torch.nn.functional.one_hot(probs.argmax(dim=2),
                                                    3).double()

        dist_summary = torch.sum(torch.sum(one_hot_probs, dim=1), dim=0)

        pair_wise_score_diff = torch.squeeze(
            right_dis.expand_as(wrong_dis) - wrong_dis)
        if self.debug:
            if self.iter % self.log_iter == 0:
                # print('---------------- Score difference: --------------')
                # rows = [['data_'+str(i) for i in range(batch_size)]]
                # pair_wise_score_diff_np = pair_wise_score_diff.cpu().detach().numpy()
                # wrong_scores_np = wrong_dis.cpu().detach().numpy()
                # right_scores_np = right_dis.cpu().detach().numpy()
                #
                # for j in range(num_wrong):
                #     row = []
                #     for i in range(batch_size):
                #         row.append('%.4f | %.4f | %.4f' % (np.around(right_scores_np[i][0][0], 4), np.around(wrong_scores_np[i][j][0], 4),
                #                                      np.round(pair_wise_score_diff_np[i][j], 4)))
                #     rows.append(row)
                # st = Texttable()
                # st.add_rows(rows)
                # print(st.draw())
                print('----------------Probabilities------------------')
                print(probs.cpu().detach().numpy())
                print('----------------One hot------------------------')
                print(one_hot_probs.cpu().detach().numpy())
                print('----------------dist_summary-------------------')
                print(dist_summary.cpu().detach().numpy())
                print('----------------smooth_dist_summary------------')
                print(smooth_dist_summary.cpu().detach().numpy())
                pause()

        w = (one_hot_probs[:, :, 0] * self.alphaC +
             one_hot_probs[:, :, 1] * self.alphaE +
             one_hot_probs[:, :, 2] * self.alphaN)  # b x neg

        truth_separation_probs = 1. / (1 + torch.exp(-self.sigma *
                                                     (pair_wise_score_diff)))

        log_likelihood_expanded = torch.log(truth_separation_probs)  # b x neg

        weighted_log_likelihood = log_likelihood_expanded * w

        loss_dis = -torch.sum(torch.sum(weighted_log_likelihood, dim=1))

        loss_norm = right.norm() + feat.norm() + wrong.norm()

        # if fake:
        #     fake_dis = torch.bmm(fake.view(-1, 1, self.ninp), feat)
        #     fake_score = torch.masked_select(torch.exp(fake_dis - right_dis), fake_diff_mask)
        #
        #     margin_score = F.relu(torch.log(fake_score + 1) - self.margin)
        #     loss_fake = torch.sum(margin_score)
        #     loss_dis += loss_fake
        #     loss_norm += fake.norm()

        loss = (loss_dis + self.alpha_norm * loss_norm) / batch_size
        # if fake:
        #     return loss, loss_fake.data[0] / batch_size
        # else:
        return loss, dist_summary, smooth_dist_summary
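A compact sketch of the contra_mask / thresh_mask interaction above, run on CPU with random probabilities standing in for the model outputs:

import torch

probs = torch.rand(2, 3, 3)                        # (batch, negatives, classes); CPU stand-in for the CUDA tensor above
thresh_mask = probs > 0.5
contra_mask = torch.zeros_like(probs, dtype=torch.bool)
contra_mask[:, :, 0] = True                        # flag only the first channel
decrease = contra_mask & ~thresh_mask              # same effect as multiplying the bool masks
probs[decrease] = 0.
print(int(decrease.sum()))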