def prepare_batch(encoder, batch):
    global task2title_max_steps, embedding_size
    (sents_tup, seq_lens), res_tup = batch
    # Encode the target titles and the step texts.
    res = C(encode(encoder, res_tup))
    sents = encode(encoder, sents_tup)
    # Unflatten the step embeddings to (batch, max_steps, embedding_size).
    sents = sents.view(-1, task2title_max_steps, embedding_size)
    # norm = sents.norm(p=2, dim=2, keepdim=True)
    # return (sents.div(norm), seq_lens), res.div(res.norm(p=2, dim=1, keepdim=True))
    return (sents, seq_lens), res
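# A minimal standalone sketch of the reshape performed above, with
# hypothetical sizes (batch of 2, 4 max steps, 8-dim embeddings): the
# encoder output is assumed flat, one row per step, and view() groups
# the rows back into one (steps x embedding) block per sample.
import torch

_max_steps, _emb = 4, 8
_flat = torch.randn(2 * _max_steps, _emb)    # (batch * max_steps, emb)
_grouped = _flat.view(-1, _max_steps, _emb)  # (batch, max_steps, emb)
assert _grouped.shape == (2, _max_steps, _emb)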
def forward(self, inp):
    inp, lengths = inp
    inp = C(inp)
    out, imm = self.gru(inp)
    # Select only the outputs at each sequence's true last step:
    # index i * seq_len + lengths[i] - 1 in the flattened
    # (batch * seq, hidden) view skips the padding.
    padded_lengths = [i * inp.size(1) + v - 1 for i, v in enumerate(lengths)]
    out_ = out.contiguous().view(-1, self.inner_size)[padded_lengths, :]
    # Then feed them through the fully connected layer.
    out_ = self.linear(out_)
    out_ = nn.functional.tanh(out_)
    # L2-normalise so downstream cosine comparisons see unit vectors.
    return out_.div(out_.norm(p=2, dim=1, keepdim=True))
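# Standalone illustration (hypothetical sizes) of the padded_lengths
# trick above: flattening (batch, seq, hidden) outputs to
# (batch * seq, hidden) lets index i * seq + lengths[i] - 1 pick each
# sequence's output at its true last step.
import torch

_batch, _seq, _hidden = 3, 5, 2
_out = torch.arange(_batch * _seq * _hidden,
                    dtype=torch.float).view(_batch, _seq, _hidden)
_lengths = [5, 2, 4]
_idx = [i * _seq + v - 1 for i, v in enumerate(_lengths)]
_last = _out.contiguous().view(-1, _hidden)[_idx, :]
assert torch.equal(_last[1], _out[1, 1])  # sample 1 really ends at step 1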
# Combinatoric selections
from utils import C

limit = 1000000
candidate = []
for n in range(1, 101):
    for r in range(1, n):
        if C(n, r) > limit:
            candidate.append((n, r))
print(len(candidate))
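# Cross-check (Python 3.8+): math.comb computes the same binomial
# coefficient, so this should print the same count as above, assuming
# utils.C(n, r) is n-choose-r.
import math

print(len([(n, r) for n in range(1, 101) for r in range(1, n)
           if math.comb(n, r) > limit]))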
""" # dataset = 'debug_BA:train_size=1000,test_size=100,num_nodes_training=16,num_nodes_testing=64' #dataset = 'debug_BA:train_size=1000,test_size=100,num_nodes_training=0,num_nodes_testing=0' dataset = 'aids700nef' parser.add_argument('--dataset', default=dataset) dataset_version = None # 'v2' parser.add_argument('--dataset_version', default=dataset_version) filter_large_size = None parser.add_argument('--filter_large_size', type=int, default=filter_large_size) # None or >= 1 select_node_pair = None parser.add_argument('--select_node_pair', type=str, default=select_node_pair) # None or gid1_gid2 c = C()#counting parser.add_argument('--node_fe_{}'.format(c.c()), default='one_hot') # parser.add_argument('--node_fe_{}'.format(c.c()), # default='local_degree_profile') natts, eatts, tvt_options, align_metric_options, *_ = \ get_dataset_conf(dataset) """ Must use exactly one alignment metric across the entire run. """ #align_metric = align_metric_options[0] #if len(align_metric_options) == 2: """ Choose which metric to use. """ #align_metric = 'ged' align_metric = 'mcs' parser.add_argument('--align_metric', default=align_metric)
def train(model, data_loader, encoder, training=None, testing=None):
    global task2title_path, task2title_batch_size
    num_batches = (data_loader.get_total_samples(source=training) +
                   task2title_batch_size - 1) // task2title_batch_size
    if enable_cuda:
        model = model.cuda()
    model.train()
    optimizer = optim.Adam(model.parameters())
    best_loss = 1e8
    with open(task2title_path + '.run.log', 'a') as logfile, \
            open(task2title_path + '.vectors.log', 'a') as vector_file:
        for epoch in range(num_epochs):
            log('starting epoch ', epoch + 1, log_file=logfile)
            total_loss = 0
            last_saved = -save_backoff
            for batchid, batch in enumerate(
                    data_loader.get_samples(batch_size=task2title_batch_size,
                                            max_seq=task2title_max_steps,
                                            source=training)):
                steps, results = prepare_batch(encoder, batch)
                predicted = model(steps)
                optimizer.zero_grad()
                # Target 1 for every pair: predicted and ground-truth
                # vectors should point in the same direction.
                length = len(batch[1][1])  # number of samples in this batch
                y = torch.FloatTensor(length).fill_(1)
                y = y.cuda() if enable_cuda else y
                loss = nn.functional.cosine_embedding_loss(
                    predicted, results, C(y))
                loss.backward()
                torch.nn.utils.clip_grad_norm(model.parameters(),
                                              gradient_clip)
                optimizer.step()
                this_step_loss = loss.sum().data[0]
                total_loss += this_step_loss
                if batchid % log_every == 0:
                    log("\tBatch {}/{}, average loss: {}, current loss: {}".
                        format(batchid, num_batches,
                               total_loss / (batchid + 1), this_step_loss),
                        log_file=logfile)
                    # log("\t\tPred norms: ", predicted.norm(dim=1).data.tolist(), log_file=logfile)
                    # log("\t\tGT norms: ", results.norm(dim=1).data.tolist(), log_file=logfile)
                    log("\t\tDiff norms: ",
                        (predicted - results).norm(dim=1).data.tolist(),
                        log_file=logfile)
                if this_step_loss < best_loss and \
                        (last_saved + save_backoff) <= batchid:
                    log("\t\tSaving best at epoch {}, batch {}, loss {}...".
                        format(epoch, batchid, this_step_loss),
                        log_file=logfile)
                    torch.save(model, task2title_path + ".best.pyt")
                    best_loss = this_step_loss
                    last_saved = batchid
                if batchid % save_every == 0:
                    log("\t\tSaving regularly at epoch {}, batch {}...".
                        format(epoch, batchid), log_file=logfile)
                    torch.save(model, task2title_path + ".regular.pyt")
                    # torch.save(model, task2title_path + ".epoch-{}.pyt".format(epoch))
            if testing:
                # Dump ground-truth and predicted vectors for inspection,
                # then switch back to training mode for the next epoch.
                model.eval()
                for batchid, batch in enumerate(
                        data_loader.get_samples(
                            batch_size=task2title_batch_size,
                            max_seq=task2title_max_steps,
                            source=testing)):
                    steps, results = prepare_batch(encoder, batch)
                    predicted = model(steps)
                    length = len(batch[1][1])
                    for v_id in range(length):
                        print('gt ', results.data[v_id, :].tolist(),
                              file=vector_file)
                        print('pr ', predicted.data[v_id, :].tolist(),
                              file=vector_file)
                model.train()
    return model
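# Toy illustration of the loss used above: with target y = 1,
# cosine_embedding_loss penalises 1 - cos(predicted, results), so
# vectors pointing the same way (regardless of magnitude) give ~0 loss.
# Standalone sketch in current PyTorch style, unlike the 0.x-era
# .data[0] indexing used in train().
import torch
import torch.nn as nn

_p = torch.tensor([[1.0, 0.0], [0.0, 1.0]])
_r = torch.tensor([[2.0, 0.0], [0.0, 3.0]])  # same directions, larger norms
_y = torch.ones(2)
assert nn.functional.cosine_embedding_loss(_p, _r, _y).item() < 1e-6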
dataset: (for MCS)
    aids700nef
    linux
    imdbmulti
    redditmulti10k
"""
dataset = 'aids700nef'
parser.add_argument('--dataset', default=dataset)

filter_large_size = None
parser.add_argument('--filter_large_size', type=int,
                    default=filter_large_size)  # None or >= 1

select_node_pair = None
parser.add_argument('--select_node_pair', type=str,
                    default=select_node_pair)  # None or gid1_gid2

c = C()

parser.add_argument('--node_fe_{}'.format(c.c()), default='one_hot')
# parser.add_argument('--node_fe_{}'.format(c.c()),
#                     default='local_degree_profile')

natts, eatts, tvt_options, align_metric_options, *_ = \
    get_dataset_conf(dataset)

""" Must use exactly one alignment metric across the entire run. """
align_metric = align_metric_options[0]
if len(align_metric_options) == 2:
    """ Choose which metric to use. """
    align_metric = 'ged'
    # align_metric = 'mcs'
parser.add_argument('--align_metric', default=align_metric)
# Generate names for each attribute.
# Used when writing output to Excel.
import utils
from utils import C

import xlwt

CTN = C(0, 0, 5, 0, 60)
B_BOTTOM = CTN.bmin
B_TOP = CTN.bmax
STEP = CTN.step


def generate_names():
    names = ['R', 'G', 'B', 'RGB_STD']
    for i in range(B_BOTTOM, B_TOP, STEP):
        names.append('RGB_DIS_' + str(i))
    names.append('DIS_STD')
    for i in range(B_BOTTOM, B_TOP, STEP):
        names.append('RGB_STD_' + str(i))
    names.append('STD_STD')
    names.append('MAX_DIS')
    names.append('MIN_DIS')
    for i in range(B_BOTTOM, B_TOP, STEP):
        # Six mean columns per distance bin (same order as the original
        # explicit appends RGB_MEAN_1_<i> ... RGB_MEAN_6_<i>).
        for j in range(1, 7):
            names.append('RGB_MEAN_{}_{}'.format(j, i))
    names.append('BI')
    names.append('CI')
    names.append('RI')
    return names
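# Usage sketch, assuming the bin parameters above give bmin=0, bmax=60,
# step=5 (12 bins) and that the function ends here: the header row then
# has 4 + 12 + 1 + 12 + 1 + 2 + 12 * 6 + 3 = 107 column names.
if __name__ == '__main__':
    assert len(generate_names()) == 107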