Example #1
def prepare_batch(encoder, batch):
    global task2title_max_steps, embedding_size
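    # batch is ((sentence tokens, true sequence lengths), title tokens);
    # C presumably wraps/moves tensors (e.g., onto the GPU when enabled)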
    (sents_tup, seq_lens), res_tup = batch
    res = C(encode(encoder, res_tup))
    sents = encode(encoder, sents_tup)
    sents = sents.view(-1, task2title_max_steps, embedding_size)
    #norm = sents.norm(p=2, dim=2, keepdim=True)

    #return (sents.div(norm), seq_lens), res.div(res.norm(p=2, dim=1, keepdim=True))
    return (sents, seq_lens), res
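
The view(-1, task2title_max_steps, embedding_size) call regroups the flat per-sentence encodings into one row of steps per sample; a minimal shape check, assuming 2 samples, 3 steps, and embedding size 4:

import torch

flat = torch.randn(2 * 3, 4)   # 6 encoded sentences, flattened
sents = flat.view(-1, 3, 4)    # -> (2 samples, 3 steps, 4 dims)
assert sents.shape == (2, 3, 4)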
Example #2
    def forward(self, inp):
        inp, lengths = inp
        inp = C(inp)
        out, imm = self.gru(inp)

        # select only outputs at lengths[i]
        padded_lengths = [
            i * inp.size(1) + v - 1 for i, v in enumerate(lengths)
        ]
        out_ = out.contiguous().view(-1, self.inner_size)[padded_lengths, :]

        # then feed them to fully connected
        out_ = self.linear(out_)

        out_ = torch.tanh(out_)

        return out_.div(out_.norm(p=2, dim=1, keepdim=True))
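
The closing div by the row-wise L2 norm makes every output unit-length, so cosine similarity between a prediction and a target reduces to a plain dot product; a small illustration with hypothetical vectors:

import torch

a = torch.randn(8)
b = torch.randn(8)
a, b = a / a.norm(p=2), b / b.norm(p=2)
# for unit vectors, the dot product equals cosine similarity
assert torch.isclose(torch.dot(a, b),
                     torch.nn.functional.cosine_similarity(a, b, dim=0))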
Example #3
# Combinatoric selections

from utils import C

limit = 1000000

candidate = []
for n in range(1, 101):
    for r in range(1, n):
        if C(n, r) > limit:
            candidate.append((n, r))

print(len(candidate))
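
This is Project Euler problem 53 ("Combinatoric selections"), and C(n, r) is presumably the binomial coefficient imported from utils; a minimal stand-in under that assumption:

from math import factorial

def C(n, r):
    # n choose r, computed exactly with integer arithmetic
    return factorial(n) // (factorial(r) * factorial(n - r))

With this definition the script prints 4075, the number of C(n, r) values above one million for n up to 100.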
Example #4
"""
# dataset = 'debug_BA:train_size=1000,test_size=100,num_nodes_training=16,num_nodes_testing=64'
#dataset = 'debug_BA:train_size=1000,test_size=100,num_nodes_training=0,num_nodes_testing=0'
dataset = 'aids700nef'
parser.add_argument('--dataset', default=dataset)

dataset_version = None  # 'v2'
parser.add_argument('--dataset_version', default=dataset_version)

filter_large_size = None 
parser.add_argument('--filter_large_size', type=int, default=filter_large_size)  # None or >= 1

select_node_pair = None
parser.add_argument('--select_node_pair', type=str, default=select_node_pair)  # None or gid1_gid2

c = C()  # counting
parser.add_argument('--node_fe_{}'.format(c.c()), default='one_hot')

# parser.add_argument('--node_fe_{}'.format(c.c()),
#                     default='local_degree_profile')

natts, eatts, tvt_options, align_metric_options, *_ = \
    get_dataset_conf(dataset)

""" Must use exactly one alignment metric across the entire run. """
#align_metric = align_metric_options[0]
#if len(align_metric_options) == 2:
""" Choose which metric to use. """
#align_metric = 'ged'
align_metric = 'mcs'
parser.add_argument('--align_metric', default=align_metric)
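
Here c = C() acts as a counter: each c.c() call presumably returns the next index, so repeated node-feature flags come out as --node_fe_1, --node_fe_2, and so on; a minimal stand-in under that assumption:

class C(object):
    """Counter stand-in: c() yields 1, 2, 3, ... on successive calls."""

    def __init__(self):
        self.count = 0

    def c(self):
        self.count += 1
        return self.count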
Example #5
def train(model, data_loader, encoder, training=None, testing=None):
    global task2title_path, task2title_batch_size
    num_batches = (data_loader.get_total_samples(source=training) +
                   task2title_batch_size - 1) // task2title_batch_size

    if enable_cuda:
        model = model.cuda()
    model.train()

    optimizer = optim.Adam(model.parameters())

    best_loss = 1e8

    with open(task2title_path + '.run.log',
              'a') as logfile, open(task2title_path + '.vectors.log',
                                    'a') as vector_file:
        for epoch in range(num_epochs):
            log('starting epoch ', epoch + 1, log_file=logfile)
            total_loss = 0
            last_saved = -save_backoff
            for batchid, batch in enumerate(
                    data_loader.get_samples(batch_size=task2title_batch_size,
                                            max_seq=task2title_max_steps,
                                            source=training)):
                steps, results = prepare_batch(encoder, batch)

                predicted = model(steps)
                optimizer.zero_grad()

                length = len(batch[1][1])
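                # y = +1 for every pair: cosine_embedding_loss then pulls each
                # predicted vector toward its ground-truth title embedding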
                y = torch.FloatTensor(length).fill_(1)
                y = y.cuda() if enable_cuda else y

                loss = nn.functional.cosine_embedding_loss(
                    predicted, results, C(y))

                loss.backward()

                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               gradient_clip)

                optimizer.step()
                this_step_loss = loss.sum().item()
                total_loss += this_step_loss

                if batchid % log_every == 0:
                    log("\tBatch {}/{}, average loss: {}, current loss: {}".
                        format(batchid, num_batches,
                               total_loss / (batchid + 1), this_step_loss),
                        log_file=logfile)
                    #log("\t\tPred norms: ", (predicted).norm(dim=1).data.tolist(), log_file=logfile)
                    #log("\t\tGT norms: ", (results).norm(dim=1).data.tolist(), log_file=logfile)
                    log("\t\tDiff norms: ",
                        (predicted - results).norm(dim=1).data.tolist(),
                        log_file=logfile)

                if this_step_loss < best_loss and (last_saved +
                                                   save_backoff) <= batchid:
                    log("\t\tSaving best at epoch {}, batch {}, loss {}...".
                        format(epoch, batchid, this_step_loss),
                        log_file=logfile)
                    torch.save(model, task2title_path + ".best.pyt")
                    best_loss = this_step_loss
                    last_saved = batchid

                if batchid % save_every == 0:
                    log("\t\tSaving regularly at epoch {}, batch {}...".format(
                        epoch, batchid),
                        log_file=logfile)
                    torch.save(model, task2title_path + ".regular.pyt")

            # torch.save(model, task2title_path+".epoch-{}.pyt".format(epoch))

        if testing:
            model.eval()
            for batchid, batch in enumerate(
                    data_loader.get_samples(batch_size=task2title_batch_size,
                                            max_seq=task2title_max_steps,
                                            source=testing)):
                steps, results = prepare_batch(encoder, batch)
                predicted = model(steps)

                length = len(batch[1][1])
                for v_id in range(length):
                    print('gt ',
                          results.data[v_id, :].tolist(),
                          file=vector_file)
                    print('pr ',
                          predicted.data[v_id, :].tolist(),
                          file=vector_file)

            model.train()
    return model
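
train() relies on a log helper that is not shown; a minimal stand-in, assuming it prints to stdout and mirrors each message to the optional log_file:

def log(*args, log_file=None):
    # join the positional arguments print-style
    msg = ' '.join(str(a) for a in args)
    print(msg)
    if log_file is not None:
        print(msg, file=log_file)
        log_file.flush()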
Example #6
dataset: 
    (for MCS)
    aids700nef linux imdbmulti redditmulti10k
"""
dataset = 'aids700nef'
parser.add_argument('--dataset', default=dataset)

filter_large_size = None
parser.add_argument('--filter_large_size', type=int,
                    default=filter_large_size)  # None or >= 1

select_node_pair = None
parser.add_argument('--select_node_pair', type=str,
                    default=select_node_pair)  # None or gid1_gid2

c = C()
parser.add_argument('--node_fe_{}'.format(c.c()), default='one_hot')

# parser.add_argument('--node_fe_{}'.format(c.c()),
#                     default='local_degree_profile')

natts, eatts, tvt_options, align_metric_options, *_ = \
    get_dataset_conf(dataset)
""" Must use exactly one alignment metric across the entire run. """
align_metric = align_metric_options[0]
if len(align_metric_options) == 2:
    """ Choose which metric to use. """
    align_metric = 'ged'
    # align_metric = 'mcs'
parser.add_argument('--align_metric', default=align_metric)
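
After parsing, the counter-numbered flags can be gathered back in order; a hedged sketch, assuming the --node_fe_1, --node_fe_2, ... naming produced above (argparse exposes those as attributes node_fe_1, node_fe_2, ...):

args = parser.parse_args()
node_feats = [v for k, v in sorted(vars(args).items())
              if k.startswith('node_fe_')]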
Example #7
# generate names for each attribute
# used when output to excel
import utils
from utils import C
import xlwt
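# C(...) presumably returns a config object exposing the bucket bounds
# bmin, bmax, and step used below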
CTN = C(0, 0, 5, 0, 60)
B_BOTTOM = CTN.bmin
B_TOP = CTN.bmax
STEP = CTN.step


def generate_names():
    names = ['R', 'G', 'B', 'RGB_STD']
    for i in range(B_BOTTOM, B_TOP, STEP):
        names.append('RGB_DIS_' + str(i))
    names.append('DIS_STD')
    for i in range(B_BOTTOM, B_TOP, STEP):
        names.append('RGB_STD_' + str(i))
    names.append('STD_STD')
    names.append('MAX_DIS')
    names.append('MIN_DIS')
    for i in range(B_BOTTOM, B_TOP, STEP):
        names.append('RGB_MEAN_1_' + str(i))
        names.append('RGB_MEAN_2_' + str(i))
        names.append('RGB_MEAN_3_' + str(i))
        names.append('RGB_MEAN_4_' + str(i))
        names.append('RGB_MEAN_5_' + str(i))
        names.append('RGB_MEAN_6_' + str(i))
    names.append('BI')
    names.append('CI')
    names.append('RI')
    return names
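
A short usage sketch for the header row, assuming the xlwt workbook imported above is the intended sink:

book = xlwt.Workbook()
sheet = book.add_sheet('attributes')
for col, name in enumerate(generate_names()):
    sheet.write(0, col, name)  # one header cell per attribute
book.save('attributes.xls')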