Example #1
def train():
    logging.info('Loading vocab, train and val datasets. Please wait a moment.')

    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    with open(args.train_dir) as f:
        examples = [json.loads(line) for line in f]
    train_dataset = utils.Dataset(examples)

    with open(args.val_dir) as f:
        examples = [json.loads(line) for line in f]
    val_dataset = utils.Dataset(examples)

    # update args
    args.embed_num = embed.size(0)
    args.embed_dim = embed.size(1)
    args.kernel_sizes = [int(ks) for ks in args.kernel_sizes.split(',')]
    # build model
    net = getattr(models, args.model)(args, embed)
    if use_gpu:
        net.cuda()
    # load dataset
    train_iter = DataLoader(dataset=train_dataset,
                            batch_size=args.batch_size,
                            shuffle=True)
    val_iter = DataLoader(dataset=val_dataset,
                          batch_size=args.batch_size,
                          shuffle=False)
    # loss function
    criterion = nn.BCELoss()
    # model info
    print(net)
    params = sum(p.numel() for p in list(net.parameters())) / 1e6
    print('#Params: %.1fM' % (params))

    min_loss = float('inf')
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
    net.train()

    t1 = time()
    for epoch in range(1, args.epochs + 1):
        for i, batch in enumerate(train_iter):
            features, targets, _, doc_lens = vocab.make_features(batch)
            features, targets = Variable(features), Variable(targets.float())
            if use_gpu:
                features = features.cuda()
                targets = targets.cuda()
            probs = net(features, doc_lens)
            loss = criterion(probs, targets)
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm(net.parameters(), args.max_norm)
            optimizer.step()
            if args.debug:
                print('Batch ID: %d Loss: %f' % (i, loss.item()))
                continue
            if i % args.report_every == 0:
                cur_loss = eval(net, vocab, val_iter, criterion)
                if cur_loss < min_loss:
                    min_loss = cur_loss
                    best_path = net.save()
                logging.info('Epoch: %2d Min_Val_Loss: %f Cur_Val_Loss: %f' %
                             (epoch, min_loss, cur_loss))
    t2 = time()
    logging.info('Total Cost:%f h' % ((t2 - t1) / 3600))
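The loop above relies on an `eval` helper and a `net.save()` method that are not shown. A minimal sketch of what such a validation helper might look like, with the signature taken from the call site and the body an assumption that mirrors the training step (it is meant to live in the same module, so `Variable` and `use_gpu` are already in scope):

def eval(net, vocab, val_iter, criterion):
    # Assumed helper: average validation loss over the whole validation iterator.
    net.eval()
    total_loss, batches = 0.0, 0
    for batch in val_iter:
        features, targets, _, doc_lens = vocab.make_features(batch)
        features, targets = Variable(features), Variable(targets.float())
        if use_gpu:
            features, targets = features.cuda(), targets.cuda()
        probs = net(features, doc_lens)
        total_loss += criterion(probs, targets).item()
        batches += 1
    net.train()
    return total_loss / max(batches, 1)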
Example #2
    def __init__(self, config):
        self.config = config

        # Load train data and build vocabulary
        self.train_data, self.dev_data, self.test_data = tree.simplified_data(
            700, 100, 200)
        self.vocab = utils.Vocab()
        train_sents = [t.get_words() for t in self.train_data]
        self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

        # add input placeholders
        self.is_leaf_placeholder = tf.compat.v1.placeholder(
            tf.bool, (None), name='is_leaf_placeholder')
        self.left_children_placeholder = tf.compat.v1.placeholder(
            tf.int32, (None), name='left_children_placeholder')
        self.right_children_placeholder = tf.compat.v1.placeholder(
            tf.int32, (None), name='right_children_placeholder')
        self.node_word_indices_placeholder = tf.compat.v1.placeholder(
            tf.int32, (None), name='node_word_indices_placeholder')
        self.labels_placeholder = tf.compat.v1.placeholder(
            tf.int32, (None), name='labels_placeholder')

        # add model variables
        with tf.compat.v1.variable_scope('Embeddings'):
            embeddings = tf.compat.v1.get_variable(
                'embeddings', [len(self.vocab), self.config.embed_size])
        with tf.compat.v1.variable_scope('Composition'):
            W1 = tf.compat.v1.get_variable(
                'W1', [2 * self.config.embed_size, self.config.embed_size])
            b1 = tf.compat.v1.get_variable('b1', [1, self.config.embed_size])
        with tf.compat.v1.variable_scope('Projection'):
            U = tf.compat.v1.get_variable(
                'U', [self.config.embed_size, self.config.label_size])
            bs = tf.compat.v1.get_variable('bs', [1, self.config.label_size])

        # build recursive graph

        tensor_array = tf.TensorArray(tf.float32,
                                      size=0,
                                      dynamic_size=True,
                                      clear_after_read=False,
                                      infer_shape=False)

        def embed_word(word_index):
            with tf.device('/cpu:0'):
                return tf.expand_dims(tf.gather(embeddings, word_index), 0)

        def combine_children(left_tensor, right_tensor):
            return tf.nn.relu(
                tf.matmul(tf.concat([left_tensor, right_tensor], 1), W1) + b1)

        def loop_body(tensor_array, i):
            node_is_leaf = tf.gather(self.is_leaf_placeholder, i)
            node_word_index = tf.gather(self.node_word_indices_placeholder, i)
            left_child = tf.gather(self.left_children_placeholder, i)
            right_child = tf.gather(self.right_children_placeholder, i)
            node_tensor = tf.cond(
                node_is_leaf, lambda: embed_word(node_word_index),
                lambda: combine_children(tensor_array.read(left_child),
                                         tensor_array.read(right_child)))
            tensor_array = tensor_array.write(i, node_tensor)
            i = tf.add(i, 1)
            return tensor_array, i

        loop_cond = lambda tensor_array, i: \
            tf.less(i, tf.squeeze(tf.shape(self.is_leaf_placeholder)))
        self.tensor_array, _ = tf.while_loop(loop_cond,
                                             loop_body, [tensor_array, 0],
                                             parallel_iterations=1)

        # add projection layer
        self.logits = tf.matmul(self.tensor_array.concat(), U) + bs
        self.root_logits = tf.matmul(
            self.tensor_array.read(self.tensor_array.size() - 1), U) + bs
        self.root_prediction = tf.squeeze(tf.argmax(self.root_logits, 1))

        # add loss layer
        regularization_loss = self.config.l2 * (tf.nn.l2_loss(W1) +
                                                tf.nn.l2_loss(U))
        included_indices = tf.where(tf.less(self.labels_placeholder, 2))
        self.full_loss = regularization_loss + tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=tf.gather(self.logits, included_indices),
                labels=tf.gather(self.labels_placeholder, included_indices)))
        self.root_loss = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.root_logits, labels=self.labels_placeholder[-1:]))

        # add training op
        self.train_op = tf.train.GradientDescentOptimizer(
            self.config.lr).minimize(self.full_loss)
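The placeholders above take one entry per tree node, ordered so that children appear at lower indices than their parents, since the while_loop reads child tensors that were written on earlier iterations. A hedged sketch of a companion method that flattens a tree into a feed_dict; the node attributes (isLeaf, left, right, word, label) and vocab.encode are assumptions about the tree/vocab interface, not confirmed by the snippet:

    def build_feed_dict(self, root):
        # Post-order traversal so every child index precedes its parent index.
        nodes = []

        def collect(node):
            if not node.isLeaf:
                collect(node.left)
                collect(node.right)
            nodes.append(node)

        collect(root)
        index = {node: i for i, node in enumerate(nodes)}
        return {
            self.is_leaf_placeholder: [n.isLeaf for n in nodes],
            self.left_children_placeholder: [index[n.left] if not n.isLeaf else -1 for n in nodes],
            self.right_children_placeholder: [index[n.right] if not n.isLeaf else -1 for n in nodes],
            self.node_word_indices_placeholder: [self.vocab.encode(n.word) if n.isLeaf else -1 for n in nodes],
            self.labels_placeholder: [n.label for n in nodes],
        }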
Example #3
def test():

    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    with open(args.test_dir) as f:
        examples = [json.loads(line) for line in f]
    test_dataset = utils.Dataset(examples)

    test_iter = DataLoader(dataset=test_dataset,
                           batch_size=args.batch_size,
                           shuffle=False)
    if use_gpu:
        checkpoint = torch.load(args.load_dir, map_location='cuda:0')
    else:
        checkpoint = torch.load(args.load_dir,
                                map_location=lambda storage, loc: storage)

    # checkpoint['args'].device stores the device used at train time;
    # if we are testing on a CPU, we must override the device to None
    if not use_gpu:
        checkpoint['args'].device = None
    net = getattr(models, checkpoint['args'].model)(checkpoint['args'])
    net.load_state_dict(checkpoint['model'])
    if use_gpu:
        net.cuda()
    net.eval()

    doc_num = len(test_dataset)
    time_cost = 0
    file_id = 1
    for batch in tqdm(test_iter):
        features, _, summaries, doc_lens = vocab.make_features(batch)
        t1 = time()
        if use_gpu:
            probs = net(Variable(features).cuda(), doc_lens)
        else:
            probs = net(Variable(features), doc_lens)
        t2 = time()
        time_cost += t2 - t1
        start = 0
        for doc_id, doc_len in enumerate(doc_lens):
            stop = start + doc_len
            prob = probs[start:stop]
            topk = min(args.topk, doc_len)
            try:
                topk_indices = prob.topk(topk)[1].cpu().data.numpy()
            except Exception:
                continue
            topk_indices.sort()
            doc = batch['doc'][doc_id].split('\n')[:doc_len]
            hyp = [doc[index] for index in topk_indices]
            ref = summaries[doc_id]
            with open(os.path.join(args.ref, str(file_id) + '.txt'), 'w') as f:
                f.write(ref)
            with open(os.path.join(args.hyp, str(file_id) + '.txt'), 'w') as f:
                f.write('\n'.join(hyp))
            start = stop
            file_id = file_id + 1
    print('Speed: %.2f docs / s' % (doc_num / time_cost))
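Several of these examples wrap the parsed JSON lines in `utils.Dataset` and later index the collated batch as a dict (e.g. `batch['doc']`). A minimal sketch of such a wrapper, assuming each example is a dict and the class only needs to support `len()` and indexing for `DataLoader`; the real `utils.Dataset` may do more:

import torch.utils.data


class Dataset(torch.utils.data.Dataset):
    def __init__(self, examples):
        # `examples` is a list of dicts parsed from one JSON object per line.
        self.examples = examples

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        return self.examples[idx]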
Example #4
def train():
    #command: python main.py -device -1 -batch_size 32 -model RNN_RNN -seed 1 -save_dir checkpoints/XXX.pt

    logging.info('Loading vocab, train and val datasets. Please wait a moment.')
    #embedding --->load default='data/embedding.npz'
    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    #word2id ---> load default='data/word2id.json'
    with open(args.word2id) as f:
        word2id = json.load(f)

    vocab = utils.Vocab(embed, word2id)
    # load training dataset
    #train_dir--->load default='data/train.json'
    with open(args.train_dir) as f:
        examples = [json.loads(line) for line in f]
    train_dataset = utils.Dataset(examples)

    # load validation dataset
    with open(args.val_dir) as f:
        examples = [json.loads(line) for line in f]
    val_dataset = utils.Dataset(examples)

    # update args
    args.embed_num = embed.size(0)   # number of embeddings (default: 100)
    args.embed_dim = embed.size(1)   # size of each embedding (default: 100)
    args.kernel_sizes = [int(ks) for ks in args.kernel_sizes.split(',')]  # kernel sizes for pooling (default: 3,4,5)

    # build model
    net = getattr(models,args.model)(args,embed)  #get model and embedding
    
    train_iter = DataLoader(dataset=train_dataset,batch_size=args.batch_size, shuffle=True)
    val_iter = DataLoader(dataset=val_dataset,batch_size=args.batch_size, shuffle=False)

    # loss function
    #Creates a criterion that measures the Binary Cross Entropy between the target and the output:
    criterion = nn.BCELoss()

    # model info
    print("model info is: ", net)
    params = sum(p.numel() for p in list(net.parameters())) / 1e6
    print('#Params: %.1fM' % (params))
    
    min_loss = float('inf')
    optimizer = torch.optim.Adam(net.parameters(),lr=args.lr)
    net.train()
    
    t1 = time() 
    for epoch in range(1,args.epochs+1):
        for i,batch in enumerate(train_iter):
            features,targets,_,doc_lens = vocab.make_features(batch)
            features,targets = Variable(features), Variable(targets.float())
            
            if use_gpu:
                features = features.cuda()
                targets = targets.cuda()
            else:
                print("no gpu")
            probs = net(features,doc_lens)
            # criterion(predicted probs, targets)
            loss = criterion(probs,targets)
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm(net.parameters(), args.max_norm)
            optimizer.step()
            if args.debug:
                print('Batch ID: %d Loss: %f' % (i, loss.item()))
                continue
            if i % args.report_every == 0:
                cur_loss = eval(net,vocab,val_iter,criterion)
                if cur_loss < min_loss:
                    min_loss = cur_loss
                    best_path = net.save()
                logging.info('Epoch: %2d Min_Val_Loss: %f Cur_Val_Loss: %f'
                        % (epoch,min_loss,cur_loss))
    t2 = time()
    logging.info('Total Cost:%f h'%((t2-t1)/3600))
Example #5
    def __init__(self, config):
        self.config = config

        # Load train data and build vocabulary
        self.train_data, self.dev_data, self.test_data = tree.simplified_data(
            700, 100, 200)
        # print("data ",self.train_data))
        self.vocab = utils.Vocab()
        train_sents = [t.get_words() for t in self.train_data]
        self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

        cluster = tf.train.ClusterSpec(
            {"local": ["localhost:2222", "localhost:2223"]})

        # add input placeholders
        self.is_leaf_placeholder = tf.placeholder(tf.int32, (None),
                                                  name='is_leaf_placeholder')
        self.node_word_indices_placeholder = tf.placeholder(
            tf.int32, (None), name='node_word_indices_placeholder')
        self.labels_placeholder = tf.placeholder(tf.int32, (None),
                                                 name='labels_placeholder')
        self.cons_placeholder = tf.placeholder(tf.int32, (None), name='cons')

        # add model variables
        # making initialization deterministic for now
        # initializer = tf.random_normal_initializer(seed=1)
        with tf.variable_scope('Embeddings'):
            self.embeddings = tf.get_variable(
                'embeddings', [len(self.vocab), self.config.embed_size])

        with tf.variable_scope('Composition'):
            W1 = tf.get_variable(
                'W1', [2 * self.config.embed_size, self.config.embed_size])
            b1 = tf.get_variable('b1', [1, self.config.embed_size])

        with tf.variable_scope('Projection'):
            U = tf.get_variable(
                'U', [self.config.embed_size, self.config.label_size])
            bs = tf.get_variable('bs', [1, self.config.label_size])

        # Build recursive graph
        def embed_word(word_index, embeddings):
            return tf.expand_dims(tf.gather(embeddings, word_index), 0)

        def combine_children(left_tensor, right_tensor, W, b):
            return tf.nn.relu(
                tf.matmul(tf.concat([left_tensor, right_tensor], 1), W) + b)

        def find_loss(node_tensor, i, labels, U, bs):
            # add projection layer
            node_logits = tf.matmul(node_tensor, U) + bs
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=node_logits, labels=labels[i:i + 1])
            return loss

        def base_case(node_word_indices, i, embeddings, labels, U, bs):

            word_index = tf.gather(node_word_indices, i)
            node_tensor = embed_word(word_index, embeddings)
            loss = find_loss(node_tensor, i, labels, U, bs)

            return [node_tensor, loss]

        def rec_case(i, is_leaf, node_word_indices, embeddings, W, b, labels,
                     U, bs):

            with tf.device("/job:local/replica:0/task:0/device:CPU:0"):
                left_node, left_loss = rec(i * 2, is_leaf, node_word_indices,
                                           embeddings, W, b, labels, U, bs)
                right_node, right_loss = rec(i * 2 + 1, is_leaf,
                                             node_word_indices, embeddings, W,
                                             b, labels, U, bs)

            with tf.device("/job:local/replica:0/task:1/device:CPU:0"):
                node_tensor = combine_children(left_node, right_node, W, b)

            node_loss = find_loss(node_tensor, i, labels, U, bs)
            loss = tf.concat([left_loss, node_loss, right_loss], 0)

            return [node_tensor, loss]

        # Function Declaration
        rec = function.Declare("Rec", [("i", tf.int32), ("is_leaf", tf.int32),
                                       ("node_word_indices", tf.int32),
                                       ("embeddings", tf.float32),
                                       ("W", tf.float32), ("b", tf.float32),
                                       ("labels", tf.int32), ("U", tf.float32),
                                       ("bs", tf.float32)],
                               [("ret", tf.float32), ("ret1", tf.float32)])

        # Function Definition
        @function.Defun(tf.int32,
                        tf.int32,
                        tf.int32,
                        tf.float32,
                        tf.float32,
                        tf.float32,
                        tf.int32,
                        tf.float32,
                        tf.float32,
                        func_name="Rec",
                        grad_func="GradFac",
                        create_grad_func=True,
                        out_names=["ret", "ret1"])
        def RecImpl(i, is_leaf, node_word_indices, embeddings, W, b, labels, U,
                    bs):
            node_tensor, loss = \
                tf.cond(tf.equal(tf.gather(is_leaf, i), tf.constant(1)),
                        lambda: base_case(node_word_indices, i, embeddings, labels, U, bs),
                        lambda: rec_case(i, is_leaf, node_word_indices, embeddings, W, b, labels, U, bs))
            return [node_tensor, loss]

        RecImpl.add_to_graph(tf.get_default_graph())

        self.node_tensor, self.full_loss = rec(
            self.cons_placeholder, self.is_leaf_placeholder,
            self.node_word_indices_placeholder, self.embeddings, W1, b1,
            self.labels_placeholder, U, bs)

        # add projection layer
        self.root_logits = tf.matmul(self.node_tensor, U) + bs
        self.root_prediction = tf.squeeze(tf.argmax(self.root_logits, 1))

        # add loss layer
        with tf.device("/job:local/replica:0/task:1/device:CPU:0"):
            l1 = tf.nn.l2_loss(W1)
        with tf.device("/job:local/replica:0/task:0/device:CPU:0"):
            l2 = tf.nn.l2_loss(U)

        l = l1 + l2
        regularization_loss = self.config.l2 * l

        with tf.device("/job:local/replica:0/task:1/device:CPU:0"):
            x = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.root_logits, labels=self.labels_placeholder[1:2])
        self.root_loss = regularization_loss + tf.reduce_sum(x)

        # # add training op
        self.full_loss = tf.reduce_sum(self.full_loss)
        self.train_op = tf.train.AdamOptimizer(self.config.lr).minimize(
            self.full_loss)
Example #6
  def __init__(self, config):
    self.config = config

    # Load train data and build vocabulary
    self.train_data, self.dev_data, self.test_data = tree.simplified_data(700,
                                                                          100,
                                                                          200)
    self.config.max_tree_nodes = tree.get_max_tree_nodes(self.train_data + self.dev_data + self.test_data)
        
    print(self.config.max_tree_nodes)

    # print("data ",self.train_data))
    self.vocab = utils.Vocab()
    train_sents = [t.get_words() for t in self.train_data]
    self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    # add input placeholders
    dim1 = self.config.batch_size
    dim2 = self.config.max_tree_nodes

    self.is_leaf_placeholder = tf.placeholder(
        tf.bool, [dim1, dim2], name='is_leaf_placeholder')
    self.left_children_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='left_children_placeholder')
    self.right_children_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='right_children_placeholder')
    self.node_word_indices_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='node_word_indices_placeholder')
    self.labels_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='labels_placeholder')
    self.tree_size_placeholder = tf.placeholder(
        tf.int32, [dim1], name='tree_size_placeholder')
    # add model variables
    # making initialization deterministic for now
    # initializer = tf.random_normal_initializer(seed=1)
    with tf.variable_scope('Embeddings'):
        self.embeddings = tf.get_variable('embeddings',
                                     [len(self.vocab),
                                     self.config.embed_size])
    with tf.variable_scope('Composition'):
        self.W1 = tf.get_variable('W1',
                             [2 * self.config.embed_size,
                                 self.config.embed_size])
        self.b1 = tf.get_variable('b1', [1, self.config.embed_size]) 
    with tf.variable_scope('Projection'):
        self.U = tf.get_variable('U',
                            [self.config.embed_size,
                             self.config.label_size])
        self.bs = tf.get_variable('bs', [1, self.config.label_size])

    # Build recursive graph

    outloss = []
    prediction = []
    root_loss = []

    for idx_batch in range(self.config.batch_size):

        self.root_prediction, self.full_loss, self.root_loss = self.compute_tree(idx_batch)

        prediction.append(self.root_prediction)
        outloss.append(self.full_loss)
        root_loss.append(self.root_loss)

    batch_loss = tf.stack(outloss)
    self.pred = tf.stack(prediction)
    self.rloss = tf.stack(root_loss)

    # Compute batch loss
    self.total_loss = tf.reduce_mean(batch_loss)
    # Add training op
    self.train_op = tf.train.AdamOptimizer(self.config.lr).minimize(self.total_loss)
Example #7
 def _build_vocab(self):
     self.vocab = utils.Vocab(cfg.vocab_size)
     vp = cfg.vocab_path_train if cfg.mode == 'train' or cfg.vocab_path_eval is None else cfg.vocab_path_eval
     # vp = cfg.vocab_path+'.json.freq.json'
     self.vocab.load_vocab(vp)
     return self.vocab.vocab_size
Example #8
def test():
    logging.info('Loading vocab and test dataset. Please wait a moment.')

    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    with open(args.word2id,encoding='utf-8') as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    # update args
    args.embed_num = embed.size(0)
    args.embed_dim = embed.size(1)
    # build model
    
    with open(args.test_dir,encoding='utf-8') as f:
        examples = [json.loads(line) for line in f]
    test_dataset = utils.Dataset(examples)

    test_iter = DataLoader(dataset=test_dataset,
                            batch_size=args.batch_size,
                            shuffle=False)
    if use_gpu:
        checkpoint = torch.load(args.load_dir)
    else:
        checkpoint = torch.load(args.load_dir, map_location=lambda storage, loc: storage)

    # checkpoint['args'].device stores the device used at train time;
    # if we are testing on a CPU, we must override the device to None
    if not use_gpu:
        checkpoint['args'].device = None
    net = getattr(model,checkpoint['args'].model)(checkpoint['args'],embed)
    net.load_state_dict(checkpoint['model'])
    if use_gpu:
        net.cuda()
    net.eval()
    print('running test!')
    doc_num = len(test_dataset)
    time_cost = 0
    file_id = 1
    count = 0
    for batch in tqdm(test_iter):
        count+=1
        features, targets,summaries, doc_lens = vocab.make_features(batch)
        t1 = time()
        if use_gpu:
            features = features.cuda()
            targets = targets.cuda()
        probs = net(features, doc_lens)
        # probs=probs.to('cpu')
        # y_pred = np.where(probs>=0.6,1,0)
        # y_true = targets
        # accuracy += accuracy_score(y_true,y_pred)
        start = 0
        for doc_id,doc_len in enumerate(doc_lens):
            stop = start + doc_len
            prob = probs[start:stop]
            # print(prob)
            
            # label = labels[start:stop]
            # prob_n = prob.cpu().data.numpy()
            
            
            topk = min(args.topk,doc_len)
            topk_indices = prob.topk(topk)[1].to('cpu')
            topk_indices.sort()
            doc = batch['doc'][doc_id].split('\n')[:doc_len]
            hyp = [doc[index] for index in topk_indices]
            ref = summaries[doc_id]
            with open(os.path.join(args.ref,str(file_id)+'.txt'), 'w',encoding='utf-8') as f:
                f.write(ref)
            with open(os.path.join(args.hyp,str(file_id)+'.txt'), 'w',encoding='utf-8') as f:
                f.write('.\n'.join(hyp))
            start = stop
            file_id = file_id + 1
        t2 = time()
        time_cost += t2 - t1
    print('Speed: %.2f docs / s' % (doc_num / time_cost))
Example #9
    def __init__(self, config):
        self.config = config

        # Load train data and build vocabulary
        self.train_data, self.dev_data, self.test_data = tree.simplified_data(700,
                                                                              100,
                                                                              200)
        # print("data ",self.train_data))
        self.vocab = utils.Vocab()
        train_sents = [t.get_words() for t in self.train_data]
        self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

        # add input placeholders
        self.is_leaf_placeholder = tf.placeholder(
            tf.int32, (None), name='is_leaf_placeholder')
        self.node_word_indices_placeholder = tf.placeholder(
            tf.int32, (None), name='node_word_indices_placeholder')
        self.labels_placeholder = tf.placeholder(
            tf.int32, (None), name='labels_placeholder')
        self.cons_placeholder = tf.placeholder(
            tf.int32, (None), name='cons')

        # add model variables
        # making initialization deterministic for now
        initializer = tf.random_normal_initializer(seed=1)
        with tf.variable_scope('Embeddings'):
            self.embeddings = tf.get_variable('embeddings',
                                         [len(self.vocab),
                                         self.config.embed_size])
                                         # ,
                                         # initializer=initializer) #tf.constant_initializer(2.0))

        with tf.variable_scope('Composition'):
            W1 = tf.get_variable('W1',
                                 [2 * self.config.embed_size,
                                     self.config.embed_size])
            # ,
            #                      initializer=initializer) #tf.constant_initializer(0.0))
            b1 = tf.get_variable('b1', [1, self.config.embed_size]) 
            # ,
            #                      initializer=initializer) #tf.constant_initializer(0.0))

        with tf.variable_scope('Projection'):
            U = tf.get_variable('U',
                                [self.config.embed_size,
                                 self.config.label_size])
            # ,
            #                     initializer=initializer) #tf.constant_initializer(0.0))
            bs = tf.get_variable('bs', [1, self.config.label_size])
            # , 
            #                     initializer=initializer) #tf.constant_initializer(0.0))

        # Build recursive graph
        # tensor_array = tf.TensorArray(
        #     tf.float32,
        #     size=0,
        #     dynamic_size=True,
        #     clear_after_read=False,
        #     infer_shape=False)

        # Build recursive graph
        def embed_word(word_index, embeddings):
            # with tf.device('/cpu:0'):
            return tf.expand_dims(tf.gather(embeddings, word_index), 0)

        def combine_children(left_tensor, right_tensor, W, b):
            return tf.nn.relu(tf.matmul(tf.concat([left_tensor, right_tensor], 1), W) + b)

        # Function Declaration
        rec = function.Declare("Rec", [("i", tf.int32), ("is_leaf", tf.int32), 
            ("node_word_indices", tf.int32), ("embeddings", tf.float32), ("W", tf.float32),("b", tf.float32)], 
            [("ret", tf.float32)])

        # Function Definition
        @function.Defun(tf.int32, tf.int32, tf.int32, tf.float32, tf.float32, tf.float32, func_name="Rec", grad_func="GradFac", create_grad_func=True, out_names=["ret"])
        def RecImpl(i, is_leaf, node_word_indices, embeddings, W, b):
            node_word_index = tf.gather(node_word_indices, i)
            node_tensor = \
                tf.cond(tf.equal(tf.gather(is_leaf, i), tf.constant(1)),
                        lambda: embed_word(node_word_index, embeddings),
                        lambda: combine_children(rec(i*2, is_leaf, node_word_indices, embeddings, W, b),
                                               rec(i*2+1, is_leaf, node_word_indices, embeddings, W, b), W, b))
            return node_tensor

        RecImpl.add_to_graph(tf.get_default_graph())


        self.node_tensor = rec(self.cons_placeholder, self.is_leaf_placeholder, 
                            self.node_word_indices_placeholder, self.embeddings, W1, b1)


        # add projection layer

        # self.logits = tf.matmul(self.tensor_array.concat(), U) + bs

        # 1x35 * 35x35 + 1x35 -> 1x35 projection
        self.root_logits = tf.matmul(self.node_tensor, U) + bs
        self.root_prediction = tf.squeeze(tf.argmax(self.root_logits, 1))

        # add loss layer
        # regularization_loss = self.config.l2 * (tf.nn.l2_loss(W1) + tf.nn.l2_loss(U))
        # included_indices = tf.where(tf.less(self.labels_placeholder, 2))

        # self.full_loss = regularization_loss + tf.reduce_sum(
        #     tf.nn.sparse_softmax_cross_entropy_with_logits(
        #         logits=tf.gather(self.logits, included_indices),labels=tf.gather(self.labels_placeholder, included_indices)))

        self.root_loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.root_logits,labels=self.labels_placeholder[1:2]))
        
        # # add training op
        self.train_op = tf.train.GradientDescentOptimizer(self.config.lr).minimize(self.root_loss)
Example #10
  def __init__(self, config):
    self.config = config

    # Load train data and build vocabulary
    self.train_data, self.dev_data, self.test_data, self.real_test = tree.simplified_data(4000,
                                                                          500,
                                                                          500)
    
    # add input placeholders
    self.is_leaf_placeholder = tf.placeholder(
        tf.bool, (None), name='is_leaf_placeholder')
    self.left_children_placeholder = tf.placeholder(
        tf.int32, (None), name='left_children_placeholder')
    self.right_children_placeholder = tf.placeholder(
        tf.int32, (None), name='right_children_placeholder')
    self.node_word_indices_placeholder = tf.placeholder(
        tf.int32, (None), name='node_word_indices_placeholder')
    self.labels_placeholder = tf.placeholder(
        tf.int32, (None), name='labels_placeholder')

    self.vocab = utils.Vocab()
    data = self.train_data
    train_sents = [t.get_words() for t in data]
    vocab_size = self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))
    '''
    def loadGloveModel(gloveFile):
      print("Loading Glove Model")
      f = open(gloveFile,'r')
      words = []
      embeddings = []
      for line in f:
        splitLine = line.split()
        word = splitLine[0]
        embedding = [float(val) for val in splitLine[1:]]
        words.append(word)
        embeddings.append(embedding)
      print "Done.",len(words)," words loaded!"
      return words, embeddings
      
    self.glove_words, self.embeddings = loadGloveModel("filtered_glove_300.txt")
    num = 0
    embed = np.zeros((vocab_size, self.config.embed_size), dtype='f')
    for i in range(vocab_size):
      word = self.vocab.decode(i)
      if word in self.glove_words:
        indx = self.glove_words.index(word)
        num += 1
        np.append(embed, self.embeddings[indx])
      else:
        np.append(embed, np.random.uniform(-0.1, 0.1, self.config.embed_size))
    print(num) 
    '''
    with tf.variable_scope('Embeddings'):
      embeddings = tf.get_variable('embeddings', [len(self.vocab), self.config.embed_size])
      #embeddings = tf.get_variable('embeddings', initializer=embed, trainable=True)
    a = np.zeros((self.config.embed_size, self.config.embed_size), dtype='f')
    np.fill_diagonal(a, 0.5)
    b = np.zeros((2*self.config.embed_size, self.config.embed_size), dtype='f')
    b = np.vstack((a,a))
    print(b)
    with tf.variable_scope('Composition'):
      W1 = tf.Variable(b, name='W1', dtype=tf.float32)
      b1 = tf.get_variable('b1', [1, self.config.embed_size])
    with tf.variable_scope('Projection'):
      U = tf.get_variable('U', [self.config.embed_size, self.config.label_size])
      bs = tf.get_variable('bs', [1, self.config.label_size])

    # build recursive graph
    tensor_array = tf.TensorArray(
        tf.float32,
        size=0,
        dynamic_size=True,
        clear_after_read=False,
        infer_shape=False)
    
    def embed_word(word_index):
      with tf.device('/cpu:0'):
        return tf.expand_dims(tf.gather(embeddings, word_index), 0)

    def combine_children(left_tensor, right_tensor):
      return tf.nn.relu(tf.matmul(tf.concat([left_tensor, right_tensor], 1), W1) + b1)

    def loop_body(tensor_array, i):
      node_is_leaf = tf.gather(self.is_leaf_placeholder, i)
      node_word_index = tf.gather(self.node_word_indices_placeholder, i)
      left_child = tf.gather(self.left_children_placeholder, i)
      right_child = tf.gather(self.right_children_placeholder, i)
      node_tensor = tf.cond(
          node_is_leaf,
          lambda: embed_word(node_word_index),
          lambda: combine_children(tensor_array.read(left_child),
                                   tensor_array.read(right_child)))
      tensor_array = tensor_array.write(i, node_tensor)
      i = tf.add(i, 1)
      return tensor_array, i

    loop_cond = lambda tensor_array, i: \
        tf.less(i, tf.squeeze(tf.shape(self.is_leaf_placeholder)))
    self.tensor_array, _ = tf.while_loop(
        loop_cond, loop_body, [tensor_array, 0], parallel_iterations=1)

    # add projection layer
    self.logits = tf.matmul(self.tensor_array.concat(), U) + bs
    self.root_logits = tf.matmul(
        self.tensor_array.read(self.tensor_array.size() - 1), U) + bs
    self.root_prediction = tf.squeeze(tf.argmax(self.root_logits, 1))

    # add loss layer
    regularization_loss = self.config.l2 * (
        tf.nn.l2_loss(W1) + tf.nn.l2_loss(U))
    included_indices = tf.where(tf.less(self.labels_placeholder, 2))
    #self.full_loss = regularization_loss + tf.reduce_sum(
        #tf.nn.sparse_softmax_cross_entropy_with_logits(
            #tf.gather(self.logits, included_indices), tf.gather(
                #self.labels_placeholder, included_indices)))
    self.root_loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.root_logits, labels=self.labels_placeholder[-1:]))

    # add training op
    self.train_op = tf.train.AdamOptimizer(self.config.lr).minimize(
        self.root_loss)
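A side note on the `W1` initialization in this example: stacking two 0.5-scaled identity blocks means the composition layer starts out as a plain average of the two child vectors (before the bias and ReLU). A quick standalone check, using a small stand-in value for `config.embed_size`:

import numpy as np

embed_size = 4  # small stand-in for config.embed_size
a = np.zeros((embed_size, embed_size), dtype='f')
np.fill_diagonal(a, 0.5)
W1 = np.vstack((a, a))  # shape (2 * embed_size, embed_size), as in the model above

left = np.random.rand(1, embed_size).astype('f')
right = np.random.rand(1, embed_size).astype('f')
combined = np.concatenate([left, right], axis=1).dot(W1)
assert np.allclose(combined, (left + right) / 2, atol=1e-6)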
Example #11
def train():
    print('Loading vocab, train and valid dataset...')
    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    args.embed_num = embed.size(0)
    args.embed_dim = embed.size(1)
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    train_data = []
    fns = os.listdir(args.train_dir)
    fns.sort()
    for fn in tqdm(fns):
        f = open(args.train_dir + fn, 'r')
        train_data.append(json.load(f))
        f.close()

    val_data = []
    fns = os.listdir(args.valid_dir)
    fns.sort()
    for fn in tqdm(fns):
        f = open(args.valid_dir + fn, 'r')
        val_data.append(json.load(f))
        f.close()

    net = getattr(model, args.model)(args, embed)
    loss1 = getattr(model, 'hinge_loss_1')()
    loss2 = getattr(model, 'myLoss2')()  # loss for the SRL and sentence-scoring training stages
    if use_cuda:
        net.cuda()
        loss1.cuda()
        loss2.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
    net.train()

    # Train the SRL scorer
    print('Begin train SRL predictor...')
    for epoch in range(1, args.srl_epochs + 1):
        for i, blog in enumerate(train_data):
            sents, sent_targets, doc_lens, doc_targets, events, event_targets, event_tfs, event_prs, event_lens, event_sent_lens, _1, _2, = vocab.make_tensors(
                blog, args)
            if use_cuda:
                sents = sents.cuda()
                events = events.cuda()
                event_targets = event_targets.cuda()
                event_tfs = event_tfs.cuda()
            event_probs = net(sents, doc_lens, events, event_lens,
                              event_sent_lens, event_tfs, True)
            loss = loss1(event_probs, event_targets)
            optimizer.zero_grad()
            if loss.data.item() > 1e-10:
                loss.backward()
            clip_grad_norm_(net.parameters(), args.max_norm)
            optimizer.step()

            print('SRL EPOCH [%d/%d]: BATCH_ID=[%d/%d] loss=%f' %
                  (epoch, args.srl_epochs, i, len(train_data), loss))

            cnt = (epoch - 1) * len(train_data) + i
            if cnt % args.valid_every == 0 and cnt / args.valid_every >= 0:
                print('Begin SRL valid...Epoch %d, Batch %d' % (epoch, i))
                p_5, p_10, p_20, mse = evaluate_srl(net, loss1, vocab,
                                                    val_data)
                save_path = args.save_dir + args.model + '_SRL_%d_%.4f_%.4f_%.4f_%.4f' % (
                    cnt / args.valid_every, p_5, p_10, p_20, mse)
                net.save(save_path)
                print('Epoch: %2d Loss: %f' % (epoch, loss))
        adjust_learning_rate(optimizer, epoch)
    """
Example #12
def main():

    utils.print_config(args)

    if 'train' not in args.mode:
        args.keep_rate = 1.0
    args.use_pretrain = True if args.use_pretrain == 'True' else False
    args.use_aux_task = True if args.use_aux_task == 'True' else False

    if args.mode == 'lm_train':
        args.model = 'lm'
        args.data_path = "./data/wikitext/wikitext-103/processed_wiki_train.bin"
        args.use_pretrain = False

    args.model_path = os.path.join(args.model_path, args.exp_name).format(
        args.model)  # model_path default: "data/log/{}"

    if not os.path.exists(args.model_path):
        if 'train' not in args.mode:
            print(args.model_path)
            raise ValueError
        os.makedirs(args.model_path)
    with open(os.path.join(args.model_path, 'config.json'),
              'w',
              encoding='utf8') as f:
        json.dump(vars(args), f)

    print("Default models path: {}".format(args.model_path))

    print('code start/ {} mode / {} models'.format(args.mode, args.model))
    utils.assign_specific_gpu(args.gpu_nums)

    vocab = utils.Vocab()

    vardicts = utils.get_pretrain_weights(
        args.true_pretrain_ckpt_path
    ) if args.use_pretrain and args.mode == 'train' else None

    if args.mode == 'decode':
        if args.model == 'mmi_bidi': args.beam_size = args.mmi_bsize
        args.batch_size = args.beam_size

    modelhps = deepcopy(args)
    if modelhps.mode == 'decode':
        modelhps.max_dec_len = 1

    if args.model == 'vanilla':
        model = BaseModel(vocab, modelhps)
    elif args.model == 'mmi_bidi':
        if args.mode == 'decode':
            bw_graph = tf.Graph()
            with bw_graph.as_default():
                bw_model = BaseModel(vocab, args)

            bw_sess = tf.Session(graph=bw_graph, config=utils.gpu_config())

            with bw_sess.as_default():
                with bw_graph.as_default():
                    bidi_ckpt_path = utils.load_ckpt(bw_model.hps,
                                                     bw_model.saver, bw_sess)

            fw_graph = tf.Graph()
            with fw_graph.as_default():
                modelhps.model_path = modelhps.model_path.replace(
                    'mmi_bidi', 'vanilla')
                modelhps.model = 'vanilla'
                fw_model = BaseModel(vocab, modelhps)
            fw_sess = tf.Session(graph=fw_graph)
            with fw_sess.as_default():
                with fw_graph.as_default():
                    ckpt_path = utils.load_ckpt(fw_model.hps, fw_model.saver,
                                                fw_sess)
        else:
            model = BaseModel(vocab, modelhps)

    elif args.model == 'lm':
        model = LMModel(vocab, modelhps)
    elif args.model == 'embmin':
        model = DiverEmbMin(vocab, modelhps)
    else:
        raise ValueError
    print('model loading finished')

    if args.mode in ['train', 'lm_train']:
        train(model, vocab, vardicts)
    elif args.mode == 'decode':
        import time

        if args.model == 'mmi_bidi':
            batcher = Batcher(
                vocab, bw_model.hps.data_path.replace('train_', 'test_'), args)
            decoder = BeamsearchDecoder(fw_model,
                                        batcher,
                                        vocab,
                                        fw_sess=fw_sess,
                                        bw_model=bw_model,
                                        bw_sess=bw_sess,
                                        bidi_ckpt_path=bidi_ckpt_path)
        else:
            batcher = Batcher(vocab,
                              model.hps.data_path.replace('train_', 'test_'),
                              args)
            decoder = BeamsearchDecoder(model, batcher, vocab)
        decoder.decode()
    elif args.mode == 'eval':
        pass
Example #13
    def __init__(self, vocab_file_path=None, model_file_path=None):
        """

        :param vocab_file_path: tuple of code vocab, ast vocab, nl vocab, if given, build vocab by given path
        :param model_file_path:
        """

        # dataset
        self.train_dataset = data.CodePtrDataset(
            code_path=config.train_code_path,
            ast_path=config.train_sbt_path,
            nl_path=config.train_nl_path)
        self.train_dataset_size = len(self.train_dataset)
        self.train_dataloader = DataLoader(
            dataset=self.train_dataset,
            batch_size=config.batch_size,
            shuffle=True,
            collate_fn=lambda *args: utils.unsort_collate_fn(
                args,
                code_vocab=self.code_vocab,
                ast_vocab=self.ast_vocab,
                nl_vocab=self.nl_vocab))

        # vocab
        self.code_vocab: utils.Vocab
        self.ast_vocab: utils.Vocab
        self.nl_vocab: utils.Vocab
        # load vocab from given path
        if vocab_file_path:
            code_vocab_path, ast_vocab_path, nl_vocab_path = vocab_file_path
            self.code_vocab = utils.load_vocab_pk(code_vocab_path)
            self.ast_vocab = utils.load_vocab_pk(ast_vocab_path)
            self.nl_vocab = utils.load_vocab_pk(nl_vocab_path)
        # new vocab
        else:
            self.code_vocab = utils.Vocab('code_vocab')
            self.ast_vocab = utils.Vocab('ast_vocab')
            self.nl_vocab = utils.Vocab('nl_vocab')
            codes, asts, nls = self.train_dataset.get_dataset()
            for code, ast, nl in zip(codes, asts, nls):
                self.code_vocab.add_sentence(code)
                self.ast_vocab.add_sentence(ast)
                self.nl_vocab.add_sentence(nl)

            self.origin_code_vocab_size = len(self.code_vocab)
            self.origin_nl_vocab_size = len(self.nl_vocab)

            # trim vocabulary
            self.code_vocab.trim(config.code_vocab_size)
            self.nl_vocab.trim(config.nl_vocab_size)
            # save vocabulary
            self.code_vocab.save(config.code_vocab_path)
            self.ast_vocab.save(config.ast_vocab_path)
            self.nl_vocab.save(config.nl_vocab_path)
            self.code_vocab.save_txt(config.code_vocab_txt_path)
            self.ast_vocab.save_txt(config.ast_vocab_txt_path)
            self.nl_vocab.save_txt(config.nl_vocab_txt_path)

        self.code_vocab_size = len(self.code_vocab)
        self.ast_vocab_size = len(self.ast_vocab)
        self.nl_vocab_size = len(self.nl_vocab)

        # model
        self.model = models.Model(code_vocab_size=self.code_vocab_size,
                                  ast_vocab_size=self.ast_vocab_size,
                                  nl_vocab_size=self.nl_vocab_size,
                                  model_file_path=model_file_path)
        self.params = list(self.model.code_encoder.parameters()) + \
            list(self.model.ast_encoder.parameters()) + \
            list(self.model.reduce_hidden.parameters()) + \
            list(self.model.decoder.parameters())

        # optimizer
        self.optimizer = Adam([
            {
                'params': self.model.code_encoder.parameters(),
                'lr': config.code_encoder_lr
            },
            {
                'params': self.model.ast_encoder.parameters(),
                'lr': config.ast_encoder_lr
            },
            {
                'params': self.model.reduce_hidden.parameters(),
                'lr': config.reduce_hidden_lr
            },
            {
                'params': self.model.decoder.parameters(),
                'lr': config.decoder_lr
            },
        ],
                              betas=(0.9, 0.999),
                              eps=1e-08,
                              weight_decay=0,
                              amsgrad=False)

        if config.use_lr_decay:
            self.lr_scheduler = lr_scheduler.StepLR(
                self.optimizer,
                step_size=config.lr_decay_every,
                gamma=config.lr_decay_rate)

        # best score and model(state dict)
        self.min_loss: float = 1000
        self.best_model: dict = {}
        self.best_epoch_batch: (int, int) = (None, None)

        # eval instance
        self.eval_instance = eval.Eval(self.get_cur_state_dict())

        # early stopping
        self.early_stopping = None
        if config.use_early_stopping:
            self.early_stopping = utils.EarlyStopping()

        config.model_dir = os.path.join(config.model_dir,
                                        utils.get_timestamp())
        if not os.path.exists(config.model_dir):
            os.makedirs(config.model_dir)
Example #14
    def __init__(self, config):
        self.config = config

        # Load train data and build vocabulary
        self.train_data, self.dev_data, self.test_data = tree.simplified_data(700,
                                                                              100,
                                                                              200)
        max_height = tree.get_max_tree_height(self.train_data + self.dev_data + self.test_data)
        self.config.max_tree_height = pow(2, max_height + 1)
        
        print(self.config.max_tree_height)
        
        # print("data ",self.train_data))
        self.vocab = utils.Vocab()
        train_sents = [t.get_words() for t in self.train_data]
        self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

        # add input placeholders
        dim1 = self.config.batch_size
        dim2 = self.config.max_tree_height

        self.is_leaf_placeholder = tf.placeholder(
            tf.int32, [dim1, dim2], name='is_leaf_placeholder')
        self.node_word_indices_placeholder = tf.placeholder(
            tf.int32, [dim1, dim2], name='node_word_indices_placeholder')
        self.labels_placeholder = tf.placeholder(
            tf.int32, [dim1, dim2], name='labels_placeholder')
        self.cons_placeholder = tf.placeholder(
            tf.int32, (None), name='cons')

        # add model variables
        with tf.variable_scope('Embeddings'):
            self.embeddings = tf.get_variable('embeddings',
                                         [len(self.vocab),
                                         self.config.embed_size])
        with tf.variable_scope('Composition'):
            self.W1 = tf.get_variable('W1',
                                 [2 * self.config.embed_size,
                                     self.config.embed_size])
            self.b1 = tf.get_variable('b1', [1, self.config.embed_size]) 
        with tf.variable_scope('Projection'):
            self.U = tf.get_variable('U',
                                [self.config.embed_size,
                                 self.config.label_size])
            self.bs = tf.get_variable('bs', [1, self.config.label_size])

        # Build recursive graph
        def embed_word(word_index, embeddings):
            return tf.expand_dims(tf.gather(embeddings, word_index), 0)

        def combine_children(left_tensor, right_tensor, W, b):
            return tf.nn.relu(tf.matmul(tf.concat([left_tensor, right_tensor], 1), W) + b)

        def find_loss(node_tensor, i, labels, U, bs):
            # add projection layer
            node_logits = tf.matmul(node_tensor, U) + bs
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=node_logits, labels=labels[i:i+1])
            return loss

        def base_case(node_word_indices, i, embeddings, labels, U, bs):

            word_index = tf.gather(node_word_indices, i)
            node_tensor = embed_word(word_index, embeddings)
            loss = find_loss(node_tensor, i, labels, U, bs)
            
            return [node_tensor, loss]

        def rec_case(i, is_leaf, node_word_indices, embeddings, W, b, labels, U, bs):

            left_node, left_loss = self.rec(i*2, is_leaf, node_word_indices, embeddings, W, b, labels, U, bs)
            right_node, right_loss = self.rec(i*2+1, is_leaf, node_word_indices, embeddings, W, b, labels, U, bs)
            node_tensor = combine_children(left_node, right_node, W, b)
            node_loss = find_loss(node_tensor, i, labels, U, bs)
            loss = tf.concat([left_loss, node_loss, right_loss], 0)

            return [node_tensor, loss]

        # Function Declaration
        self.rec = function.Declare("Rec", [("i", tf.int32), ("is_leaf", tf.int32), ("node_word_indices", tf.int32), 
            ("embeddings", tf.float32), ("W", tf.float32), ("b", tf.float32), ("labels", tf.int32), ("U", tf.float32), ("bs", tf.float32)], 
            [("ret", tf.float32), ("ret1", tf.float32)])

        # Function Definition
        @function.Defun(tf.int32, tf.int32, tf.int32, tf.float32, tf.float32, tf.float32, tf.int32, tf.float32, tf.float32, func_name="Rec", grad_func="GradFac", create_grad_func=True, out_names=["ret", "ret1"])
        def RecImpl(i, is_leaf, node_word_indices, embeddings, W, b, labels, U, bs):
            node_tensor, loss = \
                tf.cond(tf.equal(tf.gather(is_leaf, i), tf.constant(1)),
                        lambda: base_case(node_word_indices, i, embeddings, labels, U, bs),
                        lambda: rec_case(i, is_leaf, node_word_indices, embeddings, W, b, labels, U, bs))
            return [node_tensor, loss]

        RecImpl.add_to_graph(tf.get_default_graph())

        outloss = []
        prediction = []
        root_loss = []

        for idx_batch in range(self.config.batch_size):

            self.root_prediction, self.full_loss, self.root_loss = self.compute_tree(idx_batch)

            prediction.append(self.root_prediction)
            outloss.append(self.full_loss)
            root_loss.append(self.root_loss)

        batch_loss = tf.stack(outloss)
        self.pred = tf.stack(prediction)
        self.rloss = tf.stack(root_loss)

        # Compute batch loss
        # reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        # regpart = tf.add_n(reg_losses)
        # loss = tf.reduce_mean(batch_loss)
        # self.total_loss = loss + 0.5*regpart
        self.total_loss = tf.reduce_mean(batch_loss)
        # Add training op
        self.train_op = tf.train.AdamOptimizer(self.config.lr).minimize(self.total_loss)
Example #15
def train():
    print('Loading vocab, train and valid dataset...')
    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    args.embed_num = embed.size(0)
    args.embed_dim = embed.size(1)
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    train_data = []
    fns = os.listdir(args.train_dir)
    fns.sort()
    for fn in tqdm(fns):
        f = open(args.train_dir + fn, 'r')
        train_data.append(json.load(f))
        f.close()

    val_data = []
    fns = os.listdir(args.valid_dir)
    fns.sort()
    for fn in tqdm(fns):
        f = open(args.valid_dir + fn, 'r')
        val_data.append(json.load(f))
        f.close()

    net = getattr(model, args.model)(args, embed)
    myloss = nn.MSELoss()
    if use_cuda:
        net.cuda()
        myloss.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
    net.train()

    # Train the SRL scorer
    print('Begin train SRL predictor...')
    for epoch in range(1, args.srl_epochs + 1):
        for i, blog in enumerate(train_data):
            sents, sent_targets, doc_lens, doc_targets, events, event_targets, event_tfs, event_prs, event_lens, event_sent_lens, _1, _2, = vocab.make_tensors(
                blog, args)
            if use_cuda:
                sents = sents.cuda()
                events = events.cuda()
                event_targets = event_targets.cuda()
                event_tfs = event_tfs.cuda()
            event_probs = net(sents, doc_lens, events, event_lens, event_tfs,
                              True)
            loss = myloss(event_probs, event_targets)
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm_(net.parameters(), args.max_norm)
            optimizer.step()
            print('SRL EPOCH [%d/%d]: BATCH_ID=[%d/%d] loss=%f' %
                  (epoch, args.srl_epochs, i, len(train_data), loss))
        adjust_learning_rate(optimizer, epoch)
    train_srl_score, valid_srl_score, loss1, loss2 = srl_predict(
        net, myloss, vocab, train_data, val_data)
    print('SRL predict loss: train: %f valid: %f' % (loss1, loss2))

    # Train the sentence scorer
    print('Begin train Sent predictor...')
    adjust_learning_rate(optimizer, 0)
    for epoch in range(1, args.sent_epochs + 1):
        for i, blog in enumerate(train_data):
            sents, sent_targets, doc_lens, doc_targets, events, event_targets, event_tfs, event_prs, event_lens, event_sent_lens, _1, _2, = vocab.make_tensors(
                blog, args)
            event_scores = train_srl_score[i]
            if use_cuda:
                sents = sents.cuda()
                sent_targets = sent_targets.cuda()
                events = events.cuda()
                event_scores = event_scores.cuda()
            sent_probs = net(sents, doc_lens, events, event_lens, event_scores,
                             False)
            loss = myloss(sent_probs, sent_targets)
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm_(net.parameters(), args.max_norm)
            optimizer.step()
            print('SENT EPOCH [%d/%d]: BATCH_ID=[%d/%d] loss=%f' %
                  (epoch, args.sent_epochs, i, len(train_data), loss))

            cnt = (epoch - 1) * len(train_data) + i
            if cnt % args.valid_every == 0 and cnt / args.valid_every > 0:
                print('Begin valid... Epoch %d, Batch %d' % (epoch, i))
                cur_loss, r1, r2, rl, rsu = evaluate(net, myloss, vocab,
                                                     val_data, valid_srl_score,
                                                     True)
                save_path = args.save_dir + args.model + '_SENT_%d_%.4f_%.4f_%.4f_%.4f_%.4f' % (
                    cnt / args.valid_every, cur_loss, r1, r2, rl, rsu)
                net.save(save_path)
                print(
                    'Epoch: %2d Loss: %f Rouge-1: %f Rouge-2: %f Rouge-l: %f Rouge-SU4: %f'
                    % (epoch, cur_loss, r1, r2, rl, rsu))
        adjust_learning_rate(optimizer, epoch)
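Both of the SRL training examples call an `adjust_learning_rate` helper that is not shown. A minimal sketch of one common implementation; the decay factor `args.lr_decay` is an assumption, not a flag confirmed by these snippets:

def adjust_learning_rate(optimizer, epoch):
    # Assumed schedule: multiply the base learning rate by a decay factor per epoch.
    lr = args.lr * (args.lr_decay ** epoch)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr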
Example #16
def train():
    logging.info('Loading vocab, train and val dataset...')

    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    with open(args.train_dir) as f:
        examples = [json.loads(line) for line in f]
    train_dataset = utils.Dataset(examples)

    with open(args.val_dir) as f:
        examples = [json.loads(line) for line in f]
    val_dataset = utils.Dataset(examples)

    # update args
    args.embed_num = embed.size(0)
    args.embed_dim = embed.size(1)
    args.kernel_sizes = [int(ks)
                         for ks in args.kernel_sizes.split(',')]  # for CNN_RNN

    # build model
    net = getattr(models, args.model)(args, embed)
    if use_gpu:
        net.cuda()

    # load dataset
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)
    valid_loader = DataLoader(dataset=val_dataset,
                              batch_size=args.batch_size,
                              shuffle=False)

    # loss function
    criterion = nn.BCELoss()

    # model info
    print(net)
    params = sum(p.numel() for p in list(net.parameters())) / 1e6
    print('#Params: %.1fM' % (params))

    min_loss = float('inf')
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
    net.train()

    # Tensorbard
    writer = SummaryWriter(f'runs/{args.model}')

    t1 = time()
    for epoch in tqdm(range(1, args.epochs + 1), desc='Epoch', position=0):
        for i, batch in enumerate(tqdm(train_loader, desc='Train',
                                       position=1)):
            features, targets, _, doc_lens = vocab.make_features(batch)
            features, targets = Variable(features), Variable(targets.float())
            if use_gpu:
                features = features.cuda()
                targets = targets.cuda()

            probs = net(features, doc_lens)
            loss = criterion(probs, targets)
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm_(net.parameters(), args.max_norm)
            optimizer.step()

            # TensorBoard
            train_acc = accuracy(probs, targets)
            writer.add_scalar('train_loss_batch', loss,
                              epoch * len(train_loader) + i)
            writer.add_scalar('train_acc_batch', train_acc,
                              epoch * len(train_loader) + i)

            if args.debug:
                print(f'Batch ID: {i}, Loss: {loss.item()}, Acc: {train_acc}')
                continue

            if i % args.report_every == 0:
                cur_loss, cur_acc = eval(net, vocab, valid_loader, criterion)
                if cur_loss < min_loss:
                    min_loss = cur_loss
                    best_path = net.save()
                logging.info(
                    f'Epoch: {epoch}, Min_Val_Loss: {min_loss}, Cur_Val_Loss: {cur_loss}, Cur_Val_Acc: {cur_acc}'
                )

                # TensorBoard
                writer.add_scalar('valid_loss', cur_loss, epoch)
                writer.add_scalar('valid_acc', cur_acc, epoch)

    t2 = time()
    logging.info('Total Time:%f h' % ((t2 - t1) / 3600))
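The TensorBoard logging above uses an `accuracy` helper that is not shown. A minimal sketch under the assumption that `probs` holds per-sentence selection probabilities and `targets` holds 0/1 labels:

def accuracy(probs, targets, threshold=0.5):
    # Assumed helper: fraction of sentences whose thresholded probability matches the label.
    preds = (probs >= threshold).float()
    return (preds == targets.float()).float().mean().item()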