Example #1
def train(train_feat, train_lex, train_y, _args, f_cost, f_update, f_debug, epoch_id, learning_rate):
    ''' This function is called from the main method, and it is primarily responsible for updating the
    parameters. Because of the way that create_circuit builds f_cost, f_update, etc., this function
    needs to stay flexible and cannot be moved into a library.
    Look at lstm_dependency_parsing_simplification.py for more pointers.
    '''
    def train_crf(features, words, labels, learning_rate, f_cost, f_update, f_debug):
        ' Called only for its side effects (parameter updates), so it is unlikely to be useful elsewhere. '
        if labels.shape[0] < 2:
            return 0.0
        iter_cost = f_cost(features, words, labels)
        #_, gold_y, pred_y = f_debug(words, labels)
        f_update(learning_rate)
        return iter_cost

    shuffle([train_feat, train_lex, train_y], _args.seed)
    tic = time.time()
    aggregate_cost = 0.0
    for i, (x_f, x_w, y) in enumerate(zip(train_feat, train_lex, train_y)):
        
        try:
            aggregate_cost += train_crf(x_f, x_w, y, learning_rate, f_cost, f_update, f_debug)
        except IndexError:
            import pdb; pdb.set_trace()
        if _args.verbose == 2 and i % 10 == 0:
            print '[learning] epoch %i >> %2.2f%%' % (epoch_id, (i + 1) * 100. / _args.nsentences),
            print 'completed in %.2f (sec) <<\r' % (time.time() - tic),
            sys.stdout.flush()
    if _args.verbose == 2:
        print '>> Epoch completed in %.2f (sec) <<' % (time.time() - tic), 'training cost: %.2f' % (aggregate_cost)
    #print 'training, current learning rate:', learning_rate
    return
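A note on the shuffle helper: every example on this page calls shuffle(list_of_lists, seed) to permute several parallel lists (features, words, labels) with one shared random order, but the helper itself is not shown here. Below is a minimal sketch, assuming only the behavior these call sites require (an identical in-place permutation of every list).

import random

def shuffle(list_of_lists, seed):
    ''' Sketch only: permute every list in place with the same random order,
    so parallel feature/word/label lists stay aligned. '''
    for lst in list_of_lists:
        random.seed(seed)        # reseed before each list so all permutations are identical
        random.shuffle(lst)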
Example #2
def train_seq(train_lex, train_f, train_y, _args, f_cost, f_update, epoch_id,
              learning_rate):
    ''' This function is called from the main method, and it is primarily responsible for updating the
    parameters. Because of the way that create_circuit builds f_cost, f_update, etc., this function
    needs to stay flexible and cannot be moved into a library.
    Look at lstm_dependency_parsing_simplification.py for more pointers.
    '''
    def train_crf(features, words, labels, learning_rate, f_cost, f_update):
        ' Called only for its side effects (parameter updates), so it is unlikely to be useful elsewhere. '
        if labels.shape[0] < 2:
            return 0.0
        iter_cost = f_cost(features, words, labels)
        f_update(learning_rate)
        return iter_cost

    def train_lstm(features, words, labels, learning_rate, f_cost, f_update,
                   _args):
        ' Called only for its side effects (parameter updates), so it is unlikely to be useful elsewhere. '
        if labels.shape[0] < 2:
            return
        # add a dummy x_f
        iter_cost = f_cost(features, words, labels)
        f_update(learning_rate)
        return iter_cost

    ## main body of train_seq
    if train_f is None:
        shuffle([train_lex, train_y], _args.seed)
    else:
        shuffle([train_lex, train_f, train_y], _args.seed)
    tic = time.time()
    aggregate_cost = 0.0
    for i, (features, words,
            labels) in enumerate(zip(train_f, train_lex, train_y)):
        if len(words) < 2:
            continue
        assert len(words) == len(labels)  #+ 2
        if _args.model == 'lstm':  #train_f == None:
            aggregate_cost += train_lstm(features, words, labels,
                                         learning_rate, f_cost, f_update,
                                         _args)
        elif _args.model == 'crf':
            aggregate_cost += train_crf(features, words, labels, learning_rate,
                                        f_cost, f_update)
        else:
            raise NotImplementedError
        if _args.verbose == 2 and i % 10 == 0:
            print '[learning] epoch %i >> %2.2f%%' % (epoch_id, (i + 1) *
                                                      100. / _args.nsentences),
            print 'completed in %.2f (sec) <<\r' % (time.time() - tic),
            sys.stdout.flush()
    if _args.verbose == 2:
        print '>> Epoch completed in %.2f (sec) <<' % (
            time.time() - tic), 'training cost: %.2f' % (aggregate_cost)
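The f_cost and f_update functions are produced elsewhere (by create_circuit, which is not shown on this page), so the snippet below is only a hypothetical illustration of how train_seq would typically be driven; _args.nepochs, _args.lr, and _args.decay are assumed attribute names, not taken from this page.

# Hypothetical driver loop (assumed names): run train_seq once per epoch and decay the learning rate.
learning_rate = _args.lr
for epoch_id in range(_args.nepochs):
    train_seq(train_lex, train_f, train_y, _args, f_cost, f_update,
              epoch_id, learning_rate)
    learning_rate *= _args.decay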
Example #3
def train_alternative(_args, f_costs_and_updates, epoch_id, learning_rate_arr,
                      nsentences_arr, words_arr, label_arr, idx_arr,
                      dep_mask_arr, batch_size):
    num_tasks = len(f_costs_and_updates)
    print 'num_tasks:', num_tasks
    for i in range(num_tasks):
        f_cost, f_update = f_costs_and_updates[i]
        nsent = nsentences_arr[i]
        if nsent < len(words_arr[i]):
            if epoch_id == 0:
                if dep_mask_arr[0] is not None:
                    shuffle([
                        words_arr[i], idx_arr[i], label_arr[i], dep_mask_arr[i]
                    ], _args.seed)
                else:
                    shuffle([words_arr[i], idx_arr[i], label_arr[i]],
                            _args.seed)
            start, end = epoch_id * nsent, (epoch_id + 1) * nsent
            words, idxs, labels = (words_arr[i][start:end],
                                   idx_arr[i][start:end],
                                   label_arr[i][start:end])
            dep_mask = dep_mask_arr[i][start:end] if _args.graph else None
        else:
            words, idxs, labels = words_arr[i], idx_arr[i], label_arr[i]
            dep_mask = dep_mask_arr[i] if _args.graph else None
        train_single(words, idxs, labels, _args, f_cost, f_update, epoch_id,
                     learning_rate_arr[i], nsentences_arr[i], batch_size,
                     dep_mask, _args.weighted)
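train_alternative expects f_costs_and_updates to be a list with one compiled (f_cost, f_update) pair per task. How those pairs are built is not shown on this page; the lines below are only a hedged sketch of that set-up, and build_task_circuit and tasks are hypothetical names.

# Hypothetical set-up for train_alternative (assumed names, not from this page):
# one compiled (f_cost, f_update) pair and one learning rate per task.
f_costs_and_updates = [build_task_circuit(task, _args) for task in tasks]
learning_rate_arr = [_args.lr] * len(f_costs_and_updates)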
Example #4
def train_joint(_args, f_cost, f_update, epoch_id, learning_rate, num_tasks, nsentences, words_arr, feat_arr, label_arr):
    ''' This function is called from the main method, and it is primarily responsible for updating the parameters.'''
    def train_one_instance(learning_rate, f_cost, f_update, *inputs):
        ' Called only for its side effects (parameter updates), so it is unlikely to be useful elsewhere. '
        iter_cost = f_cost(*inputs)
        f_update(learning_rate)
        return iter_cost

    #shuffle([tl1, tf1, ty1], _args.seed)
    for i in range(num_tasks):
        shuffle([words_arr[i], feat_arr[i], label_arr[i]], _args.seed)
    tic = time.time()
    aggregate_cost = 0.0
    input_params = feat_arr + words_arr + label_arr
    for i, one_input in enumerate(zip(*input_params)):
        aggregate_cost += train_one_instance(learning_rate, f_cost, f_update, *one_input)
        if _args.verbose == 2 and i % 10 == 0:
            print '[learning] epoch %i >> %2.2f%%' % (epoch_id, (i + 1) * 100. / nsentences),
            print 'completed in %.2f (sec) <<\r' % (time.time() - tic),
            sys.stdout.flush()
    if _args.verbose == 2:
        print '>> Epoch completed in %.2f (sec) <<' % (time.time() - tic), 'training cost: %.2f' % (aggregate_cost)
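Because input_params concatenates the per-task feature, word, and label lists, zip(*input_params) yields one tuple per sentence index containing every task's features, then every task's words, then every task's labels, which is the argument order the jointly compiled f_cost receives. A toy illustration of that interleaving (two tasks, two sentences each):

# Toy check of the interleaving in train_joint above.
feat_arr = [['f1_a', 'f1_b'], ['f2_a', 'f2_b']]
words_arr = [['w1_a', 'w1_b'], ['w2_a', 'w2_b']]
label_arr = [['y1_a', 'y1_b'], ['y2_a', 'y2_b']]
input_params = feat_arr + words_arr + label_arr
for one_input in zip(*input_params):
    print one_input
# ('f1_a', 'f2_a', 'w1_a', 'w2_a', 'y1_a', 'y2_a')
# ('f1_b', 'f2_b', 'w1_b', 'w2_b', 'y1_b', 'y2_b')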
Example #5
def train_single(tw, tf, ty, _args, f_cost, f_update, epoch_id, learning_rate, nsentences):
    def train_one_instance(f, w, l, learning_rate, f_cost, f_update):
        ' Called only for its side effects (parameter updates), so it is unlikely to be useful elsewhere. '
        iter_cost = f_cost(f, w, l)
        f_update(learning_rate)
        return iter_cost

    shuffle([tw, tf, ty], _args.seed)
    if nsentences != len(tw):
        tw = tw[:nsentences]
        tf = tf[:nsentences]
        ty = ty[:nsentences]
    tic = time.time()
    aggregate_cost = 0.0
    for i, (f, x, y) in enumerate(zip(tf, tw, ty)):
        assert len(x) >= 2
        assert len(x) == len(y) #+ 2
        aggregate_cost += train_one_instance(f, x, y, learning_rate, f_cost, f_update)
        if _args.verbose == 2 and i % 10 == 0:
            print '[learning] epoch %i >> %2.2f%%' % (epoch_id, (i + 1) * 100. / nsentences),
            print 'completed in %.2f (sec) <<\r' % (time.time() - tic),
            sys.stdout.flush()
    if _args.verbose == 2:
        print '>> Epoch completed in %.2f (sec) <<' % (time.time() - tic), 'training cost: %.2f' % (aggregate_cost)
Example #6
def train_single(train_lex,
                 train_idxs,
                 train_y,
                 _args,
                 f_cost,
                 f_update,
                 epoch_id,
                 learning_rate,
                 nsentences,
                 batchsize=1,
                 dep=None,
                 weighted=False):
    ''' This function is called from the main method, and it is primarily responsible for updating the
    parameters. Because of the way that create_relation_circuit builds f_cost, f_update, etc., this function
    needs to stay flexible and cannot be moved into a library.
    Look at lstm_dependency_parsing_simplification.py for more pointers.
    '''

    # Non-batched version
    def train_instance(words, idxs, sample_weights, label, learning_rate,
                       f_cost, f_update):
        ' Called only for its side effects (parameter updates), so it is unlikely to be useful elsewhere. '
        if words.shape[0] < 2:
            return 0.0

        # include the per-instance sample weights in the inputs
        inputs = idxs + [words, sample_weights, label]
        iter_cost = f_cost(*inputs)  #words, id1, id2, labels)
        f_update(learning_rate)
        return iter_cost

    # Mini-batch version
    def train_batch(words, masks, idxs, sample_weights, label, learning_rate,
                    f_cost, f_update):
        if words.shape[0] < 2:
            return 0.0

        # include the per-instance sample weights in the inputs
        inputs = idxs + [words, masks, sample_weights, label]
        iter_cost = f_cost(*inputs)  #words, id1, id2, labels)
        f_update(learning_rate)
        return iter_cost

    ## main body of train_single

    # generate per-instance weights according to the training label distribution
    total_pos = 0
    total_neg = 0
    for y in train_y:
        if y[0] == 0 and y[1] == 1:
            total_pos += 1
        else:
            total_neg += 1

    print("total pos: %d neg:%d \n" % (total_pos, total_neg))

    sample_weights = [0] * (total_neg + total_pos)
    for idx, y in enumerate(train_y):
        if y[0] == 0 and y[1] == 1:
            sample_weights[idx] = 0.5 * (total_neg + total_pos) / (total_pos)
        else:
            sample_weights[idx] = 0.5 * (total_neg + total_pos) / (total_neg)

    if dep:
        shuffle([train_lex, train_idxs, train_y, sample_weights, dep],
                _args.seed)
    else:
        shuffle([train_lex, train_idxs, train_y, sample_weights], _args.seed)

    if nsentences < len(train_lex):
        train_lex = train_lex[:nsentences]
        train_idxs = train_idxs[:nsentences]
        train_y = train_y[:nsentences]
        sample_weights = sample_weights[:nsentences]

    tic = time.time()
    aggregate_cost = 0.0
    temp_cost_arr = [0.0] * 2

    # Decide whether to use mini-batches.
    # No mini-batch
    if batchsize == 1:
        for i, (words, idxs, label, weight) in enumerate(
                zip(train_lex, train_idxs, train_y, sample_weights)):
            if len(words) < 2:
                continue
            #assert len(words) == len(labels) #+ 2
            idxs = conv_idxs(idxs, len(words))
            if _args.graph:
                assert dep is not None
                if weighted:
                    aggregate_cost += train_batch(words, dep[i], idxs, weight,
                                                  label, learning_rate, f_cost,
                                                  f_update)
                else:
                    aggregate_cost += train_batch(words, dep[i].sum(axis=-1),
                                                  idxs, weight, label,
                                                  learning_rate, f_cost,
                                                  f_update)
            else:
                aggregate_cost += train_instance(words, idxs, weight, label,
                                                 learning_rate, f_cost,
                                                 f_update)
            if _args.verbose == 2 and i % 10 == 0:
                print '[learning] epoch %i >> %2.2f%%' % (epoch_id, (i + 1) *
                                                          100. / nsentences),
                print 'completed in %.2f (sec). << avg loss: %.2f <<\r' % (
                    time.time() - tic, aggregate_cost / (i + 1)),
                sys.stdout.flush()
    # Mini-batch
    else:
        nb_idxs = get_minibatches_idx(len(train_lex), batchsize, shuffle=False)
        nbatches = len(nb_idxs)
        for i, tr_idxs in enumerate(nb_idxs):
            words = [train_lex[ii] for ii in tr_idxs]
            eidxs = [train_idxs[ii] for ii in tr_idxs]
            labels = [train_y[ii] for ii in tr_idxs]
            weights = [sample_weights[ii] for ii in tr_idxs]

            orig_eidxs = eidxs
            if _args.graph:
                assert dep is not None
                masks = [dep[ii] for ii in tr_idxs]
            else:
                masks = None
            x, masks, eidxs, weight = prepare_data(words,
                                                   eidxs,
                                                   masks,
                                                   weights,
                                                   maxlen=200)

            #print 'mask shape:', masks.shape
            if weighted or dep is None:
                iter_cost = train_batch(x, masks, eidxs, weight, labels,
                                        learning_rate, f_cost, f_update)
                aggregate_cost += iter_cost  #[0]
            else:
                aggregate_cost += train_batch(x, masks.sum(axis=-1), eidxs,
                                              weight, labels, learning_rate,
                                              f_cost, f_update)
            if _args.verbose == 2:
                print '[learning] epoch %i >> %2.2f%%' % (epoch_id, (i + 1) *
                                                          100. / nbatches),
                print 'completed in %.2f (sec). << avg loss: %.2f <<\r' % (
                    time.time() - tic, aggregate_cost / (i + 1)),
                #print 'completed in %.2f (sec). << avg loss: %.2f <<%%' % (time.time() - tic, aggregate_cost/(i+1)),
                #print 'average cost for each part: (%.2f, %.2f) <<\r' %(temp_cost_arr[0]/(i+1), temp_cost_arr[1]/(i+1)),
                sys.stdout.flush()
    if _args.verbose == 2:
        print '\n>> Epoch completed in %.2f (sec) <<' % (
            time.time() - tic), 'training cost: %.2f' % (aggregate_cost)
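As a concrete check of the class weighting above: with total_pos = 100 and total_neg = 900, each positive instance gets weight 0.5 * 1000 / 100 = 5.0 and each negative instance 0.5 * 1000 / 900 ≈ 0.56, so the two classes contribute equal total weight. Separately, this example and Example #7 call get_minibatches_idx, which is not defined on this page; the sketch below only mirrors how it is used here (returning a list of index arrays, one per mini-batch) and is an assumption rather than the original helper.

import numpy

def get_minibatches_idx(n, minibatch_size, shuffle=False):
    ''' Sketch only: split indices 0..n-1 into consecutive mini-batches,
    returning a list of index arrays as the callers above expect. '''
    idx_list = numpy.arange(n, dtype='int32')
    if shuffle:
        numpy.random.shuffle(idx_list)
    minibatches = []
    for start in range(0, n, minibatch_size):
        minibatches.append(idx_list[start:start + minibatch_size])
    return minibatches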
Example #7
def train_single(_args, f_cost, f_update, epoch_id, learning_rate, nsentences, *data, **kwargs): #train_lex, train_idxs, train_y, batchsize=1, dep=None, weighted=False):
    ''' This function is called from the main method, and it is primarily responsible for updating the
    parameters. Because of the way that create_relation_circuit builds f_cost, f_update, etc., this function
    needs to stay flexible and cannot be moved into a library.
    Look at lstm_dependency_parsing_simplification.py for more pointers.
    '''
    batchsize = kwargs.pop('batchsize', 1)
    dep = kwargs.pop('dep', None)
    weighted = kwargs.pop('weighted', False)
    # Non-batched version
    def train_instance(learning_rate, f_cost, f_update, *inputs):
        ' Called only for its side effects (parameter updates), so it is unlikely to be useful elsewhere. '
        if inputs[0].shape[0] < 2:
            return 0.0
        #inputs = idxs + [words, label]
        iter_cost = f_cost(*inputs) #words, id1, id2, labels)
        f_update(learning_rate)
        return iter_cost

    # Mini-batch version
    '''def train_batch(words, masks, idxs, label, learning_rate, f_cost, f_update):
        if words.shape[0] < 2:
            return 0.0
        inputs = idxs + [words, masks, label]
        iter_cost = f_cost(*inputs) #words, id1, id2, labels)
        f_update(learning_rate)
        return iter_cost
    '''
    ## main body of train_single
    #print type(data)
    data = list(data)
    if dep:
        #shuffle([train_lex, train_idxs, train_y, dep], _args.seed)
        shuffle(data + [dep], _args.seed)
    else:
        shuffle(data, _args.seed)
    if nsentences < len(data[0]):
        data = [elem[:nsentences] for elem in data]
    tic = time.time()
    aggregate_cost = 0.0
    temp_cost_arr = [0.0] * 2

    # Decide whether to use mini-batches.
    # No mini-batch
    if batchsize == 1:
        print "Error: batch size cannot be 1"
        pass
    # Mini-batch
    else:
        nb_idxs = get_minibatches_idx(len(data[0]), batchsize, shuffle=False)
        nbatches = len(nb_idxs)
        for i, tr_idxs in enumerate(nb_idxs):
            #words = [train_lex[ii] for ii in tr_idxs]
            #eidxs = [train_idxs[ii] for ii in tr_idxs]
            #labels = [train_y[ii] for ii in tr_idxs]
            #print [len(elem) for elem in data]
            batch_data = [[elem[ii] for ii in tr_idxs] for elem in data]
            #orig_eidxs = eidxs
            if _args.graph:
                assert dep is not None
                masks = [dep[ii] for ii in tr_idxs]
            else:
                masks = None
            x, x_masks, obj, obj_masks = prepare_data(batch_data[0], batch_data[1], masks, None, maxlen=200)
            '''print x.shape, len(words)
            for elem, wd in zip(numpy.transpose(x, (1,0,2)), words):
                print 'words:', wd
                print 'converted words:', elem
            '''
            if weighted or dep is None:
                iter_cost = train_instance(learning_rate, f_cost, f_update, x, obj, batch_data[-1], x_masks, obj_masks )

                ## for debug with professor and Nunyin ##
                # print len(x), len(x_masks), len(obj), len(batch_data[-1]), len(obj_masks)
                # print x
                # print obj
                # print x_masks
                # print obj_masks
                # print batch_data[-1]
                # print iter_cost
                ## for debug with professor and Nunyin ##

                #for ii, c in enumerate(iter_cost):
                #    temp_cost_arr[ii] += c
                aggregate_cost += iter_cost#[0]

            else:
                aggregate_cost += train_instance(learning_rate, f_cost, f_update, x, obj, batch_data[-1], masks.sum(axis=-1))
            if _args.verbose == 2:
                print '[learning] epoch %i >> %2.2f%%' % (epoch_id, (i + 1) * 100. / nbatches),
                print 'completed in %.2f (sec). << avg loss: %.2f <<\r' % (time.time() - tic, aggregate_cost/(i+1)),
                #print 'completed in %.2f (sec). << avg loss: %.2f <<%%' % (time.time() - tic, aggregate_cost/(i+1)),
                #print 'average cost for each part: (%.2f, %.2f) <<\r' %(temp_cost_arr[0]/(i+1), temp_cost_arr[1]/(i+1)),
                sys.stdout.flush()
    if _args.verbose == 2:
        print '\n>> Epoch completed in %.2f (sec) <<' % (time.time() - tic), 'training cost: %.2f' % (aggregate_cost)
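Since this variant packs the data tensors into *data and the options into **kwargs, a call site passes the per-instance lists positionally and the options by keyword. The lines below are a hypothetical example only; train_lex, train_idxs, train_y, and dep_masks are assumed variable names, not taken from this page.

# Hypothetical call of the *data/**kwargs variant above (assumed names).
train_single(_args, f_cost, f_update, epoch_id, learning_rate, nsentences,
             train_lex, train_idxs, train_y,
             batchsize=_args.batch_size, dep=dep_masks, weighted=_args.weighted)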