Example #1
File: dy_test.py Project: jcyk/CWS
def test(cws,filename,output_path):

    def seg(char_seq,text):
        lens = cws.forward(char_seq)
        res, begin =[], 0
        for wlen in lens:
            res.append(''.join(text[begin:begin+wlen]))
            begin+=wlen
        return res

    char_seqs = prepareData(cws.character_idx_map,filename,test=True)
    fo = open(output_path,'wb')
    seq_idx = 0 
    for line in open(filename).readlines():
        sent = unicode(line.decode('utf8')).split()
        Left = 0
        output_sent = []
        for idx,word in enumerate(sent):
            if len(re.sub('\W','',word,flags=re.U))==0:
                if idx>Left:
                    words =seg(char_seqs[seq_idx],list(''.join(sent[Left:idx])))
                    seq_idx += 1
                    output_sent.extend(words)
                Left = idx+1
                output_sent.append(word)
        if Left!=len(sent):
            words = seg(char_seqs[seq_idx],list(''.join(sent[Left:])))
            seq_idx += 1
            output_sent.extend(words)
        output_sent = '  '.join(output_sent).encode('utf8')+'\r\n'
        fo.write(output_sent)
    fo.close()
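For reference, the slicing inside seg can be traced by hand: cws.forward returns one predicted word length per output word, and the characters are consumed left to right. A minimal sketch, with a made-up sentence and a hypothetical length sequence standing in for the model output:

# -*- coding: utf-8 -*-
# Hypothetical illustration of the seg() slicing step; `lens` is assumed
# instead of being produced by cws.forward.
text = list(u'我爱北京天安门')   # 7 characters
lens = [1, 1, 2, 3]              # assumed predicted word lengths (must sum to len(text))

res, begin = [], 0
for wlen in lens:
    res.append(''.join(text[begin:begin + wlen]))
    begin += wlen

assert res == [u'我', u'爱', u'北京', u'天安门']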
Example #2
def test(cws, filename, output_path):
    def seg(char_seq, text):
        lens = cws.forward(char_seq)
        res, begin = [], 0
        for wlen in lens:
            res.append(''.join(text[begin:begin + wlen]))
            begin += wlen
        return res

    char_seqs = prepareData(cws.character_idx_map, filename, test=True)
    #fo = open(output_path,'wb')
    fo = open(output_path, 'w')
    seq_idx = 0
    for line in open(filename).readlines():
        #sent = str(line.decode('utf8')).split()
        sent = line.split()
        Left = 0
        output_sent = []
        for idx, word in enumerate(sent):
            if len(re.sub(r'\W', '', word, flags=re.U)) == 0:
                if idx > Left:
                    words = seg(char_seqs[seq_idx],
                                list(''.join(sent[Left:idx])))
                    seq_idx += 1
                    output_sent.extend(words)
                Left = idx + 1
                output_sent.append(word)
        if Left != len(sent):
            words = seg(char_seqs[seq_idx], list(''.join(sent[Left:])))
            seq_idx += 1
            output_sent.extend(words)
        #output_sent = '  '.join(output_sent).encode('utf8')+'\r\n'
        output_sent = '  '.join(output_sent) + '\r\n'
        fo.write(output_sent)
    fo.close()
Example #3
def test(character_idx_map, options, params, path, filename, batch_size=512):

    X = tools.prepareData(character_idx_map, path, test=True)
    dropout = (1 - options['dropout_rate']) * np.ones(
        (options['ndims'], ), dtype=theano.config.floatX)
    start, n = 0, len(X)
    idx_list = range(n)
    lens = [len(x) for x in X]
    idx_list = sorted(idx_list, cmp=lambda x, y: cmp(lens[x], lens[y]))
    Y = []
    print 'count_test_sentences', len(X)

    for i in range(n // batch_size):
        batch_idx = idx_list[start:start + batch_size]
        x = [X[t] for t in batch_idx]
        x_lens = [lens[t] for t in batch_idx]
        x = tools.asMatrix(x)
        sY = tools.segment(params, options, x, x_lens, dropout)
        Y.extend(sY)
        start += batch_size
    if start != n:
        batch_idx = idx_list[start:]
        x = [X[t] for t in batch_idx]
        x_lens = [lens[t] for t in batch_idx]
        x = tools.asMatrix(x)
        sY = tools.segment(params, options, x, x_lens, dropout)
        Y.extend(sY)
    table = {}
    nb = 0
    for idx in idx_list:
        table[idx] = nb
        nb += 1
    output_result(Y, table, path, filename)
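This example sorts the sentence indices by length before batching, so the results in Y come back in length-sorted order; table maps each original sentence index to its position in that order, presumably so output_result can restore the original file order. A toy illustration of that bookkeeping, with made-up values:

# Hypothetical data standing in for the prepared sentences.
lens = [5, 2, 9, 3]                                           # length of each original sentence
idx_list = sorted(range(len(lens)), key=lambda i: lens[i])    # [1, 3, 0, 2]

# table: original sentence index -> position in the length-sorted results Y
table = {idx: pos for pos, idx in enumerate(idx_list)}        # {1: 0, 3: 1, 0: 2, 2: 3}

# Restoring the original order from length-sorted results:
Y_sorted = ['r1', 'r3', 'r0', 'r2']                           # assumed per-sentence results, sorted order
Y_original = [Y_sorted[table[i]] for i in range(len(lens))]
assert Y_original == ['r0', 'r1', 'r2', 'r3']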
Example #4
def main():
    # Model Parameters
    degree = 13
    whis = 2.5
    lambda_ = 0.0001

    # Load the training data
    print("Loading the training data...")
    y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

    # Clean and prepare our data
    print("Clean and prepare the training data...")
    y_train, tX_train, ids_train = prepareData(y, tX, ids, degree, whis)

    # Train our models
    print("Train the models...")
    weights_0, loss_0 = ridge_regression(y_train[0], tX_train[0], lambda_)
    weights_1, loss_1 = ridge_regression(y_train[1], tX_train[1], lambda_)
    weights_2, loss_2 = ridge_regression(y_train[2], tX_train[2], lambda_)
    weights_3, loss_3 = ridge_regression(y_train[3], tX_train[3], lambda_)

    # Load the dataset to predict
    print("Loading the testing data...")
    y_test, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)

    # Prepare the data in the same way as the train dataset
    print("Clean and prepare the testing data...")
    y_test, tX_test, ids_test = prepareData(y_test, tX_test, ids_test, degree,
                                            whis)

    # Predict each class
    print("Predict the testing data...")
    y_pred_0 = predict_labels(weights_0, tX_test[0])
    y_pred_1 = predict_labels(weights_1, tX_test[1])
    y_pred_2 = predict_labels(weights_2, tX_test[2])
    y_pred_3 = predict_labels(weights_3, tX_test[3])

    # Concatenate the results
    y_pred = np.concatenate([y_pred_0, y_pred_1, y_pred_2, y_pred_3])
    ids_test = np.concatenate(
        [ids_test[0], ids_test[1], ids_test[2], ids_test[3]])

    # Write the results in a csv file
    print("Writing the results...")
    create_csv_submission(ids_test, y_pred, OUTPUT_PATH)

    print("DONE! Your predictions are available in", OUTPUT_PATH)
Example #5
File: test.py Project: jcyk/CWS
def test(character_idx_map,
         options,
         params,
         path,
         filename,
         batch_size = 512
         ):
    
    X = tools.prepareData(character_idx_map,path,test=True)
    dropout = (1-options['dropout_rate'])*np.ones((options['ndims'],), dtype=theano.config.floatX)
    start,n = 0,len(X)
    idx_list = range(n)
    lens = [len(x) for x in X]
    idx_list = sorted(idx_list,cmp = lambda x,y: cmp(lens[x],lens[y]))
    Y = []
    print 'count_test_sentences',len(X)
    
    for i in range(n//batch_size):
        batch_idx = idx_list[start:start+batch_size]
        x = [X[t] for t in batch_idx]
        x_lens = [lens[t] for t in batch_idx]
        x = tools.asMatrix(x)
        sY = tools.segment(params,options,x,x_lens,dropout)
        Y.extend(sY)
        start+=batch_size
    if start!=n:
        batch_idx = idx_list[start:]
        x = [X[t] for t in batch_idx]
        x_lens = [lens[t] for t in batch_idx]
        x = tools.asMatrix(x)
        sY = tools.segment(params,options,x,x_lens,dropout)
        Y.extend(sY)
    table = {}
    nb= 0
    for idx in idx_list:
        table[idx] = nb
        nb+=1
    output_result(Y,table,path,filename)
Example #6
def dy_train_model(max_epochs=50,
                   batch_size=256,
                   ndims=50,
                   nhiddens=50,
                   dropout_rate=0.2,
                   regularization=0.000001,
                   margin_loss_discount=0.2,
                   max_word_len=4,
                   start_point=1,
                   load_params=None,
                   max_sent_len=60,
                   beam_size=4,
                   shuffle_data=True,
                   train_file='../data/train',
                   dev_file='../data/dev',
                   lr=0.2,
                   pre_training='../w2v/c_vecs_50'):
    options = locals().copy()
    print 'Model options:'
    for kk, vv in options.iteritems():
        print '\t', kk, '\t', vv

    Cemb, character_idx_map = initCemb(ndims, train_file, pre_training)

    cws = CWS(Cemb, character_idx_map, options)

    if load_params is not None:
        cws.load(load_params)

    char_seq, _, truth = prepareData(character_idx_map, train_file)

    if max_sent_len is not None:
        survived = []
        for idx, seq in enumerate(char_seq):
            if len(seq) <= max_sent_len and len(seq) > 1:
                survived.append(idx)
        char_seq = [char_seq[idx] for idx in survived]
        truth = [truth[idx] for idx in survived]
    n = len(char_seq)
    print 'Total number of training instances:', n

    print 'Start training model'
    start_time = time.time()
    nsamples = 0
    for eidx in xrange(max_epochs):

        idx_list = range(n)
        if shuffle_data:
            random.shuffle(idx_list)

        for idx in idx_list:
            loss = cws.backward(char_seq[idx], truth[idx])
            if np.isnan(loss):
                print 'something went wrong, loss is nan.'
                return
            nsamples += 1
            if nsamples % batch_size == 0:
                cws.trainer.update(1. / batch_size)

        cws.trainer.update_epoch(1.)
        end_time = time.time()
        print 'Trained %s epoch(s) (%d samples) took %.lfs per epoch' % (
            eidx + 1, nsamples, (end_time - start_time) / (eidx + 1))
        test(cws, dev_file, '../result/dev_result%d' % (eidx + start_point))
Example #7
def dy_train_model(max_epochs=30,
                   batch_size=256,
                   char_dims=50,
                   word_dims=100,
                   nhiddens=50,
                   dropout_rate=0.2,
                   margin_loss_discount=0.2,
                   max_word_len=4,
                   load_params=None,
                   max_sent_len=60,
                   shuffle_data=True,
                   train_file='../data/train',
                   dev_file='../data/dev',
                   lr=0.5,
                   edecay=0.1,
                   momentum=0.5,
                   pre_trained='../w2v/char_vecs_100',
                   word_proportion=0.5):
    options = locals().copy()  # Copy the local parameters to options
    print 'Model options:'
    for kk, vv in options.iteritems():
        print '\t', kk, '\t', vv

    # Based on train_file, get the most frequent characters and build the embedding matrix
    # Cemb: character embedding matrix {index: vector}
    # character_idx_map: {character: index}
    Cemb, character_idx_map = initCemb(char_dims, train_file, pre_trained)

    # Define parameters, trainer
    cws = CWS(Cemb, character_idx_map, options)

    # Load params and test
    if load_params is not None:
        cws.load(load_params)
        test(cws, dev_file, 'result')

    # Convert the word corpus into per-sentence index lists
    # char_seq: [sentence[char_idx]]
    # truth: [sentence[char_label]]
    char_seq, _, truth = prepareData(character_idx_map, train_file)

    # Remove sentences that are too long or too short
    if max_sent_len is not None:
        survived = []
        for idx, seq in enumerate(char_seq):
            if len(seq) <= max_sent_len and len(seq) > 1:
                survived.append(idx)
        char_seq = [char_seq[idx] for idx in survived]
        truth = [truth[idx] for idx in survived]

    # Build the frequent-word list used for the word embedding matrix H
    if word_proportion > 0:
        word_counter = Counter()
        # Loop over the characters and labels of each sentence
        for chars, labels in zip(char_seq, truth):
            # Enumerate labels starting at 1: for each position idx,
            # form the tuple of the last `label` character indices ending at idx
            # and count how often each tuple occurs
            word_counter.update(
                tuple(chars[idx - label:idx])
                for idx, label in enumerate(labels, 1))
        # Keep only the most frequent words
        known_word_count = int(word_proportion * len(word_counter))
        known_words = dict(word_counter.most_common()
                           [:known_word_count])  # {word tuple: occurrence count}
        idx = 0
        # Remap known_words to {word tuple: word index}
        for word in known_words:
            known_words[word] = idx
            idx += 1
        # We keep a short list H of the most frequent words and generate the parameter matrix H
        # Add known_words and param['word_embed'] as lookup_parameters
        cws.use_word_embed(known_words)

    n = len(char_seq)
    print 'Total number of training instances:', n

    print 'Start training model'
    start_time = time.time()
    nsamples = 0
    for eidx in xrange(max_epochs):
        idx_list = range(n)
        # Shuffle the sentences
        if shuffle_data:
            np.random.shuffle(idx_list)

        total_loss = 0
        total_times = 0

        for idx in idx_list:
            loss = cws.backward(char_seq[idx],
                                truth[idx])  # Construct computation graph
            total_loss += loss
            if np.isnan(loss):
                print 'something went wrong, loss is nan.'
                return
            nsamples += 1
            if nsamples % batch_size == 0:
                cws.trainer.update()
                total_times += batch_size
                print '%s/%s, average loss:%s' % (total_times, n,
                                                  total_loss / batch_size)
                total_loss = 0

        # edecay is not available after DyNet 1.0,
        # so I have to update the learning rate manually
        cws.trainer.learning_rate /= 1 + options['edecay']

        total_times = 0

        # Deprecated
        # cws.trainer.update_epoch(1.)
        end_time = time.time()
        print 'Trained %s epoch(s) (%d samples) took %.lfs per epoch' % (
            eidx + 1, nsamples, (end_time - start_time) / (eidx + 1))
        test(cws, dev_file, '../result/dev_result%d' % (eidx + 1))
        os.system('python score.py %s %d %d' % (dev_file, eidx + 1, eidx + 1))
        cws.save('epoch%d' % (eidx + 1))
        print 'Current model saved'
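The word_proportion block in this example collects character-index tuples with a Counter and keeps the most frequent ones, re-indexed, as the vocabulary handed to cws.use_word_embed. A standalone sketch of just that bookkeeping, with made-up character indices, labels, and the default word_proportion of 0.5:

from collections import Counter

char_seq = [[7, 3, 3, 9, 4], [3, 9, 4]]    # hypothetical per-sentence character indices
truth = [[1, 1, 2, 1, 2], [1, 1, 2]]       # hypothetical per-character labels
word_proportion = 0.5

word_counter = Counter()
for chars, labels in zip(char_seq, truth):
    # tuple of the last `label` character indices ending at each position idx
    word_counter.update(tuple(chars[idx - label:idx])
                        for idx, label in enumerate(labels, 1))

known_word_count = int(word_proportion * len(word_counter))
known_words = dict(word_counter.most_common()[:known_word_count])
for idx, word in enumerate(known_words):
    known_words[word] = idx                # remap {word tuple: count} -> {word tuple: index}

# known_words would then be passed to cws.use_word_embed(known_words)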
Example #8
File: dy_model.py Project: jcyk/CWS
def dy_train_model(
    max_epochs = 50,
    batch_size = 256,
    ndims = 50,
    nhiddens = 50,
    dropout_rate = 0.2,
    regularization = 0.000001,
    margin_loss_discount = 0.2,
    max_word_len = 4,
    start_point = 1,
    load_params = None,
    max_sent_len = 60,
    beam_size = 4,
    shuffle_data = True,
    train_file = '../data/train',
    dev_file = '../data/dev',
    lr = 0.2,
    pre_training = '../w2v/c_vecs_50'
):
    options = locals().copy()
    print 'Model options:'
    for kk,vv in options.iteritems():
        print '\t',kk,'\t',vv
    
    Cemb, character_idx_map = initCemb(ndims,train_file,pre_training)

    cws = CWS(Cemb,character_idx_map,options)

    if load_params is not None:
        cws.load(load_params)

    char_seq, _ , truth = prepareData(character_idx_map,train_file)
    
    if max_sent_len is not None:
        survived = []
        for idx,seq in enumerate(char_seq):
            if len(seq)<=max_sent_len and len(seq)>1:
                survived.append(idx)
        char_seq =  [ char_seq[idx]  for idx in survived]
        truth = [ truth[idx] for idx in survived]
    n = len(char_seq)
    print 'Total number of training instances:',n
    
    print 'Start training model'
    start_time = time.time()
    nsamples = 0
    for eidx in xrange(max_epochs):
        
        idx_list = range(n)
        if shuffle_data:
            random.shuffle(idx_list)

        for idx in idx_list:
            loss = cws.backward(char_seq[idx],truth[idx])
            if np.isnan(loss):
                print 'something went wrong, loss is nan.'
                return
            nsamples += 1
            if nsamples % batch_size == 0:
                cws.trainer.update(1./batch_size)

        cws.trainer.update_epoch(1.)
        end_time = time.time()
        print 'Trained %s epoch(s) (%d samples) took %.lfs per epoch'%(eidx+1,nsamples,(end_time-start_time)/(eidx+1))       
        test(cws,dev_file,'../result/dev_result%d'%(eidx+start_point))
Example #9
def dy_train_model(max_epochs=30,
                   batch_size=256,
                   char_dims=50,
                   word_dims=100,
                   nhiddens=50,
                   dropout_rate=0.2,
                   margin_loss_discount=0.2,
                   max_word_len=4,
                   load_params=None,
                   max_sent_len=60,
                   shuffle_data=True,
                   train_file='../data/train',
                   dev_file='../data/dev',
                   lr=0.5,
                   edecay=0.1,
                   momentum=0.5,
                   pre_trained='../w2v/char_vecs_100',
                   word_proportion=0.5):
    options = locals().copy()
    print 'Model options:'
    for kk, vv in options.iteritems():
        print '\t', kk, '\t', vv

    Cemb, character_idx_map = initCemb(char_dims, train_file, pre_trained)

    cws = CWS(Cemb, character_idx_map, options)

    if load_params is not None:
        cws.load(load_params)
        test(cws, dev_file, 'result')

    char_seq, _, truth = prepareData(character_idx_map, train_file)

    if max_sent_len is not None:
        survived = []
        for idx, seq in enumerate(char_seq):
            if len(seq) <= max_sent_len and len(seq) > 1:
                survived.append(idx)
        char_seq = [char_seq[idx] for idx in survived]
        truth = [truth[idx] for idx in survived]

    if word_proportion > 0:
        word_counter = Counter()
        for chars, labels in zip(char_seq, truth):
            word_counter.update(
                tuple(chars[idx - label:idx])
                for idx, label in enumerate(labels, 1))
        known_word_count = int(word_proportion * len(word_counter))
        known_words = dict(word_counter.most_common()[:known_word_count])
        idx = 0
        for word in known_words:
            known_words[word] = idx
            idx += 1
        cws.use_word_embed(known_words)

    n = len(char_seq)
    print 'Total number of training instances:', n

    print 'Start training model'
    start_time = time.time()
    nsamples = 0
    for eidx in xrange(max_epochs):
        idx_list = range(n)
        if shuffle_data:
            np.random.shuffle(idx_list)

        for idx in idx_list:
            loss = cws.backward(char_seq[idx], truth[idx])
            if np.isnan(loss):
                print 'something went wrong, loss is nan.'
                return
            nsamples += 1
            if nsamples % batch_size == 0:
                cws.trainer.update(1.)

        cws.trainer.update_epoch(1.)
        end_time = time.time()
        print 'Trained %s epoch(s) (%d samples) took %.lfs per epoch' % (
            eidx + 1, nsamples, (end_time - start_time) / (eidx + 1))
        test(cws, dev_file, '../result/dev_result%d' % (eidx + 1))
        os.system('python score.py %s %d %d' % (dev_file, eidx + 1, eidx + 1))
        cws.save('epoch%d' % (eidx + 1))
        print 'Current model saved'
Example #10
def train_model(max_epoches=30,
                optimizer=adadelta,
                batch_size=256,
                ndims=100,
                nhiddens=150,
                dropout_rate=0.,
                regularization=0.,
                margin_loss_discount=0.2,
                max_word_len=4,
                start_point=1,
                load_params=None,
                resume_training=False,
                max_sent_len=60,
                beam_size=4,
                shuffle_data=True,
                train_file='../data/train',
                dev_file='../data/dev',
                lr=0.2,
                pre_training='../w2v/c_vecs_100'):
    options = locals().copy()
    print 'model options:', options
    print 'Building model'

    Cemb, character_idx_map = tools.initCemb(ndims, train_file, pre_training)

    print 'Saving config file'
    config = {}
    config['options'] = options
    config['options']['optimizer'] = optimizer.__name__
    config['character_idx_map'] = character_idx_map
    f = open('config', 'wb')
    f.write(json.dumps(config))
    f.close()
    print 'Resuming model building'

    params = initParams(Cemb, options)
    if load_params is not None:
        pp = np.load(load_params)
        for kk, vv in params.iteritems():
            if kk not in pp:
                raise Warning('%s is not in the archive' % kk)
            params[kk] = pp[kk]
    tparams = initTparams(params)
    if optimizer is adadelta:
        ms_up, ms_grad = prepare_adadelta(tparams)
    if optimizer is adagrad:
        if resume_training:
            ss_grad = initTparams(np.load('backup.npz'))
        else:
            ss_grad = prepare_adagrad(tparams)
    T_x, T_dropout, T_y, T_yy, T_y_mask, T_yy_mask, T_cost = build_model(
        tparams, options)
    weight_decay = (tparams['U']**2).sum() + (tparams['Wy']**2).sum()
    weight_decay *= regularization
    T_cost += weight_decay

    if optimizer is adadelta:
        T_updates = optimizer(ms_up, ms_grad, tparams, T_cost)
    elif optimizer is sgd:
        LR, T_updates = optimizer(tparams, T_cost, lr)
    elif optimizer is adagrad:
        T_updates = optimizer(ss_grad, tparams, T_cost, lr)

    f_update = theano.function(
        [T_x, T_dropout, T_y, T_yy, T_y_mask, T_yy_mask],
        T_cost,
        updates=T_updates)

    print 'Loading data'
    seqs, lenss, tagss = tools.prepareData(character_idx_map, train_file)
    if max_sent_len is not None:
        survived = []
        for idx, seq in enumerate(seqs):
            if len(seq) <= max_sent_len and len(seq) > 1:
                survived.append(idx)
        seqs = [seqs[idx] for idx in survived]
        lenss = [lenss[idx] for idx in survived]
        tagss = [tagss[idx] for idx in survived]

    tot_lens = [len(seq) for seq in seqs]
    print 'count_training_sentences', len(seqs)

    print 'Training model'
    start_time = time.time()
    for eidx in xrange(max_epoches):
        batches_idx = get_minibatches_idx(seqs,
                                          tot_lens,
                                          batch_size,
                                          shuffle=shuffle_data)
        for batch_idx in batches_idx:
            X = [seqs[t] for t in batch_idx]
            Y = [lenss[t] for t in batch_idx]
            Z = [tagss[t] for t in batch_idx]
            X_lens = [tot_lens[t] for t in batch_idx]
            params = get_params(tparams)
            X = tools.asMatrix(X)
            dropout = np.random.binomial(1, 1 - dropout_rate,
                                         (X.shape[1], ndims)).astype(
                                             theano.config.floatX)
            #numpy_start = time.time()
            YY = tools.segment(params, options, X, X_lens, dropout,
                               margin_loss_discount, Z)
            #print 'numpy',time.time()-numpy_start
            Y = tools.asMatrix(Y, transpose=True)
            YY = tools.asMatrix(YY, transpose=True)
            Y_mask = (Y / Y).astype(theano.config.floatX)
            YY_mask = (YY / YY).astype(theano.config.floatX)
            #theano_start = time.time()
            f_update(X, dropout, Y, YY, Y_mask, YY_mask)
            #print 'theano',time.time()-theano_start
        if optimizer is sgd:
            LR.set_value(numpy_floatX(LR.get_value() * 0.9))
        params = get_params(tparams)
        test(config['character_idx_map'], config['options'], params, dev_file,
             '../result/dev_result%s' % (eidx + start_point, ))
        np.savez('epoch_%s' % (eidx + start_point, ), **params)
        if optimizer is adagrad:
            np.savez('backup', **get_params(ss_grad))
        end_time = time.time()
        print 'Trained %s epoch(s) took %.lfs per epoch' % (
            eidx + 1, (end_time - start_time) / (eidx + 1))
Example #11
File: model.py Project: jcyk/CWS
def train_model(
    max_epochs = 30,
    optimizer = adadelta,
    batch_size = 256,
    ndims = 100,
    nhiddens = 150,
    dropout_rate = 0.,
    regularization = 0.,
    margin_loss_discount = 0.2,
    max_word_len = 4,
    start_point = 1,
    load_params = None,
    resume_training = False,
    max_sent_len = 60,
    beam_size = 4,
    shuffle_data = True,
    train_file = '../data/train',
    dev_file = '../data/dev',
    lr = 0.2,
    pre_training = '../w2v/c_vecs_100'
):
    options = locals().copy()
    print 'model options:',options
    print 'Building model'
    
    Cemb,character_idx_map = tools.initCemb(ndims,train_file,pre_training)
    
    print 'Saving config file'
    config = {}
    config['options'] = options
    config['options']['optimizer'] = optimizer.__name__
    config['character_idx_map'] = character_idx_map
    f = open('config','wb')
    f.write(json.dumps(config))
    f.close()
    print 'Resuming model building'
    
    params = initParams(Cemb,options)
    if load_params is not None:
        pp = np.load(load_params)
        for kk,vv in params.iteritems():
            if kk not in pp:
                raise Warning('%s is not in the archive' % kk)
            params[kk] = pp[kk]
    tparams = initTparams(params)
    if optimizer is adadelta:
        ms_up,ms_grad = prepare_adadelta(tparams)
    if optimizer is adagrad:
        if resume_training:
            ss_grad = initTparams(np.load('backup.npz'))
        else:
            ss_grad = prepare_adagrad(tparams)
    T_x,T_dropout,T_y,T_yy,T_y_mask,T_yy_mask,T_cost = build_model(tparams,options)
    weight_decay = (tparams['U']**2).sum()+(tparams['Wy']**2).sum()
    weight_decay *= regularization
    T_cost += weight_decay

    if optimizer is adadelta:
        T_updates = optimizer(ms_up,ms_grad,tparams,T_cost)
    elif optimizer is sgd:
        LR,T_updates = optimizer(tparams,T_cost,lr)
    elif optimizer is adagrad:
        T_updates = optimizer(ss_grad,tparams,T_cost,lr)

    f_update = theano.function([T_x,T_dropout,T_y,T_yy,T_y_mask,T_yy_mask],T_cost,updates=T_updates)

    print 'Loading data'
    seqs,lenss,tagss = tools.prepareData(character_idx_map,train_file)
    if max_sent_len is not None:
        survived = []
        for idx,seq in enumerate(seqs):
            if len(seq)<=max_sent_len and len(seq)>1:
                survived.append(idx)
        seqs =  [ seqs[idx]  for idx in survived]
        lenss = [ lenss[idx] for idx in survived]
        tagss = [ tagss[idx] for idx in survived]

    tot_lens = [len(seq) for seq in seqs]
    print 'count_training_sentences',len(seqs)
    
    print 'Training model'
    start_time = time.time()
    for eidx in xrange(max_epochs):
        batches_idx = get_minibatches_idx(seqs,tot_lens,batch_size,shuffle=shuffle_data)
        for batch_idx in batches_idx:
            X = [seqs[t]  for t in batch_idx]
            Y = [lenss[t] for t in batch_idx]
            Z = [tagss[t] for t in batch_idx]
            X_lens = [tot_lens[t] for t in batch_idx]
            params = get_params(tparams)
            X = tools.asMatrix(X)
            dropout = np.random.binomial(1,1-dropout_rate,(X.shape[1],ndims)).astype(theano.config.floatX)
            #numpy_start = time.time()
            YY= tools.segment(params,options,X,X_lens,dropout,margin_loss_discount,Z)
            #print 'numpy',time.time()-numpy_start
            Y = tools.asMatrix(Y,transpose=True)
            YY = tools.asMatrix(YY,transpose=True)
            Y_mask = (Y/Y).astype(theano.config.floatX)
            YY_mask =(YY/YY).astype(theano.config.floatX)
            #theano_start = time.time()
            f_update(X,dropout,Y,YY,Y_mask,YY_mask)
            #print 'theano',time.time()-theano_start
        if optimizer is sgd:
            LR.set_value(numpy_floatX(LR.get_value()*0.9))
        params = get_params(tparams)
        test(config['character_idx_map'],config['options'],params,dev_file,'../result/dev_result%s'%(eidx+start_point,))
        np.savez('epoch_%s'%(eidx+start_point,),**params)
        if optimizer is adagrad:
            np.savez('backup',**get_params(ss_grad))
        end_time = time.time()
        print 'Trained %s epoch(s) took %.lfs per epoch'%(eidx+1,(end_time-start_time)/(eidx+1))