def shuffle_data(self):
    """
    Shuffle the loaded subjects in unison. Returns None.
    """
    print('DEPRECATED: Please do not shuffle inside the sleeploader')
    if not self.loaded:
        print('ERROR: Data not yet loaded')
    self.data, self.hypno, self.shuffle_index, self.subjects = shuffle(
        self.data, self.hypno, self.shuffle_index, self.subjects,
        random_state=self.rng)
    return None
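A minimal sketch of the call above, assuming shuffle here is sklearn.utils.shuffle (the random_state keyword and the four arrays returned in unison match that interface); shuffling parallel arrays together keeps data, hypnograms, indices and subject IDs aligned:

import numpy as np
from sklearn.utils import shuffle

data = np.arange(8).reshape(4, 2)   # 4 "subjects", 2 features each
hypno = np.array([0, 1, 2, 3])      # per-subject labels
idx = np.arange(4)                  # original positions

data_s, hypno_s, idx_s = shuffle(data, hypno, idx, random_state=42)
assert np.array_equal(data_s[:, 0] // 2, hypno_s)  # rows and labels stay paired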
Example 2
def merge(genes_parent_a, genes_parent_b):
    crosspoint = randint(1, len(genes_parent_a) - 1)

    gene_set = set(genes_parent_a + genes_parent_b)
    genes_a = list(shuffle(genes_parent_a))
    genes_b = list(shuffle(genes_parent_b))

    genes_f = [genes_a[:crosspoint], genes_b[:crosspoint]]
    adjuncts = [genes_b[crosspoint:], genes_a[crosspoint:]]

    for genes in genes_f:
        adjunct = adjuncts.pop(0)
        for gene in adjunct:
            if gene not in genes:
                genes.append(gene)
            else:
                genes.append(gene_set.difference(genes + adjunct).pop())
    return tuple(genes_f[0]), tuple(genes_f[1])
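Note that merge() only works if the shuffle() it calls returns a shuffled copy; the standard library's random.shuffle shuffles in place and returns None, which would break list(shuffle(...)). A hypothetical copy-returning helper with the assumed semantics:

import random

def shuffle(seq, rng=random):
    # hypothetical stand-in: return a shuffled copy rather than shuffling in place
    copy = list(seq)
    rng.shuffle(copy)
    return copy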
Example 3
def combinar(genes1, genes2):
    crosspoint = randint(1, len(genes1) - 1)

    conjunto = set(genes1 + genes2)
    genes_a = list(shuffle(genes1))
    genes_b = list(shuffle(genes2))

    genes_f = [genes_a[:crosspoint], genes_b[:crosspoint]]
    complementos = [genes_b[crosspoint:], genes_a[crosspoint:]]

    for genes in genes_f:
        complemento = complementos.pop(0)
        for gene in complemento:
            if gene not in genes:
                genes.append(gene)
            else:
                genes.append(conjunto.difference(genes + complemento).pop())
    return tuple(genes_f[0]), tuple(genes_f[1])
Example 5
def assex(somos, tabela, horarios, professores):  # asexual reproduction
    clones = set()
    for somo in somos:
        try:
            clones.add(Individuo(tabela, horarios,
                                 professores, shuffle(list(somo.genes))))
        except Exception:
            pass
    return clones
Example 6
def assex(somos, tabela, horarios, professores):  # asexual reproduction
    clones = set()
    for somo in somos:
        try:
            clones.add(
                Individuo(tabela, horarios, professores,
                          shuffle(list(somo.genes))))
        except Exception:
            pass
    return clones
Example 7
def assex(chromosomes, table, spots, candidates):
    clones = set()
    for some in chromosomes:
        try:
            clones.add(
                Schedule(table, spots, candidates, fit, penality,
                         shuffle(list(some.genes))))
        except Exception as e:
            if e.args[0] != 'CRASH':
                raise e
    return clones
Example 8
def __init__(self, tabela, horarios, professores, genes=None):
    if genes is None:
        genes = list(range(len(tabela)))
        genes = tuple(shuffle(genes, prob=.8))
    self.genes = genes
    self.nota = 0
    if self.crash_case(horarios, professores):
        raise Exception('CRASH')
    self.contador_zeros = 0
    self.idade = 0
    self.avaliar(tabela)  # flat evaluation
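The shuffle(genes, prob=.8) call above uses a prob keyword that no standard shuffle has. A hypothetical partial-shuffle helper, purely to illustrate one plausible reading of that interface (each position is swapped with probability prob):

import random

def shuffle(seq, prob=1.0, rng=random):
    out = list(seq)
    for i in range(len(out)):
        if rng.random() < prob:          # swap this position only with probability prob
            j = rng.randrange(len(out))
            out[i], out[j] = out[j], out[i]
    return out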
Example 9
def get_next(self):
    q = tools.queue()
    if len(q) > 0:
        tools.remove_first_from_queue()
        return q[0]
    if tools.shuffle():
        fl = list(self.library.get_filtered_list().keys())  # random.choice needs a sequence, not a dict view
        if random.random() < tools.probability() and len(fl) > 0:
            return random.choice(fl)
        else:
            return random.randrange(self.library.length)
    return (self.info["current"] + 1) % self.library.length
Example 10
    def __init__(self,
                 table,
                 spots,
                 candidates,
                 fitness_func,
                 fitness_penality_zero,
                 genes=None):
        if not genes:
            genes = list(range(len(table)))
            genes = tuple(shuffle(genes, prob=.8))
        if Schedule._crash_detection(spots, candidates, genes):
            raise Exception('CRASH')

        self.genes = genes
        self.fitness = 0

        self.zero_counter = 0

        self.fitness_func = fitness_func
        self.fitness_penality = fitness_penality_zero

        self._evaluate_fitness(table)
Example 11
          total_f4[0:0] = new_feature4[indexx]
          total_weight[0:0] = [[unlabel_confid[indexx]]]'''

        total_yp = [[range(s['ynum']) for j in range(len(total_word[i]))]
                    for i in range(len(total_word))]
        for i in range(len(total_word)):
            for j in range(len(total_word[i])):
                for k in range(s['ynum']):
                    if k == total_label[i][j]:
                        total_yp[i][j][k] = 0
                    else:
                        total_yp[i][j][k] = 1

        #shuffle
        shuffle([
            total_word, total_f0, total_f1, total_f2, total_f3, total_f4,
            total_label, total_weight, total_yp
        ], s['seed'])
        s['cur_epoch'] = e
        tic = time.time()
        for i in xrange(len(total_word)):
            cwords = contextwin(total_word[i], s['wsize'])
            labels = total_label[i]
            feature0 = contextwin(total_f0[i], s['fsize'])
            feature1 = contextwin(total_f1[i], s['fsize'])
            feature2 = contextwin(total_f2[i], s['fsize'])
            feature3 = contextwin(total_f3[i], s['fsize'])
            feature4 = contextwin(total_f4[i], s['fsize'])
            ypi = total_yp[i]
            #print (len(total_word))
            s['cur_lr'] *= (0.95**(1.0 / len(total_word)))
            if total_weight[i][0] > 1:
Example 12
    rnn = model(nh=s['nhidden'],
                nc=nclasses,
                ne=vocsize,
                de=s['emb_dimension'],
                cs=s['win'])

    # train with early stopping on validation set
    print 'train with set...'
    best_f1 = -numpy.inf
    s['clr'] = s['lr']

    print time.localtime(time.time())

    for e in xrange(s['nepochs']):
        # shuffle
        shuffle([train_lex, train_y], s['seed'])
        s['ce'] = e
        tic = time.time()
        for i in xrange(nsentences):
            #print 'i=', i
            cwords = contextwin(train_lex[i], s['win'])
            words = map(lambda x: numpy.asarray(x).astype('int32'),
                        minibatch(cwords, s['bs']))
            labels = train_y[i]
            #print 'label=', labels
            for word_batch, label_last_word in zip(words, labels):
                t = rnn.train(word_batch, label_last_word, s['clr'])
                rnn.normalize()
            if (i + 1) % 270 == 0 and s['verbose']:  # 'and', not '&': with '&' the comparison would ignore s['verbose']
                print '[learning] epoch %i >> %2.2f%%' % (
                    e, (i + 1) * 100. / nsentences
Example 13
criterion2 = tools.to_cuda(nn.CrossEntropyLoss(tools.to_cuda(weight_tensor)))
criterion1 = tools.to_cuda(nn.MSELoss())

confusion_matrix = tnt.meter.ConfusionMeter(5)

t_acc = []
t_Loss = []
v_acc = []
v_Loss = []

ff = open('./models/progress.txt', 'w')
batchsize = 2

for epoch in range(1, epochs + 1):
    model.train()
    p = tools.shuffle(IDSv)
    IDSv = IDSv[p]
    LABSv = LABSv[p]
    train_losses = []
    train_losses_recon = []
    cnt = 0

    ###############################################################################

    for t in tqdm(range(0, len(IDSv) - batchsize, batchsize)):
        iter = t / batchsize
        inputs = []
        targets = []
        retargets = []
        for i in range(t, min(t + batchsize, len(IDSv))):
            input = tools.make_patch(IDSv[i])
Example 14
        def build_patterns():
            """ Returns list of all possible patterns (when `pre_build_patterns` setting == True) """

            if settings.use_itertools:

                with Progress(
                        items_number=settings.patterns_number,
                        color=settings.color,
                        title=
                        "[Pattern] Building patterns list (using itertools)...",
                        timing=settings.progress_timing,
                ) as progress:

                    # TODO: include `shuffle_before` setting
                    all_patterns_list = list(
                        map(
                            lambda pattern_tuple: progress.item(
                                settings.Pattern(pattern_tuple)),
                            product(settings.Peg.all_colors_list,
                                    repeat=settings.pegs_number)))
            else:

                with Progress(
                        items_number=sum(
                            settings.colors_number**i
                            for i in range(1, settings.pegs_number + 1)),
                        color=settings.color,
                        title=
                        "[Pattern] Building patterns list (using my function)...",
                        timing=settings.progress_timing,
                ) as progress:

                    all_patterns_list = [
                        ()
                    ]  # initialize temporary list containing empty tuple
                    all_colors_list = settings.Peg.all_colors_list[:]  # get local `all_colors_list` to be shuffled

                    # iterate for `pegs_number`-1 times
                    for _ in range(settings.pegs_number - 1):

                        # shuffle `all_colors_list` to build patterns from (on every iteration)
                        if settings.shuffle_before:
                            shuffle(all_colors_list, )

                        # make temporary list of tuples (on every iteration)
                        all_patterns_list = [
                            progress.item((*pattern, new_peg))
                            for pattern in all_patterns_list
                            for new_peg in all_colors_list
                        ]
                        # each new pattern is a tuple one peg bigger (unpacked "old" pegs + the "new" one)

                    # shuffle `all_colors_list` to build Pattern objects from
                    if settings.shuffle_before:
                        shuffle(all_colors_list, )

                    # make final list of Pattern objects
                    all_patterns_list = [
                        progress.item(settings.Pattern((*pattern, new_peg)))
                        for pattern in all_patterns_list
                        for new_peg in all_colors_list
                    ]
                    # each new pattern is a Pattern object one peg bigger (unpacked "old" pegs + the "new" one)

            # shuffle generated patterns list (whole list at once)
            if settings.shuffle_after:
                with Progress(
                        items_number=len(all_patterns_list) - 1,
                        color=settings.color,
                        title="[Pattern] Shuffling patterns list...",
                        timing=settings.progress_timing,
                ) as progress:
                    shuffle(
                        all_patterns_list,
                        progress=progress,
                    )

            return all_patterns_list
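The shuffle(lst) / shuffle(lst, progress=progress) calls above take an optional progress object, and the progress bar is sized to len(all_patterns_list) - 1 items, which matches the n - 1 swaps of a Fisher-Yates pass. A hedged sketch of a compatible helper (an assumption about the interface, not this project's actual code):

import random

def shuffle(lst, progress=None):
    # in-place Fisher-Yates shuffle that optionally ticks a progress object
    for i in range(len(lst) - 1, 0, -1):
        j = random.randint(0, i)
        lst[i], lst[j] = lst[j], lst[i]
        if progress is not None:
            progress.item(lst[i])  # hypothetical progress hook
    return lst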
Example 15
#author Berk Gulay

from sklearn import svm
import numpy as np
from sklearn.model_selection import KFold
import tools as T



data = np.load('../features/features.npy')
data_label = np.load('../features/labels.npy')
data_label = np.reshape(data_label, (np.shape(data)[0],))

data, data_label = T.shuffle(data, data_label)
data = np.array(data)
data_label = np.array(data_label)

#train_data,train_label,test_data,test_label = k_fold(data,data_label,10)

svc = svm.SVC(kernel='rbf', gamma=0.001, C=1.2, class_weight='balanced',
              max_iter=500, decision_function_shape='ovr', tol=0.001,
              cache_size=1000, probability=True)



split_size = 10
kf = KFold(n_splits=split_size, shuffle=True)
kf.get_n_splits(data)

overallAccuracies = np.zeros(5)
generalOverallAccuracy = 0

k=0
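The excerpt is cut off just before the cross-validation loop; a hedged sketch of how such a loop typically continues from the kf, svc, data and data_label defined above (the fold-accuracy bookkeeping here is an assumption, not the original code):

from sklearn.metrics import accuracy_score

fold_accuracies = []
for train_idx, test_idx in kf.split(data):
    svc.fit(data[train_idx], data_label[train_idx])        # fit on the training folds
    preds = svc.predict(data[test_idx])                    # predict the held-out fold
    fold_accuracies.append(accuracy_score(data_label[test_idx], preds))
print('mean CV accuracy: %.3f' % (sum(fold_accuracies) / len(fold_accuracies)))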
Example 16
def train(config):
    """
        训练
    :param config: 参数配置
    :return:
    """
    # training parameters
    max_iterations = int(config['TRAINING']['max_iterations'])
    training_batch = int(config['TRAINING']['training_batch'])
    learning_rate = float(config['TRAINING']['learning_rate'])
    validation_ratio = float(config['TRAINING']['validation_ratio'])

    x_data = _read_data(config['BASE']['x_train_path'])
    y_data = _read_data(config['BASE']['y_train_path'])
    # pre_processing
    x_train, y_train, x_validation, y_validation = _pre_processing(x_data, y_data, validation_ratio)

    # initialize parameters
    dim = x_train.shape[1]
    train_size = x_train.shape[0]
    validation_size = x_validation.shape[0]
    w = np.zeros([dim, 1])
    b = np.zeros([1, 1])
    adagrad = 0.0
    esp = 0.0000000001

    # record accuracy and loss
    train_loss = []
    validation_loss = []
    train_acc = []
    validation_acc = []

    # training start
    for iterator in range(max_iterations):
        # randomly shuffle the data before each training epoch (see the unison-shuffle sketch after this function)
        x_random, y_random = tools.shuffle(x_train, y_train)

        # then take mini-batches from the shuffled data and update w, b
        for batch in range(int(x_train.shape[0] / training_batch)):
            x_batch = x_random[training_batch * batch: training_batch * (batch + 1)]
            y_batch = y_random[training_batch * batch: training_batch * (batch + 1)]

            # compute gradients
            w_gradient, b_gradient = _gradient(x_batch, y_batch, w, b)

            adagrad += b_gradient ** 2
            # update w, b
            w = w - learning_rate / np.sqrt(adagrad + esp) * w_gradient.reshape(-1, 1)
            b = b - learning_rate / np.sqrt(adagrad + esp) * b_gradient

        # at the end of each epoch, compute accuracy and loss on the training and validation sets
        y_random_predict = _logistic_regression(x_random, w, b)
        train_acc.append(tools.accuracy(np.round(_logistic_regression(x_random, w, b)), y_random))
        train_loss.append(tools.cross_entropy_loss(y_random.T, y_random_predict)[0][0] / train_size)

        y_validation_predict = _logistic_regression(x_validation, w, b)
        validation_acc.append(tools.accuracy(np.round(_logistic_regression(x_validation, w, b)), y_validation))
        validation_loss.append(tools.cross_entropy_loss(y_validation.T, y_validation_predict)[0][0] / validation_size)

    # plot
    drawer.plot_two_dimensions("acc", [train_acc, validation_acc], ["train", "validation"], True)
    drawer.plot_two_dimensions("loss", [train_loss, validation_loss], ["train", "validation"], True)
Example 17
    # instantiate the model
    numpy.random.seed(s['seed'])
    random.seed(s['seed'])
    rnn = model(    nh = s['nhidden'],
                    nc = nclasses,
                    ne = vocsize,
                    de = s['emb_dimension'],
                    cs = s['win'] )

    # train with early stopping on validation set
    best_f1 = -numpy.inf
    s['clr'] = s['lr']
    for e in xrange(s['nepochs']):
        # shuffle
        shuffle([train_lex, train_ne, train_y], s['seed'])
        s['ce'] = e
        tic = time.time()
        for i in xrange(nsentences):
            cwords = contextwin(train_lex[i], s['win'])
            words  = map(lambda x: numpy.asarray(x).astype('int32'),\
                         minibatch(cwords, s['bs']))
            labels = train_y[i]

            for word_batch , label_last_word in zip(words, labels):
                rnn.train(word_batch, label_last_word, s['clr'])
                rnn.normalize()

            if s['verbose']:
                print '[learning] epoch %i >> %2.2f%%'%(e,(i+1)*100./nsentences),'completed in %.2f (sec) <<\r'%(time.time()-tic),
                sys.stdout.flush()
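The shuffle([list_a, list_b, ...], seed) helper used in this and the following training loops is commonly written as below (a sketch in the spirit of the is13/ATIS tools.py; an assumption, not necessarily this repository's exact code): re-seeding before each list shuffles every parallel list into the same order, in place.

import random

def shuffle(lol, seed):
    # shuffle each list in place with the same seed so parallel lists stay aligned
    for l in lol:
        random.seed(seed)
        random.shuffle(l)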
Example 18
        # Initialize all variables
        sess.run(tf.initialize_all_variables())

        if FLAGS.pre_training:
            sess.run(bi_lstm._weights['w_emb'].assign(pre_emb_w))
            if FLAGS.POS_emb in [1, 2]:
                sess.run(bi_lstm._weights['t_emb'].assign(pre_emb_t))

        best_f1 = 0.0
        for e in xrange(FLAGS.num_epochs):

            # shuffle
            if FLAGS.POS_emb in [1, 2]:
                shuffle([
                    train_lex, train_tags, train_tags_uni, train_cue, train_y
                ], 20)
            else:
                shuffle([train_lex, train_cue, train_y], 20)

            # TRAINING STEP
            train_tot_acc = []
            dev_tot_acc = []
            tic = time.time()
            for i in xrange(len(train_lex)):
                if FLAGS.POS_emb in [1, 2]:
                    acc_train = feeder(
                        bi_lstm, train_lex[i], train_cue[i], train_tags[i] if
                        FLAGS.POS_emb == 1 else train_tags_uni[i], train_y[i])
                else:
                    acc_train = feeder(bi_lstm, train_lex[i], train_cue[i], [],
Example 19
            nch = num_chunk_embeddings,
            de = s['emb_dimension'],
            dp = s['pos_emb_dimension'],
            dch = s['chunk_emb_dimension'],
            cs = s['win'],
            mp = 1.0)
'''

best_params = {}
# train with early stopping on validation set
best_f1 = -numpy.inf
s['clr'] = s['lr']
training_loss = []
for e in xrange(s['nepochs']):
    # shuffling of data per epoch
    shuffle([X_train_idxs, X_train_pos_idxs, X_train_chunk_idxs, Y_train_idxs], s['seed'])
    s['ce'] = e
    tic = time.time()
    loss = 0.0
    for i in xrange(num_train_sentences):
        #print X_train_idxs[i]
        sentence_forward = contextwin(X_train_idxs[i], s['win'])
        sentence_backward = list(reversed(sentence_forward))
        sentence_pos_forward = contextwin(X_train_pos_idxs[i], s['win'])
        sentence_pos_backward = list(reversed(sentence_pos_forward))
        sentence_chunk_forward = contextwin(X_train_chunk_idxs[i], s['win'])
        sentence_chunk_backward = list(reversed(sentence_chunk_forward))
        labels = Y_train_idxs[i]
        #loss += rnn.sentence_train(sentence_forward, sentence_pos_forward, sentence_chunk_forward, labels, s['clr'])
        #rnn.normalize()
        loss += birnn.sentence_train(sentence_forward, sentence_backward, sentence_pos_forward, sentence_pos_backward,
Example 20
def main():
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.01,
        'lr_decay': 0.5,  #
        'max_grad_norm': 5,  #
        'seed': 345,  #
        'nepochs': 50,
        'batch_size': 16,
        'keep_prob': 0.5,
        'check_dir': './checkpoints/GZ_EMNLP2016/kp20k_0.01_16',
        'display_test_per': 1,  #
        'lr_decay_per': 5  #
    }

    # data_set_file = 'data/ACL2017/inspec/inspec_t_a_GZ_data_set.pkl'
    # emb_file = 'data/ACL2017/inspec/inspec_t_a_GZ_embedding.pkl'
    # data_set_file = 'data/ACL2017/semeval/semeval_t_a_GZ_data_set.pkl'
    #emb_file = 'data/ACL2017/semeval/semeval_t_a_GZ_embedding.pkl'
    data_set_file = 'data/ACL2017/kp20k/kp20k_t_a_allwords_data_set.pkl'
    emb_file = 'data/ACL2017/kp20k/ACL2017_t_a_embedding.pkl'
    #data_set_file = 'data/ACL2017/inspec/inspec_t_a_GZ_data_set.pkl'
    #emb_file = 'data/ACL2017/inspec/inspec_t_a_GZ_embedding.pkl'
    # data_set_file = 'data/ACL2017/semeval/semeval_t_a_GZ_data_set.pkl'
    # emb_file = 'data/ACL2017/semeval/semeval_t_a_GZ_embedding.pkl'
    #data_set_file = 'data/ACL2017/nus/nus_t_a_GZ_data_set.pkl'
    #emb_file = 'data/ACL2017/nus/nus_t_a_GZ_embedding.pkl'
    #data_set_file = 'data/ACL2017/krapivin/krapivin_t_a_GZ_data_set.pkl'
    #emb_file = 'data/ACL2017/krapivin/krapivin_t_a_GZ_embedding.pkl'
    print('loading dataset.....')
    # train_set,test_set,dic,embedding = load.atisfold(data_set_file, emb_file)
    train_set, valid_set, test_set, dic, embedding = load.atisfold_ACL2017(
        data_set_file, emb_file)
    # idx2label = dict((k,v) for v,k in dic['labels2idx'].iteritems())
    # idx2word  = dict((k,v) for v,k in dic['words2idx'].iteritems())

    train_lex, train_y, train_z = train_set
    # train_lex: [[word idx of each tweet], [word idx of each tweet]], train_y: [[1 at keyword positions]], train_z: [[keyword positions tagged 0~4 (begin, end, ...)]]
    # tr = int(len(train_lex)*0.9)
    # valid_lex, valid_y, valid_z = train_lex[tr:], train_y[tr:], train_z[tr:]
    # train_lex, train_y, train_z = train_lex[:tr], train_y[:tr], train_z[:tr]
    # test_lex,  test_y, test_z  = test_set
    valid_lex, valid_y, valid_z = valid_set
    test_lex, test_y, test_z = test_set
    log_dir = s['check_dir']
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    logfile = open(str(s['check_dir']) + '/log.txt',
                   'a',
                   encoding='utf-8',
                   buffering=1)
    print('len(train_data) {}'.format(len(train_lex)))
    print('len(valid_data) {}'.format(len(valid_lex)))
    print('len(test_data) {}'.format(len(test_lex)))
    logfile.write('len(train_data) {}\n'.format(len(train_lex)))
    logfile.write('len(valid_data) {}\n'.format(len(valid_lex)))
    logfile.write('len(test_data) {}\n'.format(len(test_lex)))
    vocab = set(dic['words2idx'].keys())
    vocsize = len(vocab)
    print('len(vocab) {}'.format(vocsize))
    print("Train started!")
    logfile.write('len(vocab) {}\n'.format(vocsize))
    logfile.write("Train started!\n")
    y_nclasses = 2
    z_nclasses = 5

    nsentences = len(train_lex)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
        allow_soft_placement=True)  ###########################################
    with tf.compat.v1.Session(
            config=config) as sess:  #####################################
        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          batch_size=s['batch_size'],
                          model_cell='lstm')
        #     my_model = mymodel.myModel(
        #         # nh1=s['nh1'],
        #         # nh2=s['nh2'],
        #         # ny=y_nclasses,
        #         # nz=z_nclasses,
        #         de=s['emb_dimension'],
        #         lr=s['lr'],
        #         lr_decay=s['lr_decay'],
        #         embedding=embedding,
        #         max_gradient_norm=s['max_grad_norm'],
        #         keep_prob=s['keep_prob'],
        #         model_cell='lstm'
        #     )

        # save the model
        checkpoint_dir = s['check_dir']
        if not os.path.exists(checkpoint_dir):
            os.mkdir(checkpoint_dir)
        checkpoint_prefix = os.path.join(checkpoint_dir, 'model')

        def train_step(cwords, label_y, label_z):
            feed = {
                rnn.input_x: cwords,
                rnn.input_y: label_y,
                rnn.input_z: label_z,
                rnn.keep_prob: s['keep_prob']
                # rnn.batch_size:s['batch_size']
            }
            fetches = [rnn.loss, rnn.train_op]
            loss, _ = sess.run(fetches=fetches, feed_dict=feed)
            # _,Loss = sess.run(fetches=fetches, feed_dict=feed)
            return loss

        def dev_step(cwords):
            feed = {
                rnn.input_x: cwords,
                rnn.keep_prob: 1.0
                # rnn.keep_prob:1.0,
                # rnn.batch_size:s['batch_size']
            }
            fetches = rnn.sz_pred
            sz_pred = sess.run(fetches=fetches, feed_dict=feed)
            return sz_pred

        saver = tf.train.Saver(tf.all_variables(), max_to_keep=2)
        sess.run(tf.global_variables_initializer())

        best_f = -1
        best_e = 0
        test_best_f = -1
        test_best_e = 0
        best_res = None
        test_best_res = None
        for e in range(s['nepochs']):
            tools.shuffle([train_lex, train_y, train_z], s['seed'])
            t_start = time.time()
            start_num = 0
            # for step,batch in enumerate(tl.iterate.minibatches(train_lex,list(zip(train_y,train_z)),batch_size=s['batch_size'])):
            # for step, batch in enumerate(batch_putin(train_lex, list(zip(train_y, train_z)), start_num=start_num, batch_size=s['batch_size'])):
            steps = len(train_lex) // s['batch_size']
            for step in range(steps):
                # batch = batch_putin(train_lex,list(zip(train_y, train_z)), start_num=start_num, batch_size=s['batch_size'])
                # input_x,target=batch
                # label_y,label_z=list(zip(*target))
                input_x, label_y, label_z = train_batch_putin(
                    train_lex,
                    train_y,
                    train_z,
                    start_num=start_num,
                    batch_size=s['batch_size'])
                input_x = load.pad_sentences(input_x)
                label_y = load.pad_sentences(label_y)
                label_z = load.pad_sentences(label_z)
                cwords = tools.contextwin_2(input_x, s['win'])
                # cwords = input_x
                loss = train_step(cwords, label_y, label_z)
                start_num += s['batch_size']
                print(
                    'loss %.6f' % loss, ' [learning] epoch %i>> %2.2f%%' %
                    (e, s['batch_size'] * step * 100. / nsentences),
                    'completed in %.2f (sec) <<\r' % (time.time() - t_start))
                if step % 1000 == 0:
                    logfile.write('loss %.6f' % loss)
                    logfile.write(
                        ' [learning] epoch %i>> %2.2f%%' %
                        (e, s['batch_size'] * step * 100. / nsentences))
                    logfile.write('completed in %.2f (sec) <<\n' %
                                  (time.time() - t_start))
                # sys.stdout.flush())

            #VALID
            if e >= 0:
                print('Validing..............')
                predictions_valid = []
                predictions_test = []
                groundtruth_valid = []
                groundtruth_test = []
                start_num = 0
                steps = len(valid_lex) // s['batch_size']
                # for batch in  tl.iterate.minibatches(valid_lex,valid_z,batch_size=s['batch_size']):
                for step in range(steps):
                    # batch = batch_putin(valid_lex, valid_z, start_num=start_num, batch_size=s['batch_size'])
                    # x,z=batch
                    x, z = test_batch_putin(valid_lex,
                                            valid_z,
                                            start_num=start_num,
                                            batch_size=s['batch_size'])
                    x = load.pad_sentences(x)
                    x = tools.contextwin_2(x, s['win'])
                    predictions_valid.extend(dev_step(x))
                    groundtruth_valid.extend(z)
                    start_num += s['batch_size']

                res_valid = tools.conlleval(predictions_valid,
                                            groundtruth_valid)
                del predictions_valid
                del groundtruth_valid
                if res_valid['f'] > best_f:
                    best_f = res_valid['f']
                    best_e = e
                    best_res = res_valid
                    print('\nVALID new best:', res_valid)
                    logfile.write('\nVALID new best: ' + str(res_valid))
                    path = saver.save(sess=sess,
                                      save_path=checkpoint_prefix,
                                      global_step=e)
                    print("Save model checkpoint to {}".format(path))
                    logfile.write(
                        "\nSave model checkpoint to {}\n".format(path))
                else:
                    print('\nVALID new curr:', res_valid)
                    logfile.write('\nVALID new curr: ' + str(res_valid))

                #TEST
                print('Testing..............')
                start_num = 0
                steps = len(test_lex) // s['batch_size']
                if e % s['display_test_per'] == 0:
                    # for batch in tl.iterate.minibatches(test_lex, test_z, batch_size=s['batch_size']):
                    for step in range(steps):
                        # batch = batch_putin(test_lex, test_z, start_num=start_num, batch_size=s['batch_size'])
                        # x,z = batch
                        x, z = test_batch_putin(test_lex,
                                                test_z,
                                                start_num=start_num,
                                                batch_size=s['batch_size'])
                        x = load.pad_sentences(x)
                        x = tools.contextwin_2(x, s['win'])
                        predictions_test.extend(dev_step(x))
                        groundtruth_test.extend(z)
                        start_num += s['batch_size']

                    res_test = tools.conlleval(predictions_test,
                                               groundtruth_test)

                    if res_test['f'] > test_best_f:
                        test_best_f = res_test['f']
                        test_best_e = e
                        test_best_res = res_test
                        print('TEST new best:', res_test)
                        logfile.write('\nTEST new best: ' + str(res_test))
                    else:
                        print('TEST new curr:', res_test)
                        logfile.write('\nTEST new curr: ' + str(res_test))

                # learning rate decay if no improvement in 10 epochs
                if e - best_e > s['lr_decay_per']:
                    sess.run(fetches=rnn.learning_rate_decay_op)
                lr = sess.run(fetches=rnn.lr)
                print('learning rate:%f' % lr)
                logfile.write('\nlearning rate:%f\n' % lr)
                if lr < 1e-6: break

        print("Train finished!")
        print('Valid Best Result: epoch %d:  ' % (best_e), best_res)
        print('Test Best Result: epoch %d:  ' % (test_best_e), test_best_res)
        logfile.write("Train finished!\n")
        logfile.write('Valid Best Result: epoch %d:   ' % (best_e) +
                      str(best_res))
        logfile.write('\nTest Best Result: epoch %d:   ' % (test_best_e) +
                      str(test_best_res))
        logfile.close()
Example 21
def main():
    s = {
        'nh1': 450,  # hidden units in the 1st LSTM layer
        'nh2': 450,  # hidden units in the 2nd LSTM layer
        'emb_dimension': 300,  # word embedding dimension
        'lr': 0.0001,  # initial learning rate
        'lr_decay': 0.5,  # learning rate decay factor
        'lr_decay_per': 5,  # if accuracy has not improved after 5 epochs, decay the learning rate to 0.5x
        'nepochs': 50,  # train for 50 epochs in total
        'batch_size': 16,  # batch_size=16
        'keep_prob': 0.5,  # dropout probability
        'check_dir':
        './checkpoints/kp20k_mycps_multisize_CNN_LSTM_attention_Adam_0.0001_16_GPU_goon',  # model save path
        'max_grad_norm': 5,  #
        'seed': 345,  #
        'display_test_per': 1,  #
        'load_ckpt_dir':
        './checkpoints/kp20k_mycps_multisize_CNN_LSTM_attention_Adam_0.0001_16_NEW',
        'again_epoch': 7  ##############################
    }

    data_set_file = 'data/ACL2017/kp20k/kp20k_t_a_allwords_data_set.pkl'
    emb_file = 'data/ACL2017/ACL2017_t_a_embedding.pkl'
    # train_set, test_set, dic, embedding = load.atisfold(data_set_file, emb_file)
    print('loading dataset.....')
    train_set, valid_set, test_set, dic, embedding = load.atisfold_ACL2017(
        data_set_file, emb_file)

    train_lex, train_y, train_z = train_set
    # train_lex: [[word idx of each tweet], [word idx of each tweet]], train_y: [[1 at keyword positions]], train_z: [[keyword positions tagged 0~4 (begin, end, ...)]]
    # tr = int(len(train_lex) * 0.9)
    # valid_lex, valid_y, valid_z = train_lex[tr:], train_y[tr:], train_z[tr:]    ################
    # train_lex, train_y, train_z = train_lex[:tr], train_y[:tr], train_z[:tr]
    valid_lex, valid_y, valid_z = valid_set
    test_lex, test_y, test_z = test_set
    log_dir = s['check_dir']
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    logfile = open(str(s['check_dir']) + '/log.txt', 'a', encoding='utf-8')
    print('len(train_data) {}'.format(len(train_lex)))
    print('len(valid_data) {}'.format(len(valid_lex)))
    print('len(test_data) {}'.format(len(test_lex)))
    logfile.write('len(train_data) {}\n'.format(len(train_lex)))
    logfile.write('len(valid_data) {}\n'.format(len(valid_lex)))
    logfile.write('len(test_data) {}\n'.format(len(test_lex)))
    vocab = set(dic['words2idx'].keys())
    vocsize = len(vocab)
    print('len(vocab) {}'.format(vocsize))
    print("Train started!")
    logfile.write('len(vocab) {}\n'.format(vocsize))
    logfile.write("Train started!\n")
    y_nclasses = 2
    z_nclasses = 5

    nsentences = len(train_lex)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=True,
        allow_soft_placement=True)  ###########################################
    with tf.compat.v1.Session(
            config=config) as sess:  #####################################
        my_model = mymodel.myModel(nh1=s['nh1'],
                                   nh2=s['nh2'],
                                   ny=y_nclasses,
                                   nz=z_nclasses,
                                   de=s['emb_dimension'],
                                   lr=s['lr'],
                                   lr_decay=s['lr_decay'],
                                   embedding=embedding,
                                   max_gradient_norm=s['max_grad_norm'],
                                   batch_size=s['batch_size'],
                                   rnn_model_cell='lstm')

        # save the model
        checkpoint_dir = s['check_dir']
        if not os.path.exists(checkpoint_dir):
            os.mkdir(checkpoint_dir)
        checkpoint_prefix = os.path.join(checkpoint_dir, 'model')

        # reload the model from a checkpoint
        load_ckp_dir = s['load_ckpt_dir']
        loader = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(load_ckp_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # print(ckpt.all_model_checkpoint_paths[4])
            print(ckpt.model_checkpoint_path)
            logfile.write('loading ' + str(ckpt.model_checkpoint_path) +
                          '......\n')
            loader.restore(sess, ckpt.model_checkpoint_path)

        def train_step(cwords, label_y, label_z):
            feed = {
                my_model.cnn_input_x: cwords,
                my_model.rnn_input_y: label_y,
                my_model.rnn_input_z: label_z,
                my_model.keep_prob: s['keep_prob']
            }
            fetches = [my_model.loss, my_model.train_op]
            loss, _ = sess.run(fetches=fetches, feed_dict=feed)
            return loss

        def dev_step(cwords):
            feed = {my_model.cnn_input_x: cwords, my_model.keep_prob: 1.0}
            fetches = my_model.sz_pred
            sz_pred = sess.run(fetches=fetches, feed_dict=feed)
            return sz_pred

        saver = tf.train.Saver(tf.all_variables(), max_to_keep=3)
        # sess.run(tf.global_variables_initializer())
        best_f = 0.32468199323141556  ################################################
        best_e = 6
        decay_e = 6
        test_best_f = 0.3285848062741426  ###############################################
        test_best_e = 6
        best_res = None
        test_best_res = None
        for e in range(
                s['again_epoch'], s['nepochs']
        ):  ################################################################
            tools.shuffle([train_lex, train_y, train_z], s['seed'])
            t_start = time.time()
            start_num = 0
            steps = len(train_lex) // s['batch_size']
            for step in range(
                    steps):  ##################################################
                input_x, label_y, label_z = train_batch_putin(
                    train_lex,
                    train_y,
                    train_z,
                    start_num=start_num,
                    batch_size=s['batch_size'])
                # input_x, label_y, label_z = batch
                # label_y, label_z = list(zip(*target))
                input_x = load.pad_sentences(input_x)
                label_y = load.pad_sentences(label_y)
                label_z = load.pad_sentences(label_z)
                loss = train_step(input_x, label_y, label_z)
                start_num += s['batch_size']
                print(
                    'loss %.6f' % loss, ' [learning] epoch %i>> %2.2f%%' %
                    (e, s['batch_size'] * step * 100. / nsentences),
                    'completed in %.2f (sec) <<\r' % (time.time() - t_start))
                if step % 1000 == 0:
                    logfile.write('loss %.6f' % loss)
                    logfile.write(
                        ' [learning] epoch %i>> %2.2f%%' %
                        (e, s['batch_size'] * step * 100. / nsentences))
                    logfile.write('completed in %.2f (sec) <<\n' %
                                  (time.time() - t_start))

            # VALID
            if e >= 0:
                print('Validing..............')
                predictions_valid = []
                predictions_test = []
                groundtruth_valid = []
                groundtruth_test = []
                start_num = 0
                steps = len(valid_lex) // s['batch_size']
                for step in range(steps):
                    x, z = test_batch_putin(valid_lex,
                                            valid_z,
                                            start_num=start_num,
                                            batch_size=s['batch_size'])
                    # x, z = batch
                    x = load.pad_sentences(x)
                    predictions_valid.extend(dev_step(x))
                    groundtruth_valid.extend(z)
                    start_num += s['batch_size']

                res_valid = tools.conlleval(predictions_valid,
                                            groundtruth_valid)
                del predictions_valid
                del groundtruth_valid
                if res_valid['f'] > best_f:
                    best_f = res_valid['f']
                    best_e = e
                    decay_e = e
                    best_res = res_valid
                    print('\nVALID new best:', res_valid)
                    logfile.write('\nVALID new best: ' + str(res_valid))
                    path = saver.save(sess=sess,
                                      save_path=checkpoint_prefix,
                                      global_step=e)
                    print("Save model checkpoint to {}".format(path))
                    logfile.write(
                        "\nSave model checkpoint to {}\n".format(path))
                else:
                    print('\nVALID new curr:', res_valid)
                    logfile.write('\nVALID new curr: ' + str(res_valid))

                # TEST
                print('Testing..............')
                start_num = 0
                steps = len(test_lex) // s['batch_size']
                if e % s['display_test_per'] == 0:
                    for step in range(steps):
                        x, z = test_batch_putin(test_lex,
                                                test_z,
                                                start_num=start_num,
                                                batch_size=s['batch_size'])
                        # x, z = batch
                        x = load.pad_sentences(x)
                        predictions_test.extend(dev_step(x))
                        groundtruth_test.extend(z)
                        start_num += s['batch_size']

                    res_test = tools.conlleval(predictions_test,
                                               groundtruth_test)
                    del predictions_test
                    del groundtruth_test
                    if res_test['f'] > test_best_f:
                        test_best_f = res_test['f']
                        test_best_e = e
                        test_best_res = res_test
                        print('TEST new best:', res_test)
                        logfile.write('\nTEST new best: ' + str(res_test) +
                                      '\n')
                    else:
                        print('TEST new curr:', res_test)
                        logfile.write('\nTEST new curr: ' + str(res_test) +
                                      '\n')

                # learning rate decay if no improvement in 10 epochs
                if e - best_e > s['lr_decay_per'] and e - decay_e > s[
                        'lr_decay_per']:
                    sess.run(fetches=my_model.learning_rate_decay_op)
                    decay_e = e
                lr = sess.run(fetches=my_model.lr)
                print('learning rate:%f' % lr)
                logfile.write('\nlearning rate:%f\n' % lr)
                if lr < 1e-6: break

        print("Train finished!")
        print('Valid Best Result: epoch %d:  ' % (best_e), best_res)
        print('Test Best Result: epoch %d:  ' % (test_best_e), test_best_res)
        logfile.write("Train finished!\n")
        logfile.write('Valid Best Result: epoch %d:   ' % (best_e) +
                      str(best_res))
        logfile.write('\nTest Best Result: epoch %d:   ' % (test_best_e) +
                      str(test_best_res))
        logfile.close()
Example 22
        time.sleep(1.5)
        print(my_room)

        # if power_card is None:

        in_game = int(
            input("\n1. New card from deck\n"
                  "2. Burn\n"
                  "3. CAMEO !\n"
                  "-> "))

        if in_game == 1:
            new_card = my_room.deck.pop(0)

            if len(my_room.deck) == 0:
                my_room.deck = tools.shuffle(deck=my_room.stack)
                my_room.stack = []
            print(f"\n{upper_lines:>{18}}")
            print(
                f"new card : | {colors.BOLD}{tools.show_card(new_card)}{colors.ENDC} |"
            )
            print(f"{upper_lines:>{18}}")

            if tools.show_card(new_card) not in tools.power_deck:
                card_option = int(
                    input("\n1. Do nothing\n"
                          "2. Replace with one of yours\n"
                          "-> "))

            else:
                card_option = int(
Example 23
def main():
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.1,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 150,
        'batch_size': 16,
        'keep_prob': 0.5,
        'check_dir': './checkpoints',
        'display_test_per': 3,
        'lr_decay_per': 10
    }

    train_set, test_set, dic, embedding = load.atisfold()

    idx2label = dict((k, v) for v, k in dic['labels2idx'].items())
    idx2word = dict((k, v) for v, k in dic['words2idx'].items())

    train_lex, train_y, train_z = train_set

    tr = int(len(train_lex) * 0.9)
    valid_lex, valid_y, valid_z = train_lex[tr:], train_y[tr:], train_z[tr:]
    train_lex, train_y, train_z = train_lex[:tr], train_y[:tr], train_z[:tr]
    test_lex, test_y, test_z = test_set

    print('len(train_data) {}'.format(len(train_lex)))
    print('len(valid_data) {}'.format(len(valid_lex)))
    print('len(test_data) {}'.format(len(test_lex)))

    vocab = set(dic['words2idx'].keys())
    vocsize = len(vocab)
    print('len(vocab) {}'.format(vocsize))
    print("Train started!")

    y_nclasses = 2
    z_nclasses = 5

    nsentences = len(train_lex)

    # tf.reset_default_graph()
    print('#' * 30)
    with tf.Session() as sess:

        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          model_cell='lstm')

        checkpoint_dir = s['check_dir']
        if not os.path.exists(checkpoint_dir):
            os.mkdir(checkpoint_dir)
        checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
        print("*" * 30)

        def train_step(cwords, label_y, label_z):
            feed = {
                rnn.input_x: cwords,
                rnn.input_y: label_y,
                rnn.input_z: label_z,
                rnn.keep_prob: s['keep_prob'],
                rnn.batch_size: s['batch_size']
            }
            fetches = [rnn.loss, rnn.train_op]
            loss, _ = sess.run(fetches=fetches, feed_dict=feed)
            return loss

        def dev_step(cwords):
            feed = {
                rnn.input_x: cwords,
                rnn.keep_prob: 1.0,
                rnn.batch_size: s['batch_size']
            }
            fetches = rnn.sz_pred
            sz_pred = sess.run(fetches=fetches, feed_dict=feed)
            return sz_pred

        saver = tf.train.Saver(tf.all_variables())
        sess.run(tf.initialize_all_variables())

        best_f = -1
        best_e = 0
        test_best_f = -1
        test_best_e = 0
        best_res = None
        test_best_res = None
        for e in range(s['nepochs']):
            tools.shuffle([train_lex, train_y, train_z], s['seed'])
            t_start = time.time()
            for step, batch in enumerate(
                    tl.iterate.minibatches(train_lex,
                                           list(zip(train_y, train_z)),
                                           batch_size=s['batch_size'])):
                input_x, target = batch
                label_y, label_z = zip(*target)
                input_x = load.pad_sentences(input_x)
                label_y = load.pad_sentences(label_y)
                label_z = load.pad_sentences(label_z)
                # print(type(input_x))
                # print(type(s['win']))
                # print(input_x)
                # print(s['win'])
                cwords = tools.contextwin_2((input_x), s['win'])
                #print(s['batch_size'])
                loss = train_step(cwords, label_y, label_z)

                print(
                    'loss %.2f' % loss,
                    ' [learning] epoch %i>> %2.2f%%' %
                    (e, s['batch_size'] * step * 100. / nsentences),
                    'completed in %.2f (sec) <<\r' % (time.time() - t_start),
                )

                sys.stdout.flush()

            #VALID

            predictions_valid = []
            predictions_test = []
            groundtruth_valid = []
            groundtruth_test = []
            for batch in tl.iterate.minibatches(valid_lex,
                                                valid_z,
                                                batch_size=s['batch_size']):
                x, z = batch
                x = load.pad_sentences(x)
                x = tools.contextwin_2(x, s['win'])
                predictions_valid.extend(dev_step(x))
                groundtruth_valid.extend(z)

            res_valid = tools.conlleval(predictions_valid, groundtruth_valid,
                                        '')

            if res_valid['f'] > best_f:
                best_f = res_valid['f']
                best_e = e
                best_res = res_valid
                print('\nVALID new best:', res_valid)
                path = saver.save(sess=sess,
                                  save_path=checkpoint_prefix,
                                  global_step=e)
                print("Save model checkpoint to {}".format(path))
            else:
                print('\nVALID new curr:', res_valid)

            #TEST
            if e % s['display_test_per'] == 0:
                for batch in tl.iterate.minibatches(
                        test_lex, test_z, batch_size=s['batch_size']):
                    x, z = batch
                    x = load.pad_sentences(x)
                    x = tools.contextwin_2(x, s['win'])
                    predictions_test.extend(dev_step(x))
                    groundtruth_test.extend(z)

                res_test = tools.conlleval(predictions_test, groundtruth_test,
                                           '')

                if res_test['f'] > test_best_f:
                    test_best_f = res_test['f']
                    test_best_e = e
                    test_best_res = res_test
                    print('TEST new best:', res_test)
                else:
                    print('TEST new curr:', res_test)

            # learning rate decay if no improvement in 10 epochs
            if e - best_e > s['lr_decay_per']:
                sess.run(fetches=rnn.learning_rate_decay_op)
            lr = sess.run(fetches=rnn.lr)
            print('learning rate:%f' % lr)
            if lr < 1e-5: break
            print()

        print("Train finished!")
        print('Valid Best Result: epoch %d:  ' % (best_e), best_res)
        print('Test Best Result: epoch %d:  ' % (test_best_e), test_best_res)
Example 24
                Wlnum = s['Wlnum'], Wrnum = s['Wrnum'], kalpha=s['kalpha'])

    s['cur_lr'] = s['lr']

   
    test_pred = []
    for ei, di, li, si, tli, tri, tai in zip(test_e, test_d, test_l, test_s, test_tl, test_tr, test_ta ):
        test_pred += [rnn.classify(ei, di, li, si, tli, tri, tai)]                 
    res_test = conlleval(test_pred, test_y)
    print ""
    for (d,x) in res_test.items():
        print d + ": " + str(x)
    print "start train"
    for e in xrange(s['epoch']):
        #shuffle
        shuffle([train_e, train_d, train_l, train_s, train_tl, train_tr, train_ta, train_y, train_yp], s['seed'])
        s['cur_epoch'] = e
        tic = time.time()
        for i in xrange(nsentences):
            ei = train_e[i]
            di = train_d[i]
            li = train_l[i]
            si = train_s[i]
            tli = train_tl[i]
            tri = train_tr[i]
            tai = train_ta[i]
            yi = train_y[i]
            ypi = train_yp[i]
            s['cur_lr'] *= (0.95 ** (1.0 / nsentences))
            rnn.sentence_train(ei, di, li, si, tli, tri,tai, yi, s['cur_lr'], ypi)