Code Example #1
File: predict.py Project: ZeyuanZhao50/Competition
def main(_):
    if not os.path.exists(ckpt_path + 'checkpoint'):
        print('there is no saved model, please check the ckpt path')
        exit()
    print('Loading model...')
    W_embedding = np.load(embedding_path)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = network.TextCNN(W_embedding, settings)
        model.saver.restore(sess, tf.train.latest_checkpoint(ckpt_path))
        print('Local predicting...')
        local_predict(sess, model)
        print('Test predicting...')
        predict(sess, model)
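
All of the predict-side examples on this page share the same TensorFlow 1.x restore pattern: check for the 'checkpoint' index file that Saver.save writes, configure the session, then load the latest weights. Below is a minimal self-contained sketch of just that pattern, with ckpt_path as a placeholder and the graph recovered from the saved meta file rather than rebuilt via network.TextCNN:

import os
import tensorflow as tf

ckpt_path = './ckpt/'  # placeholder; the real path comes from the project config

# Saver.save always writes a 'checkpoint' index file, so its absence
# means no model has been saved under ckpt_path.
if not os.path.exists(os.path.join(ckpt_path, 'checkpoint')):
    raise SystemExit('no saved model, please check the ckpt path')

config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # claim GPU memory on demand
with tf.Session(config=config) as sess:
    latest = tf.train.latest_checkpoint(ckpt_path)
    saver = tf.train.import_meta_graph(latest + '.meta')  # rebuild the graph
    saver.restore(sess, latest)                           # load the weights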
Code Example #2
def main(_):
    # Load the model: first check that the checkpoint path exists.
    if not os.path.exists(ckpt_path + 'checkpoint'):
        print('there is no saved model, exiting the program')
        exit()
    print('loading the model')
    W_embedding = np.load(embedding_path)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # Create the Session, feed the data, and run.
    with tf.Session(config=config) as sess:
        model = network.TextCNN(W_embedding, settings)
        model.saver.restore(sess, tf.train.latest_checkpoint(ckpt_path))
        print('Local predicting...')
        local_predict(sess, model)
        print('Test predicting')
        predict(sess, model)
Code Example #3
def main(_):

    if not os.path.exists(ckpt_path + 'checkpoint'):
        print('there is no saved model, please check the ckpt path')
        exit()
    print('Loading model...')
    W_embedding = np.load(embedding_path)
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.50
    config.gpu_options.allocator_type = 'BFC'
    with tf.Session(config=config) as sess:
        model = network.TextCNN(W_embedding, settings)
        model.saver.restore(sess, tf.train.latest_checkpoint(ckpt_path))
        #print('dev predicting...')
        #predict_dev(sess, model)
        print('test predicting...')
        predict(sess, model)
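
Example #3 is the only one that goes beyond allow_growth: allow_soft_placement=True lets ops without a GPU kernel fall back to CPU, per_process_gpu_memory_fraction caps how much device memory the process may claim, and allocator_type = 'BFC' selects the best-fit-with-coalescing allocator. The same configuration, isolated and annotated:

import tensorflow as tf

config = tf.ConfigProto(allow_soft_placement=True)  # fall back to CPU when an op has no GPU kernel
config.gpu_options.allow_growth = True              # allocate GPU memory incrementally
config.gpu_options.per_process_gpu_memory_fraction = 0.50  # claim at most half the device memory
config.gpu_options.allocator_type = 'BFC'           # best-fit-with-coalescing allocator
sess = tf.Session(config=config)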
Code Example #4
File: predict.py Project: shelleyHLX/daguanbei_2018
def main(_):
    if not os.path.exists(ckpt_path + 'checkpoint'):
        print('there is no saved model, please check the ckpt path')
        exit()
    print('Loading model...')
    W_embedding = np.load(embedding_path)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    log_path = scores_path + settings.model_name + '/'
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    logger = get_logger(log_path + 'predict.log')
    with tf.Session(config=config) as sess:
        model = network.TextCNN(W_embedding, settings)
        # ckpt_path: /ckpt/wd_BiGRU/checkpoint
        model.saver.restore(sess, tf.train.latest_checkpoint(ckpt_path))

        print('Valid predicting...')
        predict_valid(sess, model, logger)

        print('Test predicting...')
        predict(sess, model, logger)
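
get_logger is a project helper that is not shown on this page. A plausible minimal reconstruction, assuming it logs timestamped lines to the given file and echoes them to the console (the real helper may differ):

import logging

def get_logger(log_file):
    # Hypothetical sketch of the project's get_logger helper.
    logger = logging.getLogger(log_file)
    logger.setLevel(logging.INFO)
    if not logger.handlers:  # avoid duplicate handlers on repeated calls
        fmt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        file_handler = logging.FileHandler(log_file)
        file_handler.setFormatter(fmt)
        logger.addHandler(file_handler)
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(fmt)
        logger.addHandler(console_handler)
    return logger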
Code Example #5
def main():
    if not os.path.exists(ckpt_path + 'checkpoint'):
        print('there is no saved model, please check the ckpt path')
        exit()
    print('Loading model...')
    W_embedding = np.load(embedding_path)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = network.TextCNN(W_embedding, settings)
        model.saver.restore(sess, tf.train.latest_checkpoint(ckpt_path))
        #print('Local predicting...')
        #print ('valid batches:%d'%n_va_batches)
        #local_predict(sess, model)
        print('Test predicting...')
        print('test batches:%d' % n_tr_batches)
        results = predict(sess, model)
        sub_path_name = sub_path + model_name + strftime("%m%d%H%M") + '.csv'
        id_list = get_id_list(id_list_path)
        #         id_list=id_list[:len(results)]
        write_submission(sub_path_name, id_list, results, sr_id2title)
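
write_submission, get_id_list and sr_id2title are project-specific helpers that are not shown here. Purely as an illustration, a hypothetical write_submission pairing each sample id with its predicted labels in a CSV could look like:

import csv

def write_submission(path, id_list, results, id2title=None):
    # Hypothetical sketch only: one row per sample id with its predicted
    # labels; the project's real helper may use a different row format.
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        for sample_id, labels in zip(id_list, results):
            writer.writerow([sample_id] + list(labels))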
Code Example #6
File: train.py Project: shelleyHLX/daguanbei_2018
def main(_):
    global ckpt_path
    global last_score12
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    if not os.path.exists(summary_path):
        os.makedirs(summary_path)
    elif not FLAGS.is_retrain:
        shutil.rmtree(summary_path)
        os.makedirs(summary_path)
    if not os.path.exists(log_path):
        os.makedirs(log_path)

    print('1.Loading data...')
    W_embedding = np.load(embedding_path)
    print('training sample_num = %d' % n_tr_batches)
    print('valid sample_num = %d' % n_va_batches)
    logger = get_logger(log_path + FLAGS.log_file_train)

    print('2.Building model...')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = network.TextCNN(W_embedding, settings)
        with tf.variable_scope('training_ops') as vs:
            learning_rate = tf.train.exponential_decay(FLAGS.lr,
                                                       model.global_step,
                                                       FLAGS.decay_step,
                                                       FLAGS.decay_rate,
                                                       staircase=True)
            with tf.variable_scope('Optimizer1'):
                tvars1 = tf.trainable_variables()
                grads1 = tf.gradients(model.loss, tvars1)
                optimizer1 = tf.train.AdamOptimizer(
                    learning_rate=learning_rate)
                train_op1 = optimizer1.apply_gradients(
                    zip(grads1, tvars1), global_step=model.global_step)
            with tf.variable_scope('Optimizer2'):
                tvars2 = [
                    tvar for tvar in tvars1 if 'embedding' not in tvar.name
                ]
                grads2 = tf.gradients(model.loss, tvars2)
                optimizer2 = tf.train.AdamOptimizer(
                    learning_rate=learning_rate)
                train_op2 = optimizer2.apply_gradients(
                    zip(grads2, tvars2), global_step=model.global_step)
            update_op = tf.group(*model.update_emas)
            merged = tf.summary.merge_all()  # summary
            train_writer = tf.summary.FileWriter(summary_path + 'train',
                                                 sess.graph)
            test_writer = tf.summary.FileWriter(summary_path + 'test')
            training_ops = [
                v for v in tf.global_variables()
                if v.name.startswith(vs.name + '/')
            ]

        if os.path.exists(ckpt_path + "checkpoint"):
            print("Restoring Variables from Checkpoint...")
            model.saver.restore(sess, tf.train.latest_checkpoint(ckpt_path))
            f1_micro, f1_macro, score12 = valid_epoch(data_valid_path, sess,
                                                      model)
            print('f1_micro=%g, f1_macro=%g, score12=%g' %
                  (f1_micro, f1_macro, score12))
            sess.run(tf.variables_initializer(training_ops))
            train_op2 = train_op1
        else:
            print('Initializing Variables...')
            sess.run(tf.global_variables_initializer())

        print('3.Begin training...')
        print('max_epoch=%d, max_max_epoch=%d' %
              (FLAGS.max_epoch, FLAGS.max_max_epoch))
        logger.info('max_epoch={}, max_max_epoch={}'.format(
            FLAGS.max_epoch, FLAGS.max_max_epoch))
        train_op = train_op2
        for epoch in range(FLAGS.max_max_epoch):
            print('\nepoch: ', epoch)
            logger.info('epoch:{}'.format(epoch))
            global_step = sess.run(model.global_step)
            print('Global step %d, lr=%g' %
                  (global_step, sess.run(learning_rate)))
            if epoch == FLAGS.max_epoch:
                train_op = train_op1

            train_fetches = [merged, model.loss, train_op, update_op]
            valid_fetches = [merged, model.loss]
            train_epoch(data_train_path, sess, model, train_fetches,
                        valid_fetches, train_writer, test_writer, logger)
        # Run validation one final time.
        f1_micro, f1_macro, score12 = valid_epoch(data_valid_path, sess, model)
        print('END:Global_step=%d: f1_micro=%g, f1_macro=%g, score12=%g' %
              (sess.run(model.global_step), f1_micro, f1_macro, score12))
        logger.info(
            'END:Global_step={}: f1_micro={}, f1_macro={}, score12={}'.format(
                sess.run(model.global_step), f1_micro, f1_macro, score12))
        if score12 > last_score12:
            saving_path = model.saver.save(sess, model_path,
                                           sess.run(model.global_step) + 1)
            print('saved new model to %s ' % saving_path)
            logger.info('saved new model to {}'.format(saving_path))
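
Examples #6 through #8 share the same two-optimizer trick for staged fine-tuning: Optimizer2 filters every variable whose name contains 'embedding' out of the trainable set, so the pretrained word vectors stay frozen until epoch max_epoch, when the loop switches to Optimizer1 and updates everything. A stripped-down sketch of just that mechanism, where loss, global_step and learning_rate stand in for the model's tensors:

import tensorflow as tf

def build_train_ops(loss, global_step, learning_rate):
    # op1 trains all trainable variables, embedding matrix included.
    tvars_all = tf.trainable_variables()
    opt1 = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op1 = opt1.apply_gradients(
        zip(tf.gradients(loss, tvars_all), tvars_all), global_step=global_step)

    # op2 excludes the embedding variables, so the pretrained vectors stay frozen.
    tvars_no_emb = [v for v in tvars_all if 'embedding' not in v.name]
    opt2 = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op2 = opt2.apply_gradients(
        zip(tf.gradients(loss, tvars_no_emb), tvars_no_emb), global_step=global_step)
    return train_op1, train_op2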
Code Example #7
File: train.py Project: HiJuly/Zhihu-Text-Rank-6-TF
def main(_):
    global ckpt_path
    global last_f1
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    if not os.path.exists(summary_path):
        os.makedirs(summary_path)
    elif not FLAGS.is_retrain:  # retraining this model: delete the previous summaries
        shutil.rmtree(summary_path)
        os.makedirs(summary_path)

    print('1.Loading data...')
    W_embedding = np.load(embedding_path)
    print('training sample_num = %d' % n_tr_batches)
    print('valid sample_num = %d' % n_va_batches)

    # Initial or restore the model
    print('2.Building model...')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = network.TextCNN(W_embedding, settings)
        with tf.variable_scope('training_ops') as vs:
            learning_rate = tf.train.exponential_decay(FLAGS.lr, model.global_step,
                                                       FLAGS.decay_step, FLAGS.decay_rate,
                                                       staircase=True)
            # two optimizers: op1 updates the embedding; op2 does not.
            with tf.variable_scope('Optimizer1'):
                tvars1 = tf.trainable_variables()
                grads1 = tf.gradients(model.loss, tvars1)
                optimizer1 = tf.train.AdamOptimizer(learning_rate=learning_rate)
                train_op1 = optimizer1.apply_gradients(zip(grads1, tvars1),
                                                       global_step=model.global_step)
            with tf.variable_scope('Optimizer2'):
                tvars2 = [tvar for tvar in tvars1 if 'embedding' not in tvar.name]
                grads2 = tf.gradients(model.loss, tvars2)
                optimizer2 = tf.train.AdamOptimizer(learning_rate=learning_rate)
                train_op2 = optimizer2.apply_gradients(zip(grads2, tvars2),
                                                       global_step=model.global_step)
            update_op = tf.group(*model.update_emas)
            merged = tf.summary.merge_all()  # summary
            train_writer = tf.summary.FileWriter(summary_path + 'train', sess.graph)
            test_writer = tf.summary.FileWriter(summary_path + 'test')
            training_ops = [v for v in tf.global_variables() if v.name.startswith(vs.name+'/')]

        # If a model has already been saved, restore the previous one.
        if os.path.exists(ckpt_path + "checkpoint"):
            print("Restoring Variables from Checkpoint...")
            model.saver.restore(sess, tf.train.latest_checkpoint(ckpt_path))
            last_valid_cost, precision, recall, last_f1 = valid_epoch(data_valid_path, sess, model)
            print(' valid cost=%g; p=%g, r=%g, f1=%g' % (last_valid_cost, precision, recall, last_f1))
            sess.run(tf.variables_initializer(training_ops))
            train_op2 = train_op1
        else:
            print('Initializing Variables...')
            sess.run(tf.global_variables_initializer())

        print('3.Begin training...')
        print('max_epoch=%d, max_max_epoch=%d' % (FLAGS.max_epoch, FLAGS.max_max_epoch))
        train_op = train_op2
        for epoch in range(FLAGS.max_max_epoch):
            global_step = sess.run(model.global_step)
            print('Global step %d, lr=%g' % (global_step, sess.run(learning_rate)))
            if epoch == FLAGS.max_epoch:  # update the embedding
                train_op = train_op1
            train_fetches = [merged, model.loss, train_op, update_op]
            valid_fetches = [merged, model.loss]
            train_epoch(data_train_path, sess, model, train_fetches, valid_fetches, train_writer, test_writer)
        # Run validation one final time.
        valid_cost, precision, recall, f1 = valid_epoch(data_valid_path, sess, model)
        print('END.Global_step=%d: valid cost=%g; p=%g, r=%g, f1=%g' % (
            sess.run(model.global_step), valid_cost, precision, recall, f1))
        if f1 > last_f1:  # save the better model
            saving_path = model.saver.save(sess, model_path, sess.run(model.global_step)+1)
            print('saved new model to %s ' % saving_path)
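
The learning-rate schedule used in these training scripts is a staircase exponential decay: the rate is multiplied by decay_rate once every decay_step global steps. A small isolated sketch with illustrative constants (the project's actual FLAGS values are not shown on this page):

import tensorflow as tf

global_step = tf.train.get_or_create_global_step()
# With these illustrative values the rate is 1e-3 for steps 0-999,
# then 0.65e-3 for steps 1000-1999, and so on.
learning_rate = tf.train.exponential_decay(learning_rate=1e-3,
                                           global_step=global_step,
                                           decay_steps=1000,
                                           decay_rate=0.65,
                                           staircase=True)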
Code Example #8
def main():
    global ckpt_path
    global last_f1
    # Create the ckpt and summary dirs if they do not exist; when retraining,
    # delete the previous summaries first.
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    if not os.path.exists(summary_path):
        os.makedirs(summary_path)
    elif not FLAGS.is_retrain:
        shutil.rmtree(summary_path)
        os.makedirs(summary_path)
    # Load the data: the pretrained embedding, with progress messages in between.
    print('1.loading embedding...')
    W_embedding = np.load(embedding_path)
    print('training sample_num = %d' % n_tr_batches)  # 23360
    print('valid sample_num = %d' % n_va_batches)  # 782

    # Build the model.
    print('2.building model...')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Open a Session; the model graph must be built inside it, with the
    # Session created from the config above.
    with tf.Session(config=config) as sess:
        # Build the model graph.
        model = network.TextCNN(W_embedding, settings)
        with tf.variable_scope('training_ops') as vs:
            learning_rate = tf.train.exponential_decay(FLAGS.lr, model.global_step,
                                                       FLAGS.decay_step, FLAGS.decay_rate,
                                                       staircase=True)

            # two optimizers: one keeps the embedding frozen, the other updates it
            with tf.variable_scope('Optimizer1'):
                tvars1 = tf.trainable_variables()
                grads1 = tf.gradients(model.loss, tvars1)
                optimizer1 = tf.train.AdamOptimizer(learning_rate=learning_rate)
                train_op1 = optimizer1.apply_gradients(zip(grads1, tvars1),
                                                       global_step=model.global_step)
            with tf.variable_scope('Optimizer2'):
                tvars2 = [tvar for tvar in tvars1 if 'embedding' not in tvar.name]
                grads2 = tf.gradients(model.loss, tvars2)
                optimizer2 = tf.train.AdamOptimizer(learning_rate=learning_rate)
                train_op2 = optimizer2.apply_gradients(zip(grads2, tvars2),
                                                       global_step=model.global_step)
            update_op = tf.group(*model.update_emas)
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(summary_path + 'train', sess.graph)
            test_writer = tf.summary.FileWriter(summary_path + 'test', sess.graph)
            training_ops = [v for v in tf.global_variables()
                            if v.name.startswith(vs.name + '/')]

        # If a model has been saved before, restore it.
        if os.path.exists(ckpt_path + 'checkpoint'):
            print('restoring model from checkpoint...')
            model.saver.restore(sess, tf.train.latest_checkpoint(ckpt_path))
            last_valid_cost, precision, recall, last_f1 = valid_epoch(data_valid_path, sess, model)
            print('valid_cost=%g, p=%g, r=%g, f1=%g' % (last_valid_cost, precision, recall, last_f1))
            sess.run(tf.variables_initializer(training_ops))
            train_op2 = train_op1
        else:
            print('Initializing variables...')
            sess.run(tf.global_variables_initializer())

        print('3. Begin training...')
        print('max_epoch=%d, max_max_epoch=%d' % (FLAGS.max_epoch, FLAGS.max_max_epoch))
        for epoch in range(FLAGS.max_max_epoch):
            global_step = sess.run(model.global_step)
            print('Global_step=%d, lr=%g' % (global_step, sess.run(learning_rate)))
            if epoch == FLAGS.max_epoch:
                train_op = train_op1
            else:
                train_op = train_op2
            train_fetches = [merged, model.loss, train_op, update_op]
            valid_fetches = [merged, model.loss]
            train_epoch(data_train_path, sess, model, train_fetches, valid_fetches,
                        train_writer, test_writer)
        # Run validation one final time.
        valid_cost, precision, recall, f1 = valid_epoch(data_valid_path, sess, model)
        print('END.Global_step=%d: valid cost=%g; p=%g, r=%g, f1=%g' % (
            sess.run(model.global_step), valid_cost, precision, recall, f1))
        if f1 > last_f1:  # save the better one
            saving_path = model.saver.save(sess, model_path, sess.run(model.global_step) + 1)
            print('saved new model to %s' % saving_path)
Code Example #9
if not os.path.exists(ckpt_path + 'checkpoint'):
    print('there is no saved model, please check the ckpt path')
    exit()

#print('Loading knowledge-point node ids and names......')
df_train = pd.read_csv('../../raw_data/all_knowledge_set.txt',
                       sep='\t',
                       usecols=[0, 1],
                       names=['topic_id', 'topic_name'],
                       dtype={'topic_id': object})
dict_topic_id2name = dict(zip(df_train.topic_id, df_train.topic_name.values))

print('Loading word vectors......')
W_embedding = np.load(embedding_path)
print('Defining the model structure......')
model = network.TextCNN(W_embedding, settings)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
print('Initializing model parameters......')
model.saver.restore(sess, tf.train.latest_checkpoint(ckpt_path))

app = Flask(__name__)


@app.route('/', methods=['POST', 'GET'])
def get_text_input():
    content = request.args.get('title')
    if not content:
        return 'Invalid parameter; expected format: http://127.0.0.1:5002/?title=<input text>'
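
Example #9 breaks off inside the request handler, so the prediction branch is not shown. Purely as an illustration, a hedged sketch of how such a Flask endpoint might finish; predict_one and the empty dict_topic_id2name are hypothetical stand-ins for the restored model and the mapping loaded above:

from flask import Flask, request, jsonify

app = Flask(__name__)
dict_topic_id2name = {}  # placeholder for the id-to-name mapping loaded above

def predict_one(title):
    # Hypothetical stand-in: the real code would feed the title through
    # the restored sess/model and return the predicted topic ids.
    return ['topic_id_placeholder']

@app.route('/', methods=['POST', 'GET'])
def get_text_input():
    content = request.args.get('title')
    if not content:
        return 'Invalid parameter; expected http://127.0.0.1:5002/?title=<input text>'
    topic_ids = predict_one(content)
    names = [dict_topic_id2name.get(t, t) for t in topic_ids]
    return jsonify({'title': content, 'topics': names})

if __name__ == '__main__':
    app.run(host='127.0.0.1', port=5002)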
Code Example #10
def main(_):
    global ckpt_path
    global last_f1
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    firsttime = datetime.datetime.now()
    print('1.Loading data...')
    W_embedding = np.load(embedding_path)
    print('training sample_num = %d' % n_tr_batches)
    print('valid sample_num = %d' % n_va_batches)

    # Initial or restore the model
    print('2.Building model...')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = network.TextCNN(W_embedding, settings)
        with tf.variable_scope('training_ops') as vs:
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                global_step = model.global_step
                optimizer = tf.train.MomentumOptimizer(FLAGS.lr, 0.9)
                train_op = tf.contrib.layers.optimize_loss(
                    loss=model.loss,
                    global_step=global_step,
                    clip_gradients=4.0,
                    learning_rate=FLAGS.lr,
                    optimizer=optimizer,
                    update_ops=update_ops)

            training_ops = [
                v for v in tf.global_variables()
                if v.name.startswith(vs.name + '/')
            ]

        dataset = Dataset(cross_validation=3)
        train_sample = Dataset(valid=True, cross_validation=3)
        # If a model has already been saved, restore the previous one.
        if os.path.exists(ckpt_path + "checkpoint"):
            print("Restoring Variables from Checkpoint...")
            model.saver.restore(sess, tf.train.latest_checkpoint(ckpt_path))
            last_valid_cost, precision, recall, last_f1 = valid_epoch(
                dataset, sess, model)
            print(' valid cost=%g; p=%g, r=%g, f1=%g' %
                  (last_valid_cost, precision, recall, last_f1))
            sess.run(tf.variables_initializer(training_ops))
        else:
            print('Initializing Variables...')
            sess.run(tf.global_variables_initializer())

        print('3.Begin training...')
        for epoch in range(FLAGS.max_max_epoch):
            print(
                'epoch %d ********************************************************'
                % epoch)
            starttime = datetime.datetime.now()
            train_fetches = [train_op]
            valid_fetches = [model.loss]
            train_epoch(dataset, sess, model, train_fetches, valid_fetches, '',
                        '', train_sample)
            print('epoch %d cost time:%s' %
                  (epoch, datetime.datetime.now() - starttime))
            print('total cost time:%s' % (datetime.datetime.now() - firsttime))
        # Run validation one final time.
        valid_cost, precision, recall, f1 = valid_epoch(dataset, sess, model)
        print('END.Global_step=%d: valid cost=%g; p=%g, r=%g, f1=%g' %
              (sess.run(model.global_step), valid_cost, precision, recall, f1))
        if f1 > last_f1:  # save the better model
            saving_path = model.saver.save(sess, model_path + str(f1) + '_',
                                           sess.run(model.global_step))
            print('saved new model to %s ' % saving_path)
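
Unlike the other training scripts, example #10 delegates gradient handling to tf.contrib.layers.optimize_loss, which computes the gradients, clips their global norm at 4.0, applies them with the supplied optimizer, and increments the global step in a single op. A self-contained toy sketch of the same call, where the tiny regression model exists only to supply a loss:

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4])
y = tf.placeholder(tf.float32, [None, 1])
pred = tf.layers.dense(x, 1)
loss = tf.losses.mean_squared_error(labels=y, predictions=pred)
global_step = tf.train.get_or_create_global_step()

# optimize_loss bundles gradient computation, global-norm clipping and the
# parameter update; it returns a tensor that evaluates to the loss.
train_op = tf.contrib.layers.optimize_loss(loss=loss,
                                           global_step=global_step,
                                           learning_rate=0.01,
                                           optimizer=tf.train.MomentumOptimizer(0.01, 0.9),
                                           clip_gradients=4.0)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {x: np.random.rand(8, 4), y: np.random.rand(8, 1)}
    print(sess.run(train_op, feed_dict=feed))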