def train():
    # Pipeline:
    #   1. data preprocessing
    #   2. seq2seq
    # ========================================================
    # Prepare the data
    print("train mode.......")
    print('Preparing data')
    if not os.path.exists(FLAGS.model_dir):
        os.makedirs(FLAGS.model_dir)

    # Data preprocessing
    # buckets_dir is the training-data directory.
    # Returns a list of four buckets, each backed by its own database.
    bucket_dbs = data_utils.read_bucket_dbs(FLAGS.buckets_dir)
    # Collect the number of samples held by each bucket
    bucket_sizes = []
    for i in range(len(buckets)):
        # Number of sentence pairs in this bucket
        bucket_size = bucket_dbs[i].size
        bucket_sizes.append(bucket_size)
        print('bucket {} contains {} samples'.format(i, bucket_size))
    # Total number of samples across all buckets
    total_size = sum(bucket_sizes)
    print('{} samples in total'.format(total_size))

    # Build the model and train
    gpu_options = tf.GPUOptions(
        allow_growth=True,  # let TensorFlow grow GPU allocations incrementally
        per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction))
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          gpu_options=gpu_options)) as sess:
        # Build the model (each bucket gets its own train op, loss and
        # summary ops, but all four share the same parameters).
        model = create_model(sess, False)
        # Initialize variables and restore any existing checkpoint
        print("Initializing variables and restoring the model.....")
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("Load old model from:", ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
            model.saver.recover_last_checkpoints(ckpt.all_model_checkpoint_paths)
        else:
            print("No existing model found")
        # Cumulative share of samples per bucket
        # (bucket 1, buckets 1+2, buckets 1+2+3, buckets 1+2+3+4)
        buckets_scale = [
            sum(bucket_sizes[:i + 1]) / total_size
            for i in range(len(bucket_sizes))
        ]
        # Start training
        metrics = ' '.join(['\r[{}]', '{:.1f}%', '{}/{}', 'loss={:.3f}', '{}/{}'])
        # bars_max is the width of the progress bar
        bars_max = 20
        writer = tf.summary.FileWriter('log', graph=sess.graph)
        merges = []
        # One merged summary op per bucket (per training op)
        for b_idx in model.bucket_to_summary_list:
            merges.append(tf.summary.merge(model.bucket_to_summary_list[b_idx]))
        print("Starting training.....")
        with tf.device('/gpu:0'):
            for epoch_index in range(1, FLAGS.num_epoch + 1):
                print('Epoch {}:'.format(epoch_index))
                time_start = time.time()  # epoch start time
                epoch_trained = 0         # progress within this epoch
                batch_loss = []
                while True:
                    # Randomly pick a bucket to train on
                    random_number = np.random.random_sample()
                    bucket_id = min([
                        i for i in range(len(buckets_scale))
                        if buckets_scale[i] > random_number
                    ])
                    # Fetch a batch (batch_size, e.g. 16 pairs) from that bucket
                    data, _ = model.get_batch_data(bucket_dbs, bucket_id)
                    encoder_inputs, decoder_inputs, decoder_weights = model.get_batch(
                        bucket_id, data)
                    # Run one training step
                    _, step_loss, summary_merge, output = model.step(
                        sess, encoder_inputs, decoder_inputs, decoder_weights,
                        bucket_id, False, merges[bucket_id])
                    epoch_trained += FLAGS.batch_size
                    batch_loss.append(step_loss)
                    time_now = time.time()
                    time_spend = time_now - time_start
                    time_estimate = time_spend / (epoch_trained / FLAGS.num_per_epoch)
                    percent = min(100, epoch_trained / FLAGS.num_per_epoch * 100)  # cap at 100%
                    # Number of '=' characters to draw, at most bars_max
                    bars = math.floor(percent / 100 * bars_max)
                    # Redraw the progress bar: '=' for done, '-' for remaining
                    sys.stdout.write(
                        metrics.format(
                            '=' * int(bars) + '-' * int(bars_max - bars),
                            percent, epoch_trained, FLAGS.num_per_epoch,
                            np.mean(batch_loss),
                            data_utils.time(time_spend),
                            data_utils.time(time_estimate)))
                    sys.stdout.flush()
                    if summary_merge is not None:
                        writer.add_summary(summary_merge, global_step=epoch_index)
                    if epoch_trained >= FLAGS.num_per_epoch:
                        model.saver.save(sess,
                                         os.path.join(FLAGS.model_dir, FLAGS.model_name),
                                         global_step=epoch_index)
                        break
                print('\n')
        # One final checkpoint before exiting
        model.saver.save(sess, os.path.join(FLAGS.model_dir, FLAGS.model_name))
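# Every variant below selects its training bucket the same way: build a
# cumulative-share table from the bucket sizes, draw a uniform random number,
# and take the first bucket whose cumulative share exceeds the draw, so each
# bucket is sampled in proportion to how much data it holds. A minimal
# standalone sketch of just that sampling step (the bucket sizes here are
# made up for illustration):

import numpy as np

# Hypothetical bucket sizes; in train() these come from bucket_dbs[i].size.
bucket_sizes = [5000, 12000, 8000, 3000]
total_size = sum(bucket_sizes)

# Cumulative shares, here roughly [0.179, 0.607, 0.893, 1.0]
buckets_scale = [sum(bucket_sizes[:i + 1]) / total_size
                 for i in range(len(bucket_sizes))]

def sample_bucket_id():
    """Return a bucket index with probability proportional to its size."""
    random_number = np.random.random_sample()  # uniform draw in [0, 1)
    return min(i for i in range(len(buckets_scale))
               if buckets_scale[i] > random_number)

counts = [0] * len(bucket_sizes)
for _ in range(10000):
    counts[sample_bucket_id()] += 1
print(counts)  # roughly proportional to bucket_sizes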
def train(): """训练模型""" # 准备数据 print("train mode") print('准备数据') if not os.path.exists(FLAGS.model_dir): os.makedirs(FLAGS.model_dir) bucket_dbs = data_utils.read_bucket_dbs(FLAGS.buckets_dir) bucket_sizes = [] for i in range(len(buckets)): bucket_size = bucket_dbs[i].size bucket_sizes.append(bucket_size) print('bucket {} 中有数据 {} 条'.format(i, bucket_size)) total_size = sum(bucket_sizes) print('共有数据 {} 条'.format(total_size)) # 开始建模与训练 with tf.Session(config=tf.ConfigProto(allow_soft_placement=False, gpu_options=gpu_options)) as sess: # 构建模型 model = create_model(sess, False) # 初始化变量 sess.run(tf.initialize_all_variables()) ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir) #print("ckpt path : ", ckpt.model_checkpoint_path) if ckpt != None: print("读取模型 : ", ckpt.model_checkpoint_path) model.saver.restore(sess, ckpt.model_checkpoint_path) else: print("not exist old model") buckets_scale = [ sum(bucket_sizes[:i + 1]) / total_size for i in range(len(bucket_sizes)) ] # 开始训练 metrics = ' '.join( ['\r[{}]', '{:.1f}%', '{}/{}', 'loss={:.3f}', '{}/{}']) bars_max = 20 with tf.device('/gpu:0'): for epoch_index in range(1, FLAGS.num_epoch + 1): print('Epoch {}:'.format(epoch_index)) time_start = time.time() epoch_trained = 0 batch_loss = [] while True: # 选择一个要训练的bucket random_number = np.random.random_sample() bucket_id = min([ i for i in range(len(buckets_scale)) if buckets_scale[i] > random_number ]) data, data_in = model.get_batch_data(bucket_dbs, bucket_id) encoder_inputs, decoder_inputs, decoder_weights = model.get_batch( bucket_dbs, bucket_id, data) _, step_loss, output = model.step(sess, encoder_inputs, decoder_inputs, decoder_weights, bucket_id, False) epoch_trained += FLAGS.batch_size batch_loss.append(step_loss) time_now = time.time() time_spend = time_now - time_start time_estimate = time_spend / (epoch_trained / FLAGS.num_per_epoch) percent = min(100, epoch_trained / FLAGS.num_per_epoch) * 100 bars = math.floor(percent / 100 * bars_max) sys.stdout.write( metrics.format('=' * bars + '-' * (bars_max - bars), percent, epoch_trained, FLAGS.num_per_epoch, np.mean(batch_loss), data_utils.time(time_spend), data_utils.time(time_estimate))) sys.stdout.flush() if epoch_trained >= FLAGS.num_per_epoch: model.saver.save(sess, os.path.join(FLAGS.model_dir, FLAGS.model_name), global_step=epoch_index) break print('\n')
def train(): """训练模型""" # 准备数据 print('准备数据') bucket_dbs = data_utils.read_bucket_dbs(FLAGS.buckets_dir) bucket_sizes = [] for i in range(len(buckets)): bucket_size = bucket_dbs[i].size bucket_sizes.append(bucket_size) print('bucket {} 中有数据 {} 条'.format(i, bucket_size)) total_size = sum(bucket_sizes) print('共有数据 {} 条'.format(total_size)) # 开始建模与训练 with tf.Session() as sess: # 构建模型 model = create_model(sess, False) # 初始化变量 sess.run(tf.initialize_all_variables()) buckets_scale = [ sum(bucket_sizes[:i + 1]) / total_size for i in range(len(bucket_sizes)) ] # 开始训练 metrics = ' '.join([ '\r[{}]', '{:.1f}%', '{}/{}', 'loss={:.3f}', '{}/{}', 'learning rate={:.5f}' ]) bars_max = 20 for epoch_index in range(1, FLAGS.num_epoch + 1): print('Epoch {}:'.format(epoch_index)) time_start = time.time() epoch_trained = 0 batch_loss = [] # previous_losses=[] current_step=0 loss=0 while True: # 选择一个要训练的bucket random_number = np.random.random_sample() bucket_id = min([ i for i in range(len(buckets_scale)) if buckets_scale[i] > random_number ]) data, data_in = model.get_batch_data( bucket_dbs, bucket_id ) encoder_inputs, decoder_inputs, decoder_weights = model.get_batch( bucket_dbs, bucket_id, data ) _, step_loss, output = model.step( sess, encoder_inputs, decoder_inputs, decoder_weights, bucket_id, False ) loss=step_loss/FLAGS.steps_per_checkpoint current_step+=1 if current_step % FLAGS.steps_per_checkpoint == 0: if len(previous_losses)>2 and loss>max(previous_losses[-3:]): sess.run(model.learning_rate_decay_op) previous_losses.append(loss) loss=0 if not os.path.exists(FLAGS.model_dir): os.makedirs(FLAGS.model_dir) model.saver.save(sess, os.path.join(FLAGS.model_dir, FLAGS.model_name)) epoch_trained += FLAGS.batch_size batch_loss.append(step_loss) time_now = time.time() time_spend = time_now - time_start time_estimate = time_spend / (epoch_trained / FLAGS.num_per_epoch) percent = min(100, epoch_trained / FLAGS.num_per_epoch) * 100 bars = math.floor(percent / 100 * bars_max) sys.stdout.write(metrics.format( '=' * bars + '-' * (bars_max - bars), percent, epoch_trained, FLAGS.num_per_epoch, np.mean(batch_loss), data_utils.time(time_spend), data_utils.time(time_estimate),model.learning_rate.eval() )) sys.stdout.flush() if epoch_trained >= FLAGS.num_per_epoch: break print('\n') if not os.path.exists(FLAGS.model_dir): os.makedirs(FLAGS.model_dir) model.saver.save(sess, os.path.join(FLAGS.model_dir, FLAGS.model_name))
def train(): """训练模型""" # 准备数据 print('准备数据') bucket_dbs = data_utils.read_bucket_dbs(FLAGS.buckets_dir) bucket_sizes = [] for i in range(len(buckets)): bucket_size = bucket_dbs[i].size bucket_sizes.append(bucket_size) print('bucket {} 中有数据 {} 条'.format(i, bucket_size)) total_size = sum(bucket_sizes) print('共有数据 {} 条'.format(total_size)) gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.66) config = tf.ConfigProto(gpu_options=gpu_options) # 防止 out of memory config.gpu_options.allocator_type = 'BFC' # 开始建模与训练 with tf.Session() as sess: # 构建模型 model = create_model(sess, False) # 初始化变量 sess.run(tf.initialize_all_variables()) buckets_scale = [ sum(bucket_sizes[:i + 1]) / total_size for i in range(len(bucket_sizes)) ] # 开始训练 metrics = ' '.join([ '\r[{}]', '{:.1f}%', '{}/{}', 'loss={:.3f}', '{}/{}' ]) bars_max = 20 for epoch_index in range(1, FLAGS.num_epoch + 1): print('Epoch {}:'.format(epoch_index)) time_start = time.time() epoch_trained = 0 batch_loss = [] while True: # 选择一个要训练的bucket random_number = np.random.random_sample() bucket_id = min([ i for i in range(len(buckets_scale)) if buckets_scale[i] > random_number ]) data, data_in = model.get_batch_data( bucket_dbs, bucket_id ) encoder_inputs, decoder_inputs, decoder_weights = model.get_batch( bucket_dbs, bucket_id, data ) _, step_loss, output = model.step( sess, encoder_inputs, decoder_inputs, decoder_weights, bucket_id, False ) epoch_trained += FLAGS.batch_size batch_loss.append(step_loss) time_now = time.time() time_spend = time_now - time_start time_estimate = time_spend / (epoch_trained / FLAGS.num_per_epoch) percent = min(100, epoch_trained / FLAGS.num_per_epoch) * 100 bars = math.floor(percent / 100 * bars_max) sys.stdout.write(metrics.format( '=' * bars + '-' * (bars_max - bars), percent, epoch_trained, FLAGS.num_per_epoch, np.mean(batch_loss), data_utils.time(time_spend), data_utils.time(time_estimate) )) sys.stdout.flush() if epoch_trained >= FLAGS.num_per_epoch: break print('\n') if not os.path.exists(FLAGS.model_dir): os.makedirs(FLAGS.model_dir) model.saver.save(sess, os.path.join(FLAGS.model_dir, FLAGS.model_name))
def train():
    # Prepare Headline data.
    print("Preparing Headline data in %s" % FLAGS.data_dir)
    src_train, dest_train, src_dev, dest_dev, _, _ = data_utils.prepare_headline_data(
        FLAGS.data_dir, FLAGS.vocab_size)

    # Device config for CPU usage:
    # config = tf.ConfigProto(device_count={"CPU": 4},   # limit to 4 CPUs
    #                         inter_op_parallelism_threads=1,
    #                         intra_op_parallelism_threads=2)  # n threads parallel for ops
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(src_dev, dest_dev)
        train_set = read_data(src_train, dest_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in range(len(buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
        # use to select a bucket. The length of [scale[i], scale[i+1]] is
        # proportional to the size of the i-th training bucket, as used later.
        trainbuckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in range(len(train_bucket_sizes))
        ]

        # This is the training loop (metrics formats the progress display).
        metrics = ' '.join(
            ['\r{:.1f}%', '{}/{}', 'loss={:.3f}', 'gradients={:.3f}', '{}/{}'])
        bars_max = 20
        for current_step in range(FLAGS.num_epoch):
            print("\n")
            print('Epoch {}:'.format(current_step))
            epoch_trained = 0
            batch_loss = []
            batch_gradients = []
            time_start = time.time()
            # index_sum = 0
            while True:
                # Choose a bucket according to data distribution. We pick a
                # random number in [0, 1] and use the corresponding interval
                # in trainbuckets_scale.
                random_number_01 = np.random.random_sample()
                bucket_id = min([
                    i for i in range(len(trainbuckets_scale))
                    if trainbuckets_scale[i] > random_number_01
                ])
                # Get a batch and make a step.
                encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    train_set, bucket_id)
                step_gradients, step_loss, _ = model.step(
                    sess, encoder_inputs, decoder_inputs, target_weights,
                    bucket_id, False)
                epoch_trained += FLAGS.batch_size
                batch_loss.append(step_loss)
                batch_gradients.append(step_gradients)
                time_now = time.time()
                time_spend = time_now - time_start
                time_estimate = time_spend / (epoch_trained / FLAGS.num_per_epoch)
                percent = min(100, epoch_trained / FLAGS.num_per_epoch * 100)  # cap at 100%
                # bars = math.floor(percent / 100 * bars_max)
                sys.stdout.write(
                    metrics.format(
                        percent, epoch_trained, FLAGS.num_per_epoch,
                        np.mean(batch_loss),  # mean over the collected batch losses
                        np.mean(batch_gradients),
                        data_utils.time(time_spend),
                        data_utils.time(time_estimate)))
                print("\n")
                sys.stdout.flush()
                # index_sum += 1
                # if index_sum > 4:
                #     sys.exit()
                if FLAGS.num_per_epoch < epoch_trained:
                    break
            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "headline_large.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
def train(): """训练模型""" # 准备数据 print('准备数据') #数据预处理有两步:1、decode_conv 2、data_utils #原始数据集不是很好的问答式数据集。用decode_conv处理的数据,假定有ABC三个句子,则处理成两句问答:A:B,B:C,然后都插入到sqlite3里 #生成一个conversion.db文件,然后使用data_utils来进行语句处理,即对这个db文件做进一步处理 #对应四种格式,5_15,10_20,15_25,20_30,分别代表问句和回答句的字数上限。比如5_15即问句不超过5个字且答句不超过15个字。 #这种方式也和命名实体识别的一个性质,是为了能最小padding,进行局部padding,如果有句子太长的,但是不太多,那么可以滤掉。 #因为一般的对话都不会太长 bucket_dbs = data_utils.read_bucket_dbs(FLAGS.buckets_dir) bucket_sizes = [] for i in range(len(buckets)): bucket_size = bucket_dbs[i].size bucket_sizes.append(bucket_size) print('bucket {} 中有数据 {} 条'.format(i, bucket_size)) total_size = sum(bucket_sizes) print('共有数据 {} 条'.format(total_size)) #到这里为止还只是拿到四个bucket里的数据,并统计了一下总的数据条数 # 开始建模与训练 with tf.Session() as sess: #整体流程即:1、创建模型 2、接收数据,并转换成模型可接收的类型 3、放入模型,计算损失 4、更新参数 # 构建模型 model = create_model(sess, False) # 初始化变量 sess.run(tf.global_variables_initializer()) buckets_scale = [ sum(bucket_sizes[:i + 1]) / total_size for i in range(len(bucket_sizes)) ] #i=0,1,2,3==>bucket_sizes[: 1], # 开始训练 metrics = ' '.join( ['\r[{}]', '{:.1f}%', '{}/{}', 'loss={:.3f}', '{}/{}']) bars_max = 20 with tf.device('/gpu:0'): for epoch_index in range(1, FLAGS.num_epoch + 1600): print('Epoch {}:'.format(epoch_index)) time_start = time.time() epoch_trained = 0 batch_loss = [] while True: # 选择一个要训练的bucket random_number = np.random.random_sample() #tmp=[] #for i in range(len(buckets_scale)): #if buckets_scale[i] > random_number: #tmp.append(i) #bucket_id = min(tmp) bucket_id = 1 if random_number <= 0.25 else 2 if random_number > 0.25 and random_number <= 0.5 else 3 if random_number > 0.5 and random_number < 0.75 else 4 bucket_id -= 1 #先选择对应的问答对长度,因为后面无论是padding还是生结果,都是根据这个位数来的 #bucket_id = min([i for i in range(len(buckets_scale)) if buckets_scale[i] > random_number]) #拿出64个问答对,data 和data_in 问答倒转 data, data_in = model.get_batch_data( bucket_dbs, bucket_id) #先获取到问答对和答问对 encoder_inputs, decoder_inputs, decoder_weights = model.get_batch( bucket_dbs, bucket_id, data ) #再得到padding后的encoder_inputs,decoder_inputs和新生成的权重decoder_weights #而这里的encoder_inputs,decoder_inputs都只是对应的字ID信息,而decoder_weights则是1和0组成的,也是和字位置一对一对应 #通过源码可以看出,ID只是初步信息,随机初始化一个embedding是embedding_attention_seq2seq内部会有的 _, step_loss, output = model.step( sess, encoder_inputs, decoder_inputs, decoder_weights, bucket_id, False ) #给定需要喂入的参数,即encoder、decoder、weights以及选择的bucket_id #根据训练和测试状态,获取输出结果。 epoch_trained += FLAGS.batch_size batch_loss.append(step_loss) #为了计算损失用 time_now = time.time() time_spend = time_now - time_start time_estimate = time_spend / (epoch_trained / FLAGS.num_per_epoch) percent = min(100, epoch_trained / FLAGS.num_per_epoch) * 100 bars = math.floor(percent / 100 * bars_max) sys.stdout.write( metrics.format('=' * bars + '-' * (bars_max - bars), percent, epoch_trained, FLAGS.num_per_epoch, np.mean(batch_loss), data_utils.time(time_spend), data_utils.time(time_estimate))) sys.stdout.flush() if epoch_trained >= FLAGS.num_per_epoch: break print('\n') if not os.path.exists(FLAGS.model_dir): os.makedirs(FLAGS.model_dir) if epoch_index % 800 == 0: model.saver.save(sess, os.path.join(FLAGS.model_dir, FLAGS.model_name))
def train(): """训练模型""" # 准备数据 print('准备数据') bucket_dbs = data_utils.read_bucket_dbs(FLAGS.buckets_dir) bucket_sizes = [] for i in range(len(buckets)): bucket_size = bucket_dbs[i].size bucket_sizes.append(bucket_size) print('bucket {} 中有数据 {} 条'.format(i, bucket_size)) total_size = sum(bucket_sizes) print('共有数据 {} 条'.format(total_size)) # 开始建模与训练 with tf.Session() as sess: # 构建模型 model = create_model(sess, False) # 初始化变量 sess.run(tf.initialize_all_variables()) buckets_scale = [ sum(bucket_sizes[:i + 1]) / total_size for i in range(len(bucket_sizes)) ] # 开始训练 metrics = ' '.join([ '\r[{}]', '{:.1f}%', '{}/{}', 'loss={:.3f}', '{}/{}' ]) bars_max = 20 for epoch_index in range(1, FLAGS.num_epoch + 1): print('Epoch {}:'.format(epoch_index)) time_start = time.time() epoch_trained = 0 batch_loss = [] while True: # 选择一个要训练的bucket random_number = np.random.random_sample() bucket_id = min([ i for i in range(len(buckets_scale)) if buckets_scale[i] > random_number ]) data, data_in = model.get_batch_data( bucket_dbs, bucket_id ) encoder_inputs, decoder_inputs, decoder_weights = model.get_batch( bucket_dbs, bucket_id, data ) _, step_loss, output = model.step( sess, encoder_inputs, decoder_inputs, decoder_weights, bucket_id, False ) epoch_trained += FLAGS.batch_size batch_loss.append(step_loss) time_now = time.time() time_spend = time_now - time_start time_estimate = time_spend / (epoch_trained / FLAGS.num_per_epoch) percent = min(100, epoch_trained / FLAGS.num_per_epoch) * 100 bars = math.floor(percent / 100 * bars_max) sys.stdout.write(metrics.format( '=' * bars + '-' * (bars_max - bars), percent, epoch_trained, FLAGS.num_per_epoch, np.mean(batch_loss), data_utils.time(time_spend), data_utils.time(time_estimate) )) sys.stdout.flush() if epoch_trained >= FLAGS.num_per_epoch: break print('\n') if not os.path.exists(FLAGS.model_dir): os.makedirs(FLAGS.model_dir) model.saver.save(sess, os.path.join(FLAGS.model_dir, FLAGS.model_name))
def train(): """訓練模型""" # 准备数据 print('準備數據') bucket_dbs = data_utils.read_bucket_dbs(FLAGS.buckets_dir) bucket_sizes = [] for i in range(len(buckets)): bucket_size = bucket_dbs[i].size bucket_sizes.append(bucket_size) print('bucket {} 中有數據 {} 條'.format(i, bucket_size)) total_size = sum(bucket_sizes) print('共有數據 {} 條'.format(total_size)) # 開始建模 with tf.Session() as sess: # 構建模型 model = create_model(sess, False) # 初始化變量 sess.run(tf.initialize_all_variables()) buckets_scale = [ sum(bucket_sizes[:i + 1]) / total_size for i in range(len(bucket_sizes)) ] # 開始訓練 metrics = ' '.join( ['\r[{}]', '{:.1f}%', '{}/{}', 'loss={:.3f}', '{}/{}']) bars_max = 20 for epoch_index in range(1, FLAGS.num_epoch + 1): print('Epoch {}:'.format(epoch_index)) time_start = time.time() epoch_trained = 0 batch_loss = [] while True: # 選擇一個要訓練的bucket random_number = np.random.random_sample() bucket_id = min([ i for i in range(len(buckets_scale)) if buckets_scale[i] > random_number ]) data, data_in = model.get_batch_data(bucket_dbs, bucket_id) encoder_inputs, decoder_inputs, decoder_weights = model.get_batch( bucket_dbs, bucket_id, data) _, step_loss, output = model.step(sess, encoder_inputs, decoder_inputs, decoder_weights, bucket_id, False) epoch_trained += FLAGS.batch_size batch_loss.append(step_loss) time_now = time.time() time_spend = time_now - time_start time_estimate = time_spend / (epoch_trained / FLAGS.num_per_epoch) percent = min(100, epoch_trained / FLAGS.num_per_epoch) * 100 bars = math.floor(percent / 100 * bars_max) sys.stdout.write( metrics.format('=' * bars + '-' * (bars_max - bars), percent, epoch_trained, FLAGS.num_per_epoch, np.mean(batch_loss), data_utils.time(time_spend), data_utils.time(time_estimate))) sys.stdout.flush() if epoch_trained >= FLAGS.num_per_epoch: break print('\n') if not os.path.exists(FLAGS.model_dir): os.makedirs(FLAGS.model_dir) model.saver.save(sess, os.path.join(FLAGS.model_dir, FLAGS.model_name))
def train(): """训练模型""" print('数据准备中...') bucket_dbs = data_utils.read_bucket_dbs(FLAGS.buckets_dir) bucket_sizes = [] for i in range(len(buckets)): bucket_size = bucket_dbs[i].size bucket_sizes.append(bucket_size) print('bucket {} 中有数据 {} 条'.format(i, bucket_size)) total_size = sum(bucket_sizes) print('共有数据 {} 条'.format(total_size)) with tf.Session() as sess: model = create_model(sess, False) sess.run(tf.global_variables_initializer()) # 计算每个文件数据占比 buckets_scale = [sum(bucket_sizes[:i + 1]) / total_size for i in range(len(bucket_sizes))] # 格式化控制台输出 metrics = ' '.join([ '\r[{}]', '{:.1f}%', '{}/{}', 'loss={:.3f}', '{}/{}' ]) bars_max = 20 with tf.device('/gpu:0'): for epoch_index in range(1, FLAGS.num_epoch + 1600): print('Epoch {}:'.format(epoch_index)) time_start = time.time() epoch_trained = 0 # 每个epoch已经训练的样本数 batch_loss = [] while True: # 随机选择一个要训练的bucket_id random_number = np.random.random_sample() bucket_id = min([i for i in range(len(buckets_scale)) if buckets_scale[i] > random_number]) # 拿出64个问答对 data, data_in 问答倒转 data, data_in = model.get_batch_data( bucket_dbs, bucket_id ) # 将问答对转换为模型训练可接受的格式 # bucket_10_20这个bucket对应的维度为:10*64 20*64 20*64 encoder_inputs, decoder_inputs, decoder_weights = model.get_batch( bucket_dbs, bucket_id, data ) # 训练 _, step_loss, output = model.step( sess, encoder_inputs, decoder_inputs, decoder_weights, bucket_id, False ) epoch_trained += FLAGS.batch_size batch_loss.append(step_loss) time_now = time.time() time_spend = time_now - time_start time_estimate = time_spend / (epoch_trained / FLAGS.num_per_epoch) percent = min(100, epoch_trained / FLAGS.num_per_epoch) * 100 bars = math.floor(percent / 100 * bars_max) sys.stdout.write(metrics.format( '=' * bars + '-' * (bars_max - bars), percent, epoch_trained, FLAGS.num_per_epoch, np.mean(batch_loss), data_utils.time(time_spend), data_utils.time(time_estimate) )) sys.stdout.flush() if epoch_trained >= FLAGS.num_per_epoch: break print('\n') if not os.path.exists(FLAGS.model_dir): os.makedirs(FLAGS.model_dir) if epoch_index%800==0: model.saver.save(sess, os.path.join(FLAGS.model_dir, FLAGS.model_name))