Ejemplo n.º 1
0
def main(_):
    """Build Train/Valid/Test SpindleModel graphs and train or test them.

    TF1-style two-phase workflow:
      1. Build all three models in one graph, sharing weights through
         ``variable_scope("Model", reuse=...)``, export their ops and
         serialize the graph to a MetaGraphDef.
      2. Import the MetaGraphDef into a fresh graph (optionally
         auto-parallelized over multiple GPUs) and run it under a
         ``tf.train.Supervisor`` managed session.

    Mode is selected by ``config.test_mode``: 0 trains for
    ``config.max_max_epoch`` epochs (optionally saving a checkpoint),
    any other value restores the latest checkpoint from
    ``FLAGS.model_path`` and evaluates the test set.

    Raises:
        ValueError: if --data_path is unset, if num_gpus > 1 on a
            TensorFlow older than 1.1.0, or if more GPUs are requested
            than the machine actually has.
    """
    # Remember the real stdout so it can be restored on exit (used
    # together with the commented-out log-file redirection below).
    stdout_backup = sys.stdout
    #log_file = open("message.log", "a")
    #sys.stdout = log_file
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")

    #raw_data = reader.spindle_raw_data(FLAGS.data_path)
    #train_data, valid_data, test_data, train_label, valid_label, test_label = raw_data
    target_f = FLAGS.target_f

    config = get_config()
    # Unroll length = number of samples in a 250 ms window at sampling
    # frequency target_f (0.25 s divided by the sample period 1/target_f).
    config.num_steps = num_steps = int((250 * 0.001) / (1 / target_f))

    eval_config = get_config()
    eval_config.num_steps = num_steps
    #eval_config.batch_size = 20
    #eval_config.num_steps = 50

    # Phase 1: build the full graph once, then snapshot it as a metagraph.
    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.name_scope("Train"):

            #train_input = SpindleInput(config=config, data=train_data, label=train_label, name="TrainInput")
            # Pin the input pipeline to the CPU so the accelerator is
            # left free for model computation.
            with tf.device('/cpu:0'):
                train_input = SpindleInputDatasetAPI(config=config,
                                                     name="train")
            #pdb.set_trace()

            # reuse=None creates the model variables; the Valid/Test
            # scopes below share them via reuse=True.
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m = SpindleModel(is_training=True,
                                 config=config,
                                 input_=train_input)
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        with tf.name_scope("Valid"):

            #valid_input = SpindleInput(config=config, data=valid_data, label=valid_label, name="ValidInput")
            valid_input = SpindleInputDatasetAPI(config=config, name="valid")

            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mvalid = SpindleModel(is_training=False,
                                      config=config,
                                      input_=valid_input)
            tf.summary.scalar("Validation Loss", mvalid.cost)

        with tf.name_scope("Test"):

            #test_input = SpindleInput(config=eval_config, data=test_data, label=test_label, name="TestInput")
            test_input = SpindleInputDatasetAPI(config=eval_config,
                                                name="test")

            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mtest = SpindleModel(is_training=False,
                                     config=eval_config,
                                     input_=test_input)

        # Export each model's ops under its own name so they can be
        # recovered with import_ops() after the metagraph round-trip.
        models = {"Train": m, "Valid": mvalid, "Test": mtest}
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.train.export_meta_graph()
        # NOTE(review): lexicographic comparison of version strings is
        # fragile in general (e.g. "1.9.0" vs "1.10.0"); kept as-is.
        if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
            raise ValueError(
                "num_gpus > 1 is not supported for TensorFlow versions "
                "below 1.1.0")
        soft_placement = False
        if FLAGS.num_gpus > 1:
            # Replicate the training model across GPUs by rewriting the
            # metagraph; soft placement lets TF fall back to CPU for ops
            # without a GPU kernel.
            soft_placement = True
            util.auto_parallel(metagraph, m)

    # Phase 2: rebuild the (possibly parallelized) graph and run it.
    with tf.Graph().as_default():

        tf.train.import_meta_graph(metagraph)

        for model in models.values():
            model.import_ops()

        # Supervisor manages session creation and checkpoints every 600 s.
        sv = tf.train.Supervisor(logdir=FLAGS.save_path, save_model_secs=600)
        #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
        # Grow GPU memory on demand instead of grabbing it all upfront.
        gpu_options = tf.GPUOptions(allow_growth=True)
        config_proto = tf.ConfigProto(allow_soft_placement=soft_placement,
                                      gpu_options=gpu_options)

        #total_parameters = 0
        #for variable in tf.trainable_variables():
        #  # shape is an array of tf.Dimension
        #  shape = variable.get_shape()
        #  print(shape)
        #  print(len(shape))
        #  variable_parameters = 1
        #  for dim in shape:
        #    print(dim)
        #   variable_parameters *= dim.value
        #    print(variable_parameters)
        #    total_parameters += variable_parameters
        #print(total_parameters)
        #pdb.set_trace()
        print(
            "=============================================New Test !!!!!=================================================="
        )
        print(" Learning rate = %.6f   Keep Probability = %.2f " %
              (config.learning_rate, config.keep_prob))

        with sv.managed_session(config=config_proto) as session:
            # GPU availability is validated here, after session creation.
            gpus = [
                x.name for x in device_lib.list_local_devices()
                if x.device_type == "GPU"
            ]
            if FLAGS.num_gpus > len(gpus):
                raise ValueError(
                    "Your machine has only %d gpus "
                    "which is less than the requested --num_gpus=%d." %
                    (len(gpus), FLAGS.num_gpus))
            #pdb.set_trace()
            #sv.saver.restore(session, tf.train.latest_checkpoint(os.path.join(FLAGS.data_path, "../saved_model/")))
            # test_mode == 0: training loop with exponential lr decay.
            if config.test_mode == 0:
                training_loss = []
                # sv.saver.restore(session, tf.train.latest_checkpoint(os.path.join(FLAGS.data_path, "../../Cross_Valid/1/")))
                for i in range(config.max_max_epoch):

                    # Decay kicks in after max_epoch epochs:
                    # lr = learning_rate * lr_decay**(i + 1 - max_epoch).
                    lr_decay = config.lr_decay**max(i + 1 - config.max_epoch,
                                                    0.0)
                    m.assign_lr(session, config.learning_rate * lr_decay)

                    print("Epoch: %d Learning rate: %.6f" %
                          (i + 1, session.run(m.lr)))
                    train_loss = run_epoch(session,
                                           m,
                                           eval_op=m.train_op,
                                           verbose=True)
                    print("Epoch: %d Train Loss: %.3f" % (i + 1, train_loss))
                    valid_loss = run_epoch(session, mvalid, verbose=False)
                    print("Epoch: %d" % (i + 1))
                    # Highlight validation loss in the console output.
                    cprint("Valid Loss: %.3f" % (valid_loss), 'white',
                           'on_yellow')
                    training_loss.append(train_loss)

                #fig=plt.figure()
                #plt.plot(training_loss)
                #fig.show()
                #test_loss = run_epoch(session, mtest, verbose=False)
                #print("Test Loss: %.3f" % test_loss)

                #variables_names =[v.name for v in tf.trainable_variables()]
                #value=session.run('Model/conv1/conv1/kernel:0')
                #pdb.set_trace()
                if FLAGS.save_path:
                    print("Saving model to %s." % FLAGS.save_path)
                    sv.saver.save(session,
                                  FLAGS.save_path,
                                  global_step=sv.global_step)

            else:
                # Test mode: restore the most recent checkpoint and
                # evaluate on the test set only.
                sv.saver.restore(session,
                                 tf.train.latest_checkpoint(FLAGS.model_path))
                #sv.saver.restore(session, tf.train.latest_checkpoint(os.path.join(FLAGS.data_path, "../../Cross_Valid/1")))
                #pdb.set_trace()
                #variables_names =[v.name for v in tf.trainable_variables()]
                #value=session.run('Model/conv1/conv1/kernel:0')
                #pdb.set_trace()
                test_loss = run_epoch(session, mtest, verbose=False)
                print("Test Loss: %.3f" % test_loss)
    #log_file.close()
    sys.stdout = stdout_backup
Ejemplo n.º 2
0
def main(_):
    """Train/evaluate a language model and export inference to TFLite.

    Builds Train/Valid/Test/Infer LMModel graphs that share weights via
    ``variable_scope("Model", reuse=...)``, round-trips them through a
    MetaGraphDef (optionally auto-parallelized across GPUs), trains for
    ``config.max_max_epoch`` epochs under a ``tf.train.Supervisor``,
    reports test perplexity, optionally saves a checkpoint, and finally
    converts the inference model to "converted_model.tflite".

    Raises:
        ValueError: if --data_path is unset, more GPUs are requested
            than available, or num_gpus > 1 on TensorFlow < 1.1.0.
    """
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to LM data directory")
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))

    raw_data = reader.lm_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, vocabulary = raw_data

    config = get_config()
    config.vocab_size = vocabulary
    # Test evaluation processes one token at a time (batch 1, 1 step).
    eval_config = get_config()
    eval_config.vocab_size = vocabulary
    eval_config.batch_size = 1
    eval_config.num_steps = 1
    # Inference keeps the default num_steps but runs with batch size 1.
    infer_config = get_config()
    infer_config.vocab_size = vocabulary
    infer_config.batch_size = 1

    # Phase 1: build all four models in one graph and snapshot it.
    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope("Train"):
            train_input = LMInput(config=config,
                                  data=train_data,
                                  name="TrainInput")
            # reuse=None creates the variables; later scopes reuse them.
            with tf.compat.v1.variable_scope("Model",
                                             reuse=None,
                                             initializer=initializer):
                m = LMModel(is_training=True,
                            config=config,
                            input_=train_input,
                            is_inference=False)
            tf.compat.v1.summary.scalar("Training_Loss", m.cost)
            tf.compat.v1.summary.scalar("Learning_Rate", m.lr)

        with tf.name_scope("Valid"):
            valid_input = LMInput(config=config,
                                  data=valid_data,
                                  name="ValidInput")
            with tf.compat.v1.variable_scope("Model",
                                             reuse=True,
                                             initializer=initializer):
                mvalid = LMModel(is_training=False,
                                 config=config,
                                 input_=valid_input,
                                 is_inference=False)
            tf.compat.v1.summary.scalar("Validation_Loss", mvalid.cost)

        with tf.name_scope("Test"):
            test_input = LMInput(config=eval_config,
                                 data=test_data,
                                 name="TestInput")
            with tf.compat.v1.variable_scope("Model",
                                             reuse=True,
                                             initializer=initializer):
                mtest = LMModel(is_training=False,
                                config=eval_config,
                                input_=test_input,
                                is_inference=False)

        # The inference model has no input pipeline (input_=None); it is
        # fed directly, which is what the TFLite conversion below needs.
        with tf.name_scope("Infer"):
            with tf.compat.v1.variable_scope("Model",
                                             reuse=True,
                                             initializer=initializer):
                minfer = LMModel(is_training=False,
                                 config=infer_config,
                                 input_=None,
                                 is_inference=True)

        # Export ops so they survive the metagraph round-trip below.
        models = {"Train": m, "Valid": mvalid, "Test": mtest, "Infer": minfer}
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.compat.v1.train.export_meta_graph()
        # NOTE(review): lexicographic version comparison is fragile.
        if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
            raise ValueError(
                "num_gpus > 1 is not supported for TensorFlow versions "
                "below 1.1.0")
        soft_placement = False
        if FLAGS.num_gpus > 1:
            soft_placement = True
            util.auto_parallel(metagraph, m)

    # Phase 2: rebuild the graph from the metagraph and run it.
    with tf.Graph().as_default():
        tf.compat.v1.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        #    sv = tf.train.MonitoredTrainingSession()#logdir=FLAGS.save_path)
        config_proto = tf.compat.v1.ConfigProto(
            allow_soft_placement=soft_placement)
        with sv.managed_session(config=config_proto) as session:

            #Export the current inference model to tflite.
            #converter = tf.lite.TFLiteConverter.from_session(session, [minfer.input_data], [minfer.logits])
            #converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS,
            #                                 tf.lite.OpsSet.SELECT_TF_OPS]
            #tflite_model = converter.convert()
            #open("converted_model.tflite", "wb").write(tflite_model)

            for i in range(config.max_max_epoch):
                # Exponential decay after max_epoch warm epochs.
                lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)

                print("Epoch: %d Learning rate: %.3f" %
                      (i + 1, session.run(m.lr)))
                train_perplexity = run_epoch(session,
                                             m,
                                             eval_op=m.train_op,
                                             verbose=True)
                print("Epoch: %d Train Perplexity: %.3f" %
                      (i + 1, train_perplexity))
                valid_perplexity = run_epoch(session, mvalid)
                print("Epoch: %d Valid Perplexity: %.3f" %
                      (i + 1, valid_perplexity))

            test_perplexity = run_epoch(session, mtest)
            print("Test Perplexity: %.3f" % test_perplexity)

            if FLAGS.save_path:
                print("Saving model to %s." % FLAGS.save_path)
                sv.saver.save(session,
                              FLAGS.save_path,
                              global_step=sv.global_step)

            #Export the current inference model to tflite.
            # Convert the trained inference graph to a flatbuffer keyed
            # on the Infer model's feed/fetch tensors.
            converter = tf.lite.TFLiteConverter.from_session(
                session, [minfer.input_data], [minfer.logits])
            #converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS,
            #                                 tf.lite.OpsSet.SELECT_TF_OPS]
            tflite_model = converter.convert()
            open("converted_model.tflite", "wb").write(tflite_model)
Ejemplo n.º 3
0
def run(args):
    """Train and/or evaluate a PTB model on male/female biased test sets.

    Builds Train/Valid/TestMale/TestFemale PTBModel graphs with shared
    weights, round-trips them through a MetaGraphDef, then either trains
    (``args.train``) or restores a saved model, and evaluates both
    gendered test corpora with per-sentence and per-profession costs.

    Args:
        args: a FLAGS-like namespace; stored into the module-level
            FLAGS global so helper functions see the same settings.

    Returns:
        ``(None, None, None, None)`` when ``FLAGS.train`` is set;
        otherwise ``(sentence_perps_m, sentence_perps_f,
        profession_costs_m, profession_costs_f)`` from the two test runs.

    Raises:
        ValueError: if --data_path is unset, more GPUs are requested
            than available, or num_gpus > 1 on TensorFlow < 1.1.0.
    """
    global FLAGS
    # Re-import the reader so a previous run's module state is reset.
    importlib.reload(reader)
    FLAGS = args
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))

    config = get_config()
    raw_data = reader.ptb_raw_data_bias(FLAGS.data_path,
                                        config.vocab_size,
                                        professions=set(FLAGS.professions))
    train_data, valid_data, test_data_m, test_data_f, vocab_size, sentence_ends_m, sentence_ends_f, professions_pos = raw_data
    # Evaluation runs one token at a time (batch 1, 1 step).
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    # Phase 1: build all four models in one graph and snapshot it.
    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope("Train"):
            train_input = PTBInput(config=config,
                                   data=train_data,
                                   name="TrainInput")
            # reuse=None creates the variables; later scopes reuse them.
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m = PTBModel(is_training=True,
                             config=config,
                             input_=train_input)
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        with tf.name_scope("Valid"):
            valid_input = PTBInput(config=config,
                                   data=valid_data,
                                   name="ValidInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mvalid = PTBModel(is_training=False,
                                  config=config,
                                  input_=valid_input)
            tf.summary.scalar("Validation Loss", mvalid.cost)

        with tf.name_scope("TestMale"):
            test_input_m = PTBInput(config=eval_config,
                                    data=test_data_m,
                                    name="TestInputMale")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mtest_m = PTBModel(is_training=False,
                                   config=eval_config,
                                   input_=test_input_m)

        with tf.name_scope("TestFemale"):
            test_input_f = PTBInput(config=eval_config,
                                    data=test_data_f,
                                    name="TestInputFemale")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mtest_f = PTBModel(is_training=False,
                                   config=eval_config,
                                   input_=test_input_f)

        # Export ops so they survive the metagraph round-trip below.
        models = {
            "Train": m,
            "Valid": mvalid,
            "TestMale": mtest_m,
            "TestFemale": mtest_f
        }
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.train.export_meta_graph()
        # NOTE(review): lexicographic version comparison is fragile.
        if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
            raise ValueError(
                "num_gpus > 1 is not supported for TensorFlow versions "
                "below 1.1.0")
        soft_placement = False
        if FLAGS.num_gpus > 1:
            soft_placement = True
            util.auto_parallel(metagraph, m)

    # Phase 2: rebuild the graph (from the fresh metagraph when training,
    # from a saved meta file otherwise) and run it.
    with tf.Graph().as_default():
        if args.train:
            tf.train.import_meta_graph(metagraph)
        else:
            tf.train.import_meta_graph(args.meta_file)
        for model in models.values():
            model.import_ops()

        saver = tf.train.Saver()
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)

        with sv.managed_session(config=config_proto) as session:
            if args.train:
                for i in range(config.max_max_epoch):
                    # Exponential decay after max_epoch warm epochs.
                    lr_decay = config.lr_decay**max(i + 1 - config.max_epoch,
                                                    0.0)
                    m.assign_lr(session, config.learning_rate * lr_decay)

                    print("Epoch: %d Learning rate: %.3f" %
                          (i + 1, session.run(m.lr)))
                    start_time = time.time()
                    train_perplexity, _, _ = run_epoch(session,
                                                       m,
                                                       eval_op=m.train_op)
                    print("Time for Epoch = %.2f s" %
                          (time.time() - start_time))
                    print("Epoch: %d Train Perplexity: %.3f" %
                          (i + 1, train_perplexity))
                    valid_perplexity, _, _ = run_epoch(session, mvalid)
                    print("Epoch: %d Valid Perplexity: %.3f" %
                          (i + 1, valid_perplexity))

            else:
                saver.restore(session, FLAGS.save_path)

            # Evaluate both gendered test corpora; document-level
            # perplexities (doc_perp_*) are computed but not returned.
            doc_perp_m,sentence_perps_m,profession_costs_m = run_epoch(session, mtest_m, \
                                                    sentence_ends = sentence_ends_m, professions_pos = professions_pos)
            doc_perp_f,sentence_perps_f,profession_costs_f = run_epoch(session, mtest_f, sentence_ends = sentence_ends_f\
                                                   , professions_pos = professions_pos)

            if FLAGS.save_path and args.train:
                print("Saving model to %s." % FLAGS.save_path)
                #         sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
                saver.save(session, FLAGS.save_path)

    if FLAGS.train:
        return None, None, None, None
    else:
        return sentence_perps_m, sentence_perps_f, profession_costs_m, profession_costs_f
Ejemplo n.º 4
0
def main(model_select="small",
         dat_path = "../data",
         sav_path = "./saved_model/",
         mixing_pi = 0.25,
         prior_log_sigma1 = -1.0,
         prior_log_sigma2 = -7.0):
    """Train and test a Bayesian PTB language model.

    Copies the arguments into module-level globals (consumed by
    get_config/PTBModel elsewhere in the module), launches TensorBoard
    as a side process, builds Train/Valid/Test graphs with shared
    weights, round-trips them through a MetaGraphDef, then trains under
    a Supervisor and reports test perplexity.

    Args:
        model_select: configuration name passed through ``model_type``.
        dat_path: PTB data directory.
        sav_path: checkpoint/log directory for the Supervisor.
        mixing_pi: mixture weight for the scale-mixture weight prior.
        prior_log_sigma1: log-sigma of the first prior component.
        prior_log_sigma2: log-sigma of the second prior component.
    """
    # All settings are communicated to the rest of the module through
    # globals rather than parameters.
    global model_type
    global data_path
    global save_path
    global global_prior_pi
    global global_log_sigma1
    global global_log_sigma2
    global global_num_gpus
    
    model_type = model_select
    data_path = dat_path
    save_path = sav_path
    global_prior_pi = mixing_pi
    global_log_sigma1 = prior_log_sigma1
    global_log_sigma2 = prior_log_sigma2

    gpus = [x.name for x in device_lib.list_local_devices() if x.device_type == "GPU"]

    # With no GPUs, still report 1 so downstream per-GPU division/replica
    # logic has a non-zero count (CPU-only run).
    if len(gpus) == 0:
        global_num_gpus = 1
    else:
        global_num_gpus = len(gpus)

    raw_data = reader.ptb_raw_data(data_path)
    train_data, valid_data, test_data, _, _ = raw_data

    config = get_config()
    # Test evaluation runs one token at a time (batch 1, 1 step).
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    # NOTE(review): this TensorBoard child process is never terminated
    # or waited on; it outlives the function.
    subprocess.Popen(["tensorboard","--logdir=tensorboard"])

    # Phase 1: build the graph once and snapshot it as a metagraph.
    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope("Train"):
            train_input = PTBInput(config=config, data=train_data, name="TrainInput")
            # reuse=None creates the variables; later scopes reuse them.
            with tf.variable_scope("Model", reuse=None, initializer=initializer):
                m = PTBModel(is_training=True, config=config, input_=train_input)
            tf.summary.scalar("Training_Loss", m.cost)
            tf.summary.scalar("Learning_Rate", m.lr)
            tf.summary.scalar("KL_Loss", m.kl_loss)
            tf.summary.scalar("Total_Loss", m.total_loss)

        with tf.name_scope("Valid"):
            valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
            with tf.variable_scope("Model", reuse=True, initializer=initializer):
                mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
            tf.summary.scalar("Validation_Loss", mvalid.cost)

        with tf.name_scope("Test"):
            test_input = PTBInput(
                config=eval_config, data=test_data, name="TestInput")
            with tf.variable_scope("Model", reuse=True, initializer=initializer):
                mtest = PTBModel(is_training=False, config=eval_config,
                                 input_=test_input)

        # Export ops so they survive the metagraph round-trip below.
        models = {"Train": m, "Valid": mvalid, "Test": mtest}
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.train.export_meta_graph()
        soft_placement = False
        if global_num_gpus > 1:
            soft_placement = True
            util.auto_parallel(metagraph, m)

    # Phase 2: rebuild the graph from the metagraph and run it.
    with tf.Graph().as_default():
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()
        sv = tf.train.Supervisor(logdir=save_path)
        config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
        with sv.managed_session(config=config_proto) as session:
            for i in range(config.max_max_epoch):
                # Exponential decay after max_epoch warm epochs.
                lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)

                print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
                train_perplexity = run_epoch(session, m, eval_op=m.train_op,
                                             verbose=True)
                print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
                valid_perplexity = run_epoch(session, mvalid)
                print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

            test_perplexity = run_epoch(session, mtest)
            print("Test Perplexity: %.3f" % test_perplexity)

            if save_path:
                print("Saving model to %s." % save_path)
                sv.saver.save(session, save_path, global_step=sv.global_step)
Ejemplo n.º 5
0
def main(_):
    """Benchmark PTB model training throughput (words per second).

    Builds Train/Valid/Test PTBModel graphs with shared weights,
    round-trips them through a MetaGraphDef, then trains under a
    Supervisor until either ``config.max_max_epoch`` epochs complete or
    ``FLAGS.max_duration`` minutes elapse, printing the overall
    words-per-second rate. Validation/test evaluation is commented out;
    this variant only measures training speed.

    Raises:
        ValueError: if --data_path is unset, or num_gpus > 1 on
            TensorFlow < 1.1.0.
    """
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")
    """
  gpus = [
      x.name for x in device_lib.list_local_devices() if x.device_type == "GPU"
  ]
  if FLAGS.num_gpus > len(gpus):
    raise ValueError(
        "Your machine has only %d gpus "
        "which is less than the requested --num_gpus=%d."
        % (len(gpus), FLAGS.num_gpus))
  """

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    # Evaluation runs one token at a time (batch 1, 1 step).
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    # Phase 1: build the graph once and snapshot it as a metagraph.
    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope("Train"):
            train_input = PTBInput(config=config,
                                   data=train_data,
                                   name="TrainInput")
            # reuse=None creates the variables; later scopes reuse them.
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m = PTBModel(is_training=True,
                             config=config,
                             input_=train_input)
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        with tf.name_scope("Valid"):
            valid_input = PTBInput(config=config,
                                   data=valid_data,
                                   name="ValidInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mvalid = PTBModel(is_training=False,
                                  config=config,
                                  input_=valid_input)
            tf.summary.scalar("Validation Loss", mvalid.cost)

        with tf.name_scope("Test"):
            test_input = PTBInput(config=eval_config,
                                  data=test_data,
                                  name="TestInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mtest = PTBModel(is_training=False,
                                 config=eval_config,
                                 input_=test_input)

        # Export ops so they survive the metagraph round-trip below.
        models = {"Train": m, "Valid": mvalid, "Test": mtest}
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.train.export_meta_graph()
        # NOTE(review): lexicographic version comparison is fragile.
        if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
            raise ValueError(
                "num_gpus > 1 is not supported for TensorFlow versions "
                "below 1.1.0")
        soft_placement = False
        if FLAGS.num_gpus > 1:
            soft_placement = True
            util.auto_parallel(metagraph, m)

    # Phase 2: rebuild the graph from the metagraph and run it.
    with tf.Graph().as_default():
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
        # Grow GPU memory on demand instead of grabbing it all upfront.
        config_proto.gpu_options.allow_growth = True
        # Wall-clock budget for the benchmark, in minutes (None = no cap).
        max_duration = FLAGS.max_duration
        with sv.managed_session(config=config_proto) as session:
            start_time_mb = datetime.datetime.now()
            total_words = 0
            print("start:", start_time_mb)
            for i in range(config.max_max_epoch):
                # Stop early once the wall-clock budget is exhausted.
                seconds = (datetime.datetime.now() -
                           start_time_mb).total_seconds()
                minutes = seconds / 60.0
                if max_duration is not None and minutes >= max_duration:
                    break
                # Exponential decay after max_epoch warm epochs.
                lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)

                # print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
                train_perplexity, num_words = run_epoch(
                    session,
                    m,
                    eval_op=m.train_op,
                    verbose=True,
                    start_time_mb=start_time_mb)
                total_words += num_words
                # print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
                #valid_perplexity = run_epoch(session, mvalid, start_time_mb=start_time_mb)
                # print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
            end_time_mb = datetime.datetime.now()
            print("end:", str(end_time_mb))
            total_time = (end_time_mb - start_time_mb).total_seconds()
            # Headline benchmark number: training words per second.
            print('Total wps: %.f' % (total_words / float(total_time)))
            """
      test_perplexity = run_epoch(session, mtest)
      print("Test Perplexity: %.3f" % test_perplexity)
      """

            if FLAGS.save_path:
                print("Saving model to %s." % FLAGS.save_path)
                sv.saver.save(session,
                              FLAGS.save_path,
                              global_step=sv.global_step)
0
def main(_):
    """Train a PTB language model and report train/valid/test perplexity.

    Builds Train/Valid/Test models that share variables, exports the graph
    as a metagraph (optionally auto-parallelized across GPUs), then
    re-imports it into a fresh graph and runs the epoch loop under a
    tf.train.Supervisor, saving a checkpoint at the end if requested.
    """
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))

    global_begin_time = time.time()

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    # Evaluation feeds one token at a time so RNN state carries across steps.
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    print('The training configuration is as follows:', flush=True)
    print_config(config)

    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope("Train"):
            train_input = PTBInput(config=config,
                                   data=train_data,
                                   name="TrainInput")
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m = PTBModel(is_training=True,
                             config=config,
                             input_=train_input)
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        with tf.name_scope("Valid"):
            valid_input = PTBInput(config=config,
                                   data=valid_data,
                                   name="ValidInput")
            # reuse=True: validation shares the trained variables.
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mvalid = PTBModel(is_training=False,
                                  config=config,
                                  input_=valid_input)
            tf.summary.scalar("Validation Loss", mvalid.cost)

        with tf.name_scope("Test"):
            test_input = PTBInput(config=eval_config,
                                  data=test_data,
                                  name="TestInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mtest = PTBModel(is_training=False,
                                 config=eval_config,
                                 input_=test_input)
            tf.summary.scalar("Test Loss", mtest.cost)

        # Export named ops so they survive the metagraph round-trip below.
        models = {"Train": m, "Valid": mvalid, "Test": mtest}
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.train.export_meta_graph()
        if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
            raise ValueError(
                "num_gpus > 1 is not supported for TensorFlow versions "
                "below 1.1.0")
        soft_placement = False
        if FLAGS.num_gpus > 1:
            # Multi-GPU needs soft placement plus a rewritten metagraph.
            soft_placement = True
            util.auto_parallel(metagraph, m)

    # BUG FIX: message typo "Gragh" -> "Graph".
    print("Graph construction time: %.3f" % (time.time() - global_begin_time),
          flush=True)

    with tf.Graph().as_default():
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()
        if FLAGS.save_path:
            if not os.path.isdir(FLAGS.save_path):
                os.mkdir(FLAGS.save_path)
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)

        config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
        with sv.managed_session(config=config_proto) as session:
            for i in range(config.max_max_epoch):
                # Constant LR for the first max_epoch epochs, then decay.
                lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)

                print("Epoch: %d Learning rate: %.8f" %
                      (i + 1, session.run(m.lr)),
                      flush=True)
                train_perplexity = run_epoch(session,
                                             m,
                                             eval_op=m.train_op,
                                             parallel=True,
                                             verbose=True)
                print("Epoch: %d Train Perplexity: %.3f" %
                      (i + 1, train_perplexity),
                      flush=True)
                valid_perplexity = run_epoch(session, mvalid)
                print("Epoch: %d Valid Perplexity: %.3f" %
                      (i + 1, valid_perplexity),
                      flush=True)
                if FLAGS.test_when_training:
                    test_perplexity = run_epoch(session, mtest)
                    print("Epoch: %d Test Perplexity: %.3f" %
                          (i + 1, test_perplexity),
                          flush=True)

                print("Current running time: %.3f" %
                      ((time.time() - global_begin_time) / 3600),
                      flush=True)

            if not FLAGS.test_when_training:
                test_perplexity = run_epoch(session, mtest)
                print("Test Perplexity: %.3f" % (test_perplexity), flush=True)

            if config.rnn_mode != CUDNN:
                # Parameter count in millions (not available for CUDNN cells).
                print("The number of parameters: {:.3f}".format(
                    get_num_params() / 1000000),
                      flush=True)

            if FLAGS.num_gpus:
                # Peak GPU memory in GiB.
                print("Peak memory usage of GPUs: {}".format(
                    session.run(m.memory_use) / (1024**3)),
                      flush=True)

            if FLAGS.save_path:
                print("Saving model to %s." % FLAGS.save_path, flush=True)
                sv.saver.save(session,
                              FLAGS.save_path,
                              global_step=sv.global_step)

    print("Global duration time: %.3f" %
          ((time.time() - global_begin_time) / 3600),
          flush=True)
Ejemplo n.º 7
0
def _main(_):
  """Build PTB models, load pretrained weights from HDF5, and evaluate.

  Reads variable values from <save_path>.hdf5 and reports validation
  perplexity only; no training is performed.
  """
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")

  # Pin GPU selection before TensorFlow enumerates devices.
  os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
  os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu

  hdf5_file = FLAGS.save_path + '.hdf5'

  raw_data = reader.ptb_raw_data(FLAGS.data_path, FLAGS.vocab_path)
  train_data, valid_data, test_data, _ = raw_data

  print('data load finished!')
  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  def custom_getter(getter, name, *args, **kwargs):
    # Freeze variables and initialize them from the pretrained HDF5 file.
    kwargs['trainable'] = False
    kwargs['initializer'] = _pretrained_initializer(
      name, hdf5_file)
    return getter(name, *args, **kwargs)

  with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

    with tf.name_scope("Train"):
      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
      with tf.variable_scope("Model", reuse=None, initializer=initializer):
        m = PTBModel(is_training=True, config=config, input_=train_input)

      tf.summary.scalar("Training Loss", m.cost)
      tf.summary.scalar("Learning Rate", m.lr)

    with tf.variable_scope("Model", reuse=tf.AUTO_REUSE), h5py.File(hdf5_file, 'r') as fin:
      # Map in-graph variable names to the datasets stored in the HDF5 file.
      data_dict = {}
      data_dict['embedding'] = fin['Model/embedding:0']
      data_dict['RNN/multi_rnn_cell/cell_0/basic_lstm_cell/kernel'] = fin[
        'Model/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0']
      data_dict['RNN/multi_rnn_cell/cell_0/basic_lstm_cell/bias'] = fin[
        'Model/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/bias:0']
      data_dict['RNN/multi_rnn_cell/cell_1/basic_lstm_cell/kernel'] = fin[
        'Model/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/kernel:0']
      data_dict['RNN/multi_rnn_cell/cell_1/basic_lstm_cell/bias'] = fin[
        'Model/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/bias:0']
      data_dict['softmax_w'] = fin['Model/softmax_w:0']
      data_dict['softmax_b'] = fin['Model/softmax_b:0']
      # BUG FIX: dict.iteritems() does not exist in Python 3; use items().
      for param_name, data in data_dict.items():
        try:
          var = tf.get_variable(param_name)
          # NOTE(review): var.assign(...) only builds an assign op; it is
          # never run in a session here, so the pretrained values may not
          # actually be applied -- verify against the evaluation results.
          var.assign(tf.convert_to_tensor(data[...]))
        except ValueError:
          raise

    with tf.name_scope("Valid"):
      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
      tf.summary.scalar("Validation Loss", mvalid.cost)

    with tf.name_scope("Test"):
      test_input = PTBInput(
          config=eval_config, data=test_data, name="TestInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mtest = PTBModel(is_training=False, config=eval_config,
                         input_=test_input)

    # Export named ops so they survive the metagraph round-trip below.
    models = {"Train": m, "Valid": mvalid, "Test": mtest}
    for name, model in models.items():
      model.export_ops(name)
    metagraph = tf.train.export_meta_graph()
    if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
      raise ValueError("num_gpus > 1 is not supported for TensorFlow versions "
                       "below 1.1.0")
    soft_placement = False
    if FLAGS.num_gpus > 1:
      soft_placement = True
      util.auto_parallel(metagraph, m)

  with tf.Graph().as_default():
    tf.train.import_meta_graph(metagraph)
    for model in models.values():
      model.import_ops()
    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
    with sv.managed_session(config=config_proto) as session:
      valid_perplexity, _ = run_epoch(session, mvalid)
      print("Valid Perplexity: %.3f" % valid_perplexity)
Ejemplo n.º 8
0
def main(_):
    """Train a PTB model, evaluate it, and export a TF-Serving SavedModel.

    Builds Train/Valid/Test models sharing variables, trains under a
    Supervisor, prints perplexities, optionally checkpoints, and finally
    exports a prediction signature for TensorFlow Serving.
    """
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    # Evaluation feeds one token at a time so RNN state carries across steps.
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default():
        # Parameters start from a uniform distribution over
        # [-init_scale, +init_scale].
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope("Train"):
            train_input = PTBInput(config=config,
                                   data=train_data,
                                   name="TrainInput")
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m = PTBModel(is_training=True,
                             config=config,
                             input_=train_input)
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        with tf.name_scope("Valid"):
            valid_input = PTBInput(config=config,
                                   data=valid_data,
                                   name="ValidInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mvalid = PTBModel(is_training=False,
                                  config=config,
                                  input_=valid_input)
            tf.summary.scalar("Validation Loss", mvalid.cost)

        with tf.name_scope("Test"):
            test_input = PTBInput(config=eval_config,
                                  data=test_data,
                                  name="TestInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mtest = PTBModel(is_training=False,
                                 config=eval_config,
                                 input_=test_input)

        # Export named ops so they survive the metagraph round-trip below.
        models = {"Train": m, "Valid": mvalid, "Test": mtest}
        # BUG FIX: dict.iteritems() does not exist in Python 3; use items().
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.train.export_meta_graph()
        if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
            raise ValueError(
                "num_gpus > 1 is not supported for TensorFlow versions "
                "below 1.1.0")
        soft_placement = False
        if FLAGS.num_gpus > 1:
            soft_placement = True
            util.auto_parallel(metagraph, m)

    with tf.Graph().as_default():
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        # NOTE(review): soft placement is hard-coded True here, ignoring the
        # soft_placement flag computed above -- confirm this is intentional.
        config_proto = tf.ConfigProto(allow_soft_placement=True)
        with sv.managed_session(config=config_proto) as session:
            for i in range(config.max_max_epoch):
                # Constant LR for the first max_epoch epochs, then decay.
                lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)

                print("Epoch: %d Learning rate: %.3f" %
                      (i + 1, session.run(m.lr)))
                train_perplexity = run_epoch(session,
                                             m,
                                             eval_op=m.train_op,
                                             verbose=True)
                print("Epoch: %d Train Perplexity: %.3f" %
                      (i + 1, train_perplexity))
                valid_perplexity = run_epoch(session, mvalid)
                print("Epoch: %d Valid Perplexity: %.3f" %
                      (i + 1, valid_perplexity))

            test_perplexity = run_epoch(session, mtest)
            print("Test Perplexity: %.3f" % test_perplexity)

            if FLAGS.save_path:
                print("Saving model to %s." % FLAGS.save_path)
                sv.saver.save(session,
                              FLAGS.save_path,
                              global_step=sv.global_step)

            # The Supervisor finalizes the graph; unfreeze it so the
            # SavedModel export below can add ops.
            session.graph._unsafe_unfinalize()

            # Export for TensorFlow Serving under <model_path>/<version>.
            export_path = os.path.join(
                tf.compat.as_bytes(FLAGS.model_path),
                tf.compat.as_bytes(str(FLAGS.model_version)))
            builder = saved_model_builder.SavedModelBuilder(export_path)
            prediction_inputs = {
                'input':
                tf.saved_model.utils.build_tensor_info(mtest.input_data)
            }
            prediction_outputs = {
                'output':
                tf.saved_model.utils.build_tensor_info(mtest.predict),
                'cell_state':
                tf.saved_model.utils.build_tensor_info(
                    mtest.final_state[-1].c),
                'embed_lookup':
                tf.saved_model.utils.build_tensor_info(mtest.embed_lookup)
            }
            prediction_signature = tf.saved_model.signature_def_utils.build_signature_def(
                inputs=prediction_inputs,
                outputs=prediction_outputs,
                method_name=tf.saved_model.signature_constants.
                PREDICT_METHOD_NAME)
            builder.add_meta_graph_and_variables(
                session, [tf.saved_model.tag_constants.SERVING],
                signature_def_map={
                    'predict_signature': prediction_signature,
                })
            session.graph.finalize()
            builder.save()
            print("Done export!")
Ejemplo n.º 9
0
def main(_):
  """Train a PTB model, checkpoint it, then reload it and export a
  SavedModel for serving.

  NOTE(review): the final export section references `train_input` and `m`,
  which were built in the first (exported) graph, from inside a brand-new
  Session/graph restored from disk -- cross-graph tensor references like
  this normally fail or export the wrong tensors; verify this path actually
  runs.
  """

  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  gpus = [
      x.name for x in device_lib.list_local_devices() if x.device_type == "GPU"
  ]
  if FLAGS.num_gpus > len(gpus):
    raise ValueError(
        "Your machine has only %d gpus "
        "which is less than the requested --num_gpus=%d."
        % (len(gpus), FLAGS.num_gpus))

  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  train_data, valid_data, test_data, _ = raw_data


  config = get_config()
  eval_config = get_config()
  # Evaluation feeds one token at a time so RNN state carries across steps.
  eval_config.batch_size = 1
  eval_config.num_steps = 1


  with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)

    with tf.name_scope("Train"):
      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
      with tf.variable_scope("Model", reuse=None, initializer=initializer):
        m = PTBModel(is_training=True, config=config, input_=train_input)


      tf.summary.scalar("Training Loss", m.cost)
      tf.summary.scalar("Learning Rate", m.lr)



    with tf.name_scope("Valid"):
      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
      # reuse=True: validation shares the trained variables.
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
      tf.summary.scalar("Validation Loss", mvalid.cost)

    with tf.name_scope("Test"):
      test_input = PTBInput(
          config=eval_config, data=test_data, name="TestInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mtest = PTBModel(is_training=False, config=eval_config,input_=test_input)
    # Export named ops so they survive the metagraph round-trip below.
    models = {"Train": m, "Valid": mvalid, "Test": mtest}
    for name, model in models.items():
      model.export_ops(name)
    metagraph = tf.train.export_meta_graph()
    if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
      raise ValueError("num_gpus > 1 is not supported for TensorFlow versions "
                       "below 1.1.0")
    soft_placement = False
    if FLAGS.num_gpus > 1:
      soft_placement = True
      util.auto_parallel(metagraph, m)

  # NOTE(review): checkpoint is saved to './data/model/mode/...' but restored
  # below from './data/model/model.ckpt-0' -- the paths do not match; confirm
  # which one is intended.
  save_path = './data/model/mode/model.ckpt'

  with tf.Graph().as_default():
    tf.train.import_meta_graph(metagraph)
    for model in models.values():
      model.import_ops()
    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
    with sv.managed_session(config=config_proto) as session:

      for i in range(config.max_max_epoch):
        # Constant LR for the first max_epoch epochs, then decay.
        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
        m.assign_lr(session, config.learning_rate * lr_decay)

        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
        train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True)

        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
        valid_perplexity = run_epoch(session, mvalid)
        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

      test_perplexity = run_epoch(session, mtest)
      print("Test Perplexity: %.3f" % test_perplexity)

      if FLAGS.save_path:
        print("Saving model to %s." % FLAGS.save_path)
        # NOTE(review): saves to the hard-coded save_path, not FLAGS.save_path
        # as the message claims, and without a global_step suffix.
        sv.saver.save(session,save_path)
      #   sess = session
      #   model_path = './data/model/'
      #   path = './data/model/'
      #   dir_list = os.listdir(path)
      #   if len(dir_list) == 0:
      #       version = 1
      #   else:
      #       last_version = len(dir_list)
      #       version = last_version + 1
      #   path = path + "{}".format(str(version))
      #   prediction_signature = (
      #       tf.saved_model.signature_def_utils.build_signature_def(
      #           inputs={'input_images': tf.saved_model.utils.build_tensor_info(train_input.input_data)},
      #           outputs={'output': tf.saved_model.utils.build_tensor_info(m.logits)},
      #           method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
      #       )
      #   )
      #   builder = tf.saved_model.builder.SavedModelBuilder(path)
      #   builder.add_meta_graph_and_variables(
      #       sess, [tf.saved_model.tag_constants.SERVING],
      #       signature_def_map={
      #           'generate_images': prediction_signature
      #       },
      #       legacy_init_op=tf.group(tf.tables_initializer(), name='legacy_init_op'))
      #   builder.save(as_text=False)


  with tf.Session(graph=tf.Graph()) as sess:
    saver = tf.train.import_meta_graph("./data/model/model.ckpt-0.meta")
    saver.restore(sess,"./data/model/model.ckpt-0")
    print("Model restore")

    # Version the SavedModel directory by counting existing subdirectories.
    path = './data/model/'
    dir_list = os.listdir(path)
    if len(dir_list) == 0:
        version = 1
    else:
        last_version = len(dir_list)
        version = last_version + 1
    path = path + "{}".format(str(version))
    # NOTE(review): train_input and m belong to the earlier graph, not to
    # `sess`'s restored graph -- see the function docstring.
    prediction_signature = (
        tf.saved_model.signature_def_utils.build_signature_def(
            inputs={'input_images': tf.saved_model.utils.build_tensor_info(train_input.input_data)},
            outputs={'output': tf.saved_model.utils.build_tensor_info(m.logits)},
            method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
        )
    )
    builder = tf.saved_model.builder.SavedModelBuilder(path)
    builder.add_meta_graph_and_variables(
        sess, [tf.saved_model.tag_constants.SERVING],
        signature_def_map={
            'generate_images': prediction_signature
        },
        legacy_init_op=tf.group(tf.tables_initializer(), name='legacy_init_op'))
    builder.save(as_text=False)
Ejemplo n.º 10
0
def main(_):
  """Train a PTB model and dump its parameters to an HDF5 file each save.

  After every epoch (when --save_path is set) the checkpoint is written and
  the parameter dict returned by run_epoch is serialized to
  <save_path>.hdf5, then the stored keys are printed back.
  """
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")

  # Pin GPU selection before TensorFlow enumerates devices.
  os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
  os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu

  raw_data = reader.ptb_raw_data(FLAGS.data_path, FLAGS.vocab_path)
  train_data, valid_data, test_data, _ = raw_data

  print('data load finished!')
  config = get_config()
  eval_config = get_config()
  # Evaluation feeds one token at a time so RNN state carries across steps.
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

    with tf.name_scope("Train"):
      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
      with tf.variable_scope("Model", reuse=None, initializer=initializer):
        m = PTBModel(is_training=True, config=config, input_=train_input)
      tf.summary.scalar("Training Loss", m.cost)
      tf.summary.scalar("Learning Rate", m.lr)

    with tf.name_scope("Valid"):
      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
      tf.summary.scalar("Validation Loss", mvalid.cost)

    with tf.name_scope("Test"):
      test_input = PTBInput(
          config=eval_config, data=test_data, name="TestInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mtest = PTBModel(is_training=False, config=eval_config,
                         input_=test_input)

    # Export named ops so they survive the metagraph round-trip below.
    models = {"Train": m, "Valid": mvalid, "Test": mtest}
    for name, model in models.items():
      model.export_ops(name)
    metagraph = tf.train.export_meta_graph()
    if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
      raise ValueError("num_gpus > 1 is not supported for TensorFlow versions "
                       "below 1.1.0")
    soft_placement = False
    if FLAGS.num_gpus > 1:
      soft_placement = True
      util.auto_parallel(metagraph, m)

  with tf.Graph().as_default():
    tf.train.import_meta_graph(metagraph)
    for model in models.values():
      model.import_ops()
    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
    with sv.managed_session(config=config_proto) as session:
      for i in range(config.max_max_epoch):
        # Constant LR for the first max_epoch epochs, then decay.
        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
        m.assign_lr(session, config.learning_rate * lr_decay)

        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
        train_perplexity, paras = run_epoch(session, m, eval_op=m.train_op,
                                     verbose=True)
        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
        valid_perplexity, _ = run_epoch(session, mvalid)
        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

        if FLAGS.save_path:
          print("Saving model to %s." % FLAGS.save_path)
          sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
          hdf5_file = FLAGS.save_path + '.hdf5'
          # FIX: handle names were swapped (the write handle was called
          # 'fin' and the read handle 'fout'); use conventional names.
          with h5py.File(hdf5_file, 'w') as fout:
            for k, v in paras.items():
              fout[k] = v

          # Re-open read-only as a sanity check and list the saved keys.
          with h5py.File(hdf5_file, 'r') as fin:
            for k in paras.keys():
              print(k)
Ejemplo n.º 11
0
def main(_):
  """Train an RNN language model with perplexity-driven LR decay.

  Unlike the schedule-based variants, the learning rate here decays only
  when validation or test perplexity got worse than the best seen so far
  (with a 1e-4 floor).  The best test/valid perplexities are written to
  'ppl_hidden_<size>.txt' whenever the test perplexity improves.
  """
  raw_data = reader.ptb_raw_data(FLAGS.data_path)

  train_data, valid_data, test_data, _ = raw_data

  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

    with tf.name_scope("Train"):
      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
      with tf.variable_scope("Model", reuse=None, initializer=initializer):
        m = RNNModel(is_training=True, config=config, input_=train_input)
      tf.summary.scalar("Training Loss", m.cost)
      tf.summary.scalar("Learning Rate", m.lr)

    with tf.name_scope("Valid"):
      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mvalid = RNNModel(is_training=False, config=config, input_=valid_input)
      tf.summary.scalar("Validation Loss", mvalid.cost)

    with tf.name_scope("Test"):
      # NOTE(review): test uses the training config (batched, multi-step),
      # not eval_config -- confirm this is intentional.
      test_input = PTBInput(
          config=config, data=test_data, name="TestInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mtest = RNNModel(is_training=False, config=config,
                         input_=test_input)

    # Export named ops so they survive the metagraph round-trip below.
    models = {"Train": m, "Valid": mvalid, "Test": mtest}
    for name, model in models.items():
      model.export_ops(name)
    metagraph = tf.train.export_meta_graph()
    if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
      raise ValueError("num_gpus > 1 is not supported for TensorFlow versions "
                       "below 1.1.0")
    soft_placement = False
    if FLAGS.num_gpus > 1:
      soft_placement = True
      util.auto_parallel(metagraph, m)

  with tf.Graph().as_default():
    tf.train.import_meta_graph(metagraph)
    for model in models.values():
      model.import_ops()
    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
    config_proto.gpu_options.allow_growth = True
    with sv.managed_session(config=config_proto) as session:
      best_valid_perplexity = 10000
      valid_perplexity = 0
      best_test_perplexity = 10000
      test_perplexity = 0
      for i in range(config.max_max_epoch):
        # Decay the LR only when the previous epoch's valid or test
        # perplexity regressed past the best seen; floor at 1e-4.
        # (The no-op else branches of the original were removed.)
        if valid_perplexity > best_valid_perplexity or test_perplexity > best_test_perplexity:
          if config.learning_rate > 0.0001:
            config.learning_rate = config.learning_rate * config.lr_decay
        m.assign_lr(session, config.learning_rate)
        print("Epoch: %d Learning rate: %.4f" % (i + 1, session.run(m.lr)))
        train_perplexity = run_epoch(session, m, eval_op=m.train_op,
                                     verbose=True)
        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
        valid_perplexity = run_epoch(session, mvalid)
        if valid_perplexity < best_valid_perplexity:
          best_valid_perplexity = valid_perplexity
        print("Epoch: %d Valid Perplexity: %.3f best valid: %.3f" % (i + 1, valid_perplexity, best_valid_perplexity))

        test_perplexity = run_epoch(session, mtest)
        if test_perplexity < best_test_perplexity:
          best_test_perplexity = test_perplexity
          # Record the new best; 'with' guarantees the file is closed.
          with open('ppl_hidden_' + str(config.hidden_size) + '.txt', 'w') as f:
            f.write('best_test_perplexity:' + str(best_test_perplexity) + '\n')
            f.write('best_valid_perplexity:' + str(best_valid_perplexity) + '\n')
        print("Epoch: %d Test Perplexity: %.3f best test: %.3f" % (i + 1, test_perplexity, best_test_perplexity))

      if FLAGS.save_path:
        print("Saving model to %s." % FLAGS.save_path)
        sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
Ejemplo n.º 12
0
from __future__ import absolute_import
Ejemplo n.º 13
0
def main(_):
    """Build PTB models, optionally restore a checkpoint, and run word
    prediction over the test data (no training loop).

    The test model's num_steps is sized to the whole test sequence so a
    single run predicts across it; results are decoded via dict_id_word.
    """
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    # This reader variant also returns the id->word vocabulary mapping.
    train_data, valid_data, test_data, _, dict_id_word = raw_data

    # Invert id->word into word->id to look up the end-of-sentence id.
    dict_word_id = dict(zip(dict_id_word.values(), dict_id_word.keys()))
    eos_id = dict_word_id['<eos>']

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    # One step short of the full test sequence (inputs vs. shifted targets).
    eval_config.num_steps = np.shape(test_data)[0] - 1
    saver = None
    filename = None
    if FLAGS.save_path:
        filename = FLAGS.save_path + '/lmodel.ckpt'

    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope("Train"):
            train_input = PTBInput(config=config,
                                   data=train_data,
                                   name="TrainInput")
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m = PTBModel(is_training=True,
                             config=config,
                             input_=train_input)
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        with tf.name_scope("Valid"):
            valid_input = PTBInput(config=config,
                                   data=valid_data,
                                   name="ValidInput")
            # reuse=True: validation shares the trained variables.
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mvalid = PTBModel(is_training=False,
                                  config=config,
                                  input_=valid_input)
            tf.summary.scalar("Validation Loss", mvalid.cost)

        with tf.name_scope("Test"):
            test_input = PTBInput(config=eval_config,
                                  data=test_data,
                                  name="TestInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mtest = PTBModel(is_training=False,
                                 config=eval_config,
                                 input_=test_input)
                tf.summary.scalar("Test Loss", mtest.cost)

        # Export named ops so they survive the metagraph round-trip below.
        models = {"Train": m, "Valid": mvalid, "Test": mtest}
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.train.export_meta_graph()
        if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
            raise ValueError(
                "num_gpus > 1 is not supported for TensorFlow versions "
                "below 1.1.0")
        soft_placement = False
        if FLAGS.num_gpus > 1:
            soft_placement = True
            util.auto_parallel(metagraph, m)

        if FLAGS.save_path:
            saver = tf.train.Saver()
            try:
                # Best-effort restore: ignore a missing/incompatible
                # checkpoint and fall through to fresh variables.
                # NOTE(review): tf.Session() created here is never closed
                # (leaks), and the broad except hides real restore errors;
                # the restored values also never reach the session opened
                # below -- confirm this pre-restore is needed at all.
                saver.restore(tf.Session(), filename)
            except Exception as e:
                pass

    with tf.Graph().as_default():
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
        with sv.managed_session(config=config_proto) as session:
            # Predict words over the test sequence and decode ids to words.
            predicted_word_output = word_predict(session,
                                                 mtest,
                                                 predict_op=mtest._output,
                                                 log_output=True,
                                                 dict_ids=dict_id_word)
            if FLAGS.save_path:
                print("Saving model to %s." % filename)
                save_path = saver.save(session, filename)
Ejemplo n.º 14
0
def main(_):
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    # 输入数据并制作计算图(构建模型)
    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope("Train"):
            train_input = PTBInput(
                config=config, data=train_data,
                name="TrainInput")  # reader里的x, y 即input_data, target
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m = PTBModel(is_training=True,
                             config=config,
                             input_=train_input)
            tf.summary.scalar("Training Loss", m.cost)  # 记录tensorboard
            tf.summary.scalar("Learning Rate", m.lr)

        with tf.name_scope("Valid"):
            valid_input = PTBInput(config=config,
                                   data=valid_data,
                                   name="ValidInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mvalid = PTBModel(is_training=False,
                                  config=config,
                                  input_=valid_input)
            tf.summary.scalar("Validation Loss", mvalid.cost)

        with tf.name_scope("Test"):
            test_input = PTBInput(config=eval_config,
                                  data=test_data,
                                  name="TestInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mtest = PTBModel(is_training=False,
                                 config=eval_config,
                                 input_=test_input)

        models = {"Train": m, "Valid": mvalid, "Test": mtest}
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.train.export_meta_graph()  # TODO:这里的meta_graph是什么东西?
        if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
            raise ValueError(
                "num_gpus > 1 is not supported for TensorFlow versions "
                "below 1.1.0")
        soft_placement = False
        if FLAGS.num_gpus > 1:
            soft_placement = True
            util.auto_parallel(metagraph, m)

    # 载入三个模型并开始训练
    with tf.Graph().as_default():
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()
        # Supervisor模块集成了多个功能:
        # 1)自动去checkpoint加载数据或初始化数据 ,因此我们就不需要手动初始化或者从checkpoint中加载数据
        # 2)自身有一个Saver,可以用来保存checkpoint,因此不需要创建Saver,直接使用Supervisor里的Saver即可
        # 3)有一个summary_computed用来保存Summary,因此不需要创建summary_writer
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
        with sv.managed_session(config=config_proto) as session:
            for i in range(config.max_max_epoch):
                lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)

                print("Epoch: %d Learning rate: %.3f" %
                      (i + 1, session.run(m.lr)))
                train_perplexity = run_epoch(session,
                                             m,
                                             eval_op=m.train_op,
                                             verbose=True)
                print("Epoch: %d Train Perplexity: %.3f" %
                      (i + 1, train_perplexity))
                valid_perplexity = run_epoch(session, mvalid)
                print("Epoch: %d Valid Perplexity: %.3f" %
                      (i + 1, valid_perplexity))

            test_perplexity = run_epoch(
                session, mtest
            )  # TODO: 为什么这里的mvalid、mtest模型看上去和m是独立的两个模型,实际上却"继承"了m的训练结果?
            print("Test Perplexity: %.3f" % test_perplexity
                  )  # tf.variable_scope("Model", reuse=True, ..) 复用了参数

            if FLAGS.save_path:
                print("Saving model to %s." % FLAGS.save_path)
                sv.saver.save(session,
                              FLAGS.save_path,
                              global_step=sv.global_step)
Ejemplo n.º 15
0
def main(_):
    """Distributed PTB training via tf.train.ClusterSpec / tf.train.Server.

    "ps" tasks only serve variables (blocking in server.join()); "worker"
    tasks build the train/valid/test PTBModel trio and train through a
    Supervisor session attached to the cluster.
    """
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    # Evaluation feeds one token at a time.
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    #=========================== added by yctung start =============================
    # Comma-separated host:port lists from the command line.
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Create and start a server for the local task.
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    print("job name = " + FLAGS.job_name)
    if FLAGS.job_name == "ps":
        print("--- I am ps ---")
        # Parameter servers block here forever, serving variable reads/writes.
        server.join()

    elif FLAGS.job_name == "worker":
        print("--- I am worker ---")
        #=========================== added by yctung end   =============================
        with tf.Graph().as_default():
            initializer = tf.random_uniform_initializer(
                -config.init_scale, config.init_scale)

            with tf.name_scope("Train"):
                train_input = PTBInput(config=config,
                                       data=train_data,
                                       name="TrainInput")
                with tf.variable_scope("Model",
                                       reuse=None,
                                       initializer=initializer):
                    m = PTBModel(is_training=True,
                                 config=config,
                                 input_=train_input)
                tf.summary.scalar("Training Loss", m.cost)
                tf.summary.scalar("Learning Rate", m.lr)

            with tf.name_scope("Valid"):
                valid_input = PTBInput(config=config,
                                       data=valid_data,
                                       name="ValidInput")
                with tf.variable_scope("Model",
                                       reuse=True,
                                       initializer=initializer):
                    mvalid = PTBModel(is_training=False,
                                      config=config,
                                      input_=valid_input)
                tf.summary.scalar("Validation Loss", mvalid.cost)

            with tf.name_scope("Test"):
                test_input = PTBInput(config=eval_config,
                                      data=test_data,
                                      name="TestInput")
                with tf.variable_scope("Model",
                                       reuse=True,
                                       initializer=initializer):
                    mtest = PTBModel(is_training=False,
                                     config=eval_config,
                                     input_=test_input)

            # Register per-model ops into collections so they survive the
            # metagraph export/import round trip.
            models = {"Train": m, "Valid": mvalid, "Test": mtest}
            for name, model in models.items():
                model.export_ops(name)
            metagraph = tf.train.export_meta_graph()
            if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
                raise ValueError(
                    "num_gpus > 1 is not supported for TensorFlow versions "
                    "below 1.1.0")
            soft_placement = False
            if FLAGS.num_gpus > 1:
                soft_placement = True
                util.auto_parallel(metagraph, m)

        with tf.Graph().as_default():
            tf.train.import_meta_graph(metagraph)
            for model in models.values():
                model.import_ops()

            # sv = tf.train.Supervisor(logdir=FLAGS.save_path)
            # config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
            # with sv.managed_session(config=config_proto) as session:

            # yctung: add the distributed session setting
            is_chief = (FLAGS.task_index == 0
                        )  #checks if this is the chief node
            sv = tf.train.Supervisor(logdir=FLAGS.save_path, is_chief=is_chief)
            config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
            # NOTE(review): config_proto is built but never passed to
            # prepare_or_wait_for_session below — confirm whether it should
            # be forwarded as config=config_proto.
            with sv.prepare_or_wait_for_session(server.target) as session:

                for i in range(config.max_max_epoch):
                    # Learning-rate decay only kicks in after max_epoch epochs.
                    lr_decay = config.lr_decay**max(i + 1 - config.max_epoch,
                                                    0.0)
                    m.assign_lr(session, config.learning_rate * lr_decay)

                    print("Epoch: %d Learning rate: %.3f" %
                          (i + 1, session.run(m.lr)))
                    train_perplexity = run_epoch(session,
                                                 m,
                                                 eval_op=m.train_op,
                                                 verbose=True)
                    print("Epoch: %d Train Perplexity: %.3f" %
                          (i + 1, train_perplexity))
                    valid_perplexity = run_epoch(session, mvalid)
                    print("Epoch: %d Valid Perplexity: %.3f" %
                          (i + 1, valid_perplexity))

                test_perplexity = run_epoch(session, mtest)
                print("Test Perplexity: %.3f" % test_perplexity)

                if FLAGS.save_path:
                    print("Saving model to %s." % FLAGS.save_path)
                    sv.saver.save(session,
                                  FLAGS.save_path,
                                  global_step=sv.global_step)
Ejemplo n.º 16
0
def train_and_validate(batch_size_20, batch_size_40, batch_size_60,
                       batch_size_80, learning_rate):
    """Objective function for hyperparameter search over PTB training.

    Args:
        batch_size_20, batch_size_40, batch_size_60, batch_size_80: one-hot
            (or soft) encoding of the categorical batch size; the largest
            value selects the batch size (20/40/60/80).
        learning_rate: initial learning rate written into the config.

    Returns:
        Negative final validation perplexity, so an optimizer that
        maximizes this objective minimizes perplexity.
    """
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()

    # Modify default model configuration with input hyperparameters
    # Proposed strategy will input discretized/integer value as float so casting to int is needed
    # Basic strategy will input a float value
    # config.batch_size = int(round(batch_size))
    config.learning_rate = learning_rate
    # For categorical parameters, use one-hot encoding to set config
    batch_size_list = [
        batch_size_20,
        batch_size_40,
        batch_size_60,
        batch_size_80,
    ]
    # Argmax of the one-hot vector; ties resolve to the smaller batch size
    # because list.index returns the first occurrence of the maximum.
    batch_size_idx = batch_size_list.index(max(batch_size_list))
    config.batch_size = 20 * (batch_size_idx + 1)
    # if batch_size_20 == 1:
    #     config.batch_size = 20
    # elif batch_size_40 == 1:
    #     config.batch_size = 40
    # elif batch_size_60 == 1:
    #     config.batch_size = 60
    # elif batch_size_80 == 1:
    #     config.batch_size = 80
    # else:
    #     raise Exception("Categorical parameter is not properly one-hot encoded")
    eval_config = get_config()
    # Evaluation feeds one token at a time.
    eval_config.batch_size = 1
    eval_config.num_steps = 1
    print("lr=%s bs=%s" % (config.learning_rate, config.batch_size))
    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope("Train"):
            train_input = PTBInput(config=config,
                                   data=train_data,
                                   name="TrainInput")
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m = PTBModel(is_training=True,
                             config=config,
                             input_=train_input)
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        with tf.name_scope("Valid"):
            valid_input = PTBInput(config=config,
                                   data=valid_data,
                                   name="ValidInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mvalid = PTBModel(is_training=False,
                                  config=config,
                                  input_=valid_input)
            tf.summary.scalar("Validation Loss", mvalid.cost)

        with tf.name_scope("Test"):
            test_input = PTBInput(config=eval_config,
                                  data=test_data,
                                  name="TestInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mtest = PTBModel(is_training=False,
                                 config=eval_config,
                                 input_=test_input)

        models = {"Train": m, "Valid": mvalid, "Test": mtest}
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.train.export_meta_graph()
        if (StrictVersion(tf.__version__) < StrictVersion("1.1.0")
                and FLAGS.num_gpus > 1):
            raise ValueError(
                "num_gpus > 1 is not supported for TensorFlow versions "
                "below 1.1.0")
        soft_placement = False
        if FLAGS.num_gpus > 1:
            soft_placement = True
            util.auto_parallel(metagraph, m)

    with tf.Graph().as_default():
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
        with sv.managed_session(config=config_proto) as session:
            for i in range(config.max_max_epoch):
                # Learning-rate decay only kicks in after max_epoch epochs.
                lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)

                if FLAGS.verbose:
                    print("Epoch: %d Learning rate: %.3f" %
                          (i + 1, session.run(m.lr)))
                train_perplexity = run_epoch(session,
                                             m,
                                             eval_op=m.train_op,
                                             verbose=False)
                if FLAGS.verbose:
                    print("Epoch: %d Train Perplexity: %.3f" %
                          (i + 1, train_perplexity))
                valid_perplexity = run_epoch(session, mvalid)
                if FLAGS.verbose:
                    print("Epoch: %d Valid Perplexity: %.3f" %
                          (i + 1, valid_perplexity))

            # test_perplexity = run_epoch(session, mtest)
            # print("Test Perplexity: %.3f" % test_perplexity)

            if FLAGS.save_path:
                if FLAGS.verbose:
                    print("Saving model to %s." % FLAGS.save_path)
                sv.saver.save(session,
                              FLAGS.save_path,
                              global_step=sv.global_step)
    # Returning negative value since target is to be maximized
    return -valid_perplexity
Ejemplo n.º 17
0
        train_input = MSI_Input(config=config, data=x_train, name="TrainInput")
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
            m = MSI_Model(is_training=True, config=config, input_=train_input)
        tf.summary.scalar("Training Loss", m.cost)
        tf.summary.scalar("Learning Rate", m.lr)

    with tf.name_scope("Valid"):
        valid_input = MSI_Input(config=config, data=x_val, name="ValidInput")
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            mvalid = MSI_Model(is_training=False, config=config, input_=valid_input)
        tf.summary.scalar("Validation Loss", mvalid.cost)

    with tf.name_scope("Test"):
        test_input = MSI_Input(config=eval_config, data=x_test, name="TestInput")
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            mtest = MSI_Model(is_training=False, config=eval_config, input_=test_input)

    models = {"Train": m, "Valid": mvalid, "Test": mtest}
    for name, model in models.items():
        model.export_ops(name)
    metagraph = tf.train.export_meta_graph()

    soft_placement = False
    if n_GPUS > 1:
        soft_placement = True
        util.auto_parallel(metagraph, m)


if __name__ == "__main__":
    # TF1 entry point: parses command-line FLAGS, then dispatches to main(_).
    tf.app.run()
Ejemplo n.º 18
0
def main(_):
    """Train a PTB language model and report train/valid/test perplexity.

    Builds train/valid/test PTBModel instances sharing weights through
    variable_scope "Model" reuse, exports and re-imports the metagraph,
    then trains under a tf.train.Supervisor, printing perplexities and
    per-epoch timing.
    """
    if not FLAGS.data_path:
        raise ValueError('Must set --data_path to PTB data directory')
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == 'GPU'
    ]

    if FLAGS.num_gpus > len(gpus):
        raise ValueError('Your machine has only %d gpus '
                         'which is less than the requested --num_gpus=%d.' %
                         (len(gpus), FLAGS.num_gpus))

    # Genereate words to ids dictionary and convert words to ids
    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    # Get hyperparameters; evaluation feeds one token at a time.
    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        # Generate the three weight-sharing models.
        with tf.name_scope("Train"):
            train_input = PTBInput(config=config,
                                   data=train_data,
                                   name="TrainInput")
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m = PTBModel(is_training=True,
                             config=config,
                             input_=train_input)
            # Fixed summary tag: was "Training Loss," with a stray trailing
            # comma, producing an inconsistent TensorBoard tag.
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        with tf.name_scope('Valid'):
            valid_input = PTBInput(config=config,
                                   data=valid_data,
                                   name='ValidInput')
            with tf.variable_scope('Model',
                                   reuse=True,
                                   initializer=initializer):
                mvalid = PTBModel(is_training=False,
                                  config=config,
                                  input_=valid_input)
            tf.summary.scalar('Validation Loss', mvalid.cost)

        with tf.name_scope('Test'):
            test_input = PTBInput(config=eval_config,
                                  data=test_data,
                                  name='TestInput')
            with tf.variable_scope('Model',
                                   reuse=True,
                                   initializer=initializer):
                mtest = PTBModel(is_training=False,
                                 config=eval_config,
                                 input_=test_input)

        # Add ops to collection (tf.add_to_collection); the collection is
        # managed by TensorFlow.
        models = {'Train': m, 'Valid': mvalid, 'Test': mtest}
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.train.export_meta_graph(
        )  # Export the graph, it can be stored in the disk
        if tf.__version__ < '1.1.0' and FLAGS.num_gpus > 1:
            raise ValueError(
                'num_gpus > 1 is not supported for TensorFlow versions '
                'below 1.1.0')
        # Parallel config
        soft_placement = False
        if FLAGS.num_gpus > 1:
            soft_placement = True
            util.auto_parallel(metagraph, m)

    with tf.Graph().as_default():

        # Import ops and graph
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()

        # Use supervisor to save and load checkpoint, pre-train variables
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        config_proto = tf.ConfigProto(allow_soft_placement=soft_placement
                                      )  # Used to set config for session
        with sv.managed_session(config=config_proto) as session:
            # Number of passes over the corpus.
            for i in range(config.max_max_epoch):
                time1 = time.time()
                # Calculate learning decay (kicks in after max_epoch epochs).
                lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)

                print('Epoch: %d Learning rate: %.3f' %
                      (i + 1, session.run(m.lr)))

                train_perplexity = run_epoch(session,
                                             m,
                                             eval_op=m.train_op,
                                             verbose=True)
                print('Epoch: %d Train Perplexity: %.3f' %
                      (i + 1, train_perplexity))

                valid_perplexity = run_epoch(session, mvalid)
                print('Epoch: %d Valid Perplexity: %.3f' %
                      (i + 1, valid_perplexity))

                # Fixed operator-precedence bug: the original
                # `'%d s' % time.time() - time1` evaluated the `%` first and
                # then subtracted a float from a string (TypeError).
                print('One loop used %d s' % (time.time() - time1))

            test_perplexity = run_epoch(session, mtest)
            # Fixed: the format string expected two values
            # ('Epoch: %d ... %.3f') but only one was supplied (TypeError),
            # and the label wrongly said "Valid" for the test perplexity.
            print('Test Perplexity: %.3f' % test_perplexity)

            if FLAGS.save_path:
                print('Saving model to %s.' % FLAGS.save_path)
                sv.saver.save(session,
                              FLAGS.save_path,
                              global_step=sv.global_step)
Ejemplo n.º 19
0
def train(configs, data):
    """Build the PTB train/valid/test models, train, and track the best
    validation perplexity.

    Args:
        configs: (train_config, eval_config) pair of model configurations.
        data: (train_ids, valid_ids, test_ids) token-id sequences.

    Returns:
        (best_pp, best_epoch): the lowest validation perplexity observed
        and the 1-based epoch at which it occurred.
    """
    train_config, eval_config = configs
    train_ids, valid_ids, test_ids = data

    # Phase 1: build the graph, wire up the three weight-sharing models,
    # and export everything as a metagraph.
    with tf.Graph().as_default():
        init = tf.random_uniform_initializer(-train_config.init_scale,
                                             train_config.init_scale)

        with tf.name_scope("Train"):
            train_input = PTBInput(config=train_config,
                                   data=train_ids,
                                   name="TrainInput")
            with tf.variable_scope("Model", reuse=None, initializer=init):
                train_model = PTBModel(is_training=True,
                                       config=train_config,
                                       input_=train_input)
            tf.summary.scalar("Training Loss", train_model.cost)
            tf.summary.scalar("Learning Rate", train_model.lr)

        with tf.name_scope("Valid"):
            valid_input = PTBInput(config=train_config,
                                   data=valid_ids,
                                   name="ValidInput")
            with tf.variable_scope("Model", reuse=True, initializer=init):
                valid_model = PTBModel(is_training=False,
                                       config=train_config,
                                       input_=valid_input)
            tf.summary.scalar("Validation Loss", valid_model.cost)

        with tf.name_scope("Test"):
            test_input = PTBInput(config=eval_config,
                                  data=test_ids,
                                  name="TestInput")
            with tf.variable_scope("Model", reuse=True, initializer=init):
                test_model = PTBModel(is_training=False,
                                      config=eval_config,
                                      input_=test_input)
            tf.summary.scalar("Test Loss", test_model.cost)

        models = {"Train": train_model, "Valid": valid_model,
                  "Test": test_model}
        for scope_name, model in models.items():
            model.export_ops(scope_name)
        metagraph = tf.train.export_meta_graph()
        if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
            raise ValueError(
                "num_gpus > 1 is not supported for TensorFlow versions "
                "below 1.1.0")
        soft_placement = FLAGS.num_gpus > 1
        if soft_placement:
            util.auto_parallel(metagraph, train_model)

    # Phase 2: re-import the metagraph into a fresh graph and train,
    # remembering the best validation perplexity seen.
    with tf.Graph().as_default():
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()
        if FLAGS.save_path and not os.path.isdir(FLAGS.save_path):
            os.mkdir(FLAGS.save_path)
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)

        session_config = tf.ConfigProto(allow_soft_placement=soft_placement)
        best_pp, best_epoch = -1, 0
        with sv.managed_session(config=session_config) as session:
            for epoch in range(train_config.max_max_epoch):
                # Learning-rate decay only kicks in after max_epoch epochs.
                decay = train_config.lr_decay**max(
                    epoch + 1 - train_config.max_epoch, 0.0)
                train_model.assign_lr(session,
                                      train_config.learning_rate * decay)
                run_epoch(session,
                          train_model,
                          eval_op=train_model.train_op,
                          verbose=True)
                valid_pp = run_epoch(session, valid_model)
                if best_pp == -1 or valid_pp < best_pp:
                    best_pp, best_epoch = valid_pp, epoch + 1

    return best_pp, best_epoch
Ejemplo n.º 20
0
def main(_):
  """Train the PTB model and print train/valid/test perplexities.

  Same export/import-metagraph pattern as the other examples: three models
  share weights through variable_scope "Model" reuse; training runs under a
  tf.train.Supervisor.
  """
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  gpus = [
      x.name for x in device_lib.list_local_devices() if x.device_type == "GPU"
  ]
  if FLAGS.num_gpus > len(gpus):
    raise ValueError(
        "Your machine has only %d gpus "
        "which is less than the requested --num_gpus=%d."
        % (len(gpus), FLAGS.num_gpus))

  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  train_data, valid_data, test_data, _ = raw_data

  config = get_config()
  eval_config = get_config()
  # Evaluation feeds one token at a time (batch of 1, single step).
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

    with tf.name_scope("Train"):
      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
      with tf.variable_scope("Model", reuse=None, initializer=initializer):
        m = PTBModel(is_training=True, config=config, input_=train_input)
      tf.summary.scalar("Training Loss", m.cost)
      tf.summary.scalar("Learning Rate", m.lr)

    with tf.name_scope("Valid"):
      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
      tf.summary.scalar("Validation Loss", mvalid.cost)

    with tf.name_scope("Test"):
      test_input = PTBInput(
          config=eval_config, data=test_data, name="TestInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mtest = PTBModel(is_training=False, config=eval_config,
                         input_=test_input)

    # Register per-model ops in collections so they survive the metagraph
    # export/import round trip below.
    models = {"Train": m, "Valid": mvalid, "Test": mtest}
    for name, model in models.items():
      model.export_ops(name)
    metagraph = tf.train.export_meta_graph()
    if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
      raise ValueError("num_gpus > 1 is not supported for TensorFlow versions "
                       "below 1.1.0")
    soft_placement = False
    if FLAGS.num_gpus > 1:
      soft_placement = True
      util.auto_parallel(metagraph, m)

  with tf.Graph().as_default():
    tf.train.import_meta_graph(metagraph)
    for model in models.values():
      model.import_ops()
    # Supervisor handles variable init / checkpoint restore and owns a Saver.
    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
    with sv.managed_session(config=config_proto) as session:
      for i in range(config.max_max_epoch):
        # Learning-rate decay only kicks in after max_epoch epochs.
        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
        m.assign_lr(session, config.learning_rate * lr_decay)

        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
        train_perplexity = run_epoch(session, m, eval_op=m.train_op,
                                     verbose=True)
        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
        valid_perplexity = run_epoch(session, mvalid)
        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

      test_perplexity = run_epoch(session, mtest)
      print("Test Perplexity: %.3f" % test_perplexity)

      if FLAGS.save_path:
        print("Saving model to %s." % FLAGS.save_path)
        sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
Ejemplo n.º 21
0
def main(_):
    """Train the PTB RNN language model and dump graph-dependency logs.

    Phase 1 builds Train/Valid/Test models sharing one set of variables,
    exports the MetaGraph, and writes JSON dumps of the op-level dependency
    graph under kernelLogs/.  Phase 2 imports the MetaGraph into a fresh
    graph and, under a tf.train.Supervisor session, runs per-epoch training
    and validation, a final test pass, and an optional checkpoint save.

    Raises:
      ValueError: if --data_path is unset, if more GPUs are requested than
        are physically present, or if num_gpus > 1 on TensorFlow < 1.1.0.
    """
    import os  # local import: only needed to ensure kernelLogs/ exists

    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")
    # Fail fast if fewer physical GPUs are present than requested.
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    # Evaluation feeds one token at a time so RNN state is threaded exactly.
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope("Train"):
            train_input = PTBInput(config=config,
                                   data=train_data,
                                   name="TrainInput")
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m = PTBModel(is_training=True,
                             config=config,
                             input_=train_input)
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        with tf.name_scope("Valid"):
            valid_input = PTBInput(config=config,
                                   data=valid_data,
                                   name="ValidInput")
            # reuse=True: share all trainable variables with the Train model.
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mvalid = PTBModel(is_training=False,
                                  config=config,
                                  input_=valid_input)
            tf.summary.scalar("Validation Loss", mvalid.cost)

        with tf.name_scope("Test"):
            test_input = PTBInput(config=eval_config,
                                  data=test_data,
                                  name="TestInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mtest = PTBModel(is_training=False,
                                 config=eval_config,
                                 input_=test_input)

        models = {"Train": m, "Valid": mvalid, "Test": mtest}
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.train.export_meta_graph()

        # Robustness: the log directory may not exist on a fresh checkout.
        os.makedirs('kernelLogs', exist_ok=True)
        # BUGFIX: MessageToJson already returns a JSON *string*; routing it
        # through json.dump would double-encode it, writing a quoted string
        # literal rather than a JSON object. Write the string directly.
        temp_meta = MessageToJson(metagraph.graph_def)
        with open('kernelLogs/metagraph.json', 'w') as outfile:
            outfile.write(temp_meta)

        # BUGFIX: compare version components numerically; a lexicographic
        # string compare misorders e.g. "1.10.0" vs "1.2.0".
        tf_version = tuple(int(v) for v in tf.__version__.split(".")[:2])
        if tf_version < (1, 1) and FLAGS.num_gpus > 1:
            raise ValueError(
                "num_gpus > 1 is not supported for TensorFlow versions "
                "below 1.1.0")

        soft_placement = False
        if FLAGS.num_gpus > 1:
            soft_placement = True
            util.auto_parallel(metagraph, m)

        # Dump the op-level dependency graph twice: once keeping the tensor
        # output indices ("op:0") and once with bare op names, for offline
        # kernel/graph analysis.
        all_ops = tf.get_default_graph().get_operations()
        adj_list_graph = {}
        for op in all_ops:
            adj_list_graph[op.name] = set([inp.name for inp in op.inputs])
        adj_list_graph_notensors = {}
        for op in all_ops:
            adj_list_graph_notensors[op.name] = set(
                [inp.name.split(":")[0] for inp in op.inputs])

        # Sets are not JSON-serializable; convert the dedup'd deps to lists.
        adj_list_graph_notensors = {
            op_name: list(op_deps)
            for op_name, op_deps in adj_list_graph_notensors.items()
        }
        adj_list_graph = {
            op_name: list(op_deps)
            for op_name, op_deps in adj_list_graph.items()
        }

        with open('kernelLogs/org_graph_rnnlm_ptb_%s.json' % (FLAGS.model),
                  'w') as outfile:
            json.dump(adj_list_graph, outfile)
        with open(
                'kernelLogs/org_graph_notensors_rnnlm_ptb_%s.json' %
            (FLAGS.model), 'w') as outfile:
            json.dump(adj_list_graph_notensors, outfile)

    # Phase 2: import the exported MetaGraph into a fresh graph and train.
    with tf.Graph().as_default():
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)

        # Single-threaded execution with every graph rewrite disabled and
        # build_cost_model=1: keeps the executed graph as close as possible
        # to the exported one (useful for profiling/cost-model collection).
        config_proto = tf.ConfigProto(
            allow_soft_placement=soft_placement,
            graph_options=tf.GraphOptions(build_cost_model=1))
        config_proto.intra_op_parallelism_threads = 1
        config_proto.inter_op_parallelism_threads = 1
        config_proto.graph_options.optimizer_options.opt_level = -1
        config_proto.graph_options.rewrite_options.constant_folding = (
            rewriter_config_pb2.RewriterConfig.OFF)
        config_proto.graph_options.rewrite_options.arithmetic_optimization = (
            rewriter_config_pb2.RewriterConfig.OFF)
        config_proto.graph_options.rewrite_options.dependency_optimization = (
            rewriter_config_pb2.RewriterConfig.OFF)
        config_proto.graph_options.rewrite_options.layout_optimizer = (
            rewriter_config_pb2.RewriterConfig.OFF)

        with sv.managed_session(config=config_proto) as session:
            for i in range(config.max_max_epoch):
                # Exponential LR decay, deferred until after max_epoch epochs.
                lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)

                print("Epoch: %d Learning rate: %.3f" %
                      (i + 1, session.run(m.lr)))
                train_perplexity = run_epoch(session,
                                             m,
                                             eval_op=m.train_op,
                                             verbose=True,
                                             epoch_no=i)
                print("Epoch: %d Train Perplexity: %.3f" %
                      (i + 1, train_perplexity))
                valid_perplexity = run_epoch(session, mvalid)
                print("Epoch: %d Valid Perplexity: %.3f" %
                      (i + 1, valid_perplexity))

            test_perplexity = run_epoch(session, mtest)
            print("Test Perplexity: %.3f" % test_perplexity)

            if FLAGS.save_path:
                print("Saving model to %s." % FLAGS.save_path)
                sv.saver.save(session,
                              FLAGS.save_path,
                              global_step=sv.global_step)
Ejemplo n.º 22
0
def main(_):
  """Build, train and evaluate the PTB language model.

  Builds Train/Valid/Test models that share variables in one graph, exports
  the MetaGraph, then re-imports it into a fresh graph and runs training,
  validation, and a final test pass under a tf.train.Supervisor session.

  Raises:
    ValueError: if --data_path is unset, more GPUs are requested than are
      available, or num_gpus > 1 is used on TensorFlow < 1.1.0.
  """
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  # Fail fast if fewer physical GPUs are present than requested.
  gpus = [
      x.name for x in device_lib.list_local_devices() if x.device_type == "GPU"
  ]
  if FLAGS.num_gpus > len(gpus):
    raise ValueError(
        "Your machine has only %d gpus "
        "which is less than the requested --num_gpus=%d."
        % (len(gpus), FLAGS.num_gpus))

  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  train_data, valid_data, test_data, vocabulary = raw_data
  print(len(train_data))
  config = get_config()
  eval_config = get_config()
  # NOTE(review): eval here uses batch_size=35 / num_steps=43 rather than
  # the usual 1/1 single-token evaluation — confirm this is intentional.
  eval_config.batch_size = 35
  eval_config.num_steps = 43

  with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

    with tf.name_scope("Train"):
      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
      print(train_input)
      with tf.variable_scope("Model", reuse=None, initializer=initializer):
        m = PTBModel(is_training=True, config=config, input_=train_input)
      tf.summary.scalar("Training Loss", m.cost)
      tf.summary.scalar("Learning Rate", m.lr)
      # NOTE(review): tf.summary.scalar expects a scalar tensor; verify that
      # m.probabilities is scalar, otherwise this summary op will fail.
      tf.summary.scalar("Training probs", m.probabilities)

    with tf.name_scope("Valid"):
      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
      # reuse=True: share all trainable variables with the Train model.
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
      tf.summary.scalar("Validation Loss", mvalid.cost)

    with tf.name_scope("Test"):
      test_input = PTBInput(
          config=eval_config, data=test_data, name="TestInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mtest = PTBModel(is_training=False, config=eval_config,
                         input_=test_input)

    # Rename each model's ops with a per-model prefix so they can be located
    # again after the MetaGraph round-trip below.
    models = {"Train": m, "Valid": mvalid, "Test": mtest}
    for name, model in models.items():
      model.export_ops(name)
    metagraph = tf.train.export_meta_graph()
    # NOTE(review): this is a lexicographic string compare and misorders
    # versions such as "1.10.0" vs "1.2.0" — fragile but kept as-is.
    if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
      raise ValueError("num_gpus > 1 is not supported for TensorFlow versions "
                       "below 1.1.0")
    soft_placement = False
    if FLAGS.num_gpus > 1:
      # Multi-GPU replication may place ops on devices that need fallback.
      soft_placement = True
      util.auto_parallel(metagraph, m)

  # Re-import the (possibly parallelized) MetaGraph into a fresh graph.
  with tf.Graph().as_default():
    tf.train.import_meta_graph(metagraph)
    for model in models.values():
      model.import_ops()
    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
    with sv.managed_session(config=config_proto) as session:
      for i in range(config.max_max_epoch):
        # Exponential LR decay, deferred until after max_epoch epochs.
        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
        m.assign_lr(session, config.learning_rate * lr_decay)

        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
        train_perplexity = run_epoch(session, m, eval_op=m.train_op,
                                     verbose=True)
        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
        valid_perplexity = run_epoch(session, mvalid)
        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

      test_perplexity = run_epoch(session, mtest)
      print("Test Perplexity: %.3f" % test_perplexity)

      if FLAGS.save_path:
        print("Saving model to %s." % FLAGS.save_path)
        sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
Ejemplo n.º 23
0
def main(_):
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    #train_data, valid_data, test_data, _ = raw_data
    train_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default():

        # If we are testing an existing model ...
        if FLAGS.load_path:
            # NOTE: there are two ways to restore an existing model, rebuilding the graph from scratch then
            # calling saver.restore for those objects, or importing the old metagraph then calling saver.restore
            # and then fetching the ops/tensors via methods like get_tensor_by_name
            # what follows is the first method
            with tf.name_scope("Train"):
                train_input = PTBInput(config=config,
                                       data=train_data,
                                       name="TrainInput")
                with tf.variable_scope("Model", reuse=None):
                    m = PTBModel(is_training=True,
                                 config=config,
                                 input_=train_input,
                                 name="Train")
                tf.summary.scalar("Training Loss", m.cost)
                tf.summary.scalar("Learning Rate", m.lr)
            with tf.name_scope("Test"):
                test_input = PTBInput(config=eval_config,
                                      data=test_data,
                                      name="TestInput")  #,iter=0)
                with tf.variable_scope("Model", reuse=True):
                    mtest = PTBModel(is_training=False,
                                     config=eval_config,
                                     input_=test_input,
                                     name="Test")
            session = tf.InteractiveSession()
            saver = tf.train.Saver(
            )  #tf.train.import_meta_graph(FLAGS.load_path + ".meta")
            saver.restore(session, FLAGS.load_path)

            #mtest.import_ops()
            print("Model restored from %s." % FLAGS.load_path)
            of = open("HPL2.out", 'w')
            run_epoch(session,
                      mtest,
                      input=test_data[0],
                      ep_size=len(test_data[0]) - 1,
                      of=of)
            #run_epoch(session, mtest, input=test_input)#, ep_size=len(test_data[0]), )
            iter = 1
            for i in range(len(test_data) - 1):
                run_epoch(session,
                          mtest,
                          input=test_data[iter],
                          ep_size=len(test_data[iter]) - 1,
                          of=of)
                #run_epoch(session,mtest, input=test_input)#test_data[iter], ep_size = len(test_data[iter]))
                iter += 1
            of.close()
            quit()

        # If we are training a model ....
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.name_scope("Train"):
            train_input = PTBInput(config=config,
                                   data=train_data,
                                   name="TrainInput")
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m = PTBModel(is_training=True,
                             config=config,
                             input_=train_input)
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        #with tf.name_scope("Valid"):
        #  valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
        #  with tf.variable_scope("Model", reuse=True, initializer=initializer):
        #    mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
        #  tf.summary.scalar("Validation Loss", mvalid.cost)

        with tf.name_scope("Test"):
            test_input = PTBInput(config=eval_config,
                                  data=test_data,
                                  name="TestInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mtest = PTBModel(is_training=False,
                                 config=eval_config,
                                 input_=test_input)

        models = {
            "Train": m,
            "Test": mtest
        }  #, "Valid": mvalid, "Test": mtest}
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.train.export_meta_graph()
        if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
            raise ValueError(
                "num_gpus > 1 is not supported for TensorFlow versions "
                "below 1.1.0")
        soft_placement = False
        if FLAGS.num_gpus > 1:
            soft_placement = True
            util.auto_parallel(metagraph, m)

    with tf.Graph().as_default():
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()

        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
        with sv.managed_session(config=config_proto) as session:
            if not FLAGS.load_path:
                sv.saver.restore(session, FLAGS.save_path + "-13450")
                for i in range(config.max_max_epoch):
                    lr_decay = config.lr_decay**max(i + 1 - config.max_epoch,
                                                    0.0)
                    m.assign_lr(session, config.learning_rate * lr_decay)

                    print("Epoch: %d Learning rate: %.3f" %
                          (i + 1, session.run(m.lr)))
                    train_perplexity = run_epoch(session,
                                                 m,
                                                 eval_op=m.train_op,
                                                 verbose=True)
                    print("Epoch: %d Train Perplexity: %.3f" %
                          (i + 1, train_perplexity))
                    #valid_perplexity = run_epoch(session, mvalid)
                    #print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
                    #test_perplexity = run_epoch(session, mtest)

                    if FLAGS.save_path:
                        print("Saving model to %s." % FLAGS.save_path)
                        print("SAVED TO: %s." %
                              sv.saver.save(session,
                                            FLAGS.save_path,
                                            global_step=sv.global_step))
                        sv.saver.export_meta_graph(FLAGS.save_path + ".meta")

            else:

                test_perplexity = run_epoch(session, mtest)