Example #1
import glob
import logging
from functools import partial

# Estimator, RunConfig, TrainSpec, EvalSpec and train_and_evaluate come from
# tf.estimator; read_schemata, build_schema, input_fn, predict_input_fn,
# pairwise_probability, build_diags, optimize, params, input_path, model and
# model_dir are defined elsewhere in the original module.
from tensorflow.estimator import (Estimator, EvalSpec, RunConfig, TrainSpec,
                                  train_and_evaluate)


def main():
    sequence_schema_path = f'{input_path}/train/sequence_schema'
    context_schema_path = f'{input_path}/train/context_schema'

    context_schema, sequence_schema = read_schemata(context_schema_path, sequence_schema_path)

    tf_ctx_schema, tf_seq_schema = build_schema(context_schema, sequence_schema)

    # Collect the sharded data files for each split.
    train_parts = glob.glob(input_path + '/train/part-*')
    validation_parts = glob.glob(input_path + '/test/part-*')

    run_config = RunConfig(log_step_count_steps=10,
                           save_checkpoints_steps=100,
                           save_summary_steps=200,
                           keep_checkpoint_max=32)

    # Bind the common arguments once, then specialise the input_fn per split.
    shared_input_fn = partial(input_fn, params, tf_seq_schema, tf_ctx_schema)
    train_input_fn = partial(shared_input_fn, train_parts)
    validation_input_fn = partial(shared_input_fn, validation_parts)

    train_spec = TrainSpec(train_input_fn, max_steps=1000000)

    eval_spec = EvalSpec(validation_input_fn, steps=200, name='validation', start_delay_secs=30, throttle_secs=1)

    estimator = Estimator(model_fn=model.model_fn,
                          model_dir=model_dir,
                          params=params,
                          config=run_config)

    logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                        level=logging.INFO)
    logging.getLogger('tensorflow').propagate = False

    train_and_evaluate(estimator=estimator,
                       train_spec=train_spec,
                       eval_spec=eval_spec)

    # Run one pass of inference over the prediction grid.
    prediction = list(estimator.predict(
        input_fn=partial(predict_input_fn, {'epochs': 1, 'batch_size': 10}, grid)))

    scores = [p.tolist() for p in prediction]

    # Post-process the scores with helpers defined elsewhere in the module.
    pairwise_prob = pairwise_probability(scores)
    zero = pairwise_prob[0]
    A_zero = build_diags(zero)
    print(optimize(A_zero).x)
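
The schemas and the bound input_fn are defined elsewhere in the original module. As a rough sketch of what such an input_fn could look like, assuming the part-* files are TFRecords of tf.SequenceExample and that build_schema returns feature-spec dicts (the label column and the params keys here are assumptions, not the original implementation):

import tensorflow as tf

def input_fn(params, seq_schema, ctx_schema, file_parts):
    # Hypothetical sketch only; the original input_fn is not shown above.
    def parse(record):
        ctx, seq = tf.io.parse_single_sequence_example(
            record, context_features=ctx_schema, sequence_features=seq_schema)
        features = {**ctx, **seq}
        label = features.pop('label')  # hypothetical label column
        return features, label

    return (tf.data.TFRecordDataset(file_parts)
            .map(parse)
            .repeat(params['epochs'])       # assumed keys in params
            .batch(params['batch_size']))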
Example #2
    # ... (body of the get_encodes helper is truncated in this snippet)
    return features, labels


# DNNClassifier, RunConfig, TrainSpec, EvalSpec and train_and_evaluate here are
# the tf.estimator symbols, imported earlier in the original module.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # grow GPU memory on demand
run_config = RunConfig(model_dir='/data/cips/save/%s' % MODEL_ID,
                       session_config=config,
                       save_checkpoints_steps=2000)

estimator = DNNClassifier(hidden_units=[512],
                          feature_columns=[tf.feature_column.numeric_column('feature', shape=(768,))],
                          n_classes=len(laws),
                          config=run_config,
                          label_vocabulary=laws_str,
                          dropout=0.1)

input_fn = lambda fp: (tf.data.TextLineDataset(fp)
                       .apply(tf.contrib.data.shuffle_and_repeat(buffer_size=10000))
                       .batch(batch_size)
                       .map(lambda x: tf.py_func(get_encodes, [x], [tf.float32, tf.string],
                                                 name='bert_client'))
                       .map(lambda x, y: ({'feature': x}, y))
                       .prefetch(20))

train_spec = TrainSpec(input_fn=lambda: input_fn(train_fp))
eval_spec = EvalSpec(input_fn=lambda: input_fn(eval_fp), throttle_secs=0)
train_and_evaluate(estimator, train_spec, eval_spec)
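
The return statement at the top of this snippet is the tail of get_encodes, whose body is truncated. A plausible sketch of the full helper, assuming a running bert-as-service server and JSON input lines (the bc client setup and the 'fact'/'accusation' field names are assumptions, chosen to match the [tf.float32, tf.string] output signature used above):

import json
import random

from bert_serving.client import BertClient

bc = BertClient()  # assumes a bert-as-service server is reachable

def get_encodes(x):
    # x is a batch of raw byte strings from the TextLineDataset
    samples = [json.loads(line) for line in x]
    features = bc.encode([s['fact'] for s in samples])  # float32, shape (batch, 768)
    labels = [[str(random.choice(s['accusation']))] for s in samples]  # one string label each
    return features, labels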
Example #3
config = tf.ConfigProto()  # restored: the original snippet opens mid-setup
config.gpu_options.allow_growth = True
run_config = RunConfig(model_dir='./model_label/',
                       session_config=config,
                       save_checkpoints_steps=1000)

estimator1 = DNNClassifier(hidden_units=[512],
                           feature_columns=[tf.feature_column.numeric_column('feature', shape=(768,))],
                           n_classes=len(line_labels),
                           config=run_config,
                           label_vocabulary=line_labels,
                           dropout=0.1)


# Training/Evaluating:
tf.logging.set_verbosity(tf.logging.INFO)

input_fn1 = lambda fp: (tf.data.TextLineDataset(fp)
                        .apply(tf.contrib.data.shuffle_and_repeat(buffer_size=10000))
                        .batch(batch_size)
                        .map(lambda x: tf.py_func(get_encodes1, [x], [tf.float32, tf.string], name='bert_client'),
                             num_parallel_calls=num_parallel_calls)
                        .map(lambda x, y: ({'feature': x}, y))
                        .prefetch(20))

train_spec1 = TrainSpec(input_fn=lambda: input_fn1(train_fp))
eval_spec1 = EvalSpec(input_fn=lambda: input_fn1(eval_fp), throttle_secs=0)


train_and_evaluate(estimator1, train_spec1, eval_spec1)
Example #4
# The start of this snippet is truncated; it opens mid-way through a RunConfig
# call like the one in Example #3.
run_config = RunConfig(model_dir=...,  # model_dir value elided in the original
                       session_config=config,
                       save_checkpoints_steps=1000)


estimator2 = DNNClassifier(hidden_units=[512],
                           feature_columns=[tf.feature_column.numeric_column('feature', shape=(768,))],
                           n_classes=len(line_type),
                           config=run_config,
                           label_vocabulary=line_type,
                           dropout=0.1)

# Training/Evaluating:
tf.logging.set_verbosity(tf.logging.INFO)


input_fn2 = lambda fp: (tf.data.TextLineDataset(fp)
                        .apply(tf.contrib.data.shuffle_and_repeat(buffer_size=10000))
                        .batch(batch_size)
                        .map(lambda x: tf.py_func(get_encodes2, [x], [tf.float32, tf.string], name='bert_client'),
                             num_parallel_calls=num_parallel_calls)
                        .map(lambda x, y: ({'feature': x}, y))
                        .prefetch(20))


train_spec2 = TrainSpec(input_fn=lambda: input_fn2(train_fp))
eval_spec2 = EvalSpec(input_fn=lambda: input_fn2(eval_fp), throttle_secs=0)


train_and_evaluate(estimator2, train_spec2, eval_spec2)
Example #5
def main(mname, model_dir, batch_size, epochs, eval_steps, eps_log_steps):
    global model_dir_hdfs
    if model_dir.startswith('hdfs'):
        model_dir_hdfs = True

    tf.logging.set_verbosity(tf.logging.DEBUG)
    # get TF logger
    log.setLevel(logging.DEBUG)
    # create formatter and add it to the handlers
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # create file handler which logs even debug messages
    if not model_dir_hdfs:
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        log_dir = model_dir
    else:
        # On HDFS, checkpoints go to a timestamped job directory while log
        # files stay in the local working directory.
        model_dir = os.path.join(
            model_dir, "job_cifar10_" +
            datetime.datetime.now().strftime('%Y-%m-%d_%H-%M'))
        log_dir = '.'

    # clear old log files
    with open(log_dir + '/tensorflow.log', 'w'):
        pass
    with open(log_dir + '/gpu.csv', 'w'):
        pass
    with open(log_dir + '/cpu.csv', 'w'):
        pass

    fh = logging.FileHandler(log_dir + '/tensorflow.log')

    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    log.addHandler(fh)

    log.info("TF version: %s", tf.__version__)
    log.info("Model directory: %s", model_dir)
    log.info("Batch size: %s", batch_size)
    log.info("Prefetch data all to memory: %s", True)
    log.info("Train epochs: %s", epochs)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    config.log_device_placement = True  # to log device placement (on which device the operation ran)
    sess = tf.Session(config=config)
    ktf.set_session(
        sess)  # set this TensorFlow session as the default session for Keras

    steps_per_epoch = cifar10_data.train_len() // batch_size
    log.info("Steps per epoch: %s", steps_per_epoch)
    if eval_steps is None:
        eval_steps = steps_per_epoch
    log.info("Evaluating each %i steps", eval_steps)

    if mname == "cnn":
        model = cifar10_model_cnn.cifar_model()
    else:
        model = cifar10_model_resnet.cifar_model()
        global input_name
        input_name = 'input_1'

    model.summary()

    def train_input_fn():
        # Stream (image, label) batches from the generator; features are keyed
        # by the Keras input layer name so model_to_estimator can match them.
        dataset = tf.data.Dataset.from_generator(
            generator=cifar10_data.generator_train,
            output_types=(tf.float32, tf.float32),
            output_shapes=shapes)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(buffer_size=batch_size)
        # dataset = dataset.repeat(20)
        iterator = dataset.make_one_shot_iterator()
        features_tensors, labels = iterator.get_next()
        features = {input_name: features_tensors}
        return features, labels

    def eval_input_fn():
        dataset = tf.data.Dataset.from_generator(
            generator=cifar10_data.generator_test,
            output_types=(tf.float32, tf.float32),
            output_shapes=shapes)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(buffer_size=batch_size)
        iterator = dataset.make_one_shot_iterator()
        features_tensors, labels = iterator.get_next()
        features = {input_name: features_tensors}
        return features, labels

    my_config = RunConfig(
        # Save a checkpoint every eval_steps steps and run the evaluation.
        save_checkpoints_steps=eval_steps,
        # keep_checkpoint_max=5  # retain the n most recent checkpoints (default 5)
    )
    estimator = tf.keras.estimator.model_to_estimator(model,
                                                      config=my_config,
                                                      model_dir=model_dir)

    examples_sec_hook = ExamplesPerSecondHook(batch_size,
                                              every_n_steps=eps_log_steps)
    # stopping_hook = early_stopping.stop_if_higher_hook(estimator, "accuracy", 0.5)

    train_hooks = [examples_sec_hook]

    train_spec = TrainSpec(input_fn=train_input_fn,
                           hooks=train_hooks,
                           max_steps=cifar10_data.train_len() // batch_size * epochs)
    eval_spec = EvalSpec(input_fn=eval_input_fn,
                         steps=cifar10_data.val_len() // batch_size,  # default would be 100 steps
                         throttle_secs=5)

    global is_training
    is_training = True
    threading.Thread(target=lambda: collect_stats(log_dir)).start()
    start = time.time()

    train_and_evaluate(estimator, train_spec, eval_spec)

    elapsed = time.time() - start
    is_training = False
    log.info("total time taken (seconds): %s ", elapsed)
    if model_dir_hdfs:
        parse_res = parse.urlsplit(model_dir)
        netloc = parse_res[1]
        path = parse_res[2]
        webhdfs_model_dir = 'http://' + netloc + ':50070/webhdfs/v1' + path
        username = getpass.getuser()
        component_name = estimator.config.task_type + str(
            estimator.config.task_id)
        log.info("Uploading log files for %s as %s to HDFS path: %s",
                 component_name, username, webhdfs_model_dir)
        logging.shutdown()
        os.system('curl -L -i -T tensorflow.log "' + webhdfs_model_dir +
                  '/tensorflow-' + component_name +
                  '.log?op=CREATE&overwrite=false&user.name=' + username + '"')
        os.system('curl -L -i -T cpu.csv "' + webhdfs_model_dir + '/cpu-' +
                  component_name +
                  '.csv?op=CREATE&overwrite=false&user.name=' + username + '"')
        os.system('curl -L -i -T gpu.csv "' + webhdfs_model_dir + '/gpu-' +
                  component_name +
                  '.csv?op=CREATE&overwrite=false&user.name=' + username + '"')
    else:
        log.info("Creating zip archive of job results")
        logging.shutdown()
        shutil.make_archive(model_dir, 'zip', model_dir)
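
ExamplesPerSecondHook is not part of core TensorFlow; it is usually copied from the official-models utilities. A minimal sketch of such a hook, built only on the tf.train.SessionRunHook interface (an illustration, not necessarily the hook used above):

import time

import tensorflow as tf

class ExamplesPerSecondHook(tf.train.SessionRunHook):
    """Logs average examples/sec every `every_n_steps` steps (illustrative sketch)."""

    def __init__(self, batch_size, every_n_steps=100):
        self._batch_size = batch_size
        self._every_n_steps = every_n_steps
        self._step = 0
        self._last_time = None

    def begin(self):
        self._last_time = time.time()

    def after_run(self, run_context, run_values):
        self._step += 1
        if self._step % self._every_n_steps == 0:
            elapsed = time.time() - self._last_time
            tf.logging.info('examples/sec: %.1f',
                            self._batch_size * self._every_n_steps / elapsed)
            self._last_time = time.time()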