Example #1
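The function below relies on module-level context that the example does not show. Here is a minimal sketch of that context, assuming TF 1.x; the ciml package name and the exact contents of _OPTIMIZER_CLS_NAMES are assumptions:

import sys

import tensorflow as tf

# Assumed package name for the project-local helpers used below
from ciml import gather_results
from ciml import tf_trainer

# Assumed mapping from the optimizer name in hyper_params to a TF 1.x
# optimizer class; the keys shown are illustrative, not exhaustive.
_OPTIMIZER_CLS_NAMES = {
    'Adagrad': tf.train.AdagradOptimizer,
    'Adam': tf.train.AdamOptimizer,
    'RMSProp': tf.train.RMSPropOptimizer,
    'SGD': tf.train.GradientDescentOptimizer,
}
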
def local_trainer(dataset, experiment, eval_dataset, gpu, debug, data_path,
                  s3_profile, s3_url):
    # s3 support. When both are stored on s3, the dataset and the experiment
    # must be in the same bucket
    s3 = gather_results.get_s3_client(s3_url=s3_url, s3_profile=s3_profile)
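    # (With s3_url/s3_profile unset, the helpers presumably fall back to
    # plain local files under data_path.)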

    # Load experiment data
    experiment_data = gather_results.load_experiment(experiment,
                                                     data_path=data_path,
                                                     s3=s3)
    if not experiment_data:
        print("Experiment %s not found" % experiment)
        sys.exit(1)

    # Load dataset data
    dataset_data = gather_results.load_model_config(dataset,
                                                    data_path=data_path,
                                                    s3=s3)
    if not dataset_data:
        print("Dataset %s not found" % dataset)
        sys.exit(1)

    # Read hyper_params and params
    # estimator holds the estimator type name here; it is rebound to the
    # actual estimator instance further below
    estimator = experiment_data['estimator']
    hyper_params = experiment_data['hyper_params']
    params = experiment_data['params']
    steps = int(hyper_params['steps'])
    num_epochs = int(hyper_params['epochs'])
    batch_size = int(hyper_params['batch_size'])
    optimizer = hyper_params['optimizer']
    learning_rate = float(hyper_params['learning_rate'])
    class_label = dataset_data['class_label']

    if debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)

    # Load the normalized data
    labels = gather_results.load_dataset(dataset,
                                         'labels',
                                         data_path=data_path,
                                         s3=s3)['labels']
    training_data = gather_results.load_dataset(dataset,
                                                'training',
                                                data_path=data_path,
                                                s3=s3)
    # Note: test_data is loaded here but never referenced again below
    test_data = gather_results.load_dataset(dataset,
                                            'test',
                                            data_path=data_path,
                                            s3=s3)
    print("Training data shape: (%d, %d)" % training_data['examples'].shape)

    # The provider labels are strings, so the estimator presumably needs an
    # explicit label vocabulary for these class labels; any other class
    # label falls through to label_vocabulary=None
    if class_label == 'node_provider':
        label_vocabulary = {
            'rax', 'ovh', 'packethost-us-west-1', 'vexxhost',
            'limestone-regionone', 'inap-mtl01', 'fortnebula-regionone'
        }
    elif class_label == 'node_provider_all':
        label_vocabulary = {
            'rax-iad', 'ovh-bhs1', 'packethost-us-west-1', 'rax-dfw',
            'vexxhost-ca-ymq-1', 'ovh-gra1', 'limestone-regionone',
            'inap-mtl01', 'rax-ord', 'vexxhost-sjc1', 'fortnebula-regionone'
        }
    else:
        label_vocabulary = None

    # Get the estimator
    model_dir = gather_results.get_model_folder(dataset, experiment)
    estimator = tf_trainer.get_estimator(
        estimator,
        hyper_params,
        params,
        labels,
        model_dir,
        optimizer=_OPTIMIZER_CLS_NAMES[optimizer](learning_rate=learning_rate),
        label_vocabulary=label_vocabulary,
        gpu=gpu)

    def train_and_eval():
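        # Train on the training split, evaluate on the experiment dataset
        # plus any extra eval sets, then store predictions for the 'dev'
        # split (treated as production data)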
        # Train
        tf_trainer.get_training_method(estimator)(
            input_fn=tf_trainer.get_input_fn(shuffle=True,
                                             batch_size=batch_size,
                                             num_epochs=num_epochs,
                                             labels=labels,
                                             **training_data),
            steps=steps)
        # Eval on the experiment dataset + any other requested
        eval_sets = [dataset]
        eval_sets.extend(eval_dataset)
        for eval_dataset_name in eval_sets:
            eval_data = gather_results.load_dataset(eval_dataset_name,
                                                    'test',
                                                    data_path=data_path,
                                                    s3=s3)
            eval_size = len(eval_data['example_ids'])

            # Run tf evaluation and store the metrics
            print("Evaluation data shape: (%d, %d)" %
                  eval_data['examples'].shape)
            eval_loss = estimator.evaluate(
                input_fn=tf_trainer.get_input_fn(batch_size=eval_size,
                                                 num_epochs=1,
                                                 labels=labels,
                                                 **eval_data),
                name=eval_dataset_name)
            # Save and log the evaluation loss
            print('Training eval data for %s: %r' %
                  (eval_dataset_name, eval_loss))
            eval_name = "eval_" + eval_dataset_name
            gather_results.save_data_json(dataset,
                                          eval_loss,
                                          eval_name,
                                          sub_folder=experiment)

        # Run a prediction on the "dev" set, which we use as prod, and store it
        prod_data = gather_results.load_dataset(dataset,
                                                'dev',
                                                data_path=data_path,
                                                s3=s3)
        prod_size = len(prod_data['example_ids'])

        prediction = estimator.predict(input_fn=tf_trainer.get_input_fn(
            batch_size=prod_size, num_epochs=1, labels=labels, **prod_data))

        # Convert bytes fields to string for serialization
        serializable_pred = []
        for pred in prediction:
            _classes = pred['classes']
            pred['classes'] = [x.decode("utf-8") for x in _classes]
            serializable_pred.append(pred)

        prediction_name = "prediction_" + dataset
        pred_data = zip(prod_data['example_ids'], serializable_pred,
                        prod_data['classes'])
        gather_results.save_data_json(dataset, list(pred_data),
                                      prediction_name,
                                      sub_folder=experiment)

    # Now do the training and evaluation (train_and_eval() has no return
    # value, so there is nothing to capture here)
    if gpu:
        with tf.device('/gpu:0'):
            train_and_eval()
    else:
        train_and_eval()
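
For reference, a hypothetical invocation of local_trainer; every argument value below is a placeholder rather than something taken from the source:

if __name__ == '__main__':
    local_trainer(dataset='dataset-20k',    # placeholder dataset name
                  experiment='dnn-2x100',   # placeholder experiment name
                  eval_dataset=[],          # no extra eval sets
                  gpu=False,
                  debug=False,
                  data_path='data',         # local data dir, s3 disabled
                  s3_profile=None,
                  s3_url=None)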