import csv
import json
import logging
import os
import pickle

import tensorflow as tf
from tensorflow.python.lib.io import file_io

# NOTE: project-specific helpers used below (project_dir, import_model,
# read_stroke3, Stroke3Converter, create_tf_records_for_submission) are
# assumed to come from this project's own modules; their imports are not
# shown in the original snippet.


def train(model_fn, input_fn, params):
    model_dir = params['model_dir']
    input_dir = params['input_dir']
    batch_size = params['batch_size']
    save_checkpoints_secs = params.get('save_checkpoints_secs', 600)
    save_summary_steps = params.get('save_summary_steps', 500)
    eval_every_secs = params.get('eval_every_secs', 600)
    eval_files_ids = params['eval_files']
    train_files_ids = params['train_files']
    only_recognized = params.get('only_recognized', False)

    logging.info('Model parameters: %s' % str(params))

    if not os.path.isabs(model_dir) and not model_dir.startswith('s3:'):
        model_dir = project_dir(model_dir)

    # create the model directory
    if not model_dir.startswith('s3:'):
        os.makedirs(model_dir, exist_ok=True)

    if not os.path.isabs(input_dir) and not input_dir.startswith('s3:'):
        input_dir = project_dir(input_dir)

    # get paths to training and evaluation tfrecords
    eval_files_ids = range(eval_files_ids[0], eval_files_ids[1] + 1)
    train_files_ids = range(train_files_ids[0], train_files_ids[1] + 1)
    eval_files = [os.path.join(input_dir, 'file_%d.tfrecords' % i) for i in eval_files_ids]
    train_files = [os.path.join(input_dir, 'file_%d.tfrecords' % i) for i in train_files_ids]

    logging.info('Number of eval files: %d' % len(eval_files))
    logging.info('Number of train files: %d' % len(train_files))

    # fail fast if the training files are not accessible (raises if missing)
    file_io.stat(train_files[0])

    # create an estimator
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=tf.estimator.RunConfig(
            model_dir=model_dir,
            save_checkpoints_secs=save_checkpoints_secs,
            save_summary_steps=save_summary_steps,
        ),
        params=tf.contrib.training.HParams(**params),
    )

    train_input_fn = lambda: input_fn(train_files, batch_size, only_recognized=only_recognized)
    eval_input_fn = lambda: input_fn(eval_files, batch_size, epochs=1, only_recognized=False)

    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn)
    # throttle_secs controls how often evaluation re-runs; setting it alongside
    # start_delay_secs matches the intent of the eval_every_secs parameter
    eval_spec = tf.estimator.EvalSpec(
        input_fn=eval_input_fn,
        start_delay_secs=eval_every_secs,
        throttle_secs=eval_every_secs,
    )

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
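
# A hedged usage sketch for train() above; my_model_fn, my_input_fn and the
# paths below are hypothetical placeholders, not taken from this project.
#
# params = {
#     'model_dir': 'models/tf_rnn',
#     'input_dir': 'data/kaggle_simplified/tfrecords/stroke3',
#     'batch_size': 64,
#     'train_files': (1, 900),    # inclusive range of tfrecord file ids
#     'eval_files': (901, 999),
#     'only_recognized': True,
# }
# train(my_model_fn, my_input_fn, params)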


def main():
    # features, labels = read_bitmaps(project_dir('data/kaggle_simplified/tfrecords/bitmaps/file_1.tfrecords'), limit=2)
    features, labels = read_stroke3(
        project_dir('data/kaggle_simplified/test_stroke3/file_1.tfrecords'),
        limit=2)
    print(features)
    print(labels)


def main():
    with open(project_dir('data/evaluation/labels.json')) as f:
        labels_map = json.load(f)

    # invert the label -> id mapping into an id -> label list
    id2label = [None] * len(labels_map)
    for label, label_id in labels_map.items():
        id2label[label_id] = label

    with open(
            project_dir('data/kaggle_submission/3_tf_rnn/predictions.pickle'),
            'rb') as f:
        submission_predictions = pickle.load(f)

    print('number of predictions: %d' % len(submission_predictions))

    csv_path = project_dir('data/kaggle_submission/test_simplified.csv')
    submission_csv_path = project_dir(
        'data/kaggle_submission/3_tf_rnn/submission.csv')

    with open(submission_csv_path, 'w') as fw:
        fw.write('key_id,word\n')

        with open(csv_path) as fr:
            reader = csv.reader(fr)
            next(reader)  # skip the CSV header row
            for i, (key_id, _, _) in enumerate(reader):
                # pick the top 3 labels by logit value
                logits = submission_predictions[i]['logits']
                top3 = sorted(enumerate(logits), key=lambda x: x[1],
                              reverse=True)[:3]
                top3_labels = ' '.join(id2label[label_id]
                                       for label_id, _ in top3)

                fw.write('%s,%s\n' % (key_id, top3_labels))

                if (i + 1) % 10000 == 0:
                    print(i + 1)
def predict(model_fn, input_fn, params, tfrecord_files):
    # get model directory
    model_dir = params['model_dir']
    if not os.path.isabs(model_dir) and not model_dir.startswith('s3:'):
        model_dir = project_dir(model_dir)

    batch_size = params['batch_size']

    # create an estimator
    estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir,
                                       params=tf.contrib.training.HParams(**params))

    # get predictions
    prediction_input_fn = lambda: input_fn(tfrecord_files, batch_size, epochs=1, shuffle=False)

    return estimator.predict(input_fn=prediction_input_fn)
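
# Hedged usage sketch for predict() above: it returns a lazy generator of
# per-example prediction dicts, so it must be iterated to run inference.
# The config and tfrecord paths below are hypothetical placeholders.
#
# model_fn, input_fn, config = import_model('configs/rnn.py')
# for prediction in predict(model_fn, input_fn, config,
#                           ['data/stroke3/file_1.tfrecords']):
#     print(prediction['logits'])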


def main(argv: list):
    # argv[1..3] are all required below, so validate all of them up front
    if len(argv) < 4:
        raise ValueError(
            'Expected arguments: <config_path> <tfrecord_file> <output_path>')

    config_path = argv[1]
    file_path = argv[2]
    output_path = project_dir(argv[3])

    model_fn, input_fn, config = import_model(config_path)

    # file_path = 's3://quickdraw-datasets-us-east-2/stroke3/file_1.tfrecords'
    # output_path = project_dir('data/eval_1_tf_rnn.pickle')

    predictor = predict(model_fn, input_fn, config, [file_path])

    predictions = []
    for i, prediction in enumerate(predictor):
        if i % 100 == 0:
            print(i)

        predictions.append(prediction)

    with open(output_path, 'wb') as f:
        pickle.dump(predictions, f)
def _parse_example(serialized_example):
    # NOTE: the head of this function is truncated in the original snippet;
    # the name and feature spec here are reconstructed assumptions.
    spec = {name: tf.FixedLenFeature([], tf.int64)
            for name in ('country', 'recognized', 'key', 'label')}
    example = tf.parse_single_example(serialized_example, features=spec)

    features = {
        'country': tf.cast(example['country'], tf.uint8),
        'recognized': tf.cast(example['recognized'], tf.uint8),
        'key': tf.cast(example['key'], tf.int64),
    }

    label = tf.cast(example['label'], tf.int64)

    return features, label
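
# Hedged sketch of how a parse function like _parse_example is typically
# wired into a TF1 tf.data input pipeline; this wrapper is an assumption,
# not this project's actual input_fn.
#
# def input_fn(tfrecord_files, batch_size, epochs=None):
#     dataset = tf.data.TFRecordDataset(tfrecord_files)
#     dataset = dataset.map(_parse_example)
#     dataset = dataset.repeat(epochs).batch(batch_size)
#     return dataset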


if __name__ == '__main__':
    logging.getLogger().setLevel(logging.DEBUG)

    # bitmap_converter = BitmapConverter(image_size=(96, 96), stroke_width=5)
    stroke3_converter = Stroke3Converter()

    # create_tfrecords(project_dir('data/kaggle_simplified/test_csv'),
    #                  project_dir('data/kaggle_simplified/test_bitmaps96'),
    #                  num_files=1, converter=bitmap_converter)

    create_tf_records_for_submission(
        project_dir('data/kaggle_submission/test_simplified.csv'),
        project_dir(
            'data/kaggle_submission/test_simplified_stroke3.tfrecords'),
        converter=stroke3_converter)

    # output_dir = '/data500/bitmaps_s96w5'
    # labels = read_json(package_dir('data/labels.json'))
    # for file_id in range(900, 999):
    #     _convert_temporary_csvs(output_dir, file_id, labels.values(), converter=bitmap_converter)