def train(model_fn, input_fn, params):
    """Train and periodically evaluate an estimator built from `model_fn`.

    Args:
        model_fn: model function handed to `tf.estimator.Estimator`.
        input_fn: callable building the input pipeline; invoked as
            `input_fn(files, batch_size, ...)`.
        params: dict of run settings. Required keys: 'model_dir',
            'input_dir', 'batch_size', 'eval_files', 'train_files'
            ('eval_files'/'train_files' are inclusive (first_id, last_id)
            file-id ranges). Optional: 'save_checkpoints_secs',
            'save_summary_steps', 'eval_every_secs', 'only_recognized'.
    """
    model_dir = params['model_dir']
    input_dir = params['input_dir']
    batch_size = params['batch_size']
    save_checkpoints_secs = params.get('save_checkpoints_secs', 600)
    save_summary_steps = params.get('save_summary_steps', 500)
    eval_every_secs = params.get('eval_every_secs', 600)
    eval_files_ids = params['eval_files']
    train_files_ids = params['train_files']
    only_recognized = params.get('only_recognized', False)

    # lazy %-args: the message is only formatted if INFO is enabled
    logging.info('Model parameters: %s', params)

    if not os.path.isabs(model_dir) and not model_dir.startswith('s3:'):
        model_dir = project_dir(model_dir)

    # create the model directory (local paths only; S3 needs no mkdir)
    if not model_dir.startswith('s3:'):
        os.makedirs(model_dir, exist_ok=True)

    if not os.path.isabs(input_dir) and not input_dir.startswith('s3:'):
        input_dir = project_dir(input_dir)

    # get paths to training and evaluation tfrecords; the configured
    # (first, last) id pairs are inclusive on both ends
    eval_files_ids = range(eval_files_ids[0], eval_files_ids[1] + 1)
    train_files_ids = range(train_files_ids[0], train_files_ids[1] + 1)
    eval_files = [
        os.path.join(input_dir, 'file_%d.tfrecords' % i)
        for i in eval_files_ids
    ]
    train_files = [
        os.path.join(input_dir, 'file_%d.tfrecords' % i)
        for i in train_files_ids
    ]
    logging.info('Number of eval files: %d', len(eval_files))
    logging.info('Number of train files: %d', len(train_files))

    # test access to training files (fail fast on bad paths/credentials)
    file_io.stat(train_files[0])

    # create an estimator
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=tf.estimator.RunConfig(
            model_dir=model_dir,
            save_checkpoints_secs=save_checkpoints_secs,
            save_summary_steps=save_summary_steps,
        ),
        params=tf.contrib.training.HParams(**params),
    )

    # PEP 8: named functions instead of lambda assignments
    def train_input_fn():
        return input_fn(train_files, batch_size,
                        only_recognized=only_recognized)

    def eval_input_fn():
        # evaluation always runs one full epoch over all records
        return input_fn(eval_files, batch_size, epochs=1,
                        only_recognized=False)

    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn)
    # BUG FIX: the original set only start_delay_secs, so after the first
    # delay evaluations ran at EvalSpec's default throttle (600s) no matter
    # what eval_every_secs was configured to. throttle_secs makes the
    # requested cadence actually take effect.
    eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn,
                                      start_delay_secs=eval_every_secs,
                                      throttle_secs=eval_every_secs)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
def main():
    """Read two stroke3 examples from a tfrecords file and print them."""
    tfrecords_path = project_dir(
        'data/kaggle_simplified/test_stroke3/file_1.tfrecords')
    features, labels = read_stroke3(tfrecords_path, limit=2)
    print(features)
    print(labels)
def main():
    """Build the Kaggle submission CSV from pickled model predictions.

    Loads the {label: id} mapping and the pickled per-example predictions,
    then walks the official test CSV (for key ids) and writes one
    `key_id,word` row per example, where `word` is the space-joined
    top-3 predicted labels.
    """
    with open(project_dir('data/evaluation/labels.json')) as f:
        labels_map = json.load(f)

    # invert {label: id} into an id-indexed list of labels
    id2label = list(range(0, len(labels_map)))
    for label, label_id in labels_map.items():
        id2label[label_id] = label

    with open(
            project_dir('data/kaggle_submission/3_tf_rnn/predictions.pickle'),
            'rb') as f:
        submission_predictions = pickle.load(f)
    print(len(submission_predictions))

    csv_path = project_dir('data/kaggle_submission/test_simplified.csv')
    submission_csv_path = project_dir(
        'data/kaggle_submission/3_tf_rnn/submission.csv')
    with open(submission_csv_path, 'w') as fw:
        fw.write('key_id,word\n')
        with open(csv_path) as fr:
            reader = csv.reader(fr)
            next(reader)  # skip the header row
            # enumerate replaces the original hand-maintained counter, and
            # distinct names fix the inner comprehension reusing `i` (it
            # shadowed the row counter, which was confusing to read)
            for row_idx, (key_id, _, _) in enumerate(reader):
                # predictions are aligned with the CSV rows by position
                pred_probabilities = submission_predictions[row_idx]['logits']
                # top 3 labels by probability
                sorted_pred = sorted(
                    enumerate(pred_probabilities),
                    key=lambda p: p[1], reverse=True)
                top3_labels = ' '.join(
                    id2label[label_id] for label_id, _ in sorted_pred[:3])
                fw.write('%s,%s\n' % (key_id, top3_labels))
                # progress indicator; the original incremented before the
                # check, i.e. printed at 10000, 20000, ...
                if (row_idx + 1) % 10000 == 0:
                    print(row_idx + 1)
def predict(model_fn, input_fn, params, tfrecord_files):
    """Return an iterator of predictions over `tfrecord_files`.

    Restores the estimator from `params['model_dir']` and runs a single,
    unshuffled pass over the given tfrecords files.
    """
    # get model directory; resolve relative local paths via project_dir
    model_dir = params['model_dir']
    if not os.path.isabs(model_dir) and not model_dir.startswith('s3:'):
        model_dir = project_dir(model_dir)
    batch_size = params['batch_size']

    # create an estimator
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=model_dir,
        params=tf.contrib.training.HParams(**params),
    )

    def prediction_input_fn():
        # one epoch, no shuffling, so outputs stay aligned with the files
        return input_fn(tfrecord_files, batch_size, epochs=1, shuffle=False)

    # get predictions
    return estimator.predict(input_fn=prediction_input_fn)
def main(argv: list):
    """Run prediction for one tfrecords file and pickle the results.

    Args:
        argv: command-line arguments; expects
            argv[1] - path to the model configuration file,
            argv[2] - path to the input tfrecords file,
            argv[3] - project-relative path for the output pickle.

    Raises:
        ValueError: if fewer than three arguments are provided.
    """
    # BUG FIX: the original checked `len(argv) < 2` but then read argv[2]
    # and argv[3], so missing arguments crashed with IndexError instead of
    # the intended, readable ValueError
    if len(argv) < 4:
        raise ValueError(
            'Expected arguments: <config_path> <tfrecords_path> '
            '<output_path>')
    config_path = argv[1]
    file_path = argv[2]
    output_path = project_dir(argv[3])
    model_fn, input_fn, config = import_model(config_path)
    predictor = predict(model_fn, input_fn, config, [file_path])
    predictions = []
    for i, prediction in enumerate(predictor):
        if i % 100 == 0:
            print(i)  # progress indicator
        predictions.append(prediction)
    with open(output_path, 'wb') as f:
        pickle.dump(predictions, f)
        # NOTE(review): this is the tail of a parse function whose `def`
        # (and the opening of the `features` dict) lies before this chunk —
        # confirm against the full file. Casts below normalize raw example
        # fields to fixed dtypes.
        'country': tf.cast(example['country'], tf.uint8),
        'recognized': tf.cast(example['recognized'], tf.uint8),
        'key': tf.cast(example['key'], tf.int64),
    }
    label = tf.cast(example['label'], tf.int64)
    return features, label


if __name__ == '__main__':
    logging.getLogger().setLevel(logging.DEBUG)

    # bitmap_converter = BitmapConverter(image_size=(96, 96), stroke_width=5)
    stroke3_converter = Stroke3Converter()

    # create_tfrecords(project_dir('data/kaggle_simplified/test_csv'),
    #                  project_dir('data/kaggle_simplified/test_bitmaps96'),
    #                  num_files=1, converter=bitmap_converter)

    # convert the Kaggle submission test CSV into one stroke3 tfrecords file
    create_tf_records_for_submission(
        project_dir('data/kaggle_submission/test_simplified.csv'),
        project_dir(
            'data/kaggle_submission/test_simplified_stroke3.tfrecords'),
        converter=stroke3_converter)

    # output_dir = '/data500/bitmaps_s96w5'
    # labels = read_json(package_dir('data/labels.json'))
    # for file_id in range(900, 999):
    #     _convert_temporary_csvs(output_dir, file_id, labels.values(), converter=bitmap_converter)