def predict_pipeline(args):
    """Predict on the test metadata with a saved pipeline and write a CSV.

    Builds the pipeline class registered under the configured
    ``pipeline_name``, restores it with ``pipeline_load`` from
    ``<models_dir>/pipelines/<name>_<pipeline_name>``, predicts against
    ``<raw_data_dir>/test.bson`` and writes the ``_id`` and ``category_id``
    columns to ``<submissions_dir>/<name>_<pipeline_name>_<timestamp>.csv``.

    Args:
        args: Run configuration. The attributes read here are ``dev_mode``,
            ``nb_workers``, ``models_dir``, ``name``, ``raw_data_dir`` and
            ``submissions_dir``; the object is also forwarded to
            ``_load_meta_testing`` and ``_parse_neptune_params``.
    """
    test_meta = _load_meta_testing(args)
    if args.dev_mode:
        # Cap the sample size: sampling without replacement raises ValueError
        # when more rows are requested than the frame holds.
        dev_rows = min(128, len(test_meta))
        test_meta = test_meta.sample(dev_rows, replace=False, random_state=1234)

    pipeline_name = _parse_neptune_params(args, 'pipeline_name')
    Pipeline = registered_pipelines[pipeline_name]
    pipeline = Pipeline(
        # NOTE(review): the "+ 1" presumably reserves one extra class beyond
        # the top categories -- confirm against the pipeline definition.
        num_classes=_parse_neptune_params(args, 'top_categories') + 1,
        epochs=_parse_neptune_params(args, 'epochs'),
        workers=args.nb_workers,
        models_dir=os.path.join(args.models_dir, 'single_models'),
    )

    # The same "<name>_<pipeline_name>" stem identifies both the stored
    # pipeline and the submission file.
    run_name = '{}_{}'.format(args.name, pipeline_name)
    pipeline_filepath = os.path.join(args.models_dir, 'pipelines', run_name)
    pipeline = pipeline_load(pipeline, pipeline_filepath)

    test_filepath = os.path.join(args.raw_data_dir, 'test.bson')
    y_test_pred = pipeline.predict(X=test_meta, img_dataset_filepath=test_filepath)

    # Copy before adding a column so the assignment targets an independent
    # frame instead of a selection taken from test_meta.
    submission = test_meta[['_id']].copy()
    submission['category_id'] = y_test_pred

    timestr = datetime.now().strftime("%Y%m%d-%H%M%S")
    submission_filepath = os.path.join(
        args.submissions_dir, '{}_{}.csv'.format(run_name, timestr))
    submission.to_csv(submission_filepath, index=False)
def evaluate_pipeline(args):
    """Score a saved pipeline on the validation metadata and report accuracy.

    Builds the pipeline class registered under the configured
    ``pipeline_name``, restores it with ``pipeline_load`` from
    ``<models_dir>/pipelines/<name>_<pipeline_name>``, predicts for the
    validation rows against ``<raw_data_dir>/train.bson`` and passes the
    ``accuracy_score`` of the predictions versus the ``category_id`` column
    to ``neptune_post_pipeline_score``.

    Args:
        args: Run configuration. The attributes read here are
            ``sample_validation``, ``dev_mode``, ``nb_workers``,
            ``models_dir``, ``name`` and ``raw_data_dir``; the object is also
            forwarded to ``_load_meta_training`` and
            ``_parse_neptune_params``.
    """
    # Only the validation split is used here; the training split is ignored.
    _, valid_meta = _load_meta_training(args)
    if args.sample_validation:
        valid_meta = valid_meta.sample(
            args.sample_validation, replace=False, random_state=1234)
    if args.dev_mode:
        # Cap the sample size: the frame may already hold fewer than 128 rows
        # (e.g. after the sample_validation step above), and sampling without
        # replacement raises ValueError when asked for more rows than exist.
        dev_rows = min(128, len(valid_meta))
        valid_meta = valid_meta.sample(dev_rows, replace=False, random_state=1234)

    train_filepath = os.path.join(args.raw_data_dir, 'train.bson')

    pipeline_name = _parse_neptune_params(args, 'pipeline_name')
    Pipeline = registered_pipelines[pipeline_name]
    pipeline = Pipeline(
        # NOTE(review): the "+ 1" presumably reserves one extra class beyond
        # the top categories -- confirm against the pipeline definition.
        num_classes=_parse_neptune_params(args, 'top_categories') + 1,
        epochs=_parse_neptune_params(args, 'epochs'),
        workers=args.nb_workers,
        models_dir=os.path.join(args.models_dir, 'single_models'),
    )

    run_name = '{}_{}'.format(args.name, pipeline_name)
    pipeline_filepath = os.path.join(args.models_dir, 'pipelines', run_name)
    pipeline = pipeline_load(pipeline, pipeline_filepath)

    y_pred = pipeline.predict(X=valid_meta, img_dataset_filepath=train_filepath)
    y_true = valid_meta['category_id']
    score = accuracy_score(y_true, y_pred)
    neptune_post_pipeline_score(score)