def export_dataset(dataset, api, args, resume, session_file=None, path=None):
    """Export the dataset to a local CSV file.

    The target file is the one the user gave in ``args.to_csv`` or, by
    default, a name derived from the dataset id. When ``resume`` is set,
    a checkpoint is consulted first so an already-exported file is not
    downloaded again. Exits the process if the download fails.
    """
    csv_file = csv_name(args.to_csv, path, dataset)
    if resume:
        # checkpoint returns a falsy value when no exported file is found
        resume = c.checkpoint(
            c.is_dataset_exported, csv_file, debug=args.debug)
        if not resume:
            u.log_message(u.dated("No dataset exported. Resuming.\n"),
                          log_file=session_file, console=args.verbosity)
    else:
        u.log_message(
            u.dated("Exporting dataset to CSV file: %s\n" % csv_file),
            log_file=session_file, console=args.verbosity)
    if not resume:
        # download_dataset signals failure by returning None
        if api.download_dataset(dataset, filename=csv_file) is None:
            sys.exit("Failed downloading CSV.")
    return resume
def export_dataset(dataset, api, args, resume, session_file=None, path=None):
    """Exports the dataset to a CSV file given by the user or a filename
       based on the dataset id by default.

       When `resume` is set, a checkpoint is checked first so that a
       previously exported file is not downloaded again. Exits the
       process if the download fails.
    """
    filename = csv_name(args.to_csv, path, dataset)
    if resume:
        resume = c.checkpoint(c.is_dataset_exported, filename,
                              debug=args.debug)
        if not resume:
            message = u.dated("No dataset exported. Resuming.\n")
            u.log_message(message, log_file=session_file,
                          console=args.verbosity)
    else:
        message = u.dated("Exporting dataset to CSV file: %s\n" % filename)
        u.log_message(message, log_file=session_file, console=args.verbosity)
    if not resume:
        # download_dataset returns None on failure; abort explicitly
        # instead of silently continuing with no CSV file on disk.
        file_name = api.download_dataset(dataset, filename=filename)
        if file_name is None:
            sys.exit("Failed downloading CSV.")
    return resume
def remote_predict(model, test_dataset, batch_prediction_args, args,
                   api, resume, prediction_file=None, session_file=None,
                   path=None, log=None):
    """Computes a prediction for each entry in the `test_set`.

       Predictions are computed remotely using the batch predictions call.
       Depending on the flags in `args`:
       - `args.dataset_off`: one batch prediction per (model, test dataset)
         pair instead of a single one.
       - `args.no_csv`: skip downloading the predictions CSV.
       - `args.to_dataset`: register the output dataset(s) created by the
         batch prediction(s); with `dataset_off`, they are merged into a
         single multi-dataset.
    """
    if args.ensemble is not None and not args.dataset_off:
        model_or_ensemble = args.ensemble
    elif args.dataset_off:
        if hasattr(args, "ensemble_ids_") and args.ensemble_ids_:
            models = args.ensemble_ids_
        else:
            models = args.model_ids_
        test_datasets = args.test_dataset_ids
    else:
        model_or_ensemble = bigml.api.get_model_id(model)
    # if resuming, try to extract dataset form log files
    if resume:
        message = u.dated("Batch prediction not found. Resuming.\n")
        resume, batch_prediction = c.checkpoint(
            c.is_batch_prediction_created, path, debug=args.debug,
            message=message, log_file=session_file, console=args.verbosity)
    if not resume:
        if not args.dataset_off:
            batch_prediction = create_batch_prediction(
                model_or_ensemble, test_dataset, batch_prediction_args,
                args, api, session_file=session_file, path=path, log=log)
        else:
            batch_predictions = []
            for index, test_dataset_n in enumerate(test_datasets):
                batch_predictions.append(create_batch_prediction(
                    models[index], test_dataset_n, batch_prediction_args,
                    args, api, session_file=session_file, path=path,
                    log=log))
    if not args.no_csv and not args.dataset_off:
        # download_batch_prediction returns None on failure
        file_name = api.download_batch_prediction(batch_prediction,
                                                  prediction_file)
        if file_name is None:
            sys.exit("Failed downloading CSV.")
    if args.to_dataset and not args.dataset_off:
        batch_prediction = bigml.api.check_resource(batch_prediction,
                                                    api=api)
        new_dataset = bigml.api.get_dataset_id(
            batch_prediction['object']['output_dataset_resource'])
        if new_dataset is not None:
            message = u.dated("Batch prediction dataset created: %s\n"
                              % u.get_url(new_dataset))
            u.log_message(message, log_file=session_file,
                          console=args.verbosity)
            u.log_created_resources("batch_prediction_dataset", path,
                                    new_dataset, mode='a')
    elif args.to_dataset and args.dataset_off:
        predictions_datasets = []
        for batch_prediction in batch_predictions:
            batch_prediction = bigml.api.check_resource(batch_prediction,
                                                        api=api)
            new_dataset = bigml.api.get_dataset_id(
                batch_prediction['object']['output_dataset_resource'])
            if new_dataset is not None:
                predictions_datasets.append(new_dataset)
                message = u.dated("Batch prediction dataset created: %s\n"
                                  % u.get_url(new_dataset))
                u.log_message(message, log_file=session_file,
                              console=args.verbosity)
                u.log_created_resources("batch_prediction_dataset", path,
                                        new_dataset, mode='a')
        # merge the per-model output datasets into a single one
        multi_dataset = api.create_dataset(predictions_datasets)
        # use the u.-qualified helpers, consistent with the rest of the
        # function (the bare names are not guaranteed to be in scope)
        u.log_created_resources("dataset_pred", path,
                                bigml.api.get_dataset_id(multi_dataset),
                                mode='a')
        dataset_id = check_resource_error(multi_dataset,
                                          "Failed to create dataset: ")
        try:
            multi_dataset = api.check_resource(multi_dataset)
        # Python 3-compatible `as` syntax (the old comma form is a
        # SyntaxError on Python 3)
        except ValueError as exception:
            sys.exit("Failed to get a finished dataset: %s"
                     % str(exception))
        message = u.dated("Predictions dataset created: %s\n"
                          % u.get_url(multi_dataset))
        u.log_message(message, log_file=session_file,
                      console=args.verbosity)
        u.log_message("%s\n" % dataset_id, log_file=log)
        if not args.no_csv:
            file_name = api.download_dataset(dataset_id, prediction_file)
            if file_name is None:
                sys.exit("Failed downloading CSV.")
def remote_predict(model, test_dataset, batch_prediction_args, args,
                   api, resume, prediction_file=None, session_file=None,
                   path=None, log=None):
    """Computes a prediction for each entry in the `test_set`.

       Predictions are computed remotely using the batch predictions call.
       With `args.dataset_off` one batch prediction is created per
       (model, test dataset) pair; otherwise a single batch prediction is
       created for `test_dataset`. `args.no_csv` skips the CSV download
       and `args.to_dataset` registers the output dataset(s), merging them
       into one multi-dataset in the `dataset_off` case.
    """
    if args.ensemble is not None and not args.dataset_off:
        model_or_ensemble = args.ensemble
    elif args.dataset_off:
        if hasattr(args, "ensemble_ids_") and args.ensemble_ids_:
            models = args.ensemble_ids_
        else:
            models = args.model_ids_
        test_datasets = args.test_dataset_ids
    else:
        model_or_ensemble = bigml.api.get_model_id(model)
    # if resuming, try to extract dataset form log files
    if resume:
        message = u.dated("Batch prediction not found. Resuming.\n")
        resume, batch_prediction = c.checkpoint(
            c.is_batch_prediction_created, path, debug=args.debug,
            message=message, log_file=session_file, console=args.verbosity)
    if not resume:
        if not args.dataset_off:
            batch_prediction = create_batch_prediction(
                model_or_ensemble, test_dataset, batch_prediction_args,
                args, api, session_file=session_file, path=path, log=log)
        else:
            batch_predictions = []
            for index, test_dataset_n in enumerate(test_datasets):
                batch_predictions.append(create_batch_prediction(
                    models[index], test_dataset_n, batch_prediction_args,
                    args, api, session_file=session_file, path=path,
                    log=log))
    if not args.no_csv and not args.dataset_off:
        # a None return from download_batch_prediction means failure
        file_name = api.download_batch_prediction(batch_prediction,
                                                  prediction_file)
        if file_name is None:
            sys.exit("Failed downloading CSV.")
    if args.to_dataset and not args.dataset_off:
        batch_prediction = bigml.api.check_resource(batch_prediction,
                                                    api=api)
        new_dataset = bigml.api.get_dataset_id(
            batch_prediction['object']['output_dataset_resource'])
        if new_dataset is not None:
            message = u.dated("Batch prediction dataset created: %s\n"
                              % u.get_url(new_dataset))
            u.log_message(message, log_file=session_file,
                          console=args.verbosity)
            u.log_created_resources("batch_prediction_dataset", path,
                                    new_dataset, mode='a')
    elif args.to_dataset and args.dataset_off:
        predictions_datasets = []
        for batch_prediction in batch_predictions:
            batch_prediction = bigml.api.check_resource(batch_prediction,
                                                        api=api)
            new_dataset = bigml.api.get_dataset_id(
                batch_prediction['object']['output_dataset_resource'])
            if new_dataset is not None:
                predictions_datasets.append(new_dataset)
                message = u.dated("Batch prediction dataset created: %s\n"
                                  % u.get_url(new_dataset))
                u.log_message(message, log_file=session_file,
                              console=args.verbosity)
                u.log_created_resources("batch_prediction_dataset", path,
                                        new_dataset, mode='a')
        # merge every per-model output dataset into one multi-dataset
        multi_dataset = api.create_dataset(predictions_datasets)
        # the u.-qualified helpers are used for consistency with the rest
        # of this function (bare names are not guaranteed to be imported)
        u.log_created_resources("dataset_pred", path,
                                bigml.api.get_dataset_id(multi_dataset),
                                mode='a')
        dataset_id = check_resource_error(multi_dataset,
                                          "Failed to create dataset: ")
        try:
            multi_dataset = api.check_resource(multi_dataset)
        # `except X as e` replaces the Python 2-only comma form, which is
        # a SyntaxError on Python 3
        except ValueError as exception:
            sys.exit("Failed to get a finished dataset: %s"
                     % str(exception))
        message = u.dated("Predictions dataset created: %s\n"
                          % u.get_url(multi_dataset))
        u.log_message(message, log_file=session_file,
                      console=args.verbosity)
        u.log_message("%s\n" % dataset_id, log_file=log)
        if not args.no_csv:
            file_name = api.download_dataset(dataset_id, prediction_file)
            if file_name is None:
                sys.exit("Failed downloading CSV.")