def test_kinesis_put_upload_retrigger_event(self):
    events = KinesisEvents(service="Testing", mode="test")
    s3_bucket = "beep-input-data"
    obj = {
        "Key": "d3Batt/raw/arbin/FastCharge_000002_CH2_Metadata.csv",
        "LastModified": datetime.datetime(2019, 4, 4, 23, 19, 20, tzinfo=tzutc()),
        "ETag": '"37677ae6b73034197d59cf3075f6fb98"',
        "Size": 615,
        "StorageClass": "STANDARD",
        "Owner": {
            "DisplayName": "it-admin+materials-admin",
            "ID": "02d8b24e2f66c2b5937f391b7c87406d4eeab68cf887bd9933d6631536959f24",
        },
    }
    retrigger_data = {
        "filename": obj["Key"],
        "bucket": s3_bucket,
        "size": obj["Size"],
        "hash": obj["ETag"].strip('"'),
    }
    response_valid = events.put_upload_retrigger_event("complete", retrigger_data)
    assert response_valid["ResponseMetadata"]["HTTPStatusCode"] == 200
def scan(config): print("scanning") s3 = boto3.client("s3") all_objects = s3.list_objects_v2(Bucket=S3_BUCKET, Prefix=config.s3_prefix) objects = [obj for obj in all_objects['Contents'] if obj['Size'] > 1000] # db_objects = dim_run['file_path_data'].tolist() # print(db_objects) # print(len([obj for obj in objects if obj['Key'] not in db_objects])) # objects = [obj for obj in objects if obj['Key'] not in db_objects] objects = [ obj for obj in objects if "PredictionDiagnostics" in obj['Key'] and "x" not in obj['Key'] and "Complete" not in obj['Key'] # and obj['LastModified'] < datetime.datetime(2020, 2, 15, 5, 35, 43, tzinfo=tzutc())] and "_000128_" in obj['Key'] ] print(len(objects)) events = KinesisEvents(service='S3Syncer', mode=config.mode) objects.reverse() for obj in objects: retrigger_data = { "filename": obj['Key'], "bucket": S3_BUCKET, "size": obj['Size'], "hash": obj["ETag"].strip('\"') } events.put_upload_retrigger_event('complete', retrigger_data) print(retrigger_data)
def test_kinesis_put_service_event_stress(self):
    events = KinesisEvents(service='Testing', mode='test')
    for i in range(10):
        array = np.random.rand(5, 5, 3)
        print(array.tolist())
        response_valid = events.put_service_event(
            'Test', 'starting', {"Stress test array": array.tolist()})
        assert response_valid['ResponseMetadata']['HTTPStatusCode'] == 200
def test_kinesis_put_analyzing_event(self):
    events = KinesisEvents(service='Testing', mode='test')
    processed_paths_list = [os.path.join(TEST_FILE_DIR,
                                         "2017-12-04_4_65C-69per_6C_features.json")]
    processed_run_list = [24]
    processed_result_list = ["success"]
    processed_message_list = [{'comment': '', 'error': ''}]
    output_data = {"file_list": processed_paths_list,
                   "run_list": processed_run_list,
                   "result_list": processed_result_list,
                   "message_list": processed_message_list
                   }
    response_valid = events.put_analyzing_event(output_data, 'featurizing', 'complete')
    assert response_valid['ResponseMetadata']['HTTPStatusCode'] == 200

    processed_paths_list = [os.path.join(TEST_FILE_DIR,
                                         "2017-12-04_4_65C-69per_6C_predictions.json")]
    processed_run_list = [24]
    processed_result_list = ["success"]
    processed_message_list = [{'comment': '', 'error': ''}]
    output_data = {"file_list": processed_paths_list,
                   "run_list": processed_run_list,
                   "result_list": processed_result_list,
                   "message_list": processed_message_list
                   }
    response_valid = events.put_analyzing_event(output_data, 'predicting', 'complete')
    assert response_valid['ResponseMetadata']['HTTPStatusCode'] == 200
def test_kinesis_put_service_event_stress(self): events = KinesisEvents(service="Testing", mode="test") for i in range(10): array = np.random.rand(5, 5, 3) print(array.tolist()) response_valid = events.put_service_event( "Test", "starting", {"Stress test array": array.tolist()}) assert response_valid["ResponseMetadata"]["HTTPStatusCode"] == 200
def test_get_file_size(self):
    events = KinesisEvents(service='Testing', mode='test')
    file_list = [os.path.join(TEST_FILE_DIR, "2017-05-09_test-TC-contact_CH33.csv"),
                 os.path.join(TEST_FILE_DIR, "2017-12-04_4_65C-69per_6C_CH29.csv"),
                 os.path.join(TEST_FILE_DIR, "xTESLADIAG_000019_CH70.070")]
    file_sizes = events.get_file_size(file_list)
    print(file_sizes)
    assert file_sizes[0] == 54620
    assert file_sizes[1] == 37878198
    assert file_sizes[2] == 3019440
def test_kinesis_put_validation_event(self):
    events = KinesisEvents(service='Testing', mode='test')
    file_list = [os.path.join(TEST_FILE_DIR, "2017-12-04_4_65C-69per_6C_CH29.csv")]
    file_list_data = {"run_list": [24]}
    validity = ["valid"]
    messages = [{'comment': '', 'error': ''}]
    output_json = {'file_list': file_list,
                   'run_list': file_list_data['run_list'],
                   'validity': validity,
                   'message_list': messages}
    response_valid = events.put_validation_event(output_json, 'complete')
    assert response_valid['ResponseMetadata']['HTTPStatusCode'] == 200
def test_kinesis_put_service_event(self):
    events = KinesisEvents(service='Testing', mode='test')

    response_valid = events.put_service_event('Test', 'starting', {"String": "test"})
    assert response_valid['ResponseMetadata']['HTTPStatusCode'] == 200

    response_type_error = events.put_service_event('Test', 'starting', np.array([1, 2, 3]))
    self.assertRaises(TypeError, response_type_error)

    # Test list variable type
    response_valid = events.put_service_event('Test', 'starting', {"List": [1, 2, 3]})
    assert response_valid['ResponseMetadata']['HTTPStatusCode'] == 200

    # Test float variable type
    response_valid = events.put_service_event('Test', 'starting', {"Float": 1238.1231234})
    assert response_valid['ResponseMetadata']['HTTPStatusCode'] == 200

    # Test np.array variable type
    response_valid = events.put_service_event(
        'Test', 'starting', {"Array": np.random.rand(10, 10).tolist()})
    assert response_valid['ResponseMetadata']['HTTPStatusCode'] == 200

    # Test dictionary variable type
    response_valid = events.put_service_event('Test', 'starting',
                                              {"Dict": {"key": "value"}})
    assert response_valid['ResponseMetadata']['HTTPStatusCode'] == 200
def test_kinesis_put_validation_event(self): events = KinesisEvents(service="Testing", mode="test") file_list = [ os.path.join(TEST_FILE_DIR, "2017-12-04_4_65C-69per_6C_CH29.csv") ] file_list_data = {"run_list": [24]} validity = ["valid"] messages = [{"comment": "", "error": ""}] output_json = { "file_list": file_list, "run_list": file_list_data["run_list"], "validity": validity, "message_list": messages, } response_valid = events.put_validation_event(output_json, "complete") assert response_valid["ResponseMetadata"]["HTTPStatusCode"] == 200
def test_kinesis_put_structuring_event(self):
    events = KinesisEvents(service='Testing', mode='test')
    processed_file_list = [os.path.join(TEST_FILE_DIR,
                                        "2017-06-30_2C-10per_6C_CH10_structure.json")]
    processed_run_list = [24]
    processed_result_list = ["success"]
    processed_message_list = [{'comment': '', 'error': ''}]
    invalid_file_list = []
    output_json = {"file_list": processed_file_list,
                   "run_list": processed_run_list,
                   "result_list": processed_result_list,
                   "message_list": processed_message_list,
                   "invalid_file_list": invalid_file_list}
    response_valid = events.put_structuring_event(output_json, 'complete')
    assert response_valid['ResponseMetadata']['HTTPStatusCode'] == 200
def test_kinesis_put_generate_event(self):
    events = KinesisEvents(service='Testing', mode='test')
    all_output_files = ['/data-share/protocols/procedures/name_000000.000',
                        '/data-share/protocols/procedures/name_000007.000',
                        '/data-share/protocols/procedures/name_000014.000']
    result = 'success'
    message = {'comment': '', 'error': ''}
    output_data = {
        "file_list": all_output_files,
        "result": result,
        "message": message
    }
    response_valid = events.put_generate_event(output_data, 'complete')
    assert response_valid['ResponseMetadata']['HTTPStatusCode'] == 200
def scan(config): print("scanning") s3 = boto3.client("s3") all_objects = s3.list_objects_v2(Bucket=S3_BUCKET_IN, Prefix=config.s3_prefix) objects = [obj for obj in all_objects['Contents'] if obj['Size'] > 1000] objects = [ obj for obj in objects if "PredictionDiagnostics" in obj['Key'] and "x" not in obj['Key'] and "Complete" not in obj['Key'] # and obj['LastModified'] < datetime.datetime(2020, 3, 24, 5, 35, 43, tzinfo=tzutc()) # and "_000175_" in obj['Key'] ] old_objects = [] old = datetime.datetime.now(pytz.utc) - datetime.timedelta(hours=6) for obj in objects: name = config.s3_output + '/' + get_structure_name(obj) structure_objects = s3.list_objects_v2(Bucket=S3_BUCKET_OUT, Prefix=name) # print(structure_objects) if 'Contents' in structure_objects.keys() and len( structure_objects['Contents']) == 1: if structure_objects['Contents'][0]['LastModified'] < old: old_objects.append(obj) else: old_objects.append(obj) objects = old_objects print(len(objects)) events = KinesisEvents(service='S3Syncer', mode=config.mode) objects.reverse() for obj in objects: retrigger_data = { "filename": obj['Key'], "bucket": S3_BUCKET_IN, "size": obj['Size'], "hash": obj["ETag"].strip('\"') } events.put_upload_retrigger_event('complete', retrigger_data) print(retrigger_data) time.sleep(0.1)
def test_kinesis_put_generate_event(self): events = KinesisEvents(service="Testing", mode="test") all_output_files = [ "/data-share/protocols/procedures/name_000000.000", "/data-share/protocols/procedures/name_000007.000", "/data-share/protocols/procedures/name_000014.000", ] result = "success" message = {"comment": "", "error": ""} output_data = { "file_list": all_output_files, "result": result, "message": message, } response_valid = events.put_generate_event(output_data, "complete") assert response_valid["ResponseMetadata"]["HTTPStatusCode"] == 200
def process_csv_file_list_from_json(
    file_list_json, processed_dir="data-share/protocols/"
):
    """
    Generates protocol files for each CSV of protocol parameters in the input
    file list and emits a generate event when the batch is complete.

    Args:
        file_list_json (str): json string or json filename corresponding to a
            dictionary with a "file_list" attribute of CSV parameter files; if the
            string ends with ".json" it is treated as a file, otherwise as a json string.
        processed_dir (str): output directory for generated protocol files,
            relative to the BEEP_PROCESSING_DIR environment variable.

    Returns:
        str: json dump of the generated protocol file list, result, and message.
    """
    # Get file list and validity from json, if ends with .json,
    # assume it's a file, if not assume it's a json string
    if file_list_json.endswith(".json"):
        file_list_data = loadfn(file_list_json)
    else:
        file_list_data = json.loads(file_list_json)

    # Setup Events
    events = KinesisEvents(service="ProtocolGenerator", mode=file_list_data["mode"])
    outputs = WorkflowOutputs()

    file_list = file_list_data["file_list"]
    all_output_files = []
    protocol_dir = os.path.join(
        os.environ.get("BEEP_PROCESSING_DIR", "/"), processed_dir
    )
    for filename in file_list:
        output_files, result, message = generate_protocol_files_from_csv(
            filename, output_directory=protocol_dir
        )
        all_output_files.extend(output_files)

    output_data = {"file_list": all_output_files, "result": result, "message": message}

    events.put_generate_event(output_data, "complete")

    # Workflow outputs
    outputs.put_generate_outputs_list(output_data, "complete")

    return json.dumps(output_data)
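# Illustrative only: a minimal sketch of calling process_csv_file_list_from_json with a
# json string payload. The CSV path and mode value are hypothetical; the required keys
# ("mode" and "file_list") mirror how file_list_data is read above.
import json

example_request = json.dumps({
    "mode": "test",
    "file_list": ["/data-share/raw/parameters/protocol_params_000001.csv"],  # hypothetical path
})
result_json = process_csv_file_list_from_json(example_request)
print(json.loads(result_json)["result"])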
def test_kinesis_put_structuring_event(self): events = KinesisEvents(service="Testing", mode="test") processed_file_list = [ os.path.join(TEST_FILE_DIR, "2017-06-30_2C-10per_6C_CH10_structure.json") ] processed_run_list = [24] processed_result_list = ["success"] processed_message_list = [{"comment": "", "error": ""}] invalid_file_list = [] output_json = { "file_list": processed_file_list, "run_list": processed_run_list, "result_list": processed_result_list, "message_list": processed_message_list, "invalid_file_list": invalid_file_list, } response_valid = events.put_structuring_event(output_json, "complete") assert response_valid["ResponseMetadata"]["HTTPStatusCode"] == 200
def test_kinesis_put_analyzing_event(self): events = KinesisEvents(service="Testing", mode="test") processed_paths_list = [ os.path.join(TEST_FILE_DIR, "2017-12-04_4_65C-69per_6C_features.json") ] processed_run_list = [24] processed_result_list = ["success"] processed_message_list = [{"comment": "", "error": ""}] output_data = { "file_list": processed_paths_list, "run_list": processed_run_list, "result_list": processed_result_list, "message_list": processed_message_list, } response_valid = events.put_analyzing_event(output_data, "featurizing", "complete") assert response_valid["ResponseMetadata"]["HTTPStatusCode"] == 200 processed_paths_list = [ os.path.join(TEST_FILE_DIR, "2017-12-04_4_65C-69per_6C_predictions.json") ] processed_run_list = [24] processed_result_list = ["success"] processed_message_list = [{"comment": "", "error": ""}] output_data = { "file_list": processed_paths_list, "run_list": processed_run_list, "result_list": processed_result_list, "message_list": processed_message_list, } response_valid = events.put_analyzing_event(output_data, "predicting", "complete") assert response_valid["ResponseMetadata"]["HTTPStatusCode"] == 200
def validate_file_list_from_json(file_list_json, record_results=False,
                                 skip_existing=False,
                                 validator_class=SimpleValidator):
    """
    Validates a list of files from json input

    Args:
        file_list_json (str): input for validation files, should be a json string
            with attribute "file_list" or a filename (e.g. something.json)
            corresponding to a json object with a similar attribute.
        record_results (bool): Whether to record the validation results
            locally or not (defaults to False).
        skip_existing (bool): Whether to skip already validated files. This
            is done by checking if the file is in the validation_records.
            skip_existing only matters if record_results is True. (defaults to False)
        validator_class (ValidatorBeep or SimpleValidator): validator class
            to use in validation.

    Returns:
        str: json dump of the validator results.
    """
    # Process input json
    if file_list_json.endswith(".json"):
        file_list_data = loadfn(file_list_json)
    else:
        file_list_data = json.loads(file_list_json)

    # Setup Events
    events = KinesisEvents(service='DataValidator', mode=file_list_data['mode'])

    file_list = file_list_data['file_list']
    validator = validator_class()
    all_results = validator.validate_from_paths(
        file_list,
        record_results=record_results,
        skip_existing=skip_existing,
    )

    # Get validities and recast to strings (valid/invalid) based on result
    validity = [
        all_results[os.path.split(file)[-1]]['validated'] for file in file_list
    ]
    validity = list(map(lambda x: 'valid' if x else 'invalid', validity))

    # Get errors
    errors = [
        all_results[os.path.split(file)[-1]]['errors'] for file in file_list
    ]
    messages = [{'comment': '', 'error': error} for error in errors]

    output_json = {
        'file_list': file_list,
        'run_list': file_list_data['run_list'],
        'validity': validity,
        'message_list': messages
    }
    events.put_validation_event(output_json, 'complete')

    return json.dumps(output_json)
def validate_file_list_from_json(
    file_list_json,
    record_results=False,
    skip_existing=False,
    validator_class=SimpleValidator,
):
    """
    Validates a list of files from json input

    Args:
        file_list_json (str): input for validation files, should be a json string
            with attribute "file_list" or a filename (e.g. something.json)
            corresponding to a json object with a similar attribute.
        record_results (bool): Whether to record the validation results
            locally or not (defaults to False).
        skip_existing (bool): Whether to skip already validated files. This
            is done by checking if the file is in the validation_records.
            skip_existing only matters if record_results is True. (defaults to False)
        validator_class (ValidatorBeep or SimpleValidator): validator class
            to use in validation.

    Returns:
        str: json dump of the validator results.
    """
    # Process input json
    if file_list_json.endswith(".json"):
        file_list_data = loadfn(file_list_json)
    else:
        file_list_data = json.loads(file_list_json)

    # Setup Events
    events = KinesisEvents(service="DataValidator", mode=file_list_data["mode"])
    outputs = WorkflowOutputs()

    file_list = file_list_data["file_list"]
    validator = validator_class()
    all_results = validator.validate_from_paths(
        file_list,
        record_results=record_results,
        skip_existing=skip_existing,
    )

    # Get validities and recast to strings (valid/invalid) based on result
    validity = [
        all_results[os.path.split(file)[-1]]["validated"] for file in file_list
    ]
    validity = list(map(lambda x: "valid" if x else "invalid", validity))

    # Get errors
    errors = [
        all_results[os.path.split(file)[-1]]["errors"] for file in file_list
    ]
    messages = [{"comment": "", "error": error} for error in errors]

    output_json = {
        "file_list": file_list,
        "run_list": file_list_data["run_list"],
        "validity": validity,
        "message_list": messages,
    }
    events.put_validation_event(output_json, "complete")

    # Workflow outputs
    file_list_size = len(output_json["file_list"])
    if file_list_size != 1:
        logger.warning(f"{file_list_size} files being validated, should be 1")

    output_data = {
        "filename": output_json["file_list"][0],
        "run_id": output_json["run_list"][0],
        "result": output_json["validity"][0],
    }

    outputs.put_workflow_outputs(output_data, "validating")

    return json.dumps(output_json)
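# Illustrative only: a minimal sketch of calling validate_file_list_from_json with a json
# string payload. The file path and run id are hypothetical; the keys "mode", "file_list",
# and "run_list" mirror what the function reads above.
import json

validation_request = json.dumps({
    "mode": "test",
    "file_list": ["/data-share/raw_cycler_files/2017-12-04_4_65C-69per_6C_CH29.csv"],  # hypothetical
    "run_list": [24],
})
validation_result = json.loads(validate_file_list_from_json(validation_request))
print(validation_result["validity"])  # e.g. ["valid"] or ["invalid"]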
def test_kinesis_put_basic_event(self):
    events = KinesisEvents(service='Testing', mode='test')
    response = events.put_basic_event('test_events', 'This is a basic event test')
    assert response['ResponseMetadata']['HTTPStatusCode'] == 200
def test_kinesis_put_basic_event(self): events = KinesisEvents(service="Testing", mode="test") response = events.put_basic_event("test_events", "This is a basic event test") assert response["ResponseMetadata"]["HTTPStatusCode"] == 200
def process_file_list_from_json(file_list_json, processed_dir='data-share/features/',
                                features_label='full_model', predict_only=False,
                                prediction_type="multi", predicted_quantity="cycle"):
    """
    Function to take a json file containing processed cycler run file locations,
    extract features, dump the processed file into a predetermined directory,
    and return a jsonable dict of feature file locations.

    Args:
        file_list_json (str): json string or json filename corresponding
            to a dictionary with a file_list attribute,
            if this string ends with ".json", a json file is assumed
            and loaded, otherwise interpreted as a json string.
        processed_dir (str): location for processed cycler run output files
            to be placed.
        features_label (str): name of feature generation method.
        predict_only (bool): whether to calculate predictions or not.
        prediction_type (str): Single or multi-point predictions.
        predicted_quantity (str): quantity being predicted - cycle or capacity.

    Returns:
        str: json string of feature files (with key "file_list").
    """
    # Get file list and validity from json, if ends with .json,
    # assume it's a file, if not assume it's a json string
    if file_list_json.endswith(".json"):
        file_list_data = loadfn(file_list_json)
    else:
        file_list_data = json.loads(file_list_json)

    # Setup Events
    events = KinesisEvents(service='DataAnalyzer', mode=file_list_data['mode'])

    # Add root path to processed_dir
    processed_dir = os.path.join(os.environ.get("BEEP_ROOT", "/"), processed_dir)
    file_list = file_list_data['file_list']
    run_ids = file_list_data['run_list']
    processed_run_list = []
    processed_result_list = []
    processed_message_list = []
    processed_paths_list = []
    required_cycle_num = 100  # for full model

    for path, run_id in zip(file_list, run_ids):
        logger.info('run_id=%s featurizing=%s', str(run_id), path, extra=s)

        # Check if there is enough data to try featurizing
        if not len(loadfn(path).summary) > required_cycle_num:
            logger.info("run_id=%s Insufficient data for featurization", str(run_id), extra=s)
            processed_paths_list.append(path)
            processed_run_list.append(run_id)
            processed_result_list.append("incomplete")
            processed_message_list.append({
                'comment': 'Insufficient data for featurization',
                'error': ''
            })
        else:
            processed_data = DegradationPredictor.from_processed_cycler_run_file(
                path,
                features_label=features_label,
                predict_only=predict_only,
                prediction_type=prediction_type,
                predicted_quantity=predicted_quantity)
            new_filename = os.path.basename(path)
            new_filename = scrub_underscore_suffix(new_filename)
            # Append model_name along with "features" to demarcate
            # different models when saving the feature vectors.
            new_filename = add_suffix_to_filename(
                new_filename,
                "_" + features_label + "_" + prediction_type + "_features")
            processed_path = os.path.join(processed_dir, new_filename)
            processed_path = os.path.abspath(processed_path)
            dumpfn(processed_data, processed_path)
            processed_paths_list.append(processed_path)
            processed_run_list.append(run_id)
            processed_result_list.append("success")
            processed_message_list.append({'comment': '', 'error': ''})

    output_data = {
        "file_list": processed_paths_list,
        "run_list": processed_run_list,
        "result_list": processed_result_list,
        "message_list": processed_message_list
    }

    events.put_analyzing_event(output_data, 'featurizing', 'complete')

    # Return jsonable file list
    return json.dumps(output_data)
def process_file_list_from_json(file_list_json, processed_dir='data-share/features/',
                                features_label='full_model', predict_only=False,
                                prediction_type="multi", predicted_quantity="cycle"):
    """
    Function to take a json file containing processed cycler run file locations,
    extract features, dump the processed file into a predetermined directory,
    and return a jsonable dict of feature file locations.

    Args:
        file_list_json (str): json string or json filename corresponding
            to a dictionary with a file_list attribute,
            if this string ends with ".json", a json file is assumed
            and loaded, otherwise interpreted as a json string.
        processed_dir (str): location for processed cycler run output files
            to be placed.
        features_label (str): name of feature generation method.
        predict_only (bool): whether to calculate predictions or not.
        prediction_type (str): Single or multi-point predictions.
        predicted_quantity (str): quantity being predicted - cycle or capacity.

    Returns:
        str: json string of feature files (with key "file_list").
    """
    # Get file list and validity from json, if ends with .json,
    # assume it's a file, if not assume it's a json string
    if file_list_json.endswith(".json"):
        file_list_data = loadfn(file_list_json)
    else:
        file_list_data = json.loads(file_list_json)

    # Setup Events
    events = KinesisEvents(service='DataAnalyzer', mode=file_list_data['mode'])

    # Add root path to processed_dir
    processed_dir = os.path.join(os.environ.get("BEEP_ROOT", "/"), processed_dir)
    file_list = file_list_data['file_list']
    run_ids = file_list_data['run_list']
    processed_run_list = []
    processed_result_list = []
    processed_message_list = []
    processed_paths_list = []

    for path, run_id in zip(file_list, run_ids):
        logger.info('run_id=%s featurizing=%s', str(run_id), path, extra=s)
        processed_cycler_run = loadfn(path)

        featurizer_classes = [DeltaQFastCharge, TrajectoryFastCharge]
        for featurizer_class in featurizer_classes:
            featurizer = featurizer_class.from_run(path, processed_dir, processed_cycler_run)
            if featurizer:
                dumpfn(featurizer, featurizer.name)
                processed_paths_list.append(featurizer.name)
                processed_run_list.append(run_id)
                processed_result_list.append("success")
                processed_message_list.append({'comment': '', 'error': ''})
                logger.info('Successfully generated %s', featurizer.name, extra=s)
            else:
                processed_paths_list.append(path)
                processed_run_list.append(run_id)
                processed_result_list.append("incomplete")
                processed_message_list.append({
                    'comment': 'Insufficient or incorrect data for featurization',
                    'error': ''
                })
                logger.info('Unable to featurize %s', path, extra=s)

    output_data = {
        "file_list": processed_paths_list,
        "run_list": processed_run_list,
        "result_list": processed_result_list,
        "message_list": processed_message_list
    }

    events.put_analyzing_event(output_data, 'featurizing', 'complete')

    # Return jsonable file list
    return json.dumps(output_data)
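# Illustrative only: a minimal sketch of calling the featurizing variant of
# process_file_list_from_json. The structured-run path and run id are hypothetical;
# "mode", "file_list", and "run_list" are the keys the function reads above, and output
# feature files are written under processed_dir resolved relative to BEEP_ROOT.
import json

featurize_request = json.dumps({
    "mode": "test",
    "file_list": ["/data-share/structure/2017-06-30_2C-10per_6C_CH10_structure.json"],  # hypothetical
    "run_list": [10],
})
feature_files = json.loads(process_file_list_from_json(featurize_request))
print(feature_files["result_list"])  # "success" or "incomplete" per featurizer attempt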
def process_file_list_from_json(file_list_json, model_dir="/data-share/models/", processed_dir='data-share/predictions/', hyperparameters=None, model_name=None, predict_only=True): """ Function to take a json file containing featurized json locations, train a new model if necessary, write files containing predictions into a predetermined directory, and return a jsonable dict of prediction file locations Args: file_list_json (str): json string or json filename corresponding to a dictionary with a file_list attribute, if this string ends with ".json", a json file is assumed and loaded, otherwise interpreted as a json string model_dir (str): location where models are serialized and stored processed_dir (str): location for processed cycler run output files to be placed hyperparameters (dict): dictionary of hyperparameters to optimize/use for training model_name (str): name of feature generation method predict_only (bool): Returns: str: json string of feature files (with key "feature_file_list"). """ # Get file list and validity from json, if ends with .json, # assume it's a file, if not assume it's a json string if file_list_json.endswith(".json"): file_list_data = loadfn(file_list_json) else: file_list_data = json.loads(file_list_json) # Setup Events events = KinesisEvents(service='DataAnalyzer', mode=file_list_data['mode']) # Add BEEP_ROOT to processed_dir processed_dir = os.path.join(os.environ.get("BEEP_ROOT", "/"), processed_dir) file_list = file_list_data['file_list'] run_ids = file_list_data['run_list'] processed_run_list = [] processed_result_list = [] processed_message_list = [] processed_paths_list = [] project_name = get_project_name_from_list(file_list) if predict_only: features = loadfn(file_list[0]) if model_name is None and project_name in DEFAULT_MODEL_PROJECTS: if features.prediction_type == 'multi': model = DegradationModel.from_serialized_model(model_dir=model_dir, serialized_model='d3batt_multi_point.model') else: model = DegradationModel.from_serialized_model(model_dir=model_dir, serialized_model='d3batt_single_point.model') elif model_name is None and project_name not in DEFAULT_MODEL_PROJECTS: output_data = {"file_list": [], "run_list": [], "result_list": [], "message_list": [] } events.put_analyzing_event(output_data, 'predicting', 'error') # Return jsonable file list return json.dumps(output_data) else: model = DegradationModel.from_serialized_model(model_dir=model_dir, serialized_model=model_name) else: if hyperparameters is None: hyperparameters = {'random_state': 1, 'test_size': .3, 'k_fold': 5, 'tol': 0.001, 'l1_ratio': [.1, .5, .7, .9, .95, .99, 1] } dataset_id = file_list_data.get("dataset_id") model = DegradationModel.train(file_list_json, dataset_id=dataset_id, model_type='linear', regularization_type='elasticnet', model_name=model_name, hyperparameters=hyperparameters) logger.warning('fitting=%s dataset=%s', model.name, str(dataset_id), extra=s) for path, run_id in zip(file_list, run_ids): logger.info('model=%s run_id=%s predicting=%s', model.name, str(run_id), path, extra=s) features = loadfn(path) prediction = model.predict(features) prediction_dict = model.prediction_to_dict(prediction, features.nominal_capacity) new_filename = os.path.basename(path) new_filename = scrub_underscore_suffix(new_filename) new_filename = add_suffix_to_filename(new_filename, "_predictions") processed_path = os.path.join(processed_dir, new_filename) processed_path = os.path.abspath(processed_path) dumpfn(prediction_dict, processed_path) # Append file loc to list to be returned 
processed_paths_list.append(processed_path) processed_run_list.append(run_id) processed_result_list.append("success") processed_message_list.append({'comment': '', 'error': ''}) output_data = {"file_list": processed_paths_list, "run_list": processed_run_list, "result_list": processed_result_list, "message_list": processed_message_list } events.put_analyzing_event(output_data, 'predicting', 'complete') # Return jsonable file list return json.dumps(output_data)
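# Illustrative only: a minimal sketch of calling the predicting variant of
# process_file_list_from_json with predict_only=True. The feature-file path and run id
# are hypothetical; "mode", "file_list", and "run_list" are the keys the function
# reads above, and the serialized model is resolved from model_dir.
import json

predict_request = json.dumps({
    "mode": "test",
    "file_list": ["/data-share/features/PredictionDiagnostics_000128_features.json"],  # hypothetical
    "run_list": [128],
})
prediction_files = json.loads(
    process_file_list_from_json(predict_request, predict_only=True)
)
print(prediction_files["file_list"])  # paths of the dumped *_predictions.json files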