Example #1
    def test_kinesis_put_analyzing_event(self):
        events = KinesisEvents(service='Testing', mode='test')
        processed_paths_list = [os.path.join(TEST_FILE_DIR, "2017-12-04_4_65C-69per_6C_features.json")]
        processed_run_list = [24]
        processed_result_list = ["success"]
        processed_message_list = [{'comment': '',
                                   'error': ''}]

        output_data = {"file_list": processed_paths_list,
                       "run_list": processed_run_list,
                       "result_list": processed_result_list,
                       "message_list": processed_message_list
                       }

        response_valid = events.put_analyzing_event(output_data, 'featurizing', 'complete')
        assert response_valid['ResponseMetadata']['HTTPStatusCode'] == 200

        processed_paths_list = [os.path.join(TEST_FILE_DIR, "2017-12-04_4_65C-69per_6C_predictions.json")]
        processed_run_list = [24]
        processed_result_list = ["success"]
        processed_message_list = [{'comment': '',
                                   'error': ''}]

        output_data = {"file_list": processed_paths_list,
                       "run_list": processed_run_list,
                       "result_list": processed_result_list,
                       "message_list": processed_message_list
                       }

        response_valid = events.put_analyzing_event(output_data, 'predicting', 'complete')
        assert response_valid['ResponseMetadata']['HTTPStatusCode'] == 200
Example #2
    def test_kinesis_put_analyzing_event(self):
        events = KinesisEvents(service="Testing", mode="test")
        processed_paths_list = [
            os.path.join(TEST_FILE_DIR,
                         "2017-12-04_4_65C-69per_6C_features.json")
        ]
        processed_run_list = [24]
        processed_result_list = ["success"]
        processed_message_list = [{"comment": "", "error": ""}]

        output_data = {
            "file_list": processed_paths_list,
            "run_list": processed_run_list,
            "result_list": processed_result_list,
            "message_list": processed_message_list,
        }

        response_valid = events.put_analyzing_event(output_data, "featurizing",
                                                    "complete")
        assert response_valid["ResponseMetadata"]["HTTPStatusCode"] == 200

        processed_paths_list = [
            os.path.join(TEST_FILE_DIR,
                         "2017-12-04_4_65C-69per_6C_predictions.json")
        ]
        processed_run_list = [24]
        processed_result_list = ["success"]
        processed_message_list = [{"comment": "", "error": ""}]

        output_data = {
            "file_list": processed_paths_list,
            "run_list": processed_run_list,
            "result_list": processed_result_list,
            "message_list": processed_message_list,
        }

        response_valid = events.put_analyzing_event(output_data, "predicting",
                                                    "complete")
        assert response_valid["ResponseMetadata"]["HTTPStatusCode"] == 200
Example #3
def process_file_list_from_json(file_list_json,
                                processed_dir='data-share/features/',
                                features_label='full_model',
                                predict_only=False,
                                prediction_type="multi",
                                predicted_quantity="cycle"):
    """
    Function to take a json file containing processed cycler run file locations,
    extract features, dump the processed file into a predetermined directory,
    and return a jsonable dict of feature file locations.

    Args:
        file_list_json (str): json string or json filename corresponding
            to a dictionary with a file_list attribute. If the string ends
            with ".json", it is treated as a filename and loaded; otherwise
            it is interpreted as a json string.
        processed_dir (str): location for processed cycler run output files
            to be placed.
        features_label (str): name of feature generation method.
        predict_only (bool): whether to calculate predictions or not.
        prediction_type (str): single- or multi-point predictions.
        predicted_quantity (str): quantity being predicted - cycle or capacity.

    Returns:
        str: json string of feature files (with key "file_list").

    """
    # Get file list and validity from json; if the string ends with
    # ".json", assume it's a file, otherwise treat it as a json string
    if file_list_json.endswith(".json"):
        file_list_data = loadfn(file_list_json)
    else:
        file_list_data = json.loads(file_list_json)

    # Setup Events
    events = KinesisEvents(service='DataAnalyzer', mode=file_list_data['mode'])

    # Add root path to processed_dir
    processed_dir = os.path.join(os.environ.get("BEEP_ROOT", "/"),
                                 processed_dir)
    file_list = file_list_data['file_list']
    run_ids = file_list_data['run_list']
    processed_run_list = []
    processed_result_list = []
    processed_message_list = []
    processed_paths_list = []

    required_cycle_num = 100  # for full model

    for path, run_id in zip(file_list, run_ids):
        logger.info('run_id=%s featurizing=%s', str(run_id), path, extra=s)

        # Check if there is enough data to try featurizing
        if len(loadfn(path).summary) <= required_cycle_num:
            logger.info("run_id=%s Insufficient data for featurization",
                        str(run_id),
                        extra=s)
            processed_paths_list.append(path)
            processed_run_list.append(run_id)
            processed_result_list.append("incomplete")
            processed_message_list.append({
                'comment': 'Insufficient data for featurization',
                'error': ''
            })

        else:
            processed_data = DegradationPredictor.from_processed_cycler_run_file(
                path,
                features_label=features_label,
                predict_only=predict_only,
                prediction_type=prediction_type,
                predicted_quantity=predicted_quantity)
            new_filename = os.path.basename(path)
            new_filename = scrub_underscore_suffix(new_filename)

            # Append model_name along with "features" to demarcate
            # different models when saving the feature vectors.
            new_filename = add_suffix_to_filename(
                new_filename,
                "_" + features_label + "_" + prediction_type + "_features")
            processed_path = os.path.join(processed_dir, new_filename)
            processed_path = os.path.abspath(processed_path)
            dumpfn(processed_data, processed_path)
            processed_paths_list.append(processed_path)
            processed_run_list.append(run_id)
            processed_result_list.append("success")
            processed_message_list.append({'comment': '', 'error': ''})

    output_data = {
        "file_list": processed_paths_list,
        "run_list": processed_run_list,
        "result_list": processed_result_list,
        "message_list": processed_message_list
    }

    events.put_analyzing_event(output_data, 'featurizing', 'complete')
    # Return jsonable file list
    return json.dumps(output_data)
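
A minimal usage sketch for the function above. The input path, run id, and mode value are illustrative assumptions; the dict keys mirror the ones the function actually reads ("mode", "file_list", "run_list"), and the listed file must be a processed cycler run that loadfn can open.

import json
import os

# Hypothetical input: one structured cycler run file on disk.
input_json = json.dumps({
    "mode": "test",
    "file_list": [os.path.join("data-share", "structure", "example_structure.json")],
    "run_list": [24],
})
feature_file_list = json.loads(process_file_list_from_json(input_json))
print(feature_file_list["result_list"])  # e.g. ["success"] or ["incomplete"]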
Example #4
def process_file_list_from_json(file_list_json,
                                processed_dir='data-share/features/',
                                features_label='full_model',
                                predict_only=False,
                                prediction_type="multi",
                                predicted_quantity="cycle"):
    """
    Function to take a json file containing processed cycler run file locations,
    extract features, dump the processed file into a predetermined directory,
    and return a jsonable dict of feature file locations.

    Args:
        file_list_json (str): json string or json filename corresponding
            to a dictionary with a file_list attribute. If the string ends
            with ".json", it is treated as a filename and loaded; otherwise
            it is interpreted as a json string.
        processed_dir (str): location for processed cycler run output files
            to be placed.
        features_label (str): name of feature generation method.
        predict_only (bool): whether to calculate predictions or not.
        prediction_type (str): single- or multi-point predictions.
        predicted_quantity (str): quantity being predicted - cycle or capacity.

    Returns:
        str: json string of feature files (with key "file_list").

    """
    # Get file list and validity from json; if the string ends with
    # ".json", assume it's a file, otherwise treat it as a json string
    if file_list_json.endswith(".json"):
        file_list_data = loadfn(file_list_json)
    else:
        file_list_data = json.loads(file_list_json)

    # Setup Events
    events = KinesisEvents(service='DataAnalyzer', mode=file_list_data['mode'])

    # Add root path to processed_dir
    processed_dir = os.path.join(os.environ.get("BEEP_ROOT", "/"),
                                 processed_dir)
    file_list = file_list_data['file_list']
    run_ids = file_list_data['run_list']
    processed_run_list = []
    processed_result_list = []
    processed_message_list = []
    processed_paths_list = []

    for path, run_id in zip(file_list, run_ids):
        logger.info('run_id=%s featurizing=%s', str(run_id), path, extra=s)
        processed_cycler_run = loadfn(path)

        featurizer_classes = [DeltaQFastCharge, TrajectoryFastCharge]
        for featurizer_class in featurizer_classes:
            featurizer = featurizer_class.from_run(path, processed_dir,
                                                   processed_cycler_run)
            if featurizer:
                dumpfn(featurizer, featurizer.name)
                processed_paths_list.append(featurizer.name)
                processed_run_list.append(run_id)
                processed_result_list.append("success")
                processed_message_list.append({'comment': '', 'error': ''})
                logger.info('Successfully generated %s',
                            featurizer.name,
                            extra=s)
            else:
                processed_paths_list.append(path)
                processed_run_list.append(run_id)
                processed_result_list.append("incomplete")
                processed_message_list.append({
                    'comment': 'Insufficient or incorrect data for featurization',
                    'error': ''
                })
                logger.info('Unable to featurize %s', path, extra=s)

    output_data = {
        "file_list": processed_paths_list,
        "run_list": processed_run_list,
        "result_list": processed_result_list,
        "message_list": processed_message_list
    }

    events.put_analyzing_event(output_data, 'featurizing', 'complete')
    # Return jsonable file list
    return json.dumps(output_data)
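
The loop above relies on a narrow featurizer contract: a from_run classmethod that returns a featurizer instance (or something falsy when featurization is not possible) and a name attribute used as the dump target. A minimal sketch of that contract, using a hypothetical class rather than the real DeltaQFastCharge/TrajectoryFastCharge implementations:

class NoOpFeaturizer:
    """Hypothetical featurizer illustrating the interface the loop assumes."""
    name = "no_op_features.json"

    @classmethod
    def from_run(cls, path, processed_dir, processed_cycler_run):
        # Returning None makes the caller record the run as "incomplete";
        # a real featurizer would return an instance holding its features.
        return None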
Example #5
def process_file_list_from_json(file_list_json, model_dir="/data-share/models/",
                                processed_dir='data-share/predictions/',
                                hyperparameters=None, model_name=None, predict_only=True):
    """
    Function to take a json file containing featurized json locations,
    train a new model if necessary, write files containing predictions into a
    predetermined directory, and return a jsonable dict of prediction file locations.

    Args:
        file_list_json (str): json string or json filename corresponding
            to a dictionary with a file_list attribute. If the string ends
            with ".json", it is treated as a filename and loaded; otherwise
            it is interpreted as a json string.
        model_dir (str): location where models are serialized and stored.
        processed_dir (str): location for processed cycler run output files
            to be placed.
        hyperparameters (dict): dictionary of hyperparameters to optimize/use
            for training.
        model_name (str): name of the serialized model to load, or the name
            to assign to a newly trained model.
        predict_only (bool): if True, skip training and predict using a
            serialized model.

    Returns:
        str: json string of prediction files (with key "file_list").

    """
    # Get file list and validity from json; if the string ends with
    # ".json", assume it's a file, otherwise treat it as a json string
    if file_list_json.endswith(".json"):
        file_list_data = loadfn(file_list_json)
    else:
        file_list_data = json.loads(file_list_json)

    # Setup Events
    events = KinesisEvents(service='DataAnalyzer', mode=file_list_data['mode'])

    # Add BEEP_ROOT to processed_dir
    processed_dir = os.path.join(os.environ.get("BEEP_ROOT", "/"),
                                 processed_dir)
    file_list = file_list_data['file_list']
    run_ids = file_list_data['run_list']
    processed_run_list = []
    processed_result_list = []
    processed_message_list = []
    processed_paths_list = []
    project_name = get_project_name_from_list(file_list)
    if predict_only:
        features = loadfn(file_list[0])
        if model_name is None and project_name in DEFAULT_MODEL_PROJECTS:
            if features.prediction_type == 'multi':
                model = DegradationModel.from_serialized_model(model_dir=model_dir,
                                                               serialized_model='d3batt_multi_point.model')
            else:
                model = DegradationModel.from_serialized_model(model_dir=model_dir,
                                                               serialized_model='d3batt_single_point.model')

        elif model_name is None and project_name not in DEFAULT_MODEL_PROJECTS:
            output_data = {"file_list": [],
                           "run_list": [],
                           "result_list": [],
                           "message_list": []
                           }

            events.put_analyzing_event(output_data, 'predicting', 'error')

            # Return jsonable file list
            return json.dumps(output_data)

        else:
            model = DegradationModel.from_serialized_model(model_dir=model_dir,
                                                           serialized_model=model_name)

    else:
        if hyperparameters is None:
            hyperparameters = {'random_state': 1,
                               'test_size': 0.3,
                               'k_fold': 5,
                               'tol': 0.001,
                               'l1_ratio': [0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1]
                               }

        dataset_id = file_list_data.get("dataset_id")
        model = DegradationModel.train(file_list_json, dataset_id=dataset_id,
                                       model_type='linear', regularization_type='elasticnet',
                                       model_name=model_name, hyperparameters=hyperparameters)
        logger.warning('fitting=%s dataset=%s', model.name, str(dataset_id), extra=s)

    for path, run_id in zip(file_list, run_ids):
        logger.info('model=%s run_id=%s predicting=%s', model.name, str(run_id), path, extra=s)
        features = loadfn(path)
        prediction = model.predict(features)
        prediction_dict = model.prediction_to_dict(prediction, features.nominal_capacity)
        new_filename = os.path.basename(path)
        new_filename = scrub_underscore_suffix(new_filename)
        new_filename = add_suffix_to_filename(new_filename, "_predictions")
        processed_path = os.path.join(processed_dir, new_filename)
        processed_path = os.path.abspath(processed_path)
        dumpfn(prediction_dict, processed_path)

        # Append file loc to list to be returned
        processed_paths_list.append(processed_path)
        processed_run_list.append(run_id)
        processed_result_list.append("success")
        processed_message_list.append({'comment': '',
                                       'error': ''})

    output_data = {"file_list": processed_paths_list,
                   "run_list": processed_run_list,
                   "result_list": processed_result_list,
                   "message_list": processed_message_list
                   }

    events.put_analyzing_event(output_data, 'predicting', 'complete')

    # Return jsonable file list
    return json.dumps(output_data)
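
A usage sketch for the predict-only path. The feature file path and mode value are illustrative assumptions, and the sketch assumes the project name derived from the file list maps to one of the default serialized models (otherwise the function takes the error path above and returns an empty payload).

import json

# Hypothetical input: featurized file locations plus the "mode" key the
# function reads when constructing KinesisEvents.
input_json = json.dumps({
    "mode": "test",
    "file_list": ["/data-share/features/example_full_model_multi_features.json"],
    "run_list": [24],
})
prediction_json = process_file_list_from_json(input_json, predict_only=True)
print(json.loads(prediction_json)["file_list"])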