Example #1
def dummy(ctx, test):
    logger.debug(f"Test msg: {test}")
    logger.info("Info msg")
    logger.warning("Warning message")
    logger.error("Error message")
    logger.critical("CRITICAL MESSAGE!")

    if test == "throw_error":
        raise ValueError("Some error!")

    dumpfn({"example": "status"}, ctx.obj.output_status_json)
Example #2
File: run_model.py Project: xue-smile/beep
def main():
    """
    Main function of this module, takes in arguments of an input
    and output filename corresponding to featurized run data
    and creates a predictor object output for analysis/ML processing
    """
    # Parse args and construct initial cycler run
    logger.info("starting", extra=s)
    logger.info("Running version=%s", __version__, extra=s)
    try:
        args = docopt(__doc__)
        input_json = args["INPUT_JSON"]
        if args["--fit"]:
            print(
                process_file_list_from_json(
                    input_json, predict_only=False, model_dir=MODEL_DIR
                ),
                end="",
            )
        else:
            print(process_file_list_from_json(input_json, model_dir=MODEL_DIR), end="")
    except Exception as e:
        logger.error(str(e), extra=s)
        raise e
    logger.info("finish", extra=s)
    return None
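These main() entry points all share the same docopt pattern: the module docstring defines the usage text, docopt(__doc__) parses sys.argv against it, and the INPUT_JSON positional argument comes back in the args dict. A minimal sketch of that pattern follows; the usage text is invented for illustration and is not the project's actual docstring.

"""Usage:
    run_model.py INPUT_JSON [--fit]

Options:
    --fit   Train a model before predicting.
"""
from docopt import docopt

if __name__ == "__main__":
    args = docopt(__doc__)  # e.g. {'INPUT_JSON': 'file_list.json', '--fit': False}
    print(args["INPUT_JSON"])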
Example #3
def main():
    logger.info('starting', extra=s)
    logger.info('Running version=%s', __version__, extra=s)
    try:
        args = docopt(__doc__)
        input_json = args['INPUT_JSON']
        print(validate_file_list_from_json(input_json), end="")
    except Exception as e:
        logger.error(str(e), extra=s)
        raise e
    logger.info('finish', extra=s)
    return None
Example #4
def main():
    """Main function for the script"""
    logger.info("starting", extra=s)
    logger.info("Running version=%s", __version__, extra=s)
    try:
        args = docopt(__doc__)
        input_json = args["INPUT_JSON"]
        print(process_csv_file_list_from_json(input_json), end="")
    except Exception as e:
        logger.error(str(e), extra=s)
        raise e
    logger.info("finish", extra=s)
    return None
Example #5
def main():
    """Main function of this module, takes in arguments of an input
    and output filename and uses the input file to create a
    structured data output for analysis/ML processing.
    """
    logger.info("starting", extra=SERVICE_CONFIG)
    logger.info("Running version=%s", __version__, extra=SERVICE_CONFIG)
    try:
        args = docopt(__doc__)
        input_json = args["INPUT_JSON"]
        print(process_file_list_from_json(input_json))
    except Exception as e:
        logger.error(str(e), extra=SERVICE_CONFIG)
        raise e
    logger.info("finish", extra=SERVICE_CONFIG)
    return None
Example #6
File: validate.py Project: ardunn/beep
def main():
    """
    Main function for running of this module as a script

    Returns:
        (None)

    """
    logger.info("starting", extra=s)
    logger.info("Running version=%s", __version__, extra=s)
    try:
        args = docopt(__doc__)
        input_json = args["INPUT_JSON"]
        print(validate_file_list_from_json(input_json), end="")
    except Exception as e:
        logger.error(str(e), extra=s)
        raise e
    logger.info("finish", extra=s)
    return None
Example #7
def main():
    """
    Main function of this module, takes in arguments of an input
    and output filename corresponding to structured cycler run data
    and creates a predictor object output for analysis/ML processing

    Returns:
        None

    """
    # Parse args and construct initial cycler run
    logger.info('starting', extra=s)
    logger.info('Running version=%s', __version__, extra=s)
    try:
        args = docopt(__doc__)
        input_json = args['INPUT_JSON']
        print(process_file_list_from_json(input_json), end="")
    except Exception as e:
        logger.error(str(e), extra=s)
        raise e
    logger.info('finish', extra=s)
    return None
Example #8
def process_file_list_from_json(file_list_json,
                                processed_dir='data-share/features/',
                                features_label='full_model',
                                predict_only=False,
                                prediction_type="multi",
                                predicted_quantity="cycle"):
    """
    Function to take a json file containing processed cycler run file locations,
    extract features, dump the processed file into a predetermined directory,
    and return a jsonable dict of feature file locations.

    Args:
        file_list_json (str): json string or json filename corresponding
            to a dictionary with a file_list attribute,
            if this string ends with ".json", a json file is assumed
            and loaded, otherwise interpreted as a json string.
        processed_dir (str): location for processed cycler run output files
            to be placed.
        features_label (str): name of feature generation method.
        predict_only (bool): whether to calculate predictions or not.
        prediction_type (str): Single or multi-point predictions.
        predicted_quantity (str): quantity being predicted - cycle or capacity.

    Returns:
        str: json string of feature files (with key "file_list").

    """
    # Get file list and validity from json, if ends with .json,
    # assume it's a file, if not assume it's a json string
    if file_list_json.endswith(".json"):
        file_list_data = loadfn(file_list_json)
    else:
        file_list_data = json.loads(file_list_json)

    # Setup Events
    events = KinesisEvents(service='DataAnalyzer', mode=file_list_data['mode'])

    # Add root path to processed_dir
    processed_dir = os.path.join(os.environ.get("BEEP_ROOT", "/"),
                                 processed_dir)
    file_list = file_list_data['file_list']
    run_ids = file_list_data['run_list']
    processed_run_list = []
    processed_result_list = []
    processed_message_list = []
    processed_paths_list = []

    for path, run_id in zip(file_list, run_ids):
        logger.info('run_id=%s featurizing=%s', str(run_id), path, extra=s)
        processed_cycler_run = loadfn(path)

        featurizer_classes = [DeltaQFastCharge, TrajectoryFastCharge]
        for featurizer_class in featurizer_classes:
            featurizer = featurizer_class.from_run(path, processed_dir,
                                                   processed_cycler_run)
            if featurizer:
                dumpfn(featurizer, featurizer.name)
                processed_paths_list.append(featurizer.name)
                processed_run_list.append(run_id)
                processed_result_list.append("success")
                processed_message_list.append({'comment': '', 'error': ''})
                logger.info('Successfully generated %s',
                            featurizer.name,
                            extra=s)
            else:
                processed_paths_list.append(path)
                processed_run_list.append(run_id)
                processed_result_list.append("incomplete")
                processed_message_list.append({
                    'comment':
                    'Insufficient or incorrect data for featurization',
                    'error': ''
                })
                logger.info('Unable to featurize %s', path, extra=s)

    output_data = {
        "file_list": processed_paths_list,
        "run_list": processed_run_list,
        "result_list": processed_result_list,
        "message_list": processed_message_list
    }

    events.put_analyzing_event(output_data, 'featurizing', 'complete')
    # Return jsonable file list
    return json.dumps(output_data)
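A hypothetical call sketch for the function above: it accepts either a path ending in ".json" or a raw JSON string, and the payload needs at least "file_list", "run_list", and "mode" keys (the mode is consumed by KinesisEvents). The path, run id, and mode value below are placeholders, not values taken from the project.

import json

payload = json.dumps({
    "mode": "events_off",  # placeholder; whatever KinesisEvents expects in your deployment
    "file_list": ["/tmp/PredictionDiagnostics_000001_structure.json"],
    "run_list": [1],
})
feature_files_json = process_file_list_from_json(payload)
print(json.loads(feature_files_json)["file_list"])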
Example #9
def process_file_list_from_json(file_list_json, model_dir="/data-share/models/",
                                processed_dir='data-share/predictions/',
                                hyperparameters=None, model_name=None, predict_only=True):
    """
    Function to take a json file containing featurized json locations,
    train a new model if necessary, write files containing predictions into a
    predetermined directory, and return a jsonable dict of prediction file locations

    Args:
        file_list_json (str): json string or json filename corresponding
            to a dictionary with a file_list attribute,
            if this string ends with ".json", a json file is assumed
            and loaded, otherwise interpreted as a json string
        model_dir (str): location where models are serialized and stored
        processed_dir (str): location for processed cycler run output files
            to be placed
        hyperparameters (dict): dictionary of hyperparameters to optimize/use for training
        model_name (str): name of the serialized model to use, or to assign
            to a newly trained model
        predict_only (bool): if True, skip training and only run predictions
            with an existing serialized model

    Returns:
        str: json string of prediction files (with key "file_list").

    """
    # Get file list and validity from json, if ends with .json,
    # assume it's a file, if not assume it's a json string
    if file_list_json.endswith(".json"):
        file_list_data = loadfn(file_list_json)
    else:
        file_list_data = json.loads(file_list_json)

    # Setup Events
    events = KinesisEvents(service='DataAnalyzer', mode=file_list_data['mode'])

    # Add BEEP_ROOT to processed_dir
    processed_dir = os.path.join(os.environ.get("BEEP_ROOT", "/"),
                                 processed_dir)
    file_list = file_list_data['file_list']
    run_ids = file_list_data['run_list']
    processed_run_list = []
    processed_result_list = []
    processed_message_list = []
    processed_paths_list = []
    project_name = get_project_name_from_list(file_list)
    if predict_only:
        features = loadfn(file_list[0])
        if model_name is None and project_name in DEFAULT_MODEL_PROJECTS:

            if features.prediction_type == 'multi':
                model = DegradationModel.from_serialized_model(model_dir=model_dir,
                                                               serialized_model='d3batt_multi_point.model')
            else:
                model = DegradationModel.from_serialized_model(model_dir=model_dir,
                                                               serialized_model='d3batt_single_point.model')

        elif model_name is None and project_name not in DEFAULT_MODEL_PROJECTS:
            output_data = {"file_list": [],
                           "run_list": [],
                           "result_list": [],
                           "message_list": []
                           }

            events.put_analyzing_event(output_data, 'predicting', 'error')

            # Return jsonable file list
            return json.dumps(output_data)

        else:
            model = DegradationModel.from_serialized_model(model_dir=model_dir,
                                                           serialized_model=model_name)

    else:
        if hyperparameters is None:
            hyperparameters = {'random_state': 1,
                               'test_size': .3,
                               'k_fold': 5,
                               'tol': 0.001,
                               'l1_ratio': [.1, .5, .7, .9, .95, .99, 1]
                               }

        dataset_id = file_list_data.get("dataset_id")
        model = DegradationModel.train(file_list_json, dataset_id=dataset_id,
                                       model_type='linear', regularization_type='elasticnet',
                                       model_name=model_name, hyperparameters=hyperparameters)
        logger.warning('fitting=%s dataset=%s', model.name, str(dataset_id), extra=s)

    for path, run_id in zip(file_list, run_ids):
        logger.info('model=%s run_id=%s predicting=%s', model.name, str(run_id), path, extra=s)
        features = loadfn(path)
        prediction = model.predict(features)
        prediction_dict = model.prediction_to_dict(prediction, features.nominal_capacity)
        new_filename = os.path.basename(path)
        new_filename = scrub_underscore_suffix(new_filename)
        new_filename = add_suffix_to_filename(new_filename, "_predictions")
        processed_path = os.path.join(processed_dir, new_filename)
        processed_path = os.path.abspath(processed_path)
        dumpfn(prediction_dict, processed_path)

        # Append file loc to list to be returned
        processed_paths_list.append(processed_path)
        processed_run_list.append(run_id)
        processed_result_list.append("success")
        processed_message_list.append({'comment': '',
                                       'error': ''})

    output_data = {"file_list": processed_paths_list,
                   "run_list": processed_run_list,
                   "result_list": processed_result_list,
                   "message_list": processed_message_list
                   }

    events.put_analyzing_event(output_data, 'predicting', 'complete')

    # Return jsonable file list
    return json.dumps(output_data)
Example #10
def generate_protocol_files_from_csv(csv_filename, output_directory=None):

    """
    Generates a set of protocol files from a csv input file by
    reading the protocol parameters on each line of the csv.
    Also writes a csv file listing the names of the generated files.

    Args:
        csv_filename (str): CSV containing protocol file parameters.
        output_directory (str): directory in which to place the output files
    """
    # Read csv file
    protocol_params_df = pd.read_csv(csv_filename)

    new_files = []
    names = []
    result = ""
    message = {"comment": "", "error": ""}
    if output_directory is None:
        output_directory = PROCEDURE_TEMPLATE_DIR
    for index, protocol_params in protocol_params_df.iterrows():
        template = protocol_params["template"]
        # Filename for the output
        filename_prefix = "_".join(
            [
                protocol_params["project_name"],
                "{:06d}".format(protocol_params["seq_num"]),
            ]
        )

        # Switch for template invocation
        if template == "EXP.000":
            protocol = Procedure.from_exp(
                **protocol_params[["cutoff_voltage", "charge_rate", "discharge_rate"]]
            )
            filename = "{}.000".format(filename_prefix)
            filename = os.path.join(output_directory, "procedures", filename)
        elif template == "diagnosticV2.000":
            diag_params_df = pd.read_csv(
                os.path.join(PROCEDURE_TEMPLATE_DIR, "PreDiag_parameters - DP.csv")
            )
            diagnostic_params = diag_params_df[
                diag_params_df["diagnostic_parameter_set"]
                == protocol_params["diagnostic_parameter_set"]
            ].squeeze()

            # TODO: should these be separated?
            protocol = Procedure.from_regcyclev2(protocol_params)
            protocol.add_procedure_diagcyclev2(
                protocol_params["capacity_nominal"], diagnostic_params
            )
            filename = "{}.000".format(filename_prefix)
            filename = os.path.join(output_directory, "procedures", filename)
        # TODO: how are these different?
        elif template in ["diagnosticV3.000", "diagnosticV4.000"]:
            diag_params_df = pd.read_csv(
                os.path.join(PROCEDURE_TEMPLATE_DIR, "PreDiag_parameters - DP.csv")
            )
            diagnostic_params = diag_params_df[
                diag_params_df["diagnostic_parameter_set"]
                == protocol_params["diagnostic_parameter_set"]
            ].squeeze()

            protocol = Procedure.generate_procedure_regcyclev3(index, protocol_params)
            protocol.generate_procedure_diagcyclev3(
                protocol_params["capacity_nominal"], diagnostic_params
            )
            filename = "{}.000".format(filename_prefix)
            filename = os.path.join(output_directory, "procedures", filename)
        elif template == "formationV1.mps":
            protocol = Settings.from_file(os.path.join(BIOLOGIC_TEMPLATE_DIR, template))
            protocol = protocol.formation_protocol_bcs(protocol, protocol_params)
            filename = "{}.mps".format(filename_prefix)
            filename = os.path.join(output_directory, "settings", filename)
        else:
            warnings.warn("Unsupported file template {}, skipping.".format(template))
            result = "error"
            message = {
                "comment": "Unable to find template: " + template,
                "error": "Not Found",
            }
            continue

        logger.info(filename, extra=s)
        if not os.path.isfile(filename):
            protocol.to_file(filename)
            new_files.append(filename)
            names.append(filename_prefix + "_")

        elif ".sdu" in template:
            logger.warning("Schedule file generation not yet implemented", extra=s)
            result = "error"
            message = {
                "comment": "Schedule file generation is not yet implemented",
                "error": "Not Implemented",
            }

    # This block of code produces the file containing all of the run file
    # names produced in this function call. This is to make starting tests easier
    _, namefile = os.path.split(csv_filename)
    namefile = namefile.split("_")[0] + "_names_"
    namefile = namefile + datetime.datetime.now().strftime("%Y%m%d_%H%M") + ".csv"
    with open(
        os.path.join(output_directory, "names", namefile), "w", newline=""
    ) as outputfile:
        wr = csv.writer(outputfile)
        for name in names:
            wr.writerow([name])

    if not result:
        result = "success"
        message = {
            "comment": "Generated {} protocols".format(str(len(new_files))),
            "error": "",
        }

    return new_files, result, message
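A hypothetical input sketch for the function above: each csv row is dispatched on its "template" column, and the EXP.000 branch reads project_name, seq_num, cutoff_voltage, charge_rate and discharge_rate. The column names are taken from the code; the values are made up.

import pandas as pd

pd.DataFrame([{
    "template": "EXP.000",
    "project_name": "EXAMPLE",
    "seq_num": 1,
    "cutoff_voltage": 4.2,
    "charge_rate": 1.0,
    "discharge_rate": 1.0,
}]).to_csv("example_params.csv", index=False)

new_files, result, message = generate_protocol_files_from_csv("example_params.csv")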
Example #11
def generate_protocol_files_from_csv(csv_filename, output_directory, **kwargs):
    """
    Generates a set of protocol files from a csv input file by
    reading the protocol parameters on each line of the csv.
    Also writes a csv file listing the names of the generated files.

    Args:
        csv_filename (str): CSV containing protocol file parameters.
        output_directory (str): directory in which to place the output files
        **kwargs: kwargs to ProcedureFile, the object which does the protocol
            file generation
    """
    # Invoke ProcedureFile object from **kwargs
    procedure_file_generator = ProcedureFile(**kwargs)

    # Read csv file
    protocol_params_df = pd.read_csv(csv_filename)

    new_files = []
    names = []
    result = ''
    message = {'comment': '',
               'error': ''}
    for index, protocol_params in protocol_params_df.iterrows():
        template = protocol_params['template']
        if template not in ["EXP.000", "diagnosticV1.000", "diagnosticV2.000", "diagnosticV3.000"]:
            warnings.warn("Unsupported file template {}, skipping.".format(template))
            result = "error"
            message = {'comment': 'Unable to find template: ' + template,
                       'error': 'Not Found'}
            continue

        if ".000" in template:
            # Generate primary procedure dictionary
            proc_dict, sp = procedure_file_generator.to_dict(
                os.path.join(PROCEDURE_TEMPLATE_DIR, "{}".format(template)),
                os.path.join(PROCEDURE_TEMPLATE_DIR, "{}.json".format(template.split('.')[0]))
            )

            # Generate EXP-based proc_dict
            if template == "EXP.000":
                proc_dict = procedure_file_generator.generate_procedure_exp(
                    proc_dict, **protocol_params[["cutoff_voltage", "charge_rate", "discharge_rate"]])
            elif template == 'diagnosticV2.000':
                diag_params_df = pd.read_csv(os.path.join(PROCEDURE_TEMPLATE_DIR,
                                                          "PreDiag_parameters - DP.csv"))
                diagnostic_params = diag_params_df[diag_params_df['diagnostic_parameter_set'] ==
                                                   protocol_params['diagnostic_parameter_set']].squeeze()

                proc_dict = procedure_file_generator.generate_procedure_regcyclev2(
                    proc_dict, protocol_params)
                proc_dict = procedure_file_generator.generate_procedure_diagcyclev2(
                    proc_dict, protocol_params["capacity_nominal"], diagnostic_params)
            elif template == 'diagnosticV3.000':
                diag_params_df = pd.read_csv(os.path.join(PROCEDURE_TEMPLATE_DIR,
                                                          "PreDiag_parameters - DP.csv"))
                diagnostic_params = diag_params_df[diag_params_df['diagnostic_parameter_set'] ==
                                                   protocol_params['diagnostic_parameter_set']].squeeze()

                proc_dict = procedure_file_generator.generate_procedure_regcyclev3(
                    index, proc_dict, protocol_params)
                proc_dict = procedure_file_generator.generate_procedure_diagcyclev3(
                    proc_dict, protocol_params["capacity_nominal"], diagnostic_params)

            filename_prefix = '_'.join(
                [protocol_params["project_name"], '{:06d}'.format(protocol_params["seq_num"])])
            filename = "{}.000".format(filename_prefix)
            filename = os.path.join(output_directory, 'procedures', filename)
            logger.info(filename, extra=s)
            if not os.path.isfile(filename):
                proc_dict = procedure_file_generator.maccor_format_dict(proc_dict)
                procedure_file_generator.dict_to_xml(
                    proc_dict=proc_dict, xml_file=filename, sp=sp)
                new_files.append(filename)
                names.append(filename_prefix + '_')

        elif '.sdu' in template:
            logger.warning('Schedule file generation not yet implemented', extra=s)
            result = "error"
            message = {'comment': 'Schedule file generation is not yet implemented',
                       'error': 'Not Implemented'}

    # This block of code produces the file containing all of the run file
    # names produced in this function call. This is to make starting tests easier
    _, namefile = os.path.split(csv_filename)
    namefile = namefile.split('_')[0] + '_names_'
    namefile = namefile + datetime.datetime.now().strftime("%Y%m%d_%H%M") + '.csv'
    with open(os.path.join(output_directory, "names", namefile), 'w') as outputfile:
        wr = csv.writer(outputfile)
        for name in names:
            wr.writerow([name])

    if not result:
        result = "success"
        message = {'comment': 'Generated {} protocols'.format(str(len(new_files))),
                   'error': ''}

    return new_files, result, message
Example #12
def generate_protocol_files_from_csv(csv_filename, output_directory=None):

    """
    Generates a set of protocol files from a csv input file by
    reading the protocol parameters on each line of the csv.
    Also writes a csv file listing the names of the generated files.

    Args:
        csv_filename (str): CSV containing protocol file parameters.
        output_directory (str): directory in which to place the output files
    """
    # Read csv file
    protocol_params_df = pd.read_csv(csv_filename)

    successfully_generated_files = []
    file_generation_failures = []
    names = []
    result = ""
    message = {"comment": "", "error": ""}
    if output_directory is None:
        output_directory = PROCEDURE_TEMPLATE_DIR

    for index, protocol_params in protocol_params_df.iterrows():
        template = protocol_params["template"]
        protocol = None
        # Filename for the output
        filename_prefix = "_".join(
            [
                protocol_params["project_name"],
                "{:06d}".format(protocol_params["seq_num"]),
            ]
        )
        if ".000" in template:  # Extension for maccor procedure files
            template_fullpath = os.path.join(PROCEDURE_TEMPLATE_DIR, template)
            template_length = template_detection(template_fullpath)
            if "diagnostic_parameter_set" in protocol_params:  # For parameters include diagnostics load those values
                diag_params_df = pd.read_csv(
                    os.path.join(PROCEDURE_TEMPLATE_DIR, "PreDiag_parameters - DP.csv")
                )
                diagnostic_params = diag_params_df[
                    diag_params_df["diagnostic_parameter_set"]
                    == protocol_params["diagnostic_parameter_set"]
                    ].squeeze()

            if template_length == 23 and template == "EXP.000":  # length and name for initial procedure files
                protocol = Procedure.from_exp(
                    **protocol_params[["cutoff_voltage", "charge_rate", "discharge_rate"]]
                )
            elif template_length == 72:  # length for V1 and V1 diagnostic templates without ending diagnostics
                protocol = Procedure.from_regcyclev2(protocol_params)
                protocol.add_procedure_diagcyclev2(
                    protocol_params["capacity_nominal"], diagnostic_params
                )
            elif template_length == 96:  # template length for diagnostic type cycling
                mwf_dir = os.path.join(output_directory, "mwf_files")
                if protocol_params["project_name"] == "RapidC":  # Project with charging waveform
                    waveform_name = insert_charging_parametersv1(protocol_params,
                                                                 waveform_directory=mwf_dir)
                    protocol = Procedure.generate_procedure_chargingv1(index,
                                                                       protocol_params,
                                                                       waveform_name,
                                                                       template=template_fullpath)
                elif protocol_params["project_name"] == "Drive":  # Project with discharging waveform
                    waveform_name = insert_driving_parametersv1(protocol_params,
                                                                waveform_directory=mwf_dir)
                    protocol = Procedure.generate_procedure_drivingv1(index,
                                                                      protocol_params,
                                                                      waveform_name,
                                                                      template=template_fullpath)
                else:  # Use the default parameterization for PreDiag/Prediction Diagnostic projects
                    protocol = Procedure.generate_procedure_regcyclev3(index,
                                                                       protocol_params,
                                                                       template=template_fullpath)
                protocol.generate_procedure_diagcyclev3(
                    protocol_params["capacity_nominal"], diagnostic_params
                )
            else:  # Case where its not possible to match the procedure template
                failure = {
                    "comment": "Unable to find template: " + template,
                    "error": "Not Found",
                }
                file_generation_failures.append(failure)
                warnings.warn("Unsupported file template {}, skipping.".format(template))
                result = "error"
                continue

            filename = "{}.000".format(filename_prefix)
            filename = os.path.join(output_directory, "procedures", filename)

        elif ".mps" in template and template == "formationV1.mps":  # biologic settings template and formation project
            protocol = Settings.from_file(os.path.join(BIOLOGIC_TEMPLATE_DIR, template))
            protocol = protocol.formation_protocol_bcs(protocol_params)
            filename = "{}.mps".format(filename_prefix)
            filename = os.path.join(output_directory, "settings", filename)
        elif ".sdu" in template:  # No schedule file templates implemented
            failure = {
                "comment": "Schedule file generation is not yet implemented",
                "error": "Not Implemented"
            }
            file_generation_failures.append(failure)
            logger.warning("Schedule file generation not yet implemented", extra=s)
            result = "error"
            continue
        else:  # Unable to match to any known template format
            failure = {
                "comment": "Unable to find template: " + template,
                "error": "Not Found",
            }
            file_generation_failures.append(failure)
            warnings.warn("Unsupported file template {}, skipping.".format(template))
            result = "error"
            continue

        logger.info(filename, extra=s)
        protocol.to_file(filename)
        successfully_generated_files.append(filename)
        names.append(filename_prefix + "_")

    # This block of code produces the file containing all of the run file
    # names produced in this function call. This is to make starting tests easier
    _, namefile = os.path.split(csv_filename)
    namefile = namefile.split("_")[0] + "_names_"
    namefile = namefile + datetime.datetime.now().strftime("%Y%m%d_%H%M") + ".csv"

    names_dir = os.path.join(output_directory, "names")
    os.makedirs(names_dir, exist_ok=True)

    with open(os.path.join(names_dir, namefile), "w", newline="") as outputfile:
        wr = csv.writer(outputfile)
        for name in names:
            wr.writerow([name])

    num_generated_files = len(successfully_generated_files)
    num_generation_failures = len(file_generation_failures)
    num_files = num_generated_files + num_generation_failures

    message = {
        "comment": "Generated {} of {} protocols".format(num_generated_files, num_files),
        "error": ""
    }
    if not result:
        result = "success"
    else:
        message["error"] = "Failed to generate {} of {} protocols".format(num_generation_failures, num_files)
        logger.error(message["error"])

    return successfully_generated_files, file_generation_failures, result, message
Example #13
File: model.py Project: TRI-AMDD/beep
    def train(self, X: pd.DataFrame = None, y: pd.DataFrame = None):
        """Train on 100% of available data.

        Args:
            X (pd.Dataframe): Clean and homogenized learning features.
                If not specified, df defined in __init__ (all training
                data) is used.
            y (pd.DataFrame): Clean and homogenized targets. If not
                specified, df defined in __init__ (all training data)
                is used.

        Returns:
            model (BaseEstimator): The sklearn model, fit on training data.
            training_errors (dict): Training errors based on multiple metrics.

        """
        X = X if X is not None else self.X
        y = y if y is not None else self.y

        if not self.multi:
            y = y[self.targets[0]]

        X = self.scaler.fit_transform(X)

        logger.info(
            f"Training on {X.shape[0]} samples with {X.shape[1]} features "
            f"predicting {y.shape[0]}")

        kwargs = {
            "fit_intercept": True,
            "alphas": self.alphas,
            "cv": self.kfold,
            "max_iter": self.max_iter,
            "tol": self.tol,
            "l1_ratio": self.l1_ratio
        }

        if self.model_name == "elasticnet":
            if self.multi:
                cv_class = MultiTaskElasticNetCV
                model_class = MultiTaskElasticNet
            else:
                cv_class = ElasticNetCV
                model_class = ElasticNet
        elif self.model_name == "lasso":
            cv_class = LassoCV
            model_class = Lasso
            kwargs.pop("l1_ratio")
        elif self.model_name == "ridge":
            cv_class = RidgeCV
            model_class = Ridge
            kwargs.pop("l1_ratio")
            kwargs.pop("max_iter")
            kwargs.pop("tol")

            # Ridge has to have alphas set by hand as it has no
            # default alphas
            if not kwargs["alphas"]:
                kwargs["alphas"] = (1e-3, 1e-2, 1e-1, 1, 10, 100, 1000)
        else:
            raise NotImplementedError(f"Unsupported model '{self.model_name}'")

        # Search for optimal hyperparameters
        cv = cv_class(**kwargs)
        cv.fit(X, y)

        # Set optimal hyperparameters and refit
        optimal_hyperparameters = {"alpha": cv.alpha_}
        if self.model_name == "elasticnet":
            optimal_hyperparameters["l1_ratio"] = cv.l1_ratio_

        model_kwargs = {
            "fit_intercept": True,
            "normalize": False,
            "max_iter": self.max_iter,
        }
        model_kwargs.update(optimal_hyperparameters)
        self.optimal_hyperparameters = optimal_hyperparameters

        model = model_class(**model_kwargs)
        model.fit(X, y)
        self.model = model

        y_training = model.predict(X)
        y_training = pd.DataFrame(data=y_training, columns=self.targets)
        training_errors = self._score_arrays(y, y_training)
        return model, training_errors
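The method above follows a CV-then-refit pattern: a cross-validated estimator (e.g. ElasticNetCV) picks alpha and l1_ratio, then a plain estimator is refit on all of the data with those optimal values. Below is a standalone sketch of that pattern on synthetic data; it mirrors only the elasticnet branch and is not the project's model class.

import numpy as np
from sklearn.linear_model import ElasticNet, ElasticNetCV
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 10))
y = X @ rng.normal(size=10) + 0.1 * rng.normal(size=200)

X = StandardScaler().fit_transform(X)

# Hyperparameter search via cross-validation
cv = ElasticNetCV(fit_intercept=True, cv=5, max_iter=10000,
                  l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1.0])
cv.fit(X, y)

# Refit a plain estimator with the optimal hyperparameters
model = ElasticNet(alpha=cv.alpha_, l1_ratio=cv.l1_ratio_,
                   fit_intercept=True, max_iter=10000)
model.fit(X, y)
print(model.score(X, y))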
Example #14
def process_file_list_from_json(file_list_json,
                                processed_dir="data-share/structure/",
                                omit_raw=True):
    """Function to take a json filename corresponding to a data structure
    with a 'file_list' and a 'validity' attribute, process each file
    with a corresponding True validity, dump the processed file into
    a predetermined directory, and return a jsonable dict of processed
    cycler run file locations

    Args:
        file_list_json (str): json string or json filename corresponding
            to a dictionary with a file_list and validity attribute,
            if this string ends with ".json", a json file is assumed
            and loaded, otherwise interpreted as a json string.
        processed_dir (str): location for processed cycler run output
            files to be placed.
        omit_raw (bool): Omit the raw_data from being saved to file. Creates
            legacy file structure for all structured datapaths.

    Returns:
        (str): json string of processed files (with key "processed_file_list").
            Note that this list contains None values for every file that
            had a corresponding False in the validity list.

    """
    # Get file list and validity from json, if ends with .json,
    # assume it's a file, if not assume it's a json string
    if file_list_json.endswith(".json"):
        file_list_data = loadfn(file_list_json)
    else:
        file_list_data = json.loads(file_list_json)

    # Setup workflow
    outputs = WorkflowOutputs()

    # Prepend optional root to output directory
    processed_dir = os.path.join(os.environ.get("BEEP_PROCESSING_DIR", "/"),
                                 processed_dir)

    if not os.path.exists(processed_dir):
        os.makedirs(processed_dir)

    file_list = file_list_data["file_list"]
    validities = file_list_data["validity"]
    run_ids = file_list_data["run_list"]
    processed_file_list = []
    processed_run_list = []
    processed_result_list = []
    processed_message_list = []
    invalid_file_list = []
    for filename, validity, run_id in zip(file_list, validities, run_ids):
        logger.info("run_id=%s structuring=%s",
                    str(run_id),
                    filename,
                    extra=SERVICE_CONFIG)
        if validity == "valid":
            # Process datapath and dump to file

            dp = auto_load(filename)
            dp.autostructure()

            # raw_cycler_run = RawCyclerRun.from_file(filename)
            # processed_cycler_run = raw_cycler_run.to_processed_cycler_run()
            new_filename, ext = os.path.splitext(os.path.basename(filename))
            new_filename = new_filename + ".json"
            new_filename = add_suffix_to_filename(new_filename, "_structure")
            structured_run_loc = os.path.join(processed_dir, new_filename)
            structured_run_loc = os.path.abspath(structured_run_loc)
            dp.to_json_file(structured_run_loc, omit_raw)

            # Append file loc to list to be returned
            processed_file_list.append(structured_run_loc)
            processed_run_list.append(run_id)
            processed_result_list.append("success")
            processed_message_list.append({"comment": "", "error": ""})

        else:
            invalid_file_list.append(filename)

    output_json = {
        "file_list": processed_file_list,
        "run_list": processed_run_list,
        "result_list": processed_result_list,
        "message_list": processed_message_list,
        "invalid_file_list": invalid_file_list,
    }

    # Workflow outputs
    file_list_size = len(output_json["file_list"])
    if file_list_size > 1 or file_list_size == 0:
        logger.warning("{file_list_size} files being validated, should be 1")

    output_data = {
        "filename": output_json["file_list"][0],
        "run_id": output_json["run_list"][0],
        "result": output_json["result_list"][0],
    }

    outputs.put_workflow_outputs(output_data, "structuring")

    # Return jsonable file list
    return json.dumps(output_json)
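A hypothetical payload sketch for the structuring variant above: in addition to "file_list" and "run_list" it requires a "validity" list aligned with them, and only entries marked "valid" are structured. The path and run id are placeholders.

import json

payload = json.dumps({
    "file_list": ["/tmp/2017-05-09_test-TC-contact_CH33.csv"],
    "validity": ["valid"],
    "run_list": [33],
})
structured_json = process_file_list_from_json(payload, processed_dir="data-share/structure/")
print(json.loads(structured_json)["file_list"])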
Example #15
    def validate_from_paths(self,
                            paths,
                            record_results=False,
                            skip_existing=False,
                            record_path=DEFAULT_VALIDATION_RECORDS):
        """
        This method streamlines validation of multiple Arbin csv files given a list of paths.

        It can also do bookkeeping of validations by dumping results in a json file,
        locally until a more centralized method is implemented.

        Args:
            paths (list): a list of paths to csv files
            record_results (bool): Whether to record the validation results locally or not (defaults to False)
            skip_existing (bool): Whether to skip already validated files. This is done by checking if the
                                    file is in the validation_records. skip_existing only matters if record_results
                                    is True. (defaults to False)
            record_path (str): path to the json file storing the past validation results.
        Returns:
            dict: Results of the validation in the form of a key,value pairs where each key corresponds to the filename
                validated. For each file, the results contain a field "validated", True if validation was successful or
                False if not. "errors", "method" and "time" are simply the errors encountered during validation, method
                used for validation, and time of validation, respectively.

        """
        if record_results:
            if os.path.isfile(record_path):
                self.validation_records = loadfn(record_path)
                if skip_existing:
                    paths = [
                        path for path in paths if os.path.basename(path) not in
                        self.validation_records
                    ]
            else:
                self.validation_records = {}

        results = {}
        for path in tqdm(paths):
            name = os.path.basename(path)
            results[name] = {}
            if re.match(ARBIN_CONFIG['file_pattern'], path):
                schema_filename = os.path.join(VALIDATION_SCHEMA_DIR,
                                               "schema-arbin-lfp.yaml")
                self.schema = loadfn(schema_filename)
                df = pd.read_csv(path, index_col=0)
                validated, reason = self.validate(df)
                method = "simple_arbin"
            elif re.match(MACCOR_CONFIG['file_pattern'], path):
                schema_filename = os.path.join(VALIDATION_SCHEMA_DIR,
                                               "schema-maccor-2170.yaml")
                self.schema = loadfn(schema_filename)
                self.allow_unknown = True
                df = pd.read_csv(path, delimiter='\t', skiprows=1)

                # Columns need to be retyped and renamed for validation,
                # conversion will happen during structuring
                df['State'] = df['State'].astype(str)
                df['current'] = df['Amps']

                validated, reason = self.validate(df)
                method = "simple_maccor"
            else:
                validated, reason = False, "File type not recognized"
                method = None
            results[name].update({
                "validated": validated,
                "method": method,
                "errors": reason,
                "time": json.dumps(datetime.now(), indent=4,
                                   sort_keys=True, default=str)
            })

            if validated:
                logger.info("%s method=%s errors=%s",
                            name,
                            method,
                            reason,
                            extra=s)
            else:
                logger.warning("%s method=%s errors=%s",
                               name,
                               method,
                               reason,
                               extra=s)

        if record_results:
            self.validation_records.update(results)
            dumpfn(self.validation_records, record_path)

        return results
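A short usage sketch for the method above, assuming validator is an instance of the enclosing validation class (not shown in this excerpt) and paths is a list of Arbin/Maccor csv paths; it simply walks the returned results dict.

results = validator.validate_from_paths(paths, record_results=False)
for name, info in results.items():
    status = "ok" if info["validated"] else "FAILED"
    print(f"{name}: {status} (method={info['method']}, errors={info['errors']})")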
Example #16
def process_file_list_from_json(
    file_list_json,
    model_dir="/data-share/models/",
    processed_dir="data-share/predictions/",
    hyperparameters=None,
    model_name=None,
    predict_only=True,
):
    """
    Function to take a json file containing featurized json locations,
    train a new model if necessary, write files containing predictions into a
    predetermined directory, and return a jsonable dict of prediction file locations

    Args:
        file_list_json (str): json string or json filename corresponding
            to a dictionary with a file_list attribute,
            if this string ends with ".json", a json file is assumed
            and loaded, otherwise interpreted as a json string
        model_dir (str): location where models are serialized and stored
        processed_dir (str): location for processed cycler run output files
            to be placed
        hyperparameters (dict): dictionary of hyperparameters to optimize/use for training
        model_name (str): name of the serialized model to use, or to assign
            to a newly trained model
        predict_only (bool): if True, skip training and only run predictions
            with an existing serialized model

    Returns:
        str: json string of prediction files (with key "file_list").

    """
    # Get file list and validity from json, if ends with .json,
    # assume it's a file, if not assume it's a json string
    if file_list_json.endswith(".json"):
        file_list_data = loadfn(file_list_json)
    else:
        file_list_data = json.loads(file_list_json)

    # Setup workflow TODO

    # Add BEEP_PROCESSING_DIR to processed_dir
    processed_dir = os.path.join(os.environ.get("BEEP_PROCESSING_DIR", "/"),
                                 processed_dir)
    if not os.path.exists(processed_dir):
        os.makedirs(processed_dir)

    file_list = file_list_data["file_list"]
    run_ids = file_list_data["run_list"]
    processed_run_list = []
    processed_result_list = []
    processed_message_list = []
    processed_paths_list = []
    project_name = get_project_name_from_list(file_list)
    if predict_only:
        features = loadfn(file_list[0])
        if model_name is None and project_name in DEFAULT_MODEL_PROJECTS:

            if features.prediction_type == "multi":
                model = DegradationModel.from_serialized_model(
                    model_dir=model_dir,
                    serialized_model="d3batt_multi_point.model")
            else:
                model = DegradationModel.from_serialized_model(
                    model_dir=model_dir,
                    serialized_model="d3batt_single_point.model")

        elif model_name is None and project_name not in DEFAULT_MODEL_PROJECTS:
            output_data = {
                "file_list": [],
                "run_list": [],
                "result_list": [],
                "message_list": [],
            }

            # Return jsonable file list
            return json.dumps(output_data)

        else:
            model = DegradationModel.from_serialized_model(
                model_dir=model_dir, serialized_model=model_name)

    else:
        if hyperparameters is None:
            hyperparameters = {
                "random_state": 1,
                "test_size": 0.3,
                "k_fold": 5,
                "tol": 0.001,
                "l1_ratio": [0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1],
            }

        dataset_id = file_list_data.get("dataset_id")
        model = DegradationModel.train(
            file_list_json,
            dataset_id=dataset_id,
            model_type="linear",
            regularization_type="elasticnet",
            model_name=model_name,
            hyperparameters=hyperparameters,
        )
        logger.warning("fitting=%s dataset=%s",
                       model.name,
                       str(dataset_id),
                       extra=s)

    for path, run_id in zip(file_list, run_ids):
        logger.info("model=%s run_id=%s predicting=%s",
                    model.name,
                    str(run_id),
                    path,
                    extra=s)
        features = loadfn(path)
        prediction = model.predict(features)
        prediction_dict = model.prediction_to_dict(prediction,
                                                   features.nominal_capacity)
        new_filename = os.path.basename(path)
        new_filename = scrub_underscore_suffix(new_filename)
        new_filename = add_suffix_to_filename(new_filename, "_predictions")
        processed_path = os.path.join(processed_dir, new_filename)
        processed_path = os.path.abspath(processed_path)
        dumpfn(prediction_dict, processed_path)

        # Append file loc to list to be returned
        processed_paths_list.append(processed_path)
        processed_run_list.append(run_id)
        processed_result_list.append("success")
        processed_message_list.append({"comment": "", "error": ""})

    output_data = {
        "file_list": processed_paths_list,
        "run_list": processed_run_list,
        "result_list": processed_result_list,
        "message_list": processed_message_list,
    }

    # Return jsonable file list
    return json.dumps(output_data)
Example #17
def process_file_list_from_json(file_list_json,
                                processed_dir='data-share/features/',
                                features_label='full_model',
                                predict_only=False,
                                prediction_type="multi",
                                predicted_quantity="cycle"):
    """
    Function to take a json file containing processed cycler run file locations,
    extract features, dump the processed file into a predetermined directory,
    and return a jsonable dict of feature file locations.

    Args:
        file_list_json (str): json string or json filename corresponding
            to a dictionary with a file_list attribute,
            if this string ends with ".json", a json file is assumed
            and loaded, otherwise interpreted as a json string.
        processed_dir (str): location for processed cycler run output files
            to be placed.
        features_label (str): name of feature generation method.
        predict_only (bool): whether to calculate predictions or not.
        prediction_type (str): Single or multi-point predictions.
        predicted_quantity (str): quantity being predicted - cycle or capacity.

    Returns:
        str: json string of feature files (with key "file_list").

    """
    # Get file list and validity from json, if ends with .json,
    # assume it's a file, if not assume it's a json string
    if file_list_json.endswith(".json"):
        file_list_data = loadfn(file_list_json)
    else:
        file_list_data = json.loads(file_list_json)

    # Setup Events
    events = KinesisEvents(service='DataAnalyzer', mode=file_list_data['mode'])

    # Add root path to processed_dir
    processed_dir = os.path.join(os.environ.get("BEEP_ROOT", "/"),
                                 processed_dir)
    file_list = file_list_data['file_list']
    run_ids = file_list_data['run_list']
    processed_run_list = []
    processed_result_list = []
    processed_message_list = []
    processed_paths_list = []

    required_cycle_num = 100  #for full model

    for path, run_id in zip(file_list, run_ids):
        logger.info('run_id=%s featurizing=%s', str(run_id), path, extra=s)

        #check if there is enough data to try featurizing
        if not len(loadfn(path).summary) > required_cycle_num:
            logger.info("run_id=%s Insufficient data for featurization",
                        str(run_id),
                        extra=s)
            processed_paths_list.append(path)
            processed_run_list.append(run_id)
            processed_result_list.append("incomplete")
            processed_message_list.append({
                'comment': 'Insufficient data for featurization',
                'error': ''
            })

        else:
            processed_data = DegradationPredictor.from_processed_cycler_run_file(
                path,
                features_label=features_label,
                predict_only=predict_only,
                prediction_type=prediction_type,
                predicted_quantity=predicted_quantity)
            new_filename = os.path.basename(path)
            new_filename = scrub_underscore_suffix(new_filename)

            # Append model_name along with "features" to demarcate
            # different models when saving the feature vectors.
            new_filename = add_suffix_to_filename(
                new_filename,
                "_" + features_label + "_" + prediction_type + "_features")
            processed_path = os.path.join(processed_dir, new_filename)
            processed_path = os.path.abspath(processed_path)
            dumpfn(processed_data, processed_path)
            processed_paths_list.append(processed_path)
            processed_run_list.append(run_id)
            processed_result_list.append("success")
            processed_message_list.append({'comment': '', 'error': ''})

    output_data = {
        "file_list": processed_paths_list,
        "run_list": processed_run_list,
        "result_list": processed_result_list,
        "message_list": processed_message_list
    }

    events.put_analyzing_event(output_data, 'featurizing', 'complete')
    # Return jsonable file list
    return json.dumps(output_data)
Example #18
def generate_protocol_files_from_csv(csv_filename, output_directory=None):
    """
    Generates a set of protocol files from a csv input file by
    reading the protocol parameters on each line of the csv.
    Also writes a csv file listing the names of the generated files.

    Args:
        csv_filename (str): CSV containing protocol file parameters.
        output_directory (str): directory in which to place the output files
    """
    # Read csv file
    protocol_params_df = pd.read_csv(csv_filename)

    new_files = []
    names = []
    result = ''
    message = {'comment': '', 'error': ''}
    if output_directory is None:
        output_directory = PROCEDURE_TEMPLATE_DIR
    for index, protocol_params in protocol_params_df.iterrows():
        template = protocol_params['template']

        # Switch for template invocation
        if template == "EXP.000":
            procedure = Procedure.from_exp(**protocol_params[
                ["cutoff_voltage", "charge_rate", "discharge_rate"]])
        elif template == 'diagnosticV2.000':
            diag_params_df = pd.read_csv(
                os.path.join(PROCEDURE_TEMPLATE_DIR,
                             "PreDiag_parameters - DP.csv"))
            diagnostic_params = diag_params_df[
                diag_params_df['diagnostic_parameter_set'] ==
                protocol_params['diagnostic_parameter_set']].squeeze()

            # TODO: should these be separated?
            procedure = Procedure.from_regcyclev2(protocol_params)
            procedure.add_procedure_diagcyclev2(
                protocol_params["capacity_nominal"], diagnostic_params)

        # TODO: how are these different?
        elif template in ['diagnosticV3.000', 'diagnosticV4.000']:
            diag_params_df = pd.read_csv(
                os.path.join(PROCEDURE_TEMPLATE_DIR,
                             "PreDiag_parameters - DP.csv"))
            diagnostic_params = diag_params_df[
                diag_params_df['diagnostic_parameter_set'] ==
                protocol_params['diagnostic_parameter_set']].squeeze()

            procedure = Procedure.generate_procedure_regcyclev3(
                index, protocol_params)
            procedure.generate_procedure_diagcyclev3(
                protocol_params["capacity_nominal"], diagnostic_params)
        else:
            warnings.warn(
                "Unsupported file template {}, skipping.".format(template))
            result = "error"
            message = {
                'comment': 'Unable to find template: ' + template,
                'error': 'Not Found'
            }
            continue

        filename_prefix = '_'.join([
            protocol_params["project_name"],
            '{:06d}'.format(protocol_params["seq_num"])
        ])
        filename = "{}.000".format(filename_prefix)
        filename = os.path.join(output_directory, 'procedures', filename)
        logger.info(filename, extra=s)
        if not os.path.isfile(filename):
            procedure.to_file(filename)
            new_files.append(filename)
            names.append(filename_prefix + '_')

        elif '.sdu' in template:
            logger.warning('Schedule file generation not yet implemented',
                           extra=s)
            result = "error"
            message = {
                'comment': 'Schedule file generation is not yet implemented',
                'error': 'Not Implemented'
            }

    # This block of code produces the file containing all of the run file
    # names produced in this function call. This is to make starting tests easier
    _, namefile = os.path.split(csv_filename)
    namefile = namefile.split('_')[0] + '_names_'
    namefile = namefile + datetime.datetime.now().strftime(
        "%Y%m%d_%H%M") + '.csv'
    with open(os.path.join(output_directory, "names", namefile),
              'w',
              newline='') as outputfile:
        wr = csv.writer(outputfile)
        for name in names:
            wr.writerow([name])

    if not result:
        result = "success"
        message = {
            'comment': 'Generated {} protocols'.format(str(len(new_files))),
            'error': ''
        }

    return new_files, result, message