コード例 #1
0
def main(args):
    """Score an input data frame with a previously trained PCA module.

    Args:
      args: parsed command-line arguments providing input_dir,
        model_input_dir and output_dir.
    """
    logger.debug(f'input-dir {args.input_dir}')
    logger.debug(f'model input dir {args.model_input_dir}')

    logger.debug(f'output-dir {args.output_dir}')

    input_df = load_data_frame_from_directory(args.input_dir).data
    logger.debug(f'{input_df.describe()}\n shape{input_df.shape} ')

    # Restore the fitted PCA module from the model directory.
    model = load_model_from_directory(args.model_input_dir,
                                      model_loader=pcamodule_loader).data
    logger.debug(model.pca_instance)

    output_df = score(model, input_df)
    logger.debug(f'output shape {output_df.shape}')

    schema = DataFrameSchema.data_frame_to_dict(output_df)
    save_data_frame_to_directory(save_to=args.output_dir,
                                 data=output_df,
                                 schema=schema)
コード例 #2
0
def main(args):
    """Compute SGT embeddings for a sequence column and save them.

    Loads a fitted SGT model and an input data frame, validates the
    sequence column for missing values, scores the data, and writes
    the resulting embeddings to the output directory.

    Args:
      args: parsed command-line arguments providing input_dir,
        model_input_dir, sequence_column, identifier_column and
        output_dir.
    """
    seq_col = args.sequence_column
    id_col = args.identifier_column

    logger.debug(f'input-dir {args.input_dir}')
    logger.debug(f'model input dir {args.model_input_dir}')
    logger.debug(f'sequence-column {seq_col}')
    logger.debug(f'identifier-column {id_col}')
    logger.debug(f'output-dir {args.output_dir}')

    sgt = load_model_from_directory(args.model_input_dir,
                                    model_loader=joblib_loader).data
    input_df = load_data_frame_from_directory(args.input_dir).data

    # Missing sequence values would break scoring; abort early.
    if input_df[seq_col].isnull().sum().sum() > 0:
        print(f'column{seq_col} contains missing values ')
        sys.exit(1)

    embedding_df = score(input_df, sgt, seq_col, id_col)
    # BUG FIX: the original had the 'f' prefix inside the quotes
    # ('f embedding shape{...}'), printing the placeholder literally
    # instead of interpolating the shape.
    print(f'embedding shape{embedding_df.shape}')
    print(embedding_df.head())

    save_data_frame_to_directory(
        save_to=args.output_dir,
        data=embedding_df,
        schema=DataFrameSchema.data_frame_to_dict(embedding_df))
コード例 #3
0
def main(args):
    """Fit a PCA module on the input data frame, saving both the
    transformed data and the fitted model.

    Args:
      args: parsed command-line arguments providing input_dir,
        output_dir, model_output_dir and the PCA transformer options.
    """
    logger.debug(f'input-dir {args.input_dir}')
    logger.debug(f'output-dir {args.output_dir}')
    logger.debug(f'model output dir {args.model_output_dir}')

    input_df = load_data_frame_from_directory(args.input_dir).data
    logger.debug(f'{input_df.describe()}\n shape{input_df.shape} ')

    # Build the PCA module from the user-supplied options.
    pca = PCAModule(args)
    logger.debug(pca.pca_instance)

    output_df = pca.fit_transform(input_df)
    pca.log_metrics(input_df.columns)
    logger.debug(f'output shape {output_df.shape}')

    schema = DataFrameSchema.data_frame_to_dict(output_df)
    save_data_frame_to_directory(save_to=args.output_dir,
                                 data=output_df,
                                 schema=schema)

    # Persist the fitted module alongside the transformed data.
    save_model_to_directory(save_to=args.model_output_dir,
                            model_dumper=pca_module_dumper(data=pca))
コード例 #4
0
def entrance(trained_model: str,
             dataset: str,
             scored_dataset: str,
             append_score_columns_to_output: str = "true"):
    """Score *dataset* with *trained_model* and save the result.

    Args:
        trained_model: directory containing the trained model.
        dataset: input directory; a DataFrameDirectory or an
            ImageDirectory is supported.
        scored_dataset: output directory for the scored data frame.
        append_score_columns_to_output: "true"/"false" flag forwarded
            to the scoring module.

    Raises:
        ValueError: if the input directory type is unsupported.
    """
    logger.info(
        f"append_score_columns_to_output = {append_score_columns_to_output}")
    params = {
        constants.APPEND_SCORE_COLUMNS_TO_OUTPUT_KEY:
        append_score_columns_to_output
    }
    score_module = BuiltinScoreModule(trained_model, params)
    any_directory = AnyDirectory.load(dataset)
    if any_directory.type == "DataFrameDirectory":
        input_dfd = DataFrameDirectory.load(dataset)
        logger.info(f"input_dfd =\n{input_dfd}")
        output_df = score_module.run(input_dfd)
    elif any_directory.type == "ImageDirectory":
        image_directory = ImageDirectory.load(dataset)
        output_df = score_module.run(image_directory)
    else:
        # BUG FIX: report the directory's declared type string; the original
        # interpolated type(any_directory), which is always the AnyDirectory
        # wrapper class and never the value the dispatch is based on.
        raise ValueError(
            f"Unsupported directory type: {any_directory.type}.")

    logger.info(f"output_df =\n{output_df}")
    logger.info(f"dumping to DFD {scored_dataset}")

    # TODO: Support other task types
    if score_module.model.task_type == TaskType.MultiClassification:
        predict_df = output_df
        score_columns = schema_utils.generate_score_column_meta(
            predict_df=predict_df)
        # Keep the label column reference only when it survived scoring.
        if score_module.model.label_column_name in predict_df.columns:
            label_column_name = score_module.model.label_column_name
        else:
            label_column_name = None
        meta_data = DataFrameSchema(
            column_attributes=DataFrameSchema.generate_column_attributes(
                df=predict_df),
            score_column_names=score_columns,
            label_column_name=label_column_name)
        save_data_frame_to_directory(scored_dataset,
                                     data=predict_df,
                                     schema=meta_data.to_dict())
    else:
        ioutils.save_dfd(output_df, scored_dataset)
コード例 #5
0
def main(args):
    """Compute pairwise textual similarity over one text column.

    Exits with status 1 if the column contains missing values;
    otherwise saves both the similarity matrix and the embeddings.

    Args:
      args: parsed command-line arguments providing input_dir,
        column_name, distance, transformer, sim_dir and embedding_dir.
    """
    transformer = SUPPORTED_TRANSFORMERS[args.transformer]

    logger.debug(f'input-dir {args.input_dir}')
    logger.debug(f'column {args.column_name}')
    logger.debug(f'distance {args.distance}')
    logger.debug(f'transformer {transformer}')
    logger.debug(f'sim-dir {args.sim_dir}')

    input_df = load_data_frame_from_directory(args.input_dir).data
    text_column = input_df[args.column_name]

    # Similarity cannot be computed over missing text; bail out early.
    if text_column.isnull().sum().sum() > 0:
        logger.debug(f'column{args.column_name} contains missing values ')
        sys.exit(1)

    model = TextualSimilarity(transformer=transformer,
                              distance_func=args.distance)
    embedding_df, sim_df = model.fit_transform(text_column.values)

    # Prepend the source text so each similarity row is identifiable.
    sim_df.insert(0, args.column_name, text_column)

    logger.debug(f'similarity matrix shape {sim_df.shape}')
    logger.debug(f'embedding  shape {embedding_df.shape}')

    save_data_frame_to_directory(
        save_to=args.sim_dir,
        data=sim_df,
        schema=DataFrameSchema.data_frame_to_dict(sim_df))

    save_data_frame_to_directory(
        save_to=args.embedding_dir,
        data=embedding_df,
        schema=DataFrameSchema.data_frame_to_dict(embedding_df))
コード例 #6
0
def main(args=None):
    """Compute pairwise column correlations and persist the matrix.

    Args:
      args: parsed command-line arguments providing input_dir,
        correlation_method and output_dir.
    """
    input_dir = args.input_dir
    corr_type = args.correlation_method

    logger.debug(f'input-dir {input_dir}')
    logger.debug(f'correlation-method {corr_type}')
    logger.debug(f'output-dir {args.output_dir}')

    input_df = load_data_frame_from_directory(input_dir).data

    module = ComputeCorrelationModule(corr_type)
    corr_df = module.compute(input_df)
    logger.debug(f'correlation matrix shape {corr_df.shape}')

    schema = DataFrameSchema.data_frame_to_dict(corr_df)
    save_data_frame_to_directory(save_to=args.output_dir,
                                 data=corr_df,
                                 schema=schema)
コード例 #7
0
def main(args=None):
    """Compute SGT embeddings for a sequence column, saving both the
    embeddings and the fitted SGT model.

    Exits with status 1 when the sequence column contains missing values.

    Args:
      args: parsed command-line arguments providing input_dir,
        sequence_column, identifier_column, length_sensitive, kappa,
        output_dir and model_output_dir.
    """
    seq_col = args.sequence_column
    id_col = args.identifier_column
    length_sensitive = args.length_sensitive
    kappa = args.kappa

    logger.debug(f'input-dir {args.input_dir}')
    logger.debug(f'sequence-column {seq_col}')
    logger.debug(f'identifier-column {id_col}')
    logger.debug(f'length-sensitive {length_sensitive}')
    logger.debug(f'kappa {args.kappa}')
    logger.debug(f'output-dir {args.output_dir}')
    logger.debug(f'model output dir {args.model_output_dir}')

    input_df = load_data_frame_from_directory(args.input_dir).data

    # Embeddings cannot be computed over missing sequences; bail out early.
    if input_df[seq_col].isnull().sum().sum() > 0:
        logger.debug(f'column {seq_col} contains missing values ')
        sys.exit(1)

    embedding_df, sgt = compute_embeddings(input_df, seq_col, kappa,
                                           length_sensitive, id_col)
    logger.debug(f'embedding shape {embedding_df.shape}')

    embedding_schema = DataFrameSchema.data_frame_to_dict(embedding_df)
    save_data_frame_to_directory(save_to=args.output_dir,
                                 data=embedding_df,
                                 schema=embedding_schema)

    # Persist the fitted model alongside the embeddings.
    save_model_to_directory(save_to=args.model_output_dir,
                            model_dumper=sgt_dumper(data=sgt))
コード例 #8
0
    # NOTE(review): this fragment starts mid-function — rating_true,
    # rating_pred, col_user, col_item, col_rating, col_prediction,
    # relevancy_method, k and threshold are presumably bound earlier in
    # the (unseen) function body; confirm against the full source.
    logger.debug(f"Rating True path: {args.rating_true}")
    logger.debug(f"Shape of loaded DataFrame: {rating_true.shape}")
    logger.debug(f"Rating Pred path: {args.rating_pred}")
    logger.debug(f"Shape of loaded DataFrame: {rating_pred.shape}")

    # Recall@k of the predicted ratings against the ground truth.
    eval_recall = recall_at_k(
        rating_true,
        rating_pred,
        col_user=col_user,
        col_item=col_item,
        col_rating=col_rating,
        col_prediction=col_prediction,
        relevancy_method=relevancy_method,
        k=k,
        threshold=threshold,
    )

    logger.debug(f"Score: {eval_recall}")

    # Log to AzureML dashboard
    run = Run.get_context()
    run.parent.log("Recall at {}".format(k), eval_recall)

    # Wrap the scalar metric in a one-row data frame so it can be saved
    # through the standard data-frame directory mechanism.
    score_result = pd.DataFrame({"recall_at_k": [eval_recall]})
    save_data_frame_to_directory(
        args.score_result,
        score_result,
        schema=DataFrameSchema.data_frame_to_dict(score_result),
    )
コード例 #9
0
    # NOTE(review): this fragment starts mid-function — ratio, col_user,
    # col_item, seed and input_df are presumably bound earlier in the
    # (unseen) function body; confirm against the full source.
    logger.debug(f"Ratio:    {ratio}")
    logger.debug(f"User:    {col_user}")
    logger.debug(f"Item:    {col_item}")
    logger.debug(f"Seed:    {seed}")

    logger.debug(f"Input path: {args.input_path}")
    logger.debug(f"Shape of loaded DataFrame: {input_df.shape}")
    logger.debug(f"Cols of DataFrame: {input_df.columns}")

    # NOTE(review): the values logged above are locals, but the split below
    # reads args.* directly — verify the locals mirror the args values.
    output_train, output_test = python_stratified_split(
        input_df,
        ratio=args.ratio,
        col_user=args.col_user,
        col_item=args.col_item,
        seed=args.seed,
    )

    logger.debug(f"Output path: {args.output_train}")
    logger.debug(f"Output path: {args.output_test}")

    save_data_frame_to_directory(
        args.output_train,
        output_train,
        schema=DataFrameSchema.data_frame_to_dict(output_train),
    )
    save_data_frame_to_directory(
        args.output_test,
        output_test,
        schema=DataFrameSchema.data_frame_to_dict(output_test),
    )
コード例 #10
0
 def build_column_attributes(self):
     """Generate column attributes for ``self.df`` and cache them.

     Stores the generated attributes on ``self.column_attributes``
     and returns them.
     """
     attributes = DataFrameSchema.generate_column_attributes(df=self.df)
     self.column_attributes = attributes
     return attributes