Ejemplo n.º 1
0
def save_hierarchy():
    """
    Write the distinct class hierarchy to file.

    Runs QUERY_ALL, keeps the unique combinations of the 'p_class',
    's_class' and 't_class' columns, and writes them as CSV to
    HIERARCHY_DIR. The file is meant for enforcing the hierarchy in the
    secondary and tertiary models.
    """
    hierarchy_columns = ['p_class', 's_class', 't_class']

    all_rows = execute_query(QUERY_ALL)
    unique_levels = all_rows[hierarchy_columns].drop_duplicates()
    unique_levels.to_csv(HIERARCHY_DIR)
Ejemplo n.º 2
0
def get_primary_inputs():
    """
    Query the database and return the X and y variables.

    Runs QUERY_ALL, cleans the result with ``_clean_df``, calls
    ``save_hierarchy()``, and then splits the cleaned frame with
    ``get_X_and_y``.

    Returns:
        The ``(X, y)`` pair produced by ``get_X_and_y``.
    """
    cleaned = _clean_df(execute_query(QUERY_ALL))

    # Called for its side effect only; its return value is not used here.
    save_hierarchy()

    features, targets = get_X_and_y(cleaned)
    logger.info("Size of X input: %s", features.shape)

    return features, targets
def main():
    """
    Main function that runs the program.

    Parses the command-line arguments (``id``, ``start``, ``end``), expands
    the start/end pair into a list of dates with
    ``ml_utils.create_date_range`` and, one date at a time:

    1. queries the events for that date with ``QUERY_EVENTS`` and cleans
       them with ``_clean_df``;
    2. builds the ``subject`` / ``text`` / ``event_type`` input frame;
    3. scores it with ``score_chunks_primary``;
    4. converts the results with ``convert_df_to_sql`` and upserts them
       into the ``primary_classifications`` table through
       ``query_events.create_staging_and_insert_primary`` followed by
       ``query_events.merge_records``.

    The total elapsed time is logged at the end.
    """
    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments while the job is running.
    start_time = time.perf_counter()

    args = parser.parse_args()

    id_num = args.id
    start_date = args.start
    end_date = args.end

    table_name = 'primary_classifications'

    logger.info("Classifying events from %s to %s...", start_date, end_date)

    dates = ml_utils.create_date_range(start_date, end_date)

    for _date in dates:
        logger.info("Classifying events created at %s...", _date)

        # NOTE(review): the date is interpolated into the query text with
        # str.format; it presumably comes from the CLI arguments via
        # create_date_range -- confirm it is validated upstream.
        df = query_events.execute_query(QUERY_EVENTS.format(_date))
        df = _clean_df(df)

        # Stack the three event columns as rows, then transpose so each
        # event becomes one row with the column names the scorer receives.
        X = pd.DataFrame([df.event_subject, df.event_text, df.event_type]).T
        X.columns = ['subject', 'text', 'event_type']

        results = score_chunks_primary(X, id_num)
        for_database = convert_df_to_sql(results, df)

        query_events.create_staging_and_insert_primary(
            table_name, for_database.values.tolist())
        query_events.merge_records(table_name)
        logger.info("Completed upserting %s", _date)

    logger.info('Completed primary classifications of events from %s to %s',
                start_date, end_date)
    logger.info("Elapsed Time: %s", time.perf_counter() - start_time)