# Example 1
# 0
def process_source_data():
    """Load the daily production JSON, add derived columns, and store it.

    Reads ``daily_production_data.json`` from the directory reported by the
    ``fs_custom`` file-system hook, computes a ``water_cut_calc`` and a
    ``gor_calc`` value for every row via ``utils.calc_watercut`` and
    ``utils.calc_gor``, passes the resulting records to
    ``MongoHook.insert_many``, and finally deletes the source file.
    """
    fs_hook = FSHook('fs_custom')
    mongo_hook = MongoHook()
    source_path = os.path.join(fs_hook.get_path(),
                               'daily_production_data.json')

    frame = pd.read_json(source_path)

    # Derived values are collected row by row, in the frame's row order.
    water_cuts = []
    gas_oil_ratios = []
    for _, record in frame.iterrows():
        oil_rate = record['OIL_bopd']
        water_cuts.append(
            utils.calc_watercut(oil_rate, record['WATER_bwpd']))
        gas_oil_ratios.append(
            utils.calc_gor(oil_rate, record['GAS_mscfd']))

    enriched = frame.assign(water_cut_calc=water_cuts,
                            gor_calc=gas_oil_ratios)
    records = enriched.to_dict("records")

    # NOTE(review): the positional arguments look like
    # (collection, documents, database) - confirm against MongoHook.
    mongo_hook.insert_many('DailyProduction', records, 'fusion_dev_db')

    # The source file is removed only after the insert call has returned;
    # if anything above raises, the file is left in place.
    os.remove(source_path)
# Example 2
# 0
def local_batch(batch_name: str, max_batch_size: int, number_of_batches: int,
                root_path: str) -> None:
    """Assign every file under ``root_path`` to a batch and store the mapping.

    Walks ``root_path`` bottom-up and writes one document per file, of the
    form ``{"batch_id": f"{batch_name}{n}", "file_path": <path>}``, to the
    ``local_results_to_transform`` collection of the ``courts`` database.
    Documents are inserted in groups of at most ``max_batch_size``; after
    each full group the batch number ``n`` advances, cycling through
    ``1..number_of_batches``.

    Args:
        batch_name: Prefix of every generated ``batch_id``.
        max_batch_size: Maximum number of file documents per insert and per
            consecutive run sharing one ``batch_id``. Must be at least 1.
        number_of_batches: Number of distinct batch numbers to cycle
            through. Must be at least 1.
        root_path: Directory whose files (including those in
            sub-directories) are assigned to batches.

    Raises:
        ValueError: If ``max_batch_size`` or ``number_of_batches`` is less
            than 1. Checked before anything is written, so invalid
            arguments can neither drop files silently nor fail part-way
            through after some documents were already inserted.
    """
    if max_batch_size < 1:
        raise ValueError(
            f"max_batch_size must be at least 1, got {max_batch_size}")
    if number_of_batches < 1:
        raise ValueError(
            f"number_of_batches must be at least 1, got {number_of_batches}")

    mongo_conn = MongoHook(conn_id="default_mongo")

    batch_id = 1
    pending = []

    # Iterate through all directories and assign each filepath a batch_id.
    for root, _directories, files in os.walk(root_path, topdown=False):
        for name in files:
            # Each document in local_results_to_transform is a dict of
            # the filepath and the batch it is assigned to.
            pending.append({
                "batch_id": f"{batch_name}{batch_id}",
                "file_path": os.path.join(root, name)
            })

            if len(pending) == max_batch_size:
                mongo_conn.insert_many("local_results_to_transform",
                                       pending,
                                       mongo_db="courts")
                # Advance to the next batch number, wrapping back to 1
                # after number_of_batches.
                batch_id = batch_id % number_of_batches + 1
                # Rebind rather than clear: the hook was handed the old
                # list object.
                pending = []

    # Push any remaining filepaths to the local_results_to_transform
    # collection.
    if pending:
        mongo_conn.insert_many("local_results_to_transform",
                               pending,
                               mongo_db="courts")