# Dataset-update step: appends newly enriched records to the master dataset CSV.
# FileUtil is a project-local helper; the import path below is an assumption.
from file_util import FileUtil


def run(enrich_data_params, update_dataset_params):
    enrich_out_dir = enrich_data_params["enrich_out_dir"]
    enrich_out_archive_dir = enrich_data_params["enrich_out_archive_dir"]
    update_in_dir = update_dataset_params["update_in_dir"]
    update_in_archive_dir = update_dataset_params["update_in_archive_dir"]
    dataset_file = update_dataset_params["dataset_file"]

    # copy enrich out to update in;
    # move enrich out to enrich out archive
    FileUtil.copy_and_move_files(enrich_out_dir, update_in_dir,
                                 enrich_out_archive_dir, "*.csv")

    print("\nLoading Data to be added to Dataset from Filesystem...")
    df_update = FileUtil.get_df_from_csv_dir(update_in_dir, "*.csv")
    print("Complete. Count: " + str(df_update.shape[0]))

    # write the header first if the dataset file does not exist yet
    header = ','.join(df_update.columns) + '\n'
    if not FileUtil.file_exists(dataset_file):
        FileUtil.write_to_file(dataset_file, header)

    # append the new records to the dataset file
    FileUtil.add_df_to_csv_file(df_update, dataset_file)

    # move update in to update in archive
    FileUtil.move_files(update_in_dir, update_in_archive_dir, "*.csv")
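# A minimal sketch of how this step might be invoked; every directory and
# file path below is hypothetical, not taken from the project's configuration.
# Trailing slashes matter because dirs are concatenated with filenames elsewhere.
if __name__ == '__main__':
    enrich_data_params = {
        "enrich_out_dir": "data/enrich/out/",
        "enrich_out_archive_dir": "data/enrich/out_archive/",
    }
    update_dataset_params = {
        "update_in_dir": "data/update/in/",
        "update_in_archive_dir": "data/update/in_archive/",
        "dataset_file": "data/dataset/dataset.csv",
    }
    run(enrich_data_params, update_dataset_params)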
# Enrichment step: augments each correlated record with user and event details
# fetched from the Workjam API.
# FileUtil, TimeUtil, and WorkjamAPI are project-local helpers; the import
# paths below are assumptions.
from file_util import FileUtil
from time_util import TimeUtil
from workjam_api import WorkjamAPI


def run(debug, env, username, password, correlate_data_params,
        enrich_data_params):
    correlate_out_dir = correlate_data_params["correlate_out_dir"]
    correlate_out_archive_dir = correlate_data_params[
        "correlate_out_archive_dir"]
    enrich_in_dir = enrich_data_params["enrich_in_dir"]
    enrich_in_archive_dir = enrich_data_params["enrich_in_archive_dir"]
    enrich_out_dir = enrich_data_params["enrich_out_dir"]

    # copy correlate out to enrich in;
    # move correlate out to correlate out archive
    FileUtil.copy_and_move_files(correlate_out_dir, enrich_in_dir,
                                 correlate_out_archive_dir, "*.csv")

    wj_api = WorkjamAPI(debug, env, username, password)

    # timestamp (in milliseconds) used to name this cycle's output file
    now_timestamp = TimeUtil.get_current_milli_time()
    enrich_filename = 'Enrich_' + str(now_timestamp) + ".csv"

    print("\nLoading Data to be enriched from Filesystem...")
    df_enrich = FileUtil.get_df_from_csv_dir(enrich_in_dir, "*.csv")
    print("Complete. Count: " + str(df_enrich.shape[0]))

    # write the header row: the API helpers return their column headers
    # when called with the leading header flag set to True
    response_user_header = wj_api.get_user_details(True, '', '')
    response_event_header = wj_api.get_event_details(True, '', '', '')
    FileUtil.write_to_file(
        enrich_out_dir + enrich_filename,
        'loggedin_user,company_id,query_datetime,apply_datetime,'
        'number_of_open_shifts,location_id,event_id,' +
        response_user_header + ',' + response_event_header + ',applied\n')

    print("\nEnriching User and Event info...")
    num_records_written_to_file = 0
    for index, row in df_enrich.iterrows():
        loggedinuser = row['loggedinuser']
        companyid = row['companyid']
        query_datetime = row['query_datetime']
        apply_datetime = row['apply_datetime']
        numberofopenshifts = row['numberofopenshifts']
        locationid = row['locationid']
        eventid = row['eventid']
        applied = row['applied']

        try:
            # get info for the event in context
            response_event_csv = wj_api.get_event_details(
                False, companyid, locationid, eventid)

            # get info for the user in context
            response_user_csv = wj_api.get_user_details(
                False, companyid, loggedinuser)

            # write the enriched record to the out dir with timestamp
            FileUtil.append_to_file(
                enrich_out_dir + enrich_filename,
                str(loggedinuser) + ',' + str(companyid) + ',' +
                str(query_datetime) + ',' + str(apply_datetime) + ',' +
                str(numberofopenshifts) + ',' + str(locationid) + ',' +
                str(eventid) + ',' + response_user_csv + ',' +
                response_event_csv + ',' + str(applied) + '\n')
            num_records_written_to_file += 1
        except Exception as e:
            print(e)

    print("Complete. Found: {} Written: {}\n".format(
        str(df_enrich.shape[0]), num_records_written_to_file))

    # move enrich in to enrich in archive
    FileUtil.move_files(enrich_in_dir, enrich_in_archive_dir, "*.csv")
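# The API wrapper follows a small convention visible in the calls above: a
# leading boolean selects header mode (True returns the CSV column header,
# False returns a CSV fragment for the given ids). The stub below is a
# hypothetical stand-in illustrating that convention only; the real
# WorkjamAPI implementation, endpoints, and field names are not shown here.
class WorkjamAPIStub:
    """Hypothetical stand-in for WorkjamAPI's header-flag convention."""

    def get_user_details(self, header_only, companyid, userid):
        if header_only:
            # header mode: return the column names only (fields are made up)
            return 'user_type,user_status'
        # data mode: fetch and flatten the user record (stubbed here)
        user = {'user_type': 'EMPLOYEE', 'user_status': 'ACTIVE'}
        return '{},{}'.format(user['user_type'], user['user_status'])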
# Correlation step: joins each "apply to open shift" action with the most
# recent open-shift request that offered that event/location pair.
# FileUtil and TimeUtil are project-local helpers; the import paths below are
# assumptions. CorrelateData (the add_header/add_row CSV helpers used here)
# is assumed to be defined in this module or imported alongside.
from file_util import FileUtil
from time_util import TimeUtil


def run(debug, get_data_params, correlate_data_params):
    sumologic_out_dir = get_data_params["sumologic_out_dir"]
    sumologic_out_archive_dir = get_data_params["sumologic_out_archive_dir"]
    correlate_in_current_cycle_dir = correlate_data_params[
        "correlate_in_current_cycle_dir"]
    correlate_in_previous_cycle_dir = correlate_data_params[
        "correlate_in_previous_cycle_dir"]
    correlate_in_archive_dir = correlate_data_params["correlate_in_archive_dir"]
    correlate_out_dir = correlate_data_params["correlate_out_dir"]

    # copy sumologic out to correlate in current cycle;
    # move sumologic out to sumologic out archive
    FileUtil.copy_and_move_files(sumologic_out_dir,
                                 correlate_in_current_cycle_dir,
                                 sumologic_out_archive_dir, "*.csv")

    # timestamp (in milliseconds) used to name this cycle's output file
    now_timestamp = TimeUtil.get_current_milli_time()
    correlate_filename = 'Correlate_' + str(now_timestamp) + ".csv"

    print("\nLoading Open Shift Requests from Filesystem...")
    # correlate applies with requests from the current and previous cycles
    df_requests = FileUtil.get_df_from_csv_dirs(correlate_in_current_cycle_dir,
                                                correlate_in_previous_cycle_dir,
                                                "Requests*")
    print("Complete. Count: " + str(df_requests.shape[0]))

    if debug:
        for index, row in df_requests.iterrows():
            print(row)

    print("\nLoading Apply to Open Shifts from Filesystem...")
    df_apply = FileUtil.get_df_from_csv_dir(correlate_in_current_cycle_dir,
                                            "Apply*")
    print("Complete. Count: " + str(df_apply.shape[0]))

    print("\nCorrelating Apply Open Shifts with Open Shifts Requests... ")
    fields = ['loggedinuser', 'companyid', 'query_datetime', 'apply_datetime',
              'numberofopenshifts', 'locationid', 'eventid', 'applied']
    CorrelateData.add_header(correlate_out_dir + correlate_filename, fields)

    for index, row in df_apply.iterrows():
        apply_datetime = row['datetime']
        loggedinuser = row['loggedinuser']
        companyid = row['companyid']
        locationid = row['locationid']
        eventid = row['eventid']

        # most recent matching request by the same user at the same company
        # that predates the apply and offered this event/location pair
        df_filtered = df_requests.loc[
            (df_requests['loggedinuser'] == loggedinuser) &
            (df_requests['companyid'] == companyid) &
            (df_requests['datetime'] < apply_datetime) &
            (df_requests['eventandlocationids'].str.contains(
                str(eventid) + "," + str(locationid)))
        ].drop_duplicates().sort_values(
            by=['datetime'], ascending=False).head(1)

        if df_filtered.shape[0] > 0:
            # first replace ', ' with '|', then split on '|'
            # Example text: (3714cb1e-4839-4d8c-818e-9d01c655cd86,328038),
            #   (d87a2bb7-05e0-465e-8b6c-aa18d89a9c9f,328038),
            #   (e7bee5c5-8f4e-457f-95e7-b1ec82e8ab21,328038),
            #   (f04d14c1-68c3-4dda-8698-3d95eb3a4b9d,328038)
            events_and_locations = df_filtered.iloc[0][
                'eventandlocationids'].replace(', ', '|').split('|')
            for event_location in events_and_locations:
                # strip the parentheses and split on ','
                # Example text: (3714cb1e-4839-4d8c-818e-9d01c655cd86,328038)
                eventid_in_request, locationid_in_request = event_location \
                    .replace('(', '').replace(')', '').split(',')

                # label the pair: True only for the event/location applied to
                applied = (str(eventid) == str(eventid_in_request) and
                           str(locationid) == str(locationid_in_request))

                row = {'loggedinuser': loggedinuser,
                       'companyid': companyid,
                       'query_datetime': df_filtered.iloc[0]['datetime'],
                       'apply_datetime': apply_datetime,
                       'numberofopenshifts':
                           df_filtered.iloc[0]['numberofopenshifts'],
                       'locationid': locationid_in_request,
                       'eventid': eventid_in_request,
                       'applied': applied}
                CorrelateData.add_row(correlate_out_dir + correlate_filename,
                                      fields, row)

    print("Complete. Results written to: {}\n".format(
        correlate_out_dir + correlate_filename))

    # move correlate in previous cycle to correlate in archive
    FileUtil.move_files(correlate_in_previous_cycle_dir,
                        correlate_in_archive_dir, "*.csv")

    # move correlate in current cycle (Apply) to correlate in archive
    FileUtil.move_files(correlate_in_current_cycle_dir,
                        correlate_in_archive_dir, "Apply*")

    # move correlate in current cycle (Requests) to correlate in previous
    # cycle, so the next cycle's applies can still match against them
    FileUtil.move_files(correlate_in_current_cycle_dir,
                        correlate_in_previous_cycle_dir, "Requests*")
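# The eventandlocationids parsing above is easy to sanity-check in isolation.
# A self-contained sketch of the same two-step transformation applied to the
# example string from the comments (all names here are local to the sketch):
if __name__ == '__main__':
    raw = ('(3714cb1e-4839-4d8c-818e-9d01c655cd86,328038), '
           '(d87a2bb7-05e0-465e-8b6c-aa18d89a9c9f,328038)')

    # same dance as in run(): ', ' -> '|', then split on '|'
    pairs = raw.replace(', ', '|').split('|')
    for pair in pairs:
        # strip the parentheses, then split the (eventid,locationid) tuple
        event_id, location_id = pair.replace('(', '').replace(')', '') \
            .split(',')
        print(event_id, location_id)
    # prints:
    # 3714cb1e-4839-4d8c-818e-9d01c655cd86 328038
    # d87a2bb7-05e0-465e-8b6c-aa18d89a9c9f 328038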