def upload_unuploaded_rows(rds_connection, local_connection, table_name):
    dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
    cloud_pkey_name = rds_connection.get_primary_key_name(table_name)
    cloud_columns = rds_connection.get_column_names(table_name)
    local_columns = local_connection.get_column_names(table_name)
    local_columns = [item[0] for item in local_columns]
    local_pkey_name = local_connection.get_primary_key_name(table_name)
    next_unuploaded_pkey = get_local_next_unuploaded_pkey(
        local_connection, table_name, local_pkey_name)
    while next_unuploaded_pkey != -1:
        # build the column list and quoted value list for the INSERT
        col_names = "dep_id"
        val_list = "'" + str(dep_id) + "',"
        for column in cloud_columns:
            # skip the cloud primary key, dep_id, and columns that do not exist locally
            if (column[0] == cloud_pkey_name or column[0] == 'dep_id'
                    or column[0] not in local_columns):
                continue
            col_names += ',' + column[0]
            val = local_connection.get_rows_with_value(column[0], table_name,
                                                       local_pkey_name,
                                                       next_unuploaded_pkey)
            val = val[0][0]
            val_list += "'" + str(val) + "',"
        val_list = val_list[:-1]  # drop the trailing comma
        res = rds_connection.insert_row(table_name, col_names, val_list)
        # mark the local row as uploaded and fetch the next one
        res = local_connection.set_column(table_name, local_pkey_name,
                                          next_unuploaded_pkey, 'Uploaded', '1')
        next_unuploaded_pkey = get_local_next_unuploaded_pkey(
            local_connection, table_name, local_pkey_name)
    print("No (more) data to upload")
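# A minimal sketch of the helper used above. get_local_next_unuploaded_pkey is not
# defined in this file, so this only illustrates the contract the loop relies on:
# return the primary key of a local row whose 'Uploaded' flag is still 0, or -1
# when nothing remains. The method name on local_connection mirrors the ones used
# elsewhere in this file; the 'Uploaded' column and '0' value are assumptions.
def get_local_next_unuploaded_pkey_sketch(local_connection, table_name, pkey_name):
    rows = local_connection.get_rows_with_value(pkey_name, table_name,
                                                'Uploaded', '0')
    if len(rows) == 0:
        return -1
    return rows[0][0]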
def insert_heart_beat(rds_connection):
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        ts = time.time()
        # table and column names were undefined in this scope in the original;
        # the names below are assumptions matching the values being inserted
        table_name = 'heart_beat'
        col_names = 'dep_id,ts'
        values = str(dep_id) + ',' + str(int(ts))
        res = rds_connection.insert_row(table_name, col_names, values)
    except Exception as e:
        print('exception when inserting heart beat: ' + str(e))
        return -1
    return res
def get_s3_files():
    s3.get_bucket()
    dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
    all_objects = s3.list_items()
    dep_items = []
    for name in all_objects:
        ar = name.split('/')
        if len(ar) > 2 and len(ar[-1]) > 0:
            if str(ar[0]) == str(dep_id):
                dep_items.append(ar[1:])
    cloud_files = get_paths(dep_items)
    return cloud_files
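# A minimal sketch of the path-joining helper assumed above. get_paths is not
# defined in this file; the assumption is that it turns each list of key
# components (with the dep_id prefix already stripped) back into a single
# 'dir/file' style path so it can be compared against local file listings.
def get_paths_sketch(items):
    return ['/'.join(parts) for parts in items]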
def upload_all_rows(local_connection, rds_connection, table_name):
    dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
    try:
        rows = local_connection.get_all_rows(table_name)
        for row in rows:
            insert_row_to_cloud(local_connection, rds_connection, table_name,
                                row, dep_id)
        print('done')
    except Exception as e:
        print(e)
        return -1
    return 0
def insert_missing_data(rds_connection, local_connection, table_name,
                        missing_table_name):
    res = -1
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        # record local vs cloud row counts so missing data can be detected later
        cloud_count = rds_connection.get_num_rows(table_name, dep_id)
        local_count = local_connection.get_num_rows(table_name)
        col_names = 'dep_id,ts,local_count,cloud_count'
        ts = str(datetime.datetime.fromtimestamp(time.time()))
        values = ("'" + str(dep_id) + "','" + str(ts) + "','" +
                  str(local_count) + "','" + str(cloud_count) + "'")
        res = rds_connection.insert_row(missing_table_name, col_names, values)
    except Exception as e:
        print('Exception in insert_missing_data: ' + str(e))
    return res
def upoload_missing_data_ts(rds_connection, local_connection, table_name):
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        start_date = dep_data.get_start_date()
        print('uploading data. ' + table_name)
        cloud_unique_ts_list = rds_connection.get_unique_row_list(
            table_name, 'ts', dep_id)
        local_unique_ts_list = local_connection.get_unique_row_list(
            table_name, 'ts')
        # ignore local timestamps from before the deployment start date
        local_unique_ts_list = [
            ts for ts in local_unique_ts_list if str(ts)[0:15] > start_date
        ]
        cloud_unique_ts_list.sort()
        if len(cloud_unique_ts_list) > 2:
            # everything older than the second-newest cloud timestamp is taken as uploaded
            final_cloud_ts = cloud_unique_ts_list[-2]
            selected_local_unique_ts_list = [
                ts for ts in local_unique_ts_list if ts > final_cloud_ts
            ]
        else:
            selected_local_unique_ts_list = local_unique_ts_list
        for ts in selected_local_unique_ts_list:
            print(ts)
            ts_upload = False
            if ts in cloud_unique_ts_list:
                # upload only if the cloud is missing rows for this timestamp
                num_cloud = rds_connection.get_num_rows_with_value(
                    table_name, 'ts', ts, dep_id)
                num_local = local_connection.get_num_rows_with_value(
                    table_name, 'ts', ts)
                if num_local > num_cloud:
                    ts_upload = True
            else:
                ts_upload = True
            if ts_upload:
                #delete_rows_with_value(conn_cloud,cursor_cloud,table_name,col_name,ts)
                # upload all local rows with this ts
                rows = local_connection.get_rows_with_value(
                    -1, table_name, 'ts', ts)
                print('uploading ' + str(len(rows)) + ' rows')
                for i, row in enumerate(rows):
                    res = insert_row_to_cloud(local_connection, rds_connection,
                                              table_name, row, dep_id)
                    if res == -1:
                        print('did not upload...')
        print('finished uploading data.')
    except Exception as e:
        print(e)
def insert_missing_files_row(rds_connection):
    res = -1
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        local_files = s3_upload.get_local_files()
        cloud_files = s3_upload.get_s3_files()
        missing_files = s3_upload.list_diff(local_files, cloud_files)
        col_names = 'dep_id,ts,local_count,cloud_count,missing'
        ts = str(datetime.datetime.fromtimestamp(time.time()))
        values = ("'" + str(dep_id) + "','" + str(ts) + "','" +
                  str(len(local_files)) + "','" + str(len(cloud_files)) +
                  "','" + str(len(missing_files)) + "'")
        res = rds_connection.insert_row('missing_files', col_names, values)
    except Exception as e:
        print('exception when inserting to missing_files: ' + str(e))
    return res
def insert_missing_M2G(rds_connection):
    res = -1
    try:
        file_names = m2g.get_sorted_file_names()
        local_count = 0
        if isinstance(file_names, list):
            # count the total number of M2G log lines stored locally
            for file in file_names:
                lines = m2g.read_file(file)
                local_count += len(lines)
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        cloud_count = rds_connection.get_num_rows('M2G', dep_id)
        col_names = 'dep_id,ts,local_count,cloud_count'
        ts = str(datetime.datetime.fromtimestamp(time.time()))
        values = ("'" + str(dep_id) + "','" + str(ts) + "','" +
                  str(local_count) + "','" + str(cloud_count) + "'")
        res = rds_connection.insert_row('missing_M2G', col_names, values)
    except Exception as e:
        print('Exception in insert_missing_M2G: ' + str(e))
    return res
def upload_file(file_name, dir_name, is_progress=False):
    name = file_name.split('/')[-1]
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        key = str(dep_id) + '/' + str(dir_name) + '/' + str(name)
        if is_progress:
            res = pcr_storage.upload_file(Filename=file_name,
                                          Key=key,
                                          Callback=ProgressPercentage(file_name))
        else:
            res = pcr_storage.upload_file(Filename=file_name, Key=key)
        short_file = file_name.split('/')[-1]
        Log.log_s3('uploaded ' + dir_name + '/' + short_file)
        # verify the upload by comparing checksums
        same = is_checksum_ok(file_name, key)
        if not same:
            log_entry = ('checksum failed after uploading ' + dir_name + '/' +
                         short_file + ' in upload_file of s3_functions')
            Log.log_s3(log_entry)
            # delete the corrupt copy in the cloud
            response = pcr_storage.delete_objects(
                Delete={'Objects': [{'Key': key}]})
            return -1
        else:
            Log.log_s3('checksum success ' + dir_name + '/' + short_file)
            return 0
    except Exception as e:
        print(str(e))
        short_file = file_name.split('/')[-1]
        log_entry = ('exception uploading ' + dir_name + '/' + short_file +
                     ' in upload_file of s3_functions exception=' + str(e))
        Log.log_s3(log_entry)
        return -1
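# A minimal sketch of the checksum check assumed above. is_checksum_ok is not
# defined in this file; this illustration assumes pcr_storage is a boto3 Bucket
# resource and that the object was uploaded in a single part, so its ETag equals
# the hex MD5 of the file contents (this does not hold for multipart uploads).
def is_checksum_ok_sketch(file_name, key):
    import hashlib
    md5 = hashlib.md5()
    with open(file_name, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), b''):
            md5.update(chunk)
    etag = pcr_storage.Object(key).e_tag.strip('"')  # assumption: boto3 Bucket API
    return etag == md5.hexdigest()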
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 6 10:52:22 2019

@author: sleek_eagle
"""
import file_system_tasks
from os import listdir
from os.path import isfile, join
import dep_data

dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
col_names = file_system_tasks.get_parameters('parameters.json')['param']['m2g_fields']


# read the log file
def read_file(f):
    try:
        root_dir = file_system_tasks.get_project_dir(-3)
        path = (root_dir[0:-1] +
                file_system_tasks.get_parameters('parameters.json')['param']['m2g_log_dir'] +
                '/')
    except Exception as e:
        print(e)
    with open(path + f, 'r') as file:
        lines = file.readlines()
    return lines


def get_sorted_file_names():
    try:
        root_dir = file_system_tasks.get_project_dir(-3)