Example #1
def upload_unuploaded_rows(rds_connection, local_connection, table_name):
    dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
    cloud_pkey_name = rds_connection.get_primary_key_name(table_name)
    cloud_columns = rds_connection.get_column_names(table_name)
    local_columns = local_connection.get_column_names(table_name)
    local_columns = [item[0] for item in local_columns]
    local_pkey_name = local_connection.get_primary_key_name(table_name)
    next_unuploaded_pkey = get_local_next_unuploaded_pkey(
        local_connection, table_name, local_pkey_name)

    while next_unuploaded_pkey != -1:
        col_names = "dep_id"
        val_list = "'" + str(dep_id) + "',"
        for column in cloud_columns:
            # skip the cloud primary key, the dep_id column (already added),
            # and any cloud column with no local counterpart
            if (column[0] == cloud_pkey_name or column[0] == 'dep_id'
                    or column[0] not in local_columns):
                continue
            col_names += ',' + column[0]
            val = local_connection.get_rows_with_value(column[0], table_name,
                                                       local_pkey_name,
                                                       next_unuploaded_pkey)
            val = val[0][0]
            val_list += "'" + str(val) + "',"
        val_list = val_list[:-1]  # drop the trailing comma

        rds_connection.insert_row(table_name, col_names, val_list)
        # mark the row as uploaded locally so it is not selected again
        local_connection.set_column(table_name, local_pkey_name,
                                    next_unuploaded_pkey, 'Uploaded', '1')
        next_unuploaded_pkey = get_local_next_unuploaded_pkey(
            local_connection, table_name, local_pkey_name)

    print("No (more) data to upload")
Example #2
def insert_heart_beat(rds_connection):
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        ts = time.time()
        values = str(dep_id) + ',' + str(int(ts))
        # table_name and col_names are expected to be defined at module level
        res = rds_connection.insert_row(table_name, col_names, values)
    except Exception as e:
        print('exception when inserting heart beat: ' + str(e))
        return -1
    return res
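The snippet relies on table_name and col_names being defined at module level; they are not shown here. A plausible sketch, with both values being assumptions rather than something the source confirms:

# Assumed module-level constants; the heartbeat table and column names
# below are illustrative only, chosen to match the values string above.
table_name = 'heart_beat'
col_names = 'dep_id,ts'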
Example #3
def get_s3_files():
    s3.get_bucket()  # presumably initializes the module-level bucket handle
    dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
    all_objects = s3.list_items()

    dep_items = []
    for name in all_objects:
        ar = name.split('/')
        # keep only non-empty keys under this deployment's prefix,
        # dropping the leading dep_id segment
        if len(ar) > 2 and len(ar[-1]) > 0:
            if str(ar[0]) == str(dep_id):
                dep_items.append(ar[1:])
    cloud_files = get_paths(dep_items)
    return cloud_files
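get_paths is not shown here. From the way dep_items is built (each item is a key split on '/' with the leading dep_id dropped), it plausibly rejoins the remaining segments into relative paths; a minimal sketch under that assumption:

def get_paths(items):
    # Hypothetical: rejoin each list of key segments into a 'dir/file' path.
    return ['/'.join(segments) for segments in items]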
Example #4
def upload_all_rows(local_connection, rds_connection, table_name):
    dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
    try:
        rows = local_connection.get_all_rows(table_name)
        for row in rows:
            insert_row_to_cloud(local_connection, rds_connection, table_name,
                                row, dep_id)
        print('done')

    except Exception as e:
        print(e)
        return -1
    return 0
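insert_row_to_cloud is called here and again in Example #6, but its definition is not shown. A minimal sketch consistent with those call sites; the column handling is an assumption that mirrors the string-building style of Example #1:

def insert_row_to_cloud(local_connection, rds_connection, table_name, row,
                        dep_id):
    # Hypothetical sketch: quote every local column value, prepend dep_id,
    # and hand the literal lists to the wrapper's insert_row. The real
    # helper may skip the local primary key or map columns differently.
    try:
        columns = [c[0] for c in local_connection.get_column_names(table_name)]
        col_names = 'dep_id,' + ','.join(columns)
        values = ','.join("'" + str(v) + "'" for v in [dep_id] + list(row))
        return rds_connection.insert_row(table_name, col_names, values)
    except Exception as e:
        print(e)
        return -1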
Example #5
def insert_missing_data(rds_connection, local_connection, table_name,
                        missing_table_name):
    res = -1
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        cloud_count = rds_connection.get_num_rows(table_name, dep_id)
        local_count = local_connection.get_num_rows(table_name)
        col_names = 'dep_id,ts,local_count,cloud_count'
        ts = str(datetime.datetime.fromtimestamp(time.time()))
        # quote each value so the row inserts as a literal SQL value list
        values = ','.join("'" + str(v) + "'"
                          for v in [dep_id, ts, local_count, cloud_count])
        res = rds_connection.insert_row(missing_table_name, col_names, values)
    except Exception as e:
        print('Exception in insert_missing_data: ' + str(e))
    return res
Example #6
def upload_missing_data_ts(rds_connection, local_connection, table_name):
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        start_date = dep_data.get_start_date()

        print('uploading data. ' + table_name)

        cloud_unique_ts_list = rds_connection.get_unique_row_list(
            table_name, 'ts', dep_id)
        local_unique_ts_list = local_connection.get_unique_row_list(
            table_name, 'ts')
        # keep only local timestamps after the deployment start date
        local_unique_ts_list = [
            ts for ts in local_unique_ts_list if str(ts)[0:15] > start_date
        ]
        cloud_unique_ts_list.sort()
        if len(cloud_unique_ts_list) > 2:
            # only re-check timestamps from the second-to-last cloud ts onward
            final_cloud_ts = cloud_unique_ts_list[-2]
            selected_local_unique_ts_list = [
                ts for ts in local_unique_ts_list if ts > final_cloud_ts
            ]
        else:
            selected_local_unique_ts_list = local_unique_ts_list
        for ts in selected_local_unique_ts_list:
            print(ts)
            ts_upload = False
            if ts in cloud_unique_ts_list:
                # upload only if the local row count for this ts exceeds
                # what is already in the cloud
                num_cloud = rds_connection.get_num_rows_with_value(
                    table_name, 'ts', ts, dep_id)
                num_local = local_connection.get_num_rows_with_value(
                    table_name, 'ts', ts)
                if num_local > num_cloud:
                    ts_upload = True
            else:
                ts_upload = True
            if ts_upload:
                #delete_rows_with_value(conn_cloud,cursor_cloud,table_name,col_name,ts)
                # upload all rows with this ts
                rows = local_connection.get_rows_with_value(
                    -1, table_name, 'ts', ts)
                print('uploading ' + str(len(rows)) + ' rows')
                for row in rows:
                    res = insert_row_to_cloud(local_connection, rds_connection,
                                              table_name, row, dep_id)
                    if res == -1:
                        print('did not upload...')
        print('finished uploading data.')
    except Exception as e:
        print(e)
Example #7
def insert_missing_files_row(rds_connection):
    res = -1
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        local_files = s3_upload.get_local_files()
        cloud_files = s3_upload.get_s3_files()
        missing_files = s3_upload.list_diff(local_files, cloud_files)
        col_names = 'dep_id,ts,local_count,cloud_count,missing'
        ts = str(datetime.datetime.fromtimestamp(time.time()))
        # quote each value so the row inserts as a literal SQL value list
        values = ','.join("'" + str(v) + "'" for v in [
            dep_id, ts, len(local_files), len(cloud_files), len(missing_files)
        ])
        res = rds_connection.insert_row('missing_files', col_names, values)
    except Exception as e:
        print('exception when inserting to missing_files: ' + str(e))
    return res
Example #8
def insert_missing_M2G(rds_connection):
    res = -1
    try:
        # count local M2G log lines across all log files
        file_names = m2g.get_sorted_file_names()
        local_count = 0
        if isinstance(file_names, list):
            for file in file_names:
                lines = m2g.read_file(file)
                local_count += len(lines)
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        cloud_count = rds_connection.get_num_rows('M2G', dep_id)
        col_names = 'dep_id,ts,local_count,cloud_count'
        ts = str(datetime.datetime.fromtimestamp(time.time()))
        values = ','.join("'" + str(v) + "'"
                          for v in [dep_id, ts, local_count, cloud_count])
        res = rds_connection.insert_row('missing_M2G', col_names, values)
    except Exception as e:
        print('Exception in insert_missing_M2G: ' + str(e))
    return res
Example #9
def upload_file(file_name, dir_name, is_progress=False):
    name = file_name.split('/')[-1]
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        key = str(dep_id) + '/' + str(dir_name) + '/' + str(name)
        if is_progress:
            pcr_storage.upload_file(
                Filename=file_name,
                Key=key,
                Callback=ProgressPercentage(file_name))
        else:
            pcr_storage.upload_file(Filename=file_name, Key=key)
        Log.log_s3('uploaded ' + dir_name + '/' + name)
        # compare checksums to verify the uploaded copy
        same = is_checksum_ok(file_name, key)
        if not same:
            log_entry = ('checksum failed after uploading ' + dir_name + '/' +
                         name + ' in upload_file of s3_functions')
            Log.log_s3(log_entry)
            # delete the corrupt copy in the cloud so the upload can be retried
            pcr_storage.delete_objects(Delete={'Objects': [{'Key': key}]})
            return -1
        else:
            Log.log_s3('checksum success ' + dir_name + '/' + name)
            return 0
    except Exception as e:
        print(str(e))
        log_entry = ('exception uploading ' + dir_name + '/' + name +
                     ' in upload_file of s3_functions exception=' + str(e))
        Log.log_s3(log_entry)
        return -1
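is_checksum_ok is not shown in this example. One common approach, offered here only as an assumption, is to compare the local file's MD5 digest with the object's S3 ETag, which equals the MD5 for single-part uploads; a sketch using boto3 directly, with a hypothetical bucket name:

import hashlib
import boto3

def is_checksum_ok(file_name, key, bucket='my-bucket'):  # bucket is hypothetical
    # MD5 of the local file, streamed in chunks to bound memory use
    md5 = hashlib.md5()
    with open(file_name, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), b''):
            md5.update(chunk)
    # for single-part uploads the S3 ETag is the object's MD5 hex digest;
    # multipart uploads would need a different comparison
    head = boto3.client('s3').head_object(Bucket=bucket, Key=key)
    etag = head['ETag'].strip('"')
    return etag == md5.hexdigest()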
Example #10
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Nov  6 10:52:22 2019

@author: sleek_eagle
"""
import file_system_tasks
from os import listdir
from os.path import isfile, join
import dep_data


dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
col_names = file_system_tasks.get_parameters('parameters.json')['param']['m2g_fields']

#read the log file
def read_file(f):
    try:
        root_dir = file_system_tasks.get_project_dir(-3)
        path = (root_dir[0:-1] + file_system_tasks.get_parameters(
            'parameters.json')['param']['m2g_log_dir'] + '/')
        # read inside the try so a failed path lookup cannot leave
        # path undefined when open() runs
        with open(path + f, 'r') as file:
            lines = file.readlines()
        return lines
    except Exception as e:
        print(e)
        return []


def get_sorted_file_names():
    try:
        root_dir = file_system_tasks.get_project_dir(-3)