Example #1
def process_azm_file(args):
    proc_start_time = time.time()
    ret = -9
    use_popen_mode = True
    sql_dump_file = None

    try:
        dir_processing_azm = None

        dry_str = args['dry']
        dry_str = dry_str.strip().lower()
        dry_mode = (dry_str == "true")
        print "dry_mode setting: ", dry_mode

        if dry_mode:
            print "dry_mode - dont unzip azm for azqdata.db - let preprocess func handle itself"
        else:
            print "normal import mode"
            dir_processing_azm = unzip_azm_to_tmp_folder(args)
            args['dir_processing_azm'] = dir_processing_azm

        preprocess_module = args[
            'call_preprocess_func_in_module_before_import']

        if preprocess_module is not None:
            preprocess_module = preprocess_module.replace(".py", "", 1)
            print "get preprocess module: ", preprocess_module
            importlib.import_module(preprocess_module)
            mod = sys.modules[preprocess_module]
            preprocess = getattr(mod, 'preprocess')
            print "exec preprocess module > preprocess func"
            preprocess(dir_processing_azm, args['azm_file'])

        if dry_mode:
            print "dry_mode - end here"
            mv_azm_to_target_folder(args)
            return 0

        check_azm_azq_app_version(args)

        # module-level flag read later by handle_sql3_dump_line - declare it global before assigning
        global g_check_and_dont_create_if_empty
        g_check_and_dont_create_if_empty = args[
            'check_and_dont_create_if_empty']
        use_popen_mode = not args['dump_to_file_mode']

        if args['target_db_type'] == "sqlite3":
            if args['target_sqlite3_file'] is None:
                raise Exception(
                    "INVALID: sqlite3 merge mode requires --target_sqlite3_file option to be specified - ABORT"
                )
            else:
                use_popen_mode = False  # dump to .sql file for .read

        print "NOTE: now we delete pre y2k rows and if it was popen then the delete would error as 'database is locked' so always dump schema to sql - so force set use_popen_mode = False"
        use_popen_mode = False

        if (use_popen_mode):
            print "using live in-memory pipe of sqlite3 dump output parse mode"
        else:
            print "using full dump of sqlite3 to file mode"

        dump_process = None
        dumped_sql_fp = None

        if (use_popen_mode):
            print("starting sqlite3 subporcess...")
            dump_process = popen_sqlite3_dump(args)
            if dump_process is None:
                raise Exception(
                    "FATAL: dump_process is None in popen_mode - ABORT")
        else:
            print("starting sqlite3 to dump db to .sql file...")
            dumped_sql_fp = dump_db_to_sql(dir_processing_azm)
            if dumped_sql_fp is None:
                raise Exception(
                    "FATAL: dumped_sql_fp is None in non popen_mode - ABORT")

        # sqlite3 merge is simple run .read on args['dumped_sql_fp']
        if args['target_db_type'] == "sqlite3":
            is_target_exists = os.path.isfile(args['target_sqlite3_file'])
            print "sqlite3 - import to {} from {}".format(
                args['target_sqlite3_file'], dumped_sql_fp)

            dumped_sql_fp_adj = dumped_sql_fp + "_adj.sql"
            of = open(dumped_sql_fp, "r")
            nf = open(
                dumped_sql_fp_adj, "wb"
            )  # wb required for windows so that \n is 0x0A - otherwise \n would be 0x0D 0x0A and doesn't go with our fmt file and only 1 row would be inserted per table csv in bulk inserts...

            while True:
                ofl = of.readline()

                if ofl == "":
                    break

                ofl = ofl.replace(
                    "CREATE TABLE android_metadata (locale TEXT);", "", 1)
                ofl = ofl.replace('CREATE TABLE "',
                                  'CREATE TABLE IF NOT EXISTS "', 1)

                if ofl.startswith('INSERT INTO "android_metadata"'):
                    ofl = ""

                if is_target_exists:
                    # dont insert or create qgis tables
                    if ofl.startswith(
                            "CREATE TABLE geometry_columns") or ofl.startswith(
                                "CREATE TABLE spatial_ref_sys"
                            ) or ofl.startswith(
                                'INSERT INTO "spatial_ref_sys"'
                            ) or ofl.startswith(
                                'INSERT INTO "geometry_columns"'):
                        ofl = ""

                nf.write(ofl)
                #nf.write('\n')
            nf.close()
            of.close()

            cmd = [
                args['sqlite3_executable'], args['target_sqlite3_file'],
                ".read {}".format(dumped_sql_fp_adj.replace("\\", "\\\\"))
            ]
            print "cmd: ", cmd
            ret = call(cmd, shell=False)
            print "import ret: " + str(ret)
            if (ret == 0):
                print("\n=== SUCCESS - import completed in %s seconds" %
                      (time.time() - proc_start_time))
                if debug_helpers.debug == 1:
                    print "debug mode keep_tmp_dir..."
                else:
                    cleanup_tmp_dir(dir_processing_azm)
                return 0
            else:
                if debug_helpers.debug == 1:
                    print "debug mode keep_tmp_dir..."
                else:
                    cleanup_tmp_dir(dir_processing_azm)

                raise Exception(
                    "\n=== FAILED - ret %d - operation completed in %s seconds"
                    % (ret, time.time() - proc_start_time))

            raise Exception(
                "FATAL: sqlite3 mode merge process failed - invalid state")

        # bulk insert is now done in the create/commit handler funcs instead of per-line g_insert_function = getattr(mod, 'handle_sqlite3_dump_insert') calls

        print "### connecting to dbms..."
        ret = g_connect_function(args)

        if ret == False:
            raise Exception("FATAL: connect_function failed")

        if (args['unmerge']):
            print "### unmerge mode"
            # unmerge mode would be handled by same check_if_already_merged_function below - the 'unmerge' flag is in args

        # check if this azm is already imported/merged in target db (and exit if already imported)
        # get log_hash
        sqlstr = "select log_hash from logs limit 1"
        cmd = [args['sqlite3_executable'], args['file'], sqlstr]
        print "call cmd:", cmd
        outstr = subprocess.check_output(cmd)
        log_hash = outstr.strip()
        args['log_hash'] = long(log_hash)

        # compare the numeric value - log_hash itself stays the string form used below
        if args['log_hash'] == 0:
            raise Exception("FATAL: invalid log_hash == 0 case")

        args['log_start_time_str'] = get_sql_result(
            "select log_start_time from logs limit 1", args)
        args['log_end_time_str'] = get_sql_result(
            "select log_end_time from logs limit 1", args)

        args['log_start_time'] = get_sql_result(
            "select strftime('%s', log_start_time) from logs limit 1", args)
        print "parse log_start_time:", args['log_start_time']
        args['log_start_time'] = datetime.fromtimestamp(
            long(args['log_start_time']))
        print "args['log_start_time']:", args['log_start_time']
        print "args['log_start_time_str']:", args['log_start_time_str']

        args['log_end_time'] = get_sql_result(
            "select strftime('%s', log_end_time) from logs limit 1", args)
        print "parse log_end_time:", args['log_start_time']
        args['log_end_time'] = datetime.fromtimestamp(
            long(args['log_end_time']))
        print "args['log_end_time']:", args['log_end_time']
        print "args['log_end_time_str']:", args['log_end_time_str']

        args['log_data_min_time'] = args['log_start_time'] - timedelta(
            hours=48)
        print "args['log_data_min_time']:", args['log_data_min_time']

        args['log_data_max_time'] = args['log_end_time'] + timedelta(hours=48)
        print "args['log_data_max_time']:", args['log_data_max_time']

        if args['log_hash'] == 0:
            raise Exception("FATAL: invalid log_hash == 0 case")

        g_check_if_already_merged_function(args, log_hash)
        ''' now we're connected and ready to import, open dumped file and handle CREATE/INSERT
        operations for current target_type (DBMS type)'''

        if (use_popen_mode == False):
            sql_dump_file = open(dumped_sql_fp, 'rb')

        # output for try manual import mode
        # args['out_sql_dump_file'] = open("out_for_dbtype_{}.sql".format(args['file']), 'w')

        dprint("entering main loop")

        n_lines_parsed = 0
        while (True):
            if (use_popen_mode):
                line = dump_process.stdout.readline()
            else:
                line = sql_dump_file.readline()
            dprint("read line: " + line)
            # when EOF is reached, we'd get an empty string
            if (line == ""):
                print "\nreached end of file/output"
                break
            else:
                n_lines_parsed = n_lines_parsed + 1
                handle_sql3_dump_line(args, line)

        # finally call commit again in case the file didn't have a 'commit' line at the end
        print "### calling handler's commit func as we've reached the end..."

        handle_ret = g_commit_function(args, line)

        # call close() for that dbms handler

        operation = "merge/import"
        if (args['unmerge']):
            operation = "unmerge/delete"

        if (n_lines_parsed != 0):
            print(
                "\n=== SUCCESS - %s completed in %s seconds - tatal n_lines_parsed %d (not including bulk-inserted-table-content-lines)"
                % (operation, time.time() - proc_start_time, n_lines_parsed))
            ret = 0
            mv_azm_to_target_folder(args)
        else:
            raise Exception(
                "\n=== FAILED - %s - no lines parsed - tatal n_lines_parsed %d operation completed in %s seconds ==="
                % (operation, n_lines_parsed, time.time() - proc_start_time))

    except Exception as e:
        type_, value_, traceback_ = sys.exc_info()
        exstr = traceback.format_exception(type_, value_, traceback_)

        mv_target_folder = args['move_failed_import_azm_files_to_folder']
        if mv_target_folder is not None and not os.path.exists(
                mv_target_folder):
            os.makedirs(mv_target_folder)
        if mv_target_folder is not None:
            azm_fp = os.path.abspath(args['azm_file'])
            target_fp = os.path.join(mv_target_folder,
                                     os.path.basename(azm_fp))
            try:
                os.remove(target_fp)
                os.remove(target_fp + "_output.txt")
            except Exception as x:
                pass

            print "move the failed_import_azm_files_to_folder: mv {} to {}".format(
                azm_fp, target_fp)
            try:
                os.rename(azm_fp, target_fp)
                try:
                    os.rename(azm_fp + "_output.txt",
                              target_fp + "_output.txt")
                except:
                    pass
            except Exception as x:
                print "WARNING: move_failed_import_azm_files_to_folder failed"
                pass

        print "re-raise exception e - ", exstr
        raise e

    finally:
        print "cleanup start..."
        if (use_popen_mode):
            # clean-up dump process
            try:
                dump_process.kill()
                dump_process.terminate()
            except:
                pass
        else:
            try:
                sql_dump_file.close()
            except:
                pass
        try:
            g_close_function(args)
        except:
            pass

        if debug_helpers.debug == 1:
            print "debug mode keep_tmp_dir..."
            pass  # keep files for analysis of exceptions in debug mode
        else:
            print "cleanup_tmp_dir..."
            cleanup_tmp_dir(dir_processing_azm)

    return ret
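
The sqlite3-merge branch above rewrites the dumped .sql file line by line before feeding it to the sqlite3 CLI via .read. A minimal sketch of those per-line rewrite rules as a standalone helper (the name adjust_dump_line is hypothetical, not part of the module):

def adjust_dump_line(ofl, is_target_exists):
    # make CREATE TABLE idempotent for re-imports and drop the android_metadata schema/rows
    ofl = ofl.replace("CREATE TABLE android_metadata (locale TEXT);", "", 1)
    ofl = ofl.replace('CREATE TABLE "', 'CREATE TABLE IF NOT EXISTS "', 1)
    if ofl.startswith('INSERT INTO "android_metadata"'):
        return ""
    if is_target_exists and (ofl.startswith("CREATE TABLE geometry_columns")
                             or ofl.startswith("CREATE TABLE spatial_ref_sys")
                             or ofl.startswith('INSERT INTO "spatial_ref_sys"')
                             or ofl.startswith('INSERT INTO "geometry_columns"')):
        # the target db already has the QGIS support tables - don't re-create or re-insert them
        return ""
    return ofl
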
Example #2
def handle_sql3_dump_line(args, line):
    global g_is_in_insert
    global g_is_in_create
    global g_insert_buf
    global g_create_buf
    global g_insert_function

    if g_is_in_insert is True:
        g_insert_buf = g_insert_buf + line

        if line.strip().endswith(");"):

            handle_ret = g_insert_function(args, g_insert_buf.strip())

            g_is_in_insert = False
            g_insert_buf = None

            # dprint("multi line insert END:")

            return handle_ret
        else:
            # dprint("multi line insert still not ending - continue")
            return True

    if g_is_in_create:
        g_create_buf += line.strip()
        if line.strip().endswith(");"):
            line = g_create_buf
            print "multi line create END\ng_is_in_create final line:", line
        else:
            return True

    is_omit_table = False

    if line.startswith("CREATE TABLE ") or g_is_in_create:
        g_is_in_create = False

        # in case the user is importing an already 'sqlite3 merged' azqdata.db, it will contain the CREATE TABLE IF NOT EXISTS lines which we created earlier - restore them...
        line = line.replace("CREATE TABLE IF NOT EXISTS ", "CREATE TABLE ", 1)

        if not line.strip().endswith(");"):
            print("multi line create START")
            g_is_in_create = True
            g_create_buf = line.strip()
            return True

        table_name = line.split(" (")[0].replace("CREATE TABLE ",
                                                 "").replace("\"", "")
        dprint("check table_name is_omit_table: " + table_name)
        is_omit_table = table_name in args['omit_tables_array']
        dprint("is this table in --omit_tables ? " + table_name + " = " +
               str(is_omit_table))

        if args['only_tables_on']:
            dprint(
                "--only_tables on - check if we should exclude this table: " +
                table_name)
            is_omit_table = True
            if table_name in args['only_tables_array']:
                is_omit_table = False
            dprint("--only_tables on - exclude this table? " + table_name +
                   " = " + str(is_omit_table))

    if (line.startswith("CREATE TABLE ")
            and not line.startswith("CREATE TABLE android_metadata")
            and not ("_layer_statistics" in line) and not is_omit_table):

        # get table name:
        table_name = line.split(" ")[2].replace("\"", "")

        if not args['unmerge']:
            print "firt delete all rows with wrong modem timestamp before 48h of log_start_time and log_end_time for this table:", table_name

            sqlstr = "delete from {} where time < '{}' or time > '{}';".format(
                table_name, args['log_data_min_time'],
                args['log_data_max_time'])
            cmd = [args['sqlite3_executable'], args['file'], sqlstr]
            print "call cmd:", cmd
            try:
                outstr = subprocess.check_output(cmd)
                print "delete from ret outstr:", outstr
            except Exception as se:
                print "WARNING: delete pre y2k rows from table failed exception:", se

        print("\nprocessing: create/alter/insert for table_name: " +
              table_name)

        # check if create is required for this table (omit if empty)
        create = True
        if (not g_check_and_dont_create_if_empty):
            pass  # print "create_empty_tables specified in option - do create" # always create - flag override
        else:
            # checking can make final processing slower...
            print("checking if table is empty ...")
            sqlstr = "SELECT 1 FROM {} LIMIT 1".format(table_name)
            cmd = [args['sqlite3_executable'], args['file'], sqlstr]
            print "call cmd:", cmd
            outstr = subprocess.check_output(cmd)
            # print "check has_rows out: "+outstr
            has_rows = (outstr.strip() == "1")
            # print "check has_rows ret: " + str(has_rows)
            if (has_rows):
                print "table is not empty - do create"
                create = True
            else:
                print "table is empty - omit create"
                create = False

        if create:
            print "processing create at handler module..."  # always create - flag override
            handle_ret = g_create_function(args, line)

    elif (line.startswith("COMMIT;")):
        print("\nprocessing: commit")
        handle_ret = g_commit_function(args, line)
        return handle_ret
    elif (line.startswith("INSERT INTO")):

        raise Exception(
            "ABORT: currently bulk insert mode is used so only the schema should be dumped/read... found INSERT INTO - abort"
        )

        table_name = line.split(" ")[2].replace("\"", "")
        if (table_name == "android_metadata"):
            return True  #omit

        line_stripped = line.strip()
        if line_stripped.endswith(");"):
            # dprint("single line insert")
            handle_ret = g_insert_function(args, line_stripped)
            return handle_ret
        else:
            dprint("multi line insert START")
            g_is_in_insert = True
            g_insert_buf = line
            return True
    else:
        # dprint "omit line: "+line
        return True

    return False
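
The --omit_tables / --only_tables handling in the CREATE TABLE branch boils down to a small predicate; a minimal sketch (hypothetical helper, assuming omit_tables_array and only_tables_array are already parsed into lists in args):

def is_table_omitted(table_name, args):
    # --omit_tables: skip tables explicitly listed
    omit = table_name in args['omit_tables_array']
    # --only_tables: when enabled, skip every table that is not explicitly listed
    if args['only_tables_on']:
        omit = table_name not in args['only_tables_array']
    return omit
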
Example #3
def unzip_azm_to_tmp_folder(args):

    dprint("unzip_azm_to_tmp_folder 0")
    print "args['azm_file']: " + args['azm_file']
    azm_fp = os.path.abspath(args['azm_file'])
    print "azm_fp: " + azm_fp

    if os.path.isfile(azm_fp):
        pass
    else:
        raise Exception("INVALID: - azm file does not exist at given path: " +
                        str(azm_fp) + " - ABORT")

    dir_azm_unpack = os.path.dirname(azm_fp)
    print "dir_azm_unpack: " + dir_azm_unpack
    azm_name_no_ext = os.path.splitext(os.path.basename(azm_fp))[0]
    print "azm_name_no_ext: " + azm_name_no_ext
    if 'TMP_GEN_PATH' in os.environ:
        dir_azm_unpack = os.environ['TMP_GEN_PATH']
        print "dir_azm_unpack using TMP_GEN_PATH:", dir_azm_unpack
    dir_processing_azm = os.path.join(
        dir_azm_unpack, "tmp_azm_db_merge_" + str(uuid.uuid4()) + "_" +
        azm_name_no_ext.replace(" ", "-"))  # replace 'space' in azm file name
    args['dir_processing_azm'] = dir_processing_azm

    dprint("unzip_azm_to_tmp_folder 1")

    # try clear tmp processing folder just in case it exists from manual unzip or previous failed imports
    try:
        shutil.rmtree(dir_processing_azm)
    except Exception as e:
        estr = str(e)
        if ("cannot find the path specified" in estr
                or "No such file or" in estr):
            pass
        else:
            print("rmtree dir_processing_azm: " + str(e))
            raise e

    dprint("unzip_azm_to_tmp_folder 2")

    os.mkdir(dir_processing_azm)

    dprint("unzip_azm_to_tmp_folder 3")

    try:
        azm = zipfile.ZipFile(args['azm_file'], 'r')
        azm.extract("azqdata.db", dir_processing_azm)

        try:
            # handle malformed db cases

            import pandas as pd
            import sqlite3

            dbfile = os.path.join(dir_processing_azm, "azqdata.db")
            dbcon = sqlite3.connect(dbfile)
            integ_check_df = pd.read_sql("PRAGMA integrity_check;", dbcon)
            try:
                # we don't use dbcon in further azm_db_merge code, and the db file can be removed if integrity is not ok - avoid file locks
                dbcon.close()
            except:
                pass
            print "azm_db_merge: sqlite db integ_check_df first row:", integ_check_df.iloc[
                0]
            if integ_check_df.iloc[0].integrity_check == "ok":
                print "azm_db_merge: sqlite3 db integrity check ok"
            else:
                print "azm_db_merge: sqlite3 db integrity check failed - try recover..."
                dump_ret = subprocess.call(
                    "sqlite3 '{}' .dump > '{}.txt'".format(dbfile, dbfile),
                    shell=True)
                print "azm_db_merge: dump_ret:", dump_ret
                if dump_ret != 0:
                    print "WARNING: azm_db_merge: recov corrupt sqlite db file - failed to dump sqlite db file"
                else:
                    os.remove(dbfile)
                    import_ret = subprocess.call(
                        "sqlite3 '{}' < '{}.txt'".format(dbfile, dbfile),
                        shell=True)
                    print "azm_db_merge: recov corrupt db file import ret:", import_ret
        except:
            type_, value_, traceback_ = sys.exc_info()
            exstr = traceback.format_exception(type_, value_, traceback_)
            print "WARNING: check malformed db exception:", exstr

        if args['get_schema_shasum_and_exit']:
            print "get_schema_shasum_and_exit start"
            sha1 = hashlib.sha1()
            #print "get_schema_shasum_and_exit 1"
            dbfile = os.path.join(dir_processing_azm, "azqdata.db")
            #print "get_schema_shasum_and_exit 2"
            cmd = [args['sqlite3_executable'], dbfile, ".schema"]
            print "call cmd:", cmd
            schema = subprocess.check_output(cmd)
            #print "get_schema_shasum_and_exit 3"
            sha1.update(schema)
            #print "get_schema_shasum_and_exit 4"
            print str(
                sha1.hexdigest()
            ) + " is the sha1 for the schema of azqdata.db inside azm: " + args[
                'azm_file']
            print "get_schema_shasum_and_exit done"
            azm.close()
            cleanup_tmp_dir(dir_processing_azm)
            exit(0)
        azm.close()
    except Exception as e:
        try:
            cleanup_tmp_dir(dir_processing_azm)
        except:
            pass
        raise Exception(
            "Invalid azm_file: azm file does not contain azqdata.db database - exception: "
            + str(e))

    dprint("unzip_azm_to_tmp_folder 4")

    args['file'] = os.path.join(dir_processing_azm, "azqdata.db")
    return dir_processing_azm
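
The malformed-db check above pulls in pandas just to read one PRAGMA row; a minimal sketch of the same integrity check using only the sqlite3 stdlib module (assumes dbfile points at the extracted azqdata.db):

import sqlite3

def sqlite_integrity_ok(dbfile):
    # PRAGMA integrity_check returns a single row containing 'ok' when the db is healthy
    dbcon = sqlite3.connect(dbfile)
    try:
        row = dbcon.execute("PRAGMA integrity_check;").fetchone()
        return row is not None and row[0] == "ok"
    finally:
        dbcon.close()  # close early - the file may need to be removed/rebuilt if the check fails
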
Example #4
def create(args, line):
    global g_cursor, g_conn
    global g_prev_create_statement_table_name
    global g_prev_create_statement_column_names
    global g_exec_buf
    global g_is_ms, g_is_postgre
    global g_unmerge_logs_row

    g_prev_create_statement_column_names = None

    if args["pg_schema"] != "public":
        g_cursor.execute("SET search_path = '{}','public';".format(
            args["pg_schema"]))

    line_adj = sql_adj_line(line)
    table_name = get_table_name(line_adj)
    schema_per_month_name = "per_month_{}".format(table_name)

    if table_name == "logs":
        uline = line.replace('"log_hash" BIGINT,', '"log_hash" BIGINT UNIQUE,',
                             1)
        print "'logs' table cre - make log_hash unique for this table: ", uline
        line_adj = sql_adj_line(uline)
    if table_name == "wifi_scanned":
        wifi_scanned_MIN_APP_V0 = 3
        wifi_scanned_MIN_APP_V1 = 0
        wifi_scanned_MIN_APP_V2 = 742
        print "check azm apk ver for wifi_scanned table omit: ", args[
            "azm_apk_version"]
        if args["azm_apk_version"] < wifi_scanned_MIN_APP_V0 * 1000 * 1000 + wifi_scanned_MIN_APP_V1 * 1000 + wifi_scanned_MIN_APP_V2:
            print "omit invalidly huge wifi_scanned table in older app vers requested by a customer - causes various db issues"
            return False

    if args['import_geom_column_in_location_table_only'] and table_name != "location":
        line_adj = sql_adj_line(line.replace(',"geom" BLOB', '', 1))

    if (g_unmerge_logs_row is not None):
        print "### unmerge mode - delete all rows for this azm in table: " + table_name
        """ now we use log_hash - no need to parse time
        # remove 3 trailing 000 from microsecs str
        start_dt_str = str(g_unmerge_logs_row['log_start_time'])[:-3] 
        end_dt_str = str(g_unmerge_logs_row['log_end_time'])[:-3]
        """
        sqlstr = "delete from \"" + table_name + "\" where \"log_hash\" = {}".format(
            g_unmerge_logs_row['log_hash'])
        g_exec_buf.append(sqlstr)

        return True

    g_prev_create_statement_table_name = table_name
    sqlstr = line_adj
    '''
    Now get local columns
    Example sqlstr:
    CREATE TABLE "browse" ("time" DATETIME,"time_ms" INT,"posid" INT,"seqid" INT,"netid" INT,  "Browse_All_Session_Throughput_Avg" real, "Data_Browse_Throughput" real, "Data_Browse_Throughput_Avg" real, "Data_Browse_Total_Loaded_Obj" smallint, "Data_Browse_Total_Page_Obj" smallint, "Data_Browse_Page_Load_Time" real, "Data_Browse_Page_Load_Time_Avg" real, "Data_Browse_Total_Sessions" smallint, "Data_Browse_Total_Success" smallint, "Data_Browse_Total_Fail_Page" smallint, "Data_Browse_Total_Fail_Obj" smallint, "Data_Browse_Total_Timeout" smallint, "Data_Browse_Exterior_Fail_Page" smallint, "Data_Browse_Exterior_Fail_Obj" smallint, "Browse_Throughput" real, "Browse_Throughput_max" real, "Browse_Throughput_min" real, "Browse_Duration" real, "Browse_Duration_max" real, "Browse_Duration_min" real);
    '''
    # get part inside parenthesis
    ls = line_adj.split('" (')
    dprint("ls :" + str(ls))
    ls = ls[1].split(");")[0]
    # split by comma
    ls = ls.split(",")

    # parse column names and keep for insert commands
    local_columns = []
    local_column_names = []
    for lsp in ls:
        splitted = lsp.split('"')
        dprint("splitted: " + str(splitted))
        col_name = splitted[1]
        col_type = splitted[2].strip()

        omit_col = False
        """
        import_geom_column_in_location_table_only feature already implemented at line_adj above
        if args['import_geom_column_in_location_table_only'] and col_name == "geom" and table_name != "location":
            omit_col = True
        """

        if omit_col == False:
            local_columns.append([col_name, col_type])
            local_column_names.append(col_name)

    # args['prev_create_statement_column_names']
    g_prev_create_statement_column_names = str(local_column_names).replace(
        "'", "").replace("[", "(").replace("]", ")")

    remote_column_names = None

    is_contains_geom_col = False

    try:
        dprint("create sqlstr: " + sqlstr)

        if g_is_postgre:

            if args['pg10_partition_by_month']:
                if table_name == "logs":
                    # dont partition logs table
                    pass
                else:
                    # create target partition for this log + table
                    # ok - partition this table
                    sqlstr = sqlstr.replace(";",
                                            "") + " PARTITION BY RANGE (time);"
                    try:
                        with g_conn as c:
                            g_cursor.execute(
                                "SELECT schema_name FROM information_schema.schemata WHERE schema_name = '{}';"
                                .format(schema_per_month_name))
                            if bool(g_cursor.rowcount):
                                print "schema_per_month_name already exists:", schema_per_month_name
                                pass
                            else:
                                print "cre schema now because: NOT schema_per_month_name already exists:", schema_per_month_name
                                with g_conn as c:
                                    c_table_per_month_sql = "create schema {};".format(
                                        schema_per_month_name)
                                    ret = g_cursor.execute(
                                        c_table_per_month_sql)
                                    g_conn.commit()
                                    print "success: create per_month [" + c_table_per_month_sql + "] success"
                    except:
                        type_, value_, traceback_ = sys.exc_info()
                        exstr = str(
                            traceback.format_exception(type_, value_,
                                                       traceback_))
                        print "WARNING: create table_per_month schema failed - next insert/COPY commands would likely faile now - exstr:", exstr

            dprint("create sqlstr postgres mod: " + sqlstr)
            is_contains_geom_col = True
            # postgis automatically creates/maintains "geometry_columns" 'view'

        if g_is_ms:
            dprint("create sqlstr mod mssql geom: " + sqlstr)
            is_contains_geom_col = True

        if g_is_postgre:
            with g_conn as c:
                g_cursor.execute(
                    "select * from information_schema.tables where table_schema=%s and table_name=%s",
                    (
                        args["pg_schema"],
                        table_name,
                    ))
                if bool(g_cursor.rowcount):
                    print "omit already existing table - raise exception to check columns instead"
                    raise Exception(
                        "table {} already exists - no need to create".format(
                            table_name))

        ret = None
        # use with for auto rollback() on g_conn on expected fails like already exists
        with g_conn as c:
            ret = g_cursor.execute(sqlstr)
        # commit now otherwise COPY might not see partitions
        g_conn.commit()
        dprint("create execute ret: " + str(ret))
        """ if control reaches here then the create is successful
        - table was not existing earlier - so remote cols must be the same
        """
        remote_column_names = local_column_names

    except Exception as e:
        emsg = str(e)
        dprint("create failed: " + emsg + "\n from sqlstr:\n" + sqlstr +
               "\nori line:\n" + line)
        if ("There is already an object named" in emsg
                or " already exists" in emsg):

            dprint("""This table already exists -
            checking if all local columns already exist in remote
            - otherwise will add each missing cols to
            remote table before inserting to it.""")

            remote_columns = get_remote_columns(args, table_name)
            remote_column_names = get_col_names(remote_columns)

            if (len(remote_columns) == 0):
                raise Exception("FATAL: failed to parse/list remote columns")

            # now get local columns that are not in remote

            local_columns_not_in_remote = []

            for col in local_columns:
                col_name = col[0]
                col_type = col[1]

                ####### quickfix: col_type override for unsigned int32 cols from sqlite (bindLong already) - conv to bigint in pg as pg doesnt have unsigned
                if col_name == "lte_volte_rtp_source_ssrc" or col_name == "lte_volte_rtp_timestamp":
                    # might need to psql to do first manually if log was already imported using older azm_db_merge:
                    # alter table all_logs.lte_volte_rtp_msg alter column lte_volte_rtp_source_ssrc type bigint;
                    # alter table all_logs.lte_volte_rtp_msg alter column lte_volte_rtp_timestamp type bigint;

                    col_type = "bigint"
                #######################

                is_already_in_table = col_name in remote_column_names
                dprint("local_col_name: " + col_name + " col_type: " +
                       col_type + " - is_already_in_table: " +
                       str(is_already_in_table))
                if (not is_already_in_table):
                    local_columns_not_in_remote.append(' "{}" {}'.format(
                        col_name, col_type))
                # TODO: handle if different type?

            n_cols_to_add = len(local_columns_not_in_remote)

            if (n_cols_to_add == 0):
                dprint("n_cols_to_add == 0 - no need to alter table")
            else:
                print "n_cols_to_add: " + str(
                    n_cols_to_add
                ) + " - need to alter table - add cols:" + str(
                    local_columns_not_in_remote) + "\nremote_cols:\n" + str(
                        remote_columns)
                # example: ALTER TABLE dbo.doc_exa ADD column_b VARCHAR(20) NULL, column_c INT NULL ;
                alter_str = "ALTER TABLE \"{}\" ".format(table_name)
                alter_cols = ""

                for new_col in local_columns_not_in_remote:
                    # not first
                    prefix = ""
                    if (alter_cols != ""):
                        prefix = ", "
                    alter_cols = alter_cols + prefix + " ADD " + new_col

                alter_str = alter_str + alter_cols + ";"

                sqlstr = sql_adj_line(alter_str)
                print "execute alter_str: " + sqlstr
                exec_creatept_or_alter_handle_concurrency(sqlstr)

                # re-get remote cols
                remote_columns = get_remote_columns(args, table_name)
                remote_column_names = get_col_names(remote_columns)
                print("get_remote_columns after alter: " +
                      str(remote_column_names))

        else:
            raise Exception("FATAL: create table error - : \nemsg:\n " + emsg +
                            " \nsqlstr:\n" + sqlstr)

    if g_bulk_insert_mode:

        if args['pg10_partition_by_month']:
            if table_name == "logs":
                # dont partition logs table
                pass
            else:
                log_hash = args['log_hash']

                ##  check/create partitions for month for log_hash, prev month, after month
                ori_log_hash_datetime = datetime.datetime.fromtimestamp(
                    log_hash
                    & 0xffffffff)  # log_hash lower 32 bits is the timestamp
                months_pt_check_list = [
                    ori_log_hash_datetime + relativedelta(months=-1),
                    ori_log_hash_datetime,
                    ori_log_hash_datetime + relativedelta(months=+1)
                ]

                for log_hash_datetime in months_pt_check_list:

                    log_hash_ym_str = log_hash_datetime.strftime('%Y_%m')
                    print "log_hash_datetime:", log_hash_datetime

                    ntn = "logs_{}".format(
                        log_hash_ym_str
                    )  # simpler name because we got cases where schema's table name got truncated: activate_dedicated_eps_bearer_context_request_params_3170932708
                    pltn = "{}.{}".format(schema_per_month_name, ntn)
                    per_month_table_already_exists = False
                    with g_conn as c:
                        check_sql = "select * from information_schema.tables where table_schema='{}' and table_name='{}'".format(
                            schema_per_month_name, ntn)
                        print "check_sql:", check_sql
                        g_cursor.execute(check_sql)
                        if bool(g_cursor.rowcount):
                            per_month_table_already_exists = True

                    if per_month_table_already_exists:
                        print "omit create already existing per_month table:", pltn
                    else:
                        print "NOT omit create already existing per_month table:", pltn
                        cre_target_pt_sql = "CREATE TABLE {} PARTITION OF {} FOR VALUES from ('{}-1') to ('{}-1');".format(
                            pltn, table_name,
                            log_hash_datetime.strftime("%Y-%m"),
                            (log_hash_datetime +
                             relativedelta(months=+1)).strftime("%Y-%m"))
                        if args['pg10_partition_index_log_hash']:
                            cre_index_for_pt_sql = "CREATE INDEX ON {} (log_hash);".format(
                                pltn)
                            cre_target_pt_sql += " " + cre_index_for_pt_sql

                        print("cre_target_pt_sql:", cre_target_pt_sql)
                        exec_creatept_or_alter_handle_concurrency(
                            cre_target_pt_sql,
                            allow_exstr_list=[" already exists"])

        ###### let sqlite3 dump contents of table into file

        table_dump_fp = os.path.join(g_dir_processing_azm, table_name + ".csv")
        table_dump_format_fp = os.path.join(g_dir_processing_azm,
                                            table_name + ".fmt")

        #print("table_dump_fp: "+table_dump_fp)
        #print("table_dump_format_fp: "+table_dump_format_fp)

        # create dump csv of that table
        """ 
        example dump of logs table:
        sqlite3 azqdata.db -list -newline "|" -separator "," ".out c:\\azq\\azq_report_gen\\azm_db_merge\\logs.csv" "select * from logs"
        """

        # get col list, and hex(col) for blob columns

        geom_col_index = -1

        i = 0
        col_select = ""
        first = True
        dprint("local_columns: " + str(local_columns))
        for col in local_columns:
            col_name = col[0]
            col_type = col[1]
            if first:
                first = False
            else:
                col_select = col_select + ","

            pre = " "
            post = ""
            if col_type == "geometry" or (
                    g_is_postgre and col_type == "bytea") or (
                        g_is_ms and col_type.startswith("varbinary")):
                pre = " hex("
                post = ")"
                if col_name == "geom":
                    geom_col_index = i
            col_select = col_select + pre + col_name + post
            i = i + 1

        dprint("col_select: " + col_select)

        if g_is_ms:
            ret = call(
                [
                    args['sqlite3_executable'],
                    args['file'],
                    "-ascii",
                    "-list",
                    '-separator',
                    azm_db_constants.BULK_INSERT_COL_SEPARATOR_VALUE,
                    '-newline',
                    azm_db_constants.BULK_INSERT_LINE_SEPARATOR_VALUE,
                    '.out ' + '"' + table_dump_fp.replace("\\", "\\\\") +
                    '"',  # double backslash because it needs to go inside sqlite3 cmd parsing again      
                    'select ' + col_select + ' from ' + table_name
                ],
                shell=False)

        if g_is_postgre:
            dump_cmd = [
                args['sqlite3_executable'],
                args['file'],
                "-ascii",
                "-csv",
                '-separator',
                ',',
                '-newline',
                '\n',
                '.out ' + '"' + table_dump_fp.replace("\\", "\\\\") +
                '"',  # double backslash because it needs to go inside sqlite3 cmd parsing again
                'select ' + col_select + ' from ' + table_name
            ]
            dprint("dump_cmd:", dump_cmd)
            ret = call(dump_cmd, shell=False)

        table_dump_fp_adj = table_dump_fp + "_adj.csv"

        # dont add lines where all cols are null - bug in older azm files causing COPY to fail...
        all_cols_null_line = ""
        for ci in range(len(local_columns)):
            if ci != 0:
                all_cols_null_line += ","
        print "all_cols_null_line:", all_cols_null_line

        pd_csvadj_success = False

        # trying preliminary version (without geom conv yet) of pandas proved that it was slower than python file looping and also failing at events table so disable for now....
        """
        if g_is_postgre:
            # try use pandas to adjust csv instead of looping through file...
            try:
                import pandas as pd
                df = pd.read_csv(table_dump_fp, header=None, names=local_column_names)
                print "df.columns:", df.columns
                print "pd table_dump_fp df len:", len(df)
                df.geom = None
                df = df.dropna(how='all')
                df.to_csv(table_dump_fp_adj, header=None)
                pd_csvadj_success = True
            except:
                type_, value_, traceback_ = sys.exc_info()
                exstr = str(traceback.format_exception(type_, value_, traceback_))
                print "pd_csvadj exception:", exstr

        print "pd_csvadj_success:", pd_csvadj_success
        """

        if not pd_csvadj_success:
            with open(table_dump_fp, "rb") as of:
                with open(
                        table_dump_fp_adj, "wb"
                ) as nf:  # wb required for windows so that \n is 0x0A - otherwise \n would be 0x0D 0x0A and doesn't go with our fmt file and only 1 row would be inserted per table csv in bulk inserts...
                    while True:
                        ofl = of.readline()
                        if g_is_postgre:
                            ofl = ofl.replace(',""', ',')
                        """ no need to check this, only old stale thread versions would have these cases and will have other cases too so let it crash in all those cases
                        if ofl.strip() == all_cols_null_line:
                            continue
                        """

                        ofl = find_and_conv_spatialite_blob_to_wkb(ofl)

                        if ofl == "":
                            break

                        nf.write(ofl)

        table_dump_fp = table_dump_fp_adj

        dprint("dump table: " + table_name + " for bulk insert ret: " +
               str(ret))

        if (ret != 0):
            print "WARNING: dump table: " + table_name + " for bulk insert failed - likely sqlite db file error like: database disk image is malformed. In many cases, data is still correct/complete so continue."

        if (os.stat(table_dump_fp).st_size == 0):
            print "this table is empty..."
            return True

        # if control reaches here then the table is not empty

        if g_is_ms and is_contains_geom_col:
            # add this table:geom to 'geometry_columns' (table_name was set to UNIQUE so it will fail if already exists...)
            """ somehow not working - let QGIS detect automatically itself for now...
            try:
                insert_geomcol_sqlstr = "INSERT INTO \"geometry_columns\" VALUES('azq','dbo','{}','geom',NULL,4326,'POINT');".format(table_name)
                dprint("insert_geomcol_sqlstr: "+insert_geomcol_sqlstr)
                ret = g_cursor.execute(insert_geomcol_sqlstr)
                print "insert this table:geom into geometry_columns done"
            except Exception as e:
                estr = str(e)
                dprint("insert this table:geom into geometry_columns exception: "+estr)
                pass
            """

        # create fmt format file for that table
        """        
        generate format file:
        https://msdn.microsoft.com/en-us/library/ms178129.aspx
        
        format file contents:
        https://msdn.microsoft.com/en-us/library/ms191479(v=sql.110).aspx                
        """

        n_local_cols = len(local_column_names)

        fmt = open(table_dump_format_fp, "w")
        fmt.write("11.0\n")  # ver - 11.0 = SQL Server 2012
        fmt.write(str(n_local_cols) + "\n")  # n cols

        host_field_order = 0  # dyn gen - first inc will get it to 1
        host_file_data_type = "SQLCHAR"
        prefix_length = 0
        host_file_data_length = 0  # When a delimited text file having a prefix length of 0 and a terminator is imported, the field-length value is ignored, because the storage space used by the field equals the length of the data plus the terminator
        terminator = None  # dyn gen
        server_col_order = None  # dyn gen
        server_col_name = None  # dyn gen
        col_coalition = ""

        for col in local_column_names:
            host_field_order = host_field_order + 1
            if (n_local_cols == host_field_order):  #last
                terminator = azm_db_constants.BULK_INSERT_LINE_SEPARATOR_PARAM
            else:
                terminator = azm_db_constants.BULK_INSERT_COL_SEPARATOR_PARAM
            if not table_name.startswith("wifi_scanned"):
                dprint("remote_column_names: " + str(remote_column_names))
                pass
            dprint("col: " + str(col))
            server_col_order = remote_column_names.index(
                col) + 1  # not 0 based
            server_col_name = col  # always same col name
            fmt.write('{}\t{}\t{}\t{}\t"{}"\t{}\t"{}"\t"{}"\n'.format(
                host_field_order, host_file_data_type, prefix_length,
                host_file_data_length, terminator, server_col_order,
                server_col_name, col_collation))
        fmt.flush()
        fmt.close()

        # both dump csv and format fmt files are ready
        # execute bulk insert sql now

        if g_is_ms:
            sqlstr = "bulk insert \"{}\" from '{}' with ( formatfile = '{}' );".format(
                table_name, table_dump_fp, table_dump_format_fp)

        if g_is_postgre:
            colnames = ""
            first = True
            for col in local_column_names:
                if not first:
                    colnames = colnames + ","
                if first:
                    first = False
                colnames = colnames + '"' + col + '"'

            sqlstr = "copy \"{}\" ({}) from STDIN with (format csv, NULL '')".format(
                table_name, colnames)

        dprint("START bulk insert sqlstr: " + sqlstr)
        g_exec_buf.append((sqlstr, table_dump_fp))
        # print("DONE bulk insert - nrows inserted: "+str(ret.rowcount))

    return True
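
The pg10_partition_by_month block derives everything from the lower 32 bits of log_hash; a minimal sketch of how one month's partition DDL is built (hypothetical helper, assuming dateutil's relativedelta as the original code appears to use):

import datetime
from dateutil.relativedelta import relativedelta

def per_month_partition_sql(table_name, log_hash):
    # log_hash lower 32 bits is the log's unix timestamp
    log_dt = datetime.datetime.fromtimestamp(log_hash & 0xffffffff)
    schema_per_month_name = "per_month_{}".format(table_name)
    ntn = "logs_{}".format(log_dt.strftime('%Y_%m'))  # short name - the schema already carries the table name
    pltn = "{}.{}".format(schema_per_month_name, ntn)
    next_month = log_dt + relativedelta(months=+1)
    return "CREATE TABLE {} PARTITION OF {} FOR VALUES from ('{}-1') to ('{}-1');".format(
        pltn, table_name, log_dt.strftime("%Y-%m"), next_month.strftime("%Y-%m"))
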
Example #5
def connect(args):
    global g_bulk_insert_mode
    global g_dir_processing_azm
    global g_cursor, g_conn
    global g_exec_buf
    global g_is_ms, g_is_postgre

    if (args['target_db_type'] == 'postgresql'):
        print "PostgreSQL mode initializing..."
        g_is_postgre = True
        import psycopg2

    elif (args['target_db_type'] == 'mssql'):
        g_is_ms = True
        import pyodbc

    # cleanup old stuff just in case
    close(args)

    g_bulk_insert_mode = True  # always bulk insert mode now

    g_dir_processing_azm = args['dir_processing_azm']

    if g_is_ms:
        print "Connecting... Target DBMS type: mssql"
        dprint("connect args: {} {} {} {}".format(args['server_url'],
                                                  args['server_user'],
                                                  args['server_password'],
                                                  args['server_database']))
        driver = args['mssql_odbc_driver']
        connect_str = 'DRIVER={};SERVER={};DATABASE={};UID={};PWD={}'.format(
            driver, args['server_url'], args['server_database'],
            args['server_user'], args['server_password'])
        #unsafe as users might see in logs print "using connect_str: "+connect_str
        """
        https://msdn.microsoft.com/en-us/library/ms131281.aspx
        ODBC applications should not use Transact-SQL transaction statements such as
        BEGIN TRANSACTION, COMMIT TRANSACTION, or ROLLBACK TRANSACTION because this
        can cause indeterminate behavior in the driver. An ODBC application should
        run in autocommit mode and not use any transaction management functions or
        statements, or run in manual-commit mode and use the ODBC SQLEndTran
        function to either commit or roll back transactions.

        https://mkleehammer.github.io/pyodbc/api.html >> 'autocommit' in our case set to false and buffer all atomic cmds into g_exec_buf for run once before commit
        """
        g_conn = pyodbc.connect(connect_str, autocommit=False)

    elif g_is_postgre:
        print "Connecting... Target DBMS type: PostgreSQL"
        # example: conn = psycopg2.connect("dbname=azqdb user=azqdb")
        connect_str = "dbname={} user={} password={} port={}".format(
            args['server_database'], args['server_user'],
            args['server_password'], args['pg_port'])
        print connect_str
        if args['pg_host'] is not None:
            connect_str = "host=" + args['pg_host'] + " " + connect_str
        #unsafe as users might see in logs print "using connect_str: "+connect_str
        args['connect_str'] = connect_str
        g_conn = psycopg2.connect(connect_str)
    if (g_conn is None):
        print "psycopg2.connect returned None"
        return False

    print "connected"

    g_cursor = g_conn.cursor()

    # post connect steps for each dbms
    if g_is_postgre and not args['unmerge']:

        try_cre_postgis(
            schema="public")  # create postgis at public schema first

        if args["pg_schema"] != "public":
            print "pg mode create pg_schema:", args["pg_schema"]
            try:
                with g_conn as c:
                    ret = g_cursor.execute("create schema " +
                                           args["pg_schema"])
                    c.commit()
                    print "success: create schema " + args[
                        "pg_schema"] + " success"

            except Exception as e:
                estr = str(e)
                if 'already exists' in estr:
                    dprint("schema already exists")
                    pass
                else:
                    print("FATAL: CREATE schema failed:" + args["pg_schema"])
                    raise e
            # create postgis in public only - print "pg using schema start"
            # try_cre_postgis(schema=args["pg_schema"]) # inside new schema

    if g_is_ms:
        pass
        ''' somehow not working - let qgis detect itself for now...
        try:
            # set 'f_table_name' to unique so we can blindly insert table_name:geom (on create handlers) to it without checking (let mssql check)
            ret = g_cursor.execute("""
            CREATE TABLE [dbo].[geometry_columns](
            [f_table_catalog] [varchar](50) NULL,
            [f_table_schema] [varchar](50) NULL,
            [f_table_name] [varchar](100) NULL UNIQUE,
            [f_geometry_column] [varchar](50) NULL,
            [coord_dimension] [int] NULL,
            [srid] [int] NULL,
            [geometry_type] [varchar](50) NULL
            )
            """)
            print "created qgis table: geometry_columns"
        except Exception as e:
            pass
        
        try:
            # below execute would raise an exception if it is already created
            ret = g_cursor.execute("""
            CREATE TABLE spatial_ref_sys (srid INTEGER NOT NULL PRIMARY KEY,auth_name VARCHAR(256) NOT NULL,auth_srid INTEGER NOT NULL,ref_sys_name VARCHAR(256),proj4text VARCHAR(2048) NOT NULL);            
            """)
            print "created qgis table: spatial_ref_sys"
            # if control reaches here means the table didn't exist (table was just created and is empty) so insert wgs84 into it... 
            ret = g_cursor.execute("""
            INSERT INTO "spatial_ref_sys" VALUES(4326,'epsg',4326,'WGS 84','+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs');
            """)
            print "added wgs84 to qgis table: spatial_ref_sys"        
        except Exception as e:
            pass
        '''

    return True
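
connect() takes all of its settings from the shared args dict; an illustrative PostgreSQL-mode example of the keys it reads (all values below are placeholders, not real settings):

args = {
    'target_db_type': 'postgresql',    # or 'mssql'
    'server_database': 'azqdb',
    'server_user': 'azqdb',
    'server_password': 'example-password',
    'pg_host': 'localhost',
    'pg_port': 5432,
    'pg_schema': 'public',
    'server_url': None,                # mssql mode only
    'mssql_odbc_driver': None,         # mssql mode only
    'unmerge': False,
    'dir_processing_azm': '/tmp/tmp_azm_db_merge_example',
}
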
Example #6
def find_and_conv_spatialite_blob_to_wkb(csv_line):
    #print "fac csv_line:", csv_line
    spat_blob_offset = csv_line.find('0001E6100000')
    if spat_blob_offset == -1:
        return csv_line
    part = csv_line[spat_blob_offset:spat_blob_offset + 120 + 1]
    #print "part[120]:", part[120]
    dprint("csv_line spatialite_geom_part: " + part)

    spatialite_geom_contents = ""
    if (g_is_postgre and
        (part[120] == ',' or part[120] == '\n')) or (g_is_ms
                                                     and part[120] == '\t'):
        spatialite_geom_contents = part[0:120]
    else:
        dprint("check of spatialite_geom_part - failed - abort")
        return csv_line

    dprint("spatialite_geom_contents: len " +
           str(len(spatialite_geom_contents)) + " val: " +
           spatialite_geom_contents)
    # convert spatialite geometry blob to wkb
    """

    Spatialite BLOB Format (Point)
    ------------------------------

    http://www.gaia-gis.it/gaia-sins/BLOB-Geometry.html
    example:
    0001E6100000DD30C0F46C2A594041432013008E2B40DD30C0F46C2A594041432013008E2B407C01000000DD30C0F46C2A594041432013008E2B40FE

    parse:
    spatialite header: 00 (str_off 0 str_len 2)
    endian: 01 (str_off 2 str_len 2)
    SRID: E6 10 00 00 (str_off 4 str_len 8)
    MBR_MIN_X: DD 30 C0 F4 6C 2A 59 40 (str_off 12 str_len 16)
    MBR_MIN_Y: 41 43 20 13 00 8E 2B 40 (str_off 28 str_len 16)
    MBR_MAX_X: DD 30 C0 F4 6C 2A 59 40 (str_off 44 str_len 16)
    MBR_MAX_Y: 41 43 20 13 00 8E 2B 40 (str_off 60 str_len 16)
    MBR_END: 7C (str_off 76 str_len 2)
    CLASS_TYPE: 01 00 00 00 (str_off 78 str_len 8)
    POINT:
      X: DD 30 C0 F4 6C 2A 59 40 (str_off 86 str_len 16)
      Y: 41 43 20 13 00 8E 2B 40 (str_off 102 str_len 16)
    END: FE (str_off 118 str_len 2)

    ---

    WKB Format
    ----------

    See "3.3.2.6 Description of WKBGeometry Representations"
    in https://portal.opengeospatial.org/files/?artifact_id=829

    Point {
    double x;
    double y;
    };

    WKBPoint {
    byte byteOrder;
    uint32 wkbType; // 1
    Point point;
    }

    Therefore, for "Point" we need from spatialite blob parts:
    endian, CLASS_TYPE, POINT
    
    
    
    """
    # spatialite blob point size is 60 bytes = 120 chars in hex - as in above example and starts with 00
    if len(spatialite_geom_contents) == 120 and spatialite_geom_contents[
            0] == '0' and spatialite_geom_contents[1] == '0':
        endian = spatialite_geom_contents[2:4]  # 2 + len 2
        class_type = "<unset>"
        if g_is_postgre:
            """
            old code: class_type = spatialite_geom_contents[78:86] # 78 + 8

            change class_type to 'point' BITWISE_OR SRID flag as per https://trac.osgeo.org/postgis/browser/trunk/doc/ZMSgeoms.txt
           
            "
            wkbSRID = 0x20000000
            If the SRID flag is set it's value is encoded as a 4byte integer
        right after the type integer.
            "
    
            so our class is pont | wkbSRID = 0x20000001 (little endian 32: 01000020)
    
            then add srid "right after the type integer"
            our srid = 4326 = 0x10E6 (little endian 32: E6100000)
            
            therefore, class_type_point_with_srid_wgs84 little_endian is 01000020E6100000
    
            """

            class_type = "01000020E6100000"
        elif g_is_ms:
            class_type = ""

        point = spatialite_geom_contents[86:118]  # 86 + 16 + 16
        wkb = ""
        if g_is_postgre:
            wkb = endian + class_type + point
        if g_is_ms:
            """
            https://msdn.microsoft.com/en-us/library/ee320529.aspx
            
            0xE6100000 01 0C 0000000000001440 0000000000002440
            This string is interpreted as shown in the following table.
            Binary value Description
            E6100000 SRID = 4326
            01 Version = 1
            0C Serialization Properties = V + P (geometry is valid, single point)
            0000000000001440 X = 5
            0000000000002440 Y = 10
            """
            wkb = "E6100000010C" + point
        dprint("wkb: " + wkb)
        csv_line = csv_line.replace(spatialite_geom_contents, wkb, 1)
    else:
        dprint("not entering spatialite blob parse - len " +
               str(len(spatialite_geom_contents)))

    dprint("find_and_conv_spatialite_blob_to_wkb ret: " + csv_line)
    return csv_line
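
The offsets in the docstring can be sanity-checked against its own example blob; a small sketch that unpacks the point doubles and rebuilds the PostgreSQL-style hex the function would produce (example value copied from the comment above):

import struct, binascii

spatialite_hex = "0001E6100000DD30C0F46C2A594041432013008E2B40DD30C0F46C2A594041432013008E2B407C01000000DD30C0F46C2A594041432013008E2B40FE"
endian = spatialite_hex[2:4]    # 01 = little endian
point = spatialite_hex[86:118]  # X and Y as two little-endian doubles
x, y = struct.unpack('<dd', binascii.unhexlify(point))
print("lon/lat decoded from blob: {} {}".format(x, y))
# PostGIS-style hex: endian + (wkbPoint | wkbSRID flag) + SRID 4326 + point
wkb = endian + "01000020E6100000" + point
print("wkb: " + wkb)
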
Example #7
def check_if_already_merged(args, log_hash):
    global g_unmerge_logs_row
    global g_cursor
    global g_exec_buf
    global g_is_ms, g_is_postgre

    if args["pg_schema"] != "public":
        g_cursor.execute("SET search_path = '{}','public';".format(
            args["pg_schema"]))

    try:
        print "checking if this log_hash has already been imported/merged: " + log_hash
        sqlstr = "select \"log_hash\" from \"logs\" where \"log_hash\" = ?"
        if g_is_postgre:
            sqlstr = sqlstr.replace("?", "%s")
        print("check log cmd: " + sqlstr)

        row = None

        # use with for auto rollback() on g_conn on exception - otherwise we cant use the cursor again - would fail as: current transaction is aborted, commands ignored until end of transaction block
        ret = None
        with g_conn as c:
            ret = g_cursor.execute(sqlstr, [log_hash])
            row = g_cursor.fetchone()

        print("after cmd check if exists row:", row)

        if (row is None):

            # azm never imported

            if (args['unmerge']):
                # unmerge mode - this azm is not in target db
                raise Exception(
                    "ABORT: unmerge requested but this azm is not present in the target db's 'logs' table"
                )
            else:
                print "This log hasn't been imported into target db yet - ok to proceed"
                pass
                return True

        else:

            # azm already imported

            if (args['unmerge']):

                dprint("um0: row: " + str(row))

                if g_is_postgre or g_is_ms:
                    dprint("upg 0")
                    # row is a tuple - make it a dict
                    dprint("upg 01")
                    # now we only need 'log_hash' to unmerge and the used odbc cant parse geom too - cols = get_remote_columns(args,'logs')
                    cols = [['log_hash', 'bigint']]
                    dprint("upg 1: cols: " + str(cols))
                    drow = {}
                    i = 0
                    for col in cols:
                        print row[i]
                        drow[col[0]] = row[i]
                        i = i + 1
                    row = drow

                dprint("um1")

                print "### unmerge mode - delete start for azm: log_hash {}".format(
                    row['log_hash'])
                g_unmerge_logs_row = row

                sqlstr = "delete from \"logs\" where \"log_hash\" = '{}'".format(
                    log_hash)

                g_exec_buf.append(sqlstr)
                print "delete from logs table added to g_exec_buf..."

            else:
                raise Exception(
                    "ABORT: This log ({}) has already been imported/exists in target db (use --unmerge to remove first if you want to re-import)."
                    .format(log_hash))

    except Exception as e:
        estr = str(e)
        if ("Invalid object name 'logs'" in estr or '42S02' in estr
                or 'relation "logs" does not exist' in estr):
            print "looks like this is the first-time log import - no table named logs exists yet - ok..."
            if args['unmerge']:
                raise Exception(
                    "--unmerge mode called on an empty database: no related 'logs' table exist yet"
                )
            # first time import - no table named logs exists yet

        else:
            type_, value_, traceback_ = sys.exc_info()
            exstr = str(traceback.format_exception(type_, value_, traceback_))
            print "re-raise exception e - ", exstr
            raise e

    return False
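
The '?' to '%s' replacement above exists because the two drivers use different DB-API paramstyles (pyodbc is qmark-style, psycopg2 is %s/format-style); a minimal sketch of building the same parametrized check for either backend:

def log_hash_check_sql(is_postgre):
    # pyodbc (mssql) expects '?' placeholders, psycopg2 expects '%s'
    sqlstr = 'select "log_hash" from "logs" where "log_hash" = ?'
    if is_postgre:
        sqlstr = sqlstr.replace("?", "%s")
    return sqlstr

# usage: g_cursor.execute(log_hash_check_sql(g_is_postgre), [log_hash])
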
Example #8
def get_remote_columns(args, table_name):
    global g_cursor
    global g_is_ms, g_is_postgre

    dprint("table_name: " + table_name)
    sqlstr = ""
    if g_is_ms:
        sqlstr = "sp_columns @table_name=\"{}\"".format(table_name)
    if g_is_postgre:
        sqlstr = "select * from \"{}\" where false".format(table_name)

    dprint("check table columns sqlstr: " + sqlstr)
    ret = g_cursor.execute(sqlstr)
    dprint("query execute ret: " + str(ret))
    rows = g_cursor.fetchall()
    '''
    Now get remote column list for this table...

    '''
    remote_columns = []

    if g_is_postgre:
        colnames = [desc[0] for desc in g_cursor.description]
        for col in colnames:
            remote_columns.append([col, ""])
        return remote_columns

    if g_is_ms:
        # MS SQL
        for row in rows:
            '''
            MSSQL Column str return example:
            row n: {0: u'azqdemo', 1: u'dbo', 2: u'android_metadata', 3: u'locale', 4: -1, 5: u'text', u'DATA_TYPE': -1, 7: 2147483647, 8: None, 9: None, 10: 1, 11: None, 12: None, 13: -1, 14: None, 15: 2147483647, u'COLUMN_DEF': None, 17: u'YES', 18: 35, u'SCALE': None, u'TABLE_NAME': u'android_metadata', u'SQL_DATA_TYPE': -1, 6: 2147483647, u'NULLABLE': 1, u'REMARKS': None, u'CHAR_OCTET_LENGTH': 2147483647, u'COLUMN_NAME': u'locale', u'SQL_DATETIME_SUB': None, u'TABLE_OWNER': u'dbo', 16: 1, u'RADIX': None, u'SS_DATA_TYPE': 35, u'TYPE_NAME': u'text', u'PRECISION': 2147483647, u'IS_NULLABLE': u'YES', u'LENGTH': 2147483647, u'ORDINAL_POSITION': 1, u'TABLE_QUALIFIER': u'azqdemo'}

            Result:
            col_name: locale
            col_type: text
            '''
            rs = str(row)
            dprint("row n: " + rs)
            splitted = rs.split(", u")
            col_name = splitted[3].split("'")[1]
            dprint("col_name: " + col_name)
            col_type = splitted[4].split("'")[1]
            dprint("col_type: " + col_type)
            remote_columns.append([col_name, col_type])

        return remote_columns
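
The MSSQL branch parses the str(row) output of sp_columns, which is fragile against driver formatting changes; since pyodbc is also a DB-API driver, the cursor.description approach used in the PostgreSQL branch above would also yield the column names for both backends. A sketch of that alternative (not the module's actual code):

def get_remote_column_names_generic(cursor, table_name):
    # a query that returns no rows still populates cursor.description with the column metadata
    cursor.execute('select * from "{}" where 1=0'.format(table_name))
    return [desc[0] for desc in cursor.description]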