Beispiel #1
0
 def test_rewrite_dquotes(self):
     # A double-quoted @"path" reference is expected to be rewritten to the
     # quoted table name, with that table mapped to the expanded file path.
     query = """SELECT * FROM @"./data/remap.csv" WHERE frm = 'y';"""
     rewritten, tables = rewrite_sql([query], {})
     self.assertEqual("""SELECT * FROM "remap" WHERE frm = 'y';""", rewritten)
     expected_tables = {'remap': expand_path_and_exists('./data/remap.csv')[0]}
     self.assertEqual(expected_tables, tables)
Beispiel #2
0
 def test_rewrite_multiple(self):
     # Two @path references in one statement are each expected to be replaced
     # by their quoted table name, and both tables must appear in the mapping.
     query = """SELECT * FROM @./data/remap.csv WHERE frm = 'y' SELECT * FROM @./data/test1.csv WHERE foo = 'bar';"""
     expected_sql = """SELECT * FROM "remap" WHERE frm = 'y' SELECT * FROM "test1" WHERE foo = 'bar';"""
     rewritten, tables = rewrite_sql([query], {})
     self.assertEqual(expected_sql, rewritten)
     expected_tables = {
         name: expand_path_and_exists(f'./data/{name}.csv')[0]
         for name in ('remap', 'test1')
     }
     self.assertDictEqual(expected_tables, tables)
Beispiel #3
0
def execute(sql: str,
            headers=None,
            filters=None,
            output='-',
            output_format='csv',
            skip_lines=0,
            output_delimiter=',',
            column_remapping=None,
            table_remapping=None,
            auto_filter=False,
            save_db=None,
            load_db=None,
            dialect='unix',
            input_delimiter=',',
            input_quotechar='"',
            debug_=False):
    """
    Load the files referenced by the query into SQLite, run the query and emit the result.

    The query goes through ``rewrite_sql``, which returns the SQL to run plus a
    ``{table name: file path}`` mapping. Every mapped file is read with
    ``csv.reader`` and inserted into a table of that name; the result of the
    query is then written by ``do_output``.

    :param sql: query handed to ``rewrite_sql``
    :param headers: column names to use instead of the first row; either a list
        or a comma separated string
    :param filters:  {"col": [["filter", ...args...], ...]
    :param output: forwarded to ``do_output``
    :param output_format: forwarded to ``do_output``
    :param skip_lines: number of leading lines of each input file to discard
        before CSV parsing starts
    :param output_delimiter: forwarded to ``do_output``
    :param column_remapping: {"col": "map_to_col", ...}
    :param table_remapping:  {"table": "map_to_col", ...} (forwarded to ``rewrite_sql``)
    :param auto_filter: when true, every column without an explicit filter gets
        the filter ``[['num']]``
    :param save_db: path of a database file to create; it must not exist yet
    :param load_db: path of an existing database file to open (only used when
        ``save_db`` is not given)
    :param dialect: csv dialect passed to ``csv.reader``
    :param input_delimiter: delimiter passed to ``csv.reader``
    :param input_quotechar: quote character passed to ``csv.reader``
    :param debug_: value stored in the module level ``DEBUG`` flag
    :return: None
    :raises Error: if ``save_db`` already exists or a table cannot be created
    :raises FileNotFoundError: if ``load_db`` does not exist
    """

    global DEBUG
    DEBUG = debug_
    column_remapping = column_remapping or {}
    headers = headers or []
    if headers and isinstance(headers, str):
        headers = [h.strip() for h in headers.split(',')]
    # Work on a copy: auto filtering adds entries and must not leak into the
    # dict owned by the caller.
    filters = dict(filters) if filters else {}

    # Re-write the SQL, replacing filenames with table names and apply table re-mapping(s)
    sql, tables = rewrite_sql(sql, table_remapping)
    debug(sql, 'sql=')
    debug(tables, 'tables=')

    # Open the database
    if save_db:
        path, exists = expand_path_and_exists(save_db)
        if exists:
            raise Error(f"Database file {path} already exists.")
        con = sqlite3.connect(path)
    elif load_db:
        path, exists = expand_path_and_exists(load_db)
        if not exists:
            raise FileNotFoundError(f"Database file {path} not found.")
        con = sqlite3.connect(path)
    else:
        con = sqlite3.connect(":memory:")

    # Make sure the connection is released even when loading or querying fails.
    try:
        cur = con.cursor()

        # Read each CSV or TSV file and insert into a SQLite table based on the filename of the file
        for tablename, path in tables.items():
            # newline='' is the mode the csv module documents for reader input.
            with open(path, newline='') as f:
                if skip_lines:
                    for _ in range(skip_lines):
                        f.readline()

                reader = csv.reader(f,
                                    dialect=dialect,
                                    delimiter=input_delimiter,
                                    quotechar=input_quotechar)
                first, colnames, insert_sql = True, [], None

                for row in reader:
                    # NOTE(review): empty cells are dropped here, which shifts the
                    # remaining values to the left -- confirm this is intended.
                    row = [n.strip() for n in row if n]

                    if first:
                        # The first row is always consumed as the header row, even
                        # when explicit headers replace its content.
                        col_src = headers if headers else row
                        colnames = [
                            column_remapping.get(n.strip()) or n.strip()
                            for n in col_src
                        ]

                        # Apply auto filtering
                        if auto_filter:
                            for col in colnames:
                                if col not in filters:
                                    filters[col] = [['num']]
                            debug(filters, 'filters (auto)=')

                        debug(colnames, 'colnames=')
                        colnames_str = ','.join(f'"{c}"' for c in colnames)
                        # One placeholder per table column; sizing this from the
                        # first row breaks every INSERT when explicit headers
                        # have a different count.
                        placeholders = ', '.join(['?'] * len(colnames))
                        insert_sql = f"""INSERT INTO "{tablename}" ({colnames_str}) VALUES ({placeholders});"""

                        s = f"""CREATE TABLE "{tablename}" ({colnames_str});"""
                        debug(s)
                        try:
                            cur.execute(s)
                        except sqlite3.OperationalError as e:
                            raise Error(
                                "Failed to create table. Most likely cause is missing headers. "
                                "Use --headers/-r and/or --skip-lines/-k to setup headers."
                            ) from e

                        first = False
                        continue

                    filtered_row = apply_filters(filters, colnames, row)
                    cur.execute(insert_sql, filtered_row)

        con.commit()

        debug(sql, 'sql=')
        do_output(sql, cur, output, output_format, output_delimiter)
    finally:
        con.close()
Beispiel #4
0
 def test_rewrite_bad_syntax(self):
     # A bare "@" with no path after it. Nothing is asserted yet; the call only
     # has to complete and return a two-element result.
     # TODO: This doesn't fail, but it should
     query = """SELECT * FROM @ WHERE frm = 'y';"""
     # with self.assertRaises(FileNotFoundError):
     _rewritten, _tables = rewrite_sql([query], {})
Beispiel #5
0
 def test_rewrite_bad_quotes(self):
     # Mismatched quotes around the @-path are expected to raise FileNotFoundError.
     query = """SELECT * FROM @'foo" WHERE frm = 'y';"""
     self.assertRaises(FileNotFoundError, rewrite_sql, [query], {})
Beispiel #6
0
 def test_rewrite_db_table(self):
     # A plain table name (no "@" prefix) is expected to pass through unchanged
     # and to produce an empty table mapping.
     query = """SELECT * FROM foo WHERE frm = 'y';"""
     rewritten, tables = rewrite_sql([query], {})
     self.assertEqual("""SELECT * FROM foo WHERE frm = 'y';""", rewritten)
     self.assertEqual({}, tables)
Beispiel #7
0
 def test_rewrite_stdin_with_at_and_quotes(self):
     # Quoted '@-' is expected to be rewritten to the "stdin" table mapped to '-'.
     # TODO: Doesn't work... maybe don't support this?
     query = """SELECT * FROM '@-' WHERE frm = 'y';"""
     rewritten, tables = rewrite_sql([query], {})
     self.assertEqual("""SELECT * FROM "stdin" WHERE frm = 'y';""", rewritten)
     self.assertEqual({'stdin': '-'}, tables)
Beispiel #8
0
 def test_rewrite_stdin_with_at(self):
     # Unquoted @- is expected to be rewritten to the "stdin" table mapped to '-'.
     query = """SELECT * FROM @- WHERE frm = 'y';"""
     rewritten, tables = rewrite_sql([query], {})
     self.assertEqual("""SELECT * FROM "stdin" WHERE frm = 'y';""", rewritten)
     self.assertEqual({'stdin': '-'}, tables)
Beispiel #9
0
def execute(
        sql: str,
        headers=None,
        filters=None,
        output='-',
        output_format='table',
        skip_lines=0,
        output_delimiter=',',
        column_remapping=None,
        table_remapping=None,
        auto_filter=False,
        save_db=None,
        load_db=None,
        # dialect='unix',
        input_format='csv',
        input_delimiter=',',
        input_encoding='utf-8',
        input_compression=None,
        #input_quotechar='"',
        debug_=False):
    """
    Load the sources referenced by the query into SQLite, run the query and emit the result.

    The query goes through ``rewrite_sql``, which returns the SQL to run plus a
    ``{table name: path}`` mapping. Every mapped source is read through a
    ``Stream`` and inserted into a table of that name; the result of the query
    is then written by ``do_output``.

    :param sql: query handed to ``rewrite_sql``
    :param headers: column names handed to ``Stream``; either a list or a comma
        separated string. When empty, ``Stream`` is asked to use row 1.
    :param filters:  {"col": [["filter", ...args...], ...]
    :param output: forwarded to ``do_output``
    :param output_format: forwarded to ``do_output``
    :param skip_lines: rows 1..skip_lines are passed to ``Stream`` as ``skip_rows``
    :param output_delimiter: forwarded to ``do_output``
    :param column_remapping: {"col": "map_to_col", ...}
    :param table_remapping:  {"table": "map_to_col", ...} (forwarded to ``rewrite_sql``)
    :param auto_filter: when true, every column without an explicit filter gets
        the filter ``[['num']]``
    :param save_db: path of a database file to create; it must not exist yet
    :param load_db: path of an existing database file to open (only used when
        ``save_db`` is not given)
    :param input_format: ``format`` option passed to ``Stream``
    :param input_delimiter: ``delimiter`` option passed to ``Stream``
    :param input_encoding: ``encoding`` option passed to ``Stream``
    :param input_compression: ``compression`` option passed to ``Stream``
    :param debug_: value stored in the module level ``DEBUG`` flag
    :return: None
    :raises Error: if ``save_db`` already exists, column names are duplicated or
        a table cannot be created
    :raises FileNotFoundError: if ``load_db`` does not exist
    """

    global DEBUG
    DEBUG = debug_
    column_remapping = column_remapping or {}
    headers = headers or []
    if headers and isinstance(headers, str):
        headers = [h.strip() for h in headers.split(',')]
    # Work on a copy: auto filtering adds entries and must not leak into the
    # dict owned by the caller.
    filters = dict(filters) if filters else {}

    # Re-write the SQL, replacing filenames with table names and apply table re-mapping(s)
    sql, tables = rewrite_sql(sql, table_remapping)
    debug(sql, 'sql=')
    debug(tables, 'tables=')

    # Open the database
    if save_db:
        path, exists = expand_path_and_exists(save_db)
        if exists:
            raise Error(f"Database file {path} already exists.")
        con = sqlite3.connect(path)
    elif load_db:
        path, exists = expand_path_and_exists(load_db)
        if not exists:
            raise FileNotFoundError(f"Database file {path} not found.")
        con = sqlite3.connect(path)
    else:
        con = sqlite3.connect(":memory:")

    # TODO: when load_db is used, check the loaded database for table name conflicts.

    # Make sure the connection is released even when loading or querying fails.
    try:
        cur = con.cursor()

        # Read each source and insert into a SQLite table based on the filename of the file
        for tablename, path in tables.items():
            # Routed through debug() rather than print(): stdout is also where
            # the query result goes by default.
            debug(path, 'path=')
            with Stream(
                    path,
                    format=input_format,
                    delimiter=input_delimiter,
                    skip_rows=range(1, skip_lines + 1),
                    custom_parsers={},
                    custom_loaders={
                        's3': S3Loader,
                        'gs': GSLoader
                    },
                    custom_writers={},
                    ignore_blank_headers=True,
                    encoding=input_encoding,
                    compression=input_compression,
                    headers=headers if headers else 1,
            ) as stream:

                debug(stream.headers, "headers=")
                debug(stream.encoding, "encoding=")

                first, colnames, insert_sql = True, [], None
                # line_num is the zero-based index of the row among the rows the
                # stream yields, so warnings stay accurate after skipped rows.
                for line_num, row in enumerate(stream):
                    debug(row, "row=")
                    if not row:
                        error(f"Skipping blank line num. {line_num}\n")
                        continue
                    # Strip string cells and drop empty strings; other values are
                    # kept untouched.
                    row = [
                        n.strip() if isinstance(n, str) else n for n in row
                        if not isinstance(n, str) or n
                    ]
                    if first:
                        colnames = [
                            column_remapping.get(n.strip()) or n.strip()
                            for n in stream.headers
                        ]
                        # One placeholder per table column. Only rows whose length
                        # equals len(colnames) are inserted below, so sizing this
                        # from the first data row could break later INSERTs.
                        placeholders = ','.join(['?'] * len(colnames))
                        debug(placeholders, "placeholders=")

                        # Check for duplicate column names
                        dups = set(x for x in colnames if colnames.count(x) > 1)
                        if dups:
                            raise Error(
                                f"Invalid duplicate column name(s): {', '.join(dups)}"
                            )

                        # Apply auto filtering
                        if auto_filter:
                            for col in colnames:
                                if col not in filters:
                                    filters[col] = [['num']]
                            debug(filters, 'filters (auto)=')

                        debug(colnames, 'colnames=')
                        colnames_str = ','.join(f'"{c}"' for c in colnames)

                        check_filters_against_columns(filters, colnames)

                        s = f"""CREATE TABLE "{tablename}" ({colnames_str});"""
                        debug(s)
                        try:
                            cur.execute(s)
                        except sqlite3.OperationalError as e:
                            raise Error(
                                "Failed to create table. Most likely cause is missing headers. "
                                "Use --headers/-r and/or --skip-lines/-k to setup headers."
                            ) from e

                        insert_sql = f"""INSERT INTO "{tablename}" ({colnames_str}) VALUES ({placeholders});"""
                        first = False
                        # No `continue`: this row is data and is inserted below.

                    filtered_row = apply_filters(filters, colnames, row)
                    if len(filtered_row) != len(colnames):
                        error(
                            f"Warning: Invalid row: {row!r} (line={line_num}). Skipping...\n"
                        )
                        continue
                    debug(f"{insert_sql}, {filtered_row}")
                    cur.execute(insert_sql, filtered_row)

        con.commit()

        debug(sql, 'sql=')
        do_output(sql, cur, output, output_format, output_delimiter)
    finally:
        con.close()