Beispiel #1
0
def createTable(
    dbhandle,
    error,
    tablename,
    options,
    retry=True,
    ignore_empty=True,
    ignore_columns=[],
    rename_columns=[],
    lowercase=False,
    ignore_duplicates=True,
    indices=[],
    rows=None,
    headers=None,
    first_column=None,
    existing_tables=set(),
    append=False,
):
    """Derive column definitions from data and (re-)create a database table.

    Column types are guessed from ``rows`` via ``CSV.getMapColumn2Type``;
    if no rows are given, ``headers`` is used and every column gets the
    text/index type configured in ``options``.  Unless ``options.append``
    is set, an existing table of the same name is dropped and a new one is
    created.

    Args:
        dbhandle: DB-API connection (``cursor``, ``commit`` and ``rollback``
            are called on it).  Not touched when ``options.append`` is set.
        error: exception class raised by the database backend; it is caught
            around the DROP/CREATE statements and re-raised on failure.
        tablename: name of the table; interpolated verbatim into the SQL.
        options: object providing ``append``, ``indices``, ``index`` and
            ``text`` attributes.
        retry: if true, keep retrying (sleeping 5 seconds in between) when
            the database reports an error that looks like a lock.
        ignore_empty: forwarded to ``CSV.getMapColumn2Type``.
        ignore_columns: column names to skip (compared in lower case).
        rename_columns: ``"old:new"`` strings; the whole string is
            lower-cased before it is split.
        lowercase: lower-case header names before using them as columns.
        ignore_duplicates: silently skip repeated column names instead of
            raising ``ValueError``.
        indices: accepted for interface compatibility; this function reads
            ``options.indices`` instead.
        rows: data rows used to guess column types.
        headers: column names, used only when ``rows`` is empty.
        first_column: name substituted for an empty first header.
        existing_tables: names of tables known to exist; a failed DROP of
            one of these is retried instead of being ignored.
        append: accepted for interface compatibility; this function reads
            ``options.append`` instead.

    The list/set defaults are shared between calls but are only read,
    never modified.

    Returns:
        Tuple ``(take, map_column2type, ignored)``: the original header
        names that were kept, the header -> type mapping, and the ignored
        columns as reported by ``CSV.getMapColumn2Type`` (``0`` when only
        headers were supplied).

    Raises:
        ValueError: if neither rows nor headers are given, a column is
            duplicated (with ``ignore_duplicates`` off), or a column has
            no usable name.
        error: if dropping or creating the table fails and is not retried.
    """
    # create table by guessing column types from data type.
    if rows:
        map_column2type, ignored, max_values = CSV.getMapColumn2Type(
            rows, ignore_empty=ignore_empty, get_max_values=True
        )
        if ignored:
            E.info("ignored columns: %s" % str(ignored))

        headers = sorted(map_column2type.keys())

    elif headers:
        # no data to inspect: every column has an unknown (None) type
        map_column2type = dict.fromkeys(headers)
        max_values = {}
        ignored = 0

    else:
        # Fail early with a clear message, before any table is dropped.
        raise ValueError(
            "cannot create table %s: neither rows nor headers given" % tablename
        )

    columns_to_ignore = {x.lower() for x in ignore_columns}
    columns_to_rename = dict(x.lower().split(":") for x in rename_columns)

    take = []
    # associate headers to field names
    columns = []
    seen = set()
    for header_index, h in enumerate(headers):
        # str.lower() replaces the Python 2-only string.lower()
        hh = h.lower() if lowercase else h

        if hh in columns_to_ignore:
            continue

        if hh in seen:
            if ignore_duplicates:
                continue
            raise ValueError("duplicate column %s" % hh)

        seen.add(hh)
        take.append(h)

        column_type = map_column2type[h]
        if column_type == int:
            # choose the smallest integer type that holds the largest value
            max_value = max_values[h]
            if max_value > 2147483647:
                t = "BIGINT DEFAULT '0'"
            elif max_value > 32767:
                t = "INTEGER DEFAULT '0'"
            else:
                t = "SMALLINT DEFAULT '0'"
        elif column_type == float:
            t = "FLOAT DEFAULT '0'"
        elif h in options.indices:
            t = options.index
        else:
            t = options.text

        # remove special characters from column names
        if hh == "":
            if first_column is not None and header_index == 0:
                hh = first_column
            else:
                raise ValueError("column '%s' without header " % h)
        hh = columns_to_rename.get(hh, hh)
        hh = re.sub(r"""['"]""", "", hh)
        hh = re.sub(r"[,;.:\-\+/ ()%?]", "_", hh)
        if not hh:
            # e.g. a header consisting only of quote characters
            raise ValueError("column '%s' has no usable name" % h)
        if hh[0] in "0123456789":
            # column names must not start with a digit
            hh = "_" + hh
        columns.append("%s %s" % (hh, t))

    if not options.append:
        # delete old table if it exists
        while True:
            try:
                cc = dbhandle.cursor()
                # mysql: removed '' around table name
                # NOTE: identifiers cannot be bound as SQL parameters, so
                # tablename must come from a trusted source.
                statement = "DROP TABLE IF EXISTS %s" % tablename
                E.debug(statement)
                cc.execute(statement)
                dbhandle.commit()
                cc.close()
                E.info("existing table %s deleted" % tablename)
            except sqlite3.OperationalError as msg:
                # NOTE: this branch retries indefinitely, regardless of
                # the `retry` flag.
                E.warn(msg)
                time.sleep(5)
                continue
            except error as msg:
                E.warn("could not delete existing table %s: %s" % (tablename, str(msg)))
                dbhandle.rollback()
                if not retry:
                    raise error(msg)
                elif tablename in existing_tables:
                    # table exists, but drop did not work (e.g. database lock)
                    time.sleep(5)
                    continue
                else:
                    # table might not have existed
                    break
            break

        # create new table
        statement = "CREATE TABLE %s ( %s );" % (tablename, ", ".join(columns))

        E.debug("table create:\n# %s" % (statement))

        while True:
            try:
                cc = dbhandle.cursor()
                cc.execute(statement)
                cc.close()
                dbhandle.commit()
            except error as msg:
                E.warn("table creation failed: msg=%s, statement=\n  %s" % (msg, statement))
                # TODO: check for database locked msg
                if not retry:
                    raise error(msg)
                # only a locked database is worth retrying
                if not re.search("locked", str(msg)):
                    raise error("%s: %s" % (msg, statement))
                time.sleep(5)
                continue
            break

        E.info("table %s created successfully." % tablename)

    return take, map_column2type, ignored
Beispiel #2
0
def createTable(dbhandle,
                error,
                tablename,
                options,
                retry=True,
                ignore_empty=True,
                ignore_columns=[],
                rename_columns=[],
                lowercase=False,
                ignore_duplicates=True,
                indices=[],
                rows=None,
                headers=None,
                first_column=None,
                existing_tables=set(),
                append=False):
    """Build column definitions from data and create the table in the database.

    When ``rows`` is non-empty, column types come from
    ``CSV.getMapColumn2Type``; otherwise ``headers`` is required and each
    column receives ``options.index`` or ``options.text``.  If
    ``options.append`` is false, any existing table called ``tablename``
    is dropped and the table is created anew.

    Args:
        dbhandle: DB-API connection; ``cursor``, ``commit`` and
            ``rollback`` are called on it unless ``options.append`` is set.
        error: backend exception class, caught around DROP/CREATE and
            re-raised when the operation is not retried.
        tablename: table name, inserted verbatim into the SQL statements.
        options: object with ``append``, ``indices``, ``index`` and
            ``text`` attributes.
        retry: retry (with 5 second pauses) on errors that look like a
            locked database.
        ignore_empty: passed through to ``CSV.getMapColumn2Type``.
        ignore_columns: names of columns to leave out (matched in lower
            case).
        rename_columns: ``"old:new"`` pairs; each string is lower-cased
            as a whole before splitting.
        lowercase: convert header names to lower case.
        ignore_duplicates: skip duplicate column names rather than raise
            ``ValueError``.
        indices: kept for interface compatibility; ``options.indices`` is
            what this function consults.
        rows: data rows for type guessing.
        headers: column names, used when ``rows`` is empty.
        first_column: replacement name for an empty first header.
        existing_tables: tables known to exist; a failed DROP on one of
            them is retried.
        append: kept for interface compatibility; ``options.append`` is
            what this function consults.

    The list/set defaults are shared across calls but only ever read.

    Returns:
        ``(take, map_column2type, ignored)``: kept original header names,
        the header -> type mapping, and the ignored columns reported by
        ``CSV.getMapColumn2Type`` (``0`` when only headers were given).

    Raises:
        ValueError: neither rows nor headers supplied, duplicate column
            (with ``ignore_duplicates`` off), or a column without a usable
            name.
        error: dropping or creating the table failed and was not retried.
    """
    # create table by guessing column types from data type.
    if rows:
        map_column2type, ignored, max_values = CSV.getMapColumn2Type(
            rows, ignore_empty=ignore_empty, get_max_values=True)
        if ignored:
            E.info("ignored columns: %s" % str(ignored))

        headers = sorted(map_column2type.keys())

    elif headers:
        # without data all column types are unknown (None)
        map_column2type = dict.fromkeys(headers)
        max_values = {}
        ignored = 0

    else:
        # Report the real problem up front, before any table is dropped.
        raise ValueError(
            "cannot create table %s: neither rows nor headers given" %
            tablename)

    columns_to_ignore = {x.lower() for x in ignore_columns}
    columns_to_rename = dict(x.lower().split(":") for x in rename_columns)

    take = []
    # associate headers to field names
    columns = []
    seen = set()
    for header_index, h in enumerate(headers):
        # string.lower() only exists in Python 2; use the str method
        hh = h.lower() if lowercase else h

        if hh in columns_to_ignore:
            continue

        if hh in seen:
            if ignore_duplicates:
                continue
            raise ValueError("duplicate column %s" % hh)

        seen.add(hh)
        take.append(h)

        column_type = map_column2type[h]
        if column_type == int:
            # pick the narrowest integer type that fits the maximum value
            max_value = max_values[h]
            if max_value > 2147483647:
                t = "BIGINT DEFAULT '0'"
            elif max_value > 32767:
                t = "INTEGER DEFAULT '0'"
            else:
                t = "SMALLINT DEFAULT '0'"
        elif column_type == float:
            t = "FLOAT DEFAULT '0'"
        elif h in options.indices:
            t = options.index
        else:
            t = options.text

        # remove special characters from column names
        if hh == "":
            if first_column is not None and header_index == 0:
                hh = first_column
            else:
                raise ValueError("column '%s' without header " % h)
        hh = columns_to_rename.get(hh, hh)
        hh = re.sub(r'''['"]''', "", hh)
        hh = re.sub(r"[,;.:\-\+/ ()%?]", "_", hh)
        if not hh:
            # nothing left after stripping quotes (e.g. header was "'")
            raise ValueError("column '%s' has no usable name" % h)
        if hh[0] in "0123456789":
            # a leading digit is not allowed in a column name
            hh = "_" + hh
        columns.append("%s %s" % (hh, t))

    if not options.append:
        # delete old table if it exists
        while True:
            try:
                cc = dbhandle.cursor()
                # mysql: removed '' around table name
                # NOTE: table names cannot be passed as bound parameters;
                # tablename is expected to be trusted input.
                statement = "DROP TABLE IF EXISTS %s" % tablename
                E.debug(statement)
                cc.execute(statement)
                dbhandle.commit()
                cc.close()
                E.info("existing table %s deleted" % tablename)
            except sqlite3.OperationalError as msg:
                # NOTE: retried without limit, independent of `retry`.
                E.warn(msg)
                time.sleep(5)
                continue
            except error as msg:
                E.warn("could not delete existing table %s: %s" %
                       (tablename, str(msg)))
                dbhandle.rollback()
                if not retry:
                    raise error(msg)
                elif tablename in existing_tables:
                    # table exists, but drop did not work (e.g. database lock)
                    time.sleep(5)
                    continue
                else:
                    # table might not have existed
                    break
            break

        # create new table
        statement = "CREATE TABLE %s ( %s );" % (tablename, ", ".join(columns))

        E.debug("table create:\n# %s" % (statement))

        while True:
            try:
                cc = dbhandle.cursor()
                cc.execute(statement)
                cc.close()
                dbhandle.commit()
            except error as msg:
                E.warn("table creation failed: msg=%s, statement=\n  %s" %
                       (msg, statement))
                # TODO: check for database locked msg
                if not retry:
                    raise error(msg)
                # anything other than a lock will not be fixed by waiting
                if not re.search("locked", str(msg)):
                    raise error("%s: %s" % (msg, statement))
                time.sleep(5)
                continue
            break

        E.info("table %s created successfully." % tablename)

    return take, map_column2type, ignored