Esempio n. 1
0
def load_data(col_map, data):
    """Loads the data into the database.

    The row is converted with ``f.raw_input_to_list`` and buffered in
    ``cfg.input_data``. The buffer is written with a single ``executemany``
    call and committed once it holds ``cfg.batch_size`` rows, or when
    ``data`` is None (flush request). Nothing is executed when the buffer
    is empty.

    Parameters
    ----------
    col_map : [str,]
        The columns to load the data into
    data : bytes, bytearray, str
        The data to load. If data is None the buffered rows are flushed.
    """
    if data is not None:
        values = f.raw_input_to_list(data)
        if values:
            # If the data has more values than the header provided, ignore the end (green data set has that)
            while len(values) > len(col_map):
                f.debug(
                    "Removing extra row value entry not present in the header."
                )
                values.pop()
            cfg.input_data.append(values)

    batch_full = len(cfg.input_data) == cfg.batch_size
    flush_requested = data is None
    # An empty buffer has nothing to insert: avoid a pointless
    # executemany()/commit() round trip on a final flush.
    if not cfg.input_data or not (batch_full or flush_requested):
        return

    f.debug("Executing statement:")
    stmt = generate_statement(col_map)
    f.debug(stmt)
    cur = cfg.conn.cursor()
    try:
        cur.executemany(stmt, cfg.input_data)
        f.debug("Commit")
        cfg.conn.commit()
    finally:
        # Release the cursor even when the insert or the commit fails.
        cur.close()
    f.verbose("{0} rows loaded".format(len(cfg.input_data)))
    cfg.input_data.clear()
Esempio n. 2
0
def load_data(col_map, data):
    """Loads the data into the database.

    A non-empty row is trimmed in place to the number of columns in
    ``col_map``, converted to a tuple and buffered in ``cfg.input_data``.
    The buffer is written via ``f.executemany`` and committed once it holds
    ``cfg.batch_size`` rows, or when ``data`` is None and rows are buffered.

    Whatever the outcome of a flush (success, failed insert, failed commit),
    the cursor is closed and the buffer is emptied, so a failed batch can
    never leak into a later call.

    Parameters
    ----------
    col_map : [str,]
        The columns to load the data into
    data : [str,]
        The data to load. If data is None the array will be loaded and flushed.

    Raises
    ------
    Exception
        Whatever the database driver raised for the failing batch (or, with
        debug output enabled, for the first failing record).
    """
    if data is not None and len(data) > 0:
        # If the data has more values than the header provided, ignore the end (green data set has that)
        while len(data) > len(col_map):
            f.debug(
                "Removing extra row value entry not present in the header.")
            data.pop()
        # tuple or dictionary only for SQL Server
        cfg.input_data.append(tuple(data))

    # If batch size has been reached or input array should be flushed
    flush_requested = data is None and len(cfg.input_data) > 0
    if not (len(cfg.input_data) == cfg.batch_size or flush_requested):
        return

    f.debug("Executing statement:")
    stmt = generate_statement(col_map)
    f.debug(stmt)
    cur = cfg.conn.cursor()
    try:
        try:
            f.executemany(cur, stmt)
        except Exception:
            # Rollback old batch (needed for at least Postgres to finish transaction)
            cfg.conn.rollback()
            # Debug output is not enabled, give up on the current batch
            if not cfg.debug:
                raise
            # Debug output is enabled: replay the batch row by row to
            # find the failing record
            for record in cfg.input_data:
                try:
                    cur.execute(stmt, record)
                except Exception:
                    f.debug("Error with record: {0}".format(record))
                    # Rollback old batch (needed for at least Postgres to finish transaction)
                    cfg.conn.rollback()
                    raise
        f.debug("Commit")
        cfg.conn.commit()
        f.verbose("{0} rows loaded.".format(len(cfg.input_data)))
    finally:
        # Single cleanup point for every exit path, including a failing commit.
        cur.close()
        cfg.input_data.clear()
Esempio n. 3
0
def run(cmd):
    """Runs csv2db.

    This function is the main entry point for csv2db. It parses the
    arguments, looks up the input files and then either generates a
    CREATE TABLE statement or loads the files into the database.

    Parameters
    ----------
    cmd : str array
        The arguments passed

    Returns
    -------
    None
        No exit code is returned; connection and loading failures are
        reported on standard output.
    """
    args = parse_arguments(cmd)

    # Set verbose and debug output flags (debug implies verbose)
    cfg.verbose = args.verbose
    if args.debug:
        cfg.verbose = True
        cfg.debug = True

    # Set table name
    cfg.table_name = args.table

    # Find all files
    f.verbose("Finding file(s).")
    file_names = f.find_all_files(args.file)
    f.debug("Found {0} files.".format(len(file_names)))
    f.debug(file_names)

    if args.command.startswith("gen"):
        f.verbose("Generating CREATE TABLE statement.")
        generate_table_sql(file_names, args.column_type)
        return

    # Set DB type
    f.debug("DB type: {0}".format(args.dbtype))
    cfg.db_type = args.dbtype
    # Set DB default port, if needed
    if args.port is None:
        args.port = f.get_default_db_port(args.dbtype)
        f.debug("Using default port {0}".format(args.port))

    # Set batch size
    f.debug("Batch size: {0}".format(args.batch))
    cfg.batch_size = int(args.batch)

    f.verbose("Establishing database connection.")
    f.debug("Database details:")
    # The password is deliberately left out of the debug output.
    f.debug({
        "dbtype": args.dbtype,
        "user": args.user,
        "host": args.host,
        "port": args.port,
        "dbname": args.dbname
    })

    # Connecting is handled on its own so that only genuine connection
    # failures are reported as such, and so that no close() is attempted
    # on a connection that was never established.
    try:
        cfg.conn = f.get_db_connection(cfg.db_type, args.user,
                                       args.password, args.host, args.port,
                                       args.dbname)
    except KeyboardInterrupt:
        print("Exiting program")
        return
    except Exception as err:
        print("Error connecting to the database: {0}".format(err))
        return

    try:
        try:
            load_files(file_names)
            f.verbose("Closing database connection.")
        finally:
            # Always release the connection, including on load errors
            # and on Ctrl-C.
            cfg.conn.close()
    except KeyboardInterrupt:
        print("Exiting program")
    except Exception as err:
        print("Error loading file(s): {0}".format(err))
Esempio n. 4
0
def run(cmd):
    """Entry point of csv2db.

    Parses the arguments, stores the resulting settings on ``cfg``, looks up
    the input files and then either generates a CREATE TABLE statement or
    loads the files into the database.

    Parameters
    ----------
    cmd : str array
        The arguments passed

    Returns
    -------
    int
        The exit code (the ``value`` of a ``f.ExitCodes`` member).
    """
    args = parse_arguments(cmd)

    # Output flags: debug output implies verbose output
    cfg.verbose = args.verbose
    if args.debug:
        cfg.verbose = True
        cfg.debug = True

    # Target table
    cfg.table_name = args.table
    f.debug("Table name: {0}".format(cfg.table_name))

    # Column separator character(s)
    cfg.column_separator = args.separator
    f.debug("Column separator: {0}".format(cfg.column_separator))

    # Quote character(s)
    cfg.quote_char = args.quote
    f.debug("Column escape character: {0}".format(cfg.quote_char))

    # Locate the input files
    f.verbose("Finding file(s).")
    file_names = f.find_all_files(args.file)
    f.verbose("Found {0} file(s).".format(len(file_names)))
    if not file_names:
        # Nothing to process, which is not an error
        return f.ExitCodes.SUCCESS.value
    f.debug(file_names)

    # "generate" command: emit the CREATE TABLE SQL and stop
    if args.command.startswith("gen"):
        f.verbose("Generating CREATE TABLE statement.")
        try:
            generate_table_sql(file_names, args.column_type)
            return f.ExitCodes.SUCCESS.value
        except Exception as exc:
            f.error("Error generating statement: {0}".format(exc))
            return f.ExitCodes.GENERIC_ERROR.value

    # Any other command: load the data
    f.debug("DB type: {0}".format(args.dbtype))
    cfg.db_type = args.dbtype
    cfg.direct_path = args.directpath

    # Fall back to the default port of the database type
    if args.port is None:
        args.port = f.get_default_db_port(args.dbtype)
        f.debug("Using default port {0}".format(args.port))

    f.debug("Batch size: {0}".format(args.batch))
    cfg.batch_size = int(args.batch)
    # Direct path loads use a batch size of at least 10k
    direct_path_min_batch = 10000
    if cfg.direct_path and cfg.batch_size < direct_path_min_batch:
        f.debug("Direct path was specified but batch size is less than 10000.")
        f.debug(
            "Overwriting the batch size to 10000 for direct-path load to make sense."
        )
        cfg.batch_size = direct_path_min_batch

    f.verbose("Establishing database connection.")
    f.debug("Database details:")
    connection_details = {
        "dbtype": args.dbtype,
        "user": args.user,
        "host": args.host,
        "port": args.port,
        "dbname": args.dbname
    }
    f.debug(connection_details)

    # Prompt for the password when none was passed on
    if args.password is None:
        args.password = getpass.getpass()

    try:
        cfg.conn = f.get_db_connection(
            cfg.db_type, args.user, args.password, args.host, args.port,
            args.dbname)
    except Exception as exc:
        f.error("Error connecting to the database: {0}".format(exc))
        return f.ExitCodes.DATABASE_ERROR.value

    try:
        load_files(file_names)
        f.verbose("Closing database connection.")
        cfg.conn.close()
        if cfg.data_loading_error:
            return f.ExitCodes.DATA_LOADING_ERROR.value
        else:
            return f.ExitCodes.SUCCESS.value
    except Exception as exc:
        f.error("Error loading file(s): {0}".format(exc))
        cfg.conn.close()
        return f.ExitCodes.GENERIC_ERROR.value
    except KeyboardInterrupt:
        print("Exiting program")
        cfg.conn.close()
        return f.ExitCodes.GENERIC_ERROR.value
Esempio n. 5
0
            "name", "ignore_error", "verbose", "input"
    ]:
        settings[key] = user[key]

    "Loads config file"
    if settings["config"]:
        settings = functions.load_config(settings)

    "Checks speed, time and FPS if they are set all three."
    if settings["speed"] and settings["time"] and settings[
            "fps"] and not float(settings["speed"]) * float(
                settings["fps"]) == float(settings["time"]):
        functions.soft_error(
            "WARNING: Mutually exclusive arguments defined. (-S speed, -T time, -F fps)",
            settings["verbose"], 1, settings["ignore_error"])
        functions.verbose(" - Using default values.", settings["verbose"], 1)
        settings["speed"] = constants["speed"]
        settings["time"] = None
        settings["fps"] = constants["fps"]

    "If only one of the values time, speed and FPS set we have to compute the rest."
    if settings["time"] and not settings["fps"] and not settings["speed"]:
        settings["fps"] = constants["fps"]
    elif not settings["time"] and settings["fps"] and not settings["speed"]:
        settings["speed"] = constants["speed"]
    elif not settings["time"] and not settings["fps"] and settings["speed"]:
        settings["fps"] = constants["fps"]
    elif not settings["time"] and not settings["fps"] and not settings["speed"]:
        settings["fps"] = constants["fps"]
        settings["speed"] = constants["speed"]
    args = parser.parse_args()

    user = vars(args)

    "Copy arguments the the dict settings"
    for key in ["time_format", "max_val", "min_val", "max_time", "min_time", "speed", "time", "fps", "legend", "gnuplot", "effect", "config", "name", "ignore_error", "verbose", "input"]:
        settings[key] = user[key]

    "Loads config file"
    if settings["config"]:
        settings = functions.load_config(settings)

    "Checks speed, time and FPS if they are set all three."
    if settings["speed"] and settings["time"] and settings["fps"] and not float(settings["speed"]) * float(settings["fps"]) == float(settings["time"]):
        functions.soft_error("WARNING: Mutually exclusive arguments defined. (-S speed, -T time, -F fps)", settings["verbose"], 1, settings["ignore_error"])
        functions.verbose(" - Using default values.", settings["verbose"], 1)
        settings["speed"] = constants["speed"]
        settings["time"] = None
        settings["fps"] = constants["fps"]

    "If only one of the values time, speed and FPS set we have to compute the rest."
    if settings["time"] and not settings["fps"] and not settings["speed"]:
        settings["fps"] = constants["fps"]
    elif not settings["time"] and settings["fps"] and not settings["speed"]:
        settings["speed"] = constants["speed"]
    elif not settings["time"] and not settings["fps"] and settings["speed"]:
        settings["fps"] = constants["fps"]
    elif not settings["time"] and not settings["fps"] and not settings["speed"]:
        settings["fps"] = constants["fps"]
        settings["speed"] = constants["speed"]