Exemple #1
0
def create_table(params, db, task_table_name):
    """Create the task table, deriving its schema from the result file of run 1.

    The first line of the result file supplies the tab-separated column names
    and the second line supplies one sample value per column; each sample is
    passed through ``convert_strictest`` and the resulting type is looked up in
    ``type_map`` (``TEXT`` when there is no mapping).  The function returns
    without creating anything when ``sqlite_master`` already lists a table
    named ``params.task_table_name`` (surrounding ``[]`` stripped).

    Args:
        params: settings object; ``run_prefix``, ``task_table_name`` and
            ``key_params`` are read from it.
        db: database connection exposing ``cursor()``.
        task_table_name: table identifier placed in the CREATE TABLE statement.

    Exits the process with status 3 when a name in ``params.key_params`` is
    not one of the columns found in the result file.
    """
    # Only the first two lines are needed, so release the file before any
    # database work is done.
    with open(get_result_file(params, params.run_prefix, 1), "r") as res:
        result_params = [param.strip() for param in res.readline().rstrip().split("\t")]
        result_params_sample = [value.strip() for value in res.readline().rstrip().split("\t")]

    # a short (or missing) sample row is padded with empty strings
    result_params_sample.extend([""] * (len(result_params) - len(result_params_sample)))

    # check if table name already exists; the name is a value here, so it can
    # be bound as a parameter instead of being pasted into the SQL text
    cursor = db.cursor()
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
        (params.task_table_name.strip("[]"),),
    )
    if cursor.fetchone():
        return

    # run column first, then the parse time (seconds since epoch)
    column_defs = ["{} INTEGER".format(params.run_prefix), "parsed_date REAL"]
    # include one item for each result_params
    for name, sample in zip(result_params, result_params_sample):
        sql_type = type_map.get(type(convert_strictest(sample)), "TEXT")
        column_defs.append('"{}" {}'.format(name, sql_type))

    # treat with unique keys; run always considered primary key
    key_columns = [params.run_prefix]
    for primary_key in params.key_params:
        if primary_key not in result_params:
            print("{} does not exist in result file of run 1".format(primary_key))
            sys.exit(3)
        # quoted exactly like the column definition above so that names with
        # spaces or punctuation still resolve
        key_columns.append('"{}"'.format(primary_key))
    column_defs.append("PRIMARY KEY({})".format(",".join(key_columns)))

    # table identifiers cannot be parameterized, so have to use string formatting
    create_table_command = "CREATE TABLE IF NOT EXISTS {}({})".format(
        task_table_name, ", ".join(column_defs)
    )

    print(create_table_command)
    cursor = db.cursor()
    cursor.execute(create_table_command)

    # reinitialize tracked columns for newly created table
    initialize_tracked_columns(params, db)
Exemple #2
0
    def add_run_to_db(params, run):
        """Insert every row of one run's result file into the task table.

        The first line of the file is the tab-separated header; any column not
        yet in ``params.tracked_columns`` is added to the table before rows are
        inserted.  Each row is prefixed with the run number and the file's
        modification time.  The run is skipped (nothing inserted) when the file
        cannot be stat'ed or when a row has more values than the header.

        ``db`` and ``nullval`` are not parameters; they are taken from the
        enclosing scope.

        Args:
            params: settings object; ``tracked_columns`` and
                ``task_table_name`` are read here.
            run: run identifier passed to ``get_result_file`` and
                ``get_trailing_num``.
        """
        resfilename = get_result_file(params, run)
        run_number = get_trailing_num(run)
        try:
            parsed_date = os.path.getmtime(resfilename)
        except OSError:
            print("file {} not found; skipping".format(resfilename))
            return

        with open(resfilename, 'r') as res:
            # make sure table is compatible with run data by inserting any new columns
            # always called "run" in table (converted from whatever prefix users use)
            result_params = ["run", "parsed_date"]
            result_params.extend(res.readline().split('\t'))
            if result_params[-1] == '\n':
                # header line ended with a delimiter
                result_params.pop()
            result_params = ["".join(('\"', p.strip(), '\"')) for p in result_params]

            pre_sample_pos = res.tell()
            # Keep the samples index-aligned with result_params: the two
            # leading entries stand in for run and parsed_date, so no negative
            # index can pick an unrelated column's sample.
            result_params_sample = [str(run_number), str(parsed_date)]
            result_params_sample.extend(res.readline().split('\t'))
            # go back to presample location for normal line iteration
            res.seek(pre_sample_pos, os.SEEK_SET)
            # a short or missing sample row must not raise IndexError below
            while len(result_params_sample) < len(result_params):
                result_params_sample.append('')

            # add new column to table
            for column, sample in zip(result_params, result_params_sample):
                if column not in params.tracked_columns:
                    print("ADDING {} as new column".format(column))
                    add_column_to_table(params, db, column, sample)

            # add value rows
            # NOTE(review): the last field of each line still carries its line
            # terminator when it reaches convert_strictest / the nullval
            # comparison - confirm that this is intended.
            rows_to_add = []
            for line in res:
                # run number and parsed_date are always recorded
                result_params_val = [run_number, parsed_date]
                result_params_val.extend(line.split('\t'))
                if result_params_val[-1] == '\n':
                    result_params_val.pop()
                # something must be wrong here
                if len(result_params_val) > len(result_params):
                    print(
                        "There are {} values for only {} parameters in run {}; "
                        "skipping run".format(
                            len(result_params_val), len(result_params), run_number
                        )
                    )
                    # skip this run
                    return

                # for when the last column value is the empty string
                while len(result_params_val) < len(result_params):
                    result_params_val.append('')

                rows_to_add.append(tuple(
                    convert_strictest(param) if param != nullval else None
                    for param in result_params_val
                ))

            param_placeholders = ",".join("?" * len(result_params))
            # specify columns to insert into since ordering of columns in file may not match table
            insert_rows_command = "INSERT OR IGNORE INTO {} ({}) VALUES ({})".format(
                params.task_table_name,
                ','.join(result_params),
                param_placeholders)
            cursor = db.cursor()
            cursor.executemany(insert_rows_command, rows_to_add)
Exemple #3
0
def add_column_to_table(params, db, column_name, sample_val):
    """Append a column to the task table and start tracking it.

    The column type is looked up in ``type_map`` from the type that
    ``convert_strictest`` returns for ``sample_val``; ``TEXT`` is used when
    there is no mapping.  ``column_name`` is placed in the statement verbatim.

    Args:
        params: settings object; ``task_table_name`` is read and
            ``tracked_columns`` is updated.
        db: database connection exposing ``cursor()``.
        column_name: identifier of the column to add.
        sample_val: representative value used to pick the column type.
    """
    cur = db.cursor()
    sample_type = type(convert_strictest(sample_val))
    fields = {
        "table": params.task_table_name,
        "col": column_name,
        "type": type_map.get(sample_type, "TEXT"),
    }
    statement = "ALTER TABLE {table} ADD COLUMN {col} {type}".format(**fields)
    cur.execute(statement)
    # remember the column so it is not added a second time
    params.tracked_columns.add(column_name)
def add_column_to_table(params, db, column_name, sample_val):
    """Add ``column_name`` to the task table and record it as tracked.

    The SQL type is ``type_map``'s entry for the type produced by
    ``convert_strictest(sample_val)``, defaulting to ``TEXT``.

    Args:
        params: settings object providing ``task_table_name`` and the
            ``tracked_columns`` set that is updated.
        db: database connection exposing ``cursor()``.
        column_name: identifier of the new column, used verbatim.
        sample_val: sample value from which the column type is derived.
    """
    db_cursor = db.cursor()
    converted = convert_strictest(sample_val)
    sql_type = type_map.get(type(converted), "TEXT")
    alter_command = "ALTER TABLE {table} ADD COLUMN {col} {type}".format(
        col=column_name,
        type=sql_type,
        table=params.task_table_name,
    )
    db_cursor.execute(alter_command)
    params.tracked_columns.add(column_name)
def create_table(params, db, task_table_name):
    """Create the task table, deriving its schema from the result file of run 1.

    The first line of the result file gives the tab-separated column names and
    the second line gives one sample value per column; the type returned by
    ``convert_strictest`` for each sample is looked up in ``type_map``
    (``TEXT`` when there is no mapping).  Nothing is created when
    ``sqlite_master`` already lists a table named ``params.task_table_name``
    (surrounding ``[]`` stripped).

    Args:
        params: settings object; ``run_prefix``, ``task_table_name`` and
            ``key_params`` are read from it.
        db: database connection exposing ``cursor()``.
        task_table_name: table identifier placed in the CREATE TABLE statement.

    Exits the process with status 3 when a name in ``params.key_params`` is
    not one of the columns found in the result file.
    """
    # Only the first two lines are needed; close the file before the
    # database is touched.
    with open(get_result_file(params, params.run_prefix + "1"), 'r') as res:
        # Strip the names once so that the key_params membership test below
        # sees the same names that are written into the schema.
        result_params = [param.strip() for param in res.readline().rstrip().split('\t')]
        result_params_sample = res.readline().rstrip().split('\t')

    # a short (or missing) sample row is padded with empty strings
    result_params_sample.extend([''] * (len(result_params) - len(result_params_sample)))

    # check if table name already exists; the name is a value here, so it can
    # be bound as a parameter instead of being pasted into the SQL text
    cursor = db.cursor()
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
        (params.task_table_name.strip("[]"),),
    )
    if cursor.fetchone():
        return

    # run column first, then the parse time (seconds since epoch)
    column_defs = ["{} INTEGER".format(params.run_prefix), "parsed_date REAL"]
    # include one item for each result_params
    for name, sample in zip(result_params, result_params_sample):
        sql_type = type_map.get(type(convert_strictest(sample)), "TEXT")
        column_defs.append('"{}" {}'.format(name, sql_type))

    # treat with unique keys; run always considered primary key
    key_columns = [params.run_prefix]
    for primary_key in params.key_params:
        if primary_key not in result_params:
            print("{} does not exist in result file of run 1".format(primary_key))
            sys.exit(3)
        # quoted exactly like the column definition above so that names with
        # spaces or punctuation still resolve
        key_columns.append('"{}"'.format(primary_key))
    column_defs.append("PRIMARY KEY({})".format(','.join(key_columns)))

    # table identifiers cannot be parameterized, so have to use string formatting
    create_table_command = "CREATE TABLE IF NOT EXISTS {}({})".format(
        task_table_name, ", ".join(column_defs)
    )

    print(create_table_command)
    cursor = db.cursor()
    cursor.execute(create_table_command)

    # reinitialize tracked columns for newly created table
    initialize_tracked_columns(params, db)
    def add_run_to_db(params, run):
        """Insert every row of one run's result file into the task table.

        A run is ignored when its file is neither newer than
        ``params.last_parsed_date`` nor numbered above ``params.last_run``.
        Otherwise ``params.last_run`` is incremented and used as the stored run
        number; the increment is undone whenever the run ends up being skipped.
        Columns named in the header but missing from
        ``params.tracked_columns`` are added to the table first.

        ``db`` and ``nullval`` are not parameters; they are taken from the
        enclosing scope.

        Args:
            params: settings object; ``last_parsed_date``, ``last_run``,
                ``delimiter``, ``tracked_columns`` and ``task_table_name`` are
                used.
            run: run identifier passed to ``get_result_file`` and
                ``get_trailing_num``.
        """
        resfilename = get_result_file(params, run)
        run_number = get_trailing_num(run)
        try:
            parsed_date = os.path.getmtime(resfilename)
        except OSError:
            print("file {} not found; skipping".format(resfilename))
            return

        # throw away unless newer than latest or run number greater than maximum
        if parsed_date <= params.last_parsed_date and run_number <= params.last_run:
            return

        params.last_run += 1
        print("run {} added ({}) ({})".format(run_number, params.last_run, parsed_date))

        # NOTE(review): binary mode is what the Python 2 csv module expects;
        # kept unchanged.
        with open(resfilename, 'rb') as res:
            csvreader = csv.reader(res, delimiter=params.delimiter)
            header = next(csvreader, None)
            first_row = next(csvreader, None)
            if header is None or first_row is None:
                # empty or header-only file: nothing to insert, so give the
                # run number back instead of dying on StopIteration
                print("file {} has no data rows; skipping".format(resfilename))
                params.last_run -= 1
                return

            # make sure table is compatible with run data by inserting any new columns
            # always called "run" in table (converted from whatever prefix users use)
            result_params = ["run", "parsed_date"]
            result_params.extend(header)
            empty_end = False
            if not result_params[-1]:   # empty or None
                empty_end = True
                result_params.pop()
            result_params = ["".join(('\"', p.strip(), '\"')) for p in result_params]
            print(result_params)
            column_count = len(result_params)

            def read_values(fields):
                # run number and parsed_date are always recorded
                values = [params.last_run, parsed_date]
                values.extend(fields)
                # drop the trailing-delimiter field only when the row really
                # has it; a shorter row must not lose a genuine value
                if empty_end and len(values) > column_count:
                    values.pop()
                return values

            def has_too_many(values):
                # something must be wrong here
                if len(values) <= column_count:
                    return False
                print(
                    "There are {} values for only {} parameters in run {}; "
                    "skipping run".format(len(values), column_count, run_number)
                )
                # skip this run
                params.last_run -= 1
                return True

            def to_db_row(values):
                return tuple(
                    convert_strictest(param) if param != nullval else None
                    for param in values
                )

            result_params_sample = read_values(first_row)
            if has_too_many(result_params_sample):
                return
            # for padding when columns have unequal depth
            result_params_sample.extend([''] * (column_count - len(result_params_sample)))

            # add new column to table
            for column, sample in zip(result_params, result_params_sample):
                if column not in params.tracked_columns:
                    print("ADDING {} as new column".format(column))
                    add_column_to_table(params, db, column, sample)

            # add value rows (the sample row is the first data row)
            rows_to_add = [to_db_row(result_params_sample)]
            for line in csvreader:
                result_params_val = read_values(line)
                if has_too_many(result_params_val):
                    return
                # for padding when columns have unequal depth
                result_params_val.extend([''] * (column_count - len(result_params_val)))
                rows_to_add.append(to_db_row(result_params_val))

            print("rows to add")
            print(rows_to_add)
            param_placeholders = ",".join("?" * column_count)
            # specify columns to insert into since ordering of columns in file may not match table
            insert_rows_command = "INSERT OR IGNORE INTO {} ({}) VALUES ({})".format(
                params.task_table_name,
                ','.join(result_params),
                param_placeholders)
            cursor = db.cursor()
            cursor.executemany(insert_rows_command, rows_to_add)
def create_table(params, db):
    """Create the task table, deriving its schema from the first run's result file.

    Runs come from ``get_runs`` and are ordered with ``natural_sort``; the
    first one supplies the schema.  Its first CSV row gives the column names
    and its second row gives one sample value per column; the type returned by
    ``convert_strictest`` for each sample is looked up in ``type_map``
    (``TEXT`` when there is no mapping).  Nothing is created when
    ``sqlite_master`` already lists a table named ``params.task_table_name``
    (surrounding ``[]`` stripped).

    Args:
        params: settings object; ``delimiter``, ``task_table_name``,
            ``run_prefix`` and ``key_params`` are read from it.
        db: database connection exposing ``cursor()``.

    Exits the process with status 2 when there are no runs and with status 3
    when a name in ``params.key_params`` is not a column of the result file.
    """
    runs = get_runs(params)
    # select how to and which runs to use for a certain range
    natural_sort(runs)

    if not runs:
        print("No runs in this directory, cannot create table")
        sys.exit(2)

    # creates table schema based on the result file of the first run
    # NOTE(review): binary mode is what the Python 2 csv module expects; kept
    # unchanged.
    with open(get_result_file(params, runs[0]), 'rb') as res:
        csvreader = csv.reader(res, delimiter=params.delimiter)
        result_params = next(csvreader)
        # a header-only file yields an empty sample instead of StopIteration
        result_params_sample = next(csvreader, [])

    # empty end
    if not result_params[-1]:
        result_params.pop()
        # drop the matching trailing field only when the sample row really
        # has it; a shorter row must not lose a genuine value
        if len(result_params_sample) > len(result_params):
            result_params_sample.pop()
    result_params_sample.extend([''] * (len(result_params) - len(result_params_sample)))

    result_params = [param.strip() for param in result_params]

    # check if table name already exists; the name is a value here, so it can
    # be bound as a parameter instead of being pasted into the SQL text
    cursor = db.cursor()
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
        (params.task_table_name.strip("[]"),),
    )
    if cursor.fetchone():
        return

    # run column first, then the parse time (seconds since epoch)
    column_defs = ["{} INTEGER".format(params.run_prefix), "parsed_date REAL"]
    # include one item for each result_params
    for name, sample in zip(result_params, result_params_sample):
        sql_type = type_map.get(type(convert_strictest(sample)), "TEXT")
        column_defs.append('"{}" {}'.format(name, sql_type))

    # treat with unique keys; run always considered primary key
    key_columns = [params.run_prefix]
    for primary_key in params.key_params:
        if primary_key not in result_params:
            print("{} does not exist in result file of run 1".format(primary_key))
            sys.exit(3)
        key_columns.append('"{}"'.format(primary_key))
    column_defs.append("PRIMARY KEY({})".format(','.join(key_columns)))

    # table identifiers cannot be parameterized, so have to use string formatting
    create_table_command = "CREATE TABLE IF NOT EXISTS {}({})".format(
        params.task_table_name, ", ".join(column_defs)
    )

    print(create_table_command)
    cursor = db.cursor()
    cursor.execute(create_table_command)

    # reinitialize tracked columns for newly created table
    initialize_tracked_columns(params, db)