Ejemplo n.º 1
0
def load_csv_to_table(table, schema_file, csv_file, server, database, config,
                      cred_file='config/dblogin.config', skipfirstrow=1):
    """Load rows from a csv file into a SQL Server table.

    Reads username/password from a JSON credentials file, connects to the
    given server/database, and inserts each csv row according to the column
    names and types listed in the schema file.

    Args:
        table: table name where csv data will be written
        schema_file: schema file that has all column names and data type names
        csv_file: path of the csv data being loaded
        server: sql server host name
        database: database name to connect to on the server
        config: which configuration name to pull username and password credentials
        cred_file: location of the JSON db login config file
        skipfirstrow (optional): if 1 then skip the first row of data (exclude headers)

    Returns:
        None
    """
    from files import loop_csv_file
    from files import get_schema_file

    # Credentials file is JSON keyed by configuration name.
    with open(cred_file) as cred:
        db_info = json.load(cred)

    username = db_info[config]['username']
    password = db_info[config]['password']

    data_list = loop_csv_file(csv_file)

    connection = mssql_connect(server, database, username, password)

    schema_list = get_schema_file(schema_file)

    # Skip the first value of data_list, which is the header row.
    data_list = iter(data_list)
    if skipfirstrow == 1:
        next(data_list)

    process_datarow_to_list(data_list, schema_list, connection, table)
Ejemplo n.º 2
0
def cursor_to_json(cursor, dest_file, dest_schema_file=None, source_schema_file=None):
    """Takes a cursor and creates a JSON-lines file with the data
    and, optionally, a schema file for loading to other data systems.

    Args:
        cursor: cursor object with data to extract to file
        dest_file: string, path and file name to save data
        dest_schema_file: string (optional), path to write a csv schema file
            with each row as "col_name,data_type"
        source_schema_file: string (optional), existing schema file to read
            instead of deriving the schema from cursor.description

    Returns:
        None
    """
    if source_schema_file is None:
        # Derive [column_name, type_string] pairs from the cursor metadata.
        schema = [[col[0], str(col[1])] for col in cursor.description]
    else:
        schema = get_schema_file(source_schema_file)

    if dest_schema_file is not None:
        # Substring -> portable type name, checked in priority order
        # ('date' before 'int' etc., matching the original elif chain).
        type_map = [
            ('date', 'timestamp'),
            ('list', 'list'),
            ('int', 'integer'),
            ('long', 'integer'),
            ('float', 'float'),
            ('bool', 'boolean'),
        ]
        # Text mode: these writes are str, not bytes.
        with open(dest_schema_file, 'w') as schemafile:
            for col, src_type in schema:
                datatype = 'string'  # default (covers 'str' and anything unknown)
                for token, mapped in type_map:
                    if token in src_type:
                        datatype = mapped
                        break
                schemafile.write("%s\n" % (col + ',' + datatype))

    # One JSON document per line (JSON-lines format).
    with open(dest_file, 'w') as outfile:
        for row in cursor:
            result_dct = process_postgres_data_row(row, schema)
            outfile.write("%s\n" % json.dumps(result_dct, default=_defaultencode))
Ejemplo n.º 3
0
def load_json_file_to_table(connection, table , source_file, schema_file):
    """Takes a JSON-lines file, schema file, and db connection and inserts data to a specified table

    Args:
        connection: open database connection used for the inserts
        table: table name where the file's data will be written
        source_file: path of the JSON data file being loaded
        schema_file: schema file that has all column names and data type names

    Returns:
        None
    """
    data_list = loop_json_file(source_file)
    schema_list = get_schema_file(schema_file)
    #data_list = iter(data_list)
    insert_datarows_dct_to_table(data_list,schema_list,connection,table)
Ejemplo n.º 4
0
def create_table(connection, table_name, schema_file, index):
    """Create a table (and optionally an index) from a schema file, then commit.

    Args:
        connection: pyodbc.connect() object, Connection to use when running Sql
        table_name: string, Table name including db schema (ex: my_schema.my_table)
        schema_file: string, Path to csv schema file with each row as col_name, data_type
        index: string, Column name of index (can put multiple columns comma delimited
            if desired); pass None to skip index creation
    Returns:
        cursor object, Results of the call to pyodb.connection().cursor().execute(query)
    """
    cursor = connection.cursor()
    schema_list = get_schema_file(schema_file)

    # Build "CREATE TABLE IF NOT EXISTS t (col type, ...)" from the schema rows.
    cols = ', '.join(col + ' ' + dt for col, dt in schema_list)
    ddl = 'CREATE TABLE IF NOT EXISTS ' + table_name + '(' + cols + ');'

    # Legacy py2 workaround: some drivers choke on non-ascii unless pre-encoded.
    try:
        cursor.execute(ddl.encode('utf-8'))
    except UnicodeDecodeError:
        cursor.execute(ddl)

    if index is not None:
        # CREATE INDEX takes an unqualified name (the index lands in the table's
        # schema), so check existence and create under the same short name.
        # The original checked to_regclass against the schema-qualified name but
        # created the unqualified one, so the check could never match.
        index_name = table_name.split('.')[-1] + '_idx'
        qualified = table_name.rsplit('.', 1)[0] + '.' + index_name if '.' in table_name else index_name
        exists = run_sql(connection, "SELECT to_regclass('{0}')".format(qualified))
        # to_regclass returns NULL when the relation does not exist.
        if exists.fetchone()[0] is None:
            ddl2 = 'CREATE INDEX {0} ON {1}({2});'.format(index_name, table_name, index)
            try:
                cursor.execute(ddl2.encode('utf-8'))
            except UnicodeDecodeError:
                cursor.execute(ddl2)

    connection.commit()
    return cursor
Ejemplo n.º 5
0
def load_csv_to_table(table ,schema_file ,csv_file, connection, skipfirstrow=1):
    """Takes csv file, schema file, and an open db connection and inserts data to a specified table

    Args:
        table: table name where csv data will be written
        schema_file: schema file that has all column names and data type names
        csv_file: path of the csv data being loaded
        connection: open database connection used for the inserts
        skipfirstrow(optional): if 1 then skip the first row of data (exclude headers)

    Returns:
        None
    """
    data_list = loop_csv_file(csv_file)

    schema_list = get_schema_file(schema_file)
    #skips the first value of data_list which is the header
    data_list = iter(data_list)
    if skipfirstrow == 1:
        next(data_list)

    insert_datarows_to_table(data_list,schema_list,connection,table)