Example 1
def test_todb_appenddb_cursor():

    f = NamedTemporaryFile(delete=False)
    conn = sqlite3.connect(f.name)
    conn.execute('create table foobar (foo, bar)')
    conn.commit()

    # exercise function
    table = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    cursor = conn.cursor()
    todb(table, cursor, 'foobar')

    # check what it did
    actual = conn.execute('select * from foobar')
    expect = (('a', 1), ('b', 2), ('c', 2))
    ieq(expect, actual)

    # try appending
    table2 = (('foo', 'bar'), ('d', 7), ('e', 9), ('f', 1))
    appenddb(table2, cursor, 'foobar')

    # check what it did
    actual = conn.execute('select * from foobar')
    expect = (('a', 1), ('b', 2), ('c', 2), ('d', 7), ('e', 9), ('f', 1))
    ieq(expect, actual)
Example 2
def test_todb_appenddb_cursor():

    f = NamedTemporaryFile(delete=False)
    conn = sqlite3.connect(f.name)
    conn.execute("create table foobar (foo, bar)")
    conn.commit()

    # exercise function
    table = (("foo", "bar"), ("a", 1), ("b", 2), ("c", 2))
    cursor = conn.cursor()
    todb(table, cursor, "foobar")

    # check what it did
    actual = conn.execute("select * from foobar")
    expect = (("a", 1), ("b", 2), ("c", 2))
    ieq(expect, actual)

    # try appending
    table2 = (("foo", "bar"), ("d", 7), ("e", 9), ("f", 1))
    appenddb(table2, cursor, "foobar")

    # check what it did
    actual = conn.execute("select * from foobar")
    expect = (("a", 1), ("b", 2), ("c", 2), ("d", 7), ("e", 9), ("f", 1))
    ieq(expect, actual)
Example 3
    def etl_from_table(self, table, force_reload=False):
        """Extract, translate, load exclusions (and not reinstatements) from
        a petl TABLE.

        Set FORCE_RELOAD to True to turn off the protections against
        reading the same csv file twice.  There is no harm in redoing
        a csv file, since the csv contents replace the db table
        entirely.  We avoid reloading because it is faster and because
        it prevents the db from having an empty table for a moment
        between blowing away and refilling it.

        """
        if not force_reload:
            # If UPDATED.csv has the same number of rows and the same most
            # recent date as our db, we've already snarfed this csv file and
            # can skip it.
            db_latest = self.conn.get_latest_exclusion_date().replace('-', '')
            db_num_rows = self.conn.count_exclusions()
            updated_latest = etl.cut(etl.sort(table, 'EXCLDATE'),
                                     'EXCLDATE')[len(table) - 1][0]
            updated_num_rows = len(table) - 1
            if (db_num_rows == updated_num_rows
                    and db_latest == updated_latest):
                return

        # Massage data
        individual, business = clean_and_separate(table)

        # Save to db, BLOWING AWAY data in the existing tables.  If the
        # tables don't exist, they will be created, but without any
        # constraints.
        info("Replacing individual_exclusion and business_exclusion tables.")
        etl.todb(individual, self.conn.conn, 'individual_exclusion')
        etl.todb(business, self.conn.conn, 'business_exclusion')
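For orientation, a hedged call sketch for a loader like the one above; the ExclusionsLoader name, its construction, and the UPDATED.csv path are illustrative assumptions rather than part of the original source:

# Hypothetical driver for an object exposing etl_from_table(); names are illustrative.
import petl as etl

loader = ExclusionsLoader()                         # assumed wrapper around the db connection (self.conn)
updated = etl.fromcsv('UPDATED.csv')                # petl table with an EXCLDATE column
loader.etl_from_table(updated)                      # skipped if this csv has already been ingested
loader.etl_from_table(updated, force_reload=True)   # reload anyway, replacing both exclusion tables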
Example 4
	def writeToDB(self):
		'''
		Retrieve the hockey data from the CSV file,
		print a snippet of the data to the console for visual feedback on progress,
		and write the data (with petl) to the hockey database.
		'''
		data = fromcsv(self.csvPath)
		print(look(data, style='minimal'))
		todb(data, connection, self.tableName)
Example 5
 def _save_chunk(self, data, chunk_info):
     print(data)
     table = pd.DataFrame.from_records(data)
     print('chunk_i', chunk_info['chunk_i'])
     table['chunk_i'] = [
         chunk_info['chunk_i'],
     ] * len(data)
     print(table.head())
     print(table.columns)
     # load the assembled DataFrame (which carries chunk_i) as a petl table,
     # rather than passing the raw records
     etl.todb(etl.fromdataframe(table), self.connection, str(self.key), create=True)
     return dict()
Example 6
    def _load_db(self, taiwan_date_str=None, is_delete=False):
        if taiwan_date_str is not None:
            self.set_trade_date(taiwan_date_str)
        raw_file = self.source_file
        tse = etl.fromcsv(raw_file)

        connection = self._get_connection() #psycopg2.connect(_CONNECTION)
        if is_delete: self.clean_db(self._trade_date)

        # assuming table "quotes" already exists in the database, and tse needs to include the header row.
        # petl.io.db.todb(table, dbo, tablename, schema=None, commit=True, create=False, drop=False, constraints=True,
        #                metadata=None, dialect=None, sample=1000)[source]
        etl.todb(tse, connection, 'quotes', drop=False) #, truncate=False)
Example 7
def _test_unicode(dbo):
    expect = ((u'name', u'id'),
              (u'Արամ Խաչատրյան', 1),
              (u'Johann Strauß', 2),
              (u'Вагиф Сәмәдоғлу', 3),
              (u'章子怡', 4),
              )
    actual = etl.fromdb(dbo, 'SELECT * FROM test_unicode')

    print('write some data and verify...')
    etl.todb(expect, dbo, 'test_unicode')
    ieq(expect, actual)
    print(etl.look(actual))
Example 9
def load_pessoa():

    connSicaf = get_sicaf_dw_connection()
    engine = get_data_lake_engine()

    print("Consultando DW SICAF")

    ## Pessoa
    tabela_sql = etl.fromdb(connSicaf, Pessoa.PESSOA)

    print("realizando carga na tabela _PESSOA")
    etl.todb(tabela_sql, engine, '_PESSOA')

    return 0
Example 10
def extract_result():
    try:
        db_connect = create_engine(
            'postgres://*****:*****@ec2-34-204-22-76.compute-1.amazonaws.com:5432/dabbhqt4pegslv'
        )
        table = etl.fromdb(db_connect, "select * from result")  #
        # print(etl.look(table))
        connection = sqlite3.connect('example.db')
        etl.todb(table, connection, 'agrobean_results')

    except:
        return 1


# extract_result()
Example 11
def _test_with_schema(dbo, schema):

    expect = (('foo', 'bar'), ('a', 1), ('b', 2))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 2), ('a', 1), ('b', 2))
    actual = etl.fromdb(dbo, 'SELECT * FROM test')

    print('write some data and verify...')
    etl.todb(expect, dbo, 'test', schema=schema)
    ieq(expect, actual)
    print(etl.look(actual))

    print('append some data and verify...')
    etl.appenddb(expect, dbo, 'test', schema=schema)
    ieq(expect_appended, actual)
    print(etl.look(actual))
Example 12
def load_raw_data():
    """Function to load raw extracted data into the configured raw database."""
    def extract_raw_csv():
        """Nested function to load extracted raw processed csv data to table data frame"""

        table = etl.fromcsv(raw_csv_data,
                            header=raw_datatable_headers,
                            encoding='utf8')

        return table

    raw_dataframe = extract_raw_csv()

    conn = sqlite3.connect(raw_database_path)

    etl.todb(raw_dataframe, conn, raw_tablename)
Example 13
 def load_to_staging_warehouse(self):
     db_info = self.conf.get_staging_db_info()
     connection = pymysql.connect(
         host=db_info['host'],
         user=db_info['user'],
         password=db_info['passwd'],
         db=db_info['db'],
     )
     connection.cursor().execute('SET SQL_MODE=ANSI_QUOTES')
     # TODO: ask Yang about peerlogic set up, why is there an error for duplicate
     # entries in actor IDs, can't different roles share primary keys for IDs
     for table in self.UPDATE_ORDER:
         data = self.TABLES[table]()
         if data:
             print(f'Loading {table}...\n{self.TABLES[table]()}')
             etl.todb(data, connection, table)
     connection.close()
Example 14
def load(tb, pkey, hst, usr, pwd, dtb):
    db = pymysql.connect(host=hst,
                         user=usr,
                         password=pwd,
                         db=dtb,
                         charset='utf8mb4',
                         cursorclass=pymysql.cursors.DictCursor)
    cur = db.cursor()

    if cur.execute("SHOW TABLES LIKE 'ETL'") == 0:
        cur.execute(crtTable(tb[0], dmap(tb[0], tb[1]), tname))
    print "No. of data to be processed: " + str(len(tb))
    cur.execute('SET SQL_MODE=ANSI_QUOTES')
    etl.todb(tb, db, tname)
    print "Done loading."
    db.commit()
    """s = int()
Example 15
def exercise_unicode(dbo):
    print('=' * len(repr(dbo)))
    print('EXERCISE UNICODE')
    print(repr(dbo))
    print('=' * len(repr(dbo)))
    print()
    expect = ((u'name', u'id'),
              (u'Արամ Խաչատրյան', 1),
              (u'Johann Strauß', 2),
              (u'Вагиф Сәмәдоғлу', 3),
              (u'章子怡', 4),
              )
    actual = fromdb(dbo, 'SELECT * FROM test_unicode')
    print('write some data and verify...')
    todb(expect, dbo, 'test_unicode')
    ieq(expect, actual)
    print(look(actual))
Example 16
def load_fornecedor_contrato_item():

    connSiasg_DW = get_siasg_dw_connection()

    engine = get_data_lake_engine()

    print("Selecionando faixas de itens contrato")

    faixa = pd.read_sql_query(con=connSiasg_DW,
                              sql="""
    SELECT cast((D.ID_ITCT_ITEM_CONTRATO  / 500000) as integer) as faixa 
    FROM D_ITCT_ITEM_CONTRATO D
    group by cast((D.ID_ITCT_ITEM_CONTRATO  / 500000) as integer)
    order by cast((D.ID_ITCT_ITEM_CONTRATO  / 500000) as integer)
    """)
    for index, row in faixa.iterrows():
        print("faixa", index + 1)
        print(
            f"Selecionando faixa de {int(index * 500000)} ate {int(index * 500000 + 499999 )}..."
        )
        n = row['faixa']
        sql = """
            SELECT 
                I.ID_ITCT_ITEM_CONTRATO,    
                I.ID_CNTR_CONTRATO,
                I.ID_ITCP_ITEM_COMPRA,
                CH_ITCT_ITEM_CONTRATO_EDIT,
                DS_ITCP_ITEM_COMPRA as NO_ITCP_ITEM_CONTRATO,
                I.ID_ITCP_TP_COD_MAT_SERV,
                CASE WHEN TI.ID_ITCP_TP_MATERIAL_SERVICO = '1' THEN 'Material' WHEN TI.ID_ITCP_TP_MATERIAL_SERVICO = '2' THEN 'Serviço' else 'Não informado' end as NO_ITCP_TP_COD_MAT_SERV,
                I.QT_ITCT_ITENS_CONTRATO,
                I.QT_ITCT_CONTRATADA,
                I.VL_ITCT_CONTRATADO
            FROM D_CNTR_CONTRATO CO 
            INNER JOIN F_ITEM_CONTRATO I ON CO.ID_CNTR_CONTRATO = I.ID_CNTR_CONTRATO 
            AND I.ID_ITCT_ITEM_CONTRATO between {0} * 500000 and {0}* 500000 + 499999
            inner join D_ITCT_ITEM_CONTRATO D ON D.ID_ITCT_ITEM_CONTRATO = I.ID_ITCT_ITEM_CONTRATO
            INNER JOIN D_ITCP_ITEM_COMPRA C ON C.ID_ITCP_ITEM_COMPRA = I.ID_ITCP_ITEM_COMPRA
            INNER JOIN D_ITCP_MATERIAL_SERVICO TI ON I.ID_ITCP_TP_COD_MAT_SERV = TI.ID_ITCP_TP_COD_MAT_SERV;
        """.format(n)
        tabela_sql = etl.fromdb(connSiasg_DW, sql)
        print("Inserindo faixa na base de dados...")
        etl.todb(tabela_sql, engine, '_FORNECEDOR_CONTRATO_ITEM')
    print("Carga realizada com sucesso")
    return 0
Example 17
    def execute(self, context):
        engine = create_engine(
            "postgresql+psycopg2://giovani.stefani@prd-dictas-postgresql-server.postgres.database.azure.com:B7hqx9mFi19J8BTE@prd-dictas-postgresql-server.postgres.database.azure.com:5432/dictas"
        )
        try:
            nm_arq = 'VIDAS.csv'
            nm_tbl = 'vidas'
            with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
                data_from.write(
                    self.client_from.get_blob_client(
                        nm_arq).download_blob().readall())
            table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

            etl.todb(table, engine, f'{nm_tbl}')

            self.logger.info(f"tabela {nm_tbl} carregada")

        except azure.core.exceptions.ResourceNotFoundError:
            print('Entrou na exceção :)')
Example 18
    def execute(self):
        if self.destination is None:
            self.destination = get_default_destination()

        for source in self.sources:
            conn = source.get_connection()
            dest_conn = self.destination.get_connection()
            print source.test_connection()
            print self.destination.test_connection()

            if source.dbms == 'mysql':
                conn.cursor().execute('SET SQL_MODE=ANSI_QUOTES;')

            for table in source.tables:
                table_data = petl.fromdb(conn, 'SELECT * FROM %s' % table)
                petl.todb(table_data, dest_conn, table, commit=True)
                dest_conn.commit()
            dest_conn.close()
        return
Example 19
def exercise_with_schema(dbo, db):
    print('=' * len(repr(dbo)))
    print('EXERCISE WITH EXPLICIT SCHEMA NAME')
    print(repr(dbo))
    print('=' * len(repr(dbo)))
    print()
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(dbo, 'SELECT * FROM test')

    print('write some data and verify...')
    todb(expect, dbo, 'test', schema=db)
    ieq(expect, actual)
    print(look(actual))
    
    print('append some data and verify...')
    appenddb(expect, dbo, 'test', schema=db)
    ieq(expect_appended, actual)
    print(look(actual))
Example 20
    def load(self):

        with open(os.path.join(dir, 'configuration.json'), 'r') as f:
            data = json.load(f)

        s_credentials = {
            "database": data['source']['database'],
            "host": data['source']['host'],
            "port": data['source']['port'],
            "user": data['source']['username'],
            "password": data['source']['password']
        }
        t_credentials = {
            "database": data['target']['database'],
            "host": data['target']['host'],
            "port": data['target']['port'],
            "user": data['target']['username'],
            "password": data['target']['password']
        }

        u = Utility()

        source_id = u.get_source(vendor=data['source']['vendor'])["id"]
        target_id = u.get_source(vendor=data['target']['vendor'])["id"]

        source = u.get_source_connection(vendor_id=source_id,
                                         credentials=s_credentials).connect()
        target = u.get_target_connection(vendor_id=target_id,
                                         credentials=t_credentials).connect()

        tables = u.get_tables('public')
        # table = etl.fromdb(source,'select * from vendor_modules')
        # etl.todb(table, target, 'vendor_modules')
        for doc in tables:

            query = "select * from " + doc['name']
            table = etl.fromdb(source, query)
            if len(table) > 1:
                etl.todb(table, target, doc['name'])
            # print len(table)

            print "Loading of table " + doc['name'] + " completed"
Example 21
def todb(table, dbo, tablename, schema=None, commit=True,
         create=False, drop=False, constraints=True, metadata=None, dialect=None, sample=1000):
    """
    Drop-in replacement for :func:`petl.todb` which also supports automatic table creation.

    Parameters
    ----------

    table : sequence of sequences (petl table)
        Table data to load
    dbo : database object
        DB-API 2.0 connection, callable returning a DB-API 2.0 cursor, or SQLAlchemy connection, engine or session
    tablename : string
        Name of the table
    schema : string
        Name of the database schema to create the table in
    commit : bool
        If True commit the changes
    create : bool
        If True attempt to create the table before loading, inferring types from a sample of the data
    drop : bool
        If True attempt to drop the table before recreating (only relevant if create=True)
    constraints : bool
        If True use length and nullable constraints (only relevant if create=True)
    metadata : sqlalchemy.MetaData
        Custom table metadata (only relevant if create=True)
    dialect : string
        One of {'access', 'sybase', 'sqlite', 'informix', 'firebird', 'mysql', 'oracle', 'maxdb', 'postgresql', 'mssql'}
        (only relevant if create=True)
    sample : int
        Number of rows to sample when inferring types etc. Set to 0 to use the whole table.

    """

    if create:
        if drop:
            drop_table(dbo, tablename, schema=schema, commit=commit)
        create_table(table, dbo, tablename, schema=schema, commit=commit,
                     constraints=constraints, metadata=metadata, dialect=dialect, sample=sample)

    petl.todb(table, dbo, tablename, schema=schema, commit=commit)
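A minimal usage sketch for this wrapper (not petl.todb itself), assuming it is importable together with the create_table/drop_table helpers it relies on; the SQLite connection and table name are illustrative:

# Hypothetical usage: create the target table by inferring types from the data, then load it.
import sqlite3

conn = sqlite3.connect(':memory:')
rows = (('foo', 'bar'),
        ('a', 1),
        ('b', 2))
todb(rows, conn, 'demo', create=True, sample=0)
print(list(conn.execute('select * from demo')))  # expect [('a', 1), ('b', 2)]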
Example 23
def _test_with_schema(dbo, schema):

    expect = (('foo', 'bar'),
              ('a', 1),
              ('b', 2))
    expect_appended = (('foo', 'bar'),
                       ('a', 1),
                       ('b', 2),
                       ('a', 1),
                       ('b', 2))
    actual = etl.fromdb(dbo, 'SELECT * FROM test')

    print('write some data and verify...')
    etl.todb(expect, dbo, 'test', schema=schema)
    ieq(expect, actual)
    print(etl.look(actual))

    print('append some data and verify...')
    etl.appenddb(expect, dbo, 'test', schema=schema)
    ieq(expect_appended, actual)
    print(etl.look(actual))
Example 24
def load_fornecedor_sumario_compra():

    connSiasg_DW = get_siasg_dw_connection()

    engine = get_data_lake_engine()

    # FORNECEDOR_SUMARIO_COMPRA
    #tabela_sql = etl.fromdb(connSiasg_DW, Siasg_DW.FORNECEDOR_SUMARIO_COMPRA)
    #etl.todb(tabela_sql, engine, '_FORNECEDOR_SUMARIO_COMPRA')
    print("Selecionando faixas de itens contrato")
    faixa = pd.read_sql_query(con=connSiasg_DW,
                              sql="""
    SELECT cast((D.ID_ITFN_ITEM_FORNECEDOR  / 10000000) as integer) as faixa 
    FROM F_ITEM_FORNECEDOR D
    group by cast((D.ID_ITFN_ITEM_FORNECEDOR  / 10000000) as integer)
    order by cast((D.ID_ITFN_ITEM_FORNECEDOR  / 10000000) as integer)
    """)
    for index, row in faixa.iterrows():
        print("faixa", index + 1)
        print(
            f"Selecionando faixa de {int(index * 10000000)} ate {int(index * 10000000 + 9999999 )}..."
        )
        n = row['faixa']
        sql = """ SELECT F.ID_FRND_FORNECEDOR_COMPRA as ID_FORNECEDOR,
                    count(Distinct F.ID_CMPR_COMPRA) as QTD_COMPRA,
                    count(Distinct F.ID_ITCP_ITEM_COMPRA) as QTD_ITEM_COMPRA,
                    count(Distinct case when F.VL_PRECO_TOTAL_HOMOLOG > 0 then F.ID_CMPR_COMPRA end) as QTD_COMPRA_HOMOLOG,
                    count(Distinct case when F.VL_PRECO_TOTAL_HOMOLOG > 0 then F.ID_ITCP_ITEM_COMPRA end) as QTD_ITEM_COMPRA_HOMOLOG,
                    sum(F.VL_PRECO_TOTAL_HOMOLOG) as VALOR_HOMOLOGADO
                    from F_ITEM_FORNECEDOR F
                    WHERE F.ID_ITFN_ITEM_FORNECEDOR between {0} * 10000000 and {0}* 10000000 + 9999999
                    group by F.ID_FRND_FORNECEDOR_COMPRA;
        """.format(n)
        tabela_sql = etl.fromdb(connSiasg_DW, sql)
        print("Inserindo faixa na base de dados...")
        etl.todb(tabela_sql, engine, '_FORNECEDOR_SUMARIO_COMPRA')

    print("Carga realizada com sucesso")
    return 0
Example 25
def _test_dbo(write_dbo, read_dbo=None):
    if read_dbo is None:
        read_dbo = write_dbo

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'),
              ('a', 1),
              ('b', 2))
    expect_appended = (('foo', 'bar'),
                       ('a', 1),
                       ('b', 2),
                       ('a', 1),
                       ('b', 2))
    actual = etl.fromdb(read_dbo, 'SELECT * FROM test')

    debug('verify empty to start with...')
    debug(etl.look(actual))
    ieq(expect_empty, actual)

    debug('write some data and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('append some data and verify...')
    etl.appenddb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)

    debug('overwrite and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('cut, overwrite and verify')
    etl.todb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('cut, append and verify')
    etl.appenddb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)

    debug('try a single row')
    etl.todb(etl.head(expect, 1), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(etl.head(expect, 1), actual)
Example 26
def test_todb_appenddb_cursor():
    
    f = NamedTemporaryFile(delete=False)
    conn = sqlite3.connect(f.name)
    conn.execute('create table foobar (foo, bar)')
    conn.commit()

    # exercise function
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    cursor = conn.cursor()
    todb(table, cursor, 'foobar') 
    
    # check what it did
    actual = conn.execute('select * from foobar')
    expect = (('a', 1),
              ('b', 2),
              ('c', 2))
    ieq(expect, actual)
    
    # try appending
    table2 = (('foo', 'bar'),
              ('d', 7),
              ('e', 9),
              ('f', 1))
    appenddb(table2, cursor, 'foobar') 

    # check what it did
    actual = conn.execute('select * from foobar')
    expect = (('a', 1),
              ('b', 2),
              ('c', 2),
              ('d', 7),
              ('e', 9),
              ('f', 1))
    ieq(expect, actual)
Example 28
def write(cursor, table, table_name, mode='insert', module='MySQLdb'):
    """
    load table to $table_name.

    :param cursor:
        database agent
    :type cursor:
        Cursor

    :param table:
        data container
    :type table:
        ``petl.util.base.Table`` or a list of lists like: [['field_name', ...], ['value_object', ...], ...]

    :param table_name:
        table name
    :type table_name:
        ``str``

    :param mode:
        truncate and then insert if mode equals 'trunc';
        insert data if mode equals 'insert';
        insert, replacing rows whose primary key already exists, if mode equals 'replace'
    :type mode:
        ``str``={'trunc'|'insert'|'replace'}
    """

    if 'MYSQL' in module.upper():
        cursor.execute('SET SQL_MODE=ANSI_QUOTES')

    if mode == 'trunc':
        res = petl.todb(table, cursor, table_name)
    elif mode == 'insert':
        res = petl.appenddb(table, cursor, table_name)
    elif mode == 'replace':
        with _LOCK:
            petl.io.db.SQL_INSERT_QUERY = 'REPLACE INTO %s (%s) VALUES (%s)'
            res = petl.appenddb(table, cursor, table_name)
            petl.io.db.SQL_INSERT_QUERY = 'INSERT INTO %s (%s) VALUES (%s)'
    else:
        raise ValueError(
            "Argument mode must be {'trunc'|'insert'|'replace'}, not '%s'" %
            mode)
    return res
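A hedged usage sketch for write() above, assuming a MySQL database reachable through pymysql and a destination table that already exists; host, credentials, table name and data are illustrative:

# Hypothetical call of write() with a pymysql cursor; mode='replace' relies on the
# REPLACE INTO swap described in the docstring above.
import pymysql

db = pymysql.connect(host='localhost', user='etl', password='secret', db='demo')
rows = [['id', 'name'], [1, 'a'], [2, 'b']]
write(db.cursor(), rows, 'demo_table', mode='replace', module='pymysql')
db.commit()
db.close()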
Example 29
def load_stage_data():
    """Function to load stage data into the configured stage database."""
    def extract_staging_csv(dataframe):
        """Nested function to load processed stage csv data to table data frame"""

        table = etl.fromcsv(dataframe, encoding='utf8')

        return table

    conn = psycopg2.connect(database=stage_db_name,
                            user=stage_db_user,
                            password=stage_db_pass)

    etl.todb(extract_staging_csv(stage_tweet_csv), conn, stage_tweet_table)

    etl.todb(extract_staging_csv(stage_user_csv), conn, stage_user_table)

    etl.todb(extract_staging_csv(stage_date_csv), conn, stage_date_table)
Example 30
def exercise_ss_cursor(setup_dbo, ss_dbo):
    print '=' * len(repr(ss_dbo))
    print 'EXERCISE WITH SERVER-SIDE CURSOR'
    print repr(ss_dbo)
    print '=' * len(repr(ss_dbo))
    print

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(ss_dbo, 'SELECT * FROM test')

    print 'verify empty to start with...'
    ieq(expect_empty, actual)
    print look(actual)

    print 'write some data and verify...'
    todb(expect, setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'append some data and verify...'
    appenddb(expect, setup_dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)

    print 'overwrite and verify...'
    todb(expect, setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, overwrite and verify'
    todb(cut(expect, 'bar', 'foo'), setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, append and verify'
    appenddb(cut(expect, 'bar', 'foo'), setup_dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
Example 31
def exercise(dbo):
    print '=' * len(repr(dbo))
    print repr(dbo)
    print '=' * len(repr(dbo))
    print
    
    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(dbo, 'SELECT * FROM test')

    print 'verify empty to start with...'
    ieq(expect_empty, actual)
    print look(actual)
    
    print 'write some data and verify...'
    todb(expect, dbo, 'test')
    ieq(expect, actual)
    print look(actual)
    
    print 'append some data and verify...'
    appenddb(expect, dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
    
    print 'overwrite and verify...'
    todb(expect, dbo, 'test')
    ieq(expect, actual)
    print look(actual)
    
    print 'cut, overwrite and verify'
    todb(cut(expect, 'bar', 'foo'), dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, append and verify'
    appenddb(cut(expect, 'bar', 'foo'), dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
Example 33
def writedata(conection, table):
    etl.todb(table, conection, 'son', create=True)
Example 34
import petl as etl, psycopg2 as pg, sys
from sqlalchemy import *

reload(sys)
sys.setdefaultencoding('utf8')

dbCnxns = {
    'operations': 'dbname=operations user=etl host=127.0.0.1',
    'python': 'dbname=python user=etl host=127.0.0.1',
    'production': 'dbname=production user=etl host=127.0.0.1'
}

sourceConn = pg.connect(dbCnxns['operations'])
targetConn = pg.connect(dbCnxns['python'])
sourceCursor = sourceConn.cursor()
targetCursor = targetConn.cursor()

sourceCursor.execute(
    """select table_name from information_schema.columns where 
table_name in ('orders','returns') group by 1""")
sourceTables = sourceCursor.fetchall()

for t in sourceTables:
    targetCursor.execute("drop table if exists %s" % (t[0]))
    sourceDs = etl.fromdb(sourceConn, 'select * from %s' % (t[0]))
    etl.todb(sourceDs, targetConn, t[0], create=True, sample=10000)
Example 35
t_confirmed = etl.rename(t_confirmed, {'index': 'Country'})

# Then we turn the date columns into data rows and rename the new columns
t_confirmed = etl.melt(t_confirmed, 'Country')
t_confirmed = etl.rename(t_confirmed, {'variable': 'Date'})
t_confirmed = etl.rename(t_confirmed, {'value': 'Cases'})

# Then we add the continent in order to group by it
t_confirmed = etl.addfield(t_confirmed, 'Continent',
                           lambda rec: get_continent_code(rec['Country']))

# And again we make sure the values have the data types they should have.
t_confirmed = etl.convert(t_confirmed, 'Cases', int)
t_confirmed = etl.convert(t_confirmed, 'Date',
                          lambda v: datetime.datetime.strptime(v, '%Y-%m-%d'))

# Finally, we load the data into the data repository
conn = pymysql.connect(password='******', database='covid', user='******')
conn.cursor().execute('SET SQL_MODE=ANSI_QUOTES')
etl.todb(t_confirmed, conn, 'confirmed', create=True, drop=True)
conn.close()

# Visualization examples for debugging
#print(etl.header(t_confirmed))
#print(etl.records(t_confirmed))
#print(t_confirmed.lookall())
#etl.tocsv(t_confirmed, 'confirmados.csv')
#df.to_csv(r'confirmados.csv', index=True, header=True)

#cols = etl.columns(t_confirmed)
"""Test python etl library https://petl.readthedocs.io/en/stable/io.html"""

import psycopg2
from petl import todb, look

create_table_script = """
CREATE TABLE public.petl_test (
	petl_id serial NOT NULL,
	description varchar NULL,
	external_id int4 NULL
); """

# source list of data including headers
source = [['description', 'external_id'], ['teach', 2], ['learn', 3]]
look(source)  # debugging
dest_db_table = 'petl_test'  # this db table must already exist

with psycopg2.connect(
        "host=localhost dbname=mydb user=guest password=12345") as connection:
    with connection.cursor() as db_cur:
        # truncate the table and insert the data
        todb(table=source,
             dbo=db_cur,
             tablename=dest_db_table,
             schema='public')
Example 37
prompt('setup table')
cursor = connection.cursor()
# deal with quote compatibility
cursor.execute('SET SQL_MODE=ANSI_QUOTES')
cursor.execute('DROP TABLE IF EXISTS test')
cursor.execute('CREATE TABLE test (foo TEXT, bar INT)')
cursor.close()
connection.commit()

prompt('exercise the petl functions using a connection')
from petl import look, fromdb, todb, appenddb
t1 = fromdb(connection, 'SELECT * FROM test')
print look(t1)
t2 = (('foo', 'bar'), ('a', 1), ('b', 1))
t2app = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, connection, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)

prompt('exercise the petl functions using a cursor')
cursor = connection.cursor()
todb(t2, cursor, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, cursor, 'test')
Example 38
cs = "dbname=%s user=%s password=%s host=%s port=%s" % (dn, du, dp, dh, dbp)
connection = psycopg2.connect(cs)

#
a = np.random.rand(100000, 200).tolist()
b = np.random.rand(100000, 10).tolist()
etl.fromcolumns()

#pandas -> table -> db
import pandas as pd
df = pd.DataFrame(columns=['id', 'features', 'model_id', 'regdate'])
for i in range(1, 10000):
    df.loc[i] = [i, np.random.rand(10).tolist(), 'test1', datetime.now()]

pddf = etl.fromdataframe(df)
etl.todb(pddf, connection, 'defect_features', 'public')

#select query
mkcursor = lambda: connection.cursor(name='arbitrary')
table = etl.fromdb(mkcursor, 'select * from public.defect_features')
table

#primitive -> db
import numpy as np
from datetime import datetime
datatable = [['id', 'features', 'model_id', 'regdate']]
for i in range(1, 10):
    datatable.append([i, np.random.rand(10).tolist(), 'test1', datetime.now()])
etl.todb(datatable, connection, 'defect_features', 'public')
Example 39
#!/usr/bin/env python3

import petl as etl
import pandas as pd
import psycopg2 as pg

NASA_SOURCE = "../kepler_accepted_results.csv"

k_conn = pg.connect(user='******', password='******',
                    host='localhost', database='kepler')

# Not using petl to pull the data in from CSV because its CSV methods
# are a bit lacking.  Pandas does a better job here.
df = pd.read_csv(NASA_SOURCE, skiprows=31)
# df = df.set_index('kepid')

stbl = etl.fromdataframe(df, include_index=False)

# Not doing any transformation at all yet

etl.todb(stbl, k_conn, 'nasa_result', create=True, drop=False, sample=0)
k_conn.close()
Example 40
connection = psycopg2.connect('dbname=twelveBD user=postgres password=admin')

table = etl.fromdb(connection, 'SELECT * FROM personas')
table1 = [['idPersona', 'nombre', 'sexo', 'edad'],
          [1, 'Rafael Perez Aguirre', 'm', 24]]
table2 = [['idPersona', 'nombre', 'sexo', 'edad'],
          [2, 'Eduardo Cantoran Flores', 'm', 25]]
table3 = [['idPersona', 'nombre', 'sexo', 'edad'],
          [3, 'Adriana Lopez Montiel', 'm', 30]]

table1 = [
    ['idEmpresa', 'nombre', 'sucursal', 'direccion'],
    [
        'IDIT', 1,
        'Blvrd del Niño Poblano 2901, Reserva Territorial Atlixcáyotl, Centro Comercial Puebla, 72810 San Andrés Cholula, Pue.'
    ]
]

table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'], ['fácil', 30, 1, 45]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'],
          ['intermedio', 35, 1, 40]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'],
          ['Dificil', 45, 1, 40]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'],
          ['Veterano', 50, 1, 45]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'], ['Dios', 55, 1, 40]]

etl.todb(table1, connection, 'personas')
etl.appenddb(table1, connection, 'personas')