Example #1
    def anadirListaTipos(self):
        global iTipos

        # Ask the user for a name for the new preset.
        text, okPressed = QtWidgets.QInputDialog.getText(
            MainWindow, "Guardar preset", "Nombre del preset:",
            QtWidgets.QLineEdit.Normal, "")
        if okPressed and text != '':
            print(text)
        # Convert the QTimeEdit value into a number of seconds.
        seconds = QtCore.QTime(0, 0, 0).secsTo(self.timeEdit.time())

        table1 = [[
            'nombre', 'nofichas', 'tiempo', 'ficha1', 'ficha2', 'ficha3'
        ],
                  [
                      text,
                      self.spinBox_2.value(), seconds,
                      self.comboBtnFicha1.currentIndex() + 1,
                      self.comboBtnFicha2.currentIndex() + 1,
                      self.comboBtnFicha3.currentIndex() + 1
                  ]]
        # Append the new preset row to the 'tipos' table.
        etl.appenddb(table1, connection, 'tipos')

        # Show the new preset at position iTipos in the list widget.
        item = QtWidgets.QListWidgetItem()
        self.listWidgetPresets.addItem(item)

        item = self.listWidgetPresets.item(iTipos)
        item.setText(text)
Example #2
    def etl_from_dir(self, data_dir="data"):
        """Extract, translate, load reinstatements (and not exclusions) from
        directory DATA_DIR.
        """

        # Get YYYYMM date of most recent reinstatement action
        most_recent = self.conn.get_latest_reinstatement_date().replace(
            '-', '')[:6] or "000000000"

        # Get the data from REIN CSV files.  Gather reinstatement actions
        # since most_recent
        total_indiv = []
        total_bus = []
        for fname in sorted(glob.glob(os.path.join(data_dir, "*REIN.csv"))):
            if int(os.path.basename(fname)[:4]) <= int(most_recent[2:]):
                continue
            debug("Processing " + fname)
            reinstated = etl.fromcsv(fname)
            individual, business = clean_and_separate(reinstated)
            total_indiv.append(individual)
            total_bus.append(business)

        # Save to db, APPENDING TO existing data tables.  Assumes tables
        # exist.
        if total_indiv:
            etl.appenddb(etl.cat(*total_indiv), self.conn.conn,
                         'individual_reinstatement')
        if total_bus:
            etl.appenddb(etl.cat(*total_bus), self.conn.conn,
                         'business_reinstatement')

        # It is possible to end up with duplicate rows if, say, an ETL
        # process is interrupted midway through.  So we should find and
        # remove dupes.
        self.conn.dedupe_reinstatements()
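
The helper `clean_and_separate` is not shown in this example. A minimal sketch of what it might do, assuming the REIN CSVs carry a `busname` field that is blank for individual records (both the field name and the split rule are assumptions, not the original code):

import petl as etl

def clean_and_separate(table):
    # Drop rows that are entirely blank, then split on the assumed
    # 'busname' field: blank -> individual, non-blank -> business.
    cleaned = etl.select(table, lambda rec: any(rec))
    individual = etl.select(cleaned, 'busname', lambda v: not v)
    business = etl.select(cleaned, 'busname', bool)
    return individual, business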
Example #3
    def synctable(self, sourceDb, targetDb, sourceTable, targetTable):
        sourceCursor = sourceDb.cursor()
        targetCursor = targetDb.cursor()
        affected_total = 0
        init_rowCount = min(targetTable.rowCount, sourceTable.rowCount)
        pbar = tqdm(total=sourceTable.rowCount, unit='records')
        pbar.update(init_rowCount)
        while sourceTable.lastUpdatetime > targetTable.lastUpdatetime:
            affected_rows = 0
            batchSize = 100000
            sql = "SELECT * FROM (SELECT * FROM {schema}.{tablename} WHERE {timestamp}>=to_timestamp('{last_updatetime}','yyyy-mm-dd hh24:mi:ss.ff6') ORDER BY {timestamp}) WHERE ROWNUM<={batch_size}".format(
                timestamp=sourceTable.timestampField,
                schema=sourceTable.schema,
                tablename=sourceTable.tablename,
                last_updatetime=targetTable.lastUpdatetime,
                batch_size=batchSize)
            sourceRecord = etl.fromdb(lambda: CursorProxy(sourceDb.cursor()),
                                      sql)
            targetRecord = etl.fromdb(
                lambda: CursorProxy(targetDb.cursor()),
                "SELECT * FROM {schema}.{tablename} WHERE 1=0".format(
                    schema=targetTable.schema,
                    tablename=targetTable.tablename))
            sourceTable.columns = etl.header(sourceRecord)
            targetTable.columns = etl.header(targetRecord)
            for column in list(
                    set(sourceTable.columns) - set(targetTable.columns)):
                sourceRecord = etl.cutout(sourceRecord, column)
            max_updatetime = sourceRecord.cut(
                sourceTable.timestampField).skip(1).max()[0]
            sourceRecord = sourceRecord.sort(sourceTable.timestampField)
            etl.appenddb(sourceRecord,
                         CursorProxy(targetCursor),
                         targetTable.tablename,
                         schema=targetTable.schema,
                         commit=True)
            affected_rows += targetCursor.rowcount
            targetTable.lastUpdatetime = max_updatetime.strftime(
                '%Y-%m-%d %H:%M:%S.%f')
            targetTable.rowCount += affected_rows
            pbar.update(affected_rows if init_rowCount + affected_total +
                        affected_rows < sourceTable.rowCount else
                        sourceTable.rowCount - init_rowCount - affected_total)
            affected_total += affected_rows
            pbar.set_description("%s |%d records updated." %
                                 (targetTable.tablename, affected_total))

        if targetTable.lastUpdatetime > sourceTable.lastUpdatetime:
            pbar.set_description("%s |timestamp >, skip." %
                                 (targetTable.tablename))
        elif targetTable.lastUpdatetime == sourceTable.lastUpdatetime and targetTable.rowCount == sourceTable.rowCount:
            pbar.set_description("%s |no data change." %
                                 (targetTable.tablename))
        elif targetTable.lastUpdatetime == sourceTable.lastUpdatetime and targetTable.rowCount > sourceTable.rowCount:
            pbar.set_description("%s |RowCount > but timestamp ==, skip." %
                                 (targetTable.tablename))
        elif targetTable.lastUpdatetime == sourceTable.lastUpdatetime and targetTable.rowCount < sourceTable.rowCount:
            pbar.set_description("%s |RowCount < but timestamp ==, skip." %
                                 (targetTable.tablename))
        pbar.close()
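
`CursorProxy` is referenced above but not defined in this example. petl's `fromdb` and `appenddb` only need a DB-API cursor, so the proxy is presumably a thin wrapper; with cx_Oracle a common reason for one is to replay `executemany` as individual `execute` calls. A sketch under that assumption:

class CursorProxy(object):
    # Illustrative guess at the original class: delegate everything to a
    # real DB-API cursor, overriding executemany for driver quirks.
    def __init__(self, cursor):
        self._cursor = cursor

    def executemany(self, statement, parameters, **kwargs):
        # Some drivers are stricter (or faster) when fed one row at a time.
        for parameter in parameters:
            self._cursor.execute(statement, parameter)

    def __getattr__(self, item):
        return getattr(self._cursor, item)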
Example #4
def test_todb_appenddb_cursor():

    f = NamedTemporaryFile(delete=False)
    conn = sqlite3.connect(f.name)
    conn.execute("create table foobar (foo, bar)")
    conn.commit()

    # exercise function
    table = (("foo", "bar"), ("a", 1), ("b", 2), ("c", 2))
    cursor = conn.cursor()
    todb(table, cursor, "foobar")

    # check what it did
    actual = conn.execute("select * from foobar")
    expect = (("a", 1), ("b", 2), ("c", 2))
    ieq(expect, actual)

    # try appending
    table2 = (("foo", "bar"), ("d", 7), ("e", 9), ("f", 1))
    appenddb(table2, cursor, "foobar")

    # check what it did
    actual = conn.execute("select * from foobar")
    expect = (("a", 1), ("b", 2), ("c", 2), ("d", 7), ("e", 9), ("f", 1))
    ieq(expect, actual)
Example #6
def ETL_MMS_NOW_schema(connection, tables, schema, system_name):
    '''Import all the data from the specified schema and tables.'''
    for destination, source in tables.items():
        try:
            current_table = etl.fromdb(connection,
                                       f'SELECT * from {schema}.{source}')
            print(f'    {destination}:{etl.nrows(current_table)}')

            if (source == 'application'):
                # add originating source
                table_plus_os = etl.addfield(current_table,
                                             'originating_system', system_name)

                table_plus_os_guid = join_mine_guids(connection, table_plus_os)

                etl.appenddb(table_plus_os_guid,
                             connection,
                             destination,
                             schema='now_submissions',
                             commit=False)
            else:
                etl.appenddb(current_table,
                             connection,
                             destination,
                             schema='now_submissions',
                             commit=False)

        except Exception as err:
            print(f'ETL Parsing error: {err}')
            raise
Example #7
def validate(cursor, table, constraints, task_name):
    header = etl.header(table)
    problems = etl.validate(table, constraints=constraints, header=header)
    problems = etl.addfield(problems, 'task_name', task_name)
    problems = etl.addfield(problems, 'create_date', datetime.now())

    # etl.todb(problems, cursor, 'etl_logs')
    etl.appenddb(problems, cursor, 'tetl_logs')
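
For context, `constraints` in petl is a list of dicts, one per check, and `etl.validate` returns a table of problems (one row per failed check). A small self-contained sketch with invented data and names:

import petl as etl

constraints = [
    dict(name='id_int', field='id', test=int),             # value must parse as int
    dict(name='email_set', field='email', assertion=bool)  # value must be non-empty
]
table = [['id', 'email'], ['1', 'a@example.com'], ['x', '']]
problems = etl.validate(table, constraints=constraints, header=('id', 'email'))
print(etl.look(problems))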
Example #8
def ETL_MMS_NOW_schema(connection, tables, schema):
    for key, value in tables.items():
        try:
            current_table = etl.fromdb(connection,
                                       f'SELECT * from {schema}.{value}')
            etl.appenddb(current_table,
                         connection,
                         key,
                         schema='now_submissions',
                         commit=False)
        except Exception as err:
            print(f'ETL Parsing error: {err}')
Example #9
def load(data, data_set):
    import psycopg2

    conn = psycopg2.connect(
        host=config.get('rpi', 'server'),
        database=config.get('rpi', 'database'),
        user=config.get('rpi', 'user'),
        password=config.get('rpi', 'passwd')
    )
    conn.autocommit = True
    etl.appenddb(data, conn, data_set.lower())
    conn.close()
    print('Table {0} loaded.'.format(data_set))
    return
Example #10
def vehicleP():
    while True:
        feed.ParseFromString(
            urlopen(
                'http://gtfs.openov.nl/gtfs-rt/vehiclePositions.pb').read())
        data = []

        timer1 = datetime.now()
        timer2 = datetime.now() - timedelta(minutes=1)
        for entity in feed.entity:
            vp = entity.vehicle
            timex = datetime.fromtimestamp(vp.timestamp)
            # Keep only positions reported within the last minute.
            if timer2 < timex < timer1:
                x = vp.position.longitude
                y = vp.position.latitude
                geo = shape.from_shape(Point(x, y), srid=4326)
                # trip_start_time = datetime.strptime(vp.trip.start_time, '%H:%M:%S').time(),
                # trip_start_date = datetime.strptime(vp.trip.start_date, "%d%m%Y").date(),
                data.append({
                    'time': timex,
                    'geo_loc': str(geo),
                    'schedule_relationship': vp.trip.schedule_relationship,
                    'direction_id': vp.trip.direction_id,
                    'current_stop_sequence': vp.current_stop_sequence,
                    'current_status': vp.current_status,
                    'trip_id': vp.trip.trip_id,
                    'route_id': vp.trip.route_id,
                    'stop_id': vp.stop_id,
                    'trip_start_time': vp.trip.start_time,
                    'trip_start_date': vp.trip.start_date,
                    'vehicle_label': vp.vehicle.label,
                })

        table1 = petl.fromdicts(data)
        print(petl.nrows(table1))
        petl.appenddb(table1, con, 'vehicle_positions')
        t.sleep(60)
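
This loop leans on setup that is not shown: the protobuf `feed` object, the geometry helpers, and a database connection `con` for `petl.appenddb`. A sketch of the imports and globals it appears to assume (the package choices are guesses consistent with the calls above):

import time as t
from datetime import datetime, timedelta
from urllib.request import urlopen

import petl
from shapely.geometry import Point
from geoalchemy2 import shape  # provides shape.from_shape(geom, srid=...)
from google.transit import gtfs_realtime_pb2  # gtfs-realtime-bindings package

feed = gtfs_realtime_pb2.FeedMessage()
# con = psycopg2.connect(...)  # any DB-API connection with a
# vehicle_positions table would do; details are not in the original.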
Example #11
def _test_with_schema(dbo, schema):

    expect = (('foo', 'bar'), ('a', 1), ('b', 2))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 2), ('a', 1), ('b', 2))
    actual = etl.fromdb(dbo, 'SELECT * FROM test')

    print('write some data and verify...')
    etl.todb(expect, dbo, 'test', schema=schema)
    ieq(expect, actual)
    print(etl.look(actual))

    print('append some data and verify...')
    etl.appenddb(expect, dbo, 'test', schema=schema)
    ieq(expect_appended, actual)
    print(etl.look(actual))
Example #13
def _test_dbo(write_dbo, read_dbo=None):
    if read_dbo is None:
        read_dbo = write_dbo

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'),
              ('a', 1),
              ('b', 2))
    expect_appended = (('foo', 'bar'),
                       ('a', 1),
                       ('b', 2),
                       ('a', 1),
                       ('b', 2))
    actual = etl.fromdb(read_dbo, 'SELECT * FROM test')

    debug('verify empty to start with...')
    debug(etl.look(actual))
    ieq(expect_empty, actual)

    debug('write some data and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('append some data and verify...')
    etl.appenddb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)

    debug('overwrite and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('cut, overwrite and verify')
    etl.todb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('cut, append and verify')
    etl.appenddb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)

    debug('try a single row')
    etl.todb(etl.head(expect, 1), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(etl.head(expect, 1), actual)
Example #15
    def anadirListaEmpresas(self):
        global iEmpresa

        table1 = [['nombre', 'sucursal', 'direccion'],
                  [
                      self.txtNombreEmpresa.toPlainText(),
                      self.spinBoxEmpresa.value(),
                      self.txtDireccionEmpresa.toPlainText()
                  ]]
        # Append the new company row to the 'empresas' table.
        etl.appenddb(table1, connection, 'empresas')

        # Show the new company at position iEmpresa in the list widget.
        item = QtWidgets.QListWidgetItem()
        self.listEmpresas.addItem(item)

        item = self.listEmpresas.item(iEmpresa)
        item.setText(self.txtNombreEmpresa.toPlainText())
Example #16
def write(cursor, table, table_name, mode='insert', module='MySQLdb'):
    """
    load table to $table_name.

    :param cursor:
        database agent
    :type
        Cursor

    :param table:
        data container
    :type table
        ``petl.util.base.Table`` or double list like this: [['field_name', ...], ['value_object', ...], ...]

    :param table_name:
        table name
    :type table_name:
        ``str``

    :param mode
        truncate and than insert if mode equal 'trunc';
        insert data if mode equal 'insert';
        insert and replace row where pk has exit if mode equal 'replace'
    :type mode
        ``str``={'trunc'|'insert'|'replace'}
    """

    if 'MYSQL' in module.upper():
        cursor.execute('SET SQL_MODE=ANSI_QUOTES')

    if mode == 'trunc':
        res = petl.todb(table, cursor, table_name)
    elif mode == 'insert':
        res = petl.appenddb(table, cursor, table_name)
    elif mode == 'replace':
        with _LOCK:
            petl.io.db.SQL_INSERT_QUERY = 'REPLACE INTO %s (%s) VALUES (%s)'
            res = petl.appenddb(table, cursor, table_name)
            petl.io.db.SQL_INSERT_QUERY = 'INSERT INTO %s (%s) VALUES (%s)'
    else:
        raise ValueError(
            "Argument mode must be {'trunc'|'insert'|'replace'}, not '%s'" %
            mode)
    return res
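
A quick usage sketch for the three modes (connection details are placeholders; MySQLdb is assumed because it is the default `module`):

import MySQLdb

conn = MySQLdb.connect(host='localhost', user='user', passwd='...', db='demo')
cursor = conn.cursor()
rows = [['id', 'name'], [1, 'a'], [2, 'b']]

write(cursor, rows, 'demo_table', mode='trunc')    # truncate, then insert
write(cursor, rows, 'demo_table', mode='insert')   # plain append
write(cursor, rows, 'demo_table', mode='replace')  # REPLACE INTO on key clash
conn.commit()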
Example #17
def exercise_with_schema(dbo, db):
    print('=' * len(repr(dbo)))
    print('EXERCISE WITH EXPLICIT SCHEMA NAME')
    print(repr(dbo))
    print('=' * len(repr(dbo)))
    print()
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(dbo, 'SELECT * FROM test')

    print('write some data and verify...')
    todb(expect, dbo, 'test', schema=db)
    ieq(expect, actual)
    print(look(actual))
    
    print('append some data and verify...')
    appenddb(expect, dbo, 'test', schema=db)
    ieq(expect_appended, actual)
    print(look(actual))
Example #19
def exercise_ss_cursor(setup_dbo, ss_dbo):
    print '=' * len(repr(ss_dbo))
    print 'EXERCISE WITH SERVER-SIDE CURSOR'
    print repr(ss_dbo)
    print '=' * len(repr(ss_dbo))
    print

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(ss_dbo, 'SELECT * FROM test')

    print 'verify empty to start with...'
    ieq(expect_empty, actual)
    print look(actual)

    print 'write some data and verify...'
    todb(expect, setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'append some data and verify...'
    appenddb(expect, setup_dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)

    print 'overwrite and verify...'
    todb(expect, setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, overwrite and verify'
    todb(cut(expect, 'bar', 'foo'), setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, append and verify'
    appenddb(cut(expect, 'bar', 'foo'), setup_dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
Example #20
def load_fornecedor_comportamento():

    connComprasnet = get_comprasnet_connection()

    connSiasg_DW = get_siasg_dw_connection()

    engine = get_data_lake_engine()

    print("Iniciando carga desclassificação ...")
    ## DESCLASSIFICACAO_FORNECEDORES -> FORNECEDOR_COMPORTAMENTO
    tabela_sql = etl.fromdb(connComprasnet,
                            Comprasnet.DESCLASSIFICACAO_FORNECEDORES)
    etl.appenddb(tabela_sql, engine, '_FORNECEDOR_COMPORTAMENTO')
    print("Carga desclassificação executada com sucesso")

    print("Iniciando carga \"contrato continuado\"")
    ## CONTRATO_CONTINUADO -> FORNECEDOR_COMPORTAMENTO
    tabela_sql = etl.fromdb(connSiasg_DW, Siasg_DW.CONTRATO_CONTINUADO)
    etl.appenddb(tabela_sql, engine, '_FORNECEDOR_COMPORTAMENTO')
    print("Carga \"contrato continuado\" executada com sucesso")

    print("Iniciando carga recursos ...")
    ## RECURSOS -> FORNECEDOR_COMPORTAMENTO
    # This query will need to be adjusted once the missing tables are implemented in the data lake
    # Today it lives in Quartzo (Postgres) and will be migrated to the Datalake (SQL Server)
    tabela_sql = etl.fromdb(connComprasnet, Comprasnet.RECURSOS)
    etl.appenddb(tabela_sql, engine, '_FORNECEDOR_COMPORTAMENTO')
    print("Carga recursos executada com sucesso")
    return 0
Example #21
def exercise(dbo):
    print '=' * len(repr(dbo))
    print repr(dbo)
    print '=' * len(repr(dbo))
    print
    
    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(dbo, 'SELECT * FROM test')

    print 'verify empty to start with...'
    ieq(expect_empty, actual)
    print look(actual)
    
    print 'write some data and verify...'
    todb(expect, dbo, 'test')
    ieq(expect, actual)
    print look(actual)
    
    print 'append some data and verify...'
    appenddb(expect, dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
    
    print 'overwrite and verify...'
    todb(expect, dbo, 'test')
    ieq(expect, actual)
    print look(actual)
    
    print 'cut, overwrite and verify'
    todb(cut(expect, 'bar', 'foo'), dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, append and verify'
    appenddb(cut(expect, 'bar', 'foo'), dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
Example #23
def load_fornecedor_historico():

    # Siasgnet
    connQSiasgnet = get_siasgnet_connection()
    engine = get_data_lake_engine()

    # CONTRATO_ATIVOS -> FORNECEDOR_HISTORICO
    print("Iniciando carga contratos ativos")
    execute_sql(Painel_Fornecedor.CONTRATO_ATIVOS, engine)
    print("Carga contratos ativos realizada com sucesso")

    # CONTRATO_VENCIDOS -> FORNECEDOR_HISTORICO
    print("Iniciando carga contratos vencidos")
    execute_sql(Painel_Fornecedor.CONTRATO_VENCIDOS, engine)
    print("Carga contratos vencidos realizada com sucesso")

    # LICITACOES -> FORNECEDOR_HISTORICO
    print("Iniciando carga licitações")
    execute_sql(Painel_Fornecedor.LICITACOES, engine)
    print("Carga licitações realizada com sucesso")

    ## ATA_VIGENTES -> FORNECEDOR_HISTORICO
    # This query will need to be adjusted once the missing tables are implemented in the data lake
    # Today it lives in Quartzo (Postgres) and will be migrated to the Datalake (SQL Server)
    print("Iniciando carga atas vigentes")
    tabela_sql = etl.fromdb(connQSiasgnet, Siasgnet.ATA_VIGENTES)
    etl.appenddb(tabela_sql, engine, '_FORNECEDOR_HISTORICO_CONTRATO')
    print("Carga atas vigentes realizada com sucesso")

    ## ATA_VENCIDAS -> FORNECEDOR_HISTORICO
    # This query will need to be adjusted once the missing tables are implemented in the data lake
    # Today it lives in Quartzo (Postgres) and will be migrated to the Datalake (SQL Server)
    print("Iniciando carga atas vencidas")
    tabela_sql = etl.fromdb(connQSiasgnet, Siasgnet.ATA_VENCIDAS)
    etl.appenddb(tabela_sql, engine, '_FORNECEDOR_HISTORICO_CONTRATO')
    print("Carga atas vigentes realizada com sucesso")

    return 0
Example #25
import petl as etl
import pyodbc  # assumed import: pyodbc.connect is used below but was not imported

server = 'localhost'
database = 'BikeStores'
username = '******'
password = '******'
connection = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER='+server+';DATABASE='+database+';UID='+username+';PWD='+ password)
mkcursor = lambda: connection.cursor()
table = etl.fromdb(mkcursor, 'select * from production.brands')
table
cursor = connection.cursor()  # was cnxn.cursor(); cnxn is never defined

categories = [['yuchan', 'raum', 'hyejin']]

table = etl.fromcolumns(categories)
table = etl.rename(table, 'f0', 'category_name')
res = etl.appenddb(table, connection, 'categories', 'production')
res

#df = pd.DataFrame(columns=['id','list','dict'])

#for i in range(5):
#    id = i
#    a = np.array([k for k in range(i+10)]).tobytes()
#    b = json.dumps({'ab':[1,2,3],'cd':[4,5,6]})
#    df.loc[i] = (i,a,b)


#from sqlalchemy import create_engine
#df
##https://stackoverflow.com/questions/15750711/connecting-to-sql-server-2012-using-sqlalchemy-and-pyodbc/36747352
## create sqlalchemy engine
########## Json extraction and maping
tableJ = etl.fromjson('cust_data.json', header=['id','gender','first_name','last_name', 'email','ville'])
tableJ = etl.movefield(tableJ, 'gender', 4)

########## CSV extraction and conversion
tableCSV = etl.fromcsv('week_cust.csv')
tableCSV = etl.convert(tableCSV, 'id', int)

########### Sqlserver connection and extraction
connectionSqlServer=pyodbc.connect("Driver={SQL Server Native Client 11.0};" "Server=81_64_msdn;" "Database=BD4client;" "Trusted_Connection=yes;" "convert_unicode =True;")
cursor = connectionSqlServer.cursor()
cursor.execute('SELECT id, first_name, last_name, email, gender, ville FROM client_DATA')
tableSqlServer = cursor.fetchall()
tableSqlServer =[('id','first_name','last_name', 'email','gender','ville')]+tableSqlServer
cursor.close()
connectionSqlServer.close()

######### Staging area transforming and concatenation
StagingArea = etl.cat(tableCSV, tableJ,tableSqlServer)
StagingArea = etl.convert(StagingArea, 'gender', {'Male': 'M', 'Female': 'F', 'male': 'M', 'female': 'F', None: 'N'})
StagingArea = etl.rename(StagingArea, 'ville', 'city')

######## mysql
import MySQLdb as mysql  # assumed driver: mysql.connect(host, user, passwd, db) matches the MySQLdb signature

connection = mysql.connect(host="localhost", user="******", passwd="", db="customerdatabase")
curseur = connection.cursor()
curseur.execute('SET SQL_MODE=ANSI_QUOTES')
#### load data, assuming table "CustomerData" already exists in the database
etl.appenddb(StagingArea, connection, 'customerdata', schema='customerdatabase', commit=True)
curseur.close()
connection.close()
Example #27
                                       stock)

list_of_sentiments = ['bearishPercent', 'bullishPercent']
important_sentiments = {}
for sentiments in list_of_sentiments:
    important_sentiments[sentiments] = sentiment['sentiment'][sentiments]
important_sentiments['date_and_time'] = str(datetime.datetime.now())[:19]
table_sentiment = etl.fromdicts([important_sentiments])
table_sentiment = etl.addfield(table_sentiment, 'symbol', stock)
tables_without_symbol = [
    table_price, table_insiders, table_financial_metrics, table_sentiment
]

db = Database('postgres', 'postgres', '123456f')
db_connection = db.con
etl.appenddb(table_price, db_connection, 'price')
etl.todb(table_recommendation, db_connection, 'recommendation')
etl.todb(table_insiders, db_connection, 'insiders')
etl.appenddb(table_financial_metrics, db_connection, 'financial_metrics')
etl.appenddb(table_sentiment, db_connection, 'sentiment')

# data for first request
db.cursor.execute('select NAME from INSIDERS')
data1 = db.cursor.fetchall()
data1 = list(set(data1))

# data for second request
db.cursor.execute('select * from INSIDERS')
data2 = db.cursor.fetchall()
data2_modify = {}
for row in data2:
Example #28
def append_tailings_reports_to_code_required_reports(connection, commit=False):
    src_table = etl.fromdb(
        connection,
        'SELECT exp_doc.mine_guid, exp_doc.exp_document_guid, req_doc.req_document_name, exp_doc.due_date, exp_doc.exp_document_status_code, exp_doc.received_date, exp_doc.active_ind, exp_doc_x.mine_document_guid, exp_doc.create_user, exp_doc.create_timestamp, exp_doc.update_user, exp_doc.update_timestamp from mine_expected_document exp_doc \
        inner join mine_expected_document_xref exp_doc_x on exp_doc.exp_document_guid = exp_doc_x.exp_document_guid\
        inner join mds_required_document req_doc on req_doc.req_document_guid = exp_doc.req_document_guid'
    )

    req_document_crr_defintion_map = [
        ['req_document_name', 'mine_report_definition_id'],
        ['Summary of TSF and Dam Safety Recommendations', 28],
        ['ITRB Activities Report', 27],
        ['Register of Tailings Storage Facilities and Dams', 47],
        ['Dam Safety Inspection (DSI) Report', 26],
        ['Dam Safety Review (DSR) Report', 31],
        ['“As-built” Reports', 32],
        ['Annual Reclamation', 25],
        ['MERP Record of Testing', 3],
        #['Annual Manager\'s Report', __________________ ], no mapping or data, ignore.
        ['OMS Manual', 33],
        ['Annual reconciliation of water balance and water management plans', 44],
        ['TSF risk assessment', 46],
        ['Mine Emergency Preparedness and Response Plan (MERP)', 24],
        ['Performance of high risk dumps', 29]
    ]

    table1 = etl.join(src_table, req_document_crr_defintion_map, 'req_document_name')
    mine_report = etl.cutout(table1, 'req_document_name')

    #to be inserted into db
    mine_report = etl.addfield(mine_report, 'submission_year', 2019)
    mine_report = etl.rename(mine_report, 'exp_document_status_code',
                             'mine_report_submission_status_code')
    mine_report = etl.addfield(mine_report, 'deleted_ind', lambda x: not x.active_ind)
    mine_report = etl.cutout(mine_report, 'active_ind')
    #to determine what FK's will be so can insert into related tables
    max_report_id = etl.fromdb(connection,
                               'select last_value from public.mine_report_mine_report_id_seq')[1][0]
    max_report_submission_id = etl.fromdb(
        connection,
        'select last_value from public.mine_report_submission_mine_report_submission_id_seq')[1][0]

    #if sequence hasn't been used yet, fix off by one
    if max_report_id == 1:
        max_report_id = 0
    if max_report_submission_id == 1:
        max_report_submission_id = 0

    #get one-to-many
    mine_report, mine_report_submission_documents = etl.unjoin(mine_report,
                                                               'mine_document_guid',
                                                               key='exp_document_guid')

    #add PK's for mappings
    mine_report_with_ids = etl.addrownumbers(mine_report,
                                             start=max_report_id + 1,
                                             step=1,
                                             field='mine_report_id')
    mine_report_with_ids = etl.addrownumbers(mine_report_with_ids,
                                             start=max_report_submission_id + 1,
                                             step=1,
                                             field='mine_report_submission_id')
    print(f'max_report_id= {max_report_id}, max_report_submission_id={max_report_submission_id}')
    #copy out fields for submission tables
    mine_report_submissions = etl.cut(mine_report_with_ids, [
        'mine_report_id', 'exp_document_guid', 'mine_report_submission_status_code', 'create_user',
        'create_timestamp', 'update_user', 'update_timestamp'
    ])
    mine_report_submissions = etl.addfield(mine_report_submissions,
                                           'submission_date', lambda x: x.create_timestamp)
    #remove fields not in mine_report
    mine_report = etl.cutout(mine_report, 'mine_report_submission_status_code')

    #replace exp_document_guid FK with mine_report_submission FK
    submission_id_lookup = etl.cut(mine_report_with_ids,
                                   ['mine_report_submission_id', 'exp_document_guid'])
    mine_report_submission_documents = etl.join(submission_id_lookup,
                                                mine_report_submission_documents,
                                                key='exp_document_guid')
    mine_report_submission_documents = etl.cutout(mine_report_submission_documents,
                                                  'exp_document_guid')

    #removed original PK
    mine_report = etl.cutout(mine_report, 'exp_document_guid')
    mine_report_submissions = etl.cutout(mine_report_submissions, 'exp_document_guid')

    print(etl.valuecounter(etl.distinct(table1, key='exp_document_guid'), 'req_document_name'))
    print(etl.valuecounter(mine_report, 'mine_report_definition_id'))
    print(table1)
    print(mine_report)
    print(mine_report_submissions)
    print(mine_report_submission_documents)

 
    etl.appenddb(mine_report, connection, 'mine_report', commit=False)
    print('INSERT mine_report staged')
    etl.appenddb(mine_report_submissions, connection, 'mine_report_submission', commit=False)
    print('INSERT mine_report_submission staged')
    etl.appenddb(mine_report_submission_documents,
                    connection,
                    'mine_report_document_xref',
                    commit=False)
    print('INSERT mine_report_document_xref staged')
    if commit:  
        connection.commit()
        print('DATA CREATION COMPLETE')
    else:
        connection.rollback()
        print('NO DATA CREATED: add --commit=true to insert report rows')
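
The least obvious step above is `etl.unjoin`, which splits one denormalized table into a parent/child pair: the left result is the input minus the value column, the right result is the distinct (key, value) pairs. A tiny standalone illustration with invented data:

import petl as etl

flat = [['report_id', 'status', 'doc_guid'],
        [1, 'REC', 'doc-a'],
        [1, 'REC', 'doc-b'],
        [2, 'ACC', 'doc-c']]
reports, report_docs = etl.unjoin(flat, 'doc_guid', key='report_id')
print(etl.look(reports))      # ('report_id', 'status') rows, doc_guid removed
print(etl.look(report_docs))  # ('report_id', 'doc_guid'), one row per pair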
Example #29
cursor = connection.cursor()
cursor.execute('DROP TABLE IF EXISTS test')
cursor.execute('CREATE TABLE test (foo TEXT, bar INT)')
connection.commit()
cursor.close()

print 'exercise the petl functions using a connection'
from petl import look, fromdb, todb, appenddb
t1 = fromdb(connection, 'SELECT * FROM test')
print look(t1)
t2 = (('foo', 'bar'), ('a', 1), ('b', 1))
t2app = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, connection, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)

print 'exercise the petl functions using a cursor'
cursor = connection.cursor()
todb(t2, cursor, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, cursor, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, cursor, 'test')
Example #30
cursor.execute('SET SQL_MODE=ANSI_QUOTES')
cursor.execute('DROP TABLE IF EXISTS test')
cursor.execute('CREATE TABLE test (foo TEXT, bar INT)')
cursor.close()
connection.commit()

prompt('exercise the petl functions using a connection')
from petl import look, fromdb, todb, appenddb
t1 = fromdb(connection, 'SELECT * FROM test')
print look(t1)
t2 = (('foo', 'bar'), ('a', 1), ('b', 1))
t2app = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, connection, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)

prompt('exercise the petl functions using a cursor')
cursor = connection.cursor()
todb(t2, cursor, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, cursor, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, cursor, 'test')
Example #31
def transfer_data(from_db_conn, to_db_conn):
    '''
    Transfer data between databases, given connections to the source and
    destination databases.
    Limitations:
    1. poc.address_id is currently marked as -1 since it was not provided in test data and is a FK non-null constraint
    2. institution2poc table is not available in old schema
    3. role table is already populated by the bill.sql file, so that table is skipped by this script
    4. poc_poc_id is currently set to poc_id since no relevant information is available about the column
    5. project2moc_project.role_id column is not available in old schema and is a not-null field in new schema,
        so we default it to 1 for now.
    6. project2moc_project.username is not available from old schema, so it is currently set to empty
    7. raw_item_ts.item_id has duplicates when imported from item_ts, so we filter out and insert only unique rows.

    :param from_db_conn: source database connection
    :param to_db_conn: destination database connection
    '''

    # Emptying out tables with possible foreign key constraint issues
    fk_dep_tables = [
        'poc2project', 'poc2moc_project', 'poc', 'raw_item_ts', 'item',
        'project', 'institution2moc_project'
    ]
    for table_name in fk_dep_tables:
        table = etl.fromdb(to_db_conn,
                           "select * from {} where 1=0".format(table_name))
        etl.todb(table, to_db_conn, table_name)

    # Tables with no change in schema
    insert_as_tables = [
        'institution', 'address', 'item_type', 'item2item', 'catalog_item'
    ]
    for table_name in insert_as_tables:
        table = etl.fromdb(from_db_conn, "select * from {}".format(table_name))
        etl.todb(table, to_db_conn, table_name)

    # inserting dummy address for constraint matching
    dummy_address = [{'address_id': -1}]
    dummy_address_table = etl.fromdicts(dummy_address)
    etl.appenddb(dummy_address_table, to_db_conn, 'address')

    poc = etl.fromdb(from_db_conn, 'select * from poc')
    poc_transformed = etl.cutout(poc, 'domain_id', 'user_uid')
    poc_dummy_address = etl.replace(poc_transformed, 'address_id', None, -1)
    etl.todb(poc_dummy_address, to_db_conn, 'poc')

    project_names_table = etl.fromdb(
        from_db_conn, "select distinct project_name from project")
    moc_project_transformed = etl.addrownumbers(project_names_table)
    moc_project_transformed = etl.rename(moc_project_transformed,
                                         {'row': 'moc_project_id'})
    etl.todb(moc_project_transformed, to_db_conn, 'moc_project')

    domain = etl.fromdb(from_db_conn, "select * from domain")
    domain_table_transformed = etl.cutout(domain, 'domain_uid')
    domain_table_transformed = etl.rename(domain_table_transformed, {
        'domain_id': 'service_id',
        'domain_name': 'service_name'
    })
    etl.todb(domain_table_transformed, to_db_conn, 'service')

    project = etl.fromdb(from_db_conn, "select * from project")
    moc_project = etl.fromdb(to_db_conn, "select * from moc_project")
    project_moc_project_joined = etl.join(project,
                                          moc_project,
                                          key='project_name')
    project_table_transformed = etl.cutout(project_moc_project_joined,
                                           'project_name')
    project_table_transformed = etl.rename(project_table_transformed, {
        'domain_id': 'service_id',
        'project_uid': 'project_uuid'
    })
    etl.todb(project_table_transformed, to_db_conn, 'project')

    institution2project = etl.fromdb(from_db_conn,
                                     "Select * from institution2project")
    project = etl.fromdb(to_db_conn,
                         "select project_id, moc_project_id from project")
    inst2project_project_joined = etl.join(institution2project,
                                           project,
                                           key='project_id')
    inst2moc_project = etl.cutout(inst2project_project_joined, 'domain_id')
    etl.todb(inst2moc_project, to_db_conn, 'institution2moc_project')

    project2poc = etl.fromdb(from_db_conn, "select * from project2poc")
    project2poc_project_joined = etl.join(project2poc,
                                          project,
                                          key='project_id')
    poc2moc_project = etl.cutout(project2poc_project_joined, 'project_id',
                                 'domain_id')
    poc2moc_project = etl.addfield(poc2moc_project, 'role_id', 1)
    poc2moc_project = etl.addfield(poc2moc_project, 'poc_poc_id',
                                   lambda rec: rec['poc_id'])
    etl.todb(poc2moc_project, to_db_conn, 'poc2moc_project')

    poc2project = etl.cutout(project2poc, 'domain_id')
    poc2project = etl.addfield(poc2project, 'role_id', 1)
    poc2project = etl.addfield(poc2project, 'username', '')
    etl.todb(poc2project, to_db_conn, 'poc2project')

    item = etl.fromdb(from_db_conn, "select * from item")
    item_transformed = etl.cutout(item, 'domain_id')
    etl.todb(item_transformed, to_db_conn, 'item')

    raw_item_ts_unique = etl.fromdb(
        from_db_conn,
        "WITH summary AS ( SELECT its.item_id, its.start_ts, its.end_ts, its.state, its.catalog_item_id, ROW_NUMBER() OVER(PARTITION BY its.item_id) AS rk FROM ITEM_TS its) SELECT s.* FROM summary s WHERE s.rk = 1"
    )
    raw_item_ts_unique = etl.cutout(raw_item_ts_unique, 'rk')
    etl.todb(raw_item_ts_unique, to_db_conn, 'raw_item_ts')
Example #32
connection = psycopg2.connect('dbname=twelveBD user=postgres password=admin')

table = etl.fromdb(connection, 'SELECT * FROM personas')
table1 = [['idPersona', 'nombre', 'sexo', 'edad'],
          [1, 'Rafael Perez Aguirre', 'm', 24]]
table2 = [['idPersona', 'nombre', 'sexo', 'edad'],
          [2, 'Eduardo Cantoran Flores', 'm', 25]]
table3 = [['idPersona', 'nombre', 'sexo', 'edad'],
          [3, 'Adriana Lopez Montiel', 'm', 30]]

table1 = [
    ['idEmpresa', 'nombre', 'sucursal', 'direccion'],
    [
        'IDIT', 1,
        'Blvrd del Niño Poblano 2901, Reserva Territorial Atlixcáyotl, Centro Comercial Puebla, 72810 San Andrés Cholula, Pue.'
    ]
]

table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'], ['fácil', 30, 1, 45]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'],
          ['intermedio', 35, 1, 40]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'],
          ['Dificil', 45, 1, 40]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'],
          ['Veterano', 50, 1, 45]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'], ['Dios', 55, 1, 40]]

etl.todb(table1, connection, 'personas')
etl.appenddb(table1, connection, 'personas')
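
Note that every `table1 = ...` assignment above overwrites the previous one, so only the last preset ('Dios') ever reaches the database, and both calls load it into `personas`, whose columns do not match the preset header. If the intent was to load all five presets, a sketch closer to that intent (the 'tipos' table name is a guess based on Example #1's schema):

presets = [
    ['fácil', 30, 1, 45],
    ['intermedio', 35, 1, 40],
    ['Dificil', 45, 1, 40],
    ['Veterano', 50, 1, 45],
    ['Dios', 55, 1, 40],
]
header = ['nombre', 'nofichas', 'secuencia', 'tiempo']
etl.appenddb([header] + presets, connection, 'tipos')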