Code example #1
    def synctable(self, sourceDb, targetDb, sourceTable, targetTable):
        sourceCursor = sourceDb.cursor()
        targetCursor = targetDb.cursor()
        affected_total = 0
        init_rowCount = targetTable.rowCount if targetTable.rowCount < sourceTable.rowCount else sourceTable.rowCount
        pbar = tqdm(total=sourceTable.rowCount, unit='records')
        pbar.update(init_rowCount)
        while sourceTable.lastUpdatetime > targetTable.lastUpdatetime:
            affected_rows = 0
            batchSize = 100000
            sql = "SELECT * FROM (SELECT * FROM {schema}.{tablename} WHERE {timestamp}>=to_timestamp('{last_updatetime}','yyyy-mm-dd hh24:mi:ss.ff6') ORDER BY {timestamp}) WHERE ROWNUM<={batch_size}".format(
                timestamp=sourceTable.timestampField,
                schema=sourceTable.schema,
                tablename=sourceTable.tablename,
                last_updatetime=targetTable.lastUpdatetime,
                batch_size=batchSize)
            sourceRecord = etl.fromdb(lambda: CursorProxy(sourceDb.cursor()),
                                      sql)
            targetRecord = etl.fromdb(
                lambda: CursorProxy(targetDb.cursor()),
                "SELECT * FROM {schema}.{tablename} WHERE 1=0".format(
                    schema=targetTable.schema,
                    tablename=targetTable.tablename))
            sourceTable.columns = etl.header(sourceRecord)
            targetTable.columns = etl.header(targetRecord)
            for column in list(
                    set(sourceTable.columns) - set(targetTable.columns)):
                sourceRecord = etl.cutout(sourceRecord, column)
            max_updatetime = sourceRecord.cut(
                sourceTable.timestampField).skip(1).max()[0]
            sourceRecord = sourceRecord.sort(sourceTable.timestampField)
            etl.appenddb(sourceRecord,
                         CursorProxy(targetCursor),
                         targetTable.tablename,
                         schema=targetTable.schema,
                         commit=True)
            affected_rows += targetCursor.rowcount
            targetTable.lastUpdatetime = max_updatetime.strftime(
                '%Y-%m-%d %H:%M:%S.%f')
            targetTable.rowCount += affected_rows
            pbar.update(affected_rows if init_rowCount + affected_total +
                        affected_rows < sourceTable.rowCount else
                        sourceTable.rowCount - init_rowCount - affected_total)
            affected_total += affected_rows
            pbar.set_description("%s |%d records updated." %
                                 (targetTable.tablename, affected_total))

        if targetTable.lastUpdatetime > sourceTable.lastUpdatetime:
            pbar.set_description("%s |timestamp >, skip." %
                                 (targetTable.tablename))
        elif targetTable.lastUpdatetime == sourceTable.lastUpdatetime and targetTable.rowCount == sourceTable.rowCount:
            pbar.set_description("%s |no data change." %
                                 (targetTable.tablename))
        elif targetTable.lastUpdatetime == sourceTable.lastUpdatetime and targetTable.rowCount > sourceTable.rowCount:
            pbar.set_description("%s |RowCount > but timestamp ==, skip." %
                                 (targetTable.tablename))
        elif targetTable.lastUpdatetime == sourceTable.lastUpdatetime and targetTable.rowCount < sourceTable.rowCount:
            pbar.set_description("%s |RowCount < but timestamp ==, skip." %
                                 (targetTable.tablename))
        pbar.close()
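The method above relies on a CursorProxy helper that is not shown in the snippet. Judging from how it is handed to etl.fromdb (via a callable returning a cursor) and to etl.appenddb, a thin wrapper that forwards DB-API calls would suffice; the sketch below is an assumption rather than the original implementation, with executemany() singled out because some drivers want a concrete list rather than a generator.

class CursorProxy(object):
    """Minimal stand-in for the CursorProxy used above (assumed, not from the source)."""

    def __init__(self, cursor):
        self._cursor = cursor

    def executemany(self, statement, parameters, **kwargs):
        # materialise the parameter rows; some drivers reject generators here
        return self._cursor.executemany(statement, list(parameters), **kwargs)

    def __getattr__(self, item):
        # execute, fetchall, description, close, ... pass straight through
        return getattr(self._cursor, item)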
Code example #2
def load_fornecedor_comportamento():

    connComprasnet = get_comprasnet_connection()

    connSiasg_DW = get_siasg_dw_connection()

    engine = get_data_lake_engine()

    print("Iniciando carga desclassificação ...")
    ## DESCLASSIFICACAO_FORNECEDORES -> FORNECEDOR_COMPORTAMENTO
    tabela_sql = etl.fromdb(connComprasnet,
                            Comprasnet.DESCLASSIFICACAO_FORNECEDORES)
    etl.appenddb(tabela_sql, engine, '_FORNECEDOR_COMPORTAMENTO')
    print("Carga desclassificação executada com sucesso")

    print("Iniciando carga \"contrato continuado\"")
    ## CONTRATO_CONTINUADO -> FORNECEDOR_COMPORTAMENTO
    tabela_sql = etl.fromdb(connSiasg_DW, Siasg_DW.CONTRATO_CONTINUADO)
    etl.appenddb(tabela_sql, engine, '_FORNECEDOR_COMPORTAMENTO')
    print("Carga \"contrato continuado\" executada com sucesso")

    print("Iniciando carga recursos ...")
    ## RECURSOS -> FORNECEDOR_COMPORTAMENTO
    # This query will have to be adjusted once the missing tables are implemented in the data lake.
    # Today it lives in Quartzo (Postgres) and will be migrated to the data lake (SQL Server).
    tabela_sql = etl.fromdb(connComprasnet, Comprasnet.RECURSOS)
    etl.appenddb(tabela_sql, engine, '_FORNECEDOR_COMPORTAMENTO')
    print("Carga recursos executada com sucesso")
    return 0
Code example #3
def insert_data(df, mode):
    # mode: 1 = temperature and humidity, 2 = temperature only, 3 = humidity only

    connection = psycopg2.connect(dbname='voyager', user='******', password='******', host='172.16.0.45')
    engine = create_engine('postgresql://*****:*****@172.16.0.45:5432/voyager')

    latest_temp = 'SELECT id FROM temperature ORDER BY id DESC LIMIT 1'
    latest_hum = 'SELECT id FROM humidity ORDER BY id DESC LIMIT 1'
    temp = etl.fromdb(connection, latest_temp)
    hum = etl.fromdb(connection, latest_hum)
    temp = temp['id'][0] + 1
    hum = hum['id'][0] + 1


    df_loc = df[['time', 'latitude', 'longitude']]

    if mode == 1:
        df_temp = df[['temperature']]
        df_temp.index = np.arange(temp, len(df_temp) + temp)
        df_temp['id'] = df_temp.index
        df_temp.rename(columns={'temperature': 'value'}, inplace=True)
        df_loc['temperature_id'] = df_temp.index
        df_temp.to_sql('temperature', engine, if_exists='append', index=False, method='multi')

        print(df_temp)
        df_hum = df[['humidity']]
        df_hum.index = np.arange(hum, len(df_hum) + hum)
        df_hum['id'] = df_hum.index
        df_hum.rename(columns={'humidity': 'value'}, inplace=True)
        df_loc['humidity_id'] = df_hum.index
        print(df_hum)
        df_hum.to_sql('humidity', engine, if_exists='append', index=False, method='multi')

    if mode == 2:
        df_temp = df[['temperature']]
        df_temp.index = np.arange(temp, len(df_temp) + temp)
        df_temp['id'] = df_temp.index
        df_temp.rename(columns={'temperature': 'value'}, inplace=True)
        df_loc['temperature_id'] = df_temp.index
        print(df_temp)

        df_temp.to_sql('temperature', engine, if_exists='append', index=False, method='multi')

    if mode == 3:
        df_hum = df[['humidity']]
        df_hum.index = np.arange(hum, len(df_hum) + hum)
        df_hum['id'] = df_hum.index
        df_hum.rename(columns={'humidity': 'value'}, inplace=True)
        df_loc['humidity_id'] = df_hum.index
        print(df_hum)
        df_hum.to_sql('humidity', engine, if_exists='append', index=False, method='multi')


    df_loc['research_id'] = research
    df_loc['voyager_id'] = voyager
    print(df_loc)
    df_loc.to_sql('location', engine, if_exists='append', index=False, method='multi')
Code example #4
def etl_(query):
    source_db = get_source_db(query)
    extract_query = get_extract_query(query)

    with source_db() as source:
        etl.fromdb(source, extract_query) \
           .topickle(f'temp/{query.target_table}.p')

    with GISLNIDB.GISLNIDB() as target:
        etl.frompickle(f'temp/{query.target_table}.p') \
           .todb(get_cursor(target), query.target_table.upper())
Code example #5
File: dim_time.py Project: Cimsolutions2019/ETL
def dimension_values():
    connection = psycopg2.connect(dbname='voyager',
                                  user='******',
                                  password='******',
                                  host='172.16.0.45')
    engine = create_engine('postgresql://*****:*****@172.16.0.45:5432/voyager')

    knmi_dim_time = "select distinct((to_date(CAST(date AS text), 'YYYYMMDD'))::timestamp + interval '1h' * hour) as time from knmi_station_data"

    lmn_dim_time = "SELECT Distinct(left(timestamp, -6)) as time from luchtmeetnet_data"

    knmi_table = etl.fromdb(connection, knmi_dim_time)
    lmn_table = etl.fromdb(connection, lmn_dim_time)

    df_knmi = pd.DataFrame(knmi_table)
    df_lmn = pd.DataFrame(lmn_table)

    df_knmi.columns = df_knmi.iloc[0]
    df_knmi = df_knmi.drop(0)

    df_lmn.columns = df_lmn.iloc[0]
    df_lmn = df_lmn.drop(0)

    df_total = df_knmi.append(df_lmn)

    df_total = df_total.drop_duplicates()

    df_total = df_total.reset_index()
    df_total = df_total.drop('index', axis=1)
    df_total = df_total.reset_index()
    df_total = df_total.rename(columns={'index': 'id'})

    df_total['time'] = df_total['time'].astype(str)

    df_total[['Year', 'Month', 'Day']] = df_total.time.str.split(
        "-",
        expand=True,
    )
    df_total[['Day', 'Hour']] = df_total.Day.str.split(
        " ",
        expand=True,
    )
    df_total[['Hour', 'Minute', 'Second']] = df_total.Hour.str.split(
        ":",
        expand=True,
    )
    df_total.to_sql('dim_time',
                    engine,
                    if_exists='append',
                    index=False,
                    method='multi')

    print(df_total)
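A side note on the DataFrame conversion above: if the installed petl includes the pandas integration, the manual header fix-up (promoting iloc[0] to the column names and dropping row 0) can be replaced by a direct conversion. A sketch, assuming petl.todataframe is available in this environment:

    # assumes a petl build with pandas support
    df_knmi = etl.todataframe(knmi_table)
    df_lmn = etl.todataframe(lmn_table)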
Code example #6
def etl_(query, logger):
    source_db = get_source_db(query)
    extract_query = get_extract_query(query)

    logger.info(f'{query.target_table} - extracting data into pickle file...')
    with source_db() as source:
        etl.fromdb(source,
                   extract_query).topickle(f'temp/{query.target_table}.p')

    logger.info(f'{query.target_table} - loading data from pickle file...')
    with PERMITP.PERMITP() as target:
        etl.frompickle(f'temp/{query.target_table}.p').todb(
            get_cursor(target), query.target_table.upper())
Code example #7
def dimension_values():
    connection = psycopg2.connect(dbname='voyager',
                                  user='******',
                                  password='******',
                                  host='172.16.0.45')
    engine = create_engine('postgresql://*****:*****@172.16.0.45:5432/voyager')

    knmi_dim_com = "SELECT column_name as name FROM information_schema.columns " \
          "WHERE table_schema = 'public' " \
          "AND table_name = 'knmi_station_data'"

    lmn_dim_com = "SELECT name FROM luchtmeetnet_sensors"

    knmi_table = etl.fromdb(connection, knmi_dim_com)
    lmn_table = etl.fromdb(connection, lmn_dim_com)

    df_knmi = pd.DataFrame(knmi_table)
    df_lmn = pd.DataFrame(lmn_table)

    df_knmi.columns = df_knmi.iloc[0]
    df_knmi = df_knmi.drop(0)

    df_lmn.columns = df_lmn.iloc[0]
    df_lmn = df_lmn.drop(0)

    wid = df_knmi.loc[df_knmi['name'] == 'weather_station_id'].index
    df_knmi = df_knmi.drop(wid)

    date = df_knmi.loc[df_knmi['name'] == 'date'].index
    df_knmi = df_knmi.drop(date)

    hour = df_knmi.loc[df_knmi['name'] == 'hour'].index
    df_knmi = df_knmi.drop(hour)

    index = df_knmi.loc[df_knmi['name'] == 'index'].index
    df_knmi = df_knmi.drop(index)

    df_total = df_knmi.append(df_lmn)
    df_total = df_total.reset_index()
    df_total = df_total.drop('index', axis=1)
    df_total = df_total.reset_index()

    df_total = df_total.rename(columns={'index': 'id'})

    print(df_total)

    df_total.to_sql('dim_com',
                    engine,
                    if_exists='append',
                    index=False,
                    method='multi')
Code example #8
    def llenarListaEmpresas(self):
        global iEmpresa

        self.listEmpresas = QtWidgets.QListWidget(
            self.scrollAreaWidgetContents_2)
        self.listEmpresas.setObjectName("listEmpresas")

        empresas = etl.fromdb(connection, 'SELECT * FROM empresas')

        for empresa in etl.data(empresas):
            item = QtWidgets.QListWidgetItem()
            self.listEmpresas.addItem(item)

        self.horizontalLayout_3.addWidget(self.listEmpresas)

        __sortingEnabled = self.listEmpresas.isSortingEnabled()
        self.listEmpresas.setSortingEnabled(False)

        iEmpresa = 0
        for empresa in etl.data(empresas):
            item = self.listEmpresas.item(iEmpresa)
            item.setText(empresa[1])
            iEmpresa += 1

        self.listEmpresas.setSortingEnabled(__sortingEnabled)
Code example #9
    def run(self, driver, task, log):
        input_driver = driver.get_driver(task["source"]["connection"])
        sql = self._parse_sql(task["source"])
        db = input_driver.get_db()
        record_set = etl.fromdb(db, sql)
        if not etl.data(record_set).any():
            log.write("Task skipped. No rows on source")
        else:
            transform = TransformSubTask(task, log)
            record_set = transform.get_result(record_set)

            fld = task["target"].get("folder", "output")
            fld = compat.translate_unicode(fld)
            target = task["target"]["file"]
            target = compat.translate_unicode(target)
            out = "{}/{}".format(fld, target)

            separator = task["target"].get("delimiter", ";")
            separator = compat.translate_unicode(separator)
            enc = task["target"].get("encoding", "utf-8")

            task_log = "log/db-csv_{}_{}.log".format(task["name"],
                                                     get_time_filename())
            with open(task_log, "w") as lg:
                if "truncate" in task["target"] and task["target"]["truncate"]:
                    record_set.progress(10000,
                                        out=lg).tocsv(out,
                                                      encoding=enc,
                                                      delimiter=separator)
                else:
                    record_set.progress(10000,
                                        out=lg).appendcsv(out,
                                                          encoding=enc,
                                                          delimiter=separator)
        db.close()
Code example #10
def ETL_MMS_NOW_schema(connection, tables, schema, system_name):
    '''Import all the data from the specified schema and tables.'''
    for destination, source in tables.items():
        try:
            current_table = etl.fromdb(connection,
                                       f'SELECT * from {schema}.{source}')
            print(f'    {destination}:{etl.nrows(current_table)}')

            if (source == 'application'):
                # add originating source
                table_plus_os = etl.addfield(current_table,
                                             'originating_system', system_name)

                table_plus_os_guid = join_mine_guids(connection, table_plus_os)

                etl.appenddb(table_plus_os_guid,
                             connection,
                             destination,
                             schema='now_submissions',
                             commit=False)
            else:

                etl.appenddb(current_table,
                             connection,
                             destination,
                             schema='now_submissions',
                             commit=False)

        except Exception as err:
            print(f'ETL Parsing error: {err}')
            raise
Code example #11
File: views.py Project: kannandreams/antwalk
def generateCSV(request):
    connection = psycopg2.connect("dbname=TaaS \
                                  user=postgres \
                                  password=python \
                                  host=localhost")
    table = fromdb(connection, 'select * from cars')
    #table2 = head(table, 2)
    #petl.tail(table, n=10)

    #table3 = select(table, lambda rec: rec['name'] == 'Skoda' and rec['baz'] > 88.1)
    #table3 = select(table, lambda rec: rec['name'] == 'Skoda')

    #print look(table)

    tocsv(table, '/home/kannan/Documents/openproject/mysite/output.csv')
    data = open("/home/kannan/Documents/openproject/mysite/output.csv", "r")

    #data = f.readlines()
    #f = StringIO.StringIO()
    #template_name = 'GDR.html'
    #response = HttpResponse(f, content_type='text/csv')
    #response['Content-Disposition'] = 'attachment; \

    #        filename=output.csv'
    #return response

    resp = HttpResponse(data, content_type='text/csv')
    resp['Content-Disposition'] = 'attachment;filename=output.csv'
    return resp
Code example #12
File: test_io.py Project: deytao/petl
def test_fromdb_mkcursor():

    # initial data
    data = (("a", 1), ("b", 2), ("c", 2.0))
    connection = sqlite3.connect(":memory:")
    c = connection.cursor()
    c.execute("create table foobar (foo, bar)")
    for row in data:
        c.execute("insert into foobar values (?, ?)", row)
    connection.commit()
    c.close()

    # test the function
    mkcursor = lambda: connection.cursor()
    actual = fromdb(mkcursor, "select * from foobar")
    expect = (("foo", "bar"), ("a", 1), ("b", 2), ("c", 2.0))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice

    # test iterators are isolated
    i1 = iter(actual)
    i2 = iter(actual)
    eq_(("foo", "bar"), i1.next())
    eq_(("a", 1), i1.next())
    eq_(("foo", "bar"), i2.next())
    eq_(("b", 2), i1.next())
Code example #13
File: test_io.py Project: brutimus/petl
def test_fromdb_mkcursor():
    
    # initial data
    data = (('a', 1),
            ('b', 2),
            ('c', 2.0))
    connection = sqlite3.connect(':memory:')
    c = connection.cursor()
    c.execute('create table foobar (foo, bar)')
    for row in data:
        c.execute('insert into foobar values (?, ?)', row)
    connection.commit()
    c.close()
    
    # test the function
    mkcursor = lambda: connection.cursor()
    actual = fromdb(mkcursor, 'select * from foobar')
    expect = (('foo', 'bar'),
              ('a', 1),
              ('b', 2),
              ('c', 2.0))
    ieq(expect, actual)
    ieq(expect, actual) # verify can iterate twice

    # test iterators are isolated
    i1 = iter(actual)
    i2 = iter(actual)
    eq_(('foo', 'bar'), i1.next())
    eq_(('a', 1), i1.next())
    eq_(('foo', 'bar'), i2.next())
    eq_(('b', 2), i1.next())
Code example #14
File: test_io.py Project: pombredanne/petl
def test_fromdb_mkcursor():

    # initial data
    data = (('a', 1), ('b', 2), ('c', 2.0))
    connection = sqlite3.connect(':memory:')
    c = connection.cursor()
    c.execute('create table foobar (foo, bar)')
    for row in data:
        c.execute('insert into foobar values (?, ?)', row)
    connection.commit()
    c.close()

    # test the function
    mkcursor = lambda: connection.cursor()
    actual = fromdb(mkcursor, 'select * from foobar')
    expect = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2.0))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice

    # test iterators are isolated
    i1 = iter(actual)
    i2 = iter(actual)
    eq_(('foo', 'bar'), i1.next())
    eq_(('a', 1), i1.next())
    eq_(('foo', 'bar'), i2.next())
    eq_(('b', 2), i1.next())
Code example #15
File: analysis.py Project: ryantuck/lombardi
def game_buckets(game_param, game_bins, position):

    # extract data
    table_name = '{}_games'.format(position)
    games = petl.fromdb(conn, 'select * from lombardi.{}'.format(table_name))

    vals = [float(y) for y in games[game_param]]

    # calculate params for our model
    min_v = min(vals)
    max_v = max(vals)
    bin_size = (max_v-min_v)/game_bins

    def rebucket(v):
        if v == game_bins:
            return v-1
        return v

    # bucket vals
    bucketed_games = (
        games
        .addfield('normed_v', lambda r: float(r[game_param]) - min_v)
        .addfield('bucket', lambda r: int(r['normed_v'] // bin_size))
        .convert(game_param, float)
        # gnarly hack to account for bin logic weirdness on last value
        .convert('bucket', rebucket)
        .cut(('year', 'name', 'week', game_param, 'bucket'))
    )

    return bucketed_games
Code example #16
    def selectRows(self, line):
        currentCount = 0
        listLength = 0
        list1 = []
        fromTable = petl.fromdb(self.FROM_DB_CON, line)
        it = iter(fromTable)
        hdr = next(it)
        for one in it:
            currentCount += 1
            listLength += 1
            if self.bigFields is not None:
                bigFields = self.bigFields
                one = list(one)
                if 'BLOB' in bigFields:
                    for n in bigFields['BLOB']:
                        try:
                            one[n] = one[n].read()
                        except Exception as e:
                            print(e)
            list1.append(one)
            if listLength == self.getDataLength:
                #print(self.manager.getDispatchedJobQueue().qsize())
                qList = list1
                self.manager.getDispatchedJobQueue().put(qList)
                list1 = []
                listLength = 0

        if len(list1):
            self.manager.getDispatchedJobQueue().put(list1)

        data = self.manager.getConfigQueue().get(1)
        data['endFlag'] = True
        self.manager.getConfigQueue().put(data)
Code example #17
File: wc.py Project: NMBGMR/WDIETL
    def extract(self, model, record_id):
        table = model.name
        sql = f'''select POINT_ID, WQ_{table}.Latitude, WQ_{table}.Longitude, SiteNames, WellDepth from dbo.WQ_{table}
    join NM_Aquifer.dbo.Location on NM_Aquifer.dbo.Location.PointID = dbo.WQ_{table}.POINT_ID
    where PublicRelease=1 and POINT_ID = %d'''

        table = petl.fromdb(nm_quality_connection(), sql, record_id)
        return table
Code example #18
def load_db_data(db, dispensary_id, table_name):
    """
    Data extracted from source db
    """
    sql = ("SELECT * from {0} WHERE "
           "dispensary_id={1}").format(table_name, dispensary_id)

    return etl.fromdb(db, sql)
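String-formatting dispensary_id into the statement works, but fromdb also forwards query parameters to the underlying driver (the sqlite tests later in this list show the same pattern with ? placeholders). A sketch of a parameterised variant, assuming a driver that uses the %s paramstyle such as psycopg2 or pymysql; the function name is illustrative only:

def load_db_data_param(db, dispensary_id, table_name):
    """Same extract, but binding dispensary_id instead of formatting it in."""
    # table names cannot be bound as parameters, so only the id is passed through
    sql = "SELECT * FROM {0} WHERE dispensary_id = %s".format(table_name)
    return etl.fromdb(db, sql, (dispensary_id,))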
Code example #19
File: wl.py Project: NMBGMR/WDIETL
 def _has_observations(self, record):
     sql = '''select count(PointID) from dbo.WaterLevelsContinuous_Pressure
     where PointID=%s'''
     pid = record['PointID']
     table = petl.fromdb(nm_aquifier_connection(), sql, (pid, ))
     nobs = petl.values(table, '')[0]
     print(f'{pid} has nobs={nobs}')
     return bool(nobs)
Code example #20
def get_data_table(tableName):
    conex = get_conex(dDBI["mod"], dDBI["host"], dDBI["port"], dDBI["user"],
                      dDBI["passwd"], dDBI["db"])
    tabla = etl.fromdb(conex, "SELECT * FROM " + tableName)
    etl.tojson(tabla, "./static/data/" + tableName + '.json')
    conex.close()
    rv = showjson(str(tableName))
    return jsonify(rv)
Code example #21
def join_mine_guids(connection, application_table):
    current_mines = etl.fromdb(
        connection,
        'select distinct on (minenumber) mine_guid, mine_no as minenumber from public.mine order by minenumber, create_timestamp;'
    )
    application_table_guid_lookup = etl.leftjoin(application_table,
                                                 current_mines,
                                                 key='minenumber')
    return application_table_guid_lookup
Code example #22
File: tools.py Project: NMBGMR/WDIETL
def make_runlist():
    sql = '''select DISTINCT Location.PointID from dbo.WaterLevelsContinuous_Pressure
join dbo.Location on Location.PointID = dbo.WaterLevelsContinuous_Pressure.PointID
where dbo.Location.LatitudeDD is not null and dbo.Location.PublicRelease=1
group by Location.PointID
order by Location.PointID'''
    table = petl.fromdb(nm_aquifier_connection(), sql)

    obj = petl.tojson(table, 'record_ids.json', indent=2)
    print(obj)
Code example #23
def get_atributosTable(tableName):
    conex = get_conex(dDBI["mod"], dDBI["host"], dDBI["port"], dDBI["user"],
                      dDBI["passwd"], dDBI["db"])
    query = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME =" + "'" + str(
        tableName) + "'"
    atributos = etl.fromdb(conex, query)
    etl.tojson(atributos, "./static/data/" + tableName + '_atrib.json')
    conex.close()
    rv = showjson(str(tableName) + "_atrib")
    return jsonify(rv)
Code example #24
File: ex4-3.metrics.py Project: jdgwartney/tsi-lab
 def get_data(self, min_dt, max_dt):
     """
     Generates the SQL and extracts our data
     :param min_dt:
     :param max_dt:
     :return:
     """
     sql = "select dt, total, duration from ol_transactions where dt > '{0}' and dt <= '{1}'".format(min_dt, max_dt)
     self.log("SQL: {0}".format(sql))
     self.table = petl.fromdb(self.connection, sql)
Code example #25
File: ex4-3.metrics.py Project: jdgwartney/tsi-lab
 def get_max_dt(self):
     """
     Gets the current maximum date in the table
     :return:
     """
     sql = 'select max(dt) as max_dt from ol_transactions'
     self.log("SQL: {0}".format(sql))
     table = petl.fromdb(self.connection, sql)
     max_dt = petl.values(table, 'max_dt')[0]
     return max_dt
Code example #26
    def cambiarFichas(self, tipo):
        tipos = etl.fromdb(connection, 'SELECT * FROM tipos')
        b = tipos.data()

        self.imgFicha1.setPixmap(
            QtGui.QPixmap(self.ponerImagenFicha(b[midTipo][4])))
        self.imgFicha2.setPixmap(
            QtGui.QPixmap(self.ponerImagenFicha(b[midTipo][5])))
        self.imgFicha3.setPixmap(
            QtGui.QPixmap(self.ponerImagenFicha(b[midTipo][6])))
Code example #27
def get_cambiar_valor(cambio):
    conex = get_conex(dDBI["mod"], dDBI["host"], dDBI["port"], dDBI["user"],
                      dDBI["passwd"], dDBI["db"])
    myCambio = cambio.split("_")  # format: column_oldValue_newValue_table
    tabla = etl.fromdb(conex, "SELECT * FROM " + myCambio[3])
    tablaCambiada = etl.convert(tabla, str(myCambio[0]), 'replace',
                                str(myCambio[1]), str(myCambio[2]))
    etl.tojson(tablaCambiada, "./static/data/cambiarValor.json")
    conex.close()
    rv = showjson("cambiarValor")
    return jsonify(rv)
Code example #28
    def _extract(self, thing, model, skip):
        point_id = thing['@nmbgmr.point_id']

        sql = f'''select DateMeasured, {model.mapped_column}
        from dbo.WaterLevelsContinuous_Pressure
        join dbo.Location on dbo.Location.PointID = dbo.WaterLevelsContinuous_Pressure.PointID
        where dbo.Location.PointID = %d and QCed=1 
        order by DateMeasured offset %d rows
        '''

        return petl.fromdb(nm_aquifier_connection(), sql, (point_id, skip))
Code example #29
def get_all_tables():
    conex = get_conex(dDBI["mod"], dDBI["host"], dDBI["port"], dDBI["user"],
                      dDBI["passwd"], dDBI["db"])
    listTable = "SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA =" + "'" + str(
        dDBI["db"]) + "'"
    tablas = etl.fromdb(conex, listTable)
    etl.tojson(tablas, './static/data/tablas.json')  # TODO: add an identifier for the database here
    conex.close()
    aTablas = showjson('tablas')
    return jsonify(aTablas)
Code example #30
def run_backup(sqlite_db, backup_path):
    """backs-up each table in the inventory database to a csv,
    zips them all up, and saves the zip with a timestamp-derived name.
    """
    ts = timestamp()

    # SET UP THE FOLDERS -----------------------------------------------------

    #check for backup folder, make if it doesn't exist
    if not os.path.exists(backup_path):
        os.makedirs(backup_path)

    #make a folder for this backup
    this_backup_path = os.path.join(backup_path, "backup_{0}".format(ts))
    if not os.path.exists(this_backup_path):
        os.makedirs(this_backup_path)
    click.echo(this_backup_path)

    # GET THE DATA OUT -------------------------------------------------------

    # temporarily store extracted csv files. (use this to delete them later)
    csvs = []

    # connect to the DB, get each table, save out as a csv.
    conn = sqlite3.connect(sqlite_db)
    for table in [
            'product', 'product_tags', 'sale', 'staff', 'supplier', 'tag'
    ]:
        t = etl.fromdb(lambda: conn.cursor(),
                       """SELECT * FROM {0}""".format(table))
        out_csv = os.path.join(this_backup_path, '{0}.csv'.format(table))
        etl.tocsv(t, out_csv)
        csvs.append(out_csv)

    # ZIP THE DATA UP --------------------------------------------------------

    # make a zip file in the main backup location
    zipfile_directory = os.path.join(backup_path,
                                     "inventory_backup_{0}.zip".format(ts))
    # create a zip file object
    zf = zipfile.ZipFile(zipfile_directory, mode="w")

    for each in csvs:
        click.echo(each)
        zf.write(filename=each,
                 arcname=os.path.basename(each),
                 compress_type=compression)
    zf.close()

    # REMOVE TEMP FILES -------------------------------------------------------

    for each in csvs:
        os.remove(each)
    os.rmdir(this_backup_path)
Code example #31
    def __iter__(self):
        """Proxy iteration to core petl."""
        # form sql statement
        stmt = self.stmt()

        # get petl iterator
        dbo = self.db.dbo
        db_view = etl.fromdb(dbo, stmt)
        iter_fn = db_view.__iter__()

        return iter_fn
Code example #32
def get_all_atributos():
    conex = get_conex(dDBI["mod"], dDBI["host"], dDBI["port"], dDBI["user"],
                      dDBI["passwd"], dDBI["db"])
    listAtrib = "SELECT COLUMN_NAME,TABLE_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA =" + "'" + str(
        dDBI["db"]) + "'"
    allAtributos = etl.fromdb(conex, listAtrib)
    etl.tojson(allAtributos, './static/data/allAtributos.json')  # TODO: add an identifier for the database here
    conex.close()
    myAtributos = showjson('allAtributos')
    return jsonify(myAtributos)
Code example #33
File: demo1.py Project: sshd123/study-python3
def test_sync_users(src_conn, dest_conn):
    src_table = etl.fromdb(src_conn, 'select * from auth_user limit 8')

    mapping = OrderedDict()
    mapping['org_id'] = lambda x: 1
    mapping['username'] = '******'
    mapping['name'] = 'full_name'
    mapping['status'] = lambda x: 'ACTIVE'
    mapping['uid'] = 'username'
    mapping['type'] = lambda x: "STUDENT"
    dst_table = etl.fieldmap(src_table, mapping)
    upsert_many(dest_conn['auth_user'], dst_table, keys=['username'])
Code example #34
def _medical_limits(id, source_db):
    """
    get the member limits
    """
    sql = ("SELECT dispensary_id, daily_purchase_limit, visit_purchase_limit, "
           "daily_visit_limit, two_week_purchase_limit "
           "FROM red_flags "
           "WHERE dispensary_id={0}").format(id)

    data = etl.fromdb(source_db, sql)
    limits = etl.select(data, lambda rec: rec.dispensary_id == id)
    return etl.dicts(limits)
Code example #35
File: NOW_import.py Project: NWCalvank/mds
def ETL_MMS_NOW_schema(connection, tables, schema):
    for key, value in tables.items():
        try:
            current_table = etl.fromdb(connection,
                                       f'SELECT * from {schema}.{value}')
            etl.appenddb(current_table,
                         connection,
                         key,
                         schema='now_submissions',
                         commit=False)
        except Exception as err:
            print(f'ETL Parsing error: {err}')
Code example #36
File: test_db_server.py Project: DeanWay/petl
def _test_unicode(dbo):
    expect = ((u'name', u'id'),
              (u'Արամ Խաչատրյան', 1),
              (u'Johann Strauß', 2),
              (u'Вагиф Сәмәдоғлу', 3),
              (u'章子怡', 4),
              )
    actual = etl.fromdb(dbo, 'SELECT * FROM test_unicode')

    print('write some data and verify...')
    etl.todb(expect, dbo, 'test_unicode')
    ieq(expect, actual)
    print(etl.look(actual))
Code example #37
def pg_extract(service):
    """
    Returns the full contents of a postgresql table
    :param service:  object with the pgsql connection service
    :return: data
    """
    schema=config.postgres_info['schema']
    table=config.postgres_info['table']
    try:
        data = petl.fromdb(service, 'SELECT * FROM %s.%s;' %(schema, table))
    except Exception, e:
        print "Error extracting data from postgres %s.%s %s" % (schema, table, e)
        service.rollback()
        exit(1)
    return data
Code example #38
File: ex4-3.metrics.py Project: boundary/tsi-lab
    def get_min_dt(self, last):
        """
        Gets the minimum date considering previous extractions from the table.
        :param last:
        :return:
        """
        if last is None or len(last) == 0:
            sql = "select min(dt) as min_dt from ol_transactions"
        else:
            sql = "select min(dt) as min_dt from ol_transactions where dt >= '{0}'".format(last)

        self.log("SQL: {0}".format(sql))
        table = petl.fromdb(self.connection, sql)
        extract_dt = petl.values(table, 'min_dt')[0]
        return extract_dt
Code example #39
File: test_db_server.py Project: DeanWay/petl
def _test_dbo(write_dbo, read_dbo=None):
    if read_dbo is None:
        read_dbo = write_dbo

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'),
              ('a', 1),
              ('b', 2))
    expect_appended = (('foo', 'bar'),
                       ('a', 1),
                       ('b', 2),
                       ('a', 1),
                       ('b', 2))
    actual = etl.fromdb(read_dbo, 'SELECT * FROM test')

    debug('verify empty to start with...')
    debug(etl.look(actual))
    ieq(expect_empty, actual)

    debug('write some data and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('append some data and verify...')
    etl.appenddb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)

    debug('overwrite and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('cut, overwrite and verify')
    etl.todb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('cut, append and verify')
    etl.appenddb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)

    debug('try a single row')
    etl.todb(etl.head(expect, 1), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(etl.head(expect, 1), actual)
Code example #40
File: test_io.py Project: deytao/petl
def test_fromdb_withargs():

    # initial data
    data = (("a", 1), ("b", 2), ("c", 2.0))
    connection = sqlite3.connect(":memory:")
    c = connection.cursor()
    c.execute("create table foobar (foo, bar)")
    for row in data:
        c.execute("insert into foobar values (?, ?)", row)
    connection.commit()
    c.close()

    # test the function
    actual = fromdb(connection, "select * from foobar where bar > ? and bar < ?", (1, 3))
    expect = (("foo", "bar"), ("b", 2), ("c", 2.0))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Code example #41
File: dbtests.py Project: podpearson/petl
def exercise_unicode(dbo):
    print('=' * len(repr(dbo)))
    print('EXERCISE UNICODE')
    print(repr(dbo))
    print('=' * len(repr(dbo)))
    print()
    expect = ((u'name', u'id'),
              (u'Արամ Խաչատրյան', 1),
              (u'Johann Strauß', 2),
              (u'Вагиф Сәмәдоғлу', 3),
              (u'章子怡', 4),
              )
    actual = fromdb(dbo, 'SELECT * FROM test_unicode')
    print('write some data and verify...')
    todb(expect, dbo, 'test_unicode')
    ieq(expect, actual)
    print(look(actual))
Code example #42
File: migration_job.py Project: MattShirley/lariat
    def execute(self):
        if self.destination is None:
            self.destination = get_default_destination()

        for source in self.sources:
            conn = source.get_connection()
            dest_conn = self.destination.get_connection()
            print source.test_connection()
            print self.destination.test_connection()

            if source.dbms == 'mysql':
                conn.cursor().execute('SET SQL_MODE=ANSI_QUOTES;')

            for table in source.tables:
                table_data = petl.fromdb(conn, 'SELECT * FROM %s' % table)
                petl.todb(table_data, dest_conn, table, commit=True)
                dest_conn.commit()
            dest_conn.close()
        return
Code example #43
File: dbtests.py Project: podpearson/petl
def exercise_with_schema(dbo, db):
    print('=' * len(repr(dbo)))
    print('EXERCISE WITH EXPLICIT SCHEMA NAME')
    print(repr(dbo))
    print('=' * len(repr(dbo)))
    print(    )
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(dbo, 'SELECT * FROM test')

    print('write some data and verify...')
    todb(expect, dbo, 'test', schema=db)
    ieq(expect, actual)
    print(look(actual))
    
    print('append some data and verify...')
    appenddb(expect, dbo, 'test', schema=db)
    ieq(expect_appended, actual)
    print(look(actual))
Code example #44
File: dbtests.py Project: hexatonics/petlx
def exercise(dbo):
    print '=' * len(repr(dbo))
    print repr(dbo)
    print '=' * len(repr(dbo))
    print
    
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_extended = (('foo', 'bar', 'baz'), ('a', 1, 2.3), ('b', 1, 4.1))
    actual = fromdb(dbo, 'SELECT * FROM testx')

    print "verify table doesn't exist to start with"
    try:
        print look(actual)
    except Exception as e:
        print 'expected exception: ' + str(e)
    else:
        raise Exception('expected exception not raised')

    print "verify cannot write without create"
    try:
        todb(expect, dbo, 'testx')
    except Exception as e:
        print 'expected exception: ' + str(e)
    else:
        raise Exception('expected exception not raised')

    print 'create table and verify...'
    todb(expect, dbo, 'testx', create=True)
    ieq(expect, actual)
    print look(actual)
    
    print 'verify cannot overwrite with new cols without recreate...'
    try:
        todb(expect_extended, dbo, 'testx')
    except Exception as e:
        print 'expected exception: ' + str(e)
    else:
        raise Exception('expected exception not raised')
    
    print 'verify recreate...'
    todb(expect_extended, dbo, 'testx', create=True, drop=True)
    ieq(expect_extended, actual)
    print look(actual)
Code example #45
File: test_db_server.py Project: DeanWay/petl
def _test_with_schema(dbo, schema):

    expect = (('foo', 'bar'),
              ('a', 1),
              ('b', 2))
    expect_appended = (('foo', 'bar'),
                       ('a', 1),
                       ('b', 2),
                       ('a', 1),
                       ('b', 2))
    actual = etl.fromdb(dbo, 'SELECT * FROM test')

    print('write some data and verify...')
    etl.todb(expect, dbo, 'test', schema=schema)
    ieq(expect, actual)
    print(etl.look(actual))

    print('append some data and verify...')
    etl.appenddb(expect, dbo, 'test', schema=schema)
    ieq(expect_appended, actual)
    print(etl.look(actual))
Code example #46
File: test_io.py Project: brutimus/petl
def test_fromdb_withargs():
    
    # initial data
    data = (('a', 1),
            ('b', 2),
            ('c', 2.0))
    connection = sqlite3.connect(':memory:')
    c = connection.cursor()
    c.execute('create table foobar (foo, bar)')
    for row in data:
        c.execute('insert into foobar values (?, ?)', row)
    connection.commit()
    c.close()
    
    # test the function
    actual = fromdb(connection, 'select * from foobar where bar > ? and bar < ?', (1, 3))
    expect = (('foo', 'bar'),
              ('b', 2),
              ('c', 2.0))
    ieq(expect, actual)
    ieq(expect, actual) # verify can iterate twice
Code example #47
File: dbtests.py Project: brutimus/petl
def exercise_ss_cursor(setup_dbo, ss_dbo):
    print '=' * len(repr(ss_dbo))
    print 'EXERCISE WITH SERVER-SIDE CURSOR'
    print repr(ss_dbo)
    print '=' * len(repr(ss_dbo))
    print

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(ss_dbo, 'SELECT * FROM test')

    print 'verify empty to start with...'
    ieq(expect_empty, actual)
    print look(actual)

    print 'write some data and verify...'
    todb(expect, setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'append some data and verify...'
    appenddb(expect, setup_dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)

    print 'overwrite and verify...'
    todb(expect, setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, overwrite and verify'
    todb(cut(expect, 'bar', 'foo'), setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, append and verify'
    appenddb(cut(expect, 'bar', 'foo'), setup_dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
Code example #48
File: dbtests.py Project: shayh/petl
def exercise(dbo):
    print '=' * len(repr(dbo))
    print repr(dbo)
    print '=' * len(repr(dbo))
    print
    
    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(dbo, 'SELECT * FROM test')

    print 'verify empty to start with...'
    ieq(expect_empty, actual)
    print look(actual)
    
    print 'write some data and verify...'
    todb(expect, dbo, 'test')
    ieq(expect, actual)
    print look(actual)
    
    print 'append some data and verify...'
    appenddb(expect, dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
    
    print 'overwrite and verify...'
    todb(expect, dbo, 'test')
    ieq(expect, actual)
    print look(actual)
    
    print 'cut, overwrite and verify'
    todb(cut(expect, 'bar', 'foo'), dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, append and verify'
    appenddb(cut(expect, 'bar', 'foo'), dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
Code example #49
File: dbexamples.py Project: greeness/petl
user = sys.argv[1]
passwd = sys.argv[2]

import MySQLdb
# assume database petl_test already created
connection = MySQLdb.connect(user=user, passwd=passwd, db='petl_test')
print 'setup table'
cursor = connection.cursor()
cursor.execute('DROP TABLE IF EXISTS test')
cursor.execute('CREATE TABLE test (foo TEXT, bar INT)')
connection.commit()
cursor.close()

print 'exercise the petl functions using a connection'
from petl import look, fromdb, todb, appenddb
t1 = fromdb(connection, 'SELECT * FROM test')
print look(t1)
t2 = (('foo', 'bar'), ('a', 1), ('b', 1))
t2app = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, connection, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)

print 'exercise the petl functions using a cursor'
cursor = connection.cursor()
Code example #50
File: import_qa_data.py Project: CDE-UNIBE/qcat
    def collect_import_objects(self):
        """
        Query and put together all QA objects which will be imported.
        """

        def get_tables(mappings):
            """
            Recursively collect all WOCAT tables of the mappings.

            Args:
                mappings: list.

            Returns:
                list. A list of tables.
            """
            tables = []
            for mapping in mappings:
                table = mapping.get('wocat_table')
                if table:
                    tables.append(table)
                tables.extend(get_tables(mapping.get('mapping', [])))
                tables.extend(get_tables(mapping.get('conditions', [])))
            return tables

        self.output('Fetching data from WOCAT QA database.', v=1)

        # Extend the default tables by adding the ones from the mapping.
        tables = self.default_tables
        for qg_properties in self.mapping.values():
            questions = qg_properties.get('questions', {})
            for q_properties in questions.values():
                tables.extend(get_tables(q_properties.get('mapping', [])))

        # Remove duplicates
        tables = list(set(tables))

        # Try to query the lookup table and collect its values.
        try:
            lookup_query = """
                    SELECT *
                    FROM {schema}.{table_name};
                """.format(schema=self.schema,
                           table_name=self.lookup_table_name)
            lookup_table = {}
            for row in petl.dicts(petl.fromdb(self.connection, lookup_query)):
                lookup_table[row.get('id')] = row
        except AttributeError:
            lookup_table = {}

        # So far, lookup_text is never used. Therefore it can be left empty.
        lookup_table_text = {}

        # Try to query file infos
        try:
            lookup_query_files = """
                    SELECT *
                    FROM {schema}.{table_name};
                """.format(schema=self.schema,
                           table_name=self.file_info_table)
            file_infos = {}
            for row in petl.dicts(
                    petl.fromdb(self.connection, lookup_query_files)):
                file_infos[row.get('blob_id')] = row
        except AttributeError:
            file_infos = {}

        for table_name in tables:
            query = 'SELECT {columns} FROM {schema}.{table_name};'.format(
                columns='*', schema=self.schema, table_name=table_name)

            queried_table = petl.fromdb(self.connection, query)
            row_errors = False
            for row in petl.dicts(queried_table):

                if row_errors is True:
                    continue

                # Inconsistent naming throughout the tables
                questionnaire_identifier = self.questionnaire_identifier
                if table_name == 'approach':
                    questionnaire_identifier = 'id'
                elif table_name == 'qa_quality_review':
                    questionnaire_identifier = 'qa_id'

                identifier = row.get(questionnaire_identifier)
                if identifier is None:
                    self.output('No identifier found for table "{}".'.format(
                        table_name), v=1, l='error')
                    row_errors = True

                if identifier in self.import_objects_exclude:
                    continue

                import_object = self.get_import_object(identifier)

                if import_object is None:
                    import_object = QAImportObject(
                        identifier, self.command_options, lookup_table,
                        lookup_table_text, file_infos, self.image_url)

                    import_object.add_custom_mapping_messages(
                        self.custom_mapping_messages)

                    self.import_objects.append(import_object)

                # Set the code if it is available in the current table
                code = row.get(self.questionnaire_code)
                if code:
                    import_object.set_code(code)

                # The main contributor is the compiler
                compiler_id = row.get(self.questionnaire_owner)

                if compiler_id:
                    # If the main contributer is "Not registered" (ID 661), use
                    # the default compiler
                    if compiler_id == 661:
                        compiler_id = self.default_compiler_id
                        import_object.add_mapping_message(
                            'Using "Unknown User" as compiler in QCAT as main '
                            'contributor in QA was "Not registered"')

                    # The following QAs have a main contributor which is not
                    # available through the API call. Set the default user and
                    # add a mapping message.
                    elif identifier in [131, 128, 89, 47, 106, 82, 195, 212,
                                        76, 107, 84, 139, 130, 276, 72, 147,
                                        138, 43, 44, 46, 49, 50, 52, 57, 173,
                                        171, 170, 166, 125, 78, 102, 45, 197,
                                        48]:
                        compiler_id = self.default_compiler_id
                        import_object.add_mapping_message(
                            'The compiler needs to be set manually. Use the '
                            'main contributor of QA.')

                    import_object.set_owner(compiler_id)

                # Use the creation date available on the approach table
                created = row.get('date')
                if created and table_name == 'approach':
                    creation_time = datetime.strptime(
                        created, WOCAT_DATE_FORMAT)
                    import_object.created = timezone.make_aware(
                        creation_time, timezone.get_current_timezone())

                import_object.add_wocat_data(table_name, row)
Code example #51
id_to_predict = 11111

read_db_conn = pymysql.connect(host=dbconfig.db_host,
                              port=dbconfig.db_port,
                              charset="utf8",
                              user=dbconfig.db_user,
                              password=dbconfig.db_pass,
                              db=dbconfig.db_name)

df = pd.read_csv('trained_data.csv', index_col=False)

query = "SELECT id,names FROM {} WHERE id = {} ".format(dbconfig.db_table_items,
                                                        id_to_predict)

items_to_predict = etl.fromdb(read_db_conn, query)
print items_to_predict.values('names')

similiar_items = df.loc[lambda df:df.id == id_to_predict, 'similiar_items']
similiar_items = json.loads(similiar_items.values[0])

results_ids = []
for similarity, item_id in similiar_items:
    print similarity, item_id
    if similarity > 0.04:
        # put some threshold
        results_ids.append(str(item_id))


query = "SELECT image, name, vendor_id FROM {} WHERE id IN ({})".format(dbconfig.db_table_items,
                                                                        ",".join(results_ids))
Code example #52
def standardize_name(name):
    tmp = name.strip()
    # Name standardization:
    tmp_list = re.sub('[' + string.punctuation + ']', '', tmp).split()
    std = StandardName(tmp_list, False).output
    std_name = ' '.join(std)
    return std_name


alias_stmt = '''
    select trim(ala.pre_dir) as PRE_DIR, trim(ala.name) as ST_NAME, trim(ala.type_) as ST_TYPE, 
    trim(ala.suf_dir) as SUF_DIR, sc.l_f_add, sc.l_t_add, sc.r_f_add, sc.r_t_add, sc.st_code, ala.seg_id, 
    trim(sc.responsibl) as RESPONSIBL, 
    trim(sc.PRE_DIR) as CL_PRE_DIR, trim(sc.ST_NAME) as CL_ST_NAME, trim(sc.ST_TYPE) as CL_ST_TYPE, trim(sc.SUF_DIR) as CL_SUF_DIR
    from {alias_table} ala 
    inner join {cl_table} sc on sc.seg_id = ala.seg_id
    order by st_name, pre_dir, st_type, suf_dir, l_f_add, l_t_add, r_f_add, r_t_add, st_code, seg_id
'''.format(cl_table = street_centerline_table_name, alias_table = alias_table_name)

alias_rows = etl.fromdb(dbo, alias_stmt).convert('SEG_ID', int) \
    .convert('ST_NAME', lambda s: standardize_name(s)) \
    .convert('CL_ST_NAME', lambda s: standardize_name(s)) \
    .addfield('CL_STREET_FULL', lambda a: concat_cl_streetname(a))
alias_rows.tocsv(alias_csv)

alias_centerline_street_rows = alias_rows.cut('PRE_DIR', 'ST_NAME', 'ST_TYPE') \
    .addfield('STREET_FULL', lambda a: concat_al_streetname(a)) \
    .cut('STREET_FULL', 'PRE_DIR', 'ST_NAME', 'ST_TYPE') \
    .addfield('POST_DIR', '')

alias_centerline_street_rows.tocsv(alias_streets_csv, write_header=False)
Code example #53
File: dbexamples.py Project: aklimchak/petl
    i = raw_input(msg + '? ([y]/n)\n')
    if i not in ('', 'y', 'Y'):
        sys.exit(0)
     
prompt('setup table')
cursor = connection.cursor()
# deal with quote compatibility
cursor.execute('SET SQL_MODE=ANSI_QUOTES')
cursor.execute('DROP TABLE IF EXISTS test')
cursor.execute('CREATE TABLE test (foo TEXT, bar INT)')
cursor.close()
connection.commit()

prompt('exercise the petl functions using a connection')
from petl import look, fromdb, todb, appenddb
t1 = fromdb(connection, 'SELECT * FROM test')
print look(t1)
t2 = (('foo', 'bar'), ('a', 1), ('b', 1))
t2app = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, connection, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)

prompt('exercise the petl functions using a cursor')
cursor = connection.cursor()
Code example #54
File: warmup_lb.py Project: CityOfPhiladelphia/ais
        # print(url)
        r = requests.get(url)
        return r.status_code
    except requests.exceptions.HTTPError as e:
        error = [e,'','']
        query_errors[url] = error
    except requests.exceptions.RequestException as e:
        error = [e,'','']
        query_errors[url] = error
    except JSONDecodeError as e:
        error = [e, r.raw.data, r.raw.read(100)]
        query_errors[url] = error


read_conn = psycopg2.connect("dbname=ais_engine user=ais_engine")
address_count = etl.fromdb(read_conn, 'select count(*) as N from {}'.format(warmup_address_table_name))
n = list(address_count.values('n'))[0]
warmup_rows = etl.fromdb(read_conn, 'select {address_field} from {table} OFFSET floor(random()*{n}) limit {limit}'.format(address_field=warmup_address_field, table=warmup_address_table_name, n=n, limit=warmup_row_limit))
# print(etl.look(warmup_rows))
responses = warmup_rows.addfield('response_status', (lambda a: query_address(a['street_address']))).progress(100)
# print(etl.look(responses))
eval = responses.aggregate('response_status', len)
print(etl.look(eval))
f_200 = [(count/warmup_row_limit) for status, count in eval[1:] if status == 200][0]
print(f_200)
###########################
# WRITE ERRORS OUT TO FILE #
############################
print("Writing errors to file...")
error_table = []
for url, error_vals in query_errors.items():
Code example #55
dbo = cx_Oracle.connect(dsn)
source_table_name = source_def['table']
source_field_map = source_def['field_map']
source_field_map_upper = {}

for k,v in source_field_map.items():
    source_field_map_upper[k] = v.upper()

# Read DOR CONDO rows from source
print("Reading condos...")
# TODO: get fieldnames from source_field_map
dor_condo_read_stmt = '''
    select condounit, objectid, mapref from {dor_condo_table}
    where status in (1,3)
'''.format(dor_condo_table = source_table_name)
source_dor_condo_rows = etl.fromdb(dbo, dor_condo_read_stmt).fieldmap(source_field_map_upper)
if DEV:
    print(etl.look(source_dor_condo_rows))

# Read DOR Parcel rows from engine db
print("Reading parcels...")
dor_parcel_read_stmt = '''
    select parcel_id, street_address, address_low, address_low_suffix, address_low_frac, address_high, street_predir, 
    street_name, street_suffix, street_postdir, street_full from {dor_parcel_table}
    '''.format(dor_parcel_table='dor_parcel')
engine_dor_parcel_rows = etl.fromdb(pg_db, dor_parcel_read_stmt)
if DEV:
    print(etl.look(engine_dor_parcel_rows))

# Get duplicate parcel_ids:
non_unique_parcel_id_rows = engine_dor_parcel_rows.duplicates(key='parcel_id')
Code example #56
    if standardize_nulls(unit_num):
        unit_full = '# {}'.format(unit_num)

    if address_full and street_full:
        source_address_comps = [address_full, street_full, unit_full]
        source_address = ' '.join([x for x in source_address_comps if x])

    return source_address if source_address != None else ''
#############################################
# Read in files, format and write to tables #
#############################################
##############
# TRUE RANGE #
##############
print("Writing true_range table...")
etl.fromdb(read_conn, 'select * from true_range').tooraclesde(write_dsn, true_range_write_table_name)
########################
# SERVICE AREA SUMMARY #
########################
print("Writing service_area_summary table...")
etl.fromdb(read_conn, 'select * from service_area_summary')\
  .rename({'neighborhood_advisory_committee': 'neighborhood_advisory_committe'}, )\
  .tooraclesde(write_dsn, service_area_summary_write_table_name)
########################
# ADDRESS AREA SUMMARY #
########################
print("Creating transformed address_summary table...")
address_summary_out_table = etl.fromdb(read_conn, 'select * from address_summary') \
    .addfield('address_full', (lambda a: make_address_full(
    {'address_low': a['address_low'], 'address_low_suffix': a['address_low_suffix'],
     'address_low_frac': a['address_low_frac'], 'address_high': a['address_high']}))) \
Code example #57
    return ' '.join(stnam_list)

def standardize_name(name):
    tmp = name.strip()
    # Name standardization:
    tmp_list = re.sub('[' + string.punctuation + ']', '', tmp).split()
    std = StandardName(tmp_list, False).output
    std_name = ' '.join(std)
    return std_name


centerline_stmt = '''select trim(PRE_DIR) AS PRE_DIR,trim(ST_NAME) AS ST_NAME,trim(ST_TYPE) AS ST_TYPE,trim(SUF_DIR) AS SUF_DIR,
            L_F_ADD,L_T_ADD,R_F_ADD,R_T_ADD,ST_CODE,SEG_ID,trim(RESPONSIBL) AS RESPONSIBL from {} 
           order by st_name, st_type, pre_dir, suf_dir, l_f_add, l_t_add, r_f_add, r_t_add, st_code, seg_id'''.format(street_centerline_table_name)

centerline_rows = etl.fromdb(dbo, centerline_stmt).convert('ST_NAME', lambda s: standardize_name(s))

print(etl.look(centerline_rows))
centerline_rows.tocsv(centerline_csv)

# Centerline_streets
centerline_street_rows = centerline_rows.cut('PRE_DIR', 'ST_NAME', 'ST_TYPE') \
    .addfield('STREET_FULL', lambda a: concat_streetname(a)) \
    .addfield('POST_DIR', '') \
    .cut('STREET_FULL', 'PRE_DIR', 'ST_NAME', 'ST_TYPE', 'POST_DIR') \
    .distinct() \
    .sort(key=['ST_NAME', 'ST_TYPE', 'PRE_DIR', 'POST_DIR'])

print(etl.look(centerline_street_rows))
centerline_street_rows.tocsv(centerline_streets_csv, write_header=False)
Code example #58
File: ex4-2.metrics.py Project: boundary/tsi-lab
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pymysql
import os
import petl
import pymysql

# Fetch our database access configuration from environment variables
host = os.environ['DB_HOST']
user = os.environ['DB_USER']
password = os.environ['DB_PASSWORD']
db = os.environ['DB_DATABASE']

# Connect to the database using the PyMSQL package
connection = pymysql.connect(host=host,
                             user=user,
                             password=password,
                             db=db)

# Extract the data using both PyMSQL and PETL
table = petl.fromdb(connection, 'SELECT dt, total, duration FROM ol_transactions')
print(table)

connection.close()
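If the extracted rows also need to be persisted, the export has to run before connection.close() above, since fromdb is evaluated lazily; for example (an assumed continuation, not part of the original script):

    # assumed continuation, placed before connection.close(): stream the rows to CSV
    petl.tocsv(table, 'ol_transactions.csv')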