Example #1
def test_teepickle():

    t1 = (('foo', 'bar'), ('a', 2), ('b', 1), ('c', 3))

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    etl.wrap(t1).teepickle(f1.name).selectgt('bar', 1).topickle(f2.name)

    ieq(t1, etl.frompickle(f1.name))
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
Example #2
def test_teepickle():

    t1 = (("foo", "bar"), ("a", 2), ("b", 1), ("c", 3))

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    etl.wrap(t1).teepickle(f1.name).selectgt("bar", 1).topickle(f2.name)

    ieq(t1, etl.frompickle(f1.name))
    ieq(etl.wrap(t1).selectgt("bar", 1), etl.frompickle(f2.name))
Example #3
def etl_(query):
    source_db = get_source_db(query)
    extract_query = get_extract_query(query)

    with source_db() as source:
        etl.fromdb(source, extract_query) \
           .topickle(f'temp/{query.target_table}.p')

    with GISLNIDB.GISLNIDB() as target:
        etl.frompickle(f'temp/{query.target_table}.p') \
           .todb(get_cursor(target), query.target_table.upper())
Example #4
def etl_(query, logger):
    source_db = get_source_db(query)
    extract_query = get_extract_query(query)

    logger.info(f'{query.target_table} - extracting data into pickle file...')
    with source_db() as source:
        etl.fromdb(source,
                   extract_query).topickle(f'temp/{query.target_table}.p')

    logger.info(f'{query.target_table} - loading data from pickle file...')
    with PERMITP.PERMITP() as target:
        etl.frompickle(f'temp/{query.target_table}.p').todb(
            get_cursor(target), query.target_table.upper())
Example #5
def test_frompickle_cachetag_strict():
    """Test the cachetag method on tables returned by frompickle."""
    
    # initial data
    f = NamedTemporaryFile(delete=False)
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    for row in table:
        pickle.dump(row, f)
    f.close()

    # cachetag with initial data
    tbl = frompickle(FileSource(f.name, checksumfun=crc32sum))
    tag1 = tbl.cachetag()
    
    # make a change, preserving file size
    with open(f.name, 'wb') as o:
        rows = (('foo', 'bar'),
                ('d', 3),
                ('e', 5),
                ('f', 4))
        for row in rows:
            pickle.dump(row, o)

    # check cachetag has changed
    tag2 = tbl.cachetag()
    assert tag2 != tag1, (tag2, tag1)
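The crc32sum checksum function referenced above is not shown in the excerpt. A minimal sketch of what such a helper might look like, assuming the old petl 0.x convention that FileSource(checksumfun=...) takes a function mapping a file name to a checksum:

import zlib

def crc32sum(filename):
    # hypothetical helper: running CRC32 over the file's raw bytes,
    # read in chunks so large files stay cheap to checksum
    checksum = 0
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), b''):
            checksum = zlib.crc32(chunk, checksum)
    return checksum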
Example #6
    def materialize_to_file(self, file_path=None):
        """
        "Materializes" a Table, meaning all pending transformations are applied.

        Unlike the original materialize function, this method does not bring the data into memory,
        but instead loads the data into a local temp file.

        This method updates the current table in place.

        `Args:`
            file_path: str
                The path to the file to materialize the table to; if not specified, a temp file
                will be created.
        `Returns:`
            str
                Path to the temp file that now contains the table
        """

        # Load the data in batches, and "pickle" the rows to a temp file.
        # (We pickle rather than writing to, say, a CSV, so that we maintain
        # all the type information for each field.)

        file_path = file_path or files.create_temp_file()

        with open(file_path, 'wb') as handle:
            for row in self.table:
                pickle.dump(list(row), handle)

        # Load a Table from the file
        self.table = petl.frompickle(file_path)

        return file_path
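A minimal usage sketch, assuming the containing class is Parsons' Table and that it accepts petl-style list-of-lists input:

from parsons import Table

# hypothetical usage: pickle every row out to a temp file and re-point
# the table at that file, so later reads stream from disk, not memory
tbl = Table([['foo', 'bar'], ['a', 1], ['b', 2]])
path = tbl.materialize_to_file()
print(path)  # the table now reads lazily from this file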
Example #7
def test_frompickle_cachetag():
    """Test the cachetag method on tables returned by frompickle."""

    # initial data
    f = NamedTemporaryFile(delete=False)
    table = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    for row in table:
        pickle.dump(row, f)
    f.close()

    # cachetag with initial data
    tbl = frompickle(f.name)
    tag1 = tbl.cachetag()

    # make a change
    with open(f.name, 'wb') as o:
        rows = (
            ('foo', 'bar'),
            ('d', 3),
            #                ('e', 5),
            ('f', 4))
        for row in rows:
            pickle.dump(row, o)

    # check cachetag has changed
    tag2 = tbl.cachetag()
    assert tag2 != tag1, (tag2, tag1)
Example #8
def test_frompickle_cachetag():
    """Test the cachetag method on tables returned by frompickle."""

    # initial data
    f = NamedTemporaryFile(delete=False)
    table = (("foo", "bar"), ("a", 1), ("b", 2), ("c", 2))
    for row in table:
        pickle.dump(row, f)
    f.close()

    # cachetag with initial data
    tbl = frompickle(f.name)
    tag1 = tbl.cachetag()

    # make a change
    with open(f.name, "wb") as o:
        rows = (
            ("foo", "bar"),
            ("d", 3),
            #                ('e', 5),
            ("f", 4),
        )
        for row in rows:
            pickle.dump(row, o)

    # check cachetag has changed
    tag2 = tbl.cachetag()
    assert tag2 != tag1, (tag2, tag1)
Example #9
def extract_odoo(offline=OFFLINE):
    if not offline:
        api = OdooConnector()

        filters = [('supplier', '=', True),
                   ('active', '=', True),
                   ('company_id', '=', 3)]

        dataframe = api.extract('res.partner', filters)
        drivers = fromdataframe(dataframe)

        mappings = {
            'backend_username': '******',
            'backend_uuid': 'x_backend_uuid',
            'salary_id': 'x_salary_id',
            'odoo_id': 'id',
            'fleetname': lambda rec: rec['x_fleet'][1].replace('_', ' '),
            'fullname': lambda rec: rec['display_name'].strip()
        }

        drivers = drivers.fieldmap(mappings)
        drivers = drivers.suffixheader('_in_odoo')
        drivers.topickle(DRIVERS_IN_ODOO_FILEPATH)

    else:
        drivers = frompickle(DRIVERS_IN_ODOO_FILEPATH)

    drivers = drivers.addfield('backend_username', lambda rec: rec['backend_username_in_odoo'])
    drivers = drivers.addfield('salary_id', lambda rec: rec['salary_id_in_odoo'])

    drivers = standardize_missing_values(drivers)
    write_to_log(drivers, 'drivers', 'odoo')

    return drivers
Example #10
def test_teehtml():

    t1 = (("foo", "bar"), ("a", 2), ("b", 1), ("c", 3))

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    etl.wrap(t1).teehtml(f1.name).selectgt("bar", 1).topickle(f2.name)

    ieq(t1, etl.fromxml(f1.name, ".//tr", ("th", "td")).convertnumbers())
    ieq(etl.wrap(t1).selectgt("bar", 1), etl.frompickle(f2.name))
Example #11
def test_teehtml():

    t1 = (('foo', 'bar'), ('a', 2), ('b', 1), ('c', 3))

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    etl.wrap(t1).teehtml(f1.name).selectgt('bar', 1).topickle(f2.name)

    ieq(t1, etl.fromxml(f1.name, './/tr', ('th', 'td')).convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
Example #12
def test_teehtml_unicode():

    t1 = ((u"foo", u"bar"), (u"Արամ Խաչատրյան", 2), (u"Johann Strauß", 1), (u"Вагиф Сәмәдоғлу", 3))

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    (etl.wrap(t1).teehtml(f1.name, encoding="utf-8").selectgt("bar", 1).topickle(f2.name))

    ieq(t1, (etl.fromxml(f1.name, ".//tr", ("th", "td"), encoding="utf-8").convertnumbers()))
    ieq(etl.wrap(t1).selectgt("bar", 1), etl.frompickle(f2.name))
Example #13
def test_frompickle():
    """Test the frompickle function."""

    f = NamedTemporaryFile(delete=False)
    table = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    for row in table:
        pickle.dump(row, f)
    f.close()

    actual = frompickle(f.name)
    ieq(table, actual)
    ieq(table, actual)  # verify can iterate twice
Example #14
def test_frompickle():
    """Test the frompickle function."""

    f = NamedTemporaryFile(delete=False)
    table = (("foo", "bar"), ("a", 1), ("b", 2), ("c", 2))
    for row in table:
        pickle.dump(row, f)
    f.close()

    actual = frompickle(f.name)
    ieq(table, actual)
    ieq(table, actual)  # verify can iterate twice
Example #15
def test_issue_231():

    table = [['foo', 'bar'], ['a', '1'], ['b', '2']]
    t = cut(table, 'foo')
    totsv(t, 'tmp/issue_231.tsv')
    u = fromtsv('tmp/issue_231.tsv')
    ieq(t, u)
    tocsv(t, 'tmp/issue_231.csv')
    u = fromcsv('tmp/issue_231.csv')
    ieq(t, u)
    topickle(t, 'tmp/issue_231.pickle')
    u = frompickle('tmp/issue_231.pickle')
    ieq(t, u)
Example #17
def test_teehtml_unicode():

    t1 = ((u'foo', u'bar'), (u'Արամ Խաչատրյան', 2), (u'Johann Strauß', 1),
          (u'Вагиф Сәмәдоғлу', 3))

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    (etl.wrap(t1)
     .teehtml(f1.name, encoding='utf-8')
     .selectgt('bar', 1)
     .topickle(f2.name))

    ieq(t1, (etl.fromxml(f1.name, './/tr', ('th', 'td'), encoding='utf-8')
             .convertnumbers()))
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
Example #18
def test_frompickle():
    """Test the frompickle function."""
    
    f = NamedTemporaryFile(delete=False)
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    for row in table:
        pickle.dump(row, f)
    f.close()
    
    actual = frompickle(f.name)
    ieq(table, actual)
    ieq(table, actual) # verify can iterate twice
Example #19
def test_teetext():

    t1 = (('foo', 'bar'), ('a', 2), ('b', 1), ('c', 3))

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)

    prologue = 'foo,bar\n'
    template = '{foo},{bar}\n'
    epilogue = 'd,4'
    (etl.wrap(t1)
     .teetext(f1.name, template=template, prologue=prologue,
              epilogue=epilogue)
     .selectgt('bar', 1)
     .topickle(f2.name))

    ieq(t1 + (('d', 4), ), etl.fromcsv(f1.name).convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
Example #20
def test_teetext_unicode():

    t1 = ((u"foo", u"bar"), (u"Արամ Խաչատրյան", 2), (u"Johann Strauß", 1), (u"Вагиф Сәмәдоғлу", 3))

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)

    prologue = u"foo,bar\n"
    template = u"{foo},{bar}\n"
    epilogue = u"章子怡,4"
    (
        etl.wrap(t1)
        .teetext(f1.name, template=template, prologue=prologue, epilogue=epilogue, encoding="utf-8")
        .selectgt("bar", 1)
        .topickle(f2.name)
    )

    ieq(t1 + ((u"章子怡", 4),), etl.fromcsv(f1.name, encoding="utf-8").convertnumbers())
    ieq(etl.wrap(t1).selectgt("bar", 1), etl.frompickle(f2.name))
Example #21
def test_teetext():

    t1 = (("foo", "bar"), ("a", 2), ("b", 1), ("c", 3))

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)

    prologue = "foo,bar\n"
    template = "{foo},{bar}\n"
    epilogue = "d,4"
    (
        etl.wrap(t1)
        .teetext(f1.name, template=template, prologue=prologue, epilogue=epilogue)
        .selectgt("bar", 1)
        .topickle(f2.name)
    )

    ieq(t1 + (("d", 4),), etl.fromcsv(f1.name).convertnumbers())
    ieq(etl.wrap(t1).selectgt("bar", 1), etl.frompickle(f2.name))
Example #22
    def query(self, sql):
        """
        Run a BigQuery query and return the results as a Parsons table.

        `Args:`
            sql: str
                A valid BigQuery statement

        `Returns:`
            Parsons Table
                See :ref:`parsons-table` for output options.
        """
        # Run the query
        query_job = self.client.query(sql)

        # We will use a temp file to cache the results so that they are not all living
        # in memory. We'll use pickle to serialize the results to file in order to maintain
        # the proper data types (e.g. integer).
        temp_filename = create_temp_file()

        wrote_header = False
        with open(temp_filename, 'wb') as temp_file:
            results = query_job.result()

            # If there are no results, just return None
            if results.total_rows == 0:
                return None

            for row in results:
                # Make sure we write out the header once and only once
                if not wrote_header:
                    wrote_header = True
                    header = list(row.keys())
                    pickle.dump(header, temp_file)

                row_data = list(row.values())
                pickle.dump(row_data, temp_file)

        ptable = petl.frompickle(temp_filename)
        final_table = Table(ptable)

        return final_table
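The dump-the-header-then-dump-each-row pattern used above works standalone with petl as well; a small self-contained sketch (the file name is illustrative):

import pickle
import petl

with open('rows.p', 'wb') as f:
    pickle.dump(['id', 'score'], f)  # header row first
    pickle.dump([1, 0.5], f)         # ints and floats keep their types,
    pickle.dump([2, 0.75], f)        # unlike a round-trip through CSV

tbl = petl.frompickle('rows.p')
print(petl.look(tbl))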
Example #23
def test_teetext_unicode():

    t1 = ((u'foo', u'bar'), (u'Արամ Խաչատրյան', 2), (u'Johann Strauß', 1),
          (u'Вагиф Сәмәдоғлу', 3))

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)

    prologue = u'foo,bar\n'
    template = u'{foo},{bar}\n'
    epilogue = u'章子怡,4'
    (etl.wrap(t1)
     .teetext(f1.name, template=template, prologue=prologue,
              epilogue=epilogue, encoding='utf-8')
     .selectgt('bar', 1)
     .topickle(f2.name))

    ieq(t1 + ((u'章子怡', 4),),
        etl.fromcsv(f1.name, encoding='utf-8').convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
Example #24

# frompickle()
##############

import petl as etl
import pickle
# set up a file to demonstrate with
with open('example.p', 'wb') as f:
    pickle.dump(['foo', 'bar'], f)
    pickle.dump(['a', 1], f)
    pickle.dump(['b', 2], f)
    pickle.dump(['c', 2.5], f)

# demonstrate the use of frompickle()
table1 = etl.frompickle('example.p')
table1


# topickle()
############

import petl as etl
table1 = [['foo', 'bar'],
          ['a', 1],
          ['b', 2],
          ['c', 2]]
etl.topickle(table1, 'example.p')
# look what it did
table2 = etl.frompickle('example.p')
table2
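For reference, displaying table2 renders along these lines (the exact layout varies with the petl version):

# +-----+-----+
# | foo | bar |
# +=====+=====+
# | 'a' |   1 |
# +-----+-----+
# | 'b' |   2 |
# +-----+-----+
# | 'c' |   2 |
# +-----+-----+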
Example #25
look(testcsv)


# topickle

table = [['foo', 'bar'],
         ['a', 1],
         ['b', 2],
         ['c', 2]]

from petl import topickle, look
look(table)
topickle(table, 'test.dat')
# look what it did
from petl import frompickle
look(frompickle('test.dat'))


# appendpickle

table = [['foo', 'bar'],
         ['d', 7],
         ['e', 42],
         ['f', 12]]

from petl import look, frompickle
# inspect an existing pickle file
testdat = frompickle('test.dat')
look(testdat)
# append some data
from petl import appendpickle
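The excerpt breaks off at the import; assuming the petl 0.x appendpickle(table, source) signature, it presumably continued along these lines:

# append the new rows to the existing pickle file (no header is written)
appendpickle(table, 'test.dat')
# look what it did
look(frompickle('test.dat'))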
Example #27
    def query(self, sql, parameters=None):
        """
        Run a BigQuery query and return the results as a Parsons table.

        To include python variables in your query, it is recommended to pass them as parameters,
        following the BigQuery style where parameters are prefixed with `@`s.
        Using the ``parameters`` argument ensures that values are escaped properly, and avoids SQL
        injection attacks.

        **Parameter Examples**

        .. code-block:: python

            name = "Beatrice O'Brady"
            sql = 'SELECT * FROM my_table WHERE name = %s'
            rs.query(sql, parameters=[name])

        .. code-block:: python

            name = "Beatrice O'Brady"
            sql = "SELECT * FROM my_table WHERE name = %(name)s"
            rs.query(sql, parameters={'name': name})

        `Args:`
            sql: str
                A valid BigQuery statement
            parameters: dict
                A dictionary of query parameters for BigQuery.

        `Returns:`
            Parsons Table
                See :ref:`parsons-table` for output options.
        """
        # get our connection and cursor
        cursor = self._dbapi.connect(self.client).cursor()

        # Run the query
        cursor.execute(sql, parameters)

        # We will use a temp file to cache the results so that they are not all living
        # in memory. We'll use pickle to serialize the results to file in order to maintain
        # the proper data types (e.g. integer).
        temp_filename = create_temp_file()

        wrote_header = False
        with open(temp_filename, 'wb') as temp_file:
            # Track whether we got data, since if we don't get any results we need to return None
            got_results = False
            while True:
                batch = cursor.fetchmany(QUERY_BATCH_SIZE)
                if len(batch) == 0:
                    break

                got_results = True

                for row in batch:
                    # Make sure we write out the header once and only once
                    if not wrote_header:
                        wrote_header = True
                        header = list(row.keys())
                        pickle.dump(header, temp_file)

                    row_data = list(row.values())
                    pickle.dump(row_data, temp_file)

        if not got_results:
            return None

        ptable = petl.frompickle(temp_filename)
        final_table = Table(ptable)

        return final_table
Example #28
from __future__ import division, print_function, absolute_import

# frompickle()
##############

import petl as etl
import pickle
# set up a file to demonstrate with
with open('example.p', 'wb') as f:
    pickle.dump(['foo', 'bar'], f)
    pickle.dump(['a', 1], f)
    pickle.dump(['b', 2], f)
    pickle.dump(['c', 2.5], f)

# demonstrate the use of frompickle()
table1 = etl.frompickle('example.p')
table1

# topickle()
############

import petl as etl
table1 = [['foo', 'bar'], ['a', 1], ['b', 2], ['c', 2]]
etl.topickle(table1, 'example.p')
# look what it did
table2 = etl.frompickle('example.p')
table2
Example #29
    def query_with_connection(self,
                              sql,
                              connection,
                              parameters=None,
                              commit=True):
        """
        Execute a query against the Redshift database, with an existing connection.
        Useful for batching queries together. Will return ``None`` if the query
        returns zero rows.

        `Args:`
            sql: str
                A valid SQL statement
            connection: obj
                A connection object obtained from ``redshift.connection()``
            parameters: list
                A list of python variables to be converted into SQL values in your query
            commit: boolean
                Whether to commit the transaction immediately. If ``False`` the transaction will
                be committed when the connection goes out of scope and is closed (or you can
                commit manually with ``connection.commit()``).

        `Returns:`
            Parsons Table
                See :ref:`parsons-table` for output options.
        """

        # TODO: have it return an ordered dict so the rows
        #       come back in the correct order

        with self.cursor(connection) as cursor:

            if 'credentials' not in sql:
                logger.debug(f'SQL Query: {sql}')
            cursor.execute(sql, parameters)

            if commit:
                connection.commit()

            # If the cursor is empty, don't cause an error
            if not cursor.description:
                logger.debug('Query returned 0 rows')
                return None

            else:

                # Fetch the data in batches, and "pickle" the rows to a temp file.
                # (We pickle rather than writing to, say, a CSV, so that we maintain
                # all the type information for each field.)

                temp_file = files.create_temp_file()

                with open(temp_file, 'wb') as f:
                    # Grab the header
                    header = [i[0] for i in cursor.description]
                    pickle.dump(header, f)

                    while True:
                        batch = cursor.fetchmany(QUERY_BATCH_SIZE)
                        if not batch:
                            break

                        logger.debug(f'Fetched {len(batch)} rows.')
                        for row in batch:
                            pickle.dump(list(row), f)

                # Load a Table from the file
                final_tbl = Table(petl.frompickle(temp_file))

                logger.debug(f'Query returned {final_tbl.num_rows} rows.')
                return final_tbl
Example #30
    def query_with_connection(self,
                              sql,
                              connection,
                              parameters=None,
                              commit=True):
        """
        Execute a query against the database, with an existing connection. Useful for batching
        queries together. Will return ``None`` if the query returns zero rows.

        `Args:`
            sql: str
                A valid SQL statement
            connection: obj
                A connection object obtained from ``mysql.connection()``
            parameters: list
                A list of python variables to be converted into SQL values in your query
            commit: boolean
                Whether to commit the transaction immediately. If ``False`` the transaction will
                be committed when the connection goes out of scope and is closed (or you can
                commit manually with ``connection.commit()``).

        `Returns:`
            Parsons Table
                See :ref:`parsons-table` for output options.
        """
        with self.cursor(connection) as cursor:

            # The python connector can only execute a single sql statement, so we will
            # break up each statement and execute them separately.
            for s in sql.strip().split(';'):
                if len(s) != 0:
                    logger.debug(f'SQL Query: {s}')
                    cursor.execute(s, parameters)

            if commit:
                connection.commit()

            # If the SQL query provides no response, then return None
            if not cursor.description:
                logger.debug('Query returned 0 rows')
                return None

            else:
                # Fetch the data in batches, and "pickle" the rows to a temp file.
                # (We pickle rather than writing to, say, a CSV, so that we maintain
                # all the type information for each field.)
                temp_file = files.create_temp_file()

                with open(temp_file, 'wb') as f:
                    # Grab the header
                    pickle.dump(cursor.column_names, f)

                    while True:
                        batch = cursor.fetchmany(QUERY_BATCH_SIZE)
                        if len(batch) == 0:
                            break

                        logger.debug(f'Fetched {len(batch)} rows.')
                        for row in batch:
                            pickle.dump(row, f)

                # Load a Table from the file
                final_tbl = Table(petl.frompickle(temp_file))

                logger.debug(f'Query returned {final_tbl.num_rows} rows.')
                return final_tbl
Example #31
def extract_backend(offline=OFFLINE):
    # Done in 4 steps: (1) grab the driver table from the CloudSQL,
    # (2) use the user uuids to query for users one by one through
    # the API, (3) get the fleet table from CloudSQL and (4) join
    # everything together.

    def extract_drivers():
        query = SQLReader('sql.drivers_from_cloudsql')
        drivers_df = sql.execute(query.statements[0])
        drivers_tb = fromdataframe(drivers_df)

        mappings = {
            'driver_uuid': lambda rec: str(UUID(bytes=rec['uuid'], version=4)),
            'fleet_uuid': lambda rec: str(UUID(bytes=rec['fleet_uuid'], version=4)),
            'user_uuid': lambda rec: str(UUID(bytes=rec['user_ds_uuid'], version=4)),
            'fullname': lambda rec: rec['last_name'].strip() + ', ' + rec['first_name'].strip(),
        }

        drivers_tb = drivers_tb.fieldmap(mappings)
        drivers_tb = drivers_tb.suffixheader('_in_backend')

        return drivers_tb

    def extract_users():
        users_records = [api.get_record('users', driver.user_uuid_in_backend)
                         for driver in drivers.namedtuples()]
        users_df = DataFrame().from_records(users_records)
        users_tb = fromdataframe(users_df)

        mappings = {
            'driver_uuid': 'driver',
            'user_uuid': 'uuid',
            'backend_username': '******'
        }

        users_tb = users_tb.fieldmap(mappings)
        users_tb = users_tb.suffixheader('_in_backend')

        return users_tb

    def extract_fleets_from_dwh():
        query = SQLReader('sql.fleets_from_tableau')
        fleets_df = dwh.execute(query.statements[0])
        fleets_tb = fromdataframe(fleets_df)

        mappings = {
            'fleet_uuid': 'uuid',
            'fleetname': lambda rec: rec['backend_name'].replace('_', ' '),
            'country_code': 'country_code',
        }

        fleets_tb = fleets_tb.cutout('country_code')
        fleets_tb = fleets_tb.fieldmap(mappings)
        fleets_tb = fleets_tb.suffixheader('_in_backend')

        return fleets_tb

    if not offline:
        sql = CloudSQLConnector()
        api = ValkfleetConnector()
        dwh = WarehouseConnector()

        drivers = extract_drivers()
        fleets = extract_fleets_from_dwh()
        users = extract_users()

        drivers.topickle(DRIVERS_IN_BACKEND_FILEPATH)
        fleets.topickle(FLEETS_IN_BACKEND_FILEPATH)
        users.topickle(USERS_IN_BACKEND_FILEPATH)

    else:
        drivers = frompickle(DRIVERS_IN_BACKEND_FILEPATH)
        fleets = frompickle(FLEETS_IN_BACKEND_FILEPATH)
        users = frompickle(USERS_IN_BACKEND_FILEPATH)

    write_to_log(drivers, 'drivers', 'backend')
    write_to_log(fleets, 'fleets', 'backend')
    write_to_log(users, 'users', 'backend')

    drivers_without_fleet = antijoin(drivers, fleets, key='fleet_uuid_in_backend')
    drivers_without_user = antijoin(drivers, users, key='user_uuid_in_backend')
    write_to_log(drivers_without_fleet, 'drivers without fleet', 'backend')
    write_to_log(drivers_without_user, 'drivers without user', 'backend')

    drivers_n_fleets = join(drivers, fleets, key='fleet_uuid_in_backend').cutout('fleet_uuid_in_backend')
    backend_drivers = join(drivers_n_fleets, users, key='user_uuid_in_backend')
    backend_drivers = backend_drivers.addfield('backend_username', lambda rec: rec['backend_username_in_backend'])
    backend_drivers = backend_drivers.cutout('driver_uuid_in_backend')

    backend_drivers = standardize_missing_values(backend_drivers)
    write_to_log(backend_drivers, 'drivers', 'backend')

    return backend_drivers
Example #32
# Creating a pickle file

import pickle
import petl as etl

a = ['test value', 'test value 2', 'test value 3']

# open the file for writing
fileObject = open('pickle_file.p', 'wb')

# this writes the object a to the file 'pickle_file.p'
pickle.dump(a, fileObject)

# here we close the fileObject
fileObject.close()

# frompickle() treats each successive pickle in the file as one row,
# so the single list dumped above comes back as the header row
table3 = etl.frompickle('pickle_file.p')
print('Pickle')
print(table3)

###################Reading Text Files#################################

text = 'a,1\nb,2\nc,2\n'
with open('example.txt', 'w') as f:
    f.write(text)

table4 = etl.fromtext('example.txt')
print(table4)

################Reading XML files##################################

table5 = etl.fromxml('data.xml', 'tr', 'td')
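No data.xml is shown here; for the fromxml() call above to yield rows, the file needs tr elements containing td values. A hypothetical setup that would satisfy it:

# hypothetical data.xml so the fromxml() call above has something to parse;
# petl tables are lazy, so table5 only reads the file when iterated
xml_text = """<table>
<tr><td>foo</td><td>bar</td></tr>
<tr><td>a</td><td>1</td></tr>
<tr><td>b</td><td>2</td></tr>
</table>"""
with open('data.xml', 'w') as f:
    f.write(xml_text)
print(table5)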