Example #1
def _test_dbo(write_dbo, read_dbo=None):
    if read_dbo is None:
        read_dbo = write_dbo

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'),
              ('a', 1),
              ('b', 2))
    expect_appended = (('foo', 'bar'),
                       ('a', 1),
                       ('b', 2),
                       ('a', 1),
                       ('b', 2))
    actual = etl.fromdb(read_dbo, 'SELECT * FROM test')

    debug('verify empty to start with...')
    debug(etl.look(actual))
    ieq(expect_empty, actual)

    debug('write some data and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('append some data and verify...')
    etl.appenddb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)

    debug('overwrite and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('cut, overwrite and verify')
    etl.todb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('cut, append and verify')
    etl.appenddb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)

    debug('try a single row')
    etl.todb(etl.head(expect, 1), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(etl.head(expect, 1), actual)
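# A minimal sketch of how _test_dbo might be exercised with an in-memory
# SQLite connection (assumes a pre-created 'test' table and that the
# ieq/debug helpers used above are importable in this module):
import sqlite3

connection = sqlite3.connect(':memory:')
connection.execute('CREATE TABLE test (foo TEXT, bar INTEGER)')
_test_dbo(connection)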
Example #3
    def __bool__(self):

        # Take at most the first few rows from our table (petl.head
        # defaults to 5), so we never scan the whole thing
        head_one = petl.head(self.table)

        # The table is truthy if it contains at least one data row
        return petl.nrows(head_one) > 0
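# A minimal sketch (the MiniTable wrapper is hypothetical, not from the
# source) showing why the __bool__ above keeps emptiness checks cheap:
# petl.head only materialises the first few rows before nrows counts them.
import petl

class MiniTable:
    def __init__(self, table):
        self.table = table

    def __bool__(self):
        head_one = petl.head(self.table)
        return petl.nrows(head_one) > 0

empty = MiniTable([('foo', 'bar')])           # header only -> falsy
full = MiniTable([('foo', 'bar'), ('a', 1)])  # one data row -> truthy
assert not empty and bool(full)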
Example #4
def drained_entries(ctx: typer.Context, issues, entries, project):
    config = ctx.meta['config']
    empty_entries, unset_entries = petl.biselect(
        entries, lambda row: row['issue_id'] is None)

    drain_issues = list(
        petl.dicts(
            transform.select_drain_issues(
                issues,
                assignee_id=ctx.meta['rdm_user']['id'],
                drain_cf_id=get_proj_attr(config, project,
                                          'rdm_drain_cf_id'))))

    if not drain_issues:
        log.error('No drain issues found')
        return petl.head(unset_entries, 0), entries

    if len(drain_issues) > 1:
        log.warning(
            f'Found {len(drain_issues)} drain issues. Will use only first one')

    drain_issue = drain_issues[0]
    drained = petl.addfield(petl.cutout(empty_entries, 'issue_id'), 'issue_id',
                            drain_issue['id'])
    return drained, unset_entries
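# Side note on petl.biselect as used above: one pass over the entries
# returns two tables, first the rows matching the predicate, then the rest.
import petl

sample = [('issue_id', 'hours'), (None, 2), (101, 3)]
no_issue, with_issue = petl.biselect(sample, lambda row: row['issue_id'] is None)
# no_issue   -> header + (None, 2)
# with_issue -> header + (101, 3)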
Example #5
def createDimCampaign():
    try:
        tbl_campaign = [['campaign_name', 'campaign_started', 'campaign_ended'], ['none', '2014-04-28T00:00:00', '2018-09-30T00:00:00']]
        dim_campaign = etl.head(tbl_campaign, 1)
        # Export as csv to load folder
        etl.tocsv(dim_campaign, 'load/dim_campaign.csv')
    except Exception as e:
        print("Something went wrong. Error {0}".format(e))
Example #6
def load(request):
    filename = request.GET.get('name', '')
    fullpath = settings.DATA_DIR + filename
    """
    detector = UniversalDetector()
    file_open = open(fullpath)
    for line in file_open.readlines():
        detector.feed(line)
        if detector.done: break
    detector.close()
    file_open.close()
    """
    encoding = 'ascii'  # detector.result['encoding']

    response_data = {}
    a = tryExtractors(fullpath, encoding)
    response_data['result'] = [row for row in etl.head(a)]
    response_data['headers'] = etl.header(a)
    typeInference(a)
    return HttpResponse(json.dumps(response_data), content_type="application/json")
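# Note on the pattern above: iterating a petl table yields the header row
# first, then data rows, and etl.head() defaults to the first 5 data rows.
import petl as etl

t = [('a', 'b'), (1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12)]
rows = [row for row in etl.head(t)]
assert rows[0] == ('a', 'b') and len(rows) == 6  # header + 5 data rows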
Example #7
    def get_context_data(self, **kwargs):
        context = super().get_context_data(**kwargs)
        context["filename"] = self.object.downloaded_file.name.split(os.path.sep)[-1]
        context["count_query_kwarg"] = self.count_query_kwarg

        table = petl.fromcsv(self.object.downloaded_file)
        context["header"] = petl.header(table)

        try:
            record_count_to_show = int(self.request.GET.get(self.count_query_kwarg))
        except (TypeError, ValueError):
            record_count_to_show = self.count_increment

        # Potentially expensive, cache / save in database for dataset
        if petl.nrows(table) > record_count_to_show:
            context[
                "load_more_url"
            ] = f"{self.request.path}?{self.count_query_kwarg}={record_count_to_show+self.count_increment}"

        context["rows"] = petl.records(petl.head(table, record_count_to_show))

        return context
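# Standalone illustration of the petl calls used in the view above:
import petl

table = [('name', 'age'), ('ann', 37), ('bob', 51), ('cat', 28)]
print(petl.header(table))   # ('name', 'age')
print(petl.nrows(table))    # 3 -- walks every row, hence "potentially expensive"
print([r['age'] for r in petl.records(petl.head(table, 2))])   # [37, 51]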
Example #8
def create_table(table, dbo, tablename, schema=None, commit=True, constraints=True, metadata=None, dialect=None,
                 sample=1000):
    """
    Create a database table based on a sample of data in the given table.

    Parameters
    ----------

    table : sequence of sequences (petl table)
        Table data to load
    dbo : database object
        DB-API 2.0 connection, callable returning a DB-API 2.0 cursor, or SQLAlchemy connection, engine or session
    tablename : string
        Name of the table
    schema : string
        Name of the database schema to create the table in
    commit : bool
        If True commit the changes
    constraints : bool
        If True use length and nullable constraints (only relevant if create=True)
    metadata : sqlalchemy.MetaData
        Custom table metadata (only relevant if create=True)
    dialect : string
        One of {'access', 'sybase', 'sqlite', 'informix', 'firebird', 'mysql', 'oracle', 'maxdb', 'postgresql', 'mssql'}
        (only relevant if create=True)
    sample : int
        Number of rows to sample when inferring types etc., set to 0 to use the whole table (only relevant if
        create=True)

    """

    if sample > 0:
        table = head(table, sample)
    sql = make_create_table_statement(table, tablename, schema=schema,
                                      constraints=constraints, metadata=metadata, dialect=dialect)
    _execute(sql, dbo, commit=commit)
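# A hedged usage sketch for the function above, assuming a DB-API connection
# and that make_create_table_statement (which relies on SQLAlchemy for type
# inference) is available in this module:
import sqlite3

conn = sqlite3.connect(':memory:')
data = [('foo', 'bar'), ('a', 1), ('b', 2)]
create_table(data, conn, 'test', dialect='sqlite', sample=2)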
Example #9
def split_dataset(dataset, p_train_data, split_mode):

    fields = list(fieldnames(dataset))

    size_dataset = len(values(dataset, fields[0]))
    size_train_data = int(round(size_dataset * p_train_data))
    size_test_data = abs(size_train_data - size_dataset)

    if split_mode == 'normal':

        train_data = head(dataset, size_train_data - 1)

        if size_test_data == 0:
            test_data = []
        else:
            test_data = tail(dataset, size_test_data - 1)

    #################### TODO: shuffle mode still to be implemented ###############

    return train_data, test_data
Example #11
                    'new_tests_per_thousand', 'tests_units'):

    # take only 15 countries from the table for the integration;
    # the limit is 16 because the row seen when count is 0 holds unwanted data
    if count == 16:
        break

    # data1 was already declared with the header above; the remaining rows are appended here
    data1.append(i)
    count = count + 1

# remove the unwanted row that ended up in data1[1]
data1.pop(1)

# convert the list into a table
table_old = etl.head(data1, 15)

# import the latest COVID-19 data and convert the fields that will be needed
table2 = (etl.fromcsv('current_covid.csv').convert(
    'median_age', float).convert('aged_65_older',
                                 float).convert('aged_70_older', float))
# as above, a list is declared with the header
table2_header = [['iso_code', 'median_age', 'aged_65_older', 'aged_70_older']]
table2_data = etl.cut(table2, 'iso_code', 'date', 'median_age',
                      'aged_65_older', 'aged_70_older')
table2_dated = etl.select(table2_data, 'date', lambda v: v == '2020-04-30')
table2_sort = etl.sort(table2_dated, key='iso_code')

count = 0
for j in etl.values(table2_sort, 'iso_code', 'median_age', 'aged_65_older',
                    'aged_70_older'):
Example #12
table4


# head()
########

import petl as etl
table1 = [['foo', 'bar'],
          ['a', 1],
          ['b', 2],
          ['c', 5],
          ['d', 7],
          ['f', 42],
          ['f', 3],
          ['h', 90]]
table2 = etl.head(table1, 4)
table2
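# table2 is a lazy view over table1 holding the header plus the first 4
# data rows ('a', 'b', 'c', 'd'); table1 itself is left unchanged.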


# tail()
########

import petl as etl
table1 = [['foo', 'bar'],
          ['a', 1],
          ['b', 2],
          ['c', 5],
          ['d', 7],
          ['f', 42],
          ['f', 3],
          ['h', 90],
Example #13
stores = etl.fromcsv('stores.csv')

# Open XML document
locations = etl.fromxml('locations.xml', 'store', {'Name': 'Name', 'Lat': 'Lat', 'Lon': 'Lon'})
print(locations)

# Set output
output_table = [["ID", "Name", "Suburb", "State", "Postcode"]]

store_id = 1

# Read through stores.csv to generate output_table
store = etl.cut(stores, 'Name', 'Suburb', 'State', 'Postcode').distinct()
print(store)
for s in etl.values(store, 'Name', 'Suburb', 'State', 'Postcode'):
    # s is a (Name, Suburb, State, Postcode) tuple, so unpack it into the row
    output_table.append([store_id] + list(s))
    store_id += 1
print(output_table)

# Merge and join XML and CSV together
merge_output = etl.join(stores, locations, key="Name")
print(merge_output)

store_table = etl.cut(merge_output, 'ID', 'Name', 'Suburb', 'State', 'Postcode', 'Lat', 'Lon')
print(etl.head(store_table, 5))

# Export to CSV file
etl.tocsv(merge_output, 'store_locations.csv')
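# Minimal illustration of the join used above: etl.join is an inner join
# on the key, so only names present in both tables survive, with the
# remaining columns concatenated (the values here are made up).
import petl as etl
left = [('Name', 'State'), ('Shop A', 'VIC'), ('Shop B', 'NSW')]
right = [('Name', 'Lat'), ('Shop A', -37.8), ('Shop C', -33.9)]
print(etl.join(left, right, key='Name'))
# header ('Name', 'State', 'Lat') with a single row for 'Shop A'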


Example #14
# Another table, with the campaign name and its start and end dates, will be connected to the facts table
dim_campaigntype_cut = etl.cut(events, 'utm_campaign')
dim_campaigntype_rename = etl.rename(dim_campaigntype_cut,
                                     {'utm_campaign': 'campaign_type'})
dim_campaigntype = etl.distinct(dim_campaigntype_rename)
# Export as csv to load folder
etl.tocsv(dim_campaigntype, 'load/dim_campaigntype.csv')

# Dim Campaign
# Note:
#   Slowly changing dimension
#   No data for now, meaning that until this is defined everything will be filled with "none"
#   Assume this will be resolved by the end of September, and that the start date was the 28th of April 2018
tbl_campaign = [['campaign_name', 'campaign_started', 'campaign_ended'],
                ['none', '2014-04-28T00:00:00', '2018-09-30T00:00:00']]
dim_campaign = etl.head(tbl_campaign, 1)
# Export as csv to load folder
etl.tocsv(dim_campaign, 'load/dim_campaign.csv')

# Dim Time
# TO DO
#   Load a full year (2018) with the most simple datetime analysis:
#   year, month, day, hour, minute, second

#   For the full loading process, use the reference in references.txt
#   This should be a procedure with all the validation logic, creating the next X months whenever it is called
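# Hedged sketch of the Dim Time TO DO above: build one row per hour of
# 2018 with year/month/day/hour/minute/second columns. The table layout
# and the 'load/dim_time.csv' path are illustrative, not from the source.
import petl as etl
from datetime import datetime, timedelta

dim_time_rows = [['time_key', 'year', 'month', 'day', 'hour', 'minute', 'second']]
t = datetime(2018, 1, 1)
while t.year == 2018:
    dim_time_rows.append([t.isoformat(), t.year, t.month, t.day, t.hour, t.minute, t.second])
    t += timedelta(hours=1)
dim_time = etl.wrap(dim_time_rows)
etl.tocsv(dim_time, 'load/dim_time.csv')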

#  Facts

# This facts table will be the staging area, with all the info needed to quickly update it with the dimension keys and load it into the facts table
# The facts table will have columns matching each column on the Dim Time table, to make it easier to get the reference key
Example #15

# head

table1 = [['foo', 'bar'],
          ['a', 1],
          ['b', 2],
          ['c', 5],
          ['d', 7],
          ['f', 42],
          ['f', 3],
          ['h', 90]]

from petl import head, look
look(table1)
table2 = head(table1, 4)
look(table2)    
    

# tail

table1 = [['foo', 'bar'],
          ['a', 1],
          ['b', 2],
          ['c', 5],
          ['d', 7],
          ['f', 42],
          ['f', 3],
          ['h', 90],
          ['k', 12],
          ['l', 77],
Example #17
# coding:utf8

import petl as etl

table1 = [('foo', 'bar', 'baz'), ('apple', 1, 2.5), ('orange', 3, 4.5),
          ('pears', 5, 6.5), ('bananer', 7, 8.5), ('cat', 9, 10.5)]
# head 4
table_head = etl.head(table1, 4)
print(table_head)

# tail 4
table_tail = etl.tail(table1, 4)
print(table_tail)

# rowslice
rowsliceTb = etl.rowslice(table1, 2)
print(rowsliceTb)

rowsliceTb_2_4 = etl.rowslice(table1, 2, 4)
print(rowsliceTb_2_4)

# start at index 1 (the second data row is the first in the slice), stop before 5, step 2
rowsliceTb_1_2_5 = etl.rowslice(table1, 1, 5, 2)
print(rowsliceTb_1_2_5)

# cut
cutTb = etl.cut(table1, 'foo', 'bar')
print(cutTb)

# index starts from 0
cutTb_0_2 = etl.cut(table1, 0, 2)