Example #1
def display_store():
    response.headers['Access-Control-Allow-Origin'] = '*'
    response.headers['Content-type'] = 'application/json'
    table = (
        etl
        .fromcsv('store_locations.csv')
        .convert('Lat', float)
        .convert('Lon', float)
    )
    store_id = request.query.postcode

    # Select rows
    table1 = etl.select(table, "{Postcode}=='" + store_id + "'")

    # Set default postcode of 2000 
    if etl.nrows(table1) == 0:
        defaultPostCode = "2000"
        table1 = etl.select(table, "{Postcode}=='" + defaultPostCode + "'")

    # Reorder fields
    print(table1)
    table2 = etl.cut(table1, 'Name', 'Lat', 'Lon').dicts()[0]

    print(table2)
    return table2
Example #2
def select_execute(c, selector, **kwargs):
    r = c()
    if 'addfields' in kwargs:
        r = etl.addfields(r, kwargs['addfields'])
    if selector:
        r = etl.select(r, selector)
    return r
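A minimal usage sketch (the factory, field names, and data below are invented for illustration): c is any zero-argument callable returning a petl table, and because addfields runs before the selector, the expression may reference the added field as well.

import petl as etl

# Hypothetical table factory: any zero-argument callable returning a petl table.
def load_people():
    return etl.wrap([['name', 'age'], ['ann', 42], ['bob', 17]])

# Keep adults only; the field added via addfields would be visible
# to the selector too, since addfields is applied first.
adults = select_execute(
    load_people,
    "{age} >= 18",
    addfields=[('age_next_year', lambda rec: rec['age'] + 1)])
print(etl.look(adults))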
Example #3
def download_file(win, value):
    from tkinter import filedialog
    import requests
    import petl as etl
    # instantiate the "save as" dialog
    win.filename = filedialog.asksaveasfilename(
        initialdir="/",
        title="Select file",
        filetypes=(("csv files", "*.csv"), ("Excel files", "*.xls"),
                   ("all files", "*.*")))
    # read the entered date and store it
    selected_date = value
    csv_path = win.filename + ".csv"

    # data = requests.get('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=MSFT&apikey=demo&datatype=csv')
    # with open(csv_path, 'w') as f:
    #     writer = csv.writer(f)
    #     reader = csv.reader(data.text.splitlines())
    #     for row in reader:
    #         writer.writerow(row)

    # Load the table from the result file whose timestamp column corresponds
    # to the given date
    table1 = etl.fromcsv(r'C:\avenguard\files\results.csv')
    table2 = etl.rowlenselect(table1, 12)
    # Keep only the needed columns
    table2 = etl.cut(table2, 'plate', 'timestamp')

    # table2 = etl.tail(table2, 15)
    table2 = etl.select(
        table2, lambda rec: rec.timestamp.split("_")[0] == selected_date)
    # Save to a new file in xlsx format (Excel)
    # etl.tocsv(table2, csv_path)
    etl.toxlsx(table2, win.filename + '.xlsx')
Example #4
    def _petl_transform(self, record_set):
        if "transform" in self.task:
            transform = self.task["transform"]
            if "convert" in transform:
                conversions = {}
                for field, func in transform["convert"]:
                    conversions[field] = func
                record_set = etl.convert(record_set, conversions)

            if "filter" in transform:
                record_set = etl.select(record_set, transform["filter"])

            if "remove" in transform:
                cuts = []
                for field in transform["remove"]:
                    cuts.append(field)
                record_set = etl.cutout(record_set, cuts)

            if "rename" in transform:
                names = {}
                for old, new_one in transform["rename"]:
                    names[old] = new_one
                record_set = etl.rename(record_set, names)

        return record_set
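The shape of the task["transform"] block is implied by the checks above: "convert" and "rename" hold pairs, "filter" is a petl select expression or callable, and "remove" is a list of field names. A hypothetical configuration (field names invented) might look like:

# Hypothetical task dict consumed by _petl_transform above.
task = {
    "transform": {
        # (field, conversion) pairs fed to etl.convert; petl also accepts
        # a method name string such as 'upper'
        "convert": [("amount", float), ("name", "upper")],
        # petl select expression (a callable predicate also works)
        "filter": "{amount} > 100",
        # field names passed to etl.cutout
        "remove": ["internal_id"],
        # (old, new) pairs fed to etl.rename
        "rename": [("name", "customer_name")],
    }
}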
Example #5
    def select_rows(self, *filters):
        """
        Select specific rows from a Parsons table based on the passed
        filters.

        Example filters:

        .. code-block:: python

            tbl = Table([['foo', 'bar', 'baz'],
                         ['c', 4, 9.3],
                         ['a', 2, 88.2],
                         ['b', 1, 23.3],])

            # You can structure the filter in multiple ways

            # Lambda Function
            tbl2 = tbl.select_rows(lambda row: row.foo == 'a' and row.baz > 88.1)
            tbl2
            >>> {'foo': 'a', 'bar': 2, 'baz': 88.2}

            # Expression String
            tbl3 = tbl.select_rows("{foo} == 'a' and {baz} > 88.1")
            tbl3
            >>> {'foo': 'a', 'bar': 2, 'baz': 88.2}

        `Args:`
            \*filters: function or str
        `Returns:`
            A new parsons table containing the selected rows
        """  # noqa: W605

        from parsons.etl.table import Table

        return Table(petl.select(self.table, *filters))
Example #6
def anyServices():
    # requested query
    Postcode = request.query.Postcode
    # Convert the Postcode value to a string
    Postcode = str(Postcode)
    # read the csv file (renamed from `csv` to avoid shadowing the module name)
    table = petl.fromcsv(file)

    # json content type declaration
    response.headers['Content-type'] = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'
    for i in table:
        # i[4] is the Postcode column; the header row will not match
        if Postcode == i[4]:
            # select the data according to the given requested query
            dataSelect = petl.select(table, "{Postcode} == '" + Postcode + "'")
            # cut out the required column names
            jsonData = petl.cut(dataSelect, 'Service', 'Suburb')
            # convert the dictionary data into json data
            jsonData = json.JSONEncoder().encode(list(petl.dicts(jsonData)))
            # return the json data
            return jsonData
    # for/else: this branch runs only when the loop finds no match
    else:
        jsonData = json.JSONEncoder().encode('Unable to find this Postcode.')
        return jsonData
Example #7
def main_loop():
    # requested query
    Service = request.query.Service
    # Convert the Service value to a string
    Service = str(Service)

    # renamed from `csv` to avoid shadowing the module name
    table = petl.fromcsv(file)
    response.headers['Content-type'] = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'

    for i in table:
        # i[1] is the Service column
        if Service == i[1]:
            # select the data according to the given requested query
            dataSelect = petl.select(table, "{Service} == '" + Service + "'")
            # cut out the required column names
            jsonData = petl.cut(dataSelect, 'ClinicID', 'Suburb', 'Lat', 'Lon')
            # convert the dictionary data into json data
            jsonData = json.JSONEncoder().encode(list(petl.dicts(jsonData)))
            # return the json data
            return jsonData

        # requesting "0" returns the distinct list of clinics offering
        # any service
        if Service == "0":
            anyServices = petl.unique(table, key='Name')
            jsonData = petl.cut(anyServices, 'ClinicID', 'Suburb', 'Lat',
                                'Lon')
            jsonData = json.JSONEncoder().encode(list(petl.dicts(jsonData)))
            return jsonData
    # for/else: this branch runs only when no row matched
    else:
        jsonData = json.JSONEncoder().encode('Please Enter a Service.')
        return jsonData
Example #8
def main_loop():
    # requested query
    inputServiceID = request.query.serviceid
    # renamed from `csv` to avoid shadowing the module name
    table = pt.fromcsv('clinicservicelocations.csv')
    response.headers['Content-type'] = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'
    for i in table:
        # i[0] is the ServiceID column
        if inputServiceID == i[0]:
            # select the data according to the given requested query
            dataSelect = pt.select(
                table, "{ServiceID} == '" + str(inputServiceID) + "'")
            # cut out the required column names
            jsonData = pt.cut(dataSelect, 'Name', 'Service', 'Suburb', 'State',
                              'Email', 'Lat', 'Lon')
            # convert the dictionary data into json data
            jsonData = json.JSONEncoder().encode(list(pt.dicts(jsonData)))
            # return the json data
            return jsonData

        # requesting "0" returns the distinct list of clinics offering
        # any service
        if inputServiceID == "0":
            anyServices = pt.unique(table, key='Name')
            jsonData = pt.cut(anyServices, 'Name', 'Service', 'Suburb',
                              'State', 'Email', 'Lat', 'Lon')
            jsonData = json.JSONEncoder().encode(list(pt.dicts(jsonData)))
            return jsonData
    # for/else: this branch runs only when no row matched
    else:
        jsonData = json.JSONEncoder().encode('Unable to find this id.')
        return jsonData
Example #9
def get_location():
    setHeaders()
    postCode = request.query.postcode
    selectedRow = etl.select(storeLocationsTable,
                             "{Postcode} == '" + postCode + "'")

    # a match returns header + one data row; anything else means no result
    # (petl tables have no .len() method; use the builtin len())
    if len(selectedRow) != 2:
        defaultPostCode = "2000"
        selectedRow = etl.select(storeLocationsTable,
                                 "{Postcode} == '" + defaultPostCode + "'")

    storeData = {
        "name": selectedRow[1][0],
        "lat": selectedRow[1][2],
        "lon": selectedRow[1][3]
    }
    return storeData
Example #10
def clean_and_separate(table):
    """Do some cleanup of TABLE and split into individual and business tables.

    TABLE is a petl table."""

    # Rename column to expand name
    table = etl.rename(table, {'WVRSTATE': 'waiverstate'})

    # More conversions
    table = etl.convert(
        table,
        {
            'EXCLTYPE': lambda f: f.strip(),  # Trim extra spaces
            'EXCLDATE': munge_date,  # Arrange date for sqlite
            'REINDATE': munge_date,  # Arrange date for sqlite
            'WAIVERDATE': munge_date  # Arrange date for sqlite
        })

    # Separate into two tables, as this is actually two different data sets
    individual = etl.select(table, "{LASTNAME} != '' and {FIRSTNAME} != ''")
    business = etl.select(table, "{LASTNAME} == '' and {FIRSTNAME} == ''")

    # Sanity check: Make sure we split the rows without dupes or
    # missing any.  The +1 is to account for the second header row
    # that gets counted when we have two tables.
    if len(business) + len(individual) != len(table) + 1:
        fatal(
            "Separating business and individual exclusions came up with the wrong number of rows!"
        )

    # Remove unused columns
    individual = etl.transform.basics.cutout(individual, "BUSNAME")
    business = etl.transform.basics.cutout(business, "LASTNAME", "FIRSTNAME",
                                           "MIDNAME", "DOB")

    # Do some cleanup conversions on individual data
    individual = etl.convert(
        individual,
        {
            'DOB': munge_date,
            'MIDNAME': lambda f: f
            if f != " " else ""  # no spaces as middle names
        })
    return individual, business
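munge_date and fatal are project helpers not shown in this example. Assuming the source dates arrive as compact YYYYMMDD strings, a plausible stand-in for munge_date (an assumption, not the project's actual code) would be:

# Hypothetical helper: rearrange a compact 'YYYYMMDD' date into ISO
# 'YYYY-MM-DD' so sqlite can compare dates lexicographically; pass
# blanks and malformed values through as empty strings.
def munge_date(raw):
    raw = raw.strip()
    if len(raw) != 8 or not raw.isdigit():
        return ''
    return '{}-{}-{}'.format(raw[:4], raw[4:6], raw[6:])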
Example #11
def select_drain_issues(inp, assignee_id: int, drain_cf_id: int):
    def is_drain(fields: list) -> bool:
        return any(field['id'] == drain_cf_id and field['value'] == '1'
                   for field in fields)

    # custom fields have more selectivity
    inp = etl.select(inp, 'custom_fields', is_drain)
    return etl.selecteq(inp, 'assigned_to_id', assignee_id)
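The custom_fields column is expected to hold a list of dicts with 'id' and 'value' keys, as in Redmine's REST API. A small self-contained run (table contents invented):

import petl as etl

issues = etl.wrap([
    ['id', 'assigned_to_id', 'custom_fields'],
    [1, 7, [{'id': 42, 'value': '1'}]],
    [2, 7, [{'id': 42, 'value': '0'}]],
    [3, 8, [{'id': 42, 'value': '1'}]],
])

# Keeps only issue 1: drain flag set (custom field 42 == '1') and assignee 7.
drain = select_drain_issues(issues, assignee_id=7, drain_cf_id=42)
print(etl.look(drain))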
Example #12
def xref_symbol_reports():
    symbol_reports = [
        f for f in os.listdir()
        # raw string avoids the invalid-escape warning for \.
        if re.match(r'OCLC Datasync Unresolved.*\.csv', f)
    ]

    today = str(date.today())

    for report in symbol_reports:

        symbol_split = re.split('^.*processing.(M[A-Z]{2}).*$', report)
        symbol = symbol_split[1]
        xlsx_outfile = symbol + '_datasync_unresolved_' + today + '.xlsx'
        xls_outfile = symbol + '_datasync_unresolved_' + today + '.xls'
        txt_outfile = symbol + '_staging_OCNs_' + today + '.txt'

        symbol_table_raw = etl.fromcsv(report, encoding='utf-8')
        symbol_table = etl.rename(symbol_table_raw, '\ufeffMMS Id', 'MMS ID')
        symbol_table2 = etl.select(symbol_table, "{MMS ID} is not None")
        symbol_table_sorted = etl.sort(symbol_table2, 'MMS ID')

        xref_table = etl.fromcsv('unresxref.csv')
        xref_table2 = etl.select(xref_table, "{MMS ID} is not None")
        xref_table_sorted = etl.sort(xref_table2, 'MMS ID')

        symbol_xref_table = etl.join(symbol_table_sorted,
                                     xref_table_sorted,
                                     presorted=True,
                                     lkey="MMS ID",
                                     rkey="MMS ID")

        try:
            etl.toxlsx(symbol_xref_table, xlsx_outfile, encoding='utf-8')
        except TypeError:
            etl.toxls(symbol_xref_table,
                      xls_outfile,
                      'Sheet1',
                      encoding='utf-8')

        staging_ocns_table = etl.cut(symbol_xref_table, 'Staging OCN')
        template = '{Staging OCN}\n'
        etl.totext(staging_ocns_table, txt_outfile, template=template)
Example #13
def _medical_limits(id, source_db):
    """
    get the member limits
    """
    sql = ("SELECT dispensary_id, daily_purchase_limit, visit_purchase_limit, "
           "daily_visit_limit, two_week_purchase_limit "
           "FROM red_flags "
           "WHERE dispensary_id={0}").format(id)

    data = etl.fromdb(source_db, sql)
    # the WHERE clause already filters by dispensary_id; this select is a
    # defensive re-check on the petl side
    limits = etl.select(data, lambda rec: rec.dispensary_id == id)
    return etl.dicts(limits)
Example #14
def _get_taxes(id, source_db):
    """
    get the dispensary taxes settings for each dispensary_id
    """
    sql = ("SELECT DISTINCT dispensary_id, amount, name "
           "FROM taxes "
           "WHERE dispensary_id={0}").format(id)

    data = etl.fromdb(source_db, sql)
    try:
        lookup_taxes = etl.select(data, lambda rec: rec.dispensary_id == id)
        return etl.dicts(lookup_taxes)
    except KeyError:
        return 0
Example #15
def valuecounts(table, col_name):
    return_dict = {}
    reported_count = 0
    unreported_count = 0
    column = petl.values(table, col_name)
    nrows = petl.nrows(table)
    non_blanks = petl.select(table, '{' + quote_single_quote(col_name) + "} != ''")
    num_blanks = nrows - petl.nrows(non_blanks)
    counts_table = petl.valuecounts(non_blanks, col_name)
    for row in petl.records(counts_table):
        if row['frequency'] > 0.01:
            return_dict[row[col_name]] = row['count']
            reported_count += row['count']
        else:
            unreported_count += row['count']
    return_dict['<other>'] = unreported_count
    return_dict['<blank>'] = num_blanks
    return return_dict
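quote_single_quote is not shown in this example; presumably it escapes single quotes so an arbitrary column name can be embedded in the petl expression string. A hypothetical stand-in plus a tiny run:

import petl

# Hypothetical helper: escape single quotes in a field name so it can sit
# safely inside an expression string like "{col} != ''".
def quote_single_quote(col_name):
    return col_name.replace("'", "\\'")

tbl = [['status'], ['open'], [''], ['open'], ['closed']]
# -> {'open': 2, 'closed': 1, '<other>': 0, '<blank>': 1}
print(valuecounts(tbl, 'status'))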
Example #16
def valuecounts(table, col_name):
    return_dict = {}
    reported_count = 0
    unreported_count = 0
    column = petl.values(table, col_name)
    nrows = petl.nrows(table)
    non_blanks = petl.select(table,
                             '{' + quote_single_quote(col_name) + "} != ''")
    num_blanks = nrows - petl.nrows(non_blanks)
    counts_table = petl.valuecounts(non_blanks, col_name)
    for row in petl.records(counts_table):
        if row['frequency'] > 0.01:
            return_dict[row[col_name]] = row['count']
            reported_count += row['count']
        else:
            unreported_count += row['count']
    return_dict['<other>'] = unreported_count
    return_dict['<blank>'] = num_blanks
    return return_dict
Example #17
from collections import OrderedDict

from petl import cutout, fieldmap, fromcsv, merge, select, sort, tocsv


def dataPreProcessing(fileName):
    inputData = fromcsv(fileName)
    table1 = cutout(inputData, 'member_id', 'grade', 'sub_grade', 'emp_title',
                    'url', 'desc', 'title', 'accept_d', 'exp_d', 'list_d',
                    'issue_d', 'purpose', 'addr_city', 'addr_state',
                    'earliest_cr_line', 'last_pymnt_d', 'next_pymnt_d',
                    'last_credit_pull_d')
    table2 = select(
        table1,
        # `is not ""` compared identity, not equality; use != instead
        lambda i: i['term'] == ' 36 months' and i['loan_status'] != "")
    labelMapping = OrderedDict()
    labelMapping['loan_status'] = 'loan_status'
    labelMapping['id'] = 'id'
    table6 = fieldmap(table2, labelMapping)
    table8 = sort(table6, 'id')
    table10 = cutout(table8, 'id')
    mappings = OrderedDict()
    mappings['id'] = 'id'
    mappings['home_ownership'] = 'ownership', {
        'MORTGAGE': '-1',
        'RENT': '0',
        'OWN': '1'
    }
    mappings['emp_length'] = 'empLength', {'n/a': 0}
    mappings['is_inc_v'] = 'verificationStatus', {
        'Source Verified': 1,
        'Verified': 0,
        'Not Verified': -1
    }
    mappings['pymnt_plan'] = 'paymentPlan', {'n': 0, 'y': 1}
    mappings['initial_list_status'] = 'listStatus', {'f': 0, 'w': 1}
    table3 = fieldmap(table2, mappings)
    table4 = cutout(table2, 'home_ownership', 'is_inc_v', 'pymnt_plan',
                    'initial_list_status', 'term', 'loan_status')
    table5 = merge(table3, table4, key='id')
    table7 = sort(table5, 'id')
    table9 = cutout(table7, 'id')
    # note: tocsv writes the files as a side effect and returns None
    featureFileCsv = tocsv(table9, 'featureFileCsv.csv')
    labelsFileCsv = tocsv(table10, 'labelsFileCsv.csv')
    return featureFileCsv, labelsFileCsv
Example #18
def xls_tidy(xls, qvalue):
    d = etl.fromtsv(xls)
    sd = etl.select(d, lambda x: float(x.PepQValue) <= float(qvalue))
    psmsummary = sd

    ssd = etl.cut(sd, 'Peptide', 'Protein', 'PepQValue')
    # remove the mod info in the peptide
    ssd = etl.transform.regex.sub(ssd, 'Peptide', r'^[\w-]\.(.+)\.[\w-]$', r'\1')
    ssd = etl.transform.regex.sub(ssd, 'Peptide', r'[\d\.\+]+', r'')

    aggregation = OrderedDict()
    aggregation['SpecCount'] = len
    cssd = etl.aggregate(ssd, 'Peptide', aggregation)

    fssd = etl.groupselectfirst(ssd, key=('Peptide', 'Protein', 'PepQValue'))
    aggregation = OrderedDict()
    aggregation['Protein'] = 'Protein', etl.strjoin(';')
    aggregation['PepQValue'] = 'PepQValue', etl.strjoin(';')
    assd = etl.aggregate(fssd, 'Peptide', aggregation)
    pepsummary = etl.join(assd, cssd, key='Peptide')

    return (psmsummary, pepsummary)
Example #19
from __future__ import absolute_import, print_function, division


import petl as etl
table = [['foo', 'bar'],
         ['a', 1],
         ['b', None]]

# raises TypeError under Python 3 when the table is iterated,
# because None and 0 do not compare
etl.select(table, 'bar', lambda v: v > 0)
# no error under Python 3
etl.selectgt(table, 'bar', 0)
# or ...
etl.select(table, 'bar', lambda v: v > etl.Comparable(0))

Example #20
table = src_table
print('TOTAL SOURCE ROWS = ' + str(etl.nrows(table)))
print('SOURCE HEADERS = ' + str(etl.header(table)))

#UNUSED COLUMNS
if CLEAN_UP:
    table = clean_up(table, 'rcv_nm')
    table = clean_up(table, 'recp_cd')
    table = clean_up(table, 'ins_ind')
    table = clean_up(table, 'geo_ind')
    table = clean_up(table, 'cid')
    table = clean_up(table, 'occ_typ')
    print('TRIMMED HEADERS = ' + str(etl.header(table)))

table = etl.select(table, 'occ_dt', lambda x: x > datetime(2000, 1, 1))
print('ROWS POST YR 2000 = ' + str(etl.nrows(table)))

mine_table = etl.fromcsv('mines.csv', encoding='utf-8')

# handle leading 0's
mine_table = etl.convert(mine_table, 'mine_no', lambda x: str(int(x)))
table = etl.convert(table, 'mine_no', lambda x: str(int(x)))

#MAP mine_no to mine_guid
table = etl.leftjoin(table, mine_table, key='mine_no')
table = clean_up(table, 'mine_no')
#make sure this is 0
if etl.valuecount(table, 'mine_guid', None)[0] > 0:
    print('mine_guid, mine_no pair missing from mines.csv')
    exit(1)
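clean_up is a project helper not shown here; given how it is used (called once per unwanted column, and again after the join), it presumably just drops a column, e.g. a stand-in like:

import petl as etl

# Hypothetical stand-in for the project's clean_up helper:
# drop a single column from the table.
def clean_up(table, field):
    return etl.cutout(table, field)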
Example #21
# select

table1 = [['foo', 'bar', 'baz'],
          ['a', 4, 9.3],
          ['a', 2, 88.2],
          ['b', 1, 23.3],
          ['c', 8, 42.0],
          ['d', 7, 100.9],
          ['c', 2]]

from petl import select, look     
look(table1)
# the second positional argument can be a function accepting a record (i.e., a 
# dictionary representation of a row).
table2 = select(table1, lambda rec: rec['foo'] == 'a' and rec['baz'] > 88.1)
look(table2)
# the second positional argument can also be an expression string, which 
# will be converted to a function using expr()
table3 = select(table1, "{foo} == 'a' and {baz} > 88.1")
look(table3)
# the condition can also be applied to a single field
table4 = select(table1, 'foo', lambda v: v == 'a')
look(table4)


# fieldmap

table1 = [['id', 'sex', 'age', 'height', 'weight'],
          [1, 'male', 16, 1.45, 62.0],
          [2, 'female', 19, 1.34, 55.4],
Example #22

# select()
##########

import petl as etl
table1 = [['foo', 'bar', 'baz'],
          ['a', 4, 9.3],
          ['a', 2, 88.2],
          ['b', 1, 23.3],
          ['c', 8, 42.0],
          ['d', 7, 100.9],
          ['c', 2]]
# the second positional argument can be a function accepting
# a row
table2 = etl.select(table1,
                    lambda rec: rec.foo == 'a' and rec.baz > 88.1)
table2
# the second positional argument can also be an expression
# string, which will be converted to a function using petl.expr()
table3 = etl.select(table1, "{foo} == 'a' and {baz} > 88.1")
table3
# the condition can also be applied to a single field
table4 = etl.select(table1, 'foo', lambda v: v == 'a')
table4


# selectre()
############

import petl as etl
table1 = [['foo', 'bar', 'baz'],
Example #23
def sales_summary(start_dt=None, end_dt=None, staff_id=None, for_export=False):
    """tally up gross (sale over list) profits
    TODO: tally up net profits (gross profit vs inventory purchase total)

    TODO: Keyword Arguments:
        start_dt {[type]} -- datetime for start of query (default: {None})
        end_dt {[type]} -- datetime for end of query (default: {None})

    Returns:
        [dict] -- various types of sales information, stored in a dictionary.
    """

    # products = db.session.query(Product).all()
    # sales = db.session.query(Sale).all()

    # retrieve existing tables
    products_records = etl.fromdb(db.engine, 'SELECT * FROM product')
    sales_records = etl.fromdb(db.engine, 'SELECT * FROM sale')
    staff_records = etl.fromdb(db.engine, 'SELECT * FROM staff')

    # filter by start/end date if provided
    if start_dt and end_dt:
        sales_records = etl\
            .selectnotnone(sales_records, 'date')\
            .select(lambda r: r.date > start_dt and r.date <= end_dt)
    elif start_dt and not end_dt:
        sales_records = etl\
            .selectnotnone(sales_records, 'date')\
            .select(lambda r: r.date > start_dt)
    elif end_dt and not start_dt:
        sales_records = etl\
            .selectnotnone(sales_records, 'date')\
            .select(lambda r: r.date <= end_dt)
    else:
        pass

    # filter by staff id if provided
    if staff_id:
        sales_records = etl.select(sales_records, 'staff_id',
                                   lambda v: v == staff_id)

    # join product info to sales data
    sales_data = etl\
        .join(
            sales_records,
            products_records,
            lkey='product_id',
            rkey='id'
        )\
        .leftjoin(
            staff_records,
            lkey='staff_id',
            rkey='id'
        )

    # prep joined sales data for tabulation
    sales_data = etl\
        .convert(sales_data, 'date', lambda dt: format_date(dt))\
        .sort('date')\
        .convert('quantity', lambda q: handle_none(q, replace_with=1))\
        .addfield('profit', lambda rec: calculate_profit(rec))\
        .addfield('gross_sales', lambda rec: calculate_gross_sales(rec))

    # tabulate some figures
    gross_sales = 0
    profits = 0
    for sale in etl.dicts(sales_data):
        profits += calculate_profit(sale)
        gross_sales += calculate_gross_sales(sale)

    if for_export:
        return {
            'gross_sales': gross_sales,
            'profits': profits,
            'table': sales_data
        }

    # summarize data into charting-friendly data structures
    chart_count, chart_count_missing_date = etl\
        .fold(sales_data, 'date', operator.add, 'quantity', presorted=True)\
        .rename({'key': 'x', 'value': 'y'})\
        .biselect(lambda rec: rec.x is not None)

    # print(chart_count)
    # etl.lookall(chart_count)

    chart_gross, chart_gross_missing_date = etl\
        .fold(sales_data, 'date', operator.add, 'gross_sales', presorted=True)\
        .rename({'key': 'x', 'value': 'y'})\
        .biselect(lambda rec: rec.x is not None)

    # print(chart_gross)
    # etl.lookall(chart_gross)

    chart_profit, chart_profit_missing_date = etl\
        .fold(sales_data, 'date', operator.add, 'profit', presorted=True)\
        .rename({'key': 'x', 'value': 'y'})\
        .biselect(lambda rec: rec.x is not None)

    # for i in etl.dicts(chart_count):
    #     print(i)
    # for i in etl.dicts(chart_gross):
    #     print(i)

    return {
        'gross_sales': gross_sales,
        'profits': profits,
        'chart_gross': list(etl.dicts(chart_gross)),
        'chart_gross_missing_date': list(etl.dicts(chart_gross_missing_date)),
        'chart_profit': list(etl.dicts(chart_profit)),
        'chart_profit_missing_date':
        list(etl.dicts(chart_profit_missing_date)),
        'chart_count': list(etl.dicts(chart_count)),
        'chart_count_missing_date': list(etl.dicts(chart_count_missing_date))
    }
Example #24
import sys
import petl as etl


def add_bbreflink(rec):
    bid = rec['bbrefID']
    initial = bid[0]
    return "http://www.baseball-reference.com/players/" + initial + "/" + bid + ".shtml"



# Load Master.csv from the Lahman database.
table = etl.fromcsv(sys.argv[1])

# Use US births only
table2 = etl.select(table, lambda rec: rec.birthCountry == 'USA')

# Only use these fields
table3 = etl.cut(table2, 'nameFirst', 'nameLast', 'debut', 'bbrefID', 'weight', 'height', 'finalGame', 'birthCity', 'birthState', 'birthYear')

# Remove null birth city and birth year
table4 = etl.select(table3, lambda rec: rec.birthCity != "" and rec.birthYear != "")

# Add Baseball Reference URL
table5 = etl.addfield(table4, 'baseball_ref_url', add_bbreflink)
# Remove unnecessary bbrefid
table6 = etl.cutout(table5, "bbrefID")

# Load city,state lat long table.
city = etl.fromcsv(sys.argv[2])
# Only use these fields
Example #25
    # load expense document
    try:
        expenses = petl.io.xlsx.fromxlsx('Expenses.xlsx', sheet='Github')
    except Exception as e:
        print('could not open expenses.xlsx:' + str(e))
        sys.exit()

    # join tables
    expenses = petl.outerjoin(exchangeRates, expenses, key='date')

    # fill down missing values
    expenses = petl.filldown(expenses, 'rate')

    # remove dates with no expenses
    expenses = petl.select(expenses, lambda rec: rec.USD is not None)

    # add CAD column
    expenses = petl.addfield(expenses, 'CAD',
                             lambda rec: decimal.Decimal(rec.USD) * rec.rate)

    # initialize database connection
    try:
        dbConnection = pymssql.connect(server=destServer,
                                       database=destDatabase)
    except Exception as e:
        print('could not connect to database:' + str(e))
        sys.exit()

    # populate Expenses database table
    try:
Example #26
def remove_rows(table, list_rows_to_remove):
    global g

    # row_remover (defined elsewhere) reads g.list_rows_to_remove;
    # complement=True keeps the rows it does NOT match
    g.list_rows_to_remove = list_rows_to_remove
    return petl.select(table, row_remover, complement=True)
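g and row_remover are defined elsewhere in that project. One hypothetical reading of the pattern (all names below are stand-ins): row_remover flags rows listed in the shared state, and complement=True inverts it so flagged rows are dropped.

import petl
from types import SimpleNamespace

# Hypothetical module-level state mirroring the example's g.
g = SimpleNamespace(list_rows_to_remove=[])

# Hypothetical predicate: flag rows whose first field is in the removal list.
def row_remover(row):
    return row[0] in g.list_rows_to_remove

table = [['id', 'name'], [1, 'ann'], [2, 'bob'], [3, 'cat']]
kept = remove_rows(table, [2])
print(petl.look(kept))  # rows 1 and 3 remain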
Example #27
def transform(mmj_menu_items, mmj_categories, prices, organization_id,
              source_db, debug):
    """
    Transform data
    """
    # source data table
    source_dt = utils.view_to_list(mmj_menu_items)

    cut_menu_data = [
        'id', 'vendor_id', 'menu_id', 'dispensary_id', 'strain_id',
        'created_at', 'updated_at', 'category_id', 'name', 'sativa', 'indica',
        'on_hold', 'product_type', 'image_file_name', 'medicine_amount',
        'product_type'
    ]

    cut_prices = [
        'menu_item_id', 'dispensary_id', 'price_half_gram', 'price_gram',
        'price_two_gram', 'price_eigth', 'price_quarter', 'price_half',
        'price_ounce'
    ]

    # Cut out all the fields we don't need to load
    menu_items = etl.cut(source_dt, cut_menu_data)
    prices_data = etl.cut(prices, cut_prices)

    menu_items = (etl.addfield(
        menu_items, 'createdAtEpoch').addfield('unitOfMeasure').addfield(
            'locationProductDetails').addfield('keys').addfield('restockLevel')
                  )

    # Two-step transform and cut. First we need to cut the name
    # and id from the source data to map to.
    cut_source_cats = etl.cut(mmj_categories, 'name', 'id', 'measurement')
    source_values = etl.values(cut_source_cats, 'name', 'id')

    # Then we need a dict of categories to compare against.
    # id is stored to match against when transforming and mapping categories
    mmj_categories = dict(source_values)

    mappings = OrderedDict()
    mappings['id'] = 'id'
    mappings['createdAt'] = 'created_at'
    mappings['updatedAt'] = 'updated_at'
    mappings['createdAtEpoch'] = lambda x: utils.create_epoch(x.created_at)
    mappings['name'] = 'name'
    mappings['shareOnWM'] = lambda x: _wm_integration(x.id, source_db)
    """
    1 = Units
    2 = Grams (weight)
    """
    mappings['unitOfMeasure'] = \
        lambda x: _map_uom(x.category_id, source_db)

    fields = etl.fieldmap(menu_items, mappings)
    data = etl.merge(menu_items, fields, key='id')

    items = []
    for item in etl.dicts(data):

        breakpoint_pricing = (etl.select(
            prices_data,
            lambda x: x.dispensary_id == item['dispensary_id']).rename({
                'price_eigth':
                'price_eighth'
            }).cutout('menu_item_id'))
        # Set image url for load to download
        url = None
        if debug and item['image_file_name'] is not None:
            url = ("https://wm-mmjmenu-images-development.s3."
                   "amazonaws.com/menu_items/images/{0}/large/"
                   "{1}").format(item['id'], item['image_file_name'])
        elif item['image_file_name'] is not None:
            url = ("https://wm-mmjmenu-images-production.s3."
                   "amazonaws.com/menu_items/images/{0}/large/"
                   "{1}").format(item['id'], item['image_file_name'])

        item['image_file_name'] = url

        item['categoryId'] = _map_categories(item['category_id'],
                                             item['sativa'], item['indica'],
                                             mmj_categories, menu_items)
        item['keys'] = {
            'dispensary_id': item['dispensary_id'],
            'id': item['id'],
            'menu_id': item['menu_id'],
            'vendor_id': item['vendor_id'],
            'strain_id': item['strain_id'],
            'category_id': item['category_id']
        }

        # set a default netMJ value if the menu item is sold by weight
        # (unitOfMeasure 2 = grams, per the docstring above)
        if item['unitOfMeasure'] == 2:
            item['netMarijuana'] = int(item['medicine_amount'])

        for key in item['keys'].keys():
            if not item['keys'][key]:
                del item['keys'][key]

        item['locationProductDetails'] = {
            'id': item['id'],
            'active': _active(item['on_hold'])
        }

        item['restockLevel'] = _restock_level(item['dispensary_id'],
                                              item['product_type'], source_db)

        if item['shareOnWM'] is None:
            item['shareOnWM'] = False

        for price in etl.dicts(breakpoint_pricing):
            try:
                price_two_gram = price['price_two_gram']
            except KeyError:
                price_two_gram = 0.0

            item['locationProductDetails']['weightPricing'] = {
                'price_half_gram':
                utils.dollars_to_cents(price['price_half_gram']),
                'price_two_gram': utils.dollars_to_cents(price_two_gram),
                'price_gram': utils.dollars_to_cents(price['price_gram']),
                'price_eighth': utils.dollars_to_cents(price['price_eighth']),
                'price_quarter':
                utils.dollars_to_cents(price['price_quarter']),
                'price_half': utils.dollars_to_cents(price['price_half']),
                'price_ounce': utils.dollars_to_cents(price['price_ounce'])
            }

        del item['vendor_id']
        del item['indica']
        del item['dispensary_id']
        del item['id']
        del item['strain_id']
        del item['on_hold']
        del item['menu_id']
        del item['sativa']
        del item['category_id']
        del item['updated_at']
        del item['created_at']
        del item['product_type']

        if item['image_file_name'] is None:
            del item['image_file_name']

        # set up final structure for API
        items.append(item)

    # Remove inactive items; build a new list rather than calling
    # items.remove() while iterating, which skips elements
    items = [
        item for item in items
        if item['locationProductDetails']['active'] is not False
    ]

    if debug:
        result = json.dumps(items,
                            sort_keys=True,
                            indent=4,
                            default=utils.json_serial)
        print(result)

    return items
Example #28
from __future__ import absolute_import, print_function, division

# select()
##########

import petl as etl
table1 = [['foo', 'bar', 'baz'], ['a', 4, 9.3], ['a', 2, 88.2], ['b', 1, 23.3],
          ['c', 8, 42.0], ['d', 7, 100.9], ['c', 2]]
# the second positional argument can be a function accepting
# a row
table2 = etl.select(table1, lambda rec: rec.foo == 'a' and rec.baz > 88.1)
table2
# the second positional argument can also be an expression
# string, which will be converted to a function using petl.expr()
table3 = etl.select(table1, "{foo} == 'a' and {baz} > 88.1")
table3
# the condition can also be applied to a single field
table4 = etl.select(table1, 'foo', lambda v: v == 'a')
table4

# selectre()
############

import petl as etl
table1 = [['foo', 'bar', 'baz'], ['aa', 4, 9.3], ['aaa', 2, 88.2],
          ['b', 1, 23.3], ['ccc', 8, 42.0], ['bb', 7, 100.9], ['c', 2]]
table2 = etl.selectre(table1, 'foo', '[ab]{2}')
table2

# selectusingcontext()
######################
Example #29
for x in range(length):
    attr = data['attibutes'][x]['attrName']
    matchingField = data['attibutes'][x]['matchingField']
    mappings[attr] = matchingField
    
mappedTable = etl.fieldmap(dataTable, mappings)

cleansedTable = mappedTable
# apply cleaning rules to the table; reversed so the top attributes get priority
for x in reversed(range(length)):
    attr = data['attibutes'][x]['attrName']
    rules = data['attibutes'][x]['rules']
    rulesListSize = len(rules)
    for y in range(rulesListSize):
        if rules[y] == "Remove Null Value Rows":
            cleansedTable = etl.select(cleansedTable, attr, lambda v: v != '')
        if rules[y] == "Remove Duplicates":
            cleansedTable = etl.aggregate(cleansedTable, attr)
        if rules[y] == "Sort":
            cleansedTable = etl.mergesort(cleansedTable, key=attr)
        if rules[y] == "Number Validation":
            cleansedTable = etl.select(cleansedTable, attr)
        if rules[y] == "Fill Missing Values":
            cleansedTable = etl.filldown(cleansedTable, attr)

etl.tocsv(cleansedTable,'src/etl/outputs/cleansed.csv')

#Create rawData Table
dataTable = cleansedTable
rawDataTable = cleansedTable
Example #30
# select

table1 = [['foo', 'bar', 'baz'],
          ['a', 4, 9.3],
          ['a', 2, 88.2],
          ['b', 1, 23.3],
          ['c', 8, 42.0],
          ['d', 7, 100.9],
          ['c', 2]]

from petl import select, look     
look(table1)
# the second positional argument can be a function accepting a record (i.e., a 
# dictionary representation of a row).
table2 = select(table1, lambda rec: rec['foo'] == 'a' and rec['baz'] > 88.1)
look(table2)
# the second positional argument can also be an expression string, which 
# will be converted to a function using expr()
table3 = select(table1, "{foo} == 'a' and {baz} > 88.1")
look(table3)
# the condition can also be applied to a single field
table4 = select(table1, 'foo', lambda v: v == 'a')
look(table4)


# fieldmap

table1 = [['id', 'sex', 'age', 'height', 'weight'],
          [1, 'male', 16, 1.45, 62.0],
          [2, 'female', 19, 1.34, 55.4],
Example #31
        sys.exit(1)
    #
    print("Some aggregates from the results, for date: ", DATE)

    t1 = (etl.fromcsv(f"var/s1_invisible_prefixes-{DATE}.csv",
                      delimiter="|").convert('visible', int).convert(
                          'dark', int).convert('total', int))
    #print(t1.look())

    print("Some aggregates from the results, for date: ", DATE)
    print("Total space in the whole pool",
          sum([x[4] for x in list(t1)][1:]))

    print(" - ")
    # number of completely invisible allocations
    n1 = etl.select(t1, lambda r: r['dark'] == r['total'])
    print("number of completely invisible allocations", n1.nrows())
    print("total IPs in completely invisible allocations",
          sum([x[4] for x in list(n1)][1:]))
    print("average size of completely invisible allocations",
          Average([x[4] for x in list(n1)][1:]))

    print(" - ")
    # number of partially invisible allocations
    n2 = etl.select(t1, lambda r: r['dark'] > 0)
    print("number of partially invisible allocations", n2.nrows())
    print("total IPs in partially invisible allocations",
          sum([x[3] for x in list(n2)][1:]))
    print("average size of partially invisible allocations (total) ",
          Average([x[4] for x in list(n2)][1:]))
    print("average size of partially invisible allocations (dark) ",
Example #32
    'new_tests', 'total_tests_per_thousand', 'new_tests_per_thousand',
    'tests_units'
]]

# the cut function keeps only the columns listed below
# cut is not strictly necessary for table1, because the fields listed below are all of the fields present in table1
data = etl.cut(table1, 'iso_code', 'location', 'date', 'total_cases',
               'new_cases', 'total_deaths', 'new_deaths',
               'total_cases_per_million', 'new_cases_per_million',
               'total_deaths_per_million', 'new_deaths_per_million',
               'total_tests', 'new_tests', 'total_tests_per_thousand',
               'new_tests_per_thousand', 'tests_units')

# selecting the data from the table on the basis of the current date
# num holds only the 2020-04-30 data for each country, so only the latest data is kept
num = etl.select(data, 'date', lambda r: r == '2020-04-30')

# sort function is used to sort the data on the basis of iso_code
# this makes it easier to join the data in later steps
table1_sort = etl.sort(num, key='iso_code')

# counter variable declared to count the number of countries
count = 0

# values function is used to read the data from table
for i in etl.values(table1_sort, 'iso_code', 'location', 'date', 'total_cases',
                    'new_cases', 'total_deaths', 'new_deaths',
                    'total_cases_per_million', 'new_cases_per_million',
                    'total_deaths_per_million', 'new_deaths_per_million',
                    'total_tests', 'new_tests', 'total_tests_per_thousand',
                    'new_tests_per_thousand', 'tests_units'):
Example #33
                for name, region in q_geo.items():
                    if point.within(region):
                        outrow.append(name)
                        break
            writer.writerow(outrow)
        f.close()
print("Creating new csv...")
addRows(geo,'tract','saved_homes.csv','saved_homes_extended.csv')
print("New csv created.")
table = etl.fromcsv('saved_homes_extended.csv')
zip_list = []
tract_list = []
year_list = []
print("Aggregating by zip...")
for feature in zip_data['features']:
    zip_table = etl.select(table, 'zip', lambda x: x == feature['properties']['code'])
    # rebind under a new name so the zip_data GeoJSON dict iterated above
    # is not shadowed
    zip_rows = etl.data(zip_table)
    zip_data_list = list(zip_rows)
    zip_dict = {'zip' : feature['properties']['code'], 'saved' : 0, 'lost' : 0, 'saved_fta' : 0, 'lost_fta' : 0, 'pending' : 0, 'pending_fta' : 0, 'vacant' : 0, 'nonowner' : 0, 'litig/bankr' : 0, 'litig/bankr_fta' : 0, 'shape' : feature['geometry']}
    for b in zip_data_list:
            if b[1] == 'Saved':
                zip_dict['saved'] = zip_dict['saved'] + 1
            elif b[1] == 'Lost':
                zip_dict['lost'] = zip_dict['lost'] + 1
            elif b[1] == 'Saved - FTA':
                zip_dict['saved_fta'] = zip_dict['saved_fta'] + 1
            elif b[1] == 'Lost - FTA':
                zip_dict['lost_fta'] = zip_dict['lost_fta'] + 1
            elif b[1] == 'Pending':
                zip_dict['pending'] = zip_dict['pending'] + 1
            elif b[1] == 'Pending - FTA':
Example #34
import sys
import petl as etl


def add_bbreflink(rec):
    bid = rec['bbrefID']
    initial = bid[0]
    return "http://www.baseball-reference.com/players/" + initial + "/" + bid + ".shtml"


# Load Master.csv from the Lahman database.
table = etl.fromcsv(sys.argv[1])

# Use US births only
table2 = etl.select(table, lambda rec: rec.birthCountry == 'USA')

# Only use these fields
table3 = etl.cut(table2, 'nameFirst', 'nameLast', 'debut', 'bbrefID', 'weight',
                 'height', 'finalGame', 'birthCity', 'birthState', 'birthYear')

# Remove null birth city and birth year
table4 = etl.select(table3,
                    lambda rec: rec.birthCity != "" and rec.birthYear != "")

# Add Baseball Reference URL
table5 = etl.addfield(table4, 'baseball_ref_url', add_bbreflink)
# Remove unnecessary bbrefid
table6 = etl.cutout(table5, "bbrefID")

# Load city,state lat long table.
Example #35
    "Free SO2", "Total SO2", "Density", "pH", "Sulfates", "Alcohol", "Quality"
]

table1 = etl.addfield(
    etl.convertnumbers(
        etl.setheader(etl.fromcsv('winequality-red.csv'), table_header)),
    "Type", "Red")
table2 = etl.addfield(
    etl.convertnumbers(
        etl.setheader(etl.fromcsv('winequality-white.csv'), table_header)),
    "Type", "White")

#print(etl.head(table1))
#print(etl.head(table2))

table1_filtered = etl.select(table1, "Quality", lambda v: v > 6)
table2_filtered = etl.select(table2, "Quality", lambda v: v > 4)

good_wines = etl.cat(table1_filtered, table2_filtered)

good_wines_enhanced = etl.addfields(
    good_wines,
    [("Max Acidity",
      lambda rec: rec["Fixed Acidity"] + rec["Volatile Acidity"]),
     ("Locked SO2", lambda rec: rec["Total SO2"] - rec["Free SO2"])])
#print(etl.head(good_wines_enhanced))
#print(etl.tail(good_wines_enhanced))

gwe_sorted = etl.sort(good_wines_enhanced, key=["Quality", "Sugar"])

#print(etl.head(gwe_sorted))
Example #36
from __future__ import absolute_import, print_function, division

import petl as etl
table = [['foo', 'bar'], ['a', 1], ['b', None]]

# raises TypeError under Python 3 when the table is iterated,
# because None and 0 do not compare
etl.select(table, 'bar', lambda v: v > 0)
# no error under Python 3
etl.selectgt(table, 'bar', 0)
# or ...
etl.select(table, 'bar', lambda v: v > etl.Comparable(0))