def test_integration():
    left = etl.wrap((('begin', 'end', 'quux'),
                     (1, 2, 'a'),
                     (2, 4, 'b'),
                     (2, 5, 'c'),
                     (9, 14, 'd'),
                     (9, 140, 'e'),
                     (1, 1, 'f'),
                     (2, 2, 'g'),
                     (4, 4, 'h'),
                     (5, 5, 'i'),
                     (1, 8, 'j')))
    right = etl.wrap((('start', 'stop', 'value'),
                      (1, 4, 'foo'),
                      (3, 7, 'bar'),
                      (4, 9, 'baz')))
    actual = left.intervaljoin(right, lstart='begin', lstop='end',
                               rstart='start', rstop='stop')
    expect = (('begin', 'end', 'quux', 'start', 'stop', 'value'),
              (1, 2, 'a', 1, 4, 'foo'),
              (2, 4, 'b', 1, 4, 'foo'),
              (2, 4, 'b', 3, 7, 'bar'),
              (2, 5, 'c', 1, 4, 'foo'),
              (2, 5, 'c', 3, 7, 'bar'),
              (2, 5, 'c', 4, 9, 'baz'),
              (1, 8, 'j', 1, 4, 'foo'),
              (1, 8, 'j', 3, 7, 'bar'),
              (1, 8, 'j', 4, 9, 'baz'))
    ieq(expect, actual)
    ieq(expect, actual)  # verify the table can be iterated more than once

def test_integration():
    tbl = (('foo', 'bar'),
           ('A', 1),
           ('B', 2),
           ('C', 2),
           (u'é', datetime(2012, 1, 1)))
    f = NamedTemporaryFile(delete=False)
    f.close()
    etl.wrap(tbl).toxlsx(f.name, 'Sheet1')
    actual = etl.fromxlsx(f.name, 'Sheet1')
    ieq(tbl, actual)

def test_integration():
    expect = (('foo', 'bar'),
              ('A', 1),
              ('B', 2),
              ('C', 2))
    f = NamedTemporaryFile(delete=False)
    f.close()
    etl.wrap(expect).toxls(f.name, 'Sheet1')
    actual = etl.fromxls(f.name, 'Sheet1')
    ieq(expect, actual)
    ieq(expect, actual)  # verify the table can be iterated more than once

def test_wrap_tuple_return():
    tablea = etl.wrap((('foo', 'bar'),
                       ('A', 1),
                       ('C', 7)))
    tableb = etl.wrap((('foo', 'bar'),
                       ('B', 5),
                       ('C', 7)))
    added, removed = tablea.diff(tableb)
    eq_(('foo', 'bar'), added.header())
    eq_(('foo', 'bar'), removed.header())
    ieq(etl.data(added), added.data())
    ieq(etl.data(removed), removed.data())

def test_teepickle():
    t1 = (('foo', 'bar'),
          ('a', 2),
          ('b', 1),
          ('c', 3))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    (etl.wrap(t1)
        .teepickle(f1.name)
        .selectgt('bar', 1)
        .topickle(f2.name))
    ieq(t1, etl.frompickle(f1.name))
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))

def test_teehtml_unicode():
    t1 = ((u'foo', u'bar'),
          (u'Արամ Խաչատրյան', 2),
          (u'Johann Strauß', 1),
          (u'Вагиф Сәмәдоғлу', 3))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    (etl.wrap(t1)
        .teehtml(f1.name, encoding='utf-8')
        .selectgt('bar', 1)
        .topickle(f2.name))
    ieq(t1, (etl.fromxml(f1.name, './/tr', ('th', 'td'), encoding='utf-8')
             .convertnumbers()))
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))

def test_teehtml():
    t1 = (('foo', 'bar'),
          ('a', 2),
          ('b', 1),
          ('c', 3))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    (etl.wrap(t1)
        .teehtml(f1.name)
        .selectgt('bar', 1)
        .topickle(f2.name))
    ieq(t1, etl.fromxml(f1.name, './/tr', ('th', 'td')).convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))

def test_teecsv_unicode():
    t1 = ((u'name', u'id'),
          (u'Արամ Խաչատրյան', 1),
          (u'Johann Strauß', 2),
          (u'Вагиф Сәмәдоғлу', 3),
          (u'章子怡', 4))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    (etl.wrap(t1)
        .teecsv(f1.name, encoding='utf-8')
        .selectgt('id', 1)
        .tocsv(f2.name, encoding='utf-8'))
    ieq(t1, etl.fromcsv(f1.name, encoding='utf-8').convertnumbers())
    ieq(etl.wrap(t1).selectgt('id', 1),
        etl.fromcsv(f2.name, encoding='utf-8').convertnumbers())

def test_container():
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))

    actual = etl.wrap(table)[0]
    expect = ('foo', 'bar')
    eq_(expect, actual)

    actual = etl.wrap(table)['bar']
    expect = (1, 2, 2)
    ieq(expect, actual)

    actual = len(etl.wrap(table))
    expect = 4
    eq_(expect, actual)

def test_teetsv():
    t1 = (('foo', 'bar'),
          ('a', 2),
          ('b', 1),
          ('c', 3))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    (etl.wrap(t1)
        .teetsv(f1.name, encoding='ascii')
        .selectgt('bar', 1)
        .totsv(f2.name, encoding='ascii'))
    ieq(t1, etl.fromtsv(f1.name, encoding='ascii').convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1),
        etl.fromtsv(f2.name, encoding='ascii').convertnumbers())

def test_repr_html():
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    expect = """<table class='petl'>
<thead>
<tr>
<th>foo</th>
<th>bar</th>
</tr>
</thead>
<tbody>
<tr>
<td>a</td>
<td style='text-align: right'>1</td>
</tr>
<tr>
<td>b</td>
<td style='text-align: right'>2</td>
</tr>
<tr>
<td>c</td>
<td style='text-align: right'>2</td>
</tr>
</tbody>
</table>
"""
    actual = etl.wrap(table)._repr_html_()
    for l1, l2 in zip(expect.split('\n'), actual.split('\n')):
        eq_(l1, l2)

def test_values_container_convenience_methods():
    table = etl.wrap((('foo', 'bar'),
                      ('a', 1),
                      ('b', 2),
                      ('c', 2)))

    actual = table.values('foo').set()
    expect = {'a', 'b', 'c'}
    eq_(expect, actual)

    actual = table.values('foo').list()
    expect = ['a', 'b', 'c']
    eq_(expect, actual)

    actual = table.values('foo').tuple()
    expect = ('a', 'b', 'c')
    eq_(expect, actual)

    actual = table.values('bar').sum()
    expect = 5
    eq_(expect, actual)

    actual = table.data().dict()
    expect = {'a': 1, 'b': 2, 'c': 2}
    eq_(expect, actual)

def test_repr_html_limit():
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))

    # lower limit
    etl.config.display_limit = 2

    expect = """<table class='petl'>
<thead>
<tr>
<th>foo</th>
<th>bar</th>
</tr>
</thead>
<tbody>
<tr>
<td>a</td>
<td style='text-align: right'>1</td>
</tr>
<tr>
<td>b</td>
<td style='text-align: right'>2</td>
</tr>
</tbody>
</table>
<p><strong>...</strong></p>
"""
    actual = etl.wrap(table)._repr_html_()
    print(actual)
    for l1, l2 in zip(expect.split('\n'), actual.split('\n')):
        eq_(l1, l2)

def test_integration():
    tbl = [('foo', 'bar', 'baz'),
           ('apples', 1, 2.5),
           ('oranges', 3, 4.4),
           ('pears', 7, .1)]
    df = etl.wrap(tbl).todataframe()
    tbl2 = etl.fromdataframe(df)
    ieq(tbl, tbl2)
    ieq(tbl, tbl2)  # verify the table can be iterated more than once

def test_valuesarray_explicit_dtype():
    t = [('foo', 'bar', 'baz'),
         ('apples', 1, 2.5),
         ('oranges', 3, 4.4),
         ('pears', 7, .1)]
    expect = np.array([1, 3, 7], dtype='i2')
    actual = etl.wrap(t).values('bar').array(dtype='i2')
    eq_(expect.dtype, actual.dtype)
    assert np.all(expect == actual)

def test_transform_characters_data(settings):
    settings.STAR_WARS_CHARACTERS_BASE_FIELDS = ['name']
    url_to_name_map = {
        'http://test.com/planets/1/': 'Test planet 1',
        'http://test.com/planets/2/': 'Test planet 2',
    }
    table = etl.wrap([
        ['name', 'edited', 'homeworld'],
        ['Test 1', '2014-12-09T13:50:51.644000Z', 'http://test.com/planets/1/'],
        ['Test 2', '2014-12-20T21:17:56.891000Z', 'http://test.com/planets/2/'],
    ])

    result = transform_characters_data(table, url_to_name_map)

    assert list(result.values('name')) == ['Test 1', 'Test 2']
    assert list(result.values('date')) == ['2014-12-09', '2014-12-20']
    assert list(result.values('homeworld')) == [
        'Test planet 1',
        'Test planet 2',
    ]

def __init__(self, lst=[]):
    self.table = None
    lst_type = type(lst)
    if lst_type in [list, tuple]:
        # Check for empty list
        if not len(lst):
            self.table = petl.fromdicts([])
        else:
            row_type = type(lst[0])
            # Check for list of dicts
            if row_type == dict:
                self.table = petl.fromdicts(lst)
            # Check for list of lists
            elif row_type in [list, tuple]:
                self.table = petl.wrap(lst)
    else:
        # Create from a petl table
        self.table = lst

    if not self.is_valid_table():
        raise ValueError("Could not create Table")

    # Count how many times someone is indexing directly into this table,
    # so we can warn against inefficient usage.
    self._index_count = 0

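# A minimal usage sketch (not in the original source) of the three
# construction paths handled by __init__ above; it assumes the surrounding
# parsons-style Table class is importable, and the sample data is
# illustrative only.
import petl

t1 = Table([{'foo': 'a', 'bar': 1}, {'foo': 'b', 'bar': 2}])  # list of dicts -> petl.fromdicts
t2 = Table([['foo', 'bar'], ['a', 1], ['b', 2]])              # header row + data rows -> petl.wrap
t3 = Table(petl.wrap([['foo', 'bar'], ['a', 1]]))             # existing petl table, stored as-is
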
def extract(profile, workdir):
    dicts = []
    outer = parse(profile)
    station_ids = outer.findall('./stationID')
    updatetime_str = outer.getroot().attrib.get('updatetime')
    modify_time = (parser.parse(updatetime_str, ignoretz=True)
                   if updatetime_str else datetime.datetime.now())
    for station_id_tree in station_ids:
        status = station_id_tree.find('./status')
        if status is not None and status.find('./station') is not None:
            # Skip stations reporting 無觀測 ("no observation").
            if status.find('./station').text.strip().startswith(u'無觀測'):
                continue
        profile = station_id_tree.find('./profile')
        station_id = station_id_tree.attrib['id'].strip()
        outer_data = {
            'station_id': station_id,
            'seas_chName': profile.find('./seas_chName').text.strip(),
            'latitude': float(profile.find('./latitude').text.strip()),
            'longitude': float(profile.find('./longitude').text.strip()),
            'chName': (getattr(profile.find('./chName'), 'text', None)
                       and profile.find('./chName').text.strip()),
            'chCity': (getattr(profile.find('./chCity'), 'text', None)
                       and profile.find('./chCity').text.strip()),
            'kind_chName': (getattr(profile.find('./kind_chName'), 'text', None)
                            and profile.find('./kind_chName').text.strip()),
            'chTown': (getattr(profile.find('./chTown'), 'text', None)
                       and profile.find('./chTown').text.strip()),
            'chLocation': (getattr(profile.find('./chLocation'), 'text', None)
                           and profile.find('./chLocation').text.strip()),
            'dataItem': (getattr(profile.find('./dataItem'), 'text', None)
                         and profile.find('./dataItem').text.strip()),
            'modifytime': modify_time,
            'updatetime': datetime.datetime.now(),
        }
        dicts.append(outer_data)
    return petl.wrap([row for row in petl.fromdicts(dicts)])

def test_export_gift_cards_in_batches_to_csv(
    gift_card,
    gift_card_expiry_date,
    gift_card_used,
    tmpdir,
):
    # given
    gift_cards = GiftCard.objects.exclude(id=gift_card_used.id).order_by("pk")

    table = etl.wrap([["code"]])
    temp_file = NamedTemporaryFile()
    etl.tocsv(table, temp_file.name, delimiter=",")

    # when
    export_gift_cards_in_batches(
        gift_cards,
        ["code"],
        ",",
        temp_file,
        "csv",
    )

    # then
    file_content = temp_file.read().decode().split("\r\n")

    # ensure headers are in the file
    assert "code" in file_content

    for card in gift_cards:
        assert card.code in file_content

    shutil.rmtree(tmpdir)

def vcfunpackcall(table, *keys):
    """
    Unpack the call column. E.g.::

        >>> import petl as etl
        >>> # activate bio extensions
        ... import petlx.bio
        >>> table1 = (
        ...     etl
        ...     .fromvcf('fixture/sample.vcf')
        ...     .vcfmeltsamples()
        ...     .vcfunpackcall()
        ... )
        >>> table1
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | CHROM | POS | ID   | REF | ALT | QUAL | FILTER | INFO | SAMPLE    | DP   | GQ   | GT    | HQ       |
        +=======+=====+======+=====+=====+======+========+======+===========+======+======+=======+==========+
        | '19'  | 111 | None | 'A' | [C] | 9.6  | None   | {}   | 'NA00001' | None | None | '0|0' | [10, 10] |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | '19'  | 111 | None | 'A' | [C] | 9.6  | None   | {}   | 'NA00002' | None | None | '0|0' | [10, 10] |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | '19'  | 111 | None | 'A' | [C] | 9.6  | None   | {}   | 'NA00003' | None | None | '0/1' | [3, 3]   |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | '19'  | 112 | None | 'A' | [G] | 10   | None   | {}   | 'NA00001' | None | None | '0|0' | [10, 10] |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | '19'  | 112 | None | 'A' | [G] | 10   | None   | {}   | 'NA00002' | None | None | '0|0' | [10, 10] |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        ...

    """
    result = (
        etl.wrap(table)
        .convert('CALL', lambda v: v.data._asdict())
        .unpackdict('CALL', keys=keys)
    )
    return result

def test_export_products_in_batches_for_csv(
    product_list,
    user_export_file,
    tmpdir,
    media_root,
):
    # given
    qs = Product.objects.all()
    export_info = {
        "fields": [
            ProductFieldEnum.NAME.value,
            ProductFieldEnum.DESCRIPTION.value,
            ProductFieldEnum.VARIANT_SKU.value,
        ],
        "warehouses": [],
        "attributes": [],
        "channels": [],
    }
    export_fields = ["id", "name", "variants__sku"]
    expected_headers = ["id", "name", "variant sku"]

    table = etl.wrap([expected_headers])
    temp_file = NamedTemporaryFile()
    etl.tocsv(table, temp_file.name, delimiter=";")

    # when
    export_products_in_batches(
        qs,
        export_info,
        set(export_fields),
        export_fields,
        ";",
        temp_file,
        FileTypes.CSV,
    )

    # then
    expected_data = []
    for product in qs.order_by("pk"):
        product_data = []
        id = graphene.Node.to_global_id("Product", product.pk)
        product_data.append(id)
        product_data.append(product.name)
        for variant in product.variants.all():
            product_data.append(str(variant.sku))
        expected_data.append(product_data)

    file_content = temp_file.read().decode().split("\r\n")

    # ensure headers are in file
    assert ";".join(expected_headers) in file_content

    for row in expected_data:
        assert ";".join(row) in file_content

    shutil.rmtree(tmpdir)

def test_export_products_in_batches_for_xlsx(
    product_list,
    user_export_file,
    tmpdir,
    media_root,
):
    # given
    qs = Product.objects.all()
    export_info = {
        "fields": [ProductFieldEnum.NAME.value, ProductFieldEnum.VARIANT_SKU.value],
        "warehouses": [],
        "attributes": [],
        "channels": [],
    }
    export_fields = ["id", "name", "variants__sku"]
    expected_headers = ["id", "name", "variant sku"]

    table = etl.wrap([expected_headers])
    temp_file = NamedTemporaryFile(suffix=".xlsx")
    etl.io.xlsx.toxlsx(table, temp_file.name)

    # when
    export_products_in_batches(
        qs,
        export_info,
        set(export_fields),
        export_fields,
        ";",
        temp_file,
        FileTypes.XLSX,
    )

    # then
    expected_data = []
    for product in qs.order_by("pk"):
        product_data = []
        product_data.append(product.pk)
        product_data.append(product.name)
        for variant in product.variants.all():
            product_data.append(variant.sku)
        expected_data.append(product_data)

    wb_obj = openpyxl.load_workbook(temp_file)
    sheet_obj = wb_obj.active
    max_col = sheet_obj.max_column
    max_row = sheet_obj.max_row
    headers = [sheet_obj.cell(row=1, column=i).value for i in range(1, max_col + 1)]
    data = []
    for i in range(2, max_row + 1):
        row = []
        for j in range(1, max_col + 1):
            row.append(sheet_obj.cell(row=i, column=j).value)
        data.append(row)

    assert headers == expected_headers
    for row in expected_data:
        assert row in data

    shutil.rmtree(tmpdir)

def test_teecsv_write_header():
    t1 = (('foo', 'bar'),
          ('a', '2'),
          ('b', '1'),
          ('c', '3'))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    (etl.wrap(t1)
        .convertnumbers()
        .teecsv(f1.name, write_header=False, encoding='ascii')
        .selectgt('bar', 1)
        .tocsv(f2.name, encoding='ascii'))
    ieq(t1[1:], etl.fromcsv(f1.name, encoding='ascii'))
    ieq(etl.wrap(t1).convertnumbers().selectgt('bar', 1),
        etl.fromcsv(f2.name, encoding='ascii').convertnumbers())

def test_repr():
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    expect = str(etl.look(table))
    actual = repr(etl.wrap(table))
    eq_(expect, actual)

def main():
    """Compute various vol estimates."""
    logging.basicConfig(level=logging.INFO, format='%(levelname)-8s: %(message)s')
    parser = argparse.ArgumentParser(description=__doc__.strip())
    ameritrade.add_args(parser)
    parser.add_argument('-s', '--symbol', action='store', default='SPY',
                        help="Symbol to compute on")
    args = parser.parse_args()
    config = ameritrade.config_from_args(args)
    api = ameritrade.open(config)

    # Fetch call chain for underlying.
    hist = api.GetPriceHistory(symbol=args.symbol,
                               frequency=1, frequencyType='daily',
                               period=2, periodType='year')
    candle = price_history_to_arrays(hist)

    # Compute historical volatility estimates and centile of vol distribution
    # of underlying over various time periods.
    header = ['windays', 'annual_vol', 'iv_percentile', 'mean', 'std']
    rows = [header]
    for days in [7, 15, 20, 30, 60, 90, 120, 180, 365, None]:
        centiles = True
        if days is None:
            centiles = False
            days = candle.datetime.shape[0]
        vol = historical_volatility(candle.datetime[-days:], candle.close[-days:])
        if centiles:
            _, vols = historical_volatility_dist(candle.datetime, candle.close, days)
            assert len(vols) > 0
            meanvol = numpy.mean(vols)
            stdvol = numpy.std(vols)
            centile = norm.cdf(vol, meanvol, stdvol)
            rows.append([days, vol, centile, meanvol, stdvol])
        else:
            rows.append([days, vol, '', '', ''])

    convert = lambda v: Decimal(v).quantize(Q) if v else ''
    table = (petl.wrap(rows)
             .convert('annual_vol', convert)
             .convert('iv_percentile', convert)
             .convert('mean', convert)
             .convert('std', convert)
             .cut('windays', 'mean', 'std', 'annual_vol', 'iv_percentile'))
    print(table.lookallstr())

def test_export_products_in_batches_for_csv(
    product_list,
    user_export_file,
    tmpdir,
    media_root,
):
    # given
    qs = Product.objects.all()
    export_info = {
        "fields": [ProductFieldEnum.NAME.value, ProductFieldEnum.VARIANT_SKU.value],
        "warehouses": [],
        "attributes": [],
    }
    file_name = "test.csv"
    export_fields = ["id", "name", "variants__sku"]
    expected_headers = ["id", "name", "variant sku"]

    table = etl.wrap([expected_headers])
    with NamedTemporaryFile() as temp_file:
        etl.tocsv(table, temp_file.name, delimiter=";")
        user_export_file.content_file.save(file_name, temp_file)

    assert user_export_file.content_file

    # when
    export_products_in_batches(
        qs,
        export_info,
        set(export_fields),
        export_fields,
        ";",
        user_export_file,
        FileTypes.CSV,
    )

    # then
    user_export_file.refresh_from_db()
    csv_file = user_export_file.content_file
    assert csv_file

    expected_data = []
    for product in qs.order_by("pk"):
        product_data = []
        product_data.append(str(product.pk))
        product_data.append(product.name)
        for variant in product.variants.all():
            product_data.append(str(variant.sku))
        expected_data.append(product_data)

    file_content = csv_file.read().decode().split("\r\n")

    # ensure headers are in file
    assert ";".join(expected_headers) in file_content

    for row in expected_data:
        assert ";".join(row) in file_content

    shutil.rmtree(tmpdir)

def test_teetext():
    t1 = (('foo', 'bar'),
          ('a', 2),
          ('b', 1),
          ('c', 3))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    prologue = 'foo,bar\n'
    template = '{foo},{bar}\n'
    epilogue = 'd,4'
    (etl.wrap(t1)
        .teetext(f1.name, template=template, prologue=prologue,
                 epilogue=epilogue)
        .selectgt('bar', 1)
        .topickle(f2.name))
    ieq(t1 + (('d', 4),), etl.fromcsv(f1.name).convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))

def find1(org_name, org_settlement, pir_to_details,
          input_fields=INPUT_FIELDS, output_fields=OUTPUT_FIELDS,
          settlements=SETTLEMENTS):
    input = petl.wrap([
        ['id', input_fields.org_name, input_fields.settlement],
        [1, org_name, org_settlement],
    ])
    parser = Parser()
    parser.build(settlements, report_conflicts=True)
    matches = find_matches(input, input_fields, output_fields,
                           pir_to_details, parser.parse)
    return records_to_dict(matches)[1]

def test_integration(xlsx_test_table):
    f = NamedTemporaryFile(delete=True, suffix='.xlsx')
    f.close()
    tbl = etl.wrap(xlsx_test_table)
    tbl.toxlsx(f.name, 'Sheet1')
    actual = etl.fromxlsx(f.name, 'Sheet1')
    ieq(tbl, actual)
    tbl.appendxlsx(f.name, 'Sheet1')
    expect = tbl.cat(tbl)
    ieq(expect, actual)

def materialize(self):
    """
    "Materializes" a Table, meaning all data is loaded into memory and all
    pending transformations are applied.

    Use this if petl's lazy-loading behavior is causing you problems, e.g.
    if you want to read data from a file immediately.
    """
    self.table = petl.wrap(petl.tupleoftuples(self.table))

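# A small illustration (not in the original source) of the lazy behavior the
# docstring above mentions: petl re-applies pending transformations on every
# iteration, and the tupleoftuples() call forces a single evaluation. The
# call counter below is purely illustrative.
import petl

calls = [0]

def expensive(v):
    calls[0] += 1
    return v.upper()

tbl = petl.wrap([['foo'], ['a'], ['b']]).convert('foo', expensive)
list(tbl)        # first pass: expensive() runs once per data row
list(tbl)        # second pass: it runs again, because nothing was cached
print(calls[0])  # -> 4 (2 rows x 2 passes)

snapshot = petl.wrap(petl.tupleoftuples(tbl))  # evaluate once, keep rows in memory
list(snapshot)                                 # no further calls to expensive()
print(calls[0])  # -> 6 (building the snapshot cost one more pass)
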
def test_teetext_unicode():
    t1 = ((u'foo', u'bar'),
          (u'Արամ Խաչատրյան', 2),
          (u'Johann Strauß', 1),
          (u'Вагиф Сәмәдоғлу', 3))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    prologue = u'foo,bar\n'
    template = u'{foo},{bar}\n'
    epilogue = u'章子怡,4'
    (etl.wrap(t1)
        .teetext(f1.name, template=template, prologue=prologue,
                 epilogue=epilogue, encoding='utf-8')
        .selectgt('bar', 1)
        .topickle(f2.name))
    ieq(t1 + ((u'章子怡', 4),),
        etl.fromcsv(f1.name, encoding='utf-8').convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))

def _parse_crew_list(self):
    crew_pattern = re.compile(r'\n(\d{2}-\w{3})\s+(\S*)\s+(.*)')
    crew_lines = crew_pattern.findall(self.roster_text_string)
    crew_table = (
        etl.wrap(crew_lines)
        .pushheader(['flight_date', 'flight_number', 'crew_list'])
        .convert('crew_list', self._extract_crew)
        .convert('flight_date', self._convert_date_without_year)
        .convert('flight_number', self._sanitize_flight_number)
    )
    return crew_table

def create_file_with_headers(file_headers: List[str], delimiter: str, file_type: str):
    table = etl.wrap([file_headers])

    if file_type == FileTypes.CSV:
        temp_file = NamedTemporaryFile("ab+", suffix=".csv")
        etl.tocsv(table, temp_file.name, delimiter=delimiter)
    else:
        temp_file = NamedTemporaryFile("ab+", suffix=".xlsx")
        etl.io.xlsx.toxlsx(table, temp_file.name)

    return temp_file

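# A short usage sketch (not in the original source) for the helper above,
# assuming the FileTypes constants used in the surrounding tests. Reading
# the handle back should yield just the delimited header row, e.g.
# b'id;name;variant sku\r\n' for the CSV case.
temp_file = create_file_with_headers(["id", "name", "variant sku"], ";", FileTypes.CSV)
print(temp_file.read())
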
def main():
    parser = argparse.ArgumentParser(description=__doc__.strip())
    parser.add_argument('--betasym', default='SPY',
                        help='Symbol to use as reference for beta')
    ameritrade.add_args(parser)
    args = parser.parse_args()
    config = ameritrade.config_from_args(args)
    api = ameritrade.open(config)
    accountId = utils.GetMainAccount(api)

    # Get the account's portfolio stock positions.
    all_positions = utils.GetPositions(api, accountId)
    positions = [(pos.instrument.symbol, pos.longQuantity - pos.shortQuantity)
                 for pos in all_positions
                 if pos.instrument.assetType == 'EQUITY']
    positions.append(('$SPX.X', Decimal('1')))
    positions.append(('SPY', Decimal('1')))
    positions = [list(x) for x in sorted(positions)]

    # Get betas from the API.
    for row in positions:
        symbol = row[0]
        api_beta = GetBeta(api, symbol)
        row.append(api_beta.quantize(Q))

    # Get time series for the benchmark.
    for periodType in PERIOD_TYPES:
        # Get benchmark returns for that period type.
        time, bench_returns = GetReturns(api, args.betasym, periodType)

        # Get price time series for all symbols.
        for row in positions:
            symbol = row[0]
            ptime, returns = GetReturns(api, symbol, periodType)
            if time.shape != ptime.shape:
                print((time.shape, ptime.shape, row))
                pyplot.plot(time, bench_returns)
                pyplot.plot(ptime, returns)
                pyplot.show()
                computed_beta = Decimal('-100')
            else:
                assert list(time) == list(ptime)
                cov = numpy.cov(returns, bench_returns)
                computed_beta = cov[0, 1] / cov[1, 1]
            row.append(Decimal(computed_beta).quantize(Q))

    header = ['symbol', 'quantity', 'api_beta'] + PERIOD_TYPES
    positions.insert(0, header)
    table = (petl.wrap(positions)
             .cutout('quantity'))
    print(table.lookallstr())

def test_integration():
    tbl = (('foo', 'bar'),
           ('A', 1),
           ('B', 2),
           ('C', 2),
           (u'é', datetime(2012, 1, 1)))
    f = NamedTemporaryFile(delete=True, suffix='.xlsx')
    f.close()
    tbl = etl.wrap(tbl)
    tbl.toxlsx(f.name, 'Sheet1')
    actual = etl.fromxlsx(f.name, 'Sheet1')
    ieq(tbl, actual)
    tbl.appendxlsx(f.name, 'Sheet1')
    expect = tbl.cat(tbl)
    ieq(expect, actual)

def test_basics():
    t1 = (('foo', 'bar'),
          ('A', 1),
          ('B', 2))
    w1 = etl.wrap(t1)

    eq_(('foo', 'bar'), w1.header())
    eq_(etl.header(w1), w1.header())
    ieq((('A', 1), ('B', 2)), w1.data())
    ieq(etl.data(w1), w1.data())

    w2 = w1.cut('bar', 'foo')
    expect2 = (('bar', 'foo'),
               (1, 'A'),
               (2, 'B'))
    ieq(expect2, w2)
    ieq(etl.cut(w1, 'bar', 'foo'), w2)

    w3 = w1.cut('bar', 'foo').cut('foo', 'bar')
    ieq(t1, w3)

def test_integration():
    f = NamedTemporaryFile()

    # set up a new hdf5 table to work with
    h5file = tables.open_file(f.name, mode="w", title="Test file")
    h5file.create_group('/', 'testgroup', 'Test Group')
    h5file.create_table('/testgroup', 'testtable', FooBar, 'Test Table')
    h5file.flush()
    h5file.close()

    # load some initial data via tohdf5()
    table1 = etl.wrap((('foo', 'bar'),
                       (1, b'asdfgh'),
                       (2, b'qwerty'),
                       (3, b'zxcvbn')))
    table1.tohdf5(f.name, '/testgroup', 'testtable')
    ieq(table1, etl.fromhdf5(f.name, '/testgroup', 'testtable'))

    # append some more data
    table1.appendhdf5(f.name, '/testgroup', 'testtable')
    ieq(chain(table1, table1[1:]),
        etl.fromhdf5(f.name, '/testgroup', 'testtable'))

import petl as etl

# Extract data from the example csv file
table1 = etl.fromcsv('example.csv')
print(table1)  # or: etl.look(table1)

# Transformation functions to be applied to the extracted data
table2 = etl.convert(table1, 'foo', 'upper')
table3 = etl.convert(table2, 'bar', int)
table4 = etl.convert(table3, 'baz', float)
table5 = etl.addfield(table4, 'finally', lambda row: row.bar * row.baz)
print(table5)  # or: etl.look(table5)

# The same ETL pipeline written in a functional (fluent) style
table = (etl
         .fromcsv('example.csv')
         .convert('foo', 'upper')
         .convert('bar', int)
         .convert('baz', float)
         .addfield('finally', lambda row: row.bar * row.baz))
table.look()  # look() only displays five rows
print(table)

# OOP style: wrap a list of lists
l = [['foo', 'bar'],
     ['a', 1],
     ['b', 2],
     ['c', 2]]
table6 = etl.wrap(l)
print(table6)

# the dtype can also be partially specified
a = etl.toarray(table, dtype={'foo': 'a4'})
a


# fromarray()
#############

import petl as etl
import numpy as np
a = np.array([('apples', 1, 2.5),
              ('oranges', 3, 4.4),
              ('pears', 7, 0.1)],
             dtype='U8, i4, f4')
table = etl.fromarray(a)
table


# valuestoarray()
#################

import petl as etl
table = [('foo', 'bar', 'baz'),
         ('apples', 1, 2.5),
         ('oranges', 3, 4.4),
         ('pears', 7, .1)]
table = etl.wrap(table)
table.values('bar').array()
# specify dtype
table.values('bar').array(dtype='i4')

import petl as etl
table1 = etl.fromcsv('example.csv')
table2 = etl.convert(table1, 'foo', 'upper')
table3 = etl.convert(table2, 'bar', int)
table4 = etl.convert(table3, 'baz', float)
table5 = etl.addfield(table4, 'quux', lambda row: row.bar * row.baz)
table5

table = (
    etl
    .fromcsv('example.csv')
    .convert('foo', 'upper')
    .convert('bar', int)
    .convert('baz', float)
    .addfield('quux', lambda row: row.bar * row.baz)
)
table

l = [['foo', 'bar'], ['a', 1], ['b', 2], ['c', 2]]
table = etl.wrap(l)
table.look()

l = [['foo', 'bar'], ['a', 1], ['b', 2], ['c', 2]]
table = etl.wrap(l)
table

etl.config.look_index_header = True
table