Example #1
0
    def test_integration():
        """Exercise ``intervaljoin`` through the wrapped-table method syntax."""
        # Left-hand intervals are described by the 'begin'/'end' fields.
        left_rows = (('begin', 'end', 'quux'),
                     (1, 2, 'a'),
                     (2, 4, 'b'),
                     (2, 5, 'c'),
                     (9, 14, 'd'),
                     (9, 140, 'e'),
                     (1, 1, 'f'),
                     (2, 2, 'g'),
                     (4, 4, 'h'),
                     (5, 5, 'i'),
                     (1, 8, 'j'))
        # Right-hand intervals are described by the 'start'/'stop' fields.
        right_rows = (('start', 'stop', 'value'),
                      (1, 4, 'foo'),
                      (3, 7, 'bar'),
                      (4, 9, 'baz'))
        left = etl.wrap(left_rows)
        right = etl.wrap(right_rows)

        joined = left.intervaljoin(right,
                                   lstart='begin', lstop='end',
                                   rstart='start', rstop='stop')

        expected = (('begin', 'end', 'quux', 'start', 'stop', 'value'),
                    (1, 2, 'a', 1, 4, 'foo'),
                    (2, 4, 'b', 1, 4, 'foo'),
                    (2, 4, 'b', 3, 7, 'bar'),
                    (2, 5, 'c', 1, 4, 'foo'),
                    (2, 5, 'c', 3, 7, 'bar'),
                    (2, 5, 'c', 4, 9, 'baz'),
                    (1, 8, 'j', 1, 4, 'foo'),
                    (1, 8, 'j', 3, 7, 'bar'),
                    (1, 8, 'j', 4, 9, 'baz'))

        # The comparison is run twice on purpose; presumably this confirms
        # the joined table can be iterated more than once.
        for _ in range(2):
            ieq(expected, joined)
Example #2
0
 def test_integration():
     """Round-trip a table through an .xlsx file using the wrapped API."""
     rows = (('foo', 'bar'),
             ('A', 1),
             ('B', 2),
             ('C', 2),
             (u'é', datetime(2012, 1, 1)))

     # Only a path is needed: reserve a name, then release the handle so the
     # writer can open the file itself.
     tmp = NamedTemporaryFile(delete=False)
     tmp.close()
     path = tmp.name

     etl.wrap(rows).toxlsx(path, 'Sheet1')
     roundtrip = etl.fromxlsx(path, 'Sheet1')
     ieq(rows, roundtrip)
Example #3
0
 def test_integration():
     """Round-trip a table through an .xls file using the wrapped API."""
     rows = (('foo', 'bar'),
             ('A', 1),
             ('B', 2),
             ('C', 2))

     # Reserve a file name and release the handle before writing by path.
     tmp = NamedTemporaryFile(delete=False)
     tmp.close()
     path = tmp.name

     etl.wrap(rows).toxls(path, 'Sheet1')
     roundtrip = etl.fromxls(path, 'Sheet1')

     # Compared twice on purpose; presumably to confirm the loaded table can
     # be iterated more than once.
     for _ in range(2):
         ieq(rows, roundtrip)
Example #4
0
def test_wrap_tuple_return():
    """Check that both tables returned by ``diff`` offer the method API."""
    header = ('foo', 'bar')
    tablea = etl.wrap((header, ('A', 1), ('C', 7)))
    tableb = etl.wrap((header, ('B', 5), ('C', 7)))

    added, removed = tablea.diff(tableb)

    # Each half of the returned tuple must itself expose header()/data().
    for part in (added, removed):
        eq_(header, part.header())
    for part in (added, removed):
        ieq(etl.data(part), part.data())
Example #5
0
def test_teepickle():
    """Tee a pipeline to a pickle file and verify both written outputs.

    The first file must receive every row of the source table, the second
    only the rows that pass the downstream ``selectgt`` filter.
    """
    t1 = (('foo', 'bar'), ('a', 2), ('b', 1), ('c', 3))

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    etl.wrap(t1).teepickle(f1.name).selectgt('bar', 1).topickle(f2.name)

    ieq(t1, etl.frompickle(f1.name))
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
Example #6
0
def test_teehtml_unicode():
    """Tee non-ASCII rows to a UTF-8 HTML file and verify both outputs.

    The HTML file must contain the whole table, the pickle file only the rows
    that pass the downstream ``selectgt`` filter.
    """
    t1 = (
        (u"foo", u"bar"),
        (u"Արամ Խաչատրյան", 2),
        (u"Johann Strauß", 1),
        (u"Вагиф Сәмәдоғлу", 3),
    )

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    (
        etl.wrap(t1)
        .teehtml(f1.name, encoding="utf-8")
        .selectgt("bar", 1)
        .topickle(f2.name)
    )

    # The HTML is read back as XML, one row per <tr>, cells from <th>/<td>.
    html_table = etl.fromxml(f1.name, ".//tr", ("th", "td"), encoding="utf-8")
    ieq(t1, html_table.convertnumbers())
    ieq(etl.wrap(t1).selectgt("bar", 1), etl.frompickle(f2.name))
Example #7
0
def test_teehtml():
    """Tee a pipeline to an HTML file and verify both written outputs.

    The HTML file must contain the whole table, the pickle file only the rows
    that pass the downstream ``selectgt`` filter.
    """
    t1 = (('foo', 'bar'), ('a', 2), ('b', 1), ('c', 3))

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    etl.wrap(t1).teehtml(f1.name).selectgt('bar', 1).topickle(f2.name)

    # The HTML is read back as XML, one row per <tr>, cells from <th>/<td>.
    ieq(t1, etl.fromxml(f1.name, './/tr', ('th', 'td')).convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
Example #8
0
def test_teepickle():
    """Tee a pipeline to a pickle file and verify both written outputs.

    The first file must receive every row of the source table, the second
    only the rows that pass the downstream ``selectgt`` filter.
    """
    t1 = (("foo", "bar"), ("a", 2), ("b", 1), ("c", 3))

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    etl.wrap(t1).teepickle(f1.name).selectgt("bar", 1).topickle(f2.name)

    ieq(t1, etl.frompickle(f1.name))
    ieq(etl.wrap(t1).selectgt("bar", 1), etl.frompickle(f2.name))
Example #9
0
def test_teehtml():
    """Tee a pipeline to an HTML file and verify both written outputs.

    The HTML file must contain the whole table, the pickle file only the rows
    that pass the downstream ``selectgt`` filter.
    """
    t1 = (("foo", "bar"), ("a", 2), ("b", 1), ("c", 3))

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    etl.wrap(t1).teehtml(f1.name).selectgt("bar", 1).topickle(f2.name)

    # The HTML is read back as XML, one row per <tr>, cells from <th>/<td>.
    ieq(t1, etl.fromxml(f1.name, ".//tr", ("th", "td")).convertnumbers())
    ieq(etl.wrap(t1).selectgt("bar", 1), etl.frompickle(f2.name))
Example #10
0
 def test_integration():
     """Write a wrapped table to .xlsx and read it back unchanged."""
     header = ('foo', 'bar')
     body = (('A', 1), ('B', 2), ('C', 2), (u'é', datetime(2012, 1, 1)))
     tbl = (header,) + body

     # Obtain a unique path; the handle is closed so the writer can open it.
     handle = NamedTemporaryFile(delete=False)
     handle.close()

     wrapped = etl.wrap(tbl)
     wrapped.toxlsx(handle.name, 'Sheet1')
     reloaded = etl.fromxlsx(handle.name, 'Sheet1')
     ieq(tbl, reloaded)
Example #11
0
def test_teecsv_unicode():
    """Tee non-ASCII rows to a UTF-8 CSV file and verify both outputs.

    The tee file must contain the whole table, the second file only the rows
    that pass the downstream ``selectgt`` filter.
    """
    t1 = (
        (u"name", u"id"),
        (u"Արամ Խաչատրյան", 1),
        (u"Johann Strauß", 2),
        (u"Вагиф Сәмәдоғлу", 3),
        (u"章子怡", 4),
    )

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    (
        etl.wrap(t1)
        .teecsv(f1.name, encoding="utf-8")
        .selectgt("id", 1)
        .tocsv(f2.name, encoding="utf-8")
    )

    # CSV values come back as text, so numbers are converted before comparing.
    ieq(t1, etl.fromcsv(f1.name, encoding="utf-8").convertnumbers())
    ieq(
        etl.wrap(t1).selectgt("id", 1),
        etl.fromcsv(f2.name, encoding="utf-8").convertnumbers(),
    )
Example #12
0
 def test_integration():
     """Write a wrapped table to .xls and read it back unchanged."""
     expect = (('foo', 'bar'), ('A', 1), ('B', 2), ('C', 2))

     # Obtain a unique path; the handle is closed so the writer can open it.
     handle = NamedTemporaryFile(delete=False)
     handle.close()

     wrapped = etl.wrap(expect)
     wrapped.toxls(handle.name, 'Sheet1')
     reloaded = etl.fromxls(handle.name, 'Sheet1')

     # Compared twice on purpose; presumably to confirm the loaded table can
     # be iterated more than once.
     for _ in range(2):
         ieq(expect, reloaded)
Example #13
0
def test_container():
    """Check indexing by position, indexing by field name and ``len``."""
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))

    # Integer index -> the row at that position (row 0 is the header).
    eq_(('foo', 'bar'), etl.wrap(table)[0])

    # Field-name index -> the values of that column.
    ieq((1, 2, 2), etl.wrap(table)['bar'])

    # Length counts the header row as well as the three data rows.
    eq_(4, len(etl.wrap(table)))
Example #14
0
def test_teetsv():
    """Tee a pipeline to a TSV file and verify both written outputs.

    The tee file must contain the whole table, the second file only the rows
    that pass the downstream ``selectgt`` filter.
    """
    t1 = (("foo", "bar"), ("a", 2), ("b", 1), ("c", 3))

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    (
        etl.wrap(t1)
        .teetsv(f1.name, encoding="ascii")
        .selectgt("bar", 1)
        .totsv(f2.name, encoding="ascii")
    )

    # TSV values come back as text, so numbers are converted before comparing.
    ieq(t1, etl.fromtsv(f1.name, encoding="ascii").convertnumbers())
    ieq(
        etl.wrap(t1).selectgt("bar", 1),
        etl.fromtsv(f2.name, encoding="ascii").convertnumbers(),
    )
Example #15
0
def test_wrap_tuple_return():
    """Verify that ``diff`` on a wrapped table returns wrapped tables."""
    fields = ('foo', 'bar')
    rows_a = (fields, ('A', 1), ('C', 7))
    rows_b = (fields, ('B', 5), ('C', 7))
    tablea = etl.wrap(rows_a)
    tableb = etl.wrap(rows_b)

    added, removed = tablea.diff(tableb)

    # Both members of the returned pair must support the method-style API.
    eq_(fields, added.header())
    eq_(fields, removed.header())
    for result in (added, removed):
        ieq(etl.data(result), result.data())
Example #16
0
def test_container():
    """Verify the container protocol (``[]`` and ``len``) on wrapped tables."""
    table = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))

    # Positional access returns a row; position 0 is the header.
    first_row = etl.wrap(table)[0]
    eq_(('foo', 'bar'), first_row)

    # Access by field name returns that column's values.
    bar_values = etl.wrap(table)['bar']
    ieq((1, 2, 2), bar_values)

    # The header is included in the row count.
    row_count = len(etl.wrap(table))
    eq_(4, row_count)
Example #17
0
def test_teehtml_unicode():
    """Tee non-ASCII rows to a UTF-8 HTML file and verify both outputs.

    The HTML file must contain the whole table, the pickle file only the rows
    that pass the downstream ``selectgt`` filter.
    """
    t1 = ((u'foo', u'bar'), (u'Արամ Խաչատրյան', 2), (u'Johann Strauß', 1),
          (u'Вагиф Сәмәдоғлу', 3))

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    teed = etl.wrap(t1).teehtml(f1.name, encoding='utf-8')
    teed.selectgt('bar', 1).topickle(f2.name)

    # The HTML is read back as XML, one row per <tr>, cells from <th>/<td>.
    html_table = etl.fromxml(f1.name, './/tr', ('th', 'td'), encoding='utf-8')
    ieq(t1, html_table.convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
Example #18
0
def test_repr_html():
    """Compare the HTML representation of a wrapped table line by line."""
    table = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    expect = """<table class='petl'>
<thead>
<tr>
<th>foo</th>
<th>bar</th>
</tr>
</thead>
<tbody>
<tr>
<td>a</td>
<td style='text-align: right'>1</td>
</tr>
<tr>
<td>b</td>
<td style='text-align: right'>2</td>
</tr>
<tr>
<td>c</td>
<td style='text-align: right'>2</td>
</tr>
</tbody>
</table>
"""
    actual = etl.wrap(table)._repr_html_()

    # Line-wise comparison pinpoints the first differing line; zip stops at
    # the shorter of the two sequences.
    expected_lines = expect.split('\n')
    actual_lines = actual.split('\n')
    for want, got in zip(expected_lines, actual_lines):
        eq_(want, got)
Example #19
0
def test_values_container_convenience_methods():
    """Check the collection helpers available on values/data views."""
    rows = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    table = etl.wrap(rows)

    # Column 'foo' materialised as a set, a list and a tuple.
    eq_({'a', 'b', 'c'}, table.values('foo').set())
    eq_(['a', 'b', 'c'], table.values('foo').list())
    eq_(('a', 'b', 'c'), table.values('foo').tuple())

    # Numeric column summed.
    eq_(5, table.values('bar').sum())

    # Data rows converted to a dict; each row is a (key, value) pair.
    eq_({'a': 1, 'b': 2, 'c': 2}, table.data().dict())
Example #20
0
def test_repr_html_limit():
    """Check that ``_repr_html_`` truncates at ``etl.config.display_limit``.

    With the limit lowered to 2, only the first two data rows are expected,
    followed by an ellipsis paragraph.
    """
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))

    expect = """<table class='petl'>
<thead>
<tr>
<th>foo</th>
<th>bar</th>
</tr>
</thead>
<tbody>
<tr>
<td>a</td>
<td style='text-align: right'>1</td>
</tr>
<tr>
<td>b</td>
<td style='text-align: right'>2</td>
</tr>
</tbody>
</table>
<p><strong>...</strong></p>
"""

    # display_limit is module-level configuration shared by every test in
    # the process: remember the old value and always put it back so the
    # lowered limit cannot leak into later tests.
    previous_limit = etl.config.display_limit
    etl.config.display_limit = 2
    try:
        actual = etl.wrap(table)._repr_html_()
    finally:
        etl.config.display_limit = previous_limit

    print(actual)
    for l1, l2 in zip(expect.split('\n'), actual.split('\n')):
        eq_(l1, l2)
Example #21
0
 def test_integration():
     """Round-trip a table through a dataframe using the wrapped API."""
     rows = [('foo', 'bar', 'baz'),
             ('apples', 1, 2.5),
             ('oranges', 3, 4.4),
             ('pears', 7, .1)]

     frame = etl.wrap(rows).todataframe()
     restored = etl.fromdataframe(frame)

     # Compared twice on purpose; presumably to confirm the restored table
     # can be iterated more than once.
     for _ in range(2):
         ieq(rows, restored)
Example #22
0
 def test_valuesarray_explicit_dtype():
     """Check that ``values(...).array`` honours an explicit dtype."""
     rows = [('foo', 'bar', 'baz'),
             ('apples', 1, 2.5),
             ('oranges', 3, 4.4),
             ('pears', 7, .1)]

     wanted = np.array([1, 3, 7], dtype='i2')
     got = etl.wrap(rows).values('bar').array(dtype='i2')

     # Both the dtype and every element must match.
     eq_(wanted.dtype, got.dtype)
     assert np.all(wanted == got)
def test_transform_characters_data(settings):
    """Check the name, date and homeworld columns of the transformed table."""
    settings.STAR_WARS_CHARACTERS_BASE_FIELDS = ['name']

    # Lookup used to replace homeworld URLs with planet names.
    url_to_name_map = {
        'http://test.com/planets/1/': 'Test planet 1',
        'http://test.com/planets/2/': 'Test planet 2',
    }
    header = ['name', 'edited', 'homeworld']
    first = ['Test 1', '2014-12-09T13:50:51.644000Z', 'http://test.com/planets/1/']
    second = ['Test 2', '2014-12-20T21:17:56.891000Z', 'http://test.com/planets/2/']
    table = etl.wrap([header, first, second])

    result = transform_characters_data(table, url_to_name_map)

    names = list(result.values('name'))
    assert names == ['Test 1', 'Test 2']

    dates = list(result.values('date'))
    assert dates == ['2014-12-09', '2014-12-20']

    homeworlds = list(result.values('homeworld'))
    assert homeworlds == ['Test planet 1', 'Test planet 2']
Example #24
0
def test_teetsv():
    """Tee a pipeline to a TSV file and verify both written outputs.

    The tee file must contain the whole table, the second file only the rows
    that pass the downstream ``selectgt`` filter.
    """
    t1 = (('foo', 'bar'), ('a', 2), ('b', 1), ('c', 3))

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    teed = etl.wrap(t1).teetsv(f1.name, encoding='ascii')
    teed.selectgt('bar', 1).totsv(f2.name, encoding='ascii')

    # TSV values come back as text, so numbers are converted before comparing.
    ieq(t1, etl.fromtsv(f1.name, encoding='ascii').convertnumbers())
    ieq(
        etl.wrap(t1).selectgt('bar', 1),
        etl.fromtsv(f2.name, encoding='ascii').convertnumbers())
Example #25
0
    def __init__(self, lst=()):
        """
        Build the table from a sequence of rows or from an existing petl table.

        `Args:`
            lst: list, tuple or petl table
                An empty sequence (the default) creates an empty table. A
                sequence whose first element is a dict is loaded with
                ``petl.fromdicts``; one whose first element is a list or
                tuple is wrapped with ``petl.wrap``. Any value that is not a
                list or tuple is stored as-is and assumed to be a petl table.
        `Raises:`
            ValueError: if ``is_valid_table()`` rejects the result, e.g. when
                the rows of a sequence are neither dicts nor lists/tuples.
        """

        self.table = None

        # The default is an immutable empty tuple rather than a shared list,
        # and isinstance() is used so that subclasses (OrderedDict rows,
        # namedtuple rows, ...) are accepted as well.
        if isinstance(lst, (list, tuple)):

            if not lst:
                # Empty sequence -> empty table
                self.table = petl.fromdicts([])
            else:
                # Only the first row is inspected to decide how to load.
                first_row = lst[0]
                if isinstance(first_row, dict):
                    # Sequence of dicts
                    self.table = petl.fromdicts(lst)
                elif isinstance(first_row, (list, tuple)):
                    # Sequence of rows, the first one being the header
                    self.table = petl.wrap(lst)
                # Any other row type leaves self.table as None, which is
                # reported by the validity check below.

        else:
            # Create from a petl table
            self.table = lst

        if not self.is_valid_table():
            raise ValueError("Could not create Table")

        # Count how many times someone is indexing directly into this table, so we can warn
        # against inefficient usage.
        self._index_count = 0
Example #26
0
def extract(profile, workdir):
    """Parse a station profile XML document into a petl table.

    ``profile`` is handed to ``parse``; every ``stationID`` element found in
    the document yields one row, except stations whose status text starts
    with u'無觀測', which are skipped. ``workdir`` is currently unused (the
    'file_path' field that was derived from it is not emitted).
    """

    def optional_text(node, path):
        # Stripped text of the child at `path`; None when the child is
        # missing, and the falsy text itself when it is None or empty.
        text = getattr(node.find(path), 'text', None)
        return text and text.strip()

    tree = parse(profile)

    # The root's 'updatetime' attribute is the modification time; fall back
    # to the current time when it is absent or empty.
    updatetime_str = tree.getroot().attrib.get('updatetime')
    if updatetime_str:
        modify_time = parser.parse(updatetime_str, ignoretz=True)
    else:
        modify_time = datetime.datetime.now()

    records = []
    for station_node in tree.findall('./stationID'):
        status = station_node.find('./status')
        station = status.find('./station') if status is not None else None
        if station is not None and station.text.strip().startswith(u'無觀測'):
            continue

        profile_node = station_node.find('./profile')
        records.append({
            'station_id': station_node.attrib['id'].strip(),
            'seas_chName': profile_node.find('./seas_chName').text.strip(),
            'latitude': float(profile_node.find('./latitude').text.strip()),
            'longitude': float(profile_node.find('./longitude').text.strip()),
            'chName': optional_text(profile_node, './chName'),
            'chCity': optional_text(profile_node, './chCity'),
            'kind_chName': optional_text(profile_node, './kind_chName'),
            'chTown': optional_text(profile_node, './chTown'),
            'chLocation': optional_text(profile_node, './chLocation'),
            'dataItem': optional_text(profile_node, './dataItem'),
            'modifytime': modify_time,
            'updatetime': datetime.datetime.now(),
        })

    # Materialise the rows into a list before wrapping them.
    rows = [row for row in petl.fromdicts(records)]
    return petl.wrap(rows)
Example #27
0
def test_repr_html():
    """Verify the HTML rendering of a wrapped table, one line at a time."""
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    expect = """<table class='petl'>
<thead>
<tr>
<th>foo</th>
<th>bar</th>
</tr>
</thead>
<tbody>
<tr>
<td>a</td>
<td style='text-align: right'>1</td>
</tr>
<tr>
<td>b</td>
<td style='text-align: right'>2</td>
</tr>
<tr>
<td>c</td>
<td style='text-align: right'>2</td>
</tr>
</tbody>
</table>
"""
    rendered = etl.wrap(table)._repr_html_()

    # Pair up the lines of both documents; zip stops at the shorter one.
    line_pairs = zip(expect.split('\n'), rendered.split('\n'))
    for expected_line, rendered_line in line_pairs:
        eq_(expected_line, rendered_line)
Example #28
0
def test_export_gift_cards_in_batches_to_csv(
    gift_card,
    gift_card_expiry_date,
    gift_card_used,
    tmpdir,
):
    """Export every gift card except the used one and check the CSV lines."""
    # given
    gift_cards = GiftCard.objects.exclude(id=gift_card_used.id).order_by("pk")

    # The export target is pre-populated with just the header row.
    header_table = etl.wrap([["code"]])
    temp_file = NamedTemporaryFile()
    etl.tocsv(header_table, temp_file.name, delimiter=",")

    # when
    export_gift_cards_in_batches(gift_cards, ["code"], ",", temp_file, "csv")

    # then
    exported_lines = temp_file.read().decode().split("\r\n")

    # the header must be one of the lines
    assert "code" in exported_lines

    # and so must the code of every exported card
    for card in gift_cards:
        assert card.code in exported_lines

    shutil.rmtree(tmpdir)
Example #29
0
def test_repr_html_limit():
    """Check that ``_repr_html_`` truncates at ``etl.config.display_limit``.

    With the limit lowered to 2, only the first two data rows are expected,
    followed by an ellipsis paragraph.
    """
    table = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))

    expect = """<table class='petl'>
<thead>
<tr>
<th>foo</th>
<th>bar</th>
</tr>
</thead>
<tbody>
<tr>
<td>a</td>
<td style='text-align: right'>1</td>
</tr>
<tr>
<td>b</td>
<td style='text-align: right'>2</td>
</tr>
</tbody>
</table>
<p><strong>...</strong></p>
"""

    # display_limit is module-level configuration shared by every test in
    # the process: remember the old value and always put it back so the
    # lowered limit cannot leak into later tests.
    previous_limit = etl.config.display_limit
    etl.config.display_limit = 2
    try:
        actual = etl.wrap(table)._repr_html_()
    finally:
        etl.config.display_limit = previous_limit

    print(actual)
    for l1, l2 in zip(expect.split('\n'), actual.split('\n')):
        eq_(l1, l2)
Example #30
0
def vcfunpackcall(table, *keys):
    """
    Unpack the call column into one field per key. E.g.::

        >>> import petl as etl
        >>> # activate bio extensions
        ... import petlx.bio
        >>> table1 = (
        ...     etl
        ...     .fromvcf('fixture/sample.vcf')
        ...     .vcfmeltsamples()
        ...     .vcfunpackcall()
        ... )
        >>> table1
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | CHROM | POS | ID   | REF | ALT | QUAL | FILTER | INFO | SAMPLE    | DP   | GQ   | GT    | HQ       |
        +=======+=====+======+=====+=====+======+========+======+===========+======+======+=======+==========+
        | '19'  | 111 | None | 'A' | [C] |  9.6 | None   | {}   | 'NA00001' | None | None | '0|0' | [10, 10] |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | '19'  | 111 | None | 'A' | [C] |  9.6 | None   | {}   | 'NA00002' | None | None | '0|0' | [10, 10] |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | '19'  | 111 | None | 'A' | [C] |  9.6 | None   | {}   | 'NA00003' | None | None | '0/1' | [3, 3]   |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | '19'  | 112 | None | 'A' | [G] |   10 | None   | {}   | 'NA00001' | None | None | '0|0' | [10, 10] |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | '19'  | 112 | None | 'A' | [G] |   10 | None   | {}   | 'NA00002' | None | None | '0|0' | [10, 10] |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        ...

    """

    def call_to_dict(call):
        # The call's ``data`` attribute is converted with ``_asdict()``.
        return call.data._asdict()

    wrapped = etl.wrap(table)
    # First turn each CALL value into a dict, then spread it over columns.
    as_dicts = wrapped.convert('CALL', call_to_dict)
    return as_dicts.unpackdict('CALL', keys=keys)
Example #31
0
def test_export_products_in_batches_for_csv(
    product_list,
    user_export_file,
    tmpdir,
    media_root,
):
    """Export all products to a ';'-delimited CSV and check its lines."""
    # given
    qs = Product.objects.all()
    requested_fields = [
        ProductFieldEnum.NAME.value,
        ProductFieldEnum.DESCRIPTION.value,
        ProductFieldEnum.VARIANT_SKU.value,
    ]
    export_info = {
        "fields": requested_fields,
        "warehouses": [],
        "attributes": [],
        "channels": [],
    }
    export_fields = ["id", "name", "variants__sku"]
    expected_headers = ["id", "name", "variant sku"]

    # The export target is pre-populated with just the header row.
    header_table = etl.wrap([expected_headers])
    temp_file = NamedTemporaryFile()
    etl.tocsv(header_table, temp_file.name, delimiter=";")

    # when
    export_products_in_batches(
        qs,
        export_info,
        set(export_fields),
        export_fields,
        ";",
        temp_file,
        FileTypes.CSV,
    )

    # then
    expected_data = []
    for product in qs.order_by("pk"):
        global_id = graphene.Node.to_global_id("Product", product.pk)
        product_data = [global_id, product.name]

        # NOTE: the same list object is extended and appended once per
        # variant, exactly as in the original test.
        for variant in product.variants.all():
            product_data.append(str(variant.sku))
            expected_data.append(product_data)

    exported_lines = temp_file.read().decode().split("\r\n")

    # ensure headers are in file
    header_line = ";".join(expected_headers)
    assert header_line in exported_lines

    for row in expected_data:
        assert ";".join(row) in exported_lines

    shutil.rmtree(tmpdir)
Example #32
0
def test_export_products_in_batches_for_xlsx(
    product_list, user_export_file, tmpdir, media_root,
):
    """Export all products to an .xlsx workbook and check its cells."""
    # given
    qs = Product.objects.all()
    export_info = {
        "fields": [ProductFieldEnum.NAME.value, ProductFieldEnum.VARIANT_SKU.value],
        "warehouses": [],
        "attributes": [],
        "channels": [],
    }
    export_fields = ["id", "name", "variants__sku"]
    expected_headers = ["id", "name", "variant sku"]

    # The export target is pre-populated with just the header row.
    header_table = etl.wrap([expected_headers])
    temp_file = NamedTemporaryFile(suffix=".xlsx")
    etl.io.xlsx.toxlsx(header_table, temp_file.name)

    # when
    export_products_in_batches(
        qs,
        export_info,
        set(export_fields),
        export_fields,
        ";",
        temp_file,
        FileTypes.XLSX,
    )

    # then
    expected_data = []
    for product in qs.order_by("pk"):
        product_data = [product.pk, product.name]

        # NOTE: the same list object is extended and appended once per
        # variant, exactly as in the original test.
        for variant in product.variants.all():
            product_data.append(variant.sku)
            expected_data.append(product_data)

    workbook = openpyxl.load_workbook(temp_file)
    sheet = workbook.active
    max_col = sheet.max_column
    max_row = sheet.max_row
    columns = range(1, max_col + 1)

    # Row 1 holds the headers; every following row is data.
    headers = [sheet.cell(row=1, column=col).value for col in columns]
    data = [
        [sheet.cell(row=row_no, column=col).value for col in columns]
        for row_no in range(2, max_row + 1)
    ]

    assert headers == expected_headers
    for row in expected_data:
        assert row in data

    shutil.rmtree(tmpdir)
Example #33
0
def test_teecsv_write_header():
    """Tee to CSV with ``write_header=False`` and verify both outputs.

    The tee file is expected to hold only the data rows (no header), the
    second file the rows that pass the downstream ``selectgt`` filter.
    """
    t1 = (("foo", "bar"), ("a", "2"), ("b", "1"), ("c", "3"))

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    (
        etl.wrap(t1)
        .convertnumbers()
        .teecsv(f1.name, write_header=False, encoding="ascii")
        .selectgt("bar", 1)
        .tocsv(f2.name, encoding="ascii")
    )

    # The header was suppressed, so the file equals the data rows of t1
    # (still text, hence no number conversion here).
    ieq(t1[1:], etl.fromcsv(f1.name, encoding="ascii"))
    ieq(
        etl.wrap(t1).convertnumbers().selectgt("bar", 1),
        etl.fromcsv(f2.name, encoding="ascii").convertnumbers(),
    )
Example #34
0
def test_teecsv_unicode():
    """Tee non-ASCII rows to a UTF-8 CSV file and verify both outputs.

    The tee file must contain the whole table, the second file only the rows
    that pass the downstream ``selectgt`` filter.
    """
    t1 = ((u'name', u'id'), (u'Արամ Խաչատրյան', 1), (u'Johann Strauß', 2),
          (u'Вагиф Сәмәдоғлу', 3), (u'章子怡', 4))

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    teed = etl.wrap(t1).teecsv(f1.name, encoding='utf-8')
    teed.selectgt('id', 1).tocsv(f2.name, encoding='utf-8')

    # CSV values come back as text, so numbers are converted before comparing.
    ieq(t1, etl.fromcsv(f1.name, encoding='utf-8').convertnumbers())
    ieq(
        etl.wrap(t1).selectgt('id', 1),
        etl.fromcsv(f2.name, encoding='utf-8').convertnumbers())
Example #35
0
def test_repr():
    """``repr`` of a wrapped table must equal the ``look`` rendering."""
    table = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))

    looked = str(etl.look(table))
    represented = repr(etl.wrap(table))
    eq_(looked, represented)
Example #36
0
def main():
    """Compute historical volatility estimates for a symbol and print them."""
    logging.basicConfig(level=logging.INFO,
                        format='%(levelname)-8s: %(message)s')

    # Command line: the ameritrade options plus the symbol to analyse.
    parser = argparse.ArgumentParser(description=__doc__.strip())
    ameritrade.add_args(parser)
    parser.add_argument('-s', '--symbol', action='store', default='SPY',
                        help="Symbol to compute on")
    args = parser.parse_args()
    api = ameritrade.open(ameritrade.config_from_args(args))

    # Two years of daily candles for the underlying.
    hist = api.GetPriceHistory(symbol=args.symbol,
                               frequency=1,
                               frequencyType='daily',
                               period=2,
                               periodType='year')
    candle = price_history_to_arrays(hist)

    # One row per window length; the final None entry stands for the whole
    # history, for which no distribution statistics are computed.
    rows = [['windays', 'annual_vol', 'iv_percentile', 'mean', 'std']]
    for window in [7, 15, 20, 30, 60, 90, 120, 180, 365, None]:
        whole_history = window is None
        days = candle.datetime.shape[0] if whole_history else window
        vol = historical_volatility(candle.datetime[-days:],
                                    candle.close[-days:])

        if whole_history:
            rows.append([days, vol, '', '', ''])
            continue

        _, vols = historical_volatility_dist(candle.datetime, candle.close,
                                             days)
        assert len(vols) > 0
        meanvol = numpy.mean(vols)
        stdvol = numpy.std(vols)
        # Position of the current vol within a normal fit of the window vols.
        centile = norm.cdf(vol, meanvol, stdvol)
        rows.append([days, vol, centile, meanvol, stdvol])

    def quantized(value):
        # Falsy cells (including the '' placeholders) become ''.
        if value:
            return Decimal(value).quantize(Q)
        return ''

    table = petl.wrap(rows)
    for field in ('annual_vol', 'iv_percentile', 'mean', 'std'):
        table = table.convert(field, quantized)
    table = table.cut('windays', 'mean', 'std', 'annual_vol', 'iv_percentile')
    print(table.lookallstr())
def test_export_products_in_batches_for_csv(
    product_list, user_export_file, tmpdir, media_root,
):
    """Export all products into the export file's CSV and check its lines."""
    # given
    qs = Product.objects.all()
    export_info = {
        "fields": [ProductFieldEnum.NAME.value, ProductFieldEnum.VARIANT_SKU.value],
        "warehouses": [],
        "attributes": [],
    }
    file_name = "test.csv"
    export_fields = ["id", "name", "variants__sku"]
    expected_headers = ["id", "name", "variant sku"]

    # Seed the export file's content with a CSV that holds only the headers.
    header_table = etl.wrap([expected_headers])
    with NamedTemporaryFile() as seed_file:
        etl.tocsv(header_table, seed_file.name, delimiter=";")
        user_export_file.content_file.save(file_name, seed_file)

    assert user_export_file.content_file

    # when
    export_products_in_batches(
        qs,
        export_info,
        set(export_fields),
        export_fields,
        ";",
        user_export_file,
        FileTypes.CSV,
    )

    # then
    user_export_file.refresh_from_db()
    csv_file = user_export_file.content_file
    assert csv_file

    expected_data = []
    for product in qs.order_by("pk"):
        product_data = [str(product.pk), product.name]

        # NOTE: the same list object is extended and appended once per
        # variant, exactly as in the original test.
        for variant in product.variants.all():
            product_data.append(str(variant.sku))
            expected_data.append(product_data)

    exported_lines = csv_file.read().decode().split("\r\n")

    # ensure headers are in file
    header_line = ";".join(expected_headers)
    assert header_line in exported_lines

    for row in expected_data:
        assert ";".join(row) in exported_lines

    shutil.rmtree(tmpdir)
Example #38
0
def test_teecsv_write_header():
    """Tee to CSV with ``write_header=False`` and verify both outputs.

    The tee file is expected to hold only the data rows (no header), the
    second file the rows that pass the downstream ``selectgt`` filter.
    """
    t1 = (('foo', 'bar'), ('a', '2'), ('b', '1'), ('c', '3'))

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    numeric = etl.wrap(t1).convertnumbers()
    teed = numeric.teecsv(f1.name, write_header=False, encoding='ascii')
    teed.selectgt('bar', 1).tocsv(f2.name, encoding='ascii')

    # The header was suppressed, so the file equals the data rows of t1
    # (still text, hence no number conversion here).
    ieq(t1[1:], etl.fromcsv(f1.name, encoding='ascii'))
    ieq(
        etl.wrap(t1).convertnumbers().selectgt('bar', 1),
        etl.fromcsv(f2.name, encoding='ascii').convertnumbers())
Example #39
0
 def test_integration():
     """Convert a wrapped table to a dataframe and back again."""
     tbl = [('foo', 'bar', 'baz'), ('apples', 1, 2.5), ('oranges', 3, 4.4),
            ('pears', 7, .1)]

     wrapped = etl.wrap(tbl)
     df = wrapped.todataframe()
     tbl2 = etl.fromdataframe(df)

     # Compared twice on purpose; presumably to confirm the restored table
     # can be iterated more than once.
     for _ in range(2):
         ieq(tbl, tbl2)
Example #40
0
def test_teetext():
    """Tee a pipeline to a templated text file and verify both outputs.

    The text file is written as prologue + one templated line per row +
    epilogue, so reading it back as CSV must give the table plus the extra
    epilogue row. The pickle file holds the rows passing ``selectgt``.
    """
    t1 = (('foo', 'bar'), ('a', 2), ('b', 1), ('c', 3))

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    prologue = 'foo,bar\n'
    template = '{foo},{bar}\n'
    epilogue = 'd,4'
    teed = etl.wrap(t1).teetext(f1.name,
                                template=template,
                                prologue=prologue,
                                epilogue=epilogue)
    teed.selectgt('bar', 1).topickle(f2.name)

    ieq(t1 + (('d', 4), ), etl.fromcsv(f1.name).convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
Example #41
0
def find1(org_name, org_settlement, pir_to_details, input_fields=INPUT_FIELDS, output_fields=OUTPUT_FIELDS, settlements=SETTLEMENTS):
    """Run the matcher for one organisation and return its match record.

    A one-row input table (id 1) is built from ``org_name`` and
    ``org_settlement``; the entry stored under key 1 of the resulting
    records dict is returned.
    """
    header = ['id', input_fields.org_name, input_fields.settlement]
    single_row = [1, org_name, org_settlement]
    input_table = petl.wrap([header, single_row])

    # The parser is built from the settlements, reporting conflicts.
    settlement_parser = Parser()
    settlement_parser.build(settlements, report_conflicts=True)

    matches = find_matches(
        input_table,
        input_fields,
        output_fields,
        pir_to_details,
        settlement_parser.parse,
    )
    records = records_to_dict(matches)
    return records[1]
Example #42
0
def test_integration(xlsx_test_table):
    """Write, read back and append to an .xlsx file via the wrapped API."""
    # With delete=True, closing removes the temporary file; only its unique
    # path is reused below.
    tmp = NamedTemporaryFile(delete=True, suffix='.xlsx')
    tmp.close()
    path = tmp.name

    source = etl.wrap(xlsx_test_table)
    source.toxlsx(path, 'Sheet1')
    loaded = etl.fromxlsx(path, 'Sheet1')
    ieq(source, loaded)

    # After appending the same rows, the previously created `loaded` table
    # is compared again; presumably it re-reads the file lazily.
    source.appendxlsx(path, 'Sheet1')
    doubled = source.cat(source)
    ieq(doubled, loaded)
Example #43
0
def test_teetext():
    """Tee a pipeline to a templated text file and verify both outputs.

    The text file is written as prologue + one templated line per row +
    epilogue, so reading it back as CSV must give the table plus the extra
    epilogue row. The pickle file holds the rows passing ``selectgt``.
    """
    t1 = (("foo", "bar"), ("a", 2), ("b", 1), ("c", 3))

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    prologue = "foo,bar\n"
    template = "{foo},{bar}\n"
    epilogue = "d,4"
    (
        etl.wrap(t1)
        .teetext(f1.name, template=template, prologue=prologue, epilogue=epilogue)
        .selectgt("bar", 1)
        .topickle(f2.name)
    )

    ieq(t1 + (("d", 4),), etl.fromcsv(f1.name).convertnumbers())
    ieq(etl.wrap(t1).selectgt("bar", 1), etl.frompickle(f2.name))
Example #44
0
    def materialize(self):
        """
        Load the whole Table into memory, applying every pending
        transformation.

        Call this when petl's lazy loading gets in the way, eg. when data
        must be read from a file right now rather than on first use.
        """

        # Snapshot the rows as a tuple of tuples, then wrap the snapshot so
        # the Table keeps exposing a petl table.
        rows = petl.tupleoftuples(self.table)
        self.table = petl.wrap(rows)
Example #45
0
def test_teetext_unicode():
    """Tee non-ASCII rows to a UTF-8 templated text file and verify outputs.

    The text file is written as prologue + one templated line per row +
    epilogue, so reading it back as CSV must give the table plus the extra
    epilogue row. The pickle file holds the rows passing ``selectgt``.
    """
    t1 = (
        (u"foo", u"bar"),
        (u"Արամ Խաչատրյան", 2),
        (u"Johann Strauß", 1),
        (u"Вагиф Сәмәдоғлу", 3),
    )

    # Only the paths are used below, so close the handles right away: this
    # avoids leaking them and lets the files be reopened by name.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    prologue = u"foo,bar\n"
    template = u"{foo},{bar}\n"
    epilogue = u"章子怡,4"
    (
        etl.wrap(t1)
        .teetext(f1.name, template=template, prologue=prologue, epilogue=epilogue, encoding="utf-8")
        .selectgt("bar", 1)
        .topickle(f2.name)
    )

    ieq(t1 + ((u"章子怡", 4),), etl.fromcsv(f1.name, encoding="utf-8").convertnumbers())
    ieq(etl.wrap(t1).selectgt("bar", 1), etl.frompickle(f2.name))
Example #46
0
 def _parse_crew_list(self):
     """Build a petl table of crew entries found in the roster text.

     Every regex match in ``self.roster_text_string`` yields one row with
     the fields 'flight_date', 'flight_number' and 'crew_list'; each field
     is then passed through its converter method.
     """
     # Three groups: a date like 'dd-Mon', a non-space token, rest of line.
     crew_pattern = re.compile(r'\n(\d{2}-\w{3})\s+(\S*)\s+(.*)')
     matches = crew_pattern.findall(self.roster_text_string)

     # findall returns bare tuples, so a header row is pushed on top.
     raw = etl.wrap(matches)
     named = raw.pushheader(['flight_date', 'flight_number', 'crew_list'])
     with_crew = named.convert('crew_list', self._extract_crew)
     with_dates = with_crew.convert('flight_date', self._convert_date_without_year)
     return with_dates.convert('flight_number', self._sanitize_flight_number)
Example #47
0
def create_file_with_headers(file_headers: List[str], delimiter: str, file_type: str):
    """Create a temporary file that contains only a header row.

    Args:
        file_headers: Column names written as the single row of the file.
        delimiter: Field separator; only used when a CSV file is written.
        file_type: ``FileTypes.CSV`` produces a ``.csv`` file; any other
            value produces an ``.xlsx`` file.

    Returns:
        The ``NamedTemporaryFile`` object (opened in ``"ab+"`` mode) whose
        ``name`` is the path the header row was written to.
    """
    header_only = etl.wrap([file_headers])
    wants_csv = file_type == FileTypes.CSV

    suffix = ".csv" if wants_csv else ".xlsx"
    temp_file = NamedTemporaryFile("ab+", suffix=suffix)

    # The writers open the path themselves; temp_file is only the holder
    # of the on-disk name that is handed back to the caller.
    if wants_csv:
        etl.tocsv(header_only, temp_file.name, delimiter=delimiter)
    else:
        etl.io.xlsx.toxlsx(header_only, temp_file.name)

    return temp_file
Example #48
0
def main():
    """Print API-reported betas next to betas computed from return series.

    Rows are the equity positions of the main account plus '$SPX.X' and
    'SPY'. Each row receives the beta returned by ``GetBeta`` and then, for
    every entry of ``PERIOD_TYPES``, a beta computed as
    cov(symbol, benchmark) / var(benchmark) against the ``--betasym`` symbol.
    When a symbol's time axis does not have the benchmark's shape, both
    series are plotted and the sentinel value -100 is recorded instead.
    """
    parser = argparse.ArgumentParser(description=__doc__.strip())
    parser.add_argument('--betasym', default='SPY',
                        help='Symbol to use as reference for beta')
    ameritrade.add_args(parser)
    args = parser.parse_args()
    api = ameritrade.open(ameritrade.config_from_args(args))

    accountId = utils.GetMainAccount(api)

    # Collect (symbol, net quantity) for the equity positions only.
    holdings = []
    for pos in utils.GetPositions(api, accountId):
        if pos.instrument.assetType != 'EQUITY':
            continue
        holdings.append((pos.instrument.symbol,
                         pos.longQuantity - pos.shortQuantity))
    holdings += [('$SPX.X', Decimal('1')), ('SPY', Decimal('1'))]
    # Rows become lists so that beta columns can be appended in place.
    rows = [list(entry) for entry in sorted(holdings)]

    # Column: beta as reported by the API.
    for row in rows:
        row.append(GetBeta(api, row[0]).quantize(Q))

    # One further column per period type: beta computed from returns.
    for periodType in PERIOD_TYPES:
        bench_time, bench_returns = GetReturns(api, args.betasym, periodType)

        for row in rows:
            sym_time, sym_returns = GetReturns(api, row[0], periodType)
            if bench_time.shape == sym_time.shape:
                assert list(bench_time) == list(sym_time)
                cov = numpy.cov(sym_returns, bench_returns)
                computed_beta = cov[0, 1] / cov[1, 1]
            else:
                # Series are not comparable: show them and store a sentinel.
                print((bench_time.shape, sym_time.shape, row))
                pyplot.plot(bench_time, bench_returns)
                pyplot.plot(sym_time, sym_returns)
                pyplot.show()
                computed_beta = Decimal('-100')
            row.append(Decimal(computed_beta).quantize(Q))

    header = ['symbol', 'quantity', 'api_beta'] + PERIOD_TYPES
    table = petl.wrap([header] + rows).cutout('quantity')
    print(table.lookallstr())
Example #49
0
def test_teetext_unicode():
    """teetext() with UTF-8 output must not disturb the downstream pipeline.

    Writes all rows through a text template into one temp file, pickles the
    rows with ``bar > 1`` into another, and verifies both files.
    """
    header = (u'foo', u'bar')
    body = (
        (u'Արամ Խաչատրյան', 2),
        (u'Johann Strauß', 1),
        (u'Вагиф Сәмәдоғлу', 3),
    )
    t1 = (header,) + body

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)

    tee_options = dict(
        template=u'{foo},{bar}\n',
        prologue=u'foo,bar\n',
        epilogue=u'章子怡,4',
        encoding='utf-8',
    )
    pipeline = etl.wrap(t1).teetext(f1.name, **tee_options)
    pipeline = pipeline.selectgt('bar', 1)
    pipeline.topickle(f2.name)

    # Text file: every input row, followed by the epilogue parsed as a row.
    written_text = etl.fromcsv(f1.name, encoding='utf-8').convertnumbers()
    ieq(t1 + ((u'章子怡', 4),), written_text)

    # Pickle file: only the rows that passed the bar > 1 filter.
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
Example #50
0
 def test_integration():
     """Write a table to xlsx, read it back, then append and read again."""
     rows = (
         ('foo', 'bar'),
         ('A', 1),
         ('B', 2),
         ('C', 2),
         (u'é', datetime(2012, 1, 1)),
     )
     # Only the generated path is needed; the file object is closed at once.
     tmp = NamedTemporaryFile(delete=True, suffix='.xlsx')
     tmp.close()
     xlsx_path = tmp.name

     wrapped = etl.wrap(rows)
     wrapped.toxlsx(xlsx_path, 'Sheet1')
     actual = etl.fromxlsx(xlsx_path, 'Sheet1')
     ieq(wrapped, actual)

     # The same `actual` object is compared again after the append, so this
     # check only passes if it re-reads the workbook when iterated.
     wrapped.appendxlsx(xlsx_path, 'Sheet1')
     ieq(wrapped.cat(wrapped), actual)
Example #51
0
def test_basics():
    """Method-style calls on a wrapped table must match the etl functions."""
    t1 = (
        ('foo', 'bar'),
        ('A', 1),
        ('B', 2),
    )
    wrapped = etl.wrap(t1)

    # header() and data(): literal expectation, then parity with etl.*
    eq_(('foo', 'bar'), wrapped.header())
    eq_(etl.header(wrapped), wrapped.header())
    ieq((('A', 1), ('B', 2)), wrapped.data())
    ieq(etl.data(wrapped), wrapped.data())

    # cut(): reordering the columns
    swapped = wrapped.cut('bar', 'foo')
    ieq((('bar', 'foo'), (1, 'A'), (2, 'B')), swapped)
    ieq(etl.cut(wrapped, 'bar', 'foo'), swapped)

    # chained cut(): swapping twice restores the original table
    restored = wrapped.cut('bar', 'foo').cut('foo', 'bar')
    ieq(t1, restored)
Example #52
0
    def test_integration():
        """Load a table into an HDF5 node, append to it, and read back both times."""

        # Keep the temp file object referenced for the whole test.
        tmp = NamedTemporaryFile()
        path = tmp.name
        where, name = '/testgroup', 'testtable'

        # Prepare an empty HDF5 table described by FooBar.
        h5file = tables.open_file(path, mode="w", title="Test file")
        h5file.create_group('/', 'testgroup', 'Test Group')
        h5file.create_table(where, name, FooBar, 'Test Table')
        h5file.flush()
        h5file.close()

        # Initial load via tohdf5().
        rows = etl.wrap((
            ('foo', 'bar'),
            (1, b'asdfgh'),
            (2, b'qwerty'),
            (3, b'zxcvbn'),
        ))
        rows.tohdf5(path, where, name)
        ieq(rows, etl.fromhdf5(path, where, name))

        # After appending: one header followed by the data rows twice.
        rows.appendhdf5(path, where, name)
        doubled = chain(rows, rows[1:])
        ieq(doubled, etl.fromhdf5(path, where, name))
Example #53
0
def vcfunpackcall(table, *keys):
    """
    Expand the 'CALL' column of `table` into separate fields.

    Every 'CALL' value is first turned into a dict via ``value.data._asdict()``;
    that dict column is then unpacked with ``unpackdict``, passing `keys`
    through as its ``keys`` argument. E.g.::

        >>> import petl as etl
        >>> # activate bio extensions
        ... import petlx.bio
        >>> table1 = (
        ...     etl
        ...     .fromvcf('fixture/sample.vcf')
        ...     .vcfmeltsamples()
        ...     .vcfunpackcall()
        ... )
        >>> table1
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | CHROM | POS | ID   | REF | ALT | QUAL | FILTER | INFO | SAMPLE    | DP   | GQ   | GT    | HQ       |
        +=======+=====+======+=====+=====+======+========+======+===========+======+======+=======+==========+
        | '19'  | 111 | None | 'A' | [C] |  9.6 | None   | {}   | 'NA00001' | None | None | '0|0' | [10, 10] |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | '19'  | 111 | None | 'A' | [C] |  9.6 | None   | {}   | 'NA00002' | None | None | '0|0' | [10, 10] |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | '19'  | 111 | None | 'A' | [C] |  9.6 | None   | {}   | 'NA00003' | None | None | '0/1' | [3, 3]   |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | '19'  | 112 | None | 'A' | [G] |   10 | None   | {}   | 'NA00001' | None | None | '0|0' | [10, 10] |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        | '19'  | 112 | None | 'A' | [G] |   10 | None   | {}   | 'NA00002' | None | None | '0|0' | [10, 10] |
        +-------+-----+------+-----+-----+------+--------+------+-----------+------+------+-------+----------+
        ...

    """

    def call_as_dict(call):
        # The call's `data` attribute is expected to offer _asdict().
        return call.data._asdict()

    wrapped = etl.wrap(table)
    with_dicts = wrapped.convert('CALL', call_as_dict)
    return with_dicts.unpackdict('CALL', keys=keys)
# Extract the data from the example CSV file.
table1 = etl.fromcsv('example.csv')
print(table1)
# Alternative preview: etl.look(table1)

# Apply the transformations one step at a time, keeping every
# intermediate table under its own name.
table2 = etl.convert(table1, 'foo', 'upper')
table3 = etl.convert(table2, 'bar', int)
table4 = etl.convert(table3, 'baz', float)
table5 = etl.addfield(table4, 'finally', lambda row: row.bar * row.baz)
print(table5)
# Alternative preview: etl.look(table5)

# The same ETL pipeline written in a functional (method-chaining) style.
table = (
    etl
    .fromcsv('example.csv')
    .convert('foo', 'upper')
    .convert('bar', int)
    .convert('baz', float)
    .addfield('finally', lambda row: row.bar * row.baz)
)

table.look()  # look function only displays five rows.
print(table)

# OOP style: wrap a plain list of rows to get a table object with methods.
l = [['foo', 'bar'], ['a', 1], ['b', 2], ['c', 2]]
table6 = etl.wrap(l)
print(table6)
Example #55
0
# the dtype can also be partially specified
# NOTE(review): `etl` and `table` are not defined above this point in the
# visible excerpt; presumably they come from an earlier snippet - confirm.
a = etl.toarray(table, dtype={'foo': 'a4'})
# Bare expression: echoes the value in an interactive session only.
a


# fromarray()
#############

# Self-contained example: build a numpy array with a three-part dtype
# string and expose it as a petl table.
import petl as etl
import numpy as np
a = np.array([('apples', 1, 2.5),
              ('oranges', 3, 4.4),
              ('pears', 7, 0.1)],
             dtype='U8, i4,f4')
table = etl.fromarray(a)
table


# valuestoarray()
#################

# Self-contained example: wrap a list of rows, then turn the values of the
# 'bar' column into an array.
import petl as etl
table = [('foo', 'bar', 'baz'),
         ('apples', 1, 2.5),
         ('oranges', 3, 4.4),
         ('pears', 7, .1)]
table = etl.wrap(table)
table.values('bar').array()
# specify dtype
table.values('bar').array(dtype='i4')
Example #56
0
import petl as etl
# Step-by-step style: each transformation is bound to its own name.
# 'foo' is converted with 'upper', 'bar' with int, 'baz' with float, and a
# new 'quux' field is computed as bar * baz.
table1 = etl.fromcsv('example.csv')
table2 = etl.convert(table1, 'foo', 'upper')
table3 = etl.convert(table2, 'bar', int)
table4 = etl.convert(table3, 'baz', float)
table5 = etl.addfield(table4, 'quux', lambda row: row.bar * row.baz)
# Bare expression: echoes the table in an interactive session only.
table5

# The same pipeline expressed as a single chain of method calls.
table = (
    etl
    .fromcsv('example.csv')
    .convert('foo', 'upper')
    .convert('bar', int)
    .convert('baz', float)
    .addfield('quux', lambda row: row.bar * row.baz)
)
table

# Wrapping a plain list of rows makes the table methods available on it.
l = [['foo', 'bar'], ['a', 1], ['b', 2], ['c', 2]]
table = etl.wrap(l)
table.look()

# Same data again, this time echoing the wrapped table itself.
l = [['foo', 'bar'], ['a', 1], ['b', 2], ['c', 2]]
table = etl.wrap(l)
table

# Set a petl configuration flag, then echo the table again below.
# NOTE(review): presumably this adds field indices to the displayed header -
# confirm against the petl configuration docs.
etl.config.look_index_header = True

table