def test_header():
    """Verify that header() returns the first row as a tuple, regardless of
    whether the underlying rows are tuples or lists."""
    expect = ('foo', 'bar')
    # rows as tuples
    tbl = (('foo', 'bar'),
           ('a', 1),
           ('b', 2))
    eq_(expect, header(tbl))
    # same data, rows as lists
    tbl = (['foo', 'bar'],
           ['a', 1],
           ['b', 2])
    eq_(expect, header(tbl))
def natural_key(left, right):
    """
    Determine a natural join key for two tables: the field or fields they
    have in common, in the order they appear in `left`.

    Returns a single field name if exactly one field is shared, otherwise a
    list of field names. Raises :class:`AssertionError` if the tables share
    no fields.
    """
    lflds = header(left)
    rflds = set(header(right))  # set gives O(1) membership tests
    key = [f for f in lflds if f in rflds]
    # explicit raise rather than a bare assert so the check still runs
    # under ``python -O`` (asserts are stripped); exception type and
    # message are unchanged for backward compatibility
    if not key:
        raise AssertionError('no fields in common')
    if len(key) == 1:
        key = key[0]  # deal with singletons
    return key
def test_fromgff3():
    """Exercise fromgff3() against the PlasmoDB example data."""
    features = fromgff3(plasmodb_gff3_file.name)
    expect_header = ('seqid', 'source', 'type', 'start', 'end', 'score',
                     'strand', 'phase', 'attributes')
    eq_(expect_header, header(features))
    # expected first data row, from the line:
    # apidb|MAL1  ApiDB  supercontig  1  643292  .  +  .  ID=apidb|MAL1;Name=MAL1;description=MAL1;size=643292;web_id=MAL1;molecule_type=dsDNA;organism_name=Plasmodium falciparum;translation_table=11;topology=linear;localization=nuclear;Dbxref=ApiDB_PlasmoDB:MAL1,GenBank:NC_004325,taxon:36329
    row = list(features)[1]
    expect_values = ('apidb|MAL1', 'ApiDB', 'supercontig', 1, 643292,
                     '.', '+', '.')
    # access by position
    for i, v in enumerate(expect_values):
        eq_(v, row[i])
    attrs = row[8]
    eq_('apidb|MAL1', attrs['ID'])
    eq_('MAL1', attrs['Name'])
    eq_('Plasmodium falciparum', attrs['organism_name'])
    # rows are hybrid, so the same values are accessible by field name
    for f, v in zip(expect_header[:8], expect_values):
        eq_(v, row[f])
    attrs = row['attributes']
    eq_('apidb|MAL1', attrs['ID'])
    eq_('MAL1', attrs['Name'])
    eq_('Plasmodium falciparum', attrs['organism_name'])
def itercrossjoin(sources, prefix):
    """Yield a combined header row followed by the cartesian product of the
    data rows of all `sources`. If `prefix` is true, each output field name
    is prefixed with the one-based index of the source it came from."""
    # assemble the output header from all source headers
    outflds = []
    for idx, src in enumerate(sources):
        flds = header(src)
        if prefix:
            # use one-based numbering of the source tables
            outflds.extend('%s_%s' % (idx + 1, f) for f in flds)
        else:
            outflds.extend(flds)
    yield tuple(outflds)
    # one output row per element of the cartesian product of data rows
    datasrcs = [data(src) for src in sources]
    for combination in itertools.product(*datasrcs):
        merged = []
        for row in combination:
            merged.extend(row)
        yield tuple(merged)
def convertall(table, *args, **kwargs):
    """
    Convenience function to apply a common conversion function or mapping to
    every field in the table. See also :func:`convert`.

    .. versionadded:: 0.4

    .. versionchanged:: 0.22

    The ``where`` keyword argument can be given with a callable or expression
    which is evaluated on each row and which should return True if the
    conversion should be applied on that row, else False.

    """
    # TODO don't read the data twice!
    flds = header(table)
    return convert(table, flds, *args, **kwargs)
def itertranspose(source):
    """Yield the transpose of the source table: output row i holds the i-th
    value from every input row, with the header row included as input."""
    flds = header(source)
    # one independent pass over the source for each output row
    iterators = [iter(source) for _ in flds]
    for idx, it in enumerate(iterators):
        yield tuple(row[idx] for row in it)
def recordcomplement(a, b, buffersize=None, tempdir=None, cache=True):
    """
    Find records in `a` that are not in `b`. E.g.::

        >>> from petl import recordcomplement, look
        >>> look(a)
        +-------+-------+-------+
        | 'foo' | 'bar' | 'baz' |
        +=======+=======+=======+
        | 'A'   | 1     | True  |
        +-------+-------+-------+
        | 'C'   | 7     | False |
        +-------+-------+-------+
        | 'B'   | 2     | False |
        +-------+-------+-------+
        | 'C'   | 9     | True  |
        +-------+-------+-------+

        >>> look(b)
        +-------+-------+-------+
        | 'bar' | 'foo' | 'baz' |
        +=======+=======+=======+
        | 2     | 'B'   | False |
        +-------+-------+-------+
        | 9     | 'A'   | False |
        +-------+-------+-------+
        | 3     | 'B'   | True  |
        +-------+-------+-------+
        | 9     | 'C'   | True  |
        +-------+-------+-------+

        >>> aminusb = recordcomplement(a, b)
        >>> look(aminusb)
        +-------+-------+-------+
        | 'foo' | 'bar' | 'baz' |
        +=======+=======+=======+
        | 'A'   | 1     | True  |
        +-------+-------+-------+
        | 'C'   | 7     | False |
        +-------+-------+-------+

        >>> bminusa = recordcomplement(b, a)
        >>> look(bminusa)
        +-------+-------+-------+
        | 'bar' | 'foo' | 'baz' |
        +=======+=======+=======+
        | 3     | 'B'   | True  |
        +-------+-------+-------+
        | 9     | 'A'   | False |
        +-------+-------+-------+

    Both tables must have the same set of fields, although the fields may
    appear in any order. See also the :func:`complement` function.

    See also the discussion of the `buffersize`, `tempdir` and `cache`
    arguments under the :func:`sort` function.

    .. versionadded:: 0.3

    """
    flds_a = header(a)
    flds_b = header(b)
    assert set(flds_a) == set(flds_b), 'both tables must have the same set of fields'
    # reorder the fields of b to match a before taking the complement
    b_reordered = cut(b, *flds_a)
    return complement(a, b_reordered, buffersize=buffersize, tempdir=tempdir,
                      cache=cache)
def appendsqlite3(table, filename_or_connection, tablename, commit=True):
    """
    Load data into an existing table in an :mod:`sqlite3` database. The new
    rows are inserted into the table; any existing rows remain. E.g.::

        >>> from petl import appendsqlite3, look
        >>> look(moredata)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'd'   | 7     |
        +-------+-------+
        | 'e'   | 9     |
        +-------+-------+
        | 'f'   | 1     |
        +-------+-------+

        >>> appendsqlite3(moredata, 'test.db', 'foobar')
        >>> # look what it did
        ... from petl import look, fromsqlite3
        >>> look(fromsqlite3('test.db', 'select * from foobar'))
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | u'a'  | 1     |
        +-------+-------+
        | u'b'  | 2     |
        +-------+-------+
        | u'c'  | 2     |
        +-------+-------+
        | u'd'  | 7     |
        +-------+-------+
        | u'e'  | 9     |
        +-------+-------+
        | u'f'  | 1     |
        +-------+-------+

    .. versionchanged:: 0.10.2

    Either a database file name or a connection object can be given as the
    second argument. (Note that `cachetag()` is only implemented if a file
    name is given.)

    """
    # sanitise table name
    tablename = _quote(tablename)
    # accept either an open connection or a database file name
    if isinstance(filename_or_connection, sqlite3.Connection):
        conn = filename_or_connection
    elif isinstance(filename_or_connection, basestring):
        conn = sqlite3.connect(filename_or_connection)
    else:
        raise Exception('filename_or_connection argument must be filename or connection; found %r' % filename_or_connection)
    cursor = conn.cursor()
    # only the number of fields is needed, to build the placeholder list
    nflds = len(header(table))
    placeholders = ', '.join(['?'] * nflds)
    _insert(cursor, tablename, placeholders, table)
    # tidy up
    cursor.close()
    if commit:
        conn.commit()
    # return the connection in case the caller wants to close it
    return conn