Beispiel #1
0
def test_header():
    """Check that header() gives the same result for tuple and list rows."""
    expected = ('foo', 'bar')

    # table built from tuples
    tuple_rows = (('foo', 'bar'), ('a', 1), ('b', 2))
    eq_(expected, header(tuple_rows))

    # same table built from lists; the expected value is still a tuple
    list_rows = (['foo', 'bar'], ['a', 1], ['b', 2])
    eq_(expected, header(list_rows))
Beispiel #2
0
def natural_key(left, right):
    """
    Determine the key shared by two tables, i.e., the fields that occur in
    the headers of both `left` and `right`.

    Returns the single common field if there is exactly one, otherwise a list
    of the common fields in the order in which they appear in the header of
    `left`.

    Raises :class:`AssertionError` if the two headers have no field in common.

    """
    # determine key field or fields
    lflds = header(left)
    rflds = header(right)
    key = [f for f in lflds if f in rflds]
    if not key:
        # raised explicitly rather than via an ``assert`` statement so that the
        # check is not stripped when Python runs with -O; the exception type is
        # kept as AssertionError so that existing callers see no difference
        raise AssertionError('no fields in common')
    if len(key) == 1:
        key = key[0]  # deal with singletons
    return key
Beispiel #3
0
def test_fromgff3():
    """Check the header and one row of the table read by fromgff3()."""

    table = fromgff3(plasmodb_gff3_file.name)

    eq_(('seqid', 'source', 'type', 'start', 'end', 'score', 'strand', 'phase', 'attributes'),
        header(table))

    # apidb|MAL1    ApiDB    supercontig    1    643292    .    +    .    ID=apidb|MAL1;Name=MAL1;description=MAL1;size=643292;web_id=MAL1;molecule_type=dsDNA;organism_name=Plasmodium falciparum;translation_table=11;topology=linear;localization=nuclear;Dbxref=ApiDB_PlasmoDB:MAL1,GenBank:NC_004325,taxon:36329
    record = list(table)[1]

    # expected values for the first eight columns, in header order
    expected_columns = (
        ('seqid', 'apidb|MAL1'),
        ('source', 'ApiDB'),
        ('type', 'supercontig'),
        ('start', 1),
        ('end', 643292),
        ('score', '.'),
        ('strand', '+'),
        ('phase', '.'),
    )
    # expected entries of the ninth ('attributes') column
    expected_attributes = (
        ('ID', 'apidb|MAL1'),
        ('Name', 'MAL1'),
        ('organism_name', 'Plasmodium falciparum'),
    )

    # access by position
    for position, (_, value) in enumerate(expected_columns):
        eq_(value, record[position])
    for attr, value in expected_attributes:
        eq_(value, record[8][attr])

    # test data wrapped in hybrid rows (access by field name)
    for name, value in expected_columns:
        eq_(value, record[name])
    for attr, value in expected_attributes:
        eq_(value, record['attributes'][attr])
Beispiel #4
0
def itercrossjoin(sources, prefix):
    """
    Generate the rows of the cross join of `sources`.

    The first row yielded is the output header: the headers of all sources
    concatenated in order. If `prefix` is true, each field is converted to a
    string and prefixed with the one-based position of its source followed by
    an underscore. Every following row is one element of the product of the
    sources' data rows, flattened into a single tuple.

    """

    # construct fields
    outflds = []
    for i, src in enumerate(sources):
        flds = header(src)
        if prefix:
            # use one-based numbering
            label = str(i + 1) + '_'
            flds = [label + str(f) for f in flds]
        outflds.extend(flds)
    yield tuple(outflds)

    # one data iterable per source, combined by itertools.product
    datasrcs = [data(src) for src in sources]
    for combo in itertools.product(*datasrcs):
        # flatten the tuple of rows into a single output row
        yield tuple(value for row in combo for value in row)
Beispiel #5
0
def convertall(table, *args, **kwargs):
    """
    Apply one conversion (a function or a mapping) to every field of the
    table. This is a shortcut for calling :func:`convert` with the table's
    full header as the field selection; all further positional and keyword
    arguments are passed through to :func:`convert` unchanged.

    .. versionadded:: 0.4

    .. versionchanged:: 0.22

    A ``where`` keyword argument can be supplied as a callable or expression
    that is evaluated on each row; the conversion is applied to a row only if
    it returns True.

    """

    # TODO don't read the data twice!

    all_fields = header(table)
    return convert(table, all_fields, *args, **kwargs)
Beispiel #6
0
def convertall(table, *args, **kwargs):
    """
    Convert every field in the table with the same function or mapping.
    Equivalent to calling :func:`convert` with the complete header of the
    table as the fields to convert; any extra positional and keyword
    arguments are forwarded to :func:`convert` as given.

    .. versionadded:: 0.4

    .. versionchanged:: 0.22

    The ``where`` keyword argument accepts a callable or expression that is
    evaluated per row and should return True for rows on which the conversion
    is to be applied, else False.

    """

    # TODO don't read the data twice!

    fields_to_convert = header(table)
    return convert(table, fields_to_convert, *args, **kwargs)
Beispiel #7
0
def itertranspose(source):
    """
    Generate the rows of the transpose of `source`: the i-th row yielded is
    a tuple holding the i-th item of every row of `source`.

    """
    flds = header(source)
    # a separate iterator over the source is created for every field, because
    # building each output row consumes one complete pass over the input
    passes = [iter(source) for _ in flds]
    for idx, rows in enumerate(passes):
        yield tuple(r[idx] for r in rows)
Beispiel #8
0
def recordcomplement(a, b, buffersize=None, tempdir=None, cache=True):
    """
    Find records in `a` that are not in `b`. E.g.::

        >>> from petl import recordcomplement, look
        >>> look(a)
        +-------+-------+-------+
        | 'foo' | 'bar' | 'baz' |
        +=======+=======+=======+
        | 'A'   | 1     | True  |
        +-------+-------+-------+
        | 'C'   | 7     | False |
        +-------+-------+-------+
        | 'B'   | 2     | False |
        +-------+-------+-------+
        | 'C'   | 9     | True  |
        +-------+-------+-------+

        >>> look(b)
        +-------+-------+-------+
        | 'bar' | 'foo' | 'baz' |
        +=======+=======+=======+
        | 2     | 'B'   | False |
        +-------+-------+-------+
        | 9     | 'A'   | False |
        +-------+-------+-------+
        | 3     | 'B'   | True  |
        +-------+-------+-------+
        | 9     | 'C'   | True  |
        +-------+-------+-------+

        >>> aminusb = recordcomplement(a, b)
        >>> look(aminusb)
        +-------+-------+-------+
        | 'foo' | 'bar' | 'baz' |
        +=======+=======+=======+
        | 'A'   | 1     | True  |
        +-------+-------+-------+
        | 'C'   | 7     | False |
        +-------+-------+-------+

        >>> bminusa = recordcomplement(b, a)
        >>> look(bminusa)
        +-------+-------+-------+
        | 'bar' | 'foo' | 'baz' |
        +=======+=======+=======+
        | 3     | 'B'   | True  |
        +-------+-------+-------+
        | 9     | 'A'   | False |
        +-------+-------+-------+

    Note that both tables must have the same set of fields, but that the order
    of the fields does not matter. An :class:`AssertionError` is raised if the
    two sets of fields differ. See also the :func:`complement` function.

    The `buffersize`, `tempdir` and `cache` arguments are passed through to
    :func:`complement`; see also the discussion of these arguments under the
    :func:`sort` function.

    .. versionadded:: 0.3

    """

    ha = header(a)
    hb = header(b)
    if set(ha) != set(hb):
        # raised explicitly rather than via an ``assert`` statement so that the
        # check is not stripped when Python runs with -O; the exception type is
        # kept as AssertionError so that existing callers see no difference
        raise AssertionError('both tables must have the same set of fields')
    # make sure fields are in the same order
    bv = cut(b, *ha)
    return complement(a, bv, buffersize=buffersize, tempdir=tempdir, cache=cache)
Beispiel #9
0
def recordcomplement(a, b, buffersize=None, tempdir=None, cache=True):
    """
    Find records in `a` that are not in `b`. E.g.::

        >>> from petl import recordcomplement, look
        >>> look(a)
        +-------+-------+-------+
        | 'foo' | 'bar' | 'baz' |
        +=======+=======+=======+
        | 'A'   | 1     | True  |
        +-------+-------+-------+
        | 'C'   | 7     | False |
        +-------+-------+-------+
        | 'B'   | 2     | False |
        +-------+-------+-------+
        | 'C'   | 9     | True  |
        +-------+-------+-------+

        >>> look(b)
        +-------+-------+-------+
        | 'bar' | 'foo' | 'baz' |
        +=======+=======+=======+
        | 2     | 'B'   | False |
        +-------+-------+-------+
        | 9     | 'A'   | False |
        +-------+-------+-------+
        | 3     | 'B'   | True  |
        +-------+-------+-------+
        | 9     | 'C'   | True  |
        +-------+-------+-------+

        >>> aminusb = recordcomplement(a, b)
        >>> look(aminusb)
        +-------+-------+-------+
        | 'foo' | 'bar' | 'baz' |
        +=======+=======+=======+
        | 'A'   | 1     | True  |
        +-------+-------+-------+
        | 'C'   | 7     | False |
        +-------+-------+-------+

        >>> bminusa = recordcomplement(b, a)
        >>> look(bminusa)
        +-------+-------+-------+
        | 'bar' | 'foo' | 'baz' |
        +=======+=======+=======+
        | 3     | 'B'   | True  |
        +-------+-------+-------+
        | 9     | 'A'   | False |
        +-------+-------+-------+

    Note that both tables must have the same set of fields, but that the order
    of the fields does not matter. An :class:`AssertionError` is raised if the
    two sets of fields differ. See also the :func:`complement` function.

    The `buffersize`, `tempdir` and `cache` arguments are passed through to
    :func:`complement`; see also the discussion of these arguments under the
    :func:`sort` function.

    .. versionadded:: 0.3

    """

    ha = header(a)
    hb = header(b)
    if set(ha) != set(hb):
        # raised explicitly rather than via an ``assert`` statement so that the
        # check is not stripped when Python runs with -O; the exception type is
        # kept as AssertionError so that existing callers see no difference
        raise AssertionError('both tables must have the same set of fields')
    # make sure fields are in the same order
    bv = cut(b, *ha)
    return complement(a,
                      bv,
                      buffersize=buffersize,
                      tempdir=tempdir,
                      cache=cache)
Beispiel #10
0
def itertranspose(source):
    """
    Yield the rows of `source` transposed: output row number i is a tuple
    made of item i taken from each row of `source`.

    """
    columns = header(source)
    # each output row needs its own full pass over the source, so one
    # iterator is opened per field up front
    row_iters = [iter(source) for _ in columns]
    for position, rows in enumerate(row_iters):
        yield tuple(row[position] for row in rows)
Beispiel #11
0
Datei: io.py Projekt: deytao/petl
def appendsqlite3(table, filename_or_connection, tablename, commit=True):
    """
    Load data into an existing table in an :mod:`sqlite3`
    database. Note that the database table will be appended, i.e., the
    new data will be inserted into the table, and any existing rows
    will remain. E.g.::

        >>> from petl import appendsqlite3, look
        >>> look(moredata)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'd'   | 7     |
        +-------+-------+
        | 'e'   | 9     |
        +-------+-------+
        | 'f'   | 1     |
        +-------+-------+

        >>> appendsqlite3(moredata, 'test.db', 'foobar')
        >>> # look what it did
        ... from petl import look, fromsqlite3
        >>> look(fromsqlite3('test.db', 'select * from foobar'))
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | u'a'  | 1     |
        +-------+-------+
        | u'b'  | 2     |
        +-------+-------+
        | u'c'  | 2     |
        +-------+-------+
        | u'd'  | 7     |
        +-------+-------+
        | u'e'  | 9     |
        +-------+-------+
        | u'f'  | 1     |
        +-------+-------+

    If `commit` is true (the default), the connection is committed once the
    rows have been inserted. The connection is returned, so that callers can
    close it if they want to.

    An :class:`Exception` is raised if `filename_or_connection` is neither a
    string nor an :class:`sqlite3.Connection`.

    .. versionchanged:: 0.10.2

    Either a database file name or a connection object can be given as the
    second argument. (Note that `cachetag()` is only implemented if a file name
    is given.)

    """

    # sanitise table name
    tablename = _quote(tablename)

    if isinstance(filename_or_connection, basestring):
        conn = sqlite3.connect(filename_or_connection)
    elif isinstance(filename_or_connection, sqlite3.Connection):
        conn = filename_or_connection
    else:
        # the value is wrapped in a 1-tuple so that a tuple argument is shown
        # with %r instead of being unpacked as the list of format arguments
        raise Exception('filename_or_connection argument must be filename or connection; found %r' % (filename_or_connection,))

    cursor = conn.cursor()
    try:
        flds = header(table)  # just need to know how many fields there are
        placeholders = ', '.join(['?'] * len(flds))
        _insert(cursor, tablename, placeholders, table)
    finally:
        # tidy up, also when reading the header or inserting fails
        cursor.close()

    if commit:
        conn.commit()

    return conn  # in case people want to close it