Esempio n. 1
0
def action(args):
    """Write a CSV report of tax ids that are not present in the taxonomy.

    Tax ids are collected from two optional sources:

    * ``args.taxids`` -- if it names an existing path, the lines produced
      by ``getlines`` for that path; otherwise the string itself. Either
      way the text is split on runs of whitespace, commas or semicolons.
    * ``args.seq_info`` -- a file object read with ``csv.DictReader``; the
      non-empty values of its ``tax_id`` column are used.

    For every collected id for which ``tax._node`` raises ``ValueError``
    one row is written to ``args.out``: ``[tax_id, merged_id]`` when
    ``tax._get_merged`` returns a truthy id different from the input,
    otherwise ``[tax_id, None]``.

    Args:
        args: parsed-arguments object providing ``url``, ``verbosity``,
            ``schema``, ``taxids``, ``seq_info`` and ``out``.

    Returns:
        0 in all cases.
    """
    engine = create_engine(args.url, echo=args.verbosity > 2)
    try:
        tax = Taxonomy(engine, schema=args.schema)

        taxids = set()

        if args.taxids:
            if os.access(args.taxids, os.F_OK):
                chunks = getlines(args.taxids)
            else:
                chunks = [args.taxids]
            for chunk in chunks:
                # re.split yields '' for leading/trailing delimiters; an
                # empty string is never a tax id, so drop it here just as
                # the seq_info branch below drops empty cells.
                taxids.update(
                    tok for tok in re.split(r'[\s,;]+', chunk) if tok)

        if args.seq_info:
            with args.seq_info:
                reader = csv.DictReader(args.seq_info)
                taxids.update(
                    row['tax_id'] for row in reader if row['tax_id'])

        writer = csv.writer(args.out)

        for t in taxids:
            try:
                tax._node(t)
            except ValueError:
                # Check for merged
                m = tax._get_merged(t)
                writer.writerow([t, m if m and m != t else None])
    finally:
        # Release the engine even when a lookup or a write fails.
        engine.dispose()
    return 0
Esempio n. 2
0
def action(args):
    """Export the lineages of the requested tax ids via ``tax.write_table``.

    Tax ids are collected from three optional sources:

    * ``args.taxids`` -- if it names an existing path, the lines produced
      by ``getlines`` for that path; otherwise the string itself. The text
      is split on runs of whitespace, commas or semicolons.
    * ``args.taxnames`` -- lines produced by ``getlines``, each split on
      commas/semicolons; every name is resolved with
      ``tax.primary_from_name``.
    * ``args.seq_info`` -- a file object read with ``csv.DictReader``; the
      non-empty values of its ``tax_id`` column are used.

    Every id is first checked with ``tax._node``. If any lookup raises
    ``KeyError`` a message per id is written to stderr and nothing is
    exported. Otherwise the second element of each pair returned by
    ``tax._get_lineage`` is gathered for all ids and handed to
    ``tax.write_table`` together with ``args.out_file`` and ``args.full``.

    Args:
        args: parsed-arguments object providing ``database_file``,
            ``verbosity``, ``taxids``, ``taxnames``, ``seq_info``,
            ``out_file`` and ``full``.

    Returns:
        1 if at least one tax id failed validation, otherwise 0.
    """
    engine = create_engine(
        'sqlite:///%s' % args.database_file, echo=args.verbosity > 2)
    try:
        tax = Taxonomy(engine, ncbi.ranks)

        taxids = set()

        if args.taxids:
            if os.access(args.taxids, os.F_OK):
                chunks = getlines(args.taxids)
            else:
                chunks = [args.taxids]
            for chunk in chunks:
                # re.split yields '' for leading/trailing delimiters; an
                # empty string is never a tax id, so drop it.
                taxids.update(
                    tok for tok in re.split(r'[\s,;]+', chunk) if tok)

        if args.taxnames:
            for taxname in getlines(args.taxnames):
                for name in re.split(r'\s*[,;]\s*', taxname):
                    name = name.strip()
                    if not name:
                        # A trailing delimiter leaves an empty name.
                        continue
                    tax_id, _primary_name, _is_primary = (
                        tax.primary_from_name(name))
                    taxids.add(tax_id)

        if args.seq_info:
            with args.seq_info:
                reader = csv.DictReader(args.seq_info)
                taxids.update(
                    row['tax_id'] for row in reader if row['tax_id'])

        # Before digging into lineages, make sure all the taxids exist in
        # the taxonomy database.
        valid_taxids = True
        for t in taxids:
            try:
                tax._node(t)
            except KeyError:
                # Check for merged
                m = tax._get_merged(t)
                if m and m != t:
                    msg = ("Taxid {0} has been replaced by {1}. "
                           "Please update your records").format(t, m)
                else:
                    msg = "Taxid %s not found in taxonomy." % t
                sys.stderr.write(msg + '\n')
                valid_taxids = False
        if not valid_taxids:
            sys.stderr.write("Some taxids were invalid.  Exiting.\n")
            # The finally clause below still disposes of the engine.
            return 1  # exits with code 1

        # Extract all the taxids to be exported in the CSV file.
        taxids_to_export = set()
        for t in taxids:
            taxids_to_export.update(y for (_, y) in tax._get_lineage(t))

        tax.write_table(
            taxids_to_export, csvfile=args.out_file, full=args.full)
    finally:
        # Runs on the early return above and on any exception.
        engine.dispose()
    return 0
Esempio n. 3
0
def action(args):
    """Look up taxon names and write the matches as CSV.

    Names come from ``args.infile`` (an iterable of lines; anything from
    the first ``#`` on a line is discarded) and from the comma-separated
    string ``args.names``. Each name is resolved with
    ``tax.primary_from_name``; for a match the rank is read from
    ``tax._node`` and the row ``[input, tax_name, tax_id, rank]`` is
    written to ``args.outfile``. When the lookup raises ``ValueError`` a
    row ``[input, None, None, None]`` is written only if
    ``args.include_unmatched`` is truthy. A header row is always written,
    and the number of matches is logged at warning level.

    Args:
        args: parsed-arguments object providing ``url``, ``schema``,
            ``infile``, ``names``, ``outfile`` and ``include_unmatched``.
    """
    engine = sqlalchemy.create_engine(args.url, echo=False)
    try:
        tax = Taxonomy(engine, schema=args.schema)

        names = []
        if args.infile:
            # Cut the comment off first and keep only what is left, so
            # blank lines, full-line comments and *indented* comment
            # lines are all skipped instead of producing an empty name.
            uncommented = (
                line.split('#', 1)[0].strip() for line in args.infile)
            names.extend(name for name in uncommented if name)

        if args.names:
            # Skip empty items left by doubled or trailing commas.
            pieces = (x.strip() for x in args.names.split(','))
            names.extend(name for name in pieces if name)

        writer = csv.writer(args.outfile)
        writer.writerow(['input', 'tax_name', 'tax_id', 'rank'])

        found = 0
        for name in names:
            try:
                tax_id, tax_name, _is_primary = tax.primary_from_name(name)
            except ValueError:
                if args.include_unmatched:
                    writer.writerow([name, None, None, None])
            else:
                found += 1
                _parent, rank = tax._node(tax_id)
                writer.writerow([name, tax_name, tax_id, rank])

        log.warning('found {} of {} names'.format(found, len(names)))
    finally:
        # Release the engine whether or not the lookups succeeded.
        engine.dispose()
Esempio n. 4
0
def test_species_below():
    """Check ``Taxonomy.species_below`` against the test database.

    For each starting id, a non-None result must have rank ``'species'``
    and, when the starting id is not None, must satisfy
    ``tax.is_ancestor_of(result, start)``.
    """
    engine = create_engine('sqlite:///../testfiles/taxonomy.db', echo=False)
    tax = Taxonomy(engine, taxtastic.ncbi.RANKS)

    # The result for '1239' must be something _node can unpack into a
    # two-item tuple.
    species_id = tax.species_below('1239')
    _parent_id, _rank = tax._node(species_id)

    for start in (None, '1239', '186801', '1117'):
        found = tax.species_below(start)
        if found is None:
            # Nothing to verify when no species was returned.
            continue
        assert start is None or tax.is_ancestor_of(found, start)
        assert tax.rank(found) == 'species'
Esempio n. 5
0
def action(args):
    """Write a CSV report of tax ids that are not present in the taxonomy.

    Tax ids are collected from two optional sources:

    * ``args.taxids`` -- if it names an existing path, the lines produced
      by ``getlines`` for that path; otherwise the string itself. Either
      way the text is split on runs of whitespace, commas or semicolons.
    * ``args.seq_info`` -- a file object read with ``csv.DictReader``; the
      non-empty values of its ``tax_id`` column are used.

    For every collected id for which ``tax._node`` raises ``ValueError``
    one row is written to ``args.out_file``: ``[tax_id, merged_id]`` when
    ``tax._get_merged`` returns a truthy id different from the input,
    otherwise ``[tax_id, None]``.

    Args:
        args: parsed-arguments object providing ``database_file``,
            ``verbosity``, ``taxids``, ``seq_info`` and ``out_file``.

    Returns:
        0 in all cases.
    """
    engine = create_engine('sqlite:///%s' %
                           args.database_file, echo=args.verbosity > 2)
    try:
        tax = Taxonomy(engine, ncbi.RANKS)

        taxids = set()

        if args.taxids:
            if os.access(args.taxids, os.F_OK):
                chunks = getlines(args.taxids)
            else:
                chunks = [args.taxids]
            for chunk in chunks:
                # re.split yields '' for leading/trailing delimiters; an
                # empty string is never a tax id, so drop it here just as
                # the seq_info branch below drops empty cells.
                taxids.update(
                    tok for tok in re.split(r'[\s,;]+', chunk) if tok)

        if args.seq_info:
            with args.seq_info:
                reader = csv.DictReader(args.seq_info)
                taxids.update(
                    row['tax_id'] for row in reader if row['tax_id'])

        writer = csv.writer(args.out_file)

        for t in taxids:
            try:
                tax._node(t)
            except ValueError:
                # Check for merged
                m = tax._get_merged(t)
                writer.writerow([t, m if m and m != t else None])
    finally:
        # Release the engine even when a lookup or a write fails.
        engine.dispose()
    return 0
Esempio n. 6
0
def action(args):
    """Write ``tax_id # tax_name`` lines for the taxa matching given names.

    Names come from ``args.taxnames_file`` (an iterable of lines; anything
    from the first ``#`` on a line is discarded) and from the
    comma-separated string ``args.taxnames``. Each distinct name is
    resolved with ``tax.primary_from_name``:

    * if the lookup raises ``ValueError`` a "not found" warning is logged
      and the name is skipped;
    * if the match is not the primary name a "not primary" warning is
      logged and processing continues;
    * if the rank reported by ``tax._node`` is ``'species'`` the taxon
      itself is kept; otherwise the rows returned by
      ``get_children(engine, [tax_id])`` are kept.

    The collected taxa are written to ``args.outfile`` sorted by
    ``tax_name``.

    Args:
        args: parsed-arguments object providing ``dbfile``,
            ``taxnames_file``, ``taxnames`` and ``outfile``.
    """
    dbfile = args.dbfile
    taxnames_file = args.taxnames_file
    taxnames = args.taxnames

    outfile = args.outfile

    # Explicit mapping instead of ``% locals()`` so that renaming a local
    # cannot silently break the message.
    warning_fmt = '%(name)20s | %(tax_id)7s %(tax_name)20s %(note)s'

    engine = create_engine('sqlite:///%s' % dbfile, echo=False)
    try:
        tax = Taxonomy(engine, ncbi.RANKS)

        names = []
        if taxnames_file:
            # Cut the comment off first and keep only what is left, so
            # blank lines, full-line comments and *indented* comment
            # lines are all skipped instead of producing an empty name.
            uncommented = (
                line.split('#', 1)[0].strip() for line in taxnames_file)
            names.extend(name for name in uncommented if name)

        if taxnames:
            # Skip empty items left by doubled or trailing commas.
            pieces = (x.strip() for x in taxnames.split(','))
            names.extend(name for name in pieces if name)

        taxa = {}
        for name in set(names):
            try:
                tax_id, tax_name, is_primary = tax.primary_from_name(name)
            except ValueError:
                log.warning(warning_fmt % dict(
                    name=name, tax_id='', tax_name='', note='not found'))
                # There is no tax id to expand, so do not query
                # get_children with an empty id.
                continue

            _parent, rank = tax._node(tax_id)
            if not is_primary:
                log.warning(warning_fmt % dict(
                    name=name, tax_id=tax_id, tax_name=tax_name,
                    note='not primary'))

            if rank == 'species':
                taxa[tax_id] = dict(
                    tax_id=tax_id, tax_name=tax_name, rank=rank)
            else:
                _keys, rows = get_children(engine, [tax_id])
                taxa.update((row['tax_id'], row) for row in rows)
    finally:
        # Release the engine whether or not the lookups succeeded.
        engine.dispose()

    for d in sorted(taxa.values(), key=lambda x: x['tax_name']):
        outfile.write('%(tax_id)s # %(tax_name)s\n' % d)
Esempio n. 7
0
def test__node():
    """Check ``Taxonomy._node`` for a None id and for a known id.

    ``_node(None)`` must return None, and ``_node('91061')`` must equal
    the pair ``(u'1239', u'class')`` in the small test database.
    """
    db_url = 'sqlite:///../testfiles/small_taxonomy.db'
    tax = Taxonomy(create_engine(db_url, echo=False), taxtastic.ncbi.RANKS)

    missing = tax._node(None)
    assert missing is None

    expected = (u'1239', u'class')
    assert tax._node('91061') == expected