# Example #1
# 0
def main():
    """Build redundancy tables for a set of taxa and dump each one to CSV.

    Taxa are selected either directly via --taxon-ids or from the first
    column of the CSV file named by --taxon-id-file.
    """
    args = argparser.parse_args()

    logger = logging.getLogger('redundancy_tables')
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)

    # At least one way of naming the taxa must be given.
    if not (args.taxon_ids or args.taxon_id_file):
        raise Exception("Must provide --taxon-ids or --taxon-id-file option")

    # Taxon ids come either straight from the CLI or from the first
    # column of the given CSV file.
    if args.taxon_ids:
        taxon_ids = args.taxon_ids
    else:
        with open(args.taxon_id_file, 'r') as id_file:
            taxon_ids = [csv_row[0] for csv_row in csv.reader(id_file)]
    logger.info("Taxon Ids: %s" % (taxon_ids))

    # Fetch the taxon_digest rows for the selected taxa.
    cursor = db.get_psycopg2_cursor()
    cursor.execute(
        "select * from taxon_digest td where td.taxon_id in (select t.id from taxon t where t.id = any(%s));",
        (taxon_ids, ))

    taxon_digests = []
    for db_row in cursor:
        taxon_digest = TaxonDigest(id=db_row[0], taxon=db_row[1],
                                   digest=db_row[2])
        logger.info("Taxon Digest: %s" % (taxon_digest.id))
        taxon_digests.append(taxon_digest)
    db.psycopg2_connection.commit()

    # Generate the redundancy tables.
    tables = redundancy.generate_redundancy_tables(taxon_digests,
                                                   logger=logger)

    # Make sure the output directory exists before writing.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Write one CSV file per table.
    for table_id, table in list(tables.items()):
        table_file = os.path.join(args.output_dir, table_id + '.csv')
        logger.info("Writing '%s'..." % table_file)
        with open(table_file, 'w', newline='') as out_file:
            writer = csv.writer(out_file)
            for table_row in table:
                writer.writerow(table_row)

    logger.info("Done.")
def main():
    """Build redundancy tables for a set of taxa and dump each one to CSV.

    SQLAlchemy-session variant: taxa are selected either directly via
    --taxon-ids or from the first column of the CSV file named by
    --taxon-id-file, then matched against the default digest definition.
    """
    args = argparser.parse_args()

    logger = logging.getLogger('redundancy_tables')
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)

    # Check that taxon ids or taxon id file were provided.
    if not (args.taxon_ids or args.taxon_id_file):
        raise Exception("Must provide --taxon-ids or --taxon-id-file option")

    session = db.get_session()

    # Get taxons.
    if args.taxon_ids:
        taxon_ids = args.taxon_ids
    else:
        # FIX: csv.reader requires a text-mode file in Python 3; opening
        # with 'rb' yields bytes and makes the reader fail.
        with open(args.taxon_id_file, 'r') as f:
            taxon_ids = [row[0] for row in csv.reader(f)]

    # Get the digest.
    digest = get_digest(logger, config.DEFAULT_DIGEST_DEFINITION, session)

    # Get the TaxonDigests for the selected taxa under that digest.
    taxon_digests = (
        session.query(TaxonDigest)
        .filter(TaxonDigest.digest == digest)
        .join(Taxon)
        .filter(Taxon.id.in_(taxon_ids))
    ).all()

    # Generate the redundancy tables.
    tables = redundancy.generate_redundancy_tables(
        session, taxon_digests, logger=logger)

    # Create output dir if it does not exist.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Output tables.
    for table_id, table in tables.items():
        table_file = os.path.join(args.output_dir, table_id + '.csv')
        logger.info("Writing '%s'..." % table_file)
        # FIX: csv.writer requires text mode with newline='' in Python 3;
        # 'wb' would raise a TypeError when writing str rows.
        with open(table_file, 'w', newline='') as f:
            w = csv.writer(f)
            for row in table:
                w.writerow(row)

    logger.info("Done.")
def main():
    """Build redundancy tables for a set of taxa and dump each one to CSV.

    SQLAlchemy-session variant: taxa are selected either directly via
    --taxon-ids or from the first column of the CSV file named by
    --taxon-id-file, then matched against the default digest definition.
    """
    args = argparser.parse_args()

    logger = logging.getLogger('redundancy_tables')
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)

    # Check that taxon ids or taxon id file were provided.
    if not (args.taxon_ids or args.taxon_id_file):
        raise Exception("Must provide --taxon-ids or --taxon-id-file option")

    session = db.get_session()

    # Get taxons.
    if args.taxon_ids:
        taxon_ids = args.taxon_ids
    else:
        # FIX: csv.reader requires a text-mode file in Python 3; opening
        # with 'rb' yields bytes and makes the reader fail.
        with open(args.taxon_id_file, 'r') as f:
            taxon_ids = [row[0] for row in csv.reader(f)]

    # Get the digest.
    digest = get_digest(logger, config.DEFAULT_DIGEST_DEFINITION, session)

    # Get the TaxonDigests for the selected taxa under that digest.
    taxon_digests = (session.query(TaxonDigest).filter(
        TaxonDigest.digest == digest).join(Taxon).filter(
            Taxon.id.in_(taxon_ids))).all()

    # Generate the redundancy tables.
    tables = redundancy.generate_redundancy_tables(session,
                                                   taxon_digests,
                                                   logger=logger)

    # Create output dir if it does not exist.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Output tables.
    for table_id, table in tables.items():
        table_file = os.path.join(args.output_dir, table_id + '.csv')
        logger.info("Writing '%s'..." % table_file)
        # FIX: csv.writer requires text mode with newline='' in Python 3;
        # 'wb' would raise a TypeError when writing str rows.
        with open(table_file, 'w', newline='') as f:
            w = csv.writer(f)
            for row in table:
                w.writerow(row)

    logger.info("Done.")
# Example #4
# 0
 def test_generate_redundancy_tables(self):
     taxon_digests = self.session.query(TaxonDigest).all()
     actual = redundancy.generate_redundancy_tables(
         session=self.session, taxon_digests=taxon_digests)