Beispiel #1
0
    with open(ogfile, 'r') as f:
        cluster_id = 0
        for line in f:
            cluster = filter(lambda s : s.isdigit(), re.split(r'[@\s]', line))
            n = len(cluster)
            hist[n] = hist.get(n, 0) + 1
            if n >= 4:
                nseqs += n
                for seq_id in cluster:
                    database.execute("""
                        INSERT INTO homology (run_id, component_id, sequence_id)
                        VALUES (?,?,?);""",
                        (_run_id, cluster_id, seq_id))
                cluster_id += 1
    database.execute("COMMIT")

    utils.info(
        "histogram of gene cluster sizes:\n",
        '\n '.join("%d\t:\t%d" % (k, hist[k]) for k in sorted(hist)))

    diagnostics.log('nseqs', nseqs)
    diagnostics.log('histogram', hist)



if __name__ == "__main__":
    # Script entry point: the steps below execute only when this file is run
    # directly, not when it is imported as a module.
    #
    # Run the pipeline.
    # NOTE(review): `pipe` is defined outside this excerpt; presumably its
    # stages are registered earlier in the file -- confirm.
    pipe.run()
    # Push the local diagnostics to the global database.
    # This call is reached only if pipe.run() returns normally; there is no
    # try/finally here, so an exception from run() skips the merge.
    diagnostics.merge()