Example #1
0
def cluster(seqfile, seqnames, identity=1.0, prefix='cluster-', threads=None):
    """Cluster a subset of sequences with uclust and return the hit table.

    The sequences named in ``seqnames`` are fetched from ``seqfile`` into a
    temporary FASTA file via ``wrap.esl_sfetch``, clustered with
    ``uclust.cluster`` at ``pct_id=identity``, and the resulting ``.uc``
    output is parsed with ``uclust.parse_uclust_as_df``.

    Parameters:
        seqfile: sequence source handed to ``wrap.esl_sfetch``.
        seqnames: names of the sequences to extract and cluster.
        identity: identity threshold passed to uclust as ``pct_id``.
        prefix: name prefix for both temporary files.
        threads: forwarded unchanged to ``uclust.cluster``.

    Returns:
        The parsed table restricted to rows whose ``type`` is not ``'C'``
        and to the columns ``type``, ``query_label`` and ``target_label``.
    """
    with util.ntf(prefix=prefix, suffix='.fasta') as fasta_tf:
        with util.ntf(prefix=prefix, suffix='.uc') as uclust_tf:
            wrap.esl_sfetch(seqfile, seqnames, fasta_tf)
            # Flush so the clustering step, which is given the file by
            # name, sees everything that was written through the handle.
            fasta_tf.flush()

            uclust.cluster(fasta_tf.name, uclust_tf.name,
                           pct_id=identity, pre_sorted=False,
                           quiet=True, threads=threads)

            records = uclust.parse_uclust_as_df(uclust_tf)
            # NOTE(review): 'C' rows are presumably per-cluster summary
            # records in the UC format -- confirm against the parser.
            not_summary = records.type != 'C'
            records = records[not_summary]
            return records[['type', 'query_label', 'target_label']]
Example #2
0
def cluster(seqfile, seqnames, identity=1.0, prefix='cluster-', threads=None):
    """Cluster a subset of sequences with uclust and return the hit table.

    Any ``/`` in ``prefix`` is replaced with a backslash before it is used
    to name the temporary files. The sequences named in ``seqnames`` are
    then fetched from ``seqfile`` into a temporary FASTA file, clustered
    with ``uclust.cluster`` at ``pct_id=identity``, and the ``.uc`` output
    is parsed with ``uclust.parse_uclust_as_df``.

    Parameters:
        seqfile: sequence source handed to ``wrap.esl_sfetch``.
        seqnames: names of the sequences to extract and cluster.
        identity: identity threshold passed to uclust as ``pct_id``.
        prefix: name prefix for both temporary files.
        threads: forwarded unchanged to ``uclust.cluster``.

    Returns:
        The parsed table restricted to rows whose ``type`` is not ``'C'``
        and to the columns ``type``, ``query_label`` and ``target_label``.
    """
    # A '/' in the prefix confuses the filesystem when it ends up inside a
    # temporary file name, so swap it for a backslash.
    safe_prefix = prefix.replace('/', '\\')

    with util.ntf(prefix=safe_prefix, suffix='.fasta') as fasta_tf:
        with util.ntf(prefix=safe_prefix, suffix='.uc') as uclust_tf:
            wrap.esl_sfetch(seqfile, seqnames, fasta_tf)
            # Flush so the clustering step, which is given the file by
            # name, sees everything that was written through the handle.
            fasta_tf.flush()

            uclust.cluster(fasta_tf.name, uclust_tf.name,
                           pct_id=identity, pre_sorted=False,
                           quiet=True, threads=threads)

            records = uclust.parse_uclust_as_df(uclust_tf)
            # NOTE(review): 'C' rows are presumably per-cluster summary
            # records in the UC format -- confirm against the parser.
            not_summary = records.type != 'C'
            records = records[not_summary]
            return records[['type', 'query_label', 'target_label']]
Example #3
0
def add_clusters_to_refpkg(refpkg, **kwargs):
    """Annotate a reference package with cluster taxids and save the result.

    Reads the package's ``taxonomy`` and ``seq_info`` resources, passes them
    to ``add_cluster_taxids`` (which is expected to annotate them in place;
    its return value is not used), then writes both back to temporary CSV
    files and swaps them into the package inside a single transaction.

    Parameters:
        refpkg: reference package exposing ``open_resource``,
            ``update_file`` and the transaction methods used below.
        **kwargs: forwarded unchanged to ``add_cluster_taxids``.
    """
    with refpkg.open_resource('taxonomy') as taxonomy_handle:
        tax = taxtable.read(taxonomy_handle)

    with refpkg.open_resource('seq_info') as seq_info_handle:
        reader = csv.DictReader(seq_info_handle)
        seq_info_rows = list(reader)

    # Annotate
    add_cluster_taxids(tax, seq_info_rows, **kwargs)

    with util.ntf(prefix='seq_info-', suffix='.csv') as seqinfo_tf:
        with util.ntf(prefix='taxonomy-', suffix='.csv') as tax_tf:
            # Write the sequence info using the column order of the
            # original file.
            writer = csv.DictWriter(seqinfo_tf, reader.fieldnames)
            writer.writeheader()
            writer.writerows(seq_info_rows)
            # Close before handing the path to the refpkg so the contents
            # are fully on disk.
            seqinfo_tf.close()

            tax.write_taxtable(tax_tf)
            tax_tf.close()

            # Replace both resources together in one transaction.
            refpkg.start_transaction()
            refpkg.update_file('seq_info', seqinfo_tf.name)
            refpkg.update_file('taxonomy', tax_tf.name)
            refpkg.commit_transaction()