def cluster(seqfile, seqnames, identity=1.0, prefix='cluster-', threads=None):
    """Cluster the named sequences and return the non-'C' uclust records.

    The sequences listed in ``seqnames`` are fetched from ``seqfile`` into a
    temporary FASTA file via ``wrap.esl_sfetch``, clustered with
    ``uclust.cluster`` at ``pct_id=identity``, and the resulting ``.uc``
    output is parsed with ``uclust.parse_uclust_as_df``.

    ``prefix`` is used as the name prefix for both temporary files (created
    through ``util.ntf`` -- presumably a named-temporary-file helper; confirm
    against ``util``). Any ``/`` in it is replaced with a backslash so it is
    not interpreted as a directory separator.

    ``threads`` is forwarded unchanged to ``uclust.cluster``.

    Returns the parsed table restricted to rows whose ``type`` is not ``'C'``
    and to the columns ``type``, ``query_label`` and ``target_label``.
    """
    # A '/' in the prefix confuses the filesystem when the temp files are
    # created, so neutralise it before building any file names.
    prefix = prefix.replace('/', '\\')

    with util.ntf(prefix=prefix, suffix='.fasta') as fa, \
            util.ntf(prefix=prefix, suffix='.uc') as uc:
        wrap.esl_sfetch(seqfile, seqnames, fa)
        # uclust is handed the file by name, so the fetched sequences must be
        # flushed to disk before it runs.
        fa.flush()
        uclust.cluster(fa.name, uc.name, pct_id=identity,
                       pre_sorted=False, quiet=True, threads=threads)
        df = uclust.parse_uclust_as_df(uc)
        # Drop the 'C' rows and keep only the label columns.
        df = df[df.type != 'C']
        df = df[['type', 'query_label', 'target_label']]
    return df
def cluster(seqfile, seqnames, identity=1.0, prefix='cluster-', threads=None):
    """Run uclust over a subset of sequences and tabulate the assignments.

    Steps, in order:

    1. Sanitize ``prefix`` (``/`` becomes a backslash) and open two temporary
       files through ``util.ntf``: one ``.fasta``, one ``.uc``.
    2. Write the sequences named in ``seqnames`` from ``seqfile`` into the
       FASTA file with ``wrap.esl_sfetch``.
    3. Call ``uclust.cluster`` on the FASTA file with ``pct_id=identity``,
       ``pre_sorted=False``, ``quiet=True`` and the given ``threads``.
    4. Parse the ``.uc`` file with ``uclust.parse_uclust_as_df``.

    The returned table excludes rows with ``type == 'C'`` and contains only
    the ``type``, ``query_label`` and ``target_label`` columns.
    """
    wanted_columns = ['type', 'query_label', 'target_label']

    # / confuses the filesystem when it appears in a temp-file name prefix
    prefix = prefix.replace('/', '\\')

    with util.ntf(prefix=prefix, suffix='.fasta') as fa:
        with util.ntf(prefix=prefix, suffix='.uc') as uc:
            wrap.esl_sfetch(seqfile, seqnames, fa)
            # Push buffered sequence data to disk; uclust opens the file by
            # name rather than through this handle.
            fa.flush()

            uclust.cluster(
                fa.name,
                uc.name,
                pct_id=identity,
                pre_sorted=False,
                quiet=True,
                threads=threads,
            )

            records = uclust.parse_uclust_as_df(uc)
            not_type_c = records.type != 'C'
            return records[not_type_c][wanted_columns]
def test01(self):
    # Parse the fixture referenced by self.infile into a table of records.
    records = uclust.parse_uclust_as_df(self.infile)

    # target_label always has a value for types S and H, i.e. for every
    # record whose type is not 'C'.
    non_c_records = records[records['type'] != 'C']
    target_missing = non_c_records['target_label'].isnull()

    self.assertFalse(any(target_missing))