Beispiel #1
0
    def dereplicate(fasta):
        '''
        Tally identical sequences found in a fasta file.

        Returns a pair of dictionaries:
          - {seq => total number of records with that sequence}
          - {seq => {sample => number of records from that sample}}
        where the sample of a record is whatever util.strip_fasta_label
        returns for the record's id.
        '''

        totals = {}
        by_sample = {}
        for entry in SeqIO.parse(fasta, 'fasta'):
            sequence = str(entry.seq)
            origin = util.strip_fasta_label(entry.id)

            # overall abundance of this exact sequence
            totals[sequence] = totals.get(sequence, 0) + 1

            # abundance of this sequence broken down by sample
            per_sample = by_sample.setdefault(sequence, {})
            per_sample[origin] = per_sample.get(origin, 0) + 1

        return totals, by_sample
Beispiel #2
0
        def process_line(line):
            '''
            Handle one tab-separated mapping line.

            The first column (after util.strip_fasta_label) is the query and
            the second is the target. Hits are passed to write_out as-is; a
            target equal to no_hit is reported as 'no_hit', but only when
            save_no_hit is set.
            '''
            columns = line.rstrip().split('\t')
            label = util.strip_fasta_label(columns[0])
            hit = columns[1]

            if hit != no_hit:
                write_out(label, hit)
                return

            # from here on the target is the no-hit marker
            if save_no_hit:
                write_out(label, 'no_hit')
Beispiel #3
0
        def process_line(line):
            '''
            Write a "seq: otu" entry to output for one tab-separated line.

            Column 1 (after util.strip_fasta_label) is the sequence label and
            column 2 is the hit type:
              - 'chimera': nothing is written
              - 'otu': the OTU is column 5 when column 3 is '*', otherwise
                column 6
              - 'match': the OTU is column 5

            Raises ValueError for any other hit type, rather than failing
            later on an unassigned variable.
            '''
            fields = line.rstrip().split('\t')
            seq = util.strip_fasta_label(fields[0])
            hit_type = fields[1]

            if hit_type == 'chimera':
                # chimera lines produce no output
                return

            if hit_type == 'otu':
                # column 3 decides which column carries the OTU label
                otu = fields[4] if fields[2] == '*' else fields[5]
            elif hit_type == 'match':
                otu = fields[4]
            else:
                raise ValueError(
                    "unrecognized hit type {!r} in line: {!r}".format(hit_type, line))

            output.write("{}: {}\n".format(seq, otu))
Beispiel #4
0
    def b6_to_dict(b6, no_hit="*", save_no_hit=True):
        '''
        Read a blast6 mapping file into a dict {query => target}.

        b6: path of the mapping file; lines are split on whitespace
        no_hit: target value that marks a query without a hit
        save_no_hit: if true, queries without a hit are kept (mapped to the
            no_hit value as it appears in the file); otherwise they are
            left out
        '''

        mapping = {}
        with open(b6) as handle:
            for row in handle:
                columns = row.split()
                label = util.strip_fasta_label(columns[0])
                hit = columns[1]

                # drop unmatched queries unless asked to keep them
                if hit == no_hit and not save_no_hit:
                    continue

                mapping[label] = hit

        return mapping
Beispiel #5
0
    def up_to_dict(up, keep_chimera=False):
        '''
        Read a uparse mapping file into a dict {seq => otu}.

        up: path of the mapping file; lines are split on whitespace
        keep_chimera: if true, sequences whose hit type is 'chimera' are
            mapped to the string 'chimera'; otherwise they are skipped

        For every other hit type the OTU is taken from the fifth column.
        '''

        mapping = {}
        with open(up) as handle:
            for row in handle:
                columns = row.split()
                label = util.strip_fasta_label(columns[0])
                kind, assigned = columns[1], columns[4]

                if kind == 'chimera':
                    if not keep_chimera:
                        # chimeras are not recorded at all
                        continue
                    assigned = 'chimera'

                mapping[label] = assigned

        return mapping
Beispiel #6
0
 def b6_line_to_query_hit(line):
     '''
     Split one tab-separated blast6 line into (query, hit).

     The query is the first column passed through util.strip_fasta_label;
     the hit is the second column, unchanged.
     '''
     columns = line.rstrip().split("\t")
     raw_query, hit = columns[:2]
     return util.strip_fasta_label(raw_query), hit