コード例 #1
0
ファイル: normalize-by-median.py プロジェクト: trilynn/khmer
    def __call__(self, is_paired, read0, read1):
        """
        Run digital normalization on one read or one read pair.

        The read(s) are wrapped in a ReadBundle and checked against
        ``self.desired_coverage`` using ``self.countgraph``:

        * if the bundle's ``coverages_at_least`` check passes, nothing is
          consumed and nothing is yielded;
        * otherwise (some read's median k-mer count is below the desired
          coverage) every read in the bundle has its ``cleaned_seq``
          consumed by the countgraph and is then yielded.

        ``is_paired`` is accepted but not used by this method.
        """
        bundle = ReadBundle(read0, read1)
        coverage_goal = self.desired_coverage

        # Coverage goal already met for the bundle: keep none of it.
        if bundle.coverages_at_least(self.countgraph, coverage_goal):
            return

        # Below the goal: feed each read to the countgraph, then emit it.
        for read in bundle.reads:
            self.countgraph.consume(read.cleaned_seq)
            yield read
コード例 #2
0
ファイル: normalize-by-median.py プロジェクト: betatim/khmer
    def __call__(self, is_paired, read0, read1):
        """
        Core digital-normalization step for a single read or read pair.

        Wraps ``read0``/``read1`` in a ReadBundle and asks it whether its
        coverage in ``self.countgraph`` is at least
        ``self.desired_coverage``.  A bundle that meets the target is
        dropped without touching the countgraph.  A bundle that falls short
        is kept in full: each read's ``cleaned_seq`` is consumed by the
        countgraph immediately before that read is yielded.

        The ``is_paired`` argument is not consulted here.
        """
        reads = ReadBundle(read0, read1)
        target = self.desired_coverage

        enough_coverage = reads.coverages_at_least(self.countgraph, target)
        if enough_coverage:
            # nothing to keep; the generator simply ends.
            return

        for kept in reads.reads:
            # consume first, so the graph is updated before the caller
            # sees the read.
            self.countgraph.consume(kept.cleaned_seq)
            yield kept
コード例 #3
0
ファイル: trim-low-abund.py プロジェクト: shannonekj/khmer
    def pass2(self, reader):
        """
        The second pass across the data does the following.

        For every item from ``reader`` the read(s) are wrapped in a
        ReadBundle and the running totals ``n_reads`` / ``n_bp`` are updated.

        1. If we are trimming low-abundance reads (-V not set), OR the
        bundle's coverage is at least ``self.trim_at_coverage``, each read
        is passed through ``trim_record`` with ``self.cutoff``.  Reads that
        were trimmed bump ``trimmed_reads``; a falsy trim result is dropped
        and anything else is yielded.

        2. Otherwise the untrimmed reads are yielded and accounted for in
        ``n_skipped`` (reads) and ``bp_skipped`` (bases).

        ``reader`` must yield 4-tuples ``(n, is_pair, read1, read2)``; only
        the two reads are used here.
        """
        graph = self.graph
        trim_at_coverage = self.trim_at_coverage
        cutoff = self.cutoff

        for _n, _is_pair, read1, read2 in reader:
            bundle = ReadBundle(read1, read2)

            # update the running totals of reads and bases seen.
            self.n_reads += bundle.num_reads
            self.n_bp += bundle.total_length

            if self.do_trim_low_abund or \
               bundle.coverages_at_least(graph, trim_at_coverage):

                for read in bundle.reads:
                    trimmed_record, did_trim = trim_record(graph, read, cutoff)

                    if did_trim:
                        self.trimmed_reads += 1
                    if trimmed_record:
                        yield trimmed_record
            else:
                # bp_skipped is a base count, so add the bundle's length
                # (the same quantity used for n_bp above) instead of 1 per
                # read, which merely duplicated n_skipped.
                self.bp_skipped += bundle.total_length
                for read in bundle.reads:
                    self.n_skipped += 1
                    yield read
コード例 #4
0
ファイル: trim-low-abund.py プロジェクト: costypetrisor/khmer
    def pass2(self, reader):
        """
        The second pass across the data does the following.

        Each item from ``reader`` is a 4-tuple ``(n, is_pair, read1,
        read2)``; the two reads are wrapped in a ReadBundle and ``n_reads``
        / ``n_bp`` are increased by the bundle's read count and total length.

        1. For each bundle, evaluate the coverage. If the coverage is at
        least ``self.trim_at_coverage``, OR we are trimming low-abundance
        reads (-V not set), run every read through ``trim_record`` using
        ``self.cutoff``.  ``trimmed_reads`` counts reads that were actually
        trimmed, and only truthy trim results are yielded.

        2. Otherwise, yield the untrimmed reads, counting them in
        ``n_skipped`` and their bases in ``bp_skipped``.
        """
        graph = self.graph
        trim_at_coverage = self.trim_at_coverage
        cutoff = self.cutoff

        for _n, _is_pair, read1, read2 in reader:
            bundle = ReadBundle(read1, read2)

            # account for everything read, whether or not it gets trimmed.
            self.n_reads += bundle.num_reads
            self.n_bp += bundle.total_length

            if self.do_trim_low_abund or \
               bundle.coverages_at_least(graph, trim_at_coverage):

                for read in bundle.reads:
                    trimmed_record, did_trim = trim_record(graph, read, cutoff)

                    if did_trim:
                        self.trimmed_reads += 1
                    if trimmed_record:
                        yield trimmed_record
            else:
                # Skipped bases are tracked as a length, matching n_bp;
                # the previous "+= 1" per read only repeated n_skipped.
                self.bp_skipped += bundle.total_length
                for read in bundle.reads:
                    self.n_skipped += 1
                    yield read