Python Reads.filter 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: dark.reads

클래스/타입: Reads

메소드/함수: filter

hotexamples.com에서의 예제들: 8

Python Reads.filter - 8개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 dark.reads.Reads.filter에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

Reads(30)

add(30)

save(4)

__init__(3)

filter(2)

variableSites(1)

예제 #1

파일 보기

파일: proteins.py 프로젝트: terrycojones/dark-matter

    def add(self, virusTitle, sampleName):
        """
        Register a (virus title, sample name) pair and return the name of the
        FASTA file holding its reads, writing that file on first use.

        A previously unseen title or name is given the next unused integer
        index. The file name is cached in C{self._fastaFilenames} under the
        (virus index, sample index) key, so later calls for the same pair
        return the cached name without writing anything.

        @param virusTitle: A C{str} virus title.
        @param sampleName: A C{str} sample name.
        @return: A C{str} FASTA file name holding all the reads (without
            duplicates) from the sample that matched the proteins in the given
            virus.
        """
        viruses = self._viruses
        virusIndex = viruses.setdefault(virusTitle, len(viruses))
        samples = self._samples
        sampleIndex = samples.setdefault(sampleName, len(samples))
        key = (virusIndex, sampleIndex)

        try:
            return self._fastaFilenames[key]
        except KeyError:
            # First request for this pair: fall through and build the file.
            pass

        allReads = Reads()
        matches = self._proteinGrouper.virusTitles[virusTitle][sampleName]
        for proteinMatch in matches:
            fastaReads = FastaReads(proteinMatch['fastaFilename'],
                                    checkAlphabet=0)
            for read in fastaReads:
                allReads.add(read)

        # The output directory is taken from the last protein match seen in
        # the loop above (so at least one match is needed to get here safely).
        basename = 'virus-%d-sample-%d.fasta' % key
        saveFilename = join(proteinMatch['outDir'], basename)
        deduplicated = allReads.filter(removeDuplicates=True)
        deduplicated.save(saveFilename)
        self._fastaFilenames[key] = saveFilename
        return saveFilename

예제 #2

파일 보기

    def add(self, pathogenName, sampleName):
        """
        Add a (pathogen name, sample name) combination and get its FASTA/FASTQ
        file name. Write the FASTA/FASTQ file if it does not already exist
        and, when writing it, save the unique read count into
        C{self._proteinGrouper}.

        A previously unseen pathogen or sample name is given the next unused
        integer index. The file name is cached in C{self._readsFilenames}
        under the (pathogen index, sample index) key, so later calls for the
        same combination return the cached name without writing anything.

        @param pathogenName: A C{str} pathogen name.
        @param sampleName: A C{str} sample name.
        @return: A C{str} giving the FASTA/FASTQ file name holding all the
            reads (without duplicates, by id) from the sample that matched the
            proteins in the given pathogen. Note that the read count is not
            returned, it is only stored in C{self._proteinGrouper}.
        """
        pathogenIndex = self._pathogens.setdefault(pathogenName,
                                                   len(self._pathogens))
        sampleIndex = self._samples.setdefault(sampleName, len(self._samples))
        key = (pathogenIndex, sampleIndex)

        try:
            return self._readsFilenames[key]
        except KeyError:
            reads = Reads()
            for proteinMatch in self._proteinGrouper.pathogenNames[
                    pathogenName][sampleName]['proteins'].values():
                for read in self._readsClass(proteinMatch['readsFilename']):
                    reads.add(read)
            # The output directory is taken from the last protein match seen
            # in the loop above.
            saveFilename = join(
                proteinMatch['outDir'], 'pathogen-%d-sample-%d.%s' %
                (pathogenIndex, sampleIndex, self._format))
            # Save the object returned by filter() instead of discarding it.
            # This is right whether filter() hands back a new filtered
            # collection or the same instance, so duplicates are never
            # written by accident.
            uniqueReads = reads.filter(removeDuplicatesById=True)
            nReads = uniqueReads.save(saveFilename, format_=self._format)
            # Save the unique read count into self._proteinGrouper
            self._proteinGrouper.pathogenNames[pathogenName][sampleName][
                'uniqueReadCount'] = nReads
            self._readsFilenames[key] = saveFilename
            return saveFilename

예제 #3

파일 보기

파일: test_reads.py 프로젝트: sophiemathias/dark-matter

 def testFilterOnMaxLength(self):
     """
     A maximum length filter must keep a read whose length equals the
     limit and drop a read that is longer.
     """
     longRead = Read('id1', 'ATCG')
     shortRead = Read('id2', 'ACG')
     reads = Reads()
     for read in (longRead, shortRead):
         reads.add(read)
     filtered = list(reads.filter(maxLength=3))
     self.assertEqual([shortRead], filtered)

예제 #4

파일 보기

파일: test_reads.py 프로젝트: sophiemathias/dark-matter

 def testFilterWithMinLengthEqualToMaxLength(self):
     """
     If the minimum and maximum lengths passed to the filter are the same,
     a read of exactly that length must be returned.
     """
     fourBases = Read('id1', 'ATCG')
     threeBases = Read('id2', 'ACG')
     reads = Reads()
     for read in (fourBases, threeBases):
         reads.add(read)
     filtered = list(reads.filter(minLength=4, maxLength=4))
     self.assertEqual([fourBases], filtered)

예제 #5

파일 보기

파일: test_reads.py 프로젝트: sophiemathias/dark-matter

 def testFilterOnLengthEverythingMatches(self):
     """
     If every read satisfies the length requirements, filtering on length
     must return all of them, in the order they were added.
     """
     first = Read('id1', 'ATCG')
     second = Read('id2', 'ACG')
     reads = Reads()
     for read in (first, second):
         reads.add(read)
     filtered = list(reads.filter(minLength=2, maxLength=5))
     self.assertEqual([first, second], filtered)

예제 #6

파일 보기

파일: test_reads.py 프로젝트: sophiemathias/dark-matter

 def testFilterOnLengthNothingMatches(self):
     """
     If no read satisfies the length requirements, filtering on length
     must return no reads at all.
     """
     first = Read('id1', 'ATCG')
     second = Read('id2', 'ACG')
     reads = Reads()
     for read in (first, second):
         reads.add(read)
     filtered = list(reads.filter(minLength=10, maxLength=15))
     self.assertEqual([], filtered)

예제 #7

파일 보기

파일: proteins.py 프로젝트: bamueh/dark-matter

    def add(self, pathogenName, sampleName):
        """
        Add a (pathogen name, sample name) combination and get its FASTA/FASTQ
        file name. Write the FASTA/FASTQ file if it does not already exist
        and, when writing it, save the unique read count into
        C{self._proteinGrouper}.

        A previously unseen pathogen or sample name is given the next unused
        integer index. The file name is cached in C{self._readsFilenames}
        under the (pathogen index, sample index) key, so later calls for the
        same combination return the cached name without writing anything.

        @param pathogenName: A C{str} pathogen name.
        @param sampleName: A C{str} sample name.
        @return: A C{str} giving the FASTA/FASTQ file name holding all the
            reads (without duplicates, by id) from the sample that matched the
            proteins in the given pathogen. Note that the read count is not
            returned, it is only stored in C{self._proteinGrouper}.
        """
        pathogenIndex = self._pathogens.setdefault(pathogenName,
                                                   len(self._pathogens))
        sampleIndex = self._samples.setdefault(sampleName, len(self._samples))
        key = (pathogenIndex, sampleIndex)

        try:
            return self._readsFilenames[key]
        except KeyError:
            reads = Reads()
            for proteinMatch in self._proteinGrouper.pathogenNames[
                    pathogenName][sampleName]['proteins'].values():
                for read in self._readsClass(proteinMatch['readsFilename']):
                    reads.add(read)
            # The output directory is taken from the last protein match seen
            # in the loop above.
            saveFilename = join(
                proteinMatch['outDir'],
                'pathogen-%d-sample-%d.%s' % (pathogenIndex, sampleIndex,
                                              self._format))
            # Save the object returned by filter() instead of discarding it.
            # This is right whether filter() hands back a new filtered
            # collection or the same instance, so duplicates are never
            # written by accident.
            uniqueReads = reads.filter(removeDuplicatesById=True)
            nReads = uniqueReads.save(saveFilename, format_=self._format)
            # Save the unique read count into self._proteinGrouper
            self._proteinGrouper.pathogenNames[
                pathogenName][sampleName]['uniqueReadCount'] = nReads
            self._readsFilenames[key] = saveFilename
            return saveFilename

예제 #8

파일 보기

        sys.exit(2)

    if variableSites:
        toDelete = set()
        if args.printSites:
            for site, counts in variableSites.items():
                if site >= baseOffset:
                    ref = ((' (ref %s)' %
                            reference.sequence[site]) if reference else '')
                    print('%d: %s%s' % (site + 1 - baseOffset, counts, ref),
                          file=sys.stderr)
                else:
                    toDelete.add(site)

        for site in toDelete:
            del variableSites[site]

    if variableSites:
        if args.sitesOnly:
            print(','.join(
                map(lambda site: str(site + 1 - baseOffset),
                    sorted(variableSites))))
        else:
            saveAs = 'fasta' if args.fasta else 'fastq'
            reads.filter(keepSites=set(variableSites)).save(sys.stdout, saveAs)
            printHeader(variableSites, args, baseOffset)
    else:
        print('No sites were %svariable (threshold for homogeneity: %.3f).' %
              ('confirmed ' if args.confirm else '', args.homogeneous),
              file=sys.stderr)