Exemple #1
0
    def __init__(self, reads, blastFilenames, databaseFilename=None,
                 databaseDirectory=None, sqliteDatabaseFilename=None,
                 scoreClass=HigherIsBetterScore,
                 sortBlastFilenames=True, randomizeZeroEValues=True):
        """
        Record the BLAST output filenames and database settings, then
        initialize the C{ReadsAlignments} base using the application name
        and parameters found by the reader of the first file.

        @param reads: Passed unchanged to C{ReadsAlignments.__init__}.
        @param blastFilenames: Either a single C{str} filename or a list of
            filenames. A single C{str} (or C{str} subclass instance) is
            wrapped in a one-element list.
        @param databaseFilename: Stored on the instance as
            C{_databaseFilename}; not otherwise used here.
        @param databaseDirectory: Stored on the instance as
            C{_databaseDirectory}; not otherwise used here.
        @param sqliteDatabaseFilename: Stored on the instance as
            C{_sqliteDatabaseFilename}; not otherwise used here.
        @param scoreClass: Passed to C{self._getReader} and to
            C{ReadsAlignments.__init__}. It also selects the score title.
        @param sortBlastFilenames: If true, the filenames are passed through
            C{numericallySortFilenames} before being stored.
        @param randomizeZeroEValues: Stored on the instance as
            C{randomizeZeroEValues}; not otherwise used here.
        """
        # Use isinstance rather than an exact type comparison so that
        # instances of str subclasses are also treated as one filename.
        if isinstance(blastFilenames, str):
            blastFilenames = [blastFilenames]
        if sortBlastFilenames:
            self.blastFilenames = numericallySortFilenames(blastFilenames)
        else:
            self.blastFilenames = blastFilenames
        self._databaseFilename = databaseFilename
        self._sqliteDatabaseFilename = sqliteDatabaseFilename
        self._databaseDirectory = databaseDirectory
        self._subjectTitleToSubject = None
        self.randomizeZeroEValues = randomizeZeroEValues

        # Prepare application parameters in order to initialize self. The
        # reader of the first file supplies them, so at least one filename
        # must have been given.
        self._reader = self._getReader(self.blastFilenames[0], scoreClass)
        application = self._reader.application
        # Deep copy so the params handed on are independent of the reader's.
        blastParams = copy.deepcopy(self._reader.params)
        # Every application other than 'blastx' is flagged as having
        # nucleotide subjects.
        subjectIsNucleotides = application != 'blastx'
        scoreTitle = ('Bit score' if scoreClass is HigherIsBetterScore
                      else '$- log_{10}(e)$')

        applicationParams = ReadsAlignmentsParams(
            application, blastParams,
            subjectIsNucleotides=subjectIsNucleotides, scoreTitle=scoreTitle)

        ReadsAlignments.__init__(self, reads, applicationParams,
                                 scoreClass=scoreClass)
Exemple #2
0
    def __init__(self,
                 reads,
                 blastFilenames,
                 scoreClass=HigherIsBetterScore,
                 sortBlastFilenames=True,
                 randomizeZeroEValues=True):
        """
        Record the BLAST output filenames, then initialize the
        C{ReadsAlignments} base using the application name and parameters
        found by the reader of the first file.

        @param reads: Passed unchanged to C{ReadsAlignments.__init__}.
        @param blastFilenames: Either a single C{str} filename or a list of
            filenames. A single C{str} (or C{str} subclass instance) is
            wrapped in a one-element list.
        @param scoreClass: Passed to C{self._getReader} and to
            C{ReadsAlignments.__init__}. It also selects the score title.
        @param sortBlastFilenames: If true, the filenames are passed through
            C{numericallySortFilenames} before being stored.
        @param randomizeZeroEValues: Stored on the instance as
            C{randomizeZeroEValues}; not otherwise used here.
        """
        # Use isinstance rather than an exact type comparison so that
        # instances of str subclasses are also treated as one filename.
        if isinstance(blastFilenames, str):
            blastFilenames = [blastFilenames]
        if sortBlastFilenames:
            self.blastFilenames = numericallySortFilenames(blastFilenames)
        else:
            self.blastFilenames = blastFilenames
        self.randomizeZeroEValues = randomizeZeroEValues

        # Prepare application parameters in order to initialize self. The
        # reader of the first file supplies them, so at least one filename
        # must have been given.
        self._reader = self._getReader(self.blastFilenames[0], scoreClass)
        application = self._reader.application
        # Deep copy so the params handed on are independent of the reader's.
        blastParams = copy.deepcopy(self._reader.params)
        # Every application other than 'blastx' is flagged as having
        # nucleotide subjects.
        subjectIsNucleotides = application != 'blastx'
        scoreTitle = ('Bit score' if scoreClass is HigherIsBetterScore else
                      '$- log_{10}(e)$')

        applicationParams = ReadsAlignmentsParams(
            application,
            blastParams,
            subjectIsNucleotides=subjectIsNucleotides,
            scoreTitle=scoreTitle)

        ReadsAlignments.__init__(self,
                                 reads,
                                 applicationParams,
                                 scoreClass=scoreClass)
Exemple #3
0
    def __init__(self, resultFilenames, database, sortFilenames=True):
        """
        Record the result filenames and database, build a lookup of the
        database subjects, and initialize the C{ReadsAlignments} base with
        an initially empty C{Reads} instance.

        @param resultFilenames: Either a single C{str} filename or a list of
            filenames. A single C{str} is wrapped in a one-element list.
        @param database: An object providing C{getSubjects()}. Each subject
            it yields must have a C{read} with C{id} and C{sequence}
            attributes.
        @param sortFilenames: If true, the filenames are passed through
            C{numericallySortFilenames} before being stored.
        """
        if isinstance(resultFilenames, str):
            resultFilenames = [resultFilenames]
        self.resultFilenames = (
            numericallySortFilenames(resultFilenames) if sortFilenames
            else resultFilenames)
        self._database = database

        # Map each database subject's read id to its read sequence so that
        # subjects can be looked up later.
        self._subjects = {
            subject.read.id: subject.read.sequence
            for subject in database.getSubjects()
        }

        # The reader of the first result file supplies the parameters
        # needed to initialize self. They are deep copied before use.
        self._reader = self._getReader(self.resultFilenames[0])
        readerParams = copy.deepcopy(self._reader.params)

        alignmentsParams = ReadsAlignmentsParams(
            'light', applicationParams=readerParams,
            subjectIsNucleotides=False)

        # self._reads starts out empty and is added to as the results
        # (which contain the read ids and sequences) are processed. This
        # means the reads are not available to the ReadsAlignments instance
        # until after all results have been read.
        self._reads = Reads()

        ReadsAlignments.__init__(self, self._reads, alignmentsParams)
    def __init__(self, reads, filenames, databaseFilename=None,
                 databaseDirectory=None, sqliteDatabaseFilename=None,
                 scoreClass=HigherIsBetterScore, sortFilenames=False,
                 randomizeZeroEValues=True):
        """
        Record the DIAMOND output filenames and database settings, then
        initialize the C{ReadsAlignments} base using the task name and
        parameters found by the reader of the first file.

        @param reads: Passed unchanged to C{ReadsAlignments.__init__}.
        @param filenames: Either a single C{str} filename or a list of
            filenames. A single C{str} (or C{str} subclass instance) is
            wrapped in a one-element list.
        @param databaseFilename: Stored on the instance as
            C{_databaseFilename}; not otherwise used here.
        @param databaseDirectory: Stored on the instance as
            C{_databaseDirectory}; not otherwise used here.
        @param sqliteDatabaseFilename: Stored on the instance as
            C{_sqliteDatabaseFilename}; not otherwise used here.
        @param scoreClass: Passed to C{self._getReader} and to
            C{ReadsAlignments.__init__}. It also selects the score title.
        @param sortFilenames: If true, the filenames are passed through
            C{numericallySortFilenames} before being stored. Note that the
            default is C{False}.
        @param randomizeZeroEValues: Stored on the instance as
            C{randomizeZeroEValues}; not otherwise used here.
        """
        # Use isinstance rather than an exact type comparison so that
        # instances of str subclasses are also treated as one filename.
        if isinstance(filenames, str):
            filenames = [filenames]
        if sortFilenames:
            self.filenames = numericallySortFilenames(filenames)
        else:
            self.filenames = filenames

        self._databaseFilename = databaseFilename
        self._sqliteDatabaseFilename = sqliteDatabaseFilename
        self._databaseDirectory = databaseDirectory
        self._subjectTitleToSubject = None
        self.randomizeZeroEValues = randomizeZeroEValues

        # Prepare diamondTask parameters in order to initialize self. The
        # reader of the first file supplies them, so at least one filename
        # must have been given.
        self._reader = self._getReader(self.filenames[0], scoreClass)
        diamondTask = self._reader.diamondTask
        # Deep copy so the params handed on are independent of the reader's.
        diamondParams = copy.deepcopy(self._reader.params)
        scoreTitle = ('Bit score' if scoreClass is HigherIsBetterScore
                      else '$- log_{10}(e)$')

        diamondTaskParams = ReadsAlignmentsParams(
            diamondTask, diamondParams,
            subjectIsNucleotides=False,  # DIAMOND dbs are always protein.
            scoreTitle=scoreTitle)

        ReadsAlignments.__init__(self, reads, diamondTaskParams,
                                 scoreClass=scoreClass)
Exemple #5
0
 def testSeveralNames(self):
     """
     Several numeric names given out of order must be returned in
     ascending numeric order.
     """
     unsorted = ['3.json', '1.json', '2.json']
     expected = ['1.json', '2.json', '3.json']
     self.assertEqual(expected, numericallySortFilenames(unsorted))
Exemple #6
0
 def testSeveralNames(self):
     """
     Several numeric names given out of order must be returned in
     ascending numeric order.
     """
     shuffled = ['3.json', '1.json', '2.json']
     inOrder = ['1.json', '2.json', '3.json']
     self.assertEqual(inOrder, numericallySortFilenames(shuffled))
Exemple #7
0
 def testSeveralNamesWithUnequalPrefixLengths(self):
     """
     Names whose numeric prefixes have different numbers of digits must
     be returned in ascending numeric order.
     """
     unsorted = ['3.json', '21.json', '35.json', '250.json', '2.json']
     expected = ['2.json', '3.json', '21.json', '35.json', '250.json']
     self.assertEqual(expected, numericallySortFilenames(unsorted))
Exemple #8
0
 def testBasename(self):
     """
     Names that include a directory part must be sorted according to
     their file basename.
     """
     unsorted = [
         '../output/3.json', '../output/21.json', '../output/35.json',
         '../output/250.json', '../output/2.json',
     ]
     expected = [
         '../output/2.json', '../output/3.json', '../output/21.json',
         '../output/35.json', '../output/250.json',
     ]
     self.assertEqual(expected, numericallySortFilenames(unsorted))
Exemple #9
0
 def testSeveralNamesWithUnequalPrefixLengths(self):
     """
     Names whose numeric prefixes have different numbers of digits must
     be returned in ascending numeric order.
     """
     shuffled = ['3.json', '21.json', '35.json', '250.json', '2.json']
     inOrder = ['2.json', '3.json', '21.json', '35.json', '250.json']
     self.assertEqual(inOrder, numericallySortFilenames(shuffled))
Exemple #10
0
 def testBasename(self):
     """
     Names that include a directory part must be sorted according to
     their file basename.
     """
     shuffled = [
         '../output/3.json', '../output/21.json', '../output/35.json',
         '../output/250.json', '../output/2.json',
     ]
     inOrder = [
         '../output/2.json', '../output/3.json', '../output/21.json',
         '../output/35.json', '../output/250.json',
     ]
     self.assertEqual(inOrder, numericallySortFilenames(shuffled))
Exemple #11
0
 def testOneNumericName(self):
     """
     A one-element list holding a numeric name must come back as a list
     holding just that name.
     """
     names = ['3.json']
     result = numericallySortFilenames(names)
     self.assertEqual(['3.json'], result)
Exemple #12
0
 def testNoNames(self):
     """
     Passing an empty list must give back an empty list.
     """
     result = numericallySortFilenames([])
     self.assertEqual([], result)
Exemple #13
0
 def testOneNumericName(self):
     """
     A one-element list holding a numeric name must come back as a list
     holding just that name.
     """
     onlyName = ['3.json']
     sortedNames = numericallySortFilenames(onlyName)
     self.assertEqual(['3.json'], sortedNames)
Exemple #14
0
 def testNoNames(self):
     """
     Passing an empty list must give back an empty list.
     """
     sortedNames = numericallySortFilenames([])
     self.assertEqual([], sortedNames)
Exemple #15
0
    # Flatten lists of lists that we get from using both nargs='+' and
    # action='append'. We use both because it allows people to use (e.g.)
    # --json on the command line either via "--json file1 --json file2" or
    # "--json file1 file2", or a combination of these. That way it's not
    # necessary to remember which way you're supposed to use it and you also
    # can't be hit by the subtle problem encountered in
    # https://github.com/acorg/dark-matter/issues/453
    jsonFiles = list(chain.from_iterable(args.json))
    whitelist = (
        set(chain.from_iterable(args.whitelist)) if args.whitelist else None)
    blacklist = (
        set(chain.from_iterable(args.blacklist)) if args.blacklist else None)

    if args.fasta:
        if args.sortFilenames:
            files = numericallySortFilenames(chain.from_iterable(args.fasta))
        else:
            files = list(chain.from_iterable(args.fasta))
        reads = FastaReads(files)
    else:
        if args.sortFilenames:
            files = numericallySortFilenames(chain.from_iterable(args.fastq))
        else:
            files = list(chain.from_iterable(args.fastq))
        reads = FastqReads(files)

    if args.matcher == 'blast':
        from dark.blast.alignments import BlastReadsAlignments
        readsAlignments = BlastReadsAlignments(
            reads, jsonFiles, databaseFilename=args.databaseFastaFilename,
            databaseDirectory=args.databaseFastaDirectory,
    # --json on the command line either via "--json file1 --json file2" or
    # "--json file1 file2", or a combination of these. That way it's not
    # necessary to remember which way you're supposed to use it and you also
    # can't be hit by the subtle problem encountered in
    # https://github.com/acorg/dark-matter/issues/453
    jsonFiles = list(chain.from_iterable(args.json))
    whitelist = (
        set(chain.from_iterable(args.whitelist)) if args.whitelist else None)
    blacklist = (
        set(chain.from_iterable(args.blacklist)) if args.blacklist else None)

    # TODO: Add a --readClass command-line option in case we want to
    # process FASTA containing AA sequences.
    if args.fasta:
        if args.sortFilenames:
            files = numericallySortFilenames(chain.from_iterable(args.fasta))
        else:
            files = list(chain.from_iterable(args.fasta))
        reads = FastaReads(files)
    else:
        if args.sortFilenames:
            files = numericallySortFilenames(chain.from_iterable(args.fastq))
        else:
            files = list(chain.from_iterable(args.fastq))
        reads = FastqReads(files)

    if args.matcher == 'blast':
        from dark.blast.alignments import BlastReadsAlignments
        readsAlignments = BlastReadsAlignments(
            reads, jsonFiles, databaseFilename=args.databaseFastaFilename,
            databaseDirectory=args.databaseFastaDirectory,