예제 #1
0
 def testInstantiation(self):
     # Every fixture directory named in self.localIds must load cleanly;
     # the failure is expected only from the consistency check afterwards.
     for setId in self.localIds:
         vcfDirectory = self.getFullPath(setId)
         candidate = variants.HtslibVariantSet(self.dataset, setId)
         candidate.populateFromDirectory(vcfDirectory)
         # Callable form of assertRaises: only checkConsistency() is guarded.
         self.assertRaises(
             exceptions.InconsistentCallSetIdException,
             candidate.checkConsistency)
예제 #2
0
 def getDataModelInstance(self, localId, dataPath):
     """
     Builds the variant set under test for the given local ID, populated
     from the directory dataPath.

     The returned set belongs to self._dataset, is attached to a
     placeholder reference set named "test", and carries fixed patient
     and sample IDs.
     """
     instance = variants.HtslibVariantSet(self._dataset, localId)
     instance.populateFromDirectory(dataPath)
     instance.setReferenceSet(references.AbstractReferenceSet("test"))
     instance.setPatientId("patient1")
     instance.setSampleId("sample1")
     return instance
예제 #3
0
 def _createVariantAnnotationSet(self, vcfDir):
     """
     Builds the variant annotation set fixture from the directory of VCF
     files given by vcfDir.

     Sets self._variantSetName, self._repo (opened with
     datarepo.MODE_READ), self._dataset, self._variantSet and
     self._variantAnnotationSet; the annotation set is given the
     ontology looked up in the repository under paths.ontologyName.
     """
     self._variantSetName = "testVariantSet"
     self._repo = datarepo.SqlDataRepository(paths.testDataRepo)
     self._repo.open(datarepo.MODE_READ)
     self._dataset = datasets.Dataset("testDs")

     # Each attribute is bound right after construction, before the object
     # is used further, so the instance state is the same if a later step
     # raises.
     variantSet = variants.HtslibVariantSet(
         self._dataset, self._variantSetName)
     self._variantSet = variantSet
     variantSet.populateFromDirectory(vcfDir)

     annotationSet = variants.HtslibVariantAnnotationSet(
         variantSet, "testVAs")
     self._variantAnnotationSet = annotationSet
     ontology = self._repo.getOntologyByName(paths.ontologyName)
     annotationSet.setOntology(ontology)
 def getDataModelInstance(self, localId, dataPath):
     """
     Builds the data model object under test from the directory dataPath.

     A variant set named localId is created in a throwaway dataset "ds",
     attached to a placeholder reference set "rs" and given fixed patient
     and sample IDs.  If the variant set reports itself as annotated, its
     first variant annotation set is returned instead, with the ontology
     loaded from paths.ontologyPath attached; otherwise the variant set
     itself is returned.
     """
     owner = datasets.Dataset("ds")
     loaded = variants.HtslibVariantSet(owner, localId)
     loaded.populateFromDirectory(dataPath)
     loaded.setReferenceSet(references.AbstractReferenceSet("rs"))
     loaded.setPatientId("patient1")
     loaded.setSampleId("sample1")

     # Plain (unannotated) data: the variant set is the object under test.
     if not loaded.isAnnotated():
         return loaded

     ontology = ontologies.Ontology(paths.ontologyName)
     ontology.populateFromFile(paths.ontologyPath)
     firstAnnotationSet = loaded.getVariantAnnotationSets()[0]
     firstAnnotationSet.setOntology(ontology)
     return firstAnnotationSet
예제 #5
0
    def createRepo(self):
        """
        Creates the repository for all the data we've just downloaded.

        A repository is initialised at self.repoPath and filled with:
        the "GRCh37-subset" reference set read from self.fastaFilePath,
        the "1kg-p3-subset" dataset, the "mvncall" variant set built from
        the (VCF, index) pairs in self.vcfFilePaths, and one read group set
        per (sample, (BAM, index)) pair taken from self.samples and
        self.bamFilePaths.  After committing, the repository is reopened
        in "r" mode to print a summary and is closed again afterwards.
        """
        # Species annotation shared by the reference set and each of its
        # references.  The term is spelled out in full ("Homo sapiens").
        speciesJson = (
            '{"id": "9606",'
            '"term": "Homo sapiens", "source_name": "NCBI"}')

        repo = datarepo.SqlDataRepository(self.repoPath)
        repo.open("w")
        repo.initialise()

        referenceSet = references.HtslibReferenceSet("GRCh37-subset")
        referenceSet.populateFromFile(self.fastaFilePath)
        referenceSet.setDescription("Subset of GRCh37 used for demonstration")
        referenceSet.setSpeciesFromJson(speciesJson)
        for reference in referenceSet.getReferences():
            reference.setSpeciesFromJson(speciesJson)
            # Mark the accession as a subset of the full sequence.
            reference.setSourceAccessions(
                self.accessions[reference.getName()] + ".subset")
        repo.insertReferenceSet(referenceSet)

        dataset = datasets.Dataset("1kg-p3-subset")
        dataset.setDescription("Sample data from 1000 Genomes phase 3")
        repo.insertDataset(dataset)

        variantSet = variants.HtslibVariantSet(dataset, "mvncall")
        variantSet.setReferenceSet(referenceSet)
        # self.vcfFilePaths holds (data file, index file) pairs; split them
        # into the two parallel lists populateFromFile() takes.
        dataUrls = [vcfFile for vcfFile, _ in self.vcfFilePaths]
        indexFiles = [indexFile for _, indexFile in self.vcfFilePaths]
        variantSet.populateFromFile(dataUrls, indexFiles)
        variantSet.checkConsistency()
        repo.insertVariantSet(variantSet)

        # Samples and BAM files are paired positionally.
        # NOTE(review): zip() silently drops extras if the two lists differ
        # in length -- confirm they are always built together.
        for sample, (bamFile, indexFile) in zip(
                self.samples, self.bamFilePaths):
            readGroupSet = reads.HtslibReadGroupSet(dataset, sample)
            readGroupSet.populateFromFile(bamFile, indexFile)
            readGroupSet.setReferenceSet(referenceSet)
            repo.insertReadGroupSet(readGroupSet)

        repo.commit()
        repo.close()
        self.log("Finished creating the repository; summary:\n")
        repo.open("r")
        try:
            repo.printSummary()
        finally:
            # Release the handle reopened for the summary, even if printing
            # fails.
            repo.close()
예제 #6
0
    def addVariantSet(self, variantFileName, dataset, referenceSet, ontology,
                      biosamples):
        """
        Copies the named VCF from the input directory to the output
        directory, tabix-indexes the copy and inserts the resulting variant
        set, followed by each of its annotation sets, into self.repo.

        The variant set is named after the second '_'-separated field of
        variantFileName.  Every call set whose local ID equals the local ID
        of one of the given biosamples is linked to that biosample, and
        every annotation set is given the supplied ontology.
        """
        sourceVcf = os.path.join(self.inputDirectory, variantFileName)
        copiedVcf = os.path.join(self.outputDirectory, variantFileName)
        shutil.copy(sourceVcf, copiedVcf)
        # The files loaded below are "<copy>.gz" and "<copy>.gz.tbi", which
        # this indexing step is relied upon to produce.
        pysam.tabix_index(copiedVcf, preset="vcf")

        setName = variantFileName.split('_')[1]
        variantSet = variants.HtslibVariantSet(dataset, setName)
        variantSet.setReferenceSet(referenceSet)
        compressedVcf = os.path.abspath(copiedVcf + ".gz")
        tabixIndex = os.path.abspath(copiedVcf + ".gz.tbi")
        variantSet.populateFromFile([compressedVcf], [tabixIndex])
        variantSet.checkConsistency()

        # Link call sets to biosamples that share the same local ID.
        for callSet in variantSet.getCallSets():
            for biosample in biosamples:
                if biosample.getLocalId() != callSet.getLocalId():
                    continue
                callSet.setBiosampleId(biosample.getId())
        self.repo.insertVariantSet(variantSet)

        for annotationSet in variantSet.getVariantAnnotationSets():
            annotationSet.setOntology(ontology)
            self.repo.insertVariantAnnotationSet(annotationSet)
예제 #7
0
 def testInstantiation(self):
     # Each fixture directory named in self.localIds must be rejected while
     # it is being loaded: populateFromDirectory() itself is expected to
     # raise, so construction stays outside the guarded call.
     for setId in self.localIds:
         vcfDirectory = self.getFullPath(setId)
         candidate = variants.HtslibVariantSet(self.dataset, setId)
         self.assertRaises(
             exceptions.OverlappingVcfException,
             candidate.populateFromDirectory, vcfDirectory)