def testInstantiation(self):
    """
    Checks that each variant set listed in self.localIds can be
    populated from its directory but then fails its consistency check
    with an InconsistentCallSetIdException.
    """
    for setId in self.localIds:
        vcfDirectory = self.getFullPath(setId)
        candidate = variants.HtslibVariantSet(self.dataset, setId)
        # Populating must succeed; only the consistency check that
        # follows is expected to raise.
        candidate.populateFromDirectory(vcfDirectory)
        self.assertRaises(
            exceptions.InconsistentCallSetIdException,
            candidate.checkConsistency)
def getDataModelInstance(self, localId, dataPath):
    """
    Returns an HtslibVariantSet called localId that belongs to
    self._dataset and has been populated from the directory dataPath.
    A placeholder reference set ("test"), patient ID and sample ID are
    attached before the instance is returned.
    """
    instance = variants.HtslibVariantSet(self._dataset, localId)
    instance.populateFromDirectory(dataPath)
    instance.setReferenceSet(references.AbstractReferenceSet("test"))
    instance.setPatientId("patient1")
    instance.setSampleId("sample1")
    return instance
def _createVariantAnnotationSet(self, vcfDir):
    """
    Builds the variant annotation set fixture from the VCF files found
    in vcfDir.

    The repository, dataset, variant set and annotation set that are
    created along the way are all stored on self. The annotation set's
    ontology is looked up by name (paths.ontologyName) in the test data
    repository, which is opened in read mode.
    """
    self._variantSetName = "testVariantSet"
    repo = self._repo = datarepo.SqlDataRepository(paths.testDataRepo)
    repo.open(datarepo.MODE_READ)
    dataset = self._dataset = datasets.Dataset("testDs")
    variantSet = self._variantSet = variants.HtslibVariantSet(
        dataset, self._variantSetName)
    variantSet.populateFromDirectory(vcfDir)
    annotationSet = self._variantAnnotationSet = (
        variants.HtslibVariantAnnotationSet(variantSet, "testVAs"))
    ontology = repo.getOntologyByName(paths.ontologyName)
    annotationSet.setOntology(ontology)
def getDataModelInstance(self, localId, dataPath):
    """
    Builds a variant set called localId from the directory dataPath and
    returns the data model object under test.

    If the variant set is not annotated, the variant set itself is
    returned. Otherwise its first variant annotation set is returned,
    after being given an ontology loaded from paths.ontologyPath.
    """
    variantSet = variants.HtslibVariantSet(datasets.Dataset("ds"), localId)
    variantSet.populateFromDirectory(dataPath)
    variantSet.setReferenceSet(references.AbstractReferenceSet("rs"))
    variantSet.setPatientId("patient1")
    variantSet.setSampleId("sample1")

    if not variantSet.isAnnotated():
        return variantSet

    # Annotated input: hand back the first annotation set instead.
    ontology = ontologies.Ontology(paths.ontologyName)
    ontology.populateFromFile(paths.ontologyPath)
    firstAnnotationSet = variantSet.getVariantAnnotationSets()[0]
    firstAnnotationSet.setOntology(ontology)
    return firstAnnotationSet
def createRepo(self):
    """
    Creates the repository for all the data we've just downloaded.

    The repository at self.repoPath is opened for writing and
    initialised, then filled with:

    - a "GRCh37-subset" reference set read from self.fastaFilePath,
      whose references get their source accessions from
      self.accessions (with a ".subset" suffix);
    - a "1kg-p3-subset" dataset;
    - one "mvncall" variant set built from the (vcfFile, indexFile)
      pairs in self.vcfFilePaths;
    - one read group set per (sample, (bamFile, indexFile)) pair
      obtained by zipping self.samples with self.bamFilePaths.

    After committing, the repository is reopened read-only to print a
    summary and is then closed again.
    """
    # NCBI taxonomy ID 9606 is Homo sapiens. The same species record is
    # attached to the reference set and to every reference in it.
    speciesJson = (
        '{"id": "9606",'
        '"term": "Homo sapiens", "source_name": "NCBI"}')

    repo = datarepo.SqlDataRepository(self.repoPath)
    repo.open("w")
    repo.initialise()

    referenceSet = references.HtslibReferenceSet("GRCh37-subset")
    referenceSet.populateFromFile(self.fastaFilePath)
    referenceSet.setDescription("Subset of GRCh37 used for demonstration")
    referenceSet.setSpeciesFromJson(speciesJson)
    for reference in referenceSet.getReferences():
        reference.setSpeciesFromJson(speciesJson)
        reference.setSourceAccessions(
            self.accessions[reference.getName()] + ".subset")
    repo.insertReferenceSet(referenceSet)

    dataset = datasets.Dataset("1kg-p3-subset")
    dataset.setDescription("Sample data from 1000 Genomes phase 3")
    repo.insertDataset(dataset)

    variantSet = variants.HtslibVariantSet(dataset, "mvncall")
    variantSet.setReferenceSet(referenceSet)
    dataUrls = [vcfFile for vcfFile, _ in self.vcfFilePaths]
    indexFiles = [indexFile for _, indexFile in self.vcfFilePaths]
    variantSet.populateFromFile(dataUrls, indexFiles)
    variantSet.checkConsistency()
    repo.insertVariantSet(variantSet)

    for sample, (bamFile, indexFile) in zip(
            self.samples, self.bamFilePaths):
        readGroupSet = reads.HtslibReadGroupSet(dataset, sample)
        readGroupSet.populateFromFile(bamFile, indexFile)
        readGroupSet.setReferenceSet(referenceSet)
        repo.insertReadGroupSet(readGroupSet)

    repo.commit()
    repo.close()

    self.log("Finished creating the repository; summary:\n")
    repo.open("r")
    try:
        repo.printSummary()
    finally:
        # Do not leave the read-only handle open once the summary has
        # been printed (or has failed to print).
        repo.close()
def addVariantSet(
        self, variantFileName, dataset, referenceSet, ontology,
        biosamples):
    """
    Copies a VCF file into the output directory, indexes it and
    registers the resulting variant set (plus any variant annotation
    sets it carries) in self.repo.

    :param variantFileName: name of the VCF file inside
        self.inputDirectory. The second underscore-separated component
        of this name is used as the variant set's name, so the name
        must contain at least one underscore.
    :param dataset: the dataset the variant set belongs to.
    :param referenceSet: the reference set assigned to the variant set.
    :param ontology: the ontology assigned to every variant annotation
        set of the variant set.
    :param biosamples: iterable of biosamples. A call set whose local
        ID equals a biosample's local ID is given that biosample's ID.
    """
    inputVcf = os.path.join(self.inputDirectory, variantFileName)
    outputVcf = os.path.join(self.outputDirectory, variantFileName)
    shutil.copy(inputVcf, outputVcf)
    # The variant set is populated below from the ".gz" and ".gz.tbi"
    # files next to outputVcf, which this indexing step is expected to
    # produce.
    pysam.tabix_index(outputVcf, preset="vcf")

    variantSet = variants.HtslibVariantSet(
        dataset, variantFileName.split('_')[1])
    variantSet.setReferenceSet(referenceSet)
    variantSet.populateFromFile(
        [os.path.abspath(outputVcf + ".gz")],
        [os.path.abspath(outputVcf + ".gz.tbi")])
    variantSet.checkConsistency()

    # Index the biosamples once instead of rescanning the whole
    # collection for every call set. Later biosamples overwrite earlier
    # ones that share a local ID, so the last match still wins.
    biosampleIdByLocalId = {
        biosample.getLocalId(): biosample.getId()
        for biosample in (biosamples or ())}
    for callSet in variantSet.getCallSets():
        callSetLocalId = callSet.getLocalId()
        if callSetLocalId in biosampleIdByLocalId:
            callSet.setBiosampleId(biosampleIdByLocalId[callSetLocalId])

    self.repo.insertVariantSet(variantSet)
    for annotationSet in variantSet.getVariantAnnotationSets():
        annotationSet.setOntology(ontology)
        self.repo.insertVariantAnnotationSet(annotationSet)
def testInstantiation(self):
    """
    Checks that populating a variant set from each directory listed in
    self.localIds is rejected with an OverlappingVcfException.
    """
    for setId in self.localIds:
        vcfDirectory = self.getFullPath(setId)
        candidate = variants.HtslibVariantSet(self.dataset, setId)
        # Construction must succeed; only the populate step is expected
        # to raise.
        self.assertRaises(
            exceptions.OverlappingVcfException,
            candidate.populateFromDirectory, vcfDirectory)