def testTopLevelIdsUnique(self):
    """Two top-level containers of different types that share the same
    local ID within one dataset must still get distinct global IDs."""
    parentDataset = datasets.AbstractDataset("a")
    sharedLocalId = "b"
    rgs = reads.AbstractReadGroupSet(parentDataset, sharedLocalId)
    vs = variants.AbstractVariantSet(parentDataset, sharedLocalId)
    self.assertNotEqual(rgs.getId(), vs.getId())
def __init__(self, readGroupSetId, baseDir):
    """Initialise the fixture and ingest every BAM file found in the
    data directory established by the base class."""
    super(ReadGroupSetTest, self).__init__(
        datasets.AbstractDataset("ds"), readGroupSetId, baseDir)
    self._readGroupInfos = {}
    # self._dataDir is provided by the base class initialiser above.
    bamPattern = os.path.join(self._dataDir, "*.bam")
    for bamPath in glob.glob(bamPattern):
        self._readSam(readGroupSetId, bamPath)
def __init__(self, variantSetId, baseDir):
    """Initialise the fixture and record every variant found in the
    bgzipped VCF files under the data path."""
    self._dataset = datasets.AbstractDataset("ds")
    super(VariantSetTest, self).__init__(variantSetId, baseDir)
    self._variantRecords = []
    self._referenceNames = set()
    # Read in all the VCF files in datadir and store each variant.
    vcfPattern = os.path.join(self._dataPath, "*.vcf.gz")
    for vcfPath in glob.glob(vcfPattern):
        self._readVcf(vcfPath)
def testCreation(self):
    """Every read group of a freshly simulated read group set must
    yield at least one read alignment."""
    simulatedSet = reads.SimulatedReadGroupSet(
        datasets.AbstractDataset('dataset1'), "readGroupSetId")
    for group in simulatedSet.getReadGroups():
        alignmentList = list(group.getReadAlignments())
        self.assertGreater(len(alignmentList), 0)
def testGetDatasetByIndexBadIndex(self):
    """getDatasetByIndex rejects out-of-range and non-integer indexes."""
    repo = self._dataRepo
    # An empty repository has no index 0.
    self.assertRaises(IndexError, repo.getDatasetByIndex, 0)
    # Non-integer indexes raise TypeError rather than IndexError.
    for badIndex in (None, ""):
        self.assertRaises(TypeError, repo.getDatasetByIndex, badIndex)
    repo.addDataset(datasets.AbstractDataset("ds"))
    # With exactly one dataset present, index 1 is still out of range.
    self.assertRaises(IndexError, repo.getDatasetByIndex, 1)
def _getSimulatedVariantSet(self):
    """Build and return a simulated variant set parameterised by this
    test case's seed, call count and variant density."""
    owningDataset = datasets.AbstractDataset('dataset1')
    return variants.SimulatedVariantSet(
        owningDataset, 'variantSet1',
        randomSeed=self.randomSeed,
        numCalls=self.numCalls,
        variantDensity=self.variantDensity)
def __init__(self, localId, dataPath):
    """Open the BAM file at dataPath and collect the reference, read
    group and alignment information used by the test assertions.

    :param localId: local identifier passed to the base class
    :param dataPath: path to the BAM file opened via pysam
    """
    self._backend = backend.Backend(datarepo.AbstractDataRepository())
    self._referenceSet = None
    self._dataset = datasets.AbstractDataset("ds")
    self._readGroupInfos = {}
    self._readGroupSetInfo = None
    self._samFile = pysam.AlignmentFile(dataPath)
    # References are read before the base class initialises, since
    # _readReferences needs self._samFile opened just above.
    self._readReferences()
    super(ReadGroupSetTest, self).__init__(localId, dataPath)
    self._readAlignmentInfo()
def testAddOneDataset(self):
    """After adding a single dataset, every repository accessor
    (count, list, by-index, by-name, by-id) must reflect it."""
    name = "ds"
    added = datasets.AbstractDataset(name)
    repo = self._dataRepo
    # The repository starts out empty.
    self.assertEqual(repo.getNumDatasets(), 0)
    self.assertEqual(repo.getDatasets(), [])
    repo.addDataset(added)
    self.assertEqual(repo.getNumDatasets(), 1)
    self.assertEqual(repo.getDatasets(), [added])
    self.assertEqual(repo.getDatasetByIndex(0), added)
    self.assertEqual(repo.getDatasetByName(name), added)
    self.assertEqual(repo.getDataset(added.getId()), added)
def setUp(self):
    """Create a variant annotation set backed by the WASH7P test data."""
    self._variantSetName = "testVariantSet"
    self._backend = datarepo.FileSystemDataRepository("tests/data")
    # NOTE(review): the repository object (not a name string) is passed
    # as the dataset's local ID here, unlike other call sites in this
    # file that pass strings like "ds" -- confirm this is intended.
    self._dataset = datasets.AbstractDataset(self._backend)
    self._variantSet = variants.AbstractVariantSet(
        self._dataset, self._variantSetName)
    self._variantAnnotationSet = variants.HtslibVariantAnnotationSet(
        self._dataset, "vas",
        "tests/data/datasets/dataset1/variants/WASH7P_annotation",
        self._backend, self._variantSet)
def __init__(self, featureSetLocalName, dataPath):
    """
    :param featureSetLocalName: name of the GFF3 resource corresponding
        to a pair of files, .db and .gff3; the trailing '.db' suffix is
        stripped before lookup
    :param dataPath: string representing full path to the .db file
    """
    self._dataset = datasets.AbstractDataset(_datasetName)
    self._datarepo = datarepo.FileSystemDataRepository("tests/data")
    # Strip the trailing '.db' to recover the bare resource name.
    baseName = featureSetLocalName[:-3]
    self._testData = _testDataForFeatureSetName[baseName]
    super(FeatureSetTests, self).__init__(baseName, dataPath)
def testGetVariantSet(self):
    """GET on /variantsets returns 200 with a matching object for known
    IDs and 404 for well-formed but nonexistent ones."""
    path = utils.applyVersion("/variantsets")
    for knownId in self.variantSetIds:
        response = self.sendObjectGetRequest(path, knownId)
        self.assertEqual(200, response.status_code)
        returned = protocol.VariantSet.fromJsonString(response.data)
        self.assertEqual(returned.id, knownId)
    # IDs derived from variant sets the server does not know about
    # must produce 404, including a very long local ID.
    dataset = datasets.AbstractDataset("dataset1")
    for badId in ["terribly bad ID value", "x" * 1000]:
        missing = variants.AbstractVariantSet(dataset, badId)
        response = self.sendObjectGetRequest(path, missing.getId())
        self.assertEqual(404, response.status_code)
def testAddMultipleDatasets(self):
    """Adding two datasets keeps every repository accessor consistent
    with both, in insertion order."""
    names = ["ds1", "ds2"]
    added = [datasets.AbstractDataset(name) for name in names]
    repo = self._dataRepo
    # The repository starts out empty.
    self.assertEqual(repo.getNumDatasets(), 0)
    self.assertEqual(repo.getDatasets(), [])
    for dataset in added:
        repo.addDataset(dataset)
    self.assertEqual(repo.getNumDatasets(), 2)
    self.assertEqual(repo.getDatasets(), added)
    # Each dataset is retrievable by index, by name and by ID.
    for index, (name, dataset) in enumerate(zip(names, added)):
        self.assertEqual(repo.getDatasetByIndex(index), dataset)
        self.assertEqual(repo.getDatasetByName(name), dataset)
        self.assertEqual(repo.getDataset(dataset.getId()), dataset)
def __init__(self, variantAnnotationSetId, baseDir):
    """Load annotated VCF data for a variant annotation set test.

    :param variantAnnotationSetId: local ID passed to the base class
    :param baseDir: directory (under the test data root) holding the
        bgzipped VCF files
    """
    self._dataset = datasets.AbstractDataset("ds")
    self._datarepo = datarepo.FileSystemDataRepository("tests/data")
    # The base class initialiser sets self._dataPath, used below.
    super(VariantAnnotationSetTest, self).__init__(
        variantAnnotationSetId, baseDir)
    self._variantSet = variants.HtslibVariantSet(
        self._dataset, "vs", self._dataPath, None)
    self._variantRecords = []
    self._referenceNames = set()
    # Only read in VCF files with a JSON sidecar saying they're annotated.
    # NOTE(review): _isAnnotated() takes no file argument, so the check
    # looks per-set rather than per-file -- confirm that is intended.
    for vcfFile in glob.glob(os.path.join(self._dataPath, "*.vcf.gz")):
        if self._isAnnotated():
            self._readVcf(vcfFile)
    self._isCsq = self._hasConsequenceField()
def testCreation(self):
    """A simulated variant annotation set must reference its parent
    variant set's ID and produce annotations whose createDateTime is
    canonical ISO8601 and which carry every required protocol field.

    Fixes: replaces the deprecated unittest ``assertEquals`` alias with
    ``assertEqual``, and hoists the timestamp round-trip check out of
    the per-required-field loop where it was loop-invariant.
    """
    dataset = datasets.AbstractDataset('dataset1')
    localId = "variantAnnotationSetId"
    simulatedVariantSet = variants.SimulatedVariantSet(
        dataset, 'variantSet1', randomSeed=self.randomSeed,
        numCalls=self.numCalls, variantDensity=self.variantDensity)
    simulatedVariantAnnotationSet = variants.SimulatedVariantAnnotationSet(
        dataset, localId, simulatedVariantSet, self.randomSeed)
    annotations = simulatedVariantAnnotationSet.getVariantAnnotations(
        self.referenceName, self.startPosition, self.endPosition)
    self.assertEqual(
        simulatedVariantSet.toProtocolElement().id,
        simulatedVariantAnnotationSet.toProtocolElement().variantSetId,
        "Variant Set ID should match the annotation's variant set ID")
    isoFormat = "%Y-%m-%dT%H:%M:%S.%fZ"
    for ann in annotations:
        # A canonical ISO8601 timestamp survives a parse/format
        # round-trip unchanged; checked once per annotation since it
        # does not depend on which required field is being examined.
        self.assertEqual(
            datetime.datetime.strptime(
                ann.createDateTime, isoFormat).strftime(isoFormat),
            ann.createDateTime,
            "Expect time format to be in ISO8601")
        for key in protocol.VariantAnnotation().requiredFields:
            self.assertTrue(
                hasattr(ann, key),
                "Failed to find required key: " + key)
def setUp(self):
    """Create an abstract variant set inside a throwaway dataset."""
    self._variantSetName = "testVariantSet"
    self._backend = backend.AbstractBackend()
    # NOTE(review): the backend object (not a name string) is used as
    # the dataset's local ID -- confirm this is intended.
    self._dataset = datasets.AbstractDataset(self._backend)
    self._variantSet = variants.AbstractVariantSet(
        self._dataset, self._variantSetName)
def setUp(self):
    """Create an abstract feature set inside a throwaway dataset."""
    self._featureSetName = "testFeatureSet"
    self._backend = backend.Backend(datarepo.AbstractDataRepository())
    # NOTE(review): the backend object (not a name string) is used as
    # the dataset's local ID -- confirm this is intended.
    self._dataset = datasets.AbstractDataset(self._backend)
    self._featureSet = features.AbstractFeatureSet(
        self._dataset, self._featureSetName)
def setUp(self):
    """Create an abstract variant set backed by an empty repository."""
    self._variantSetName = "testVariantSet"
    self._backend = backend.Backend(datarepo.AbstractDataRepository())
    # NOTE(review): the backend object (not a name string) is used as
    # the dataset's local ID -- confirm this is intended.
    self._dataset = datasets.AbstractDataset(self._backend)
    self._variantSet = variants.AbstractVariantSet(
        self._dataset, self._variantSetName)
def setUp(self):
    """Point the fixture at the deliberately faulty variant test data."""
    self.dataset = datasets.AbstractDataset('dataset1')
    self.testDataDir = "tests/faultydata/variants"
def getDataset(self):
    """Return a fresh throwaway dataset for use in a test."""
    return datasets.AbstractDataset("dataset")