Example #1
0
 def testTopLevelIdsUnique(self):
     """
     A read group set and a variant set built in the same dataset from the
     same local id must not report equal ids.
     """
     sharedLocalId = "b"
     parentDataset = datasets.AbstractDataset("a")
     rgSet = reads.AbstractReadGroupSet(parentDataset, sharedLocalId)
     vSet = variants.AbstractVariantSet(parentDataset, sharedLocalId)
     rgSetId, vSetId = rgSet.getId(), vSet.getId()
     self.assertNotEqual(rgSetId, vSetId)
Example #2
0
 def __init__(self, readGroupSetId, baseDir):
     """
     Initialises the parent class with a dataset named "ds", then passes
     every "*.bam" file found under self._dataDir to _readSam.
     """
     super(ReadGroupSetTest, self).__init__(
         datasets.AbstractDataset("ds"), readGroupSetId, baseDir)
     self._readGroupInfos = {}
     bamPattern = os.path.join(self._dataDir, "*.bam")
     for bamPath in glob.glob(bamPattern):
         self._readSam(readGroupSetId, bamPath)
Example #3
0
 def __init__(self, variantSetId, baseDir):
     """
     Creates a dataset named "ds", initialises the parent class, then
     passes every "*.vcf.gz" file found under self._dataPath to _readVcf.
     """
     self._dataset = datasets.AbstractDataset("ds")
     super(VariantSetTest, self).__init__(variantSetId, baseDir)
     self._variantRecords, self._referenceNames = [], set()
     # Read in all the VCF files in the data path and store each variant.
     vcfPattern = os.path.join(self._dataPath, "*.vcf.gz")
     for vcfPath in glob.glob(vcfPattern):
         self._readVcf(vcfPath)
 def testCreation(self):
     """
     Every read group of a simulated read group set must yield at least
     one read alignment.
     """
     readGroupSet = reads.SimulatedReadGroupSet(
         datasets.AbstractDataset('dataset1'), "readGroupSetId")
     for group in readGroupSet.getReadGroups():
         numAlignments = len(list(group.getReadAlignments()))
         self.assertGreater(numAlignments, 0)
Example #5
0
 def testGetDatasetByIndexBadIndex(self):
     """
     getDatasetByIndex must raise IndexError for an out-of-range index and
     TypeError for None or an empty string.
     """
     repo = self._dataRepo
     # Index 0 is expected to be out of range before anything is added.
     self.assertRaises(IndexError, repo.getDatasetByIndex, 0)
     for notAnIndex in (None, ""):
         self.assertRaises(TypeError, repo.getDatasetByIndex, notAnIndex)
     repo.addDataset(datasets.AbstractDataset("ds"))
     # With one dataset added, index 1 is the first invalid position.
     self.assertRaises(IndexError, repo.getDatasetByIndex, 1)
Example #6
0
 def _getSimulatedVariantSet(self):
     """
     Returns a SimulatedVariantSet with local id 'variantSet1' inside a
     dataset named 'dataset1', configured from this object's randomSeed,
     numCalls and variantDensity attributes.
     """
     parent = datasets.AbstractDataset('dataset1')
     simulationArgs = dict(
         randomSeed=self.randomSeed,
         numCalls=self.numCalls,
         variantDensity=self.variantDensity)
     return variants.SimulatedVariantSet(
         parent, 'variantSet1', **simulationArgs)
Example #7
0
 def __init__(self, localId, dataPath):
     """
     Prepares the state for a read group set test over one alignment file.

     :param localId: identifier forwarded to the parent class initialiser
     :param dataPath: path opened with pysam.AlignmentFile and forwarded
         to the parent class initialiser
     """
     self._backend = backend.Backend(datarepo.AbstractDataRepository())
     self._referenceSet = None
     self._dataset = datasets.AbstractDataset("ds")
     self._readGroupInfos = {}
     self._readGroupSetInfo = None
     self._samFile = pysam.AlignmentFile(dataPath)
     # Runs before the parent initialiser; presumably the parent relies on
     # state set up here -- verify before reordering.
     self._readReferences()
     super(ReadGroupSetTest, self).__init__(localId, dataPath)
     # Runs only after the parent class has been initialised.
     self._readAlignmentInfo()
Example #8
0
 def testAddOneDataset(self):
     """
     After adding a single dataset the repository must report it through
     its count, list, index, name and id accessors.
     """
     repo = self._dataRepo
     name = "ds"
     added = datasets.AbstractDataset(name)
     # Nothing has been added yet.
     self.assertEqual(repo.getNumDatasets(), 0)
     self.assertEqual(repo.getDatasets(), [])
     repo.addDataset(added)
     self.assertEqual(repo.getNumDatasets(), 1)
     self.assertEqual(repo.getDatasets(), [added])
     # The same object must come back from every lookup style.
     self.assertEqual(repo.getDatasetByIndex(0), added)
     self.assertEqual(repo.getDatasetByName(name), added)
     self.assertEqual(repo.getDataset(added.getId()), added)
 def setUp(self):
     """
     Builds a file-system data repository over "tests/data", a dataset, an
     abstract variant set and an Htslib variant annotation set ("vas").
     """
     annotationPath = (
         "tests/data/datasets/dataset1/variants/WASH7P_annotation")
     self._variantSetName = "testVariantSet"
     self._backend = datarepo.FileSystemDataRepository("tests/data")
     # NOTE(review): AbstractDataset receives the repository object here
     # rather than a string id -- confirm this is intended.
     self._dataset = datasets.AbstractDataset(self._backend)
     self._variantSet = variants.AbstractVariantSet(
         self._dataset, self._variantSetName)
     self._variantAnnotationSet = variants.HtslibVariantAnnotationSet(
         self._dataset, "vas", annotationPath, self._backend,
         self._variantSet)
Example #10
0
 def __init__(self, featureSetLocalName, dataPath):
     """
     :param featureSetLocalName: Name of the GFF3 resource corresponding
         to a pair of files, .db and .gff3. A trailing '.db' extension is
         removed before the name is used.
     :param dataPath: string representing full path to the .db file
     :return:
     """
     self._dataset = datasets.AbstractDataset(_datasetName)
     self._datarepo = datarepo.FileSystemDataRepository("tests/data")
     # Strip the extension only when it is actually present; a blind
     # [:-3] slice would silently truncate a name that lacks '.db'.
     if featureSetLocalName.endswith('.db'):
         featureSetLocalName = featureSetLocalName[:-3]
     self._testData = _testDataForFeatureSetName[featureSetLocalName]
     super(FeatureSetTests, self).__init__(featureSetLocalName, dataPath)
Example #11
0
 def testGetVariantSet(self):
     """
     GET on "/variantsets" must return 200 and the matching object for
     each id in self.variantSetIds, and 404 for ids built from bad names.
     """
     url = utils.applyVersion("/variantsets")
     for knownId in self.variantSetIds:
         reply = self.sendObjectGetRequest(url, knownId)
         self.assertEqual(200, reply.status_code)
         parsed = protocol.VariantSet.fromJsonString(reply.data)
         self.assertEqual(parsed.id, knownId)
     parent = datasets.AbstractDataset("dataset1")
     bogusLocalIds = ("terribly bad ID value", "x" * 1000)
     for bogus in bogusLocalIds:
         # Ids are produced by a variant set object built from each bad
         # local id, then requested from the server.
         missingId = variants.AbstractVariantSet(parent, bogus).getId()
         reply = self.sendObjectGetRequest(url, missingId)
         self.assertEqual(404, reply.status_code)
Example #12
0
 def testAddMultipleDatasets(self):
     """
     After adding two datasets the repository must report both, in
     insertion order, through its count, list, index, name and id
     accessors.
     """
     repo = self._dataRepo
     names = ["ds1", "ds2"]
     added = [datasets.AbstractDataset(name) for name in names]
     # Nothing has been added yet.
     self.assertEqual(repo.getNumDatasets(), 0)
     self.assertEqual(repo.getDatasets(), [])
     for dataset in added:
         repo.addDataset(dataset)
     self.assertEqual(repo.getNumDatasets(), 2)
     self.assertEqual(repo.getDatasets(), added)
     # Lookups are grouped by accessor: by index, then name, then id.
     for index, dataset in enumerate(added):
         self.assertEqual(repo.getDatasetByIndex(index), dataset)
     for name, dataset in zip(names, added):
         self.assertEqual(repo.getDatasetByName(name), dataset)
     for dataset in added:
         self.assertEqual(repo.getDataset(dataset.getId()), dataset)
 def __init__(self, variantAnnotationSetId, baseDir):
     """
     Prepares the state for a variant annotation set test: creates a
     dataset and a file-system data repository, initialises the parent
     class, builds an HtslibVariantSet ("vs") over self._dataPath and
     reads the "*.vcf.gz" files found there.
     """
     self._dataset = datasets.AbstractDataset("ds")
     self._datarepo = datarepo.FileSystemDataRepository("tests/data")
     super(VariantAnnotationSetTest, self).__init__(variantAnnotationSetId,
                                                    baseDir)
     self._variantSet = variants.HtslibVariantSet(self._dataset, "vs",
                                                  self._dataPath, None)
     self._variantRecords = []
     self._referenceNames = set()
     # Only read in VCF files with a JSON sidecar saying they're annotated.
     # NOTE(review): _isAnnotated() is called without the file name, so the
     # check is the same for every file in the loop -- confirm intended.
     for vcfFile in glob.glob(os.path.join(self._dataPath, "*.vcf.gz")):
         if self._isAnnotated():
             self._readVcf(vcfFile)
     self._isCsq = self._hasConsequenceField()
Example #14
0
 def testCreation(self):
     """
     A simulated variant annotation set must reference its variant set's
     id, and each annotation it returns must carry a creation time in the
     expected format plus every required protocol field.
     """
     dataset = datasets.AbstractDataset('dataset1')
     localId = "variantAnnotationSetId"
     simulatedVariantSet = variants.SimulatedVariantSet(
         dataset, 'variantSet1', randomSeed=self.randomSeed,
         numCalls=self.numCalls, variantDensity=self.variantDensity)
     simulatedVariantAnnotationSet = variants.SimulatedVariantAnnotationSet(
         dataset, localId, simulatedVariantSet, self.randomSeed)
     annotations = simulatedVariantAnnotationSet.getVariantAnnotations(
         self.referenceName, self.startPosition, self.endPosition)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         simulatedVariantSet.toProtocolElement().id,
         simulatedVariantAnnotationSet.toProtocolElement().variantSetId,
         "Variant Set ID should match the annotation's variant set ID")
     timeFormat = "%Y-%m-%dT%H:%M:%S.%fZ"
     requiredFields = protocol.VariantAnnotation().requiredFields
     for ann in annotations:
         # Parsing and re-formatting must reproduce the original string.
         # The check does not depend on the field being inspected, so it
         # runs once per annotation rather than once per required field.
         parsedTime = datetime.datetime.strptime(
             ann.createDateTime, timeFormat)
         self.assertEqual(
             parsedTime.strftime(timeFormat), ann.createDateTime,
             "Expect time format to be in ISO8601")
         for key in requiredFields:
             self.assertTrue(hasattr(ann, key),
                             "Failed to find required key: " + key)
Example #15
0
 def setUp(self):
     """
     Creates an abstract backend, a dataset and an abstract variant set
     named "testVariantSet" for the tests.
     """
     name = "testVariantSet"
     self._variantSetName = name
     self._backend = backend.AbstractBackend()
     # NOTE(review): AbstractDataset receives the backend object here
     # rather than a string id -- confirm this is intended.
     self._dataset = datasets.AbstractDataset(self._backend)
     self._variantSet = variants.AbstractVariantSet(self._dataset, name)
 def setUp(self):
     """
     Creates a backend over an abstract data repository, a dataset and an
     abstract feature set named "testFeatureSet" for the tests.
     """
     name = "testFeatureSet"
     repo = datarepo.AbstractDataRepository()
     self._featureSetName = name
     self._backend = backend.Backend(repo)
     # NOTE(review): AbstractDataset receives the backend object here
     # rather than a string id -- confirm this is intended.
     self._dataset = datasets.AbstractDataset(self._backend)
     self._featureSet = features.AbstractFeatureSet(self._dataset, name)
Example #17
0
 def setUp(self):
     """
     Creates a backend over an abstract data repository, a dataset and an
     abstract variant set named "testVariantSet" for the tests.
     """
     name = "testVariantSet"
     repo = datarepo.AbstractDataRepository()
     self._variantSetName = name
     self._backend = backend.Backend(repo)
     # NOTE(review): AbstractDataset receives the backend object here
     # rather than a string id -- confirm this is intended.
     self._dataset = datasets.AbstractDataset(self._backend)
     self._variantSet = variants.AbstractVariantSet(self._dataset, name)
Example #18
0
 def setUp(self):
     """
     Stores the faulty-variants test data directory and a dataset named
     'dataset1' on the test instance.
     """
     faultyVariantsDir = "tests/faultydata/variants"
     self.testDataDir = faultyVariantsDir
     self.dataset = datasets.AbstractDataset('dataset1')
Example #19
0
 def getDataset(self):
     """
     Returns a newly constructed AbstractDataset named "dataset".
     """
     freshDataset = datasets.AbstractDataset("dataset")
     return freshDataset