def __init__(
        self, localId, randomSeed=1, numCalls=1,
        variantDensity=1, numVariantSets=1, numAlignments=1):
    """
    Populates the dataset with simulated variant sets and one simulated
    read group set.

    A private random generator is seeded with ``randomSeed``; one integer
    in [0, 2**32 - 1] is drawn from it per variant set and handed to that
    variant set as its own seed.

    :param localId: identifier forwarded to the parent constructor.
    :param randomSeed: seed of the generator producing per-set seeds.
    :param numCalls: forwarded to every SimulatedVariantSet.
    :param variantDensity: forwarded to every SimulatedVariantSet.
    :param numVariantSets: how many variant sets to create; their local
        ids are "simVs0", "simVs1", ...
    :param numAlignments: forwarded to the SimulatedReadGroupSet.
    """
    super(SimulatedDataset, self).__init__(localId)
    self._randomSeed = randomSeed
    self._randomGenerator = random.Random(self._randomSeed)

    # NOTE(review): the *IdMap attributes are not created in this method;
    # presumably the parent constructor provides them -- confirm.

    # Variant sets, registered under the id each one reports.
    for setIndex in range(numVariantSets):
        setSeed = self._randomGenerator.randint(0, 2**32 - 1)
        simulatedSet = variants.SimulatedVariantSet(
            self, "simVs{}".format(setIndex), setSeed,
            numCalls, variantDensity)
        self._variantSetIdMap[simulatedSet.getId()] = simulatedSet
    self._variantSetIds = sorted(self._variantSetIdMap.keys())

    # The single read group set, plus each of its read groups.
    simulatedReads = reads.SimulatedReadGroupSet(
        self, 'aReadGroupSet', numAlignments)
    self._readGroupSetIdMap[simulatedReads.getId()] = simulatedReads
    for group in simulatedReads.getReadGroups():
        self._readGroupIdMap[group.getId()] = group
    self._readGroupSetIds = sorted(self._readGroupSetIdMap.keys())
    self._readGroupIds = sorted(self._readGroupIdMap.keys())
def __init__(
        self, datasetId, randomSeed, numCalls,
        variantDensity, numVariantSets, numAlignments):
    """
    Populates the dataset with simulated variant sets and one simulated
    read group set, all with ids prefixed by the dataset id.

    A private random generator is seeded with ``randomSeed``; one integer
    in [0, 2**32 - 1] is drawn from it per variant set and handed to that
    variant set as its own seed.

    :param datasetId: id stored on the dataset and used as the prefix
        ("<datasetId>:...") of every contained id built here.
    :param randomSeed: seed of the generator producing per-set seeds.
    :param numCalls: forwarded to every SimulatedVariantSet.
    :param variantDensity: forwarded to every SimulatedVariantSet.
    :param numVariantSets: how many variant sets to create.
    :param numAlignments: forwarded to the SimulatedReadGroupSet.
    """
    super(SimulatedDataset, self).__init__()
    self._id = datasetId
    self._randomSeed = randomSeed
    self._randomGenerator = random.Random(self._randomSeed)

    # NOTE(review): the *IdMap attributes are not created in this method;
    # presumably the parent constructor provides them -- confirm.

    # Variant sets, keyed by the compound id built here.
    for setIndex in range(numVariantSets):
        compoundId = "{}:simVs{}".format(self._id, setIndex)
        setSeed = self._randomGenerator.randint(0, 2**32 - 1)
        self._variantSetIdMap[compoundId] = variants.SimulatedVariantSet(
            setSeed, numCalls, variantDensity, compoundId)
    self._variantSetIds = sorted(self._variantSetIdMap.keys())

    # The single read group set, plus each of its read groups.
    readsId = "{}:aReadGroupSet".format(self._id)
    simulatedReads = reads.SimulatedReadGroupSet(readsId, numAlignments)
    self._readGroupSetIdMap[readsId] = simulatedReads
    for group in simulatedReads.getReadGroups():
        self._readGroupIdMap[group.getId()] = group
    self._readGroupSetIds = sorted(self._readGroupSetIdMap.keys())
    self._readGroupIds = sorted(self._readGroupIdMap.keys())
def __init__(self, localId, referenceSet, randomSeed=0,
             numVariantSets=1, numCalls=1, variantDensity=0.5,
             numReadGroupSets=1, numReadGroupsPerReadGroupSet=1,
             numAlignments=1, numFeatureSets=1):
    """
    Fills the dataset with simulated variant sets, read group sets and
    feature sets, together with the bio samples and individuals that
    accompany them.

    The n-th object of each kind is seeded with ``randomSeed + n``.

    :param localId: identifier forwarded to the parent constructor and
        embedded in the dataset description.
    :param referenceSet: reference set handed to every variant set,
        read group set and feature set created here.
    :param randomSeed: base value of the per-object seeds.
    :param numVariantSets: number of variant sets ("simVs<n>"), each of
        which also receives an annotation set ("simVas<n>").
    :param numCalls: forwarded to every SimulatedVariantSet.
    :param variantDensity: forwarded to every SimulatedVariantSet.
    :param numReadGroupSets: number of read group sets ("simRgs<n>").
    :param numReadGroupsPerReadGroupSet: forwarded to every
        SimulatedReadGroupSet.
    :param numAlignments: forwarded to every SimulatedReadGroupSet.
    :param numFeatureSets: number of feature sets ("simFs<n>").
    """
    super(SimulatedDataset, self).__init__(localId)
    self._description = "Simulated dataset {}".format(localId)
    # TODO create a simulated Ontology

    # Variant sets. Every call set gets one individual and two bio
    # samples (the second one's local id carries a "2" suffix).
    for vsIndex in range(numVariantSets):
        vsSeed = randomSeed + vsIndex
        simVariantSet = variants.SimulatedVariantSet(
            self, referenceSet, "simVs{}".format(vsIndex), vsSeed,
            numCalls, variantDensity)
        for callSet in simVariantSet.getCallSets():
            firstSample = biodata.BioSample(self, callSet.getLocalId())
            secondSample = biodata.BioSample(
                self, callSet.getLocalId() + "2")
            person = biodata.Individual(self, callSet.getLocalId())
            firstSample.setIndividualId(person.getId())
            secondSample.setIndividualId(person.getId())
            self.addIndividual(person)
            self.addBioSample(firstSample)
            self.addBioSample(secondSample)
        self.addVariantSet(simVariantSet)
        # The annotation set reuses the seed of its variant set.
        simVariantSet.addVariantAnnotationSet(
            variants.SimulatedVariantAnnotationSet(
                simVariantSet, "simVas{}".format(vsIndex), vsSeed))

    # Read group sets. Every read group gets one individual and one bio
    # sample, and is linked to that bio sample.
    for rgsIndex in range(numReadGroupSets):
        simReadGroupSet = reads.SimulatedReadGroupSet(
            self, 'simRgs{}'.format(rgsIndex), referenceSet,
            randomSeed + rgsIndex, numReadGroupsPerReadGroupSet,
            numAlignments)
        for readGroup in simReadGroupSet.getReadGroups():
            sample = biodata.BioSample(self, readGroup.getLocalId())
            person = biodata.Individual(self, readGroup.getLocalId())
            sample.setIndividualId(person.getId())
            readGroup.setBioSampleId(sample.getId())
            self.addIndividual(person)
            self.addBioSample(sample)
        self.addReadGroupSet(simReadGroupSet)

    # Feature sets, each bound to the shared reference set.
    for fsIndex in range(numFeatureSets):
        simFeatureSet = sequenceAnnotations.SimulatedFeatureSet(
            self, "simFs{}".format(fsIndex), randomSeed + fsIndex)
        simFeatureSet.setReferenceSet(referenceSet)
        self.addFeatureSet(simFeatureSet)
def testCreation(self):
    """
    Builds a simulated variant set with an annotation set on top of it
    and checks that:

    - the annotation set's protocol element refers to the id of the
      variant set's protocol element;
    - every annotation's ``created`` timestamp survives a
      strptime/strftime round trip with the expected format;
    - every annotation refers to the id of the variant it is paired
      with.
    """
    timestampFormat = "%Y-%m-%dT%H:%M:%S.%fZ"
    dataset = datasets.Dataset('dataset1')
    referenceSet = references.SimulatedReferenceSet("srs1")
    localId = "variantAnnotationSetId"
    simulatedVariantSet = variants.SimulatedVariantSet(
        dataset, referenceSet, 'variantSet1', randomSeed=self.randomSeed,
        numCalls=self.numCalls, variantDensity=self.variantDensity)
    simulatedVariantAnnotationSet = variants.SimulatedVariantAnnotationSet(
        simulatedVariantSet, localId, self.randomSeed)
    annotations = simulatedVariantAnnotationSet.getVariantAnnotations(
        self.referenceName, self.startPosition, self.endPosition)
    # assertEqual replaces the deprecated assertEquals alias, which is
    # no longer available in recent Python versions.
    self.assertEqual(
        simulatedVariantSet.toProtocolElement().id,
        simulatedVariantAnnotationSet.toProtocolElement().variant_set_id,
        "Variant Set ID should match the annotation's variant set ID")
    for variant, ann in annotations:
        # Parsing and re-formatting must reproduce the original string.
        roundTripped = datetime.datetime.strptime(
            ann.created, timestampFormat).strftime(timestampFormat)
        self.assertEqual(
            roundTripped, ann.created,
            "Expect time format to be in ISO8601")
        self.assertEqual(variant.id, ann.variant_id)
def __init__(self, localId, referenceSet, randomSeed=0,
             numVariantSets=1, numCalls=1, variantDensity=0.5,
             numReadGroupSets=1, numReadGroupsPerReadGroupSet=1,
             numAlignments=1):
    """
    Fills the dataset with simulated variant sets and read group sets.

    The n-th object of each kind is seeded with ``randomSeed + n``.

    :param localId: identifier forwarded to the parent constructor and
        embedded in the dataset description.
    :param referenceSet: reference set handed to every read group set.
    :param randomSeed: base value of the per-object seeds.
    :param numVariantSets: number of variant sets ("simVs<n>").
    :param numCalls: forwarded to every SimulatedVariantSet.
    :param variantDensity: forwarded to every SimulatedVariantSet.
    :param numReadGroupSets: number of read group sets ("simRgs<n>").
    :param numReadGroupsPerReadGroupSet: forwarded to every
        SimulatedReadGroupSet.
    :param numAlignments: forwarded to every SimulatedReadGroupSet.
    """
    super(SimulatedDataset, self).__init__(localId)
    self._description = "Simulated dataset {}".format(localId)

    # Variant sets
    for vsIndex in range(numVariantSets):
        self.addVariantSet(variants.SimulatedVariantSet(
            self, "simVs{}".format(vsIndex), randomSeed + vsIndex,
            numCalls, variantDensity))

    # Read group sets
    for rgsIndex in range(numReadGroupSets):
        self.addReadGroupSet(reads.SimulatedReadGroupSet(
            self, 'simRgs{}'.format(rgsIndex), referenceSet,
            randomSeed + rgsIndex, numReadGroupsPerReadGroupSet,
            numAlignments))
def _getSimulatedVariantSet(self):
    """
    Returns a new SimulatedVariantSet named 'variantSet1' that belongs to
    a fresh AbstractDataset 'dataset1' and is configured from this test
    case's randomSeed, numCalls and variantDensity attributes.
    """
    owner = datasets.AbstractDataset('dataset1')
    return variants.SimulatedVariantSet(
        owner, 'variantSet1',
        randomSeed=self.randomSeed,
        numCalls=self.numCalls,
        variantDensity=self.variantDensity)
def _getSimulatedVariantSet(self):
    """
    Returns a new SimulatedVariantSet named 'variantSet1' that belongs to
    a fresh Dataset 'dataset1', uses a fresh SimulatedReferenceSet
    "srs1", and is configured from this test case's randomSeed, numCalls
    and variantDensity attributes.
    """
    owner = datasets.Dataset('dataset1')
    simulatedReferences = references.SimulatedReferenceSet("srs1")
    return variants.SimulatedVariantSet(
        owner, simulatedReferences, 'variantSet1',
        randomSeed=self.randomSeed,
        numCalls=self.numCalls,
        variantDensity=self.variantDensity)
def __init__(self, randomSeed=0, numCalls=1, variantDensity=0.5,
             numVariantSets=1):
    """
    Populates the backend with simulated variant sets and one simulated
    read group set.

    A private random generator is seeded with ``randomSeed``; one integer
    in [0, 2**32 - 1] is drawn from it per variant set and handed to that
    variant set as its own seed.

    :param randomSeed: seed of the generator producing per-set seeds.
    :param numCalls: forwarded to every SimulatedVariantSet.
    :param variantDensity: forwarded to every SimulatedVariantSet.
    :param numVariantSets: how many variant sets to create; their ids
        are "simVs0", "simVs1", ...
    """
    super(SimulatedBackend, self).__init__()
    self._randomSeed = randomSeed
    self._randomGenerator = random.Random(self._randomSeed)

    # NOTE(review): the *IdMap attributes are not created in this method;
    # presumably the parent constructor provides them -- confirm.

    # Variant sets, keyed by the id built here.
    for setIndex in range(numVariantSets):
        setId = "simVs{}".format(setIndex)
        setSeed = self._randomGenerator.randint(0, 2**32 - 1)
        self._variantSetIdMap[setId] = variants.SimulatedVariantSet(
            setSeed, numCalls, variantDensity, setId)
    self._variantSetIds = sorted(self._variantSetIdMap.keys())

    # The single read group set, plus each of its read groups.
    readsId = "aReadGroupSet"
    simulatedReads = reads.SimulatedReadGroupSet(readsId)
    self._readGroupSetIdMap[readsId] = simulatedReads
    for group in simulatedReads.getReadGroups():
        self._readGroupIdMap[group.getId()] = group
    self._readGroupSetIds = sorted(self._readGroupSetIdMap.keys())
    self._readGroupIds = sorted(self._readGroupIdMap.keys())
def testCreation(self):
    """
    Builds a simulated variant set and a simulated annotation set that
    refers to it, and checks that:

    - the annotation set's protocol element refers to the id of the
      variant set's protocol element;
    - every annotation's ``createDateTime`` survives a
      strptime/strftime round trip with the expected format;
    - every annotation has an attribute for each entry in
      ``protocol.VariantAnnotation().requiredFields``.
    """
    timestampFormat = "%Y-%m-%dT%H:%M:%S.%fZ"
    dataset = datasets.AbstractDataset('dataset1')
    localId = "variantAnnotationSetId"
    simulatedVariantSet = variants.SimulatedVariantSet(
        dataset, 'variantSet1', randomSeed=self.randomSeed,
        numCalls=self.numCalls, variantDensity=self.variantDensity)
    simulatedVariantAnnotationSet = variants.SimulatedVariantAnnotationSet(
        dataset, localId, simulatedVariantSet, self.randomSeed)
    annotations = simulatedVariantAnnotationSet.getVariantAnnotations(
        self.referenceName, self.startPosition, self.endPosition)
    # assertEqual replaces the deprecated assertEquals alias, which is
    # no longer available in recent Python versions.
    self.assertEqual(
        simulatedVariantSet.toProtocolElement().id,
        simulatedVariantAnnotationSet.toProtocolElement().variantSetId,
        "Variant Set ID should match the annotation's variant set ID")
    # The required-field list does not depend on the annotation, so it
    # is looked up once instead of once per annotation.
    requiredFields = protocol.VariantAnnotation().requiredFields
    for ann in annotations:
        # The timestamp check does not depend on the field key, so it
        # runs once per annotation rather than once per required field.
        roundTripped = datetime.datetime.strptime(
            ann.createDateTime, timestampFormat).strftime(timestampFormat)
        self.assertEqual(
            roundTripped, ann.createDateTime,
            "Expect time format to be in ISO8601")
        for key in requiredFields:
            self.assertTrue(
                hasattr(ann, key),
                "Failed to find required key: " + key)
def _getSimulatedVariantSet(self):
    """
    Returns a new SimulatedVariantSet configured from this test case's
    randomSeed, numCalls, variantDensity and variantSetId attributes,
    passed positionally in that order.
    """
    return variants.SimulatedVariantSet(
        self.randomSeed,
        self.numCalls,
        self.variantDensity,
        self.variantSetId)