def __init__(
            self, localId, randomSeed=1, numCalls=1,
            variantDensity=1, numVariantSets=1, numAlignments=1):
        """
        Builds a simulated dataset holding ``numVariantSets`` simulated
        variant sets and a single simulated read group set.

        A private random generator is seeded with ``randomSeed``; every
        variant set receives its own seed drawn from that generator as an
        integer in the inclusive range [0, 2**32 - 1]. ``numCalls`` and
        ``variantDensity`` are passed through to each
        ``variants.SimulatedVariantSet``, and ``numAlignments`` to the
        ``reads.SimulatedReadGroupSet``.
        """
        super(SimulatedDataset, self).__init__(localId)
        self._randomSeed = randomSeed
        self._randomGenerator = random.Random(self._randomSeed)

        # Variants
        for index in range(numVariantSets):
            variantSetLocalId = "simVs{}".format(index)
            variantSetSeed = self._randomGenerator.randint(0, 2**32 - 1)
            simulatedVariantSet = variants.SimulatedVariantSet(
                self, variantSetLocalId, variantSetSeed,
                numCalls, variantDensity)
            self._variantSetIdMap[
                simulatedVariantSet.getId()] = simulatedVariantSet
        self._variantSetIds = sorted(self._variantSetIdMap.keys())

        # Reads
        simulatedReadGroupSet = reads.SimulatedReadGroupSet(
            self, 'aReadGroupSet', numAlignments)
        self._readGroupSetIdMap[
            simulatedReadGroupSet.getId()] = simulatedReadGroupSet
        # Index every read group of the set by its own id.
        for group in simulatedReadGroupSet.getReadGroups():
            self._readGroupIdMap[group.getId()] = group
        self._readGroupSetIds = sorted(self._readGroupSetIdMap.keys())
        self._readGroupIds = sorted(self._readGroupIdMap.keys())
Exemple #2
0
    def __init__(
            self, datasetId, randomSeed, numCalls,
            variantDensity, numVariantSets, numAlignments):
        """
        Builds a simulated dataset identified by ``datasetId``.

        Creates ``numVariantSets`` simulated variant sets with ids of the
        form ``<datasetId>:simVs<i>`` and one simulated read group set with
        id ``<datasetId>:aReadGroupSet``. Each variant set is given its own
        seed, drawn as an integer in the inclusive range [0, 2**32 - 1]
        from a generator seeded with ``randomSeed``.
        """
        super(SimulatedDataset, self).__init__()
        self._id = datasetId
        self._randomSeed = randomSeed
        self._randomGenerator = random.Random(self._randomSeed)

        # Variants
        for index in range(numVariantSets):
            vsId = "{}:simVs{}".format(self._id, index)
            vsSeed = self._randomGenerator.randint(0, 2**32 - 1)
            self._variantSetIdMap[vsId] = variants.SimulatedVariantSet(
                vsSeed, numCalls, variantDensity, vsId)
        self._variantSetIds = sorted(self._variantSetIdMap.keys())

        # Reads
        rgsId = "{}:aReadGroupSet".format(self._id)
        simulatedReadGroupSet = reads.SimulatedReadGroupSet(
            rgsId, numAlignments)
        self._readGroupSetIdMap[rgsId] = simulatedReadGroupSet
        # Index every read group of the set by its own id.
        for group in simulatedReadGroupSet.getReadGroups():
            self._readGroupIdMap[group.getId()] = group
        self._readGroupSetIds = sorted(self._readGroupSetIdMap.keys())
        self._readGroupIds = sorted(self._readGroupIdMap.keys())
Exemple #3
0
 def __init__(self,
              localId,
              referenceSet,
              randomSeed=0,
              numVariantSets=1,
              numCalls=1,
              variantDensity=0.5,
              numReadGroupSets=1,
              numReadGroupsPerReadGroupSet=1,
              numAlignments=1,
              numFeatureSets=1):
     """
     Builds a simulated dataset populated with variant sets, read group
     sets and feature sets tied to ``referenceSet``.

     The i-th object of each category is seeded with ``randomSeed + i``.
     For every call set of a variant set, two biosamples (named after the
     call set's local id, and that id with "2" appended) and one
     individual are registered; each variant set also receives one
     simulated variant annotation set. For every read group, one
     biosample and one individual are registered and the read group is
     pointed at that biosample.
     """
     super(SimulatedDataset, self).__init__(localId)
     self._description = "Simulated dataset {}".format(localId)
     # TODO create a simulated Ontology
     # Variants
     for vsIndex in range(numVariantSets):
         vsName = "simVs{}".format(vsIndex)
         vsSeed = randomSeed + vsIndex
         simVariantSet = variants.SimulatedVariantSet(
             self, referenceSet, vsName, vsSeed, numCalls, variantDensity)
         # Add biosamples
         for simCallSet in simVariantSet.getCallSets():
             firstSample = biodata.BioSample(self, simCallSet.getLocalId())
             secondSample = biodata.BioSample(
                 self, simCallSet.getLocalId() + "2")
             donor = biodata.Individual(self, simCallSet.getLocalId())
             # Both samples belong to the same individual.
             firstSample.setIndividualId(donor.getId())
             secondSample.setIndividualId(donor.getId())
             self.addIndividual(donor)
             self.addBioSample(firstSample)
             self.addBioSample(secondSample)
         self.addVariantSet(simVariantSet)
         simAnnotationSet = variants.SimulatedVariantAnnotationSet(
             simVariantSet, "simVas{}".format(vsIndex), vsSeed)
         simVariantSet.addVariantAnnotationSet(simAnnotationSet)
     # Reads
     for rgsIndex in range(numReadGroupSets):
         rgsName = 'simRgs{}'.format(rgsIndex)
         rgsSeed = randomSeed + rgsIndex
         simReadGroupSet = reads.SimulatedReadGroupSet(
             self, rgsName, referenceSet, rgsSeed,
             numReadGroupsPerReadGroupSet, numAlignments)
         for group in simReadGroupSet.getReadGroups():
             groupSample = biodata.BioSample(self, group.getLocalId())
             groupDonor = biodata.Individual(self, group.getLocalId())
             groupSample.setIndividualId(groupDonor.getId())
             group.setBioSampleId(groupSample.getId())
             self.addIndividual(groupDonor)
             self.addBioSample(groupSample)
         self.addReadGroupSet(simReadGroupSet)
     # Features
     for fsIndex in range(numFeatureSets):
         fsName = "simFs{}".format(fsIndex)
         fsSeed = randomSeed + fsIndex
         simFeatureSet = sequenceAnnotations.SimulatedFeatureSet(
             self, fsName, fsSeed)
         simFeatureSet.setReferenceSet(referenceSet)
         self.addFeatureSet(simFeatureSet)
Exemple #4
0
 def testCreation(self):
     """
     Checks a simulated variant annotation set built on top of a
     simulated variant set.

     Verifies that the annotation set reports the id of its variant set,
     that every annotation's ``created`` timestamp survives a
     parse/format round trip through the expected time format, and that
     each annotation refers to the variant it was returned with.
     """
     dataset = datasets.Dataset('dataset1')
     referenceSet = references.SimulatedReferenceSet("srs1")
     localId = "variantAnnotationSetId"
     simulatedVariantSet = variants.SimulatedVariantSet(
         dataset,
         referenceSet,
         'variantSet1',
         randomSeed=self.randomSeed,
         numCalls=self.numCalls,
         variantDensity=self.variantDensity)
     simulatedVariantAnnotationSet = variants.SimulatedVariantAnnotationSet(
         simulatedVariantSet, localId, self.randomSeed)
     annotations = simulatedVariantAnnotationSet.getVariantAnnotations(
         self.referenceName, self.startPosition, self.endPosition)
     # assertEqual is used throughout: assertEquals is a deprecated alias
     # that newer unittest versions no longer provide.
     self.assertEqual(
         simulatedVariantSet.toProtocolElement().id,
         simulatedVariantAnnotationSet.toProtocolElement().variant_set_id,
         "Variant Set ID should match the annotation's variant set ID")
     timeFormat = "%Y-%m-%dT%H:%M:%S.%fZ"
     for variant, ann in annotations:
         # Parsing and re-formatting must reproduce the original string.
         self.assertEqual(
             datetime.datetime.strptime(
                 ann.created, timeFormat).strftime(timeFormat),
             ann.created, "Expect time format to be in ISO8601")
         self.assertEqual(variant.id, ann.variant_id)
Exemple #5
0
 def __init__(self,
              localId,
              referenceSet,
              randomSeed=0,
              numVariantSets=1,
              numCalls=1,
              variantDensity=0.5,
              numReadGroupSets=1,
              numReadGroupsPerReadGroupSet=1,
              numAlignments=1):
     """
     Builds a simulated dataset containing ``numVariantSets`` simulated
     variant sets and ``numReadGroupSets`` simulated read group sets.

     The i-th object of each category is seeded with ``randomSeed + i``.
     ``referenceSet`` is handed to the read group sets only.
     """
     super(SimulatedDataset, self).__init__(localId)
     self._description = "Simulated dataset {}".format(localId)
     # Variants
     for vsIndex in range(numVariantSets):
         vsName = "simVs{}".format(vsIndex)
         vsSeed = randomSeed + vsIndex
         self.addVariantSet(
             variants.SimulatedVariantSet(
                 self, vsName, vsSeed, numCalls, variantDensity))
     # Reads
     for rgsIndex in range(numReadGroupSets):
         rgsName = 'simRgs{}'.format(rgsIndex)
         rgsSeed = randomSeed + rgsIndex
         self.addReadGroupSet(
             reads.SimulatedReadGroupSet(
                 self, rgsName, referenceSet, rgsSeed,
                 numReadGroupsPerReadGroupSet, numAlignments))
Exemple #6
0
 def _getSimulatedVariantSet(self):
     """
     Returns a new SimulatedVariantSet named 'variantSet1' inside a fresh
     'dataset1' dataset, configured from this test case's randomSeed,
     numCalls and variantDensity attributes.
     """
     parentDataset = datasets.AbstractDataset('dataset1')
     return variants.SimulatedVariantSet(
         parentDataset, 'variantSet1',
         randomSeed=self.randomSeed,
         numCalls=self.numCalls,
         variantDensity=self.variantDensity)
Exemple #7
0
 def _getSimulatedVariantSet(self):
     """
     Returns a new SimulatedVariantSet named 'variantSet1', attached to a
     fresh 'dataset1' dataset and an 'srs1' simulated reference set, and
     configured from this test case's randomSeed, numCalls and
     variantDensity attributes.
     """
     parentDataset = datasets.Dataset('dataset1')
     simulatedReferenceSet = references.SimulatedReferenceSet("srs1")
     return variants.SimulatedVariantSet(
         parentDataset, simulatedReferenceSet, 'variantSet1',
         randomSeed=self.randomSeed,
         numCalls=self.numCalls,
         variantDensity=self.variantDensity)
Exemple #8
0
    def __init__(self, randomSeed=0, numCalls=1, variantDensity=0.5,
                 numVariantSets=1):
        """
        Builds a simulated backend with ``numVariantSets`` simulated
        variant sets (ids ``simVs<i>``) and one simulated read group set
        (id ``aReadGroupSet``).

        Each variant set is given its own seed, drawn as an integer in the
        inclusive range [0, 2**32 - 1] from a generator seeded with
        ``randomSeed``.
        """
        super(SimulatedBackend, self).__init__()
        self._randomSeed = randomSeed
        self._randomGenerator = random.Random(self._randomSeed)
        # Variants
        for index in range(numVariantSets):
            vsId = "simVs{}".format(index)
            vsSeed = self._randomGenerator.randint(0, 2**32 - 1)
            self._variantSetIdMap[vsId] = variants.SimulatedVariantSet(
                vsSeed, numCalls, variantDensity, vsId)
        self._variantSetIds = sorted(self._variantSetIdMap.keys())

        # Reads
        rgsId = "aReadGroupSet"
        simulatedReadGroupSet = reads.SimulatedReadGroupSet(rgsId)
        self._readGroupSetIdMap[rgsId] = simulatedReadGroupSet
        # Index every read group of the set by its own id.
        for group in simulatedReadGroupSet.getReadGroups():
            self._readGroupIdMap[group.getId()] = group
        self._readGroupSetIds = sorted(self._readGroupSetIdMap.keys())
        self._readGroupIds = sorted(self._readGroupIdMap.keys())
Exemple #9
0
 def testCreation(self):
     """
     Checks a simulated variant annotation set built on top of a
     simulated variant set.

     Verifies that the annotation set reports the id of its variant set,
     that every annotation's ``createDateTime`` survives a parse/format
     round trip through the expected time format, and that every
     annotation exposes each field listed in
     ``protocol.VariantAnnotation().requiredFields``.
     """
     dataset = datasets.AbstractDataset('dataset1')
     localId = "variantAnnotationSetId"
     simulatedVariantSet = variants.SimulatedVariantSet(
         dataset, 'variantSet1', randomSeed=self.randomSeed,
         numCalls=self.numCalls, variantDensity=self.variantDensity)
     simulatedVariantAnnotationSet = variants.SimulatedVariantAnnotationSet(
         dataset, localId, simulatedVariantSet, self.randomSeed)
     annotations = simulatedVariantAnnotationSet.getVariantAnnotations(
         self.referenceName, self.startPosition, self.endPosition)
     # assertEqual is used throughout: assertEquals is a deprecated alias
     # that newer unittest versions no longer provide.
     self.assertEqual(
         simulatedVariantSet.toProtocolElement().id,
         simulatedVariantAnnotationSet.toProtocolElement().variantSetId,
         "Variant Set ID should match the annotation's variant set ID")
     timeFormat = "%Y-%m-%dT%H:%M:%S.%fZ"
     requiredFields = protocol.VariantAnnotation().requiredFields
     for ann in annotations:
         # The timestamp check does not depend on the field being
         # inspected, so it runs once per annotation rather than once per
         # required field (and still runs if no fields are required).
         self.assertEqual(
             datetime.datetime.strptime(
                 ann.createDateTime, timeFormat).strftime(timeFormat),
             ann.createDateTime,
             "Expect time format to be in ISO8601")
         for key in requiredFields:
             self.assertTrue(hasattr(ann, key),
                             "Failed to find required key: " + key)
Exemple #10
0
 def _getSimulatedVariantSet(self):
     """
     Returns a new SimulatedVariantSet built positionally from this test
     case's randomSeed, numCalls, variantDensity and variantSetId
     attributes.
     """
     return variants.SimulatedVariantSet(
         self.randomSeed,
         self.numCalls,
         self.variantDensity,
         self.variantSetId)