Example #1
0
 def _getSimulatedVariantSet(self):
     """
     Returns a freshly constructed SimulatedVariantSet called 'variantSet1'.

     The variant set belongs to a new Dataset ('dataset1'), is tied to a
     new SimulatedReferenceSet ('srs1'), and is configured from this test
     case's randomSeed, numCalls and variantDensity attributes.
     """
     parentDataset = datasets.Dataset('dataset1')
     simulatedReferenceSet = references.SimulatedReferenceSet("srs1")
     return variants.SimulatedVariantSet(
         parentDataset,
         simulatedReferenceSet,
         'variantSet1',
         randomSeed=self.randomSeed,
         numCalls=self.numCalls,
         variantDensity=self.variantDensity)
 def testCreation(self):
     """
     Builds a SimulatedVariantAnnotationSet on top of a SimulatedVariantSet
     and checks that the two are consistent with each other.

     Verifies that:
       - the annotation set's protocol element carries the id of the
         variant set it was created from;
       - each annotation's `created` timestamp is in the expected format;
       - each annotation's variant_id equals the id of the variant it is
         paired with.

     Reads self.randomSeed, self.numCalls, self.variantDensity,
     self.referenceName, self.startPosition and self.endPosition.
     """
     dataset = datasets.Dataset('dataset1')
     referenceSet = references.SimulatedReferenceSet("srs1")
     localId = "variantAnnotationSetId"
     simulatedVariantSet = variants.SimulatedVariantSet(
         dataset,
         referenceSet,
         'variantSet1',
         randomSeed=self.randomSeed,
         numCalls=self.numCalls,
         variantDensity=self.variantDensity)
     simulatedVariantAnnotationSet = variants.SimulatedVariantAnnotationSet(
         simulatedVariantSet, localId, self.randomSeed)
     annotations = simulatedVariantAnnotationSet.getVariantAnnotations(
         self.referenceName, self.startPosition, self.endPosition)
     # assertEqual is used throughout: assertEquals is a deprecated alias
     # that newer unittest versions no longer provide.
     self.assertEqual(
         simulatedVariantSet.toProtocolElement().id,
         simulatedVariantAnnotationSet.toProtocolElement().variant_set_id,
         "Variant Set ID should match the annotation's variant set ID")
     # Parsing and re-formatting with the same format string only gives
     # back the original text when it already has exactly that layout.
     timeFormat = "%Y-%m-%dT%H:%M:%S.%fZ"
     for variant, ann in annotations:
         parsedCreated = datetime.datetime.strptime(ann.created, timeFormat)
         self.assertEqual(
             parsedCreated.strftime(timeFormat),
             ann.created, "Expect time format to be in ISO8601")
         self.assertEqual(variant.id, ann.variant_id)
Example #3
0
    def __init__(
            self, localId, referenceSet, randomSeed=0,
            numVariantSets=1, numCalls=1, variantDensity=0.5,
            numReadGroupSets=1, numReadGroupsPerReadGroupSet=1,
            numAlignments=1, numFeatureSets=1,
            numPhenotypeAssociationSets=1, numPhenotypeAssociations=2,
            numRnaQuantSets=2, numExpressionLevels=2):
        """
        Populates this dataset with simulated content.

        In order, the constructor adds phenotype association sets, variant
        sets (each with one variant annotation set), read group sets,
        feature sets and RNA quantification sets. The num* arguments give
        how many of each are created; the i-th object of a seeded kind is
        built with seed randomSeed + i. Individuals and bio samples are
        registered for every call set and every read group.

        numCalls and variantDensity are forwarded to each simulated
        variant set; numReadGroupsPerReadGroupSet and numAlignments are
        forwarded to each simulated read group set;
        numPhenotypeAssociations is forwarded to each simulated phenotype
        association set. numExpressionLevels is accepted but not used
        within this constructor.
        """
        super(SimulatedDataset, self).__init__(localId)
        self._description = "Simulated dataset {}".format(localId)

        # Phenotype association sets
        for pasIndex in range(numPhenotypeAssociationSets):
            associationSet = g2p.SimulatedPhenotypeAssociationSet(
                self, "simPas{}".format(pasIndex), randomSeed + pasIndex,
                numPhenotypeAssociations)
            self.addPhenotypeAssociationSet(associationSet)

        # TODO create a simulated Ontology

        # Variant sets, plus one annotation set per variant set
        for vsIndex in range(numVariantSets):
            vsSeed = randomSeed + vsIndex
            simVariantSet = variants.SimulatedVariantSet(
                self, referenceSet, "simVs{}".format(vsIndex), vsSeed,
                numCalls, variantDensity)
            # Each call set gets one individual and two bio samples that
            # both point at that individual.
            for callSet in simVariantSet.getCallSets():
                sampleName = callSet.getLocalId()
                samplePair = (
                    biodata.BioSample(self, sampleName),
                    biodata.BioSample(self, sampleName + "2"),
                )
                donor = biodata.Individual(self, sampleName)
                for sample in samplePair:
                    sample.setIndividualId(donor.getId())
                self.addIndividual(donor)
                for sample in samplePair:
                    self.addBioSample(sample)
            self.addVariantSet(simVariantSet)
            annotationSet = variants.SimulatedVariantAnnotationSet(
                simVariantSet, "simVas{}".format(vsIndex), vsSeed)
            simVariantSet.addVariantAnnotationSet(annotationSet)

        # Read group sets
        for rgsIndex in range(numReadGroupSets):
            simReadGroupSet = reads.SimulatedReadGroupSet(
                self, 'simRgs{}'.format(rgsIndex), referenceSet,
                randomSeed + rgsIndex, numReadGroupsPerReadGroupSet,
                numAlignments)
            # Each read group gets its own individual and bio sample, and
            # is linked to that bio sample.
            for readGroup in simReadGroupSet.getReadGroups():
                groupName = readGroup.getLocalId()
                sample = biodata.BioSample(self, groupName)
                donor = biodata.Individual(self, groupName)
                sample.setIndividualId(donor.getId())
                readGroup.setBioSampleId(sample.getId())
                self.addIndividual(donor)
                self.addBioSample(sample)
            self.addReadGroupSet(simReadGroupSet)

        # Feature sets
        for fsIndex in range(numFeatureSets):
            simFeatureSet = sequence_annotations.SimulatedFeatureSet(
                self, "simFs{}".format(fsIndex), randomSeed + fsIndex)
            simFeatureSet.setReferenceSet(referenceSet)
            self.addFeatureSet(simFeatureSet)

        # RNA quantification sets (constructed without a seed)
        for rqsIndex in range(numRnaQuantSets):
            simRnaQuantSet = rnaQuantification.SimulatedRnaQuantificationSet(
                self, 'simRqs{}'.format(rqsIndex))
            simRnaQuantSet.setReferenceSet(referenceSet)
            self.addRnaQuantificationSet(simRnaQuantSet)