def testCreation(self):
    """
    Build a simulated read group set and check that every read group
    it exposes produces at least one read alignment.

    Note that the loop body is the only place an assertion is made, so
    a read group set with no read groups passes without any check.
    """
    parentDataset = datasets.Dataset('dataset1')
    simulatedReferences = references.SimulatedReferenceSet("srs1")
    readGroupSet = reads.SimulatedReadGroupSet(
        parentDataset, "readGroupSetId", simulatedReferences)
    for group in readGroupSet.getReadGroups():
        # Materialise the alignments so they can be counted.
        alignmentCount = len(list(group.getReadAlignments()))
        self.assertGreater(alignmentCount, 0)
def __init__(
        self, localId, referenceSet, randomSeed=0,
        numVariantSets=1, numCalls=1, variantDensity=0.5,
        numReadGroupSets=1, numReadGroupsPerReadGroupSet=1,
        numAlignments=1, numFeatureSets=1, numPhenotypeAssociationSets=1,
        numPhenotypeAssociations=2, numRnaQuantSets=2,
        numExpressionLevels=2):
    """
    Create a dataset and populate it with simulated content.

    The sets are created in this order: phenotype association sets,
    variant sets (each with biosamples, individuals and one variant
    annotation set), read group sets (with one biosample and one
    individual per read group), feature sets and RNA quantification
    sets.

    :param localId: passed to the parent initialiser and used in the
        dataset description.
    :param referenceSet: handed to every simulated variant set and read
        group set, and set on every feature set and RNA quantification
        set.
    :param randomSeed: base seed; the i-th set of each seeded kind is
        created with ``randomSeed + i``. RNA quantification sets are
        created without a seed.
    :param numVariantSets: number of simulated variant sets to add.
    :param numCalls: forwarded to each simulated variant set.
    :param variantDensity: forwarded to each simulated variant set.
    :param numReadGroupSets: number of simulated read group sets to add.
    :param numReadGroupsPerReadGroupSet: forwarded to each simulated
        read group set.
    :param numAlignments: forwarded to each simulated read group set.
    :param numFeatureSets: number of simulated feature sets to add.
    :param numPhenotypeAssociationSets: number of simulated phenotype
        association sets to add.
    :param numPhenotypeAssociations: forwarded to each simulated
        phenotype association set.
    :param numRnaQuantSets: number of simulated RNA quantification sets
        to add.
    :param numExpressionLevels: accepted but not used by this
        constructor.
    """
    super(SimulatedDataset, self).__init__(localId)
    self._description = "Simulated dataset {}".format(localId)

    # Phenotype association sets
    for index in range(numPhenotypeAssociationSets):
        setName = "simPas{}".format(index)
        setSeed = randomSeed + index
        self.addPhenotypeAssociationSet(
            g2p.SimulatedPhenotypeAssociationSet(
                self, setName, setSeed, numPhenotypeAssociations))

    # TODO create a simulated Ontology

    # Variant sets, each with biosamples/individuals for its call sets
    # and a single variant annotation set.
    for index in range(numVariantSets):
        setName = "simVs{}".format(index)
        setSeed = randomSeed + index
        simulatedVariants = variants.SimulatedVariantSet(
            self, referenceSet, setName, setSeed, numCalls,
            variantDensity)
        for callSet in simulatedVariants.getCallSets():
            # Two biosamples per call set, both linked to one individual.
            firstSample = biodata.BioSample(self, callSet.getLocalId())
            secondSample = biodata.BioSample(
                self, callSet.getLocalId() + "2")
            donor = biodata.Individual(self, callSet.getLocalId())
            for sample in (firstSample, secondSample):
                sample.setIndividualId(donor.getId())
            self.addIndividual(donor)
            for sample in (firstSample, secondSample):
                self.addBioSample(sample)
        self.addVariantSet(simulatedVariants)
        annotations = variants.SimulatedVariantAnnotationSet(
            simulatedVariants, "simVas{}".format(index), setSeed)
        simulatedVariants.addVariantAnnotationSet(annotations)

    # Read group sets, with one biosample and individual per read group.
    for index in range(numReadGroupSets):
        setName = 'simRgs{}'.format(index)
        setSeed = randomSeed + index
        simulatedReads = reads.SimulatedReadGroupSet(
            self, setName, referenceSet, setSeed,
            numReadGroupsPerReadGroupSet, numAlignments)
        for group in simulatedReads.getReadGroups():
            groupSample = biodata.BioSample(self, group.getLocalId())
            groupDonor = biodata.Individual(self, group.getLocalId())
            groupSample.setIndividualId(groupDonor.getId())
            group.setBioSampleId(groupSample.getId())
            self.addIndividual(groupDonor)
            self.addBioSample(groupSample)
        self.addReadGroupSet(simulatedReads)

    # Feature sets
    for index in range(numFeatureSets):
        setName = "simFs{}".format(index)
        setSeed = randomSeed + index
        simulatedFeatures = sequence_annotations.SimulatedFeatureSet(
            self, setName, setSeed)
        simulatedFeatures.setReferenceSet(referenceSet)
        self.addFeatureSet(simulatedFeatures)

    # RNA quantification sets (created without a seed)
    for index in range(numRnaQuantSets):
        setName = 'simRqs{}'.format(index)
        simulatedQuants = rnaQuantification.SimulatedRnaQuantificationSet(
            self, setName)
        simulatedQuants.setReferenceSet(referenceSet)
        self.addRnaQuantificationSet(simulatedQuants)