def _convertGaCall(self, recordId, name, pysamCall, genotypeData):
    """
    Builds a protocol.Call for the call set identified by the specified
    name, using the specified pysam call and raw genotype data.

    The call set looked up from the name supplies callSetId, callSetName
    and sampleId. The genotype and phaseset are produced by
    convertVCFGenotype from genotypeData. For the (key, value) pairs
    yielded by pysamCall.iteritems(): a 'GL' entry whose value is not
    None becomes genotypeLikelihood, 'GT' is skipped, and every other
    entry (including a 'GL' whose value is None) is stored in call.info
    after passing through _encodeValue.

    The recordId argument is accepted but not used in this method.
    """
    resolvedCallSet = self.getCallSet(self.getCallSetId(name))

    gaCall = protocol.Call()
    gaCall.callSetId = resolvedCallSet.getId()
    gaCall.callSetName = resolvedCallSet.getSampleName()
    gaCall.sampleId = resolvedCallSet.getSampleName()

    # TODO: this is an interim implementation. Once a pysam release
    # exposes phaseset, the genotype should be taken from
    # list(pysamCall.allele_indices) and the phaseset from
    # pysamCall.phaseset, and the genotypeData parameter should be
    # removed from this method's signature.
    gaCall.phaseset = None
    genotype, phaseset = convertVCFGenotype(genotypeData, gaCall.phaseset)
    gaCall.genotype = genotype
    gaCall.phaseset = phaseset

    gaCall.genotypeLikelihood = []
    for fieldName, fieldValue in pysamCall.iteritems():
        if fieldName == 'GT':
            # The genotype has already been handled above.
            continue
        if fieldName == 'GL' and fieldValue is not None:
            gaCall.genotypeLikelihood = list(fieldValue)
            continue
        gaCall.info[fieldName] = _encodeValue(fieldValue)
    return gaCall
def generateVariant(self, referenceName, position, randomNumberGenerator):
    """
    Returns a randomly generated SNP variant at the specified position
    on the specified reference, with one call per call set.

    All randomness is drawn from the specified random number generator.
    Seed it with a value unique to the position so that the same variant
    is produced for that position no matter what order variants are
    generated in.
    """
    nucleotides = ["A", "C", "G", "T"]

    simulated = self._createGaVariant()
    simulated.names = []
    simulated.referenceName = referenceName
    simulated.start = position
    # Only single-base substitutions (SNPs) are generated for now.
    simulated.end = position + 1

    # Draw the reference base first, then the alternate from the
    # remaining three; the order of draws determines the output.
    referenceBase = randomNumberGenerator.choice(nucleotides)
    simulated.referenceBases = referenceBase
    alternateCandidates = list(nucleotides)
    alternateCandidates.remove(referenceBase)
    alternateBase = randomNumberGenerator.choice(alternateCandidates)
    simulated.alternateBases = [alternateBase]

    simulated.calls = []
    for callSet in self.getCallSets():
        gaCall = protocol.Call()
        gaCall.callSetId = callSet.getId()
        # For now the genotype is [0, 1], [1, 0] or [1, 1] with equal
        # probability; a more sophisticated model may be wanted later.
        genotypeOptions = [[0, 1], [1, 0], [1, 1]]
        gaCall.genotype = randomNumberGenerator.choice(genotypeOptions)
        # TODO What is a reasonable model for generating these
        # likelihoods? Are they log-scaled? The spec does not say.
        gaCall.genotypeLikelihood = [-100, -100, -100]
        simulated.calls.append(gaCall)

    simulated.id = self.getVariantId(simulated)
    return simulated