def integrator(self, target, splts):
    """Build a ``Variant`` from a single split input row.

    Args:
        target: Not used by this method.
        splts: Container that must hold exactly one row.  The row is
            indexed by column position (via ``self.indexOf``) and sliced
            from ``broad.CALL_START`` onward to get the per-sample call
            entries (presumably a split VCF line -- confirm with caller).

    Returns:
        ``Variant(fields, base_calls)``.  ``fields`` maps each key of
        ``broad.COLUMN_MAP`` to its column value, except that "chrom" is
        passed through ``globes.chromNum``, "info" is reduced to its
        ``AF`` entry (stored under "AF"), and "dbSNP" is stored as the
        integer of the first rs id (or omitted when the column is '.').
        ``fields['strand']`` is always True.  ``base_calls`` holds a
        ``BaseCall`` for every call that ``broad.isMutated`` or
        ``broad.noInf`` accepts.

    Raises:
        AssertionError: if ``splts`` does not hold exactly one row, or if
            the dbSNP column is neither '.' nor starts with 'rs'.
    """
    # The guard used to be `assert "len isn't right"`, which asserts a
    # non-empty string and therefore can never fail.  Raise explicitly so
    # the check really fires (and is not stripped under -O).
    if len(splts) != 1:
        raise AssertionError("len isn't right")

    # Exactly one row is present; take it without requiring indexing support.
    splt = next(iter(splts))

    ##TODO generalize this to make it vendor independent, call start
    ##column is feature of VCF, not broad???
    base_calls = []
    for pat_ix, call in enumerate(splt[broad.CALL_START:]):
        sc = broad.splitCall(call)
        gt = broad.convertGT(sc)
        # Keep calls that are mutated as well as those flagged by noInf.
        if broad.isMutated(gt) or broad.noInf(gt):
            base_calls.append(BaseCall(sc, pat_ix))

    fields = {}
    for k in broad.COLUMN_MAP.keys():
        raw = splt[self.indexOf[k]]
        if k == "chrom":
            fields[k] = globes.chromNum(raw)
        elif k == "info":
            ##TODO generalize this to make it vendor independent
            fields["AF"] = broad.makeInfoDict(raw)["AF"]
        elif k == "dbSNP":
            if raw == '.':
                # when we getFields, 'dbsnp' will be missing and yield null
                pass
            elif raw.startswith('rs'):
                # right now just take the first rs number if multiple;
                # only that first token is parsed, so later ids in the
                # list cannot make the conversion fail.
                fields[k] = int(raw.split(';')[0].strip()[2:])
            else:
                # Single-argument form prints the same text on Python 2
                # as the old `print "...", splt` statement did.
                print("malformed rs number? %s" % (splt,))
                raise AssertionError("malformed rs number?")
        else:
            fields[k] = raw

    #according to: http://www.broadinstitute.org/gsa/wiki/index.php/Understanding_the_Unified_Genotyper's_VCF_files
    #ref and alt are always given for the forward strand
    fields['strand'] = True
    return Variant(fields, base_calls)
def isMutated(self):
    """Return what ``broad.isMutated`` reports for this object's ``GT``."""
    genotype = self.GT
    return broad.isMutated(genotype)