def __init__(self, call_splt, pat_name, variant_ix=1):
    """Populate ``pat_name`` and ``fields`` from one split genotype call.

    ``fields["GT"]`` is always set from ``broad.convertGT(call_splt,
    variant_ix)``. The remaining entries are only filled in when
    ``call_splt`` holds more than one element, in which case it must
    unpack into exactly five values (GT, AD, DP, GQ, PL).

    Args:
        call_splt: Sequence of the call's sub-fields.
        pat_name: Stored unchanged on ``self.pat_name``.
        variant_ix: Forwarded to ``broad.convertGT``.

    Raises:
        ValueError: if ``call_splt`` has more than one element but not
            exactly five, if AD does not hold exactly two comma-separated
            integers, if PL does not hold exactly three comma-separated
            numbers, or if DP/GQ are not numeric.
    """
    self.pat_name = pat_name
    fields = self.fields = {}
    fields["GT"] = broad.convertGT(call_splt, variant_ix)

    # A single-element call carries nothing beyond the genotype.
    if len(call_splt) <= 1:
        return

    # The raw GT string is not needed here; "GT" was already stored above.
    _raw_gt, allele_depths, depth, quality, likelihoods = call_splt

    fields["DP"] = int(depth)
    fields["GQ"] = float(quality)

    ref_depth, mut_depth = [int(tok) for tok in allele_depths.split(',')]
    fields["AD_ref"] = ref_depth
    fields["AD_mut"] = mut_depth

    # Each PL value x is mapped to 10^(-x/10)
    # (presumably Phred-scaled -> linear likelihood; confirm against the
    # caller's VCF flavour).
    pl_aa, pl_ab, pl_bb = [
        pow(10, -1 * (float(tok) / 10)) for tok in likelihoods.split(',')
    ]
    fields["PL_AA"] = pl_aa
    fields["PL_AB"] = pl_ab
    fields["PL_BB"] = pl_bb
def integrator(self, target, splts):
    """Build a ``Variant`` from exactly one split row.

    Args:
        target: Not used by this method; kept for signature compatibility.
        splts: Sized iterable that must contain exactly one row. The row is
            indexed with ``self.indexOf[...]`` and sliced from
            ``broad.CALL_START`` to obtain the per-patient calls.

    Returns:
        ``Variant(fields, base_calls)`` where ``fields`` holds the row's
        column values (plus ``"AF"`` and ``"strand"``) and ``base_calls``
        holds a ``BaseCall`` for every call whose converted genotype is
        reported by ``broad.isMutated`` or ``broad.noInf``.

    Raises:
        AssertionError: if ``splts`` does not hold exactly one row, or if
            the dbSNP column is neither ``'.'`` nor starts with ``'rs'``.
    """
    # Raise explicitly: `assert "some message"` is always true, and real
    # asserts are stripped under -O, so neither can guard the input.
    if len(splts) != 1:
        raise AssertionError("len isn't right")
    (splt,) = splts

    ##TODO generalize this to make it vendor independent, call start column
    ## is feature of VCF, not broad???
    base_calls = []
    for pat_ix, call in enumerate(splt[broad.CALL_START:]):
        sc = broad.splitCall(call)
        gt = broad.convertGT(sc)
        if broad.isMutated(gt) or broad.noInf(gt):
            base_calls.append(BaseCall(sc, pat_ix))

    fields = {}
    for k in broad.COLUMN_MAP:
        raw = splt[self.indexOf[k]]
        if k == "chrom":
            fields[k] = globes.chromNum(raw)
        elif k == "info":
            ##TODO generalize this to make it vendor independent
            fields["AF"] = broad.makeInfoDict(raw)["AF"]
        elif k == "dbSNP":
            if raw == '.':
                # when we getFields, 'dbsnp' will be missing and yield null
                continue
            if not raw.startswith('rs'):
                # Same text the old print statement emitted, written so it
                # parses on both Python 2 and Python 3.
                print("malformed rs number? %s" % (splt,))
                raise AssertionError("malformed rs number")
            # right now just take the first rs number if multiple; only
            # that one is parsed so a bad trailing id cannot abort the row
            fields[k] = int(raw.split(';')[0].strip()[2:])
        else:
            fields[k] = raw

    #according to: http://www.broadinstitute.org/gsa/wiki/index.php/Understanding_the_Unified_Genotyper's_VCF_files
    #ref and alt are always given for the forward strand
    fields['strand'] = True

    return Variant(fields, base_calls)