예제 #1
0
    def __init__(self, call_splt, pat_name, variant_ix=1) :
        """Populate ``self.fields`` from one already-split genotype call.

        call_splt  -- the split call; passed as-is to ``broad.convertGT`` and,
                      when it has more than one element, unpacked as exactly
                      five values in the order GT, AD, DP, GQ, PL.
        pat_name   -- stored unchanged as ``self.pat_name``.
        variant_ix -- forwarded to ``broad.convertGT``.

        ``fields["GT"]`` is always set.  The remaining keys (DP, GQ, AD_ref,
        AD_mut, PL_AA, PL_AB, PL_BB) are only set when the call has more than
        one element.
        """
        self.pat_name = pat_name
        record = self.fields = {}
        record["GT"] = broad.convertGT( call_splt, variant_ix )

        # A single-element call carries nothing beyond the genotype.
        if len(call_splt) <= 1 :
            return

        # The raw genotype string is not needed again; GT was converted above.
        (_raw_gt, allele_depths, depth, quality, likelihoods) = call_splt
        record["DP"] = int(depth)
        record["GQ"] = float(quality)

        # AD must hold exactly two comma-separated integers.
        (ref_depth, mut_depth) = [int(tok) for tok in allele_depths.split(',')]
        record["AD_ref"] = ref_depth
        record["AD_mut"] = mut_depth

        # PL must hold exactly three comma-separated numbers; each value x is
        # mapped to 10 ** (-x / 10).
        scaled = [pow(10, -1*(float(tok)/10)) for tok in likelihoods.split(',')]
        (hom_ref, het, hom_alt) = scaled
        record["PL_AA"] = hom_ref
        record["PL_AB"] = het
        record["PL_BB"] = hom_alt
예제 #2
0
    def integrator( self, target, splts ) :
        """Build a Variant from exactly one split record.

        target -- not used by this method; kept so the signature is unchanged.
        splts  -- sized collection holding exactly one split record (a
                  sequence of column values indexable via ``self.indexOf``).

        Returns ``Variant(fields, base_calls)`` where:
          * ``base_calls`` has one ``BaseCall(split_call, column_index)`` for
            every call column (from ``broad.CALL_START`` on) whose converted
            genotype satisfies ``broad.isMutated`` or ``broad.noInf``;
          * ``fields`` maps each key of ``broad.COLUMN_MAP`` to its column
            value, except: "chrom" goes through ``globes.chromNum``, "info"
            contributes only ``fields["AF"]``, "dbSNP" is stored as the
            integer of the first rs id (and omitted when the column is '.'),
            and ``fields['strand']`` is always True.

        Raises AssertionError when ``splts`` does not hold exactly one record
        or when the dbSNP column is neither '.' nor an rs id.
        """
        # The previous check was `assert "len isn't right"`, which asserts a
        # non-empty string and so could never fail: an empty input crashed
        # later with an unbound local and extra records were silently dropped.
        if len(splts) != 1 :
            raise AssertionError("len isn't right")
        splt = next(iter(splts))

        ##TODO generalize this to make it vendor independent, call start column is feature of VCF, not broad???
        calls = splt[ broad.CALL_START: ]
        base_calls = []
        for pat_ix,c in enumerate(calls) :
            sc = broad.splitCall(c)
            gt = broad.convertGT( sc )
            if broad.isMutated( gt ) or broad.noInf( gt ) :
                base_calls.append( BaseCall(sc,pat_ix) )

        fields = {}
        for k in broad.COLUMN_MAP.keys() :
            # Every branch below reads this column, so look it up once.
            value = splt[ self.indexOf[k] ]
            if k == "chrom" :
                fields[k] = globes.chromNum( value )
            elif k == "info" :
                ##TODO generalize this to make it vendor independent
                dinfo = broad.makeInfoDict( value )
                fields["AF"] = dinfo["AF"]
            elif k == "dbSNP" :
                #right now just take the first rs number if multiple; only
                #that first id is converted, so later ids cannot break parsing
                if value.startswith('rs') :
                    fields[k] = int( value.split(';')[0].strip()[2:] )
                #when we getFields, 'dbsnp' will be missing and yield null,
                #so a '.' column is simply left out of fields
                elif value != '.' :
                    # Raised explicitly (not `assert False`) so the check
                    # survives `python -O` and the message travels with the
                    # error instead of being printed separately.
                    raise AssertionError( "malformed rs number? %s" % (splt,) )
            else :
                fields[k] = value

        #according to: http://www.broadinstitute.org/gsa/wiki/index.php/Understanding_the_Unified_Genotyper's_VCF_files
        #ref and alt are always given for the forward strand
        fields['strand'] = True

        return Variant( fields, base_calls )