Esempio n. 1
0
    def integrator( self, target, splts ) :
        """Build a Variant from exactly one pre-split record.

        target is accepted but is not used by this method.
        splts must hold exactly one record.  The record is sliced from
        broad.CALL_START onward to obtain the per-sample calls, and is
        indexed through self.indexOf for every key of broad.COLUMN_MAP.

        Returns Variant( fields, base_calls ) where
          - base_calls has one BaseCall( split_call, sample_index ) for each
            call whose converted genotype satisfies broad.isMutated or
            broad.noInf;
          - fields maps each COLUMN_MAP key to its column value, except that
            "chrom" goes through globes.chromNum, "info" contributes only
            fields["AF"], and "dbSNP" is stored as the integer part of the
            first rs id (or omitted when the column is '.').
          - fields['strand'] is always True.

        Raises AssertionError when splts does not hold exactly one record,
        or when the dbSNP column is neither '.' nor starts with 'rs'.
        """
        # The previous guard asserted a non-empty string, which is always
        # true, so a wrong record count was never reported.
        if len(splts) != 1 :
            raise AssertionError( "len isn't right" )
        (splt,) = splts

        ##TODO generalize this to make it vendor independent, call start column is feature of VCF, not broad???
        calls = splt[ broad.CALL_START: ]
        base_calls = []
        for pat_ix,c in enumerate(calls) :
            sc = broad.splitCall(c)
            gt = broad.convertGT( sc )
            if broad.isMutated( gt ) or broad.noInf( gt ) :
                base_calls.append( BaseCall(sc,pat_ix) )

        fields = {}
        for k in broad.COLUMN_MAP.keys() :
            if k == "chrom" :
                fields[k] = globes.chromNum( splt[self.indexOf[k]] )
            elif k == "info" :
                ##TODO generalize this to make it vendor independent
                dinfo = broad.makeInfoDict( splt[ self.indexOf[k] ] )
                fields["AF"] = dinfo["AF"]
            elif k == "dbSNP" :
                value = splt[self.indexOf[k]]

                #when we getFields, 'dbsnp' will be missing and yield null
                if value == '.' : pass
                elif value.startswith('rs') :
                    #right now just take the first rs number if multiple;
                    #only that first id is parsed, so the ids after it
                    #cannot make int() fail
                    fields[k] = int( value.split(';')[0].strip()[2:] )
                else :
                    # single-argument form prints the same text as the old
                    # print statement and is valid on Python 2 and 3
                    print( "malformed rs number? %s" % (splt,) )
                    # explicit raise: a bare `assert False` vanishes under -O
                    raise AssertionError( "malformed rs number" )
            else :
                fields[k] = splt[ self.indexOf[k] ]

        #according to: http://www.broadinstitute.org/gsa/wiki/index.php/Understanding_the_Unified_Genotyper's_VCF_files
        #ref and alt are always given for the forward strand
        fields['strand'] = True

        return Variant( fields, base_calls )
Esempio n. 2
0
 def isMutated(self) :
     """Return the result of broad.isMutated for this object's GT attribute."""
     genotype = self.GT
     return broad.isMutated( genotype )