Python getPatients Examples

Programming Language: Python

Namespace/Package Name: broad

Method/Function: getPatients

Examples at hotexamples.com: 3

Python getPatients - 3 examples found. These are the top-rated real-world Python examples of broad.getPatients extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: variant.py Project: cincinnatusc123/gleeson_old

    def __init__(self, vcf_file, fast_forward=0) :
        """Open vcf_file and prepare this object for iteration.

        vcf_file     -- path of the file to read; it is opened in binary
                        mode and the handle is kept on self.fin.
        fast_forward -- forwarded unchanged to self.iterate()
                        (presumably a number of records to skip -- confirm
                        against iterate()).
        """
        column_map = broad.COLUMN_MAP
        self.indexOf = column_map
        globes.printColumnWarning( vcf_file, column_map )

        #the handle stays open: the patient list is read from it here and
        #it remains available on self.fin afterwards
        handle = open( vcf_file, "rb" )
        self.fin = handle
        self.patients = broad.getPatients( handle )

        #both option flags start switched off
        self.allow_absent = False
        self.group_repeats = False

        self.iterator = self.iterate(fast_forward)

Example #2

Show file

File: sift.py Project: cincinnatusc123/gleeson

def parseForINDEL( indel_file ) :
    """Write a SIFT input CSV describing the indels listed in indel_file.

    indel_file is read as tab-separated text. broad.getPatients() is
    called first to consume the header lines; every following non-blank
    line produces one "chrom,start,end,strand,allele" row in
    "<globes.DATA_DIR>/intermediate_data/sift/input/<name>_sift_input.csv",
    where <name> is the file's base name up to its first dot.

    The strand comes from the "refseq.transcriptStrand" entry of the info
    column (falling back to "refseq.transcriptStrand_1", then to '+') and
    is written as 1 or -1.

    Raises:
        Exception      -- the strand entry is neither '+' nor '-'.
        AssertionError -- a line is neither a simple insertion
                          (len(ref) == 1 < len(mut)) nor a simple deletion
                          (len(ref) > 1 == len(mut)).
    """
    print(indel_file)

    #take the last path component first so that a dot in a directory name
    #(e.g. "./data/x.vcf") cannot truncate the name, then cut at the first dot
    fname = indel_file.split('/')[-1].split('.',1)[0]
    out_path = "%s/intermediate_data/sift/input/%s_sift_input.csv" \
                    % (globes.DATA_DIR, fname)

    indexOf = broad.COLUMN_MAP
    col_keys = ['chrom','pos','mut','ref']
    strand_keys = ["refseq.transcriptStrand", "refseq.transcriptStrand_1"]
    strand_codes = { '+' : 1, '-' : -1 }

    #context managers close both files even when a line fails to parse
    with open( indel_file ) as fin :
        with open( out_path, 'wb' ) as fout :
            print(fout)

            #fast-forward through header lines (return value not needed)
            broad.getPatients( fin )

            for dataline in fin :
                #tolerate blank lines, e.g. a trailing newline at end of file
                if not dataline.strip() :
                    continue

                splt = dataline.strip().split('\t')
                chrom,pos,mut,ref = [ splt[ indexOf[k] ] for k in col_keys ]

                dinfo = broad.makeInfoDict( splt[ indexOf["info"] ] )
                for key in strand_keys :
                    try :
                        strand = dinfo[key]
                        break
                    except KeyError :
                        continue
                else : #dont have it guess '+'
                    strand = '+'

                if strand not in strand_codes :
                    raise Exception("Strand is not + or - ??")
                strand = strand_codes[strand]

                isInsertion = len(ref) == 1 and len(mut) > 1
                isDeletion = len(ref) > 1 and len(mut) == 1
                if isInsertion :
                    span = 0
                    allele = mut
                elif isDeletion :
                    span = len(ref) - len(mut)
                    allele = '-/'
                else :
                    #explicit raise: a bare assert vanishes under "python -O"
                    #and the row would be written with stale values
                    raise AssertionError(
                        "not a simple insertion or deletion: ref=%r mut=%r"
                        % (ref, mut) )

                start = int(pos)
                end = start + span

                fout.write( "%s,%d,%d,%d,%s\n"
                            % (chrom,start,end,strand,allele) )

Example #3

Show file

File: seattle.py Project: cincinnatusc123/gleeson

def separateOutputToFamilies() :
    """Split the Seattle indel output into one TSV file per patient.

    Reads "<globes.INT_DIR>/seattle/output/indel_output.tsv" in step with
    "<globes.INT_DIR>/seattle/input/indel_input.vcf". One output file
    "<globes.OUT_DIR>/indels_by_fam/<patient>.tsv" is created for every
    patient returned by broad.getPatients() ('/' in a patient name is
    replaced by '-'). Each file receives the output header with an extra
    "originalBroadCall" column inserted before the "sampleAlleles" column,
    followed by the output lines on which that patient's genotype is
    mutated; the extra column holds the patient's call from the matching
    input line.

    Raises:
        ValueError     -- the input file ends before a line matching the
                          current output line is found.
        AssertionError -- more than one input line had to be skipped for
                          an output line, or an output line is not mutated
                          in exactly one patient.
    """
    #NOTE(review): indexOf is not defined in this function; presumably a
    #module-level column map for the output file -- confirm
    vcf_path = "%s/seattle/input/indel_input.vcf" % (globes.INT_DIR)
    tsv_path = "%s/seattle/output/indel_output.tsv" % (globes.INT_DIR)

    finin = open( vcf_path )
    fin = None
    fouts = []
    try :
        #a single pass over the header yields the patients and leaves finin
        #positioned so the original read data can be re-read line by line
        patients = broad.getPatients( finin )
        for pat in patients :
            fouts.append( open("%s/indels_by_fam/%s.tsv" % (globes.OUT_DIR, \
                                                pat.replace('/','-')), 'wb' ) )
        finin_splt = finin.readline().strip().split('\t')

        fin = open( tsv_path )
        column_splt = fin.readline().strip().split('\t')
        bp = indexOf["sampleAlleles"]
        gt_col = indexOf["sampleGenotype"]
        column_splt = column_splt[:bp] + ["originalBroadCall"] + column_splt[bp:]
        new_columns = "\t".join( column_splt )
        for fout in fouts :
            fout.write( "%s\n" % new_columns )

        out_cols = ["chromosome","position","refBase","sampleGenotype"]
        in_cols = ["chrom","pos"]

        for line in fin :
            #ignore the comment lines at the end
            if '#' in line : continue

            #get the necessary column values
            splt = line.strip().split('\t')
            chrom,pos,refBase,sampleGTs = [ splt[ indexOf[c] ] for c in out_cols ]
            sampleGTs = sampleGTs.split(',')

            #find line in input file that corresponds to the output line
            num_incs = 0
            while True :
                finin_chrom, finin_pos = [ finin_splt[ broad.COLUMN_MAP[c] ]
                                           for c in in_cols ]
                if pos == finin_pos and chrom == finin_chrom :
                    break
                num_incs += 1
                raw = finin.readline()
                if not raw :
                    #fail clearly instead of indexing into an empty line
                    raise ValueError( "no line for %s:%s left in %s"
                                      % (chrom, pos, vcf_path) )
                finin_splt = raw.strip().split('\t')
            finin_calls = finin_splt[ broad.COLUMN_MAP["calls"]: ]

            #The output may have multiple lines for each input line,
            #corresponding to the different transcripts. This means that if
            #'line' no longer matches the finin_line, we should only have to
            #jump next once
            assert num_incs <= 1

            #isMutated is a function that takes a GT from the output file
            #and determines if it is a mutation
            isInsertion = '-' in refBase
            if isInsertion :
                l,r = refBase.split('-')
                isMutated = lambda gt : \
                                gt != '%s/%s' % (l,l) and gt != "N/N"
            else :
                isMutated = lambda gt : \
                                refBase not in gt.split('/')[1] and gt != 'N/N'

            num_mutations = 0
            for i,(fout,gt) in enumerate( zip(fouts,sampleGTs) ) :
                if isMutated(gt) :
                    num_mutations += 1
                    #the written line carries only this patient's genotype
                    splt[gt_col] = "%s" % (gt)
                    newline = "\t".join( splt[:bp] + [ finin_calls[i] ] + splt[bp:] )
                    fout.write( "%s\n" % newline )

            #because of indel.indelUniqueToDisease
            assert num_mutations == 1
    finally :
        #release every handle, including the input VCF, even on failure
        for fout in fouts :
            fout.close()
        if fin is not None :
            fin.close()
        finin.close()