Example #1
0
def OldPredictionAnalysis(align1, align2, outfile, widths = range(1,5), same = False, mode = 'a', cons_cut = 0.98, calc_pval = False, short_linkage_format = False):
    """Analyzes the linkages between 2 alignments.
    
    A controller function which calculates the Linkage between columns in 
    the alignment files. This function takes care of opening/loading 
    alignments, iterating through the columns, calculating linkages, 
    and writing to an output file.

    Arguments:
    align1 -- Path to source alignment file.
    align2 -- Path to target alignment file.
    outfile -- Path to the results file.
    
    Kwargs:
    widths -- The desired column widths to check. Default: range(1,5)
    same -- A boolean indicating whether these files are the same protein.
            Default: False
    mode -- Which mode to open the results file. Default: 'a'
    cons_cut -- The conservation cutoff to use for ignoring columns. 
                Default: 0.98
    calc_pval -- A boolean indicating whether to calculate p-values for 
                 linkages. Default: False (currently unused in this body)
    short_linkage_format -- If True, skip writing placeholder rows for
                            column pairs rejected as "too few" or
                            "too conserved". Default: False
    
    Returns:
    The number of column-pair slices for which linkage rows were written."""
    
    def get_signals(align1, align2, widths, same, last):
        # Generator over candidate (source-slice, target-slice) pairs.
        # Yields (slice1, slice2, sorted shared sequence names, location dict)
        # for usable pairs, or (None, None, None, location dict) when fewer
        # than 6 sequences are shared between the two slices.
        # NOTE: mutates the enclosing source_skip/target_skip sets so that
        # slices already known to be unusable are skipped on later passes.
        for sw, tw, ss, ts in crazy_iter([0, align1.width], [0, align2.width], widths, last_items = last):
            # When comparing a protein against itself, ignore overlapping windows.
            if same and getOverlap((ss, ss+sw), (ts, ts+tw)) > 0:
                continue
            if (ss, ss+sw) not in source_skip and (ts, ts+tw) not in target_skip:
                a1 = align1.get_slice(ss, ss+sw)
                a2 = align2.get_slice(ts, ts+tw)
                # A None slice is permanently unusable; remember it.
                if a1 is None:
                    source_skip.add((ss, ss+sw))
                if a2 is None:
                    target_skip.add((ts, ts+tw))
                if a1 is None or a2 is None:
                    continue
                    
                # Sequences present in BOTH slices; need more than 5 to score.
                over = set(a1.seqs.keys()) & set(a2.seqs.keys())
                if len(over) > 5:
                    yield a1, a2, sorted(over), {'Source-Start':ss, 'Source-End':ss+sw,
                                                    'Target-Start':ts, 'Target-End':ts+tw}
                else:
                    yield None, None, None, {'Source-Start':ss, 'Source-End':ss+sw,
                                                    'Target-Start':ts, 'Target-End':ts+tw}
                # Slices with fewer than 10 sequences will never be usable.
                if len(a1.seqs) < 10:
                    source_skip.add((ss, ss+sw))
                if len(a2.seqs) < 10:
                    target_skip.add((ts, ts+tw))
                    
                    

    print 'widths!', widths

    line_count = 0
    a1 = Alignment.alignment_from_file(align1)
    a2 = Alignment.alignment_from_file(align2)

    sprot = prot_from_path(align1)
    tprot = prot_from_path(align2)

    # Template row: every LINK_FIELDS column defaults to None except the
    # protein names; positions are removed because each row sets its own.
    defaults = dict(zip(LINK_FIELDS, [None]*len(LINK_FIELDS)))
    defaults['Source-Prot']=sprot
    defaults['Target-Prot']=tprot
    defaults.pop('Source-Start')
    defaults.pop('Source-End')
    defaults.pop('Target-Start')
    defaults.pop('Target-End')

    source_skip = set()
    target_skip = set()

    calculator = LinkCalculator()
    
    # When appending to an existing results file, fetch its last row so the
    # iterator can resume where the previous run stopped.
    if mode == 'a' and os.path.exists(outfile):
        print 'trying to get last line!'
        with open(outfile) as handle:
            last = get_last(csv.DictReader(handle, delimiter = '\t'))
    else:
        last = None

    with open(outfile, mode) as handle:
        # NOTE(review): the header line is written unconditionally, so
        # appending produces a duplicate header mid-file -- confirm intended.
        handle.write('\t'.join(LINK_FIELDS)+'\n')
        writer = csv.DictWriter(handle, fieldnames = LINK_FIELDS, 
                                delimiter = '\t')
        for slice1, slice2, seqs, loc in get_signals(a1, a2, widths, same, last):
            loc.update(defaults)            
            # slice1 is None => too few shared sequences for this pair.
            if slice1 is None:
                loc.update({'Source-Seq':None,
                            'Target-Seq':None,
                            'Correct-Num':'too few',
                            'Total-Num':'too few',
                            'This-Score': 0})
                #print 'few %(Source-Start)i, %(Source-End)i, %(Target-Start)i, %(Target-Start)i' % loc
                if not short_linkage_format:                
                    writer.writerow(loc)
                continue
            # Progress output every 10 target columns.
            if not loc['Target-Start'] % 10:
                print '%(Source-Prot)s,%(Target-Prot)s,%(Source-Start)i,%(Target-Start)i' % loc
            s1, m1 = slice1.get_signal(seqs)
            s2, m2 = slice2.get_signal(seqs)

            #create reverse mappings
            rm1 = dict([(y,x) for x,y in m1.items()])
            rm2 = dict([(y,x) for x,y in m2.items()])

            seq1 = ''.join(rm1[s].upper() for s in s1)
            seq2 = ''.join(rm2[s].upper() for s in s2)

            #create count dictionary
            c1 = make_counts(s1)
            c2 = make_counts(s2)
            
            # Fraction of the column occupied by its most common symbol.
            # NOTE(review): on Python 2 this is integer division unless the
            # module does `from __future__ import division` -- confirm.
            loc['Source-Cons'] = max(x/len(s1) for x in c1.values())
            loc['Target-Cons'] = max(x/len(s2) for x in c2.values())

            # Over-conserved columns carry no linkage signal; record (unless
            # short format), blacklist them, and move on.
            if loc['Source-Cons'] > cons_cut or loc['Target-Cons'] > cons_cut:
                loc.update({'Correct-Num':'too conserved',
                            'Total-Num':'too conserved'})
                if not short_linkage_format:
                    writer.writerow(loc)
                if loc['Source-Cons'] > cons_cut:
                    source_skip.add((loc['Source-Start'], loc['Source-End']))
                if loc['Target-Cons'] > cons_cut:
                    target_skip.add((loc['Target-Start'], loc['Target-End']))
                continue


            # Overall score: fraction of source symbols correctly predicted.
            mappings = prediction_mapping(tuple(s1), tuple(s2))
            score = sum([z for _, _, z in mappings])/len(s1)
            loc['Total-Score'] = score
            for field, val in calculator.calculate_all(seq1, seq2):
                loc[field] = val

            #print '%(Source-Start)i, %(Source-End)i, %(Target-Start)i, %(Target-Start)i, %(Total-Score)f' % loc
            line_count += 1
            # One output row per source->target mapping.
            for source, target, val in mappings:
                loc.update({'Source-Seq':rm1[source],
                                'Target-Seq':rm2[target],
                                'Correct-Num':val,
                                'Total-Num':c1[source],
                                'This-Score': val/c1[source]})
                writer.writerow(loc)
            # Force data to disk so an interrupted run can be resumed.
            handle.flush()
            os.fsync(handle.fileno())
    return line_count
Example #2
0
def PredictionAnalysis(align1, align2, outfile, cons_cut = 0.99, **kwargs):
    """Calculate single-column linkage between two alignment files.

    Iterates over every pair of columns (one from each alignment,
    restricted to sequences present in both), computes conservation,
    entropy, calculator fields and prediction mappings, and writes one
    row per mapping to a tab-delimited results file.  If a valid results
    file already exists, processing resumes after its last recorded row.

    Arguments:
    align1 -- Path to source alignment file.
    align2 -- Path to target alignment file.
    outfile -- Path to the results file.

    Kwargs:
    cons_cut -- Conservation cutoff. Default: 0.99
                NOTE(review): currently accepted but unused -- the loop
                below only skips fully conserved columns (== 1); confirm
                whether it should compare against cons_cut.

    Returns:
    None"""

    # Decide whether to start fresh or resume a previous run.
    if not os.path.exists(outfile):
        mode = 'w'
        last = None
    elif check_headers(outfile):
        # Existing file has bad headers: overwrite it.
        mode = 'w'
        last = None
    else:
        mode = 'a'
        with open(outfile) as handle:
            iterable = csv.DictReader(handle, delimiter = '\t')
            last = get_last(iterable)

    a1 = Alignment.alignment_from_file(align1)
    a2 = Alignment.alignment_from_file(align2)

    sprot = prot_from_path(align1)
    tprot = prot_from_path(align2)

    # Template row: all LINK_FIELDS default to None except protein names;
    # positions are removed because each row sets its own.
    defaults = dict(zip(LINK_FIELDS, [None]*len(LINK_FIELDS)))
    defaults['Source-Prot']=sprot
    defaults['Target-Prot']=tprot
    defaults.pop('Source-Start')
    defaults.pop('Source-End')
    defaults.pop('Target-Start')
    defaults.pop('Target-End')

    calculator = LinkCalculator()
    # Used to blank out calculator fields after the first mapping row.
    rmheaders = dict((head, None) for head in calculator.get_fields())

    # Only sequences present in BOTH alignments can be compared.
    headers = sorted(set(a1.seqs.keys()) & set(a2.seqs.keys()))

    iterable = product(izip(count(), a1.iterate_columns(headers)),
                        izip(count(), a2.iterate_columns(headers)))
    if last:
        # BUGFIX: itertools.dropwhile takes (predicate, iterable); the
        # original call passed them reversed, raising a TypeError as soon
        # as a resumed run started iterating.
        fiterable = dropwhile(lambda x: x[0][0] < last[2] and x[1][0] < last[3],
                              iterable)
    else:
        fiterable = iterable

    # BUGFIX: use a context manager so the output handle is always closed
    # (the original leaked it).
    with open(outfile, mode) as ohandle:
        writer = csv.DictWriter(ohandle, LINK_FIELDS, delimiter = '\t')
        if mode == 'w':
            writer.writerow(dict(zip(LINK_FIELDS, LINK_FIELDS)))

        for (ind1, seq1), (ind2, seq2) in fiterable:

            # Keep only positions where neither column has a gap.
            cseq1 = ''
            cseq2 = ''
            for s1, s2 in zip(seq1, seq2):
                if s1 != '-' and s2 != '-':
                    cseq1 += s1
                    cseq2 += s2

            if not cseq1 or not cseq2:
                continue

            c1 = make_counts(cseq1)
            c2 = make_counts(cseq2)

            row = dict()
            row.update(defaults)
            row['Source-Start'] = ind1
            row['Source-End'] = ind1+1
            row['Target-Start'] = ind2
            row['Target-End'] = ind2+1

            # Fraction of the column occupied by its most common symbol.
            # NOTE(review): on Python 2 this is integer division unless the
            # module does `from __future__ import division` -- confirm.
            row['Source-Cons'] = max(x/len(cseq1) for x in c1.values())
            row['Target-Cons'] = max(x/len(cseq2) for x in c2.values())
            # Fully conserved columns carry no linkage signal: record and skip.
            if row['Source-Cons'] == 1 or row['Target-Cons'] == 1:
                writer.writerow(row)
                continue

            row['Source-Entropy'] = calculate_entropy(cseq1)
            row['Target-Entropy'] = calculate_entropy(cseq2)

            for field, val in calculator.calculate_all(cseq1, cseq2):
                row[field] = val

            # Overall score: fraction of source symbols correctly predicted.
            mappings = prediction_mapping(cseq1, cseq2)
            score = sum([z for _, _, z in mappings])/len(cseq1)
            row['Total-Score'] = score
            # First mapping row carries the calculator fields ...
            source, target, val = mappings[0]
            row.update({'Source-Seq':source,
                        'Target-Seq':target,
                        'Correct-Num':val,
                        'Total-Num':c1[source],
                        'This-Score': val/c1[source]})
            writer.writerow(row)
            # ... which are then blanked for the remaining mapping rows.
            row.update(rmheaders)

            for source, target, val in mappings[1:]:
                row.update({'Source-Seq':source,
                            'Target-Seq':target,
                            'Correct-Num':val,
                            'Total-Num':c1[source],
                            'This-Score': val/c1[source]})
                writer.writerow(row)
            # Force data to disk so an interrupted run can be resumed.
            ohandle.flush()
            os.fsync(ohandle.fileno())