예제 #1
0
def PredictionAnalysis(align1, align2, outfile, granular = True, open_mode = 'w', limit_functions = set(), **kwargs):
    """Compute link rows for two alignment files and write them as TSV.

    Both alignments are loaded with ``Alignment.alignment_from_file``; rows
    produced by ``task_loader`` are converted with
    ``convert_row_to_writeable_rows`` and written to ``outfile`` through a
    tab-delimited ``csv.DictWriter`` keyed on ``LinkFields.LINK_FIELDS``.

    Parameters:
        align1, align2: paths of the two alignment files.  They are also
            passed to ``prot_from_path`` to fill the 'S1-Prot'/'S2-Prot'
            defaults, and compared for equality to tell ``task_loader``
            whether both inputs are the same file.
        outfile: path of the output file.
        granular: when true, rows are taken directly by iterating
            ``task_loader``.  Otherwise ``task_loader`` runs in a background
            thread feeding a queue, and every queued item is resolved with
            ``item.get(timeout=..., interval=...)``.
        open_mode: mode used to open ``outfile``.  Exactly 'w' writes a
            header row and starts with an empty "done" set; any other mode
            asks ``get_done(outfile)`` for the items already present.
        limit_functions: forwarded unchanged to ``task_loader``.
        **kwargs: accepted but not used by this function.

    Returns:
        None.
    """
    a1 = Alignment.alignment_from_file(align1)
    a2 = Alignment.alignment_from_file(align2)

    # Parenthesised form prints the same text under Python 2 and Python 3.
    print('loaded alignments')
    sprot = prot_from_path(align1)
    tprot = prot_from_path(align2)

    # Every known link field defaults to None, minus the four Source/Target
    # coordinate fields, which are removed from the defaults.
    defaults = dict(zip(LinkFields.LINK_FIELDS, [None]*len(LinkFields.LINK_FIELDS)))
    defaults['S1-Prot'] = sprot
    defaults['S2-Prot'] = tprot
    for dropped in ('Source-Start', 'Source-End', 'Target-Start', 'Target-End'):
        defaults.pop(dropped)

    calculator = LinkUtils.LinkCalculator()
    rmheaders = dict((head, None) for head in calculator.get_fields())

    submats = LinkUtils.get_all_sub_mats()

    # The file is opened before get_done() is consulted, exactly as before,
    # so a non-'w' mode that creates the file still does so first.
    ohandle = open(outfile, open_mode)
    try:
        owriter = csv.DictWriter(ohandle, LinkFields.LINK_FIELDS,
            delimiter = '\t', extrasaction='ignore')
        if open_mode == 'w':
            # Header row: each field name mapped onto itself.
            owriter.writerow(dict(zip(LinkFields.LINK_FIELDS,
                LinkFields.LINK_FIELDS)))
            done = set()
        else:
            done = get_done(outfile)

        if granular:
            for row in task_loader(None, a1, a2, defaults, submats, 50, align1==align2,
                                   limit_functions=limit_functions, found_items = done):
                owriter.writerows(convert_row_to_writeable_rows(row, rmheaders))
        else:
            process_que = Queue(1000)
            loader = Thread(target=task_loader,
                            args= (process_que, a1, a2, defaults, submats, 50, align1==align2),
                            kwargs={'limit_functions':limit_functions, 'found_items': done})
            loader.start()

            print('waiting for first')
            # A None item on the queue marks the end of the work.
            item = process_que.get()
            while item is not None:
                try:
                    row = item.get(timeout = 60*30, interval = 60*1)
                except TimeoutError:
                    # Skip results that never arrive instead of aborting.
                    logging.warning('no result for one!')
                    item = process_que.get()
                    continue
                logging.info('%i %i' % (row['S1-Start'], row['S2-End']))
                owriter.writerows(convert_row_to_writeable_rows(row, rmheaders))
                item = process_que.get()
    finally:
        # Always flush and release the output file, even when a step above
        # raises; the original never closed it.
        ohandle.close()
예제 #2
0
def link_calculator(row, submats, seq1, seq2, granular = False,limit_functions = set()):
    """Fill ``row`` with per-column statistics and linkage scores.

    Parameters:
        row: dict updated in place and returned.  When ``granular`` is true
            it must already hold 'S1-Start' and 'S2-Start' (used in logging).
        submats: iterable of ``(name, matrix)`` pairs; each yields one
            'SBASC_<name>' scoring function.
        seq1, seq2: the two sequences being compared; ``len(seq1)`` is stored
            as 'SeqLength'.
        granular: when true, scores come from
            ``LinkUtils.celery_calculate_vals`` with the per-function
            ``preargs``; otherwise from ``LinkUtils.calculate_vals``.
        limit_functions: if non-empty, only scoring functions whose name is
            in this collection are evaluated.

    Returns:
        ``row``.  It is returned early, without any linkage scores, when
        either conservation value exceeds 0.99999.
    """
    c1 = AlignUtils.make_counts(seq1)
    c2 = AlignUtils.make_counts(seq2)

    row['S1-Entropy'] = AlignUtils.calculate_entropy(seq1)
    row['S2-Entropy'] = AlignUtils.calculate_entropy(seq2)
    row['S12-Mapping'] = LinkUtils.prediction_mapping(seq1, seq2)
    row['S21-Mapping'] = LinkUtils.prediction_mapping(seq2, seq1)
    row['SeqLength'] = len(seq1)

    # Force true division: under Python 2 an int count divided by an int
    # length floors to 0 or 1, which defeats the fractional threshold below.
    # Harmless if make_counts already returns floats (not visible here).
    len1 = float(len(seq1))
    len2 = float(len(seq2))
    row['S1-Cons'] = max(x/len1 for x in c1.values())
    row['S2-Cons'] = max(x/len2 for x in c2.values())
    if row['S1-Cons'] > 0.99999 or row['S2-Cons'] > 0.99999:
        # Stop before any linkage scoring when a column is (nearly) invariant.
        return row

    logging.info('%s\t%f\t%s\t%f' % (seq1, row['S1-Cons'], seq2, row['S2-Cons']))

    # Each entry is (result name, scoring callable, extra leading arguments).
    processfuns = []
    for name, mat in submats:
        if granular:
            # The matrix travels as a pre-argument alongside the function.
            processfuns.append(('SBASC_'+name,
                LinkUtils.calculate_SBASC, (mat,)))
        else:
            # Bind the matrix up front; calculate_vals receives no preargs.
            processfuns.append(('SBASC_'+name,
                                partial(LinkUtils.calculate_SBASC, mat), ()))

    # NOTE(review): Mutual_Info passes {} where the others pass (); kept
    # as-is -- confirm how celery_calculate_vals consumes preargs.
    processfuns.append(('Mutual_Info', LinkUtils.calculate_mutual_info, {}))
    processfuns.append(('OMES', LinkUtils.calculate_OMES, ()))
    processfuns.append(('Linkage', LinkUtils.calculate_mapping, ()))
    suffs = ['_raw', '_pval', '_null', '_count']
    if limit_functions:
        processfuns = [x for x in processfuns if x[0] in limit_functions]

    for name, func, evals in processfuns:
        if granular:
            logging.info('calculating %s %i %i' % (name, row['S1-Start'], row['S2-Start']))
            res = LinkUtils.celery_calculate_vals(seq1, seq2, func, preargs=evals)
        else:
            res = LinkUtils.calculate_vals(seq1, seq2, func)
        # Results are paired positionally with the suffixes; zip() drops
        # whatever is left over on the longer side.
        for val, suff in zip(res, suffs):
            row[name+suff] = val
            logging.info(name+suff+':'+str(val))

    return row