コード例 #1
0
ファイル: ppi.py プロジェクト: pombredanne/biopsy
 def hits_that_share_interactors(self, interactors):
     """
     Return those hits that are associated with the given interactors.

     Walks ``self.analysis`` and keeps every hit whose binder is a key of
     ``self.hits_to_interactors`` and whose stored interactors have a
     non-empty intersection with ``interactors``.

     @arg interactors: The interactors to match against (passed to ``intersection``)
     @return: A new biopsy.HitVec holding the matching hits, in analysis order
     """
     shared = biopsy.HitVec()
     for candidate in self.analysis:
         binder = candidate.binder
         # Binders with no recorded interactors can never match.
         if binder not in self.hits_to_interactors:
             continue
         overlap = self.hits_to_interactors[binder].intersection(interactors)
         if len(overlap):
             shared.append(candidate)
     return shared
コード例 #2
0
def consolidate_hits(hits):
    """
    Takes a sequence of BiFa hits and consolidates all of those for the same binder that overlap.

    The hits are grouped with hits_per_binder() and each group is passed
    through consolidate_overlapping_hits(); the consolidated groups are
    concatenated in the order that ``.values()`` yields them.

    @arg hits: A sequence of BiFa hits
    @return:  A consolidated sequence of BiFa hits (a biopsy.HitVec)
    """
    result = biopsy.HitVec()
    # Use a distinct loop name so the ``hits`` argument is not rebound.
    for binder_hits in hits_per_binder(hits).values():
        result.extend(consolidate_overlapping_hits(binder_hits))
    return result
コード例 #3
0
def map_hits(hits, binder_map):
    """
    Re-express a sequence of BiFa binding hits over the binders named in binder_map.

    Every input hit produces one output hit per entry of
    ``binder_map[hit.binder]``; each output hit keeps the location and
    p_binding of the hit it came from. ``binder_map`` is indexed directly,
    so a lookup failure for a binder propagates to the caller.

    @arg hits: BiFa hits
    @arg binder_map: A dict like object that maps binder names. E.g. mapping BiFa pssm names to transcription factors
    @return: A sequence of BiFa hits (a biopsy.HitVec)
    """
    mapped_hits = biopsy.HitVec()
    mapped_hits.extend(
        biopsy.Hit(target, source.location, source.p_binding)
        for source in hits
        for target in binder_map[source.binder]
    )
    return mapped_hits
コード例 #4
0
ファイル: biopsy_test.py プロジェクト: pombredanne/biopsy
def test_pssm_score():
    # 'V$AP1_Q2'
    pssm_acc = biopsy.get_transfac_pssm_accession('V$DEAF1_01')
    pssm_info = biopsy.get_pssm(pssm_acc)
    # print pssm_info.pssm
    seq = 'tacatcatctgtctgcagtagtctaaccgaccccccccagttttagaagcagactgcatgcggacgggaccgcggatcgcgcggtgcgcctcagtgtacttccgaacgaatgagtcattaatagagcgctatatcgtaactgtctttgacgaagtataccgaaaccgtgcagccagacgtgatccgggcgttgtaaaggcgatcagcgccctaggagtaccatttttgccgtaggcttgcgtctcaaagaccagctggggcgtggtatcactcgtcagtacgatttctgccagatagatagcatagactgaaccttaggcccaatagggacacaattacccgagtgactgactggtctaaggggagtccccccttaaaacgttttacgtaatagcgggctccagaagcaaagcatcggtttgagccccagtactaaacgtttgagtgtttgctctcgtctgataggtaaaccgacaagagaaccaagctcaaggcgcggtaggtgcgccttgcgaactgttgatgccgtgagcgccaccatcccgtgcatcataggcagggagagaagaccacatggccttgcgaccgtatgagctgtttcagattaaatgccaacgggcatggtcggtgtccagcattttttgcagtcagctggtggtacacagtggggacaagaacgcctctggtagatgtcttctgaaggagtaactcatttcgttgaatcgaccttcccttgcgcttgaacgcggacctctagtctctctcgcagactggggtcgaaaatcaaggtagatatggaatgttccgcatgagggtagcgaccggatcgggcgtcaagtatatcctccctgctacgtccccctactagcctcagtccgcctcgaacctaggaagattggccacatcagcttggtggatgcctggtccatacttcagacccgagaatgttagacaggaccccatttggctcctttacgtacgatctatgtagacgcagtga'
    for i in range(len(seq) - len(pssm_info.pssm) + 1):
        s = biopsy.score_pssm(pssm_info.pssm, seq[i:])
        p_binding = biopsy.get_p_binding(
            biopsy.get_odds_ratio(s, pssm_info.get_dist(True, False),
                                  pssm_info.get_dist(False, False)))
        if p_binding > 0.05:
            print i, s, p_binding
    result = biopsy.HitVec()
    p_binding = biopsy.score_pssm_on_sequence(pssm_acc, seq, 0.05, result)
    print 'Got', len(result), 'hits from', len(seq), 'bases'
    print p_binding
コード例 #5
0
ファイル: ppi.py プロジェクト: pombredanne/biopsy
 def write_svg(self,
               filename,
               hits,
               max_threshold=0.0,
               notes="",
               max_chain=None):
     """
     Build an SVG of the given hits via biopsy.build_analysis_svg.

     The SVG arguments take their minimum threshold from ``self.threshold``
     and their title from ``self.name``; the hits are drawn against
     ``self.converted_seqs[0]``. Nothing is built when ``hits`` is empty.

     @arg filename: Passed as the ``file`` argument of biopsy.BuildSvgArgs
     @arg hits: The hits to draw
     @arg max_threshold: Passed as ``max_threshold`` of biopsy.BuildSvgArgs
     @arg notes: Passed as ``notes`` of biopsy.BuildSvgArgs
     @arg max_chain: Hits passed as the second argument of build_analysis_svg;
         defaults to a new, empty biopsy.HitVec
     """
     # A HitVec in the signature would be created once at definition time and
     # shared by every call, so the default is built per call instead.
     if max_chain is None:
         max_chain = biopsy.HitVec()
     build_svg_args = biopsy.BuildSvgArgs(min_threshold=self.threshold,
                                          max_threshold=max_threshold,
                                          file=filename,
                                          title=self.name,
                                          notes=notes,
                                          open_file=False)
     if len(hits):
         biopsy.build_analysis_svg(hits,
                                   max_chain,
                                   self.converted_seqs[0],
                                   args=build_svg_args)
コード例 #6
0
ファイル: biopsy_test.py プロジェクト: pombredanne/biopsy
def test_pssm_pseudo_counts():
    sascha_pssms = biopsy.SequenceVec()
    sascha_acc = 'M00975'
    # sascha_seq = 'gtaaaccaggctgcctGAgaacttgttgcgaatcc'
    sascha_seq = 'ttgttgcga'
    sascha_seq = 'ttgttgcaa'
    # plot_likelihoods( biopsy.get_pssm( 'M00975' ), 'M00975' )
    # plot_likelihoods( biopsy.get_pssm( 'R02146' ), 'R02146' )
    print 'Binding,Background,odds,p(binding),cumulative p(binding),Sequence'
    biopsy.PssmParameters.singleton().use_p_value = True
    # biopsy.PssmParameters.singleton().binding_background_odds_prior = 1;
    for pc in [0.0, 0.25, 0.5, 1.0, 2.0]:
        # force cache load
        biopsy.get_pssm(sascha_acc)
        biopsy.clear_pssm_cache()
        biopsy.PssmParameters.singleton().pseudo_counts = pc
        p = biopsy.get_pssm(sascha_acc)
        score = biopsy.score_pssm(p.pssm, sascha_seq)
        (bind, back, cum_bind, cum_back, odds_ratio, cum_odds_ratio, p_bind,
         cum_p_bind,
         p_value_p_bind) = biopsy.get_pssm_likelihoods_for_score(p, score)
        print pc,
        print \
                '%f,%f,%f,%f,%f,%f,%f' \
                % \
                ( bind, back, cum_bind, cum_back, p_bind, cum_p_bind, p_value_p_bind )
        biopsy.plot_likelihoods(p, sascha_acc + ': ' + str(pc), score)
        # print 'Trying with standard distributions'
        # biopsy.PssmParameters.singleton().use_cumulative_dists = False;
        # hits = biopsy.HitVec()
        # biopsy.score_pssm_on_sequence( sascha_acc, sascha_seq, 0.001, hits )
        # print hits
        print 'Trying with cumulative distributions'
        biopsy.PssmParameters.singleton().use_cumulative_dists = True
        hits = biopsy.HitVec()
        biopsy.score_pssm_on_sequence(sascha_acc, sascha_seq, 0.001, hits)
        print hits
        print
コード例 #7
0
 def __missing__(self, k):
     """Store a new, empty biopsy.HitVec under the missing key ``k`` and return it."""
     empty_hits = biopsy.HitVec()
     self[k] = empty_hits
     return empty_hits
コード例 #8
0
            for species in remo.get_sequence_ids():
                yield remo.get_sequence_for(species, True)


def histogram(acc, score_counts):
    """
    Save a bar chart of score_counts to 'graphs/<acc>.png'.

    The current pylab figure is cleared, then one bar is drawn per bucket
    index in 0..num_buckets-1 (``num_buckets`` is read from module scope).
    Bar heights are the counts raised to the power 0.25.

    @arg acc: Name used for the output file
    @arg score_counts: Per-bucket counts to plot
    """
    import pylab
    import numpy
    pylab.clf()
    positions = xrange(num_buckets)
    heights = numpy.power(score_counts, 0.25)
    pylab.bar(positions, heights)
    output_path = 'graphs/%s.png' % acc
    pylab.savefig(output_path)


try:
    remome
except NameError:
    remome_file = 'c:/data/remos/100/100.filtered'
    print 'Loading remome: %s' % remome_file
    remome = biopsy.Remome.load(remome_file)

score_counts = {}
for acc in itertools.islice(pssm_accs(), num_pssms):
    score_counts[acc] = numpy.zeros(num_buckets, numpy.uint8)
    for seq in itertools.islice(sequences_from_remome(remome), num_sequences):
        hits = biopsy.HitVec()
        p_bind = biopsy.score_pssm_on_sequence(pssm_name=acc,
                                               threshold=0.0,
                                               sequence=seq,
                                               result=hits)
        for h in hits:
            score_counts[acc][int(h.p_binding * num_buckets)] += 1
    histogram(acc, score_counts[acc])
コード例 #9
0
def map_binders(hits, map):
    """
    Apply the mapper built by ``hit_mapper(map)`` to every hit.

    @arg hits: The hits to convert
    @arg map: Passed unchanged to hit_mapper()
    @return: A new biopsy.HitVec holding the mapped hits, in input order
    """
    mapped = biopsy.HitVec()
    convert = hit_mapper(map)
    mapped.extend(imap(convert, hits))
    return mapped