예제 #1
0
def f_read_tffm(tffm_dir, tf_name):
    """Load the detailed TFFM whose XML file matches a factor name.

    Only the names "ctcf" and "ebf1" (compared case-insensitively) are
    accepted. Any other name prints a warning and is replaced by "ebf1".

    Args:
        tffm_dir: Directory whose entries are searched for the XML file.
        tf_name: Factor name inserted into the file-name pattern.

    Returns:
        The result of ``tffm_module.tffm_from_xml`` for the matching file
        when exactly one directory entry matches the pattern, else ``None``.
    """
    import tffm_module
    from constants import TFFM_KIND

    file_list = os.listdir(tffm_dir)

    if tf_name.lower() not in ("ctcf", "ebf1"):
        print("TFFM not found for %s" % tf_name)
        # The message names the model that is actually substituted below;
        # it previously announced CTCF while the code switched to ebf1.
        print("[Warning: change tf to EBF1 motif]")
        tf_name = "ebf1"

    pattern = "%s.*detail.*xml" % (tf_name)

    # Query the helper once and reuse its result.
    matched_file = grep_list(pattern, file_list)
    if len(matched_file) != 1:
        return None

    # os.path.join also works when tffm_dir has no trailing separator.
    tffm_file = os.path.join(tffm_dir, matched_file[0])
    return tffm_module.tffm_from_xml(tffm_file, TFFM_KIND.DETAILED)
예제 #2
0
def find_tffm_hits(xml, seq_file):
    """Scan sequences with a first-order TFFM and return the truthy hits.

    The model is loaded from ``xml`` with ``TFFM_KIND.FIRST_ORDER`` and
    ``seq_file`` is scanned with ``only_best=True``. Falsy values yielded
    by the scan are dropped from the returned list.
    """
    import tffm_module
    from constants import TFFM_KIND  # TFFM-framework
    from hit_module import HIT
    model = tffm_module.tffm_from_xml(xml, TFFM_KIND.FIRST_ORDER)
    kept_hits = []
    for candidate in model.scan_sequences(seq_file, only_best=True):
        if candidate:
            kept_hits.append(candidate)
    return kept_hits
예제 #3
0
def find_tffm_hits(xml, seq_file, tffm_kind):
    """Scan sequences with a TFFM of the requested kind.

    Args:
        xml: Path of the TFFM XML file passed to ``tffm_from_xml``.
        seq_file: Sequence file passed to ``scan_sequences``.
        tffm_kind: Either ``'first_order'`` or ``'detailed'``.

    Returns:
        A list of the truthy hits yielded by
        ``scan_sequences(seq_file, only_best=True)``.

    Raises:
        SystemExit: If ``tffm_kind`` is not one of the two accepted strings.
    """
    # Validate before importing the TFFM framework so a bad argument fails
    # fast; the local import keeps sys.exit usable regardless of what the
    # enclosing module imports.
    if tffm_kind != 'first_order' and tffm_kind != 'detailed':
        import sys
        sys.exit('The type of TFFM should be "first_order" or "detailed".')

    import tffm_module
    from constants import TFFM_KIND  # TFFM-framework

    if tffm_kind == 'first_order':
        tffm_kind = TFFM_KIND.FIRST_ORDER
    else:
        tffm_kind = TFFM_KIND.DETAILED
    tffm = tffm_module.tffm_from_xml(xml, tffm_kind)
    return [hit for hit in
            tffm.scan_sequences(seq_file, only_best=True) if hit]
    Mdata = num.findall(matrix)
    #print("list(reversed(Mdata)) : ",list(reversed(Mdata)))
    matScore, lenMotif = get_score_matrix(Mdata, matrixType, pseudoCount)
    # The matRev assignment below builds the reversed score matrix.
    '''if we take the example given before : A T G C
			Position 1:      0.4444  0.155  0.654   0.645
			Position 2:      0.1645  0.1565 0.21614 0.16456
        Now, we can notice that scores change between the positions 1 and 2, and between A and T, and between G and C.
        So we can calculate with this reverse matrix, the score of the complementary strand.
        '''
    matRev = list(reversed(matScore))
    tffm_first_order = ""
else:
    matRev = ""
    matScore = ""
    tffm_first_order = tffm_module.tffm_from_xml(tffm, TFFM_KIND.FIRST_ORDER)
    lenMotif = tffm_first_order.__len__() + 2

########## INTERDISTANCE VALUES for the POSITIVE set.
# For every line of FastaFile containing ">", add
# (field[2] - field[1] + 1 - lenMotif) to len_pos and count the line.
# NOTE(review): presumably headers look like ">name:start-end" -- confirm.
if not pos_flag:
    len_pos = 0
    sequence_number = 0

    with open(FastaFile, "r") as f1:
        for line in f1:
            if ">" not in line:
                continue
            # Turn "-" into ":" so one split yields all header fields.
            line = line.strip().replace("-", ":").split(":")
            len_pos += float(line[2]) - float(line[1]) + 1 - lenMotif
            sequence_number += 1
예제 #5
0
# Write the dense logo of the first-order TFFM; ``with`` closes the file
# even if print_dense_logo raises.
with open("tffm_first_order_dense_logo.svg", "w") as out:
    tffm_first_order.print_dense_logo(out)

# Build a detailed TFFM from "meme.txt", writing the model to XML both
# before and after training on "train.fa".
tffm_detailed = tffm_module.tffm_from_meme("meme.txt", TFFM_KIND.DETAILED)
tffm_detailed.write("tffm_detailed_initial.xml")
tffm_detailed.train("train.fa")
tffm_detailed.write("tffm_detailed.xml")
# ``with`` closes each SVG file even if logo rendering raises.
with open("tffm_detailed_summary_logo.svg", "w") as out:
    tffm_detailed.print_summary_logo(out)
with open("tffm_detailed_dense_logo.svg", "w") as out:
    tffm_detailed.print_dense_logo(out)

# Reload the first-order TFFM from XML and scan "test.fa" with it.
tffm_first_order = tffm_module.tffm_from_xml("tffm_first_order.xml",
        TFFM_KIND.FIRST_ORDER)
# Parenthesised single-argument print runs on both Python 2 and Python 3.
print("1st-order all")
for hit in tffm_first_order.scan_sequences("test.fa"):
    if hit:
        print(hit)

print("1st-order best")
# NOTE(review): unlike the loop above, this one prints every yielded value
# without an ``if hit`` filter -- confirm whether that is intended.
for hit in tffm_first_order.scan_sequences("test.fa", only_best=True):
    print(hit)

# Reload the detailed TFFM from XML and print every truthy hit in "test.fa".
tffm_detailed = tffm_module.tffm_from_xml("tffm_detailed.xml",
        TFFM_KIND.DETAILED)
# Parenthesised single-argument print runs on both Python 2 and Python 3.
print("detailed all")
for hit in tffm_detailed.scan_sequences("test.fa"):
    if hit:
        print(hit)