def f_read_tffm(tffm_dir, tf_name):
    """Load the detailed TFFM of a transcription factor from a directory.

    The directory listing is filtered through ``grep_list`` with the pattern
    ``"<tf_name>.*detail.*xml"`` and, when exactly one entry matches, that
    file is parsed with ``tffm_module.tffm_from_xml`` as a
    ``TFFM_KIND.DETAILED`` model.

    Only the names "ctcf" and "ebf1" (compared case-insensitively) are
    accepted; any other name prints two warning lines and falls back to
    "ebf1".

    Args:
        tffm_dir: Directory containing the TFFM XML files.
        tf_name: Transcription factor name used to build the file pattern.

    Returns:
        The object returned by ``tffm_module.tffm_from_xml``, or ``None``
        when the number of matching files is not exactly one.
    """
    import tffm_module
    from constants import TFFM_KIND

    file_list = os.listdir(tffm_dir)

    if tf_name.lower() not in ("ctcf", "ebf1"):
        print("TFFM not found for %s" % tf_name)
        # NOTE(review): the message mentions CTCF but the fallback actually
        # used is "ebf1" -- confirm which one is intended.
        print("[Warning: change tf to CTCF motif]")
        tf_name = "ebf1"

    pattern = "%s.*detail.*xml" % (tf_name)
    # presumably grep_list returns the entries of file_list matching the
    # pattern; verify against its definition.
    matched_file = grep_list(pattern, file_list)
    if len(matched_file) != 1:
        return None

    # os.path.join keeps the path valid whether or not tffm_dir ends with a
    # separator; the first grep_list result is reused instead of re-running it.
    tffm_file = os.path.join(tffm_dir, matched_file[0])
    return tffm_module.tffm_from_xml(tffm_file, TFFM_KIND.DETAILED)
def find_tffm_hits(xml, seq_file):
    """Scan sequences with a first-order TFFM and collect the hits.

    The model is loaded from ``xml`` as ``TFFM_KIND.FIRST_ORDER`` and
    ``scan_sequences`` is run on ``seq_file`` with ``only_best=True``.

    Args:
        xml: TFFM XML file passed to ``tffm_module.tffm_from_xml``.
        seq_file: Sequence file passed to ``scan_sequences``.

    Returns:
        A list with every truthy item yielded by the scan, in scan order.
    """
    import tffm_module
    from constants import TFFM_KIND  # TFFM-framework
    # HIT is not referenced in this function; the import is kept so module
    # loading behaves exactly as before.
    from hit_module import HIT

    model = tffm_module.tffm_from_xml(xml, TFFM_KIND.FIRST_ORDER)

    kept_hits = []
    for candidate in model.scan_sequences(seq_file, only_best=True):
        if candidate:
            kept_hits.append(candidate)
    return kept_hits
def find_tffm_hits(xml, seq_file, tffm_kind):
    """Scan sequences with a TFFM of the requested kind and collect the hits.

    Args:
        xml: TFFM XML file passed to ``tffm_module.tffm_from_xml``.
        seq_file: Sequence file passed to ``scan_sequences``.
        tffm_kind: Either ``'first_order'`` or ``'detailed'``.

    Returns:
        A list with every truthy item yielded by
        ``scan_sequences(seq_file, only_best=True)``, in scan order.

    Raises:
        SystemExit: If ``tffm_kind`` is neither ``'first_order'`` nor
            ``'detailed'``.
    """
    # Imported locally so the error path below cannot hit a NameError when
    # the enclosing module does not import sys itself.
    import sys

    # Validate before loading the TFFM framework so that a bad argument is
    # reported with the intended message straight away.
    if tffm_kind not in ('first_order', 'detailed'):
        sys.exit('The type of TFFM should be "first_order" or "detailed".')

    import tffm_module
    from constants import TFFM_KIND  # TFFM-framework

    if tffm_kind == 'first_order':
        kind = TFFM_KIND.FIRST_ORDER
    else:
        kind = TFFM_KIND.DETAILED

    tffm = tffm_module.tffm_from_xml(xml, kind)
    return [hit for hit in tffm.scan_sequences(seq_file, only_best=True) if hit]
Mdata = num.findall(matrix) #print("list(reversed(Mdata)) : ",list(reversed(Mdata))) matScore, lenMotif = get_score_matrix(Mdata, matrixType, pseudoCount) # The following line allows to produce the reversed matrix '''if we take the example given before : A T G C Position 1: 0.4444 0.155 0.654 0.645 Position 2: 0.1645 0.1565 0.21614 0.16456 Now, we can notice that scores change between the positions 1 and 2, and between A and T, and between G and C. So we can calculate with this reverse matrix, the score of the complementary strand. ''' matRev = list(reversed(matScore)) tffm_first_order = "" else: matRev = "" matScore = "" tffm_first_order = tffm_module.tffm_from_xml(tffm, TFFM_KIND.FIRST_ORDER) lenMotif = tffm_first_order.__len__() + 2 ########## get INTERDISTANCE VALUES for POSITIVE sets: if not pos_flag: len_pos = 0 sequence_number = 0 with open(FastaFile, "r") as f1: for line in f1: if line.find(">") != -1: line = line.strip() line = line.replace("-", ":") line = line.split(":") len_pos += float(line[2]) - float(line[1]) + 1 - lenMotif sequence_number += 1
# Write the dense logo of the first-order TFFM. The `with` block closes the
# file even when the logo call raises.
with open("tffm_first_order_dense_logo.svg", "w") as out:
    tffm_first_order.print_dense_logo(out)

# Create a detailed TFFM from meme.txt, save it, train it on train.fa and
# save the trained model.
tffm_detailed = tffm_module.tffm_from_meme("meme.txt", TFFM_KIND.DETAILED)
tffm_detailed.write("tffm_detailed_initial.xml")
tffm_detailed.train("train.fa")
tffm_detailed.write("tffm_detailed.xml")

# Write both logo flavours of the detailed TFFM.
with open("tffm_detailed_summary_logo.svg", "w") as out:
    tffm_detailed.print_summary_logo(out)
with open("tffm_detailed_dense_logo.svg", "w") as out:
    tffm_detailed.print_dense_logo(out)

# Reload the first-order model from XML and print the truthy hits on test.fa.
tffm_first_order = tffm_module.tffm_from_xml("tffm_first_order.xml",
                                             TFFM_KIND.FIRST_ORDER)
print("1st-order all")
for hit in tffm_first_order.scan_sequences("test.fa"):
    if hit:
        print(hit)

# NOTE(review): unlike the other loops, this one prints every item without
# an `if hit` filter -- confirm that is intended.
print("1st-order best")
for hit in tffm_first_order.scan_sequences("test.fa", only_best=True):
    print(hit)

# Reload the detailed model from XML and print the truthy hits on test.fa.
tffm_detailed = tffm_module.tffm_from_xml("tffm_detailed.xml",
                                          TFFM_KIND.DETAILED)
print("detailed all")
for hit in tffm_detailed.scan_sequences("test.fa"):
    if hit:
        print(hit)