def parseScan(input="test.fasta", zip=True):
    """Parse the stored scan results of every motif and write the outputs.

    For each entry returned by ``loadMotifs2list("test.db")`` the scan
    result ``<basename>/Scan/<Sequence>.zip`` is loaded and handed to
    ``ScanParser`` together with the shared ``CSV`` dict. The value it
    returns is merged into ``Motif_dic`` under ``[motif_name, Sequence]``
    via ``nestDictUpdate``.

    Afterwards two outputs are produced:

    * one CSV text per key of ``CSV`` -- stored as ``<key>.csv`` members of
      ``<basename>_csv.zip`` when ``zip`` is true, otherwise passed to
      ``writeCSV``;
    * ``<basename>.zpkl``, a deflated zip archive holding the pickled
      ``Motif_dic`` as ``Motif_dic.pkl``.

    Layout of the pickled dict, as noted by the original author:
        dump[motif_name][Sequence][LOC][strand] = [start, end]

    Args:
        input: Name of the scanned sequence file; only the part before the
            first "." is used, as the base name for all paths.
        zip: Whether to bundle the CSV output into a single zip archive.

    Both parameter names shadow builtins; they are kept because callers
    pass them by keyword.
    """
    # NOTE(review): split(".")[0] cuts at the FIRST dot, so "./x.fasta"
    # gives an empty base name. Left unchanged because other helpers may
    # derive their paths the same way -- confirm before switching to
    # os.path.splitext.
    basename = input.split(".")[0]
    Motif_lst = loadMotifs2list("test.db")
    CSV = {}
    Motif_dic = {}

    for entry in Motif_lst:
        motif_name, Sequence = entry[0], entry[2]
        result = loadScanned("%s/Scan/%s.zip" % (basename, Sequence))
        # CSV is never written to in this function, so it is presumably
        # filled in place by ScanParser -- verify against ScanParser.
        seq_dict = ScanParser(entry, result, CSV)  # TODO
        nestDictUpdate(Motif_dic, [motif_name, Sequence], seq_dict)

    if zip:
        csv_archive = ZipFile("%s_csv.zip" % basename, "w", ZIP_DEFLATED)
        try:
            for LOC, rows in CSV.items():
                csv_archive.writestr(LOC + ".csv", "\n".join(rows))
        finally:
            # Always close, so a failed write does not leak the handle.
            csv_archive.close()
    else:
        for LOC, rows in CSV.items():
            writeCSV(LOC, "\n".join(rows))

    zf = ZipFile("%s.zpkl" % basename, "w", ZIP_DEFLATED)
    try:
        zf.writestr("Motif_dic.pkl", cPickle.dumps(Motif_dic))
    finally:
        zf.close()
def SearchCARE(input="test.fasta"):
    """Search the sequences for cis-acting regulatory element (CARE) motifs.

    Ensures the ``<basename>/Scan`` directory exists, runs ``ScanMotif`` for
    every motif from ``loadMotifs2list("test.db")`` whose sequence is not
    already reported by ``checkScanned``, and finally calls ``parseScan``
    on the same input.

    Args:
        input: Name of the sequence file to scan; the part before the first
            "." is used as the base name of the scan directory. The name
            shadows the builtin but is kept for keyword callers.
    """
    basename = input.split(".")[0]

    # NOTE(review): the handle is never used below; the call is kept in case
    # constructing CAREdb has side effects (e.g. creating the database) --
    # confirm and drop if not.
    CARE = CAREdb("test.db")
    Motif_lst = loadMotifs2list("test.db")

    scanned_path = "%s/Scan" % basename
    if not os.path.exists(scanned_path):
        os.makedirs(scanned_path)
    # checkScanned receives the directory with a trailing slash, as before.
    scanned = checkScanned(scanned_path + "/")

    for entry in Motif_lst:
        # Each entry must have exactly three fields; the description is not
        # needed for scanning.
        motif_name, _description, Sequence = entry
        # Single-argument print parses on both Python 2 and Python 3.
        print("%s %s" % (motif_name, Sequence))
        if Sequence in scanned:
            print("Skipping")
            continue
        ScanMotif(Sequence, input)

    parseScan(input=input)