wpath = "dumps/wurcs" gpath = "dumps/glycoct" wlist = os.listdir(wpath) glist = os.listdir(gpath) alllist = list(set(wlist+glist)) print "Total glycan number %s" % len(alllist) glycanobj = {} for filename in alllist: acc = filename.rstrip(".txt") try: gseq = open(os.path.join(gpath, filename)).read().strip() obj = glycoct_parser.toGlycan(gseq) except: try: wseq = open(os.path.join(wpath, filename)).read().strip() obj = wurcs_parser.toGlycan(wseq) except: continue glycanobj[acc] = obj rpath = "dumps/redend" reducing_end = {} for filename in alllist: acc = filename.rstrip(".txt") r = open(os.path.join(rpath, filename)).read().strip()
# Parse every motif yielded by ``w.itermotif()`` into ``glycans`` (keyed by the
# motif's "glytoucan" value), record each parsed motif's node count in
# ``glycan_length``, then begin a pairwise pass over the supported accessions.
glycan_length = {}
AllMotifpageid = AllMotif.id
for m in w.itermotif():
    acc = m.get("glytoucan")
    # Only the first occurrence of an accession is parsed.
    if acc in glycans:
        continue
    try:
        # WURCS is tried first ...
        glycans[acc] = wp.toGlycan(str(m.get("wurcs")))
    except:
        try:
            # ... and GlycoCT is the fallback.
            glycans[acc] = gp.toGlycan(m.get("glycoct"))
        except:
            # Neither sequence parsed: the motif is left out of ``glycans``.
            continue
    g = glycans[acc]
    # Node count of the parsed motif.
    l = len(list(g.all_nodes()))
    glycan_length[acc] = l
print "%s motifs are supported" % len(glycans)
supported_acc = list(glycans.keys())
topology_pool = []
# Visit each unordered pair (i <= j) of supported accessions, including i == j.
for i in range(len(supported_acc)):
    for j in range(i, len(supported_acc)):
        # NOTE(review): the body of this inner loop lies outside the reviewed span.
# Module-level comparison helpers.
linkCheck = GlycanLinkCompatibleEitherway()
monoCheck = MonosaccharideCompatibleOneway()
rootMonoCheck = MonosaccharideCompatibleOneway()


if __name__ == "__main__":
    # Ad-hoc demo: parse two hard-coded GlycoCT sequences and print the
    # result of MotifSearchTopologicalSameAs.get() for the pair.
    # NOTE(review): GlycoCT is normally written one entry per line; the two
    # literals below are reproduced exactly as they appear in this file --
    # confirm their line breaks were not lost.
    seq1 = """RES 1b:x-dgal-HEX-1:5 2b:a-dgal-HEX-1:5 LIN 1:1o(3+1)2d"""
    seq2 = """RES 1b:x-dglc-HEX-1:5 2s:n-acetyl 3b:b-dgal-HEX-1:5 4b:a-dgal-HEX-1:5 LIN 1:1d(2+1)2n 2:1o(4+1)3d 3:3o(4+1)4d"""

    # Both format objects are constructed; only the GlycoCT one is used below.
    wurcsp = WURCS20Format()
    glycoctp = GlycoCTFormat()

    g1 = glycoctp.toGlycan(seq1)
    g2 = glycoctp.toGlycan(seq2)

    mstsa = MotifSearchTopologicalSameAs()
    print(mstsa.get(g1, g2))
# Matcher constructed with the shared ``nodes_cache``.
strict_nred_matcher = pygly.alignment.NonReducingEndMotifStrict(
    connected_nodes_cache=nodes_cache)

# Parse every motif yielded by ``w.itermotif()`` into ``motif_gobjs``, keyed by
# the motif's "glytoucan" value.
motif_gobjs = {}
for m in w.itermotif():
    acc = m.get("glytoucan")
    # Only the first occurrence of an accession is parsed.
    if acc in motif_gobjs:
        continue
    try:
        # WURCS is tried first ...
        motif_gobjs[acc] = wp.toGlycan(str(m.get("wurcs")))
    except:
        try:
            # ... and GlycoCT is the fallback.
            motif_gobjs[acc] = gp.toGlycan(m.get("glycoct"))
        except:
            # Neither sequence parsed: the motif is skipped.
            continue

# Collect the "accession" field of every entry returned by ``gco.archived()``.
archived = set()
gco = GlyCosmosNoCache()
for acc in gco.archived():
    acc = acc["accession"]
    archived.add(acc)


# NOTE(review): the rest of this function lies outside the reviewed span, so
# its return value is not documented here.
def secondtostr(i):
    # presumably ``i`` is a duration in seconds -- confirm against callers
    i = int(i)
    # Whole multiples of 3600 in ``i``. This relies on ``/`` being integer
    # division for ints (Python 2); under Python 3 it would yield a float.
    h = i / 3600
    # Whole multiples of 60 left after removing ``h * 3600`` (same caveat).
    m = (i - h * 3600) / 60
def substructure_search_init(shared_resources, structure_list_file_path, PPID):
    """Load the glycan collection once, then serve motif-search tasks forever.

    Parameters:
        shared_resources: a ``(task_queue, result_queue)`` pair. Tasks are
            taken with ``task_queue.get(block=True)`` and one result dict per
            task is handed back with ``result_queue.put``.
        structure_list_file_path: path of a text file holding one
            whitespace-separated ``<accession> <sequence>`` pair per line;
            every sequence is parsed with ``WURCS20Format``. Blank lines are
            ignored.
        PPID: label used only to tag this worker's messages on stderr.

    Every task is a dict read through the keys ``"id"``, ``"seq"`` and
    ``"motif_match_position"``. The result dict carries the keys ``"id"``,
    ``"start time"``, ``"end time"``, ``"alignment calculation time"``,
    ``"matches"`` (accessions for which the matcher's ``leq`` was true) and
    ``"error"`` (a list of messages; when it is non-empty no matching is
    attempted).

    This function never returns.
    """
    print >> sys.stderr, "Computing Processor%s is starting" % PPID

    task_queue, result_queue = shared_resources

    gp = GlycoCTFormat()
    wp = WURCS20Format()

    motif_match_connected_nodes_cache = pygly.alignment.ConnectedNodesCache()
    motif_matcher = pygly.alignment.GlyTouCanMotif(
        connected_nodes_cache=motif_match_connected_nodes_cache)

    glycans = {}
    # ``with`` guarantees the list file is closed once it has been read.
    with open(structure_list_file_path) as structure_list:
        for line in structure_list:
            fields = line.split()
            if not fields:
                # A blank (e.g. trailing) line must not kill the worker.
                continue
            acc, s = fields
            glycans[acc] = wp.toGlycan(s)
    print >> sys.stderr, "Processor-%s: finishes loading %s glycans" % (
        PPID, len(glycans))

    while True:
        task_detail = task_queue.get(block=True)
        jobid = task_detail["id"]
        print >> sys.stderr, "Processor-%s: Job %s received." % (PPID, jobid)

        seq = task_detail["seq"]
        motif_match_position = task_detail["motif_match_position"]

        # Only "reo" changes the matcher arguments (rootOnly=True); every
        # other value, including "anywhere", runs with both flags False.
        rootOnly = motif_match_position == "reo"
        anywhereExceptRoot = False

        matches = []
        error = []
        calculation_start_time = time.time()

        # The sequence format is chosen by a marker substring of the query.
        try:
            if "RES" in seq:
                motif = gp.toGlycan(seq)
            elif "WURCS" in seq:
                motif = wp.toGlycan(seq)
            else:
                raise RuntimeError
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still stop the worker.
            error.append("Unable to parse")

        if not error:
            motif_node_num = len(list(motif.all_nodes()))
            if motif_node_num > max_motif_size:
                error.append("Motif is too big")

        if error:
            # Reported once per task, independently of the glycan collection,
            # so problems are logged even when no glycans were loaded.
            for e in error:
                print >> sys.stderr, "Processor-%s: Issues (%s) is found with task %s" % (
                    PPID, e, jobid)
        else:
            # TODO time out mechanism to avoid running for too long
            for acc, glycan in glycans.items():
                if motif_matcher.leq(motif, glycan, rootOnly=rootOnly,
                                     anywhereExceptRoot=anywhereExceptRoot):
                    matches.append(acc)

        calculation_end_time = time.time()
        calculation_time_cost = calculation_end_time - calculation_start_time

        res = {
            "id": jobid,
            "start time": calculation_start_time,
            "end time": calculation_end_time,
            "alignment calculation time": calculation_time_cost,
            "matches": matches,
            "error": error
        }
        print >> sys.stderr, "Processor-%s: Job %s finished within %ss" % (
            PPID, jobid, calculation_time_cost)
        result_queue.put(res)