コード例 #1
0
# Directories that hold one "<accession>.txt" sequence file per glycan.
wpath = "dumps/wurcs"
gpath = "dumps/glycoct"

wlist = os.listdir(wpath)
glist = os.listdir(gpath)
# A file name may appear in either or both directories; keep each name once.
alllist = list(set(wlist + glist))
print("Total glycan number %s" % len(alllist))

# Maps accession -> parsed glycan object. GlycoCT is tried first and WURCS is
# the fallback; entries that neither parser can handle are left out.
glycanobj = {}
for filename in alllist:
    # Remove only a literal ".txt" suffix. str.rstrip(".txt") strips any
    # trailing run of the characters '.', 't' and 'x', which can eat into
    # the accession itself.
    acc = filename[:-len(".txt")] if filename.endswith(".txt") else filename

    try:
        with open(os.path.join(gpath, filename)) as handle:
            gseq = handle.read().strip()
        obj = glycoct_parser.toGlycan(gseq)
    except Exception:
        # Missing GlycoCT file or GlycoCT parse failure: fall back to WURCS.
        # "except Exception" keeps Ctrl-C / SystemExit working.
        try:
            with open(os.path.join(wpath, filename)) as handle:
                wseq = handle.read().strip()
            obj = wurcs_parser.toGlycan(wseq)
        except Exception:
            continue

    glycanobj[acc] = obj

# Directory with one "<accession>.txt" file per glycan, read below into `r`.
rpath = "dumps/redend"
reducing_end = {}
for filename in alllist:
    # Remove only a literal ".txt" suffix. str.rstrip(".txt") strips any
    # trailing run of the characters '.', 't' and 'x' instead.
    acc = filename[:-len(".txt")] if filename.endswith(".txt") else filename
    # Close the file promptly instead of relying on garbage collection.
    with open(os.path.join(rpath, filename)) as handle:
        r = handle.read().strip()
コード例 #2
0
# Maps motif accession -> number of nodes reported by all_nodes().
glycan_length = {}

AllMotifpageid = AllMotif.id

for m in w.itermotif():

    acc = m.get("glytoucan")
    # Accessions already present in `glycans` are skipped entirely, so they
    # do not get a glycan_length entry from this loop.
    if acc in glycans:
        continue

    # Prefer the WURCS sequence and fall back to GlycoCT; motifs that neither
    # parser accepts are dropped. "except Exception" (rather than a bare
    # except) lets KeyboardInterrupt / SystemExit propagate.
    try:
        glycans[acc] = wp.toGlycan(str(m.get("wurcs")))
    except Exception:
        try:
            glycans[acc] = gp.toGlycan(m.get("glycoct"))
        except Exception:
            continue

    glycan_length[acc] = len(list(glycans[acc].all_nodes()))

print("%s motifs are supported" % len(glycans))

supported_acc = list(glycans.keys())
topology_pool = []

for i in range(len(supported_acc)):
    for j in range(i, len(supported_acc)):
コード例 #3
0
    linkCheck = GlycanLinkCompatibleEitherway()
    monoCheck = MonosaccharideCompatibleOneway()
    rootMonoCheck = MonosaccharideCompatibleOneway()


if __name__ == "__main__":
    # Demo: parse two GlycoCT sequences and print what
    # MotifSearchTopologicalSameAs.get returns for the pair.
    wurcsp = WURCS20Format()  # instantiated but not used below
    glycoctp = GlycoCTFormat()

    # First sequence: two RES entries joined by one LIN entry.
    seq1 = """RES
    1b:x-dgal-HEX-1:5
    2b:a-dgal-HEX-1:5
    LIN
    1:1o(3+1)2d"""
    g1 = glycoctp.toGlycan(seq1)

    # Second sequence: four RES entries joined by three LIN entries.
    seq2 = """RES
    1b:x-dglc-HEX-1:5
    2s:n-acetyl
    3b:b-dgal-HEX-1:5
    4b:a-dgal-HEX-1:5
    LIN
    1:1d(2+1)2n
    2:1o(4+1)3d
    3:3o(4+1)4d"""
    g2 = glycoctp.toGlycan(seq2)

    mstsa = MotifSearchTopologicalSameAs()
    comparison = mstsa.get(g1, g2)
    print(comparison)
コード例 #4
0
strict_nred_matcher = pygly.alignment.NonReducingEndMotifStrict(
    connected_nodes_cache=nodes_cache)

# Parse each motif once, keyed by its "glytoucan" accession.
motif_gobjs = {}
for m in w.itermotif():

    acc = m.get("glytoucan")
    if acc in motif_gobjs:
        continue

    # Prefer the WURCS sequence and fall back to GlycoCT; motifs that neither
    # parser accepts are skipped. "except Exception" (rather than a bare
    # except) lets KeyboardInterrupt / SystemExit propagate.
    try:
        motif_gobjs[acc] = wp.toGlycan(str(m.get("wurcs")))
    except Exception:
        try:
            motif_gobjs[acc] = gp.toGlycan(m.get("glycoct"))
        except Exception:
            continue

# Collect the "accession" field of every record returned by gco.archived().
archived = set()
gco = GlyCosmosNoCache()
for record in gco.archived():
    archived.add(record["accession"])


def secondtostr(i):
    i = int(i)

    h = i / 3600
    m = (i - h * 3600) / 60
コード例 #5
0
def substructure_search_init(shared_resources, structure_list_file_path, PPID):
    """Worker loop: load the glycan library, then serve motif-search jobs.

    Args:
        shared_resources: ``(task_queue, result_queue)`` pair. Each task taken
            from ``task_queue`` is a dict with the keys ``"id"``, ``"seq"``
            and ``"motif_match_position"``.
        structure_list_file_path: Text file with one whitespace-separated
            ``accession sequence`` pair per line. Every sequence is parsed
            with the WURCS parser. Blank lines are ignored.
        PPID: Identifier of this worker; used only in log messages.

    For every task, one result dict is put on ``result_queue`` with the keys
    ``"id"``, ``"start time"``, ``"end time"``,
    ``"alignment calculation time"``, ``"matches"`` (accessions for which the
    matcher's ``leq`` returned a true value) and ``"error"`` (list of
    messages; when non-empty no matching is attempted).

    The loop never terminates on its own, so this function does not return.
    Progress is logged to ``sys.stderr``. The motif size limit is read from
    the ``max_motif_size`` name of the enclosing scope.
    """
    print >> sys.stderr, "Computing Processor%s is starting" % PPID
    task_queue, result_queue = shared_resources

    gp = GlycoCTFormat()
    wp = WURCS20Format()

    motif_match_connected_nodes_cache = pygly.alignment.ConnectedNodesCache()
    motif_matcher = pygly.alignment.GlyTouCanMotif(
        connected_nodes_cache=motif_match_connected_nodes_cache)

    # Load the whole structure library once, before serving any job.
    glycans = {}
    with open(structure_list_file_path) as structure_file:
        for line in structure_file:
            line = line.strip()
            if not line:
                # A blank line (e.g. at the end of the file) would otherwise
                # break the two-value unpacking below.
                continue
            acc, s = line.split()
            glycans[acc] = wp.toGlycan(s)
    print >> sys.stderr, "Processor-%s: finishes loading %s glycans" % (
        PPID, len(glycans))

    while True:
        task_detail = task_queue.get(block=True)
        jobid = task_detail["id"]

        print >> sys.stderr, "Processor-%s: Job %s received." % (PPID, jobid)

        seq = task_detail["seq"]
        motif_match_position = task_detail["motif_match_position"]

        # Only "reo" restricts matching to the root; every other value
        # (including "anywhere") matches anywhere. The "notre" and
        # "fullstructure" positions are currently not supported.
        rootOnly = motif_match_position == "reo"
        anywhereExceptRoot = False

        matches = []
        error = []
        motif = None
        calculation_start_time = time.time()

        # Choose the parser from a marker substring of the sequence text.
        try:
            if "RES" in seq:
                motif = gp.toGlycan(seq)
            elif "WURCS" in seq:
                motif = wp.toGlycan(seq)
            else:
                raise RuntimeError
        except Exception:
            # Any parsing problem is reported to the client, not raised.
            error.append("Unable to parse")

        if not error:
            motif_node_num = len(list(motif.all_nodes()))
            if motif_node_num > max_motif_size:
                error.append("Motif is too big")

        if error:
            # Report problems even when the glycan library is empty.
            for e in error:
                print >> sys.stderr, "Processor-%s: Issues (%s) is found with task %s" % (
                    PPID, e, jobid)
        else:
            # TODO time out mechanism to avoid running for too long
            for acc, glycan in glycans.items():
                if motif_matcher.leq(motif,
                                     glycan,
                                     rootOnly=rootOnly,
                                     anywhereExceptRoot=anywhereExceptRoot):
                    matches.append(acc)

        calculation_end_time = time.time()
        calculation_time_cost = calculation_end_time - calculation_start_time

        res = {
            "id": jobid,
            "start time": calculation_start_time,
            "end time": calculation_end_time,
            "alignment calculation time": calculation_time_cost,
            "matches": matches,
            "error": error
        }
        print >> sys.stderr, "Processor-%s: Job %s finished within %ss" % (
            PPID, jobid, calculation_time_cost)
        result_queue.put(res)