예제 #1
0
def getcharmemdata(reldic,
                   chardic,
                   maxchar=70,
                   maxwords=30,
                   labelp="../../../data/simplequestions/labels.map"):
    """Build a -1-padded matrix of character ids for the labels of ``reldic``.

    The labels are obtained from ``ents2labels(labelp, reldic,
    maxwords=maxwords)`` as ``(words, index)`` pairs.  The words of each
    label are joined with single spaces and every character of the resulting
    string is mapped to an integer id through ``chardic``.

    ``chardic`` is updated in place: each character that is not yet a key
    receives the smallest non-negative integer that is not already used as a
    value.

    :param reldic: forwarded to ``ents2labels``.
    :param chardic: dict mapping character -> integer id; mutated in place.
    :param maxchar: maximum number of characters kept per label.
    :param maxwords: forwarded to ``ents2labels``.
    :param labelp: path of the label file, forwarded to ``ents2labels``.
    :return: int32 array with one row per label and
        ``min(longest label length, maxchar)`` columns; row ``index`` holds
        the character ids of that label, unused cells hold -1.
    :raises AssertionError: if the label indices are not 0, 1, 2, ... in
        iteration order.
    """
    # A list comprehension replaces ``map(lambda (x, y): ...)``: tuple
    # parameters are a syntax error on Python 3, and the result must be a
    # real list because it is measured with len() and iterated twice.
    rels = [(" ".join(words), idx)
            for words, idx in ents2labels(labelp, reldic, maxwords=maxwords)]
    maxlen = 0
    prevc = -1
    allrelchars = set()
    for rel, c in rels:
        # Rows of the result are addressed by ``c``, so indices must be dense.
        assert c - 1 == prevc, "label indices must be consecutive from 0"
        prevc = c
        maxlen = max(maxlen, len(rel))
        allrelchars.update(rel)
    charsnotinchardic = allrelchars - set(chardic)
    # Track used ids in a set instead of rescanning chardic.values() for
    # every candidate id.
    usedids = set(chardic.values())
    nextid = 0
    for cnic in charsnotinchardic:
        while nextid in usedids:
            nextid += 1
        chardic[cnic] = nextid
        usedids.add(nextid)
    maxlen = min(maxlen, maxchar)
    retmat = np.full((len(rels), maxlen), -1, dtype="int32")
    for rel, k in rels:
        charids = [chardic[ch] for ch in rel[:maxchar]]
        retmat[k, :len(charids)] = charids
    return retmat
예제 #2
0
def getcharmemdata(reldic, chardic, maxchar=70, maxwords=30,
                   labelp="../../../data/simplequestions/labels.map"):
    """Encode the labels of ``reldic`` as a -1-padded matrix of character ids.

    ``ents2labels(labelp, reldic, maxwords=maxwords)`` supplies
    ``(words, index)`` pairs.  Each word sequence is joined with single
    spaces into one string whose characters are looked up in ``chardic``.

    Side effect: characters missing from ``chardic`` are inserted into it,
    each with the smallest non-negative integer not already present among
    its values.

    :param reldic: forwarded to ``ents2labels``.
    :param chardic: dict mapping character -> integer id; mutated in place.
    :param maxchar: maximum number of characters kept per label.
    :param maxwords: forwarded to ``ents2labels``.
    :param labelp: path of the label file, forwarded to ``ents2labels``.
    :return: int32 array of shape
        ``(number of labels, min(longest label length, maxchar))``; row
        ``index`` contains the character ids of that label, padded with -1.
    :raises AssertionError: if the label indices are not 0, 1, 2, ... in
        iteration order.
    """
    # ``map(lambda (x, y): ...)`` does not parse on Python 3 (tuple
    # parameters were removed) and would yield a one-shot iterator there;
    # build a list because it is sized with len() and traversed twice.
    rels = [(" ".join(words), idx)
            for words, idx in ents2labels(labelp, reldic, maxwords=maxwords)]
    maxlen = 0
    prevc = -1
    allrelchars = set()
    for rel, c in rels:
        # ``c`` is used as the row index below, so it has to be dense.
        assert c - 1 == prevc, "label indices must be consecutive from 0"
        prevc = c
        maxlen = max(maxlen, len(rel))
        allrelchars.update(rel)
    charsnotinchardic = allrelchars - set(chardic)
    # Keep the ids already taken in a set so that finding the next free id
    # does not rescan chardic.values() on every step.
    usedids = set(chardic.values())
    nextid = 0
    for cnic in charsnotinchardic:
        while nextid in usedids:
            nextid += 1
        chardic[cnic] = nextid
        usedids.add(nextid)
    maxlen = min(maxlen, maxchar)
    retmat = np.full((len(rels), maxlen), -1, dtype="int32")
    for rel, k in rels:
        charids = [chardic[ch] for ch in rel[:maxchar]]
        retmat[k, :len(charids)] = charids
    return retmat
예제 #3
0
def getmemdata(entdic,
               worddic,
               chardic,
               maxchar=30,
               maxwords=30,
               labelp="../../../data/simplequestions/labels.map"):
    """Build a combined word-id / character-id tensor for entity labels.

    Labels come from ``ents2labels(labelp, entdic, maxwords=maxwords)`` as
    ``(words, index)`` pairs.

    Both dictionaries are updated in place with what is found in the labels:

    * every word missing from ``worddic`` is added with id ``len(worddic)``
      at the moment of insertion;
    * every character missing from ``chardic`` is added with the smallest
      non-negative integer not already used as a value.

    :param entdic: forwarded to ``ents2labels``.
    :param worddic: dict mapping word -> integer id; mutated in place.
    :param chardic: dict mapping character -> integer id; mutated in place.
    :param maxchar: maximum number of characters kept per word; the
        effective limit is ``min(maxchar, longest word length)``.
    :param maxwords: forwarded to ``ents2labels``.
    :param labelp: path of the label file, forwarded to ``ents2labels``.
    :return: int32 array of shape
        ``(number of labels, longest label in words, 1 + effective maxchar)``.
        Along the last axis, position 0 holds the word id and the remaining
        positions hold the character ids of that word; unused cells are -1.
    :raises AssertionError: if the label indices are not 0, 1, 2, ... in
        iteration order.
    """
    ents = ents2labels(labelp, entdic, maxwords=maxwords)
    allentwords = set()
    allentchars = set()
    maxlen = 0
    maxwordlen = 0
    prevc = -1
    for ent, c in ents:
        # ``c`` indexes the rows of the output, so indices must be dense.
        assert c - 1 == prevc, "label indices must be consecutive from 0"
        prevc = c
        maxlen = max(maxlen, len(ent))
        for entw in ent:
            allentwords.add(entw)
            maxwordlen = max(maxwordlen, len(entw))
            allentchars.update(entw)
    maxchar = min(maxchar, maxwordlen)

    for newword in allentwords - set(worddic):
        worddic[newword] = len(worddic)

    # Track used ids in a set instead of rescanning chardic.values() for
    # every candidate id.
    usedids = set(chardic.values())
    nextid = 0
    for cnic in allentchars - set(chardic):
        while nextid in usedids:
            nextid += 1
        chardic[cnic] = nextid
        usedids.add(nextid)

    wordmat = np.full((len(ents), maxlen), -1, dtype="int32")
    charten = np.full((len(ents), maxlen, maxchar), -1, dtype="int32")
    for ent, c in ents:
        # Lists, not map(): on Python 3 a map object cannot be assigned
        # into a NumPy slice.
        wordmat[c, :len(ent)] = [worddic[w] for w in ent]
        for j, entw in enumerate(ent):
            chars = entw[:maxchar]
            charten[c, j, :len(chars)] = [chardic[ch] for ch in chars]
    # Prepend the word id as an extra leading slot on the last axis.
    datamat = np.concatenate([wordmat.reshape(wordmat.shape + (1, )), charten],
                             axis=2)
    return datamat
예제 #4
0
def getmemdata(reldic, worddic, labelp="../../../data/simplequestions/labels.map"):
    """Return a -1-padded int32 matrix of word ids for the labels of ``reldic``.

    Labels are read through ``ents2labels(labelp, reldic)`` as
    ``(words, index)`` pairs.  ``worddic`` is updated in place: every label
    word that is not yet a key is added with id ``len(worddic)`` at the
    moment of insertion.

    :param reldic: forwarded to ``ents2labels``.
    :param worddic: dict mapping word -> integer id; mutated in place.
    :param labelp: path of the label file, forwarded to ``ents2labels``.
    :return: int32 array of shape ``(number of labels, longest label)``;
        row ``i`` holds the word ids of the ``i``-th label, padded with -1.
    :raises AssertionError: if the label indices are not 0, 1, 2, ... in
        iteration order.
    """
    labelled = ents2labels(labelp, reldic)
    vocab = set()
    longest = 0
    expected = 0
    for words, idx in labelled:
        # Indices must count up from zero without gaps.
        assert idx == expected
        expected = idx + 1
        if len(words) > longest:
            longest = len(words)
        vocab.update(words)

    for unseen in vocab - set(worddic):
        worddic[unseen] = len(worddic)

    retmat = np.full((len(labelled), longest), -1, dtype="int32")
    for row, (words, _) in enumerate(labelled):
        ids = [worddic[w] for w in words]
        retmat[row, :len(ids)] = ids
    return retmat
예제 #5
0
def getmemdata(entdic, worddic, chardic, maxchar=30, maxwords=30,
                   labelp="../../../data/simplequestions/labels.map"):
    """Encode entity labels as one tensor of word ids and character ids.

    ``ents2labels(labelp, entdic, maxwords=maxwords)`` supplies
    ``(words, index)`` pairs.

    Side effects on the dictionaries:

    * words missing from ``worddic`` are inserted with id ``len(worddic)``
      at the moment of insertion;
    * characters missing from ``chardic`` are inserted with the smallest
      non-negative integer not already present among its values.

    :param entdic: forwarded to ``ents2labels``.
    :param worddic: dict mapping word -> integer id; mutated in place.
    :param chardic: dict mapping character -> integer id; mutated in place.
    :param maxchar: maximum number of characters kept per word; the
        effective limit is ``min(maxchar, longest word length)``.
    :param maxwords: forwarded to ``ents2labels``.
    :param labelp: path of the label file, forwarded to ``ents2labels``.
    :return: int32 array of shape
        ``(number of labels, longest label in words, 1 + effective maxchar)``.
        Slot 0 of the last axis is the word id, the following slots are the
        character ids of that word; unused cells are -1.
    :raises AssertionError: if the label indices are not 0, 1, 2, ... in
        iteration order.
    """
    ents = ents2labels(labelp, entdic, maxwords=maxwords)
    allentwords = set()
    allentchars = set()
    maxlen = 0
    maxwordlen = 0
    prevc = -1
    for ent, c in ents:
        # ``c`` is the output row of this label, so indices must be dense.
        assert c - 1 == prevc, "label indices must be consecutive from 0"
        prevc = c
        maxlen = max(maxlen, len(ent))
        for entw in ent:
            allentwords.add(entw)
            maxwordlen = max(maxwordlen, len(entw))
            allentchars.update(entw)
    maxchar = min(maxchar, maxwordlen)

    for newword in allentwords - set(worddic):
        worddic[newword] = len(worddic)

    # Keep the ids already taken in a set so that finding the next free id
    # does not rescan chardic.values() on every step.
    usedids = set(chardic.values())
    nextid = 0
    for cnic in allentchars - set(chardic):
        while nextid in usedids:
            nextid += 1
        chardic[cnic] = nextid
        usedids.add(nextid)

    wordmat = np.full((len(ents), maxlen), -1, dtype="int32")
    charten = np.full((len(ents), maxlen, maxchar), -1, dtype="int32")
    for ent, c in ents:
        # Use lists rather than map(): a Python 3 map object cannot be
        # assigned into a NumPy slice.
        wordmat[c, :len(ent)] = [worddic[w] for w in ent]
        for j, entw in enumerate(ent):
            chars = entw[:maxchar]
            charten[c, j, :len(chars)] = [chardic[ch] for ch in chars]
    # Word ids become slot 0 of the last axis, in front of the char ids.
    datamat = np.concatenate([wordmat.reshape(wordmat.shape + (1,)),
                              charten], axis=2)
    return datamat
예제 #6
0
def getmemdata(reldic,
               worddic,
               labelp="../../../data/simplequestions/labels.map"
               ):
    """Encode the labels of ``reldic`` as a -1-padded matrix of word ids.

    ``ents2labels(labelp, reldic)`` supplies ``(words, index)`` pairs.
    Words that are not yet keys of ``worddic`` are inserted into it, each
    with id ``len(worddic)`` at the moment of insertion.

    :param reldic: forwarded to ``ents2labels``.
    :param worddic: dict mapping word -> integer id; mutated in place.
    :param labelp: path of the label file, forwarded to ``ents2labels``.
    :return: int32 array with one row per label and as many columns as the
        longest label has words; unused cells hold -1.
    :raises AssertionError: if the label indices are not 0, 1, 2, ... in
        iteration order.
    """
    pairs = ents2labels(labelp, reldic)
    seen_words = set()
    width = 0
    next_index = 0
    for label, index in pairs:
        # Label indices are required to be dense and ascending from zero.
        assert index == next_index
        next_index = index + 1
        width = max(width, len(label))
        seen_words.update(label)

    for word in seen_words - set(worddic):
        worddic[word] = len(worddic)

    retmat = np.full((len(pairs), width), -1, dtype="int32")
    for position, (label, _) in enumerate(pairs):
        encoded = [worddic[token] for token in label]
        retmat[position, :len(encoded)] = encoded
    return retmat