def getcharmemdata(reldic, chardic, maxchar=70, maxwords=30,
                   labelp="../../../data/simplequestions/labels.map"):
    """Encode the labels of the entries in ``reldic`` as rows of character ids.

    Labels come from ``ents2labels(labelp, reldic, maxwords=maxwords)``
    (defined elsewhere). This function relies on it yielding
    ``(words, idx)`` pairs whose ``idx`` values are 0, 1, 2, ... in order.
    The words of each label are joined with single spaces and the resulting
    string is encoded one character at a time.

    Side effect: ``chardic`` is updated in place. Every character that occurs
    in a label but is missing from ``chardic`` receives the smallest
    non-negative integer that is not already used as a value. New characters
    are processed in sorted order so the assignment is reproducible.

    :param reldic: passed through to ``ents2labels``.
    :param chardic: dict mapping character -> integer id (mutated).
    :param maxchar: maximum number of characters kept per label.
    :param maxwords: passed through to ``ents2labels``.
    :param labelp: path of the label file, passed through to ``ents2labels``.
    :return: int32 array of shape ``(number of labels, min(longest label,
        maxchar))``; row ``idx`` holds the character ids of label ``idx``,
        padded on the right with -1.
    """
    # A list comprehension replaces ``map(lambda (x, y): ...)``: tuple
    # parameters are a syntax error on Python 3, and a lazy ``map`` object
    # supports neither ``len()`` nor the second pass below.
    rels = [(" ".join(words), idx)
            for words, idx in ents2labels(labelp, reldic, maxwords=maxwords)]

    maxlen = 0
    allrelchars = set()
    for expected, (rel, idx) in enumerate(rels):
        # Rows are addressed by idx below, so ids must be dense and ordered.
        assert idx == expected, "label ids must be consecutive, starting at 0"
        maxlen = max(maxlen, len(rel))
        allrelchars.update(rel)

    # Track used ids in a set instead of rescanning chardic.values() for
    # every candidate id.
    usedids = set(chardic.values())
    nextid = 0
    for char in sorted(allrelchars.difference(chardic)):
        while nextid in usedids:
            nextid += 1
        chardic[char] = nextid
        usedids.add(nextid)

    maxlen = min(maxlen, maxchar)
    retmat = np.full((len(rels), maxlen), -1, dtype="int32")
    for rel, idx in rels:
        charids = [chardic[char] for char in rel[:maxchar]]
        retmat[idx, :len(charids)] = charids
    return retmat
def getcharmemdata(reldic, chardic, maxchar=70, maxwords=30,
                   labelp="../../../data/simplequestions/labels.map"):
    """Encode the labels of the entries in ``reldic`` as rows of character ids.

    Labels come from ``ents2labels(labelp, reldic, maxwords=maxwords)``
    (defined elsewhere). This function relies on it yielding
    ``(words, idx)`` pairs whose ``idx`` values are 0, 1, 2, ... in order.
    The words of each label are joined with single spaces and the resulting
    string is encoded one character at a time.

    Side effect: ``chardic`` is updated in place. Every character that occurs
    in a label but is missing from ``chardic`` receives the smallest
    non-negative integer that is not already used as a value. New characters
    are processed in sorted order so the assignment is reproducible.

    :param reldic: passed through to ``ents2labels``.
    :param chardic: dict mapping character -> integer id (mutated).
    :param maxchar: maximum number of characters kept per label.
    :param maxwords: passed through to ``ents2labels``.
    :param labelp: path of the label file, passed through to ``ents2labels``.
    :return: int32 array of shape ``(number of labels, min(longest label,
        maxchar))``; row ``idx`` holds the character ids of label ``idx``,
        padded on the right with -1.
    """
    # A list comprehension replaces ``map(lambda (x, y): ...)``: tuple
    # parameters are a syntax error on Python 3, and a lazy ``map`` object
    # supports neither ``len()`` nor the second pass below.
    rels = [(" ".join(words), idx)
            for words, idx in ents2labels(labelp, reldic, maxwords=maxwords)]

    maxlen = 0
    allrelchars = set()
    for expected, (rel, idx) in enumerate(rels):
        # Rows are addressed by idx below, so ids must be dense and ordered.
        assert idx == expected, "label ids must be consecutive, starting at 0"
        maxlen = max(maxlen, len(rel))
        allrelchars.update(rel)

    # Track used ids in a set instead of rescanning chardic.values() for
    # every candidate id.
    usedids = set(chardic.values())
    nextid = 0
    for char in sorted(allrelchars.difference(chardic)):
        while nextid in usedids:
            nextid += 1
        chardic[char] = nextid
        usedids.add(nextid)

    maxlen = min(maxlen, maxchar)
    retmat = np.full((len(rels), maxlen), -1, dtype="int32")
    for rel, idx in rels:
        charids = [chardic[char] for char in rel[:maxchar]]
        retmat[idx, :len(charids)] = charids
    return retmat
def getmemdata(entdic, worddic, chardic, maxchar=30, maxwords=30,
               labelp="../../../data/simplequestions/labels.map"):
    """Encode entity labels as word ids plus per-word character ids.

    Labels come from ``ents2labels(labelp, entdic, maxwords=maxwords)``
    (defined elsewhere). This function relies on it yielding
    ``(words, idx)`` pairs, ``words`` being a sequence of word strings and
    the ``idx`` values being 0, 1, 2, ... in order.

    Side effects: both dictionaries are updated in place with anything found
    in the labels. A new word gets ``len(worddic)`` as its id; a new character
    gets the smallest non-negative integer not already used as a value in
    ``chardic``. New items are processed in sorted order so the assignment is
    reproducible.

    :param entdic: passed through to ``ents2labels``.
    :param worddic: dict mapping word -> integer id (mutated).
    :param chardic: dict mapping character -> integer id (mutated).
    :param maxchar: maximum number of characters kept per word; lowered to
        the longest word length when that is smaller.
    :param maxwords: passed through to ``ents2labels``.
    :param labelp: path of the label file, passed through to ``ents2labels``.
    :return: int32 array of shape ``(number of labels, longest label in
        words, 1 + maxchar)``. ``[i, j, 0]`` is the id of word ``j`` of label
        ``i`` and ``[i, j, 1:]`` are the ids of its characters; unused
        positions hold -1.
    """
    # Materialise once: the labels are counted and traversed twice.
    ents = list(ents2labels(labelp, entdic, maxwords=maxwords))

    allentwords = set()
    maxlen = 0
    for expected, (ent, idx) in enumerate(ents):
        # Rows are addressed by idx below, so ids must be dense and ordered.
        assert idx == expected, "label ids must be consecutive, starting at 0"
        maxlen = max(maxlen, len(ent))
        allentwords.update(ent)

    # Character statistics only depend on the distinct words.
    allentchars = set()
    maxwordlen = 0
    for word in allentwords:
        maxwordlen = max(maxwordlen, len(word))
        allentchars.update(word)
    maxchar = min(maxchar, maxwordlen)

    for word in sorted(allentwords.difference(worddic)):
        worddic[word] = len(worddic)

    # Track used ids in a set instead of rescanning chardic.values() for
    # every candidate id.
    usedids = set(chardic.values())
    nextid = 0
    for char in sorted(allentchars.difference(chardic)):
        while nextid in usedids:
            nextid += 1
        chardic[char] = nextid
        usedids.add(nextid)

    wordmat = np.full((len(ents), maxlen), -1, dtype="int32")
    charten = np.full((len(ents), maxlen, maxchar), -1, dtype="int32")
    for ent, idx in ents:
        # Lists, not map(): a Python 3 map object cannot be assigned to a
        # NumPy slice.
        wordmat[idx, :len(ent)] = [worddic[word] for word in ent]
        for pos, word in enumerate(ent):
            clipped = word[:maxchar]
            charten[idx, pos, :len(clipped)] = [chardic[ch] for ch in clipped]

    # Put the word id in front of each word's character ids.
    datamat = np.concatenate(
        [wordmat.reshape(wordmat.shape + (1,)), charten], axis=2)
    return datamat
def getmemdata(reldic, worddic,
               labelp="../../../data/simplequestions/labels.map"):
    """Encode the labels of the entries in ``reldic`` as rows of word ids.

    Labels come from ``ents2labels(labelp, reldic)`` (defined elsewhere).
    This function relies on it yielding ``(words, idx)`` pairs, ``words``
    being a sequence of word strings and the ``idx`` values being
    0, 1, 2, ... in order.

    Side effect: ``worddic`` is updated in place with the words found in the
    labels. A new word gets ``len(worddic)`` as its id; new words are
    processed in sorted order so the assignment is reproducible.

    :param reldic: passed through to ``ents2labels``.
    :param worddic: dict mapping word -> integer id (mutated).
    :param labelp: path of the label file, passed through to ``ents2labels``.
    :return: int32 array of shape ``(number of labels, longest label in
        words)``; row ``i`` holds the word ids of the i-th label, padded on
        the right with -1.
    """
    # Materialise once: the labels are counted and traversed twice.
    rels = list(ents2labels(labelp, reldic))

    allrelwords = set()
    maxlen = 0
    for expected, (rel, idx) in enumerate(rels):
        # Rows are filled by position below; this keeps position == label id.
        assert idx == expected, "label ids must be consecutive, starting at 0"
        maxlen = max(maxlen, len(rel))
        allrelwords.update(rel)

    for word in sorted(allrelwords.difference(worddic)):
        worddic[word] = len(worddic)

    retmat = np.full((len(rels), maxlen), -1, dtype="int32")
    for row, (rel, _) in enumerate(rels):
        retmat[row, :len(rel)] = [worddic[word] for word in rel]
    return retmat
def getmemdata(entdic, worddic, chardic, maxchar=30, maxwords=30,
               labelp="../../../data/simplequestions/labels.map"):
    """Encode entity labels as word ids plus per-word character ids.

    Labels come from ``ents2labels(labelp, entdic, maxwords=maxwords)``
    (defined elsewhere). This function relies on it yielding
    ``(words, idx)`` pairs, ``words`` being a sequence of word strings and
    the ``idx`` values being 0, 1, 2, ... in order.

    Side effects: both dictionaries are updated in place with anything found
    in the labels. A new word gets ``len(worddic)`` as its id; a new character
    gets the smallest non-negative integer not already used as a value in
    ``chardic``. New items are processed in sorted order so the assignment is
    reproducible.

    :param entdic: passed through to ``ents2labels``.
    :param worddic: dict mapping word -> integer id (mutated).
    :param chardic: dict mapping character -> integer id (mutated).
    :param maxchar: maximum number of characters kept per word; lowered to
        the longest word length when that is smaller.
    :param maxwords: passed through to ``ents2labels``.
    :param labelp: path of the label file, passed through to ``ents2labels``.
    :return: int32 array of shape ``(number of labels, longest label in
        words, 1 + maxchar)``. ``[i, j, 0]`` is the id of word ``j`` of label
        ``i`` and ``[i, j, 1:]`` are the ids of its characters; unused
        positions hold -1.
    """
    # Materialise once: the labels are counted and traversed twice.
    ents = list(ents2labels(labelp, entdic, maxwords=maxwords))

    allentwords = set()
    maxlen = 0
    for expected, (ent, idx) in enumerate(ents):
        # Rows are addressed by idx below, so ids must be dense and ordered.
        assert idx == expected, "label ids must be consecutive, starting at 0"
        maxlen = max(maxlen, len(ent))
        allentwords.update(ent)

    # Character statistics only depend on the distinct words.
    allentchars = set()
    maxwordlen = 0
    for word in allentwords:
        maxwordlen = max(maxwordlen, len(word))
        allentchars.update(word)
    maxchar = min(maxchar, maxwordlen)

    for word in sorted(allentwords.difference(worddic)):
        worddic[word] = len(worddic)

    # Track used ids in a set instead of rescanning chardic.values() for
    # every candidate id.
    usedids = set(chardic.values())
    nextid = 0
    for char in sorted(allentchars.difference(chardic)):
        while nextid in usedids:
            nextid += 1
        chardic[char] = nextid
        usedids.add(nextid)

    wordmat = np.full((len(ents), maxlen), -1, dtype="int32")
    charten = np.full((len(ents), maxlen, maxchar), -1, dtype="int32")
    for ent, idx in ents:
        # Lists, not map(): a Python 3 map object cannot be assigned to a
        # NumPy slice.
        wordmat[idx, :len(ent)] = [worddic[word] for word in ent]
        for pos, word in enumerate(ent):
            clipped = word[:maxchar]
            charten[idx, pos, :len(clipped)] = [chardic[ch] for ch in clipped]

    # Put the word id in front of each word's character ids.
    datamat = np.concatenate(
        [wordmat.reshape(wordmat.shape + (1,)), charten], axis=2)
    return datamat
def getmemdata(reldic, worddic,
               labelp="../../../data/simplequestions/labels.map"):
    """Encode the labels of the entries in ``reldic`` as rows of word ids.

    Labels come from ``ents2labels(labelp, reldic)`` (defined elsewhere).
    This function relies on it yielding ``(words, idx)`` pairs, ``words``
    being a sequence of word strings and the ``idx`` values being
    0, 1, 2, ... in order.

    Side effect: ``worddic`` is updated in place with the words found in the
    labels. A new word gets ``len(worddic)`` as its id; new words are
    processed in sorted order so the assignment is reproducible.

    :param reldic: passed through to ``ents2labels``.
    :param worddic: dict mapping word -> integer id (mutated).
    :param labelp: path of the label file, passed through to ``ents2labels``.
    :return: int32 array of shape ``(number of labels, longest label in
        words)``; row ``i`` holds the word ids of the i-th label, padded on
        the right with -1.
    """
    # Materialise once: the labels are counted and traversed twice.
    rels = list(ents2labels(labelp, reldic))

    allrelwords = set()
    maxlen = 0
    for expected, (rel, idx) in enumerate(rels):
        # Rows are filled by position below; this keeps position == label id.
        assert idx == expected, "label ids must be consecutive, starting at 0"
        maxlen = max(maxlen, len(rel))
        allrelwords.update(rel)

    for word in sorted(allrelwords.difference(worddic)):
        worddic[word] = len(worddic)

    retmat = np.full((len(rels), maxlen), -1, dtype="int32")
    for row, (rel, _) in enumerate(rels):
        retmat[row, :len(rel)] = [worddic[word] for word in rel]
    return retmat