def getvector(self, word):
    """Return the entry of ``self.w`` that corresponds to ``word``.

    A string is first translated to an index through ``self.D``; a number is
    used directly as a row index (``self.w[word, :]``).

    Returns ``None`` when the lookup raises for any reason (for example a
    word missing from ``self.D``), and also when ``word`` is neither a string
    nor a number.
    """
    try:
        if isstring(word):
            index = self.D[word]
            return self.w[index]
        if isnumber(word):
            return self.w[word, :]
    except Exception:
        # Best-effort lookup: every failure is reported as "no vector".
        return None
    return None
def loadgeopl(p="../../../data/semparse/geoquery.txt", customemb=False, reverse=True, splitre=None, adic=None):
    """Load tab-separated question/answer pairs and encode them as id matrices.

    Every input line must consist of exactly two tab-separated fields
    (question, answer). Both fields are tokenized with ``splitre`` and wrapped
    in ``<s>`` / ``</s>`` markers before being counted and encoded.

    NOTE: lines are not stripped, so for newline-terminated lines the default
    whitespace split yields a trailing empty-string token in the answer. This
    is kept as-is because it affects the ids that are produced.

    Args:
        p: path of the data file, or an iterable of lines. A path is opened
            and closed here; an iterable supplied by the caller is only
            iterated, never closed.
        customemb: selects the default tokenizer when ``splitre`` is not
            given: split on whitespace and hyphens if true, on whitespace
            only otherwise.
        reverse: if true, each question's id sequence is stored reversed.
        splitre: regular expression used to split both fields into tokens.
        adic: optional existing answer dictionary (token -> id). It is
            extended in place with unseen tokens, numbered upward from its
            current maximum id. A fresh dictionary is created when omitted.

    Returns:
        Tuple ``(qmat, amat, qdic, adic, qwords, awords)``:
        ``qmat`` / ``amat`` are zero-filled int32 matrices with one row per
        line holding the question / answer ids; ``qdic`` / ``adic`` map
        tokens to ids (question ids start at 2, ordered by descending count);
        ``qwords`` / ``awords`` map tokens to counts (``qwords`` is seeded
        with ``"<RARE>": 1``).

    Raises:
        ValueError: if a line does not split into exactly two fields on tab.
    """
    # A mutable default would be shared, and grown, across calls.
    if adic is None:
        adic = {}
    if splitre is None:
        splitre = r"[\s-]" if customemb else r"\s"

    def _count(item):
        # Sort key for (token, count) pairs.
        return item[1]

    qss, ass = [], []
    maxqlen, maxalen = 0, 0
    qwords, awords = {"<RARE>": 1}, {}

    owns_file = isstring(p)
    lines = open(p) if owns_file else p
    try:
        for line in lines:
            q, a = [re.split(splitre, x) for x in line.split("\t")]
            q = ["<s>"] + q + ["</s>"]
            a = ["<s>"] + a + ["</s>"]
            qss.append(q)
            ass.append(a)
            maxqlen = max(len(q), maxqlen)
            maxalen = max(len(a), maxalen)
            for qw in q:
                qwords[qw] = qwords.get(qw, 0) + 1
            for aw in a:
                awords[aw] = awords.get(aw, 0) + 1
    finally:
        # Only close what was opened here.
        if owns_file:
            lines.close()

    # Question ids: most frequent token first, numbering starts at 2.
    ranked_q = sorted(qwords.items(), reverse=True, key=_count)
    qdic = {tok: idx for idx, (tok, _) in enumerate(ranked_q, 2)}

    # Answer ids: keep existing entries, append unseen tokens after the
    # current maximum id (so a fresh dictionary starts at 1).
    nextid = (max(adic.values()) if len(adic) > 0 else 0) + 1
    for tok, _ in sorted(awords.items(), reverse=True, key=_count):
        if tok not in adic:
            adic[tok] = nextid
            nextid += 1

    qmat = np.zeros((len(qss), maxqlen), dtype="int32")
    amat = np.zeros((len(ass), maxalen), dtype="int32")
    for i, (q, a) in enumerate(zip(qss, ass)):
        qx = [qdic[x] for x in q]
        if reverse:
            qx.reverse()
        qmat[i, :len(q)] = qx
        amat[i, :len(a)] = [adic[x] for x in a]
    return qmat, amat, qdic, adic, qwords, awords
def wrapf(attr, root=None):
    """Wrap ``attr`` according to its kind and return the result.

    Checks are applied in this order:
      * function                   -> ``prefwrap(attr, root)``
      * number or string           -> ``attr`` unchanged
      * module                     -> ``pwrap(attr)``
      * ``theano.Variable``        -> ``vwrap(attr, root)``
      * anything else              -> ``attr`` unchanged
    """
    if isfunction(attr):  # real function
        return prefwrap(attr, root)
    if isnumber(attr) or isstring(attr):  # plain literal, nothing to wrap
        return attr
    if isinstance(attr, ModuleType):
        return pwrap(attr)
    if isinstance(attr, theano.Variable):
        return vwrap(attr, root)
    return attr
def wrapf(attr, root=None):
    """Wrap ``attr`` according to its kind and return the result.

    Checks are applied in this order:
      * function                   -> ``OpBlock(attr, root)``
      * number or string           -> ``attr`` unchanged
      * module                     -> ``WrappedAttr(attr)``
      * ``theano.Variable``        -> ``Var(attr)``; if ``root`` is itself a
                                      ``Var``, its ``_params`` are pushed onto
                                      the new wrapper
      * anything else              -> ``attr`` unchanged
    """
    if isfunction(attr):  # real function
        return OpBlock(attr, root)
    if isnumber(attr) or isstring(attr):  # plain literal, nothing to wrap
        return attr
    if isinstance(attr, ModuleType):
        return WrappedAttr(attr)
    if isinstance(attr, theano.Variable):
        wrapped = Var(attr)
        if isinstance(root, Var):
            # Propagate the parameters of the originating variable.
            wrapped.push_params(root._params)
        return wrapped
    return attr
def __init__(self, subjinfop="subjs-counts-labels-types.fb2m.tsv", revind=None):
    """Set up the index, either built from a path or taken from a dict.

    Args:
        subjinfop: a string, which is handed to ``self.build``, or a dict,
            which is used directly as ``self.indexdict``.
        revind: stored unchanged as ``self.revind``.

    Raises:
        Exception: if ``subjinfop`` is neither a string nor a dict.
    """
    # Defaults are in place before build() runs.
    self.indexdict = {}
    self.ignoresubgrams = True
    self.processor = Processor()
    self.revind = revind
    self.maxeditdistance = 1

    if isstring(subjinfop):
        self.build(subjinfop)
        return
    if not isinstance(subjinfop, dict):
        raise Exception("unknown stuff")
    self.indexdict = subjinfop
def __init__(self, dim=50, indim=None, value=None, worddic=None, normalize=False, trainfrac=1.0, init=None, raretoken="<RARE>", **kw):
    """Validate the word dictionary, derive ``indim`` and call the parent init.

    Args:
        dim: passed to ``self.loadvalue`` (when loading) and to the parent.
        indim: number of rows; derived from ``worddic`` as ``max id + 1``
            when left as ``None`` and a dictionary is available.
        value: a string is treated as a path and loaded via
            ``self.loadvalue``, which supplies both ``value`` and
            ``worddic``; in that case ``init`` and ``worddic`` must be
            ``None``. Any other value is forwarded unchanged.
        worddic: mapping from word to id. If ``raretoken`` is given and not
            yet present, it is added IN PLACE with id 0, which therefore
            must be unused.
        normalize, trainfrac, init, **kw: forwarded to the parent.
        raretoken: token for rare words, or ``None`` for no rare token.

    Raises:
        AssertionError: if any of the consistency checks below fails.
    """
    if isstring(value):  # value is a path
        assert (init is None and worddic is None)
        value, worddic = self.loadvalue(value, dim, indim=indim)
        indim = max(worddic.values()) + 1

    if worddic is not None:
        # Taken before the rare token is possibly inserted below.
        ids = worddic.values()
        if raretoken is not None:
            if raretoken not in worddic:
                assert (0 not in ids)  # index zero must still be free
                worddic[raretoken] = 0
            assert (raretoken in worddic)  # rare token must be in the dictionary
        assert (min(ids) >= 0)  # word ids must be non-negative
        assert (indim == max(ids) + 1 or indim is None)
        if indim is None:
            # No explicit size given: derive it from the dictionary.
            indim = max(worddic.values()) + 1

    super(WordEmb, self).__init__(
        indim=indim,
        dim=dim,
        value=value,
        normalize=normalize,
        worddic=worddic,
        trainfrac=trainfrac,
        init=init,
        raretoken=raretoken,
        **kw)
def init(self, arg, *args, **kwargs):
    """Dispatch an initialization request given by name or by function.

    Args:
        arg: a string naming a method of this object, which is called with
            ``*args`` and ``**kwargs``; or a function, which is handed to
            ``self._init_helper``.

    Returns:
        Whatever the selected method or ``self._init_helper`` returns;
        ``None`` when ``arg`` is neither a string nor a function.

    Raises:
        AssertionError: if ``arg`` is a string that names no attribute.
    """
    if isstring(arg):
        assert hasattr(self, arg)
        initializer = getattr(self, arg)
        return initializer(*args, **kwargs)
    if isfunction(arg):
        return self._init_helper(arg)
    return None
def init(self, arg, *args, **kwargs):
    """Call the method named by ``arg`` with the remaining arguments.

    Args:
        arg: a string naming a method of this object.

    Returns:
        The named method's return value; ``None`` when ``arg`` is not a
        string.

    Raises:
        AssertionError: if ``arg`` is a string that names no attribute.
    """
    if not isstring(arg):
        return None
    assert hasattr(self, arg)
    initializer = getattr(self, arg)
    return initializer(*args, **kwargs)