Beispiel #1
0
 def getvector(self, word):
     """Look up the vector stored in ``self.w`` for ``word``.

     A string is first translated to an index through ``self.D``; a number
     is used directly as the row index into ``self.w``. Any other kind of
     argument, and any exception raised during the lookup, yields ``None``.
     """
     try:
         if isstring(word):
             index = self.D[word]
             return self.w[index]
         if isnumber(word):
             return self.w[word, :]
     except Exception:
         # Best-effort lookup: failures fall through to the None result.
         pass
     return None
Beispiel #2
0
def loadgeopl(p="../../../data/semparse/geoquery.txt",
              customemb=False,
              reverse=True,
              splitre=None,
              adic=None):
    """Load a tab-separated question/answer file into integer id matrices.

    Every input line must contain exactly one tab separating the question
    from the answer (anything else raises ``ValueError`` on unpacking).
    Both halves are tokenized with ``re.split(splitre, ...)`` and wrapped in
    ``"<s>"`` / ``"</s>"`` markers before being counted and indexed.

    Args:
        p: a path (opened here and closed again before returning) or any
            iterable of lines.
        customemb: only used when ``splitre`` is None; selects the default
            split pattern, whitespace-or-hyphen when true and whitespace
            only otherwise.
        reverse: if true, each question's id sequence is reversed before it
            is written into its row of the question matrix.
        splitre: regular expression used to split both halves of a line.
        adic: optional existing answer dictionary (token -> id). It is
            extended in place; unseen tokens get consecutive ids above its
            current maximum. A fresh dictionary is created when None.

    Returns:
        Tuple ``(qmat, amat, qdic, adic, qwords, awords)``: the zero-padded
        int32 id matrices, the question and answer token -> id dictionaries
        and the question and answer token -> count dictionaries.
    """
    # A mutable default would be shared (and grown) across calls.
    if adic is None:
        adic = {}
    # The pattern does not depend on the line, so resolve it once.
    if splitre is None:
        splitre = r"[\s-]" if customemb else r"\s"

    qss, ass = [], []
    maxqlen, maxalen = 0, 0
    qwords, awords = {"<RARE>": 1}, {}

    opened_here = isstring(p)
    lines = open(p) if opened_here else p
    try:
        for line in lines:
            # NOTE: the line is not stripped, so a trailing newline produces
            # an empty final answer token. This is kept on purpose so that
            # ids and matrix widths stay what existing callers expect.
            q, a = [re.split(splitre, x) for x in line.split("\t")]
            q = ["<s>"] + q + ["</s>"]
            a = ["<s>"] + a + ["</s>"]
            qss.append(q)
            ass.append(a)
            maxqlen = max(len(q), maxqlen)
            maxalen = max(len(a), maxalen)
            for qw in q:
                qwords[qw] = qwords.get(qw, 0) + 1
            for aw in a:
                awords[aw] = awords.get(aw, 0) + 1
    finally:
        # Only close handles this function opened itself.
        if opened_here:
            lines.close()

    qmat = np.zeros((len(qss), maxqlen), dtype="int32")
    amat = np.zeros((len(ass), maxalen), dtype="int32")

    # Question ids start at 2 and follow descending token frequency.
    qsorted = sorted(qwords.items(), key=lambda kv: kv[1], reverse=True)
    qdic = {word: idx for idx, (word, _) in enumerate(qsorted, 2)}

    # Answer tokens not yet in adic continue after its highest existing id.
    next_id = max(adic.values()) + 1 if len(adic) > 0 else 1
    asorted = sorted(awords.items(), key=lambda kv: kv[1], reverse=True)
    for word, _ in asorted:
        if word not in adic:
            adic[word] = next_id
            next_id += 1

    for i, (q, a) in enumerate(zip(qss, ass)):
        qx = [qdic[w] for w in q]
        if reverse:
            qx.reverse()
        qmat[i, :len(q)] = qx
        amat[i, :len(a)] = [adic[w] for w in a]
    return qmat, amat, qdic, adic, qwords, awords
Beispiel #3
0
def wrapf(attr, root=None):
    """Wrap ``attr`` according to its kind and return the result.

    Functions go through ``prefwrap(attr, root)``, modules through
    ``pwrap(attr)`` and theano variables through ``vwrap(attr, root)``.
    Numbers, strings and anything not matched above are returned unchanged.
    """
    if isfunction(attr):
        # real function
        return prefwrap(attr, root)
    if isnumber(attr) or isstring(attr):
        # literals are handed back untouched
        return attr
    if isinstance(attr, ModuleType):
        return pwrap(attr)
    if isinstance(attr, theano.Variable):
        return vwrap(attr, root)
    return attr
Beispiel #4
0
def wrapf(attr, root=None):
    """Wrap ``attr`` according to its kind and return the result.

    Functions become ``OpBlock(attr, root)``, modules become
    ``WrappedAttr(attr)`` and theano variables become ``Var(attr)``; in the
    last case the params of ``root`` are pushed onto the new ``Var`` when
    ``root`` is itself a ``Var``. Numbers, strings and anything not matched
    above are returned unchanged.
    """
    if isfunction(attr):
        # real function
        return OpBlock(attr, root)
    if isnumber(attr) or isstring(attr):
        # literals are handed back untouched
        return attr
    if isinstance(attr, ModuleType):
        return WrappedAttr(attr)
    if isinstance(attr, theano.Variable):
        wrapped = Var(attr)
        if isinstance(root, Var):
            # propagate params
            wrapped.push_params(root._params)
        return wrapped
    return attr
Beispiel #5
0
 def __init__(self,
              subjinfop="subjs-counts-labels-types.fb2m.tsv",
              revind=None):
     """Set up the index state and populate it from ``subjinfop``.

     ``subjinfop`` is either a string, which is handed to ``self.build``,
     or a dict, which is used directly as ``self.indexdict``. Any other
     type raises ``Exception``. ``revind`` is stored as given.
     """
     # Defaults are assigned before build() runs so it can see them.
     self.indexdict = {}
     self.ignoresubgrams = True
     self.processor = Processor()
     self.revind = revind
     self.maxeditdistance = 1

     if isstring(subjinfop):
         self.build(subjinfop)
         return
     if not isinstance(subjinfop, dict):
         raise Exception("unknown stuff")
     self.indexdict = subjinfop
Beispiel #6
0
def wrapf(attr, root=None):
    """Return ``attr`` wrapped in the block type that matches its kind.

    - function: ``OpBlock(attr, root)``
    - module: ``WrappedAttr(attr)``
    - theano variable: ``Var(attr)``, receiving ``root._params`` through
      ``push_params`` when ``root`` is a ``Var``
    - number, string or anything else: ``attr`` itself
    """
    if isfunction(attr):
        return OpBlock(attr, root)

    if isnumber(attr) or isstring(attr):
        return attr

    if isinstance(attr, ModuleType):
        return WrappedAttr(attr)

    if not isinstance(attr, theano.Variable):
        # nothing to wrap
        return attr

    var = Var(attr)
    if isinstance(root, Var):
        # propagate params
        var.push_params(root._params)
    return var
Beispiel #7
0
 def __init__(self,
              dim=50,
              indim=None,
              value=None,
              worddic=None,
              normalize=False,
              trainfrac=1.0,
              init=None,
              raretoken="<RARE>",
              **kw):
     """Validate the vocabulary arguments and delegate to the parent class.

     When ``value`` is a string it is treated as a path: ``init`` and
     ``worddic`` must then be None, and both ``value`` and ``worddic`` are
     obtained from ``self.loadvalue``. When a ``worddic`` is available and
     ``raretoken`` is not None, the rare token is added to ``worddic`` with
     id 0 if it is missing (id 0 must be unused in that case). ``indim``
     must be None or equal to the largest id plus one; when None it is
     derived from ``worddic``. All arguments are then forwarded to the
     parent constructor.
     """
     if isstring(value):
         # value is a path to load from
         assert init is None and worddic is None
         value, worddic = self.loadvalue(value, dim, indim=indim)
         indim = max(worddic.values()) + 1

     if worddic is not None:
         wdvals = worddic.values()
         if raretoken is not None:
             if raretoken not in worddic:
                 # index zero has to be free for the rare token
                 assert 0 not in wdvals
                 worddic[raretoken] = 0
             # the rare token must be in the dictionary at this point
             assert raretoken in worddic
         # no negative word ids allowed
         assert min(wdvals) >= 0
         assert indim is None or indim == max(wdvals) + 1
         if indim is None:
             # derive the input dimension from the dictionary
             indim = max(worddic.values()) + 1

     super(WordEmb, self).__init__(
         indim=indim,
         dim=dim,
         value=value,
         normalize=normalize,
         worddic=worddic,
         trainfrac=trainfrac,
         init=init,
         raretoken=raretoken,
         **kw)
Beispiel #8
0
 def init(self, arg, *args, **kwargs):
     """Dispatch an initialization request given by name or by function.

     A string ``arg`` names a method of this object, which is called with
     the remaining arguments and whose result is returned. A function
     ``arg`` is passed to ``self._init_helper``. Anything else gives None.
     """
     if isstring(arg):
         assert hasattr(self, arg)
         named_method = getattr(self, arg)
         return named_method(*args, **kwargs)
     if isfunction(arg):
         return self._init_helper(arg)
     return None
Beispiel #9
0
 def init(self, arg, *args, **kwargs):
     """Run the initializer selected by ``arg``.

     If ``arg`` is a string, the method of that name on this object is
     called with ``*args`` and ``**kwargs``. If ``arg`` is a function, it
     is handed to ``self._init_helper``. Other values result in None.
     """
     if not isstring(arg):
         if isfunction(arg):
             return self._init_helper(arg)
         return None
     assert hasattr(self, arg)
     return getattr(self, arg)(*args, **kwargs)
Beispiel #10
0
 def init(self, arg, *args, **kwargs):
     if isstring(arg):
         assert hasattr(self, arg)
         return getattr(self, arg)(*args, **kwargs)