def __init__(self, data=None, computer=None, worddic=None, bias=False):
    """
    WordLinout that computes the weight matrix of the Linear
    transformation dynamically, based on the provided data and computer.

    :param data: numpy array, 2D or more, one symbol's data per row.
        Automatically wrapped, so watch the dtype.
    :param computer: module that builds vectors for rows of data
    :param worddic: token dictionary, mapping token to id
    :param bias: (optional) use bias (the bias is not computed)
    """
    super(ComputedWordLinout, self).__init__(worddic)
    self.data = q.val(torch.from_numpy(data)).v
    self.computer = computer
    # TODO: batches for computer???
    wdvals = worddic.values()
    assert (min(wdvals) >= 0)  # word ids must be positive
    # NOTE(review): the original also looked up mask/rare token ids here
    # but never used or stored them; that dead code has been removed.
    # output dimension must cover the largest word id
    self.outdim = max(worddic.values()) + 1
    if bias:
        self.bias = nn.Parameter(torch.Tensor(self.outdim))
    else:
        # register None so .bias stays accessible as an attribute
        self.register_parameter("bias", None)
    self.reset_parameters()
    self.base_weight = None
def __init__(self, wordlinout, wdic, **kw):
    """
    Adapt a WordLinout to a new dictionary ``wdic``.

    Words present in one dictionary but missing from the other are mapped
    to that dictionary's rare-token id (or 0 when no rare token exists).
    """
    D = wordlinout.D
    # assert (self.raretoken in D)   # must have rareid in D to map extra words to it
    # assert(wordlinout.raretoken in wdic)
    super(AdaptedWordLinout, self).__init__(wdic, **kw)
    self.inner = wordlinout
    # fallback ids for words absent from the other dictionary
    rareid_new2old = D.get(wordlinout.raretoken, 0)
    rareid_old2new = wdic.get(self.raretoken, 0)
    # mapping from new indexes (wdic) to old indexes (wordlinout)
    self.new_to_old_d = {v: D.get(k, rareid_new2old)
                         for k, v in wdic.items()}
    # mapping from old indexes (wordlinout) to new indexes (wdic)
    self.old_to_new_d = {v: wdic.get(k, rareid_old2new)
                         for k, v in D.items()}
    numnew = max(self.new_to_old_d.keys()) + 1
    numold = max(self.old_to_new_d.keys()) + 1
    # for every new dic word id, contains old dic id
    # --> used to slice from matrix in old idxs to get matrix in new idxs
    new_to_old = np.array(
        [self.new_to_old_d.get(i, rareid_new2old) for i in range(numnew)],
        dtype="int64")
    self.new_to_old = q.val(new_to_old).v
    # for every old dic word id, contains new dic id
    old_to_new = np.array(
        [self.old_to_new_d.get(i, rareid_old2new) for i in range(numold)],
        dtype="int64")
    self.old_to_new = q.val(old_to_new).v
def __init__(self, base, override, which=None, whichnot=None, **kw):
    """
    Word-vec base whose entries are overridden by a second word-vec object.

    :param base: base word-vec object; its dictionary defines the output space
    :param override: word-vec object providing the overriding entries
    :param which: (optional) restrict overriding to these words
    :param whichnot: (optional) words excluded from overriding;
        mutually exclusive with ``which``
    """
    super(OverriddenWordVecBase, self).__init__(base.D)
    self.base = base
    self.over = override.adapt(base.D)
    # which and whichnot are mutually exclusive
    assert (not (which is not None and whichnot is not None))
    numout = max(base.D.values()) + 1
    # BUGFIX: the original unconditionally reset whichnot to an empty set,
    # silently ignoring a caller-supplied exclusion list; only default it
    # when it was not given.
    whichnot = set() if whichnot is None else set(whichnot)
    overridemask_val = np.zeros((numout, ), dtype="float32")
    if which is None:  # which: list of words to override
        for k, v in base.D.items():  # for all symbols in base dic
            if k in override.D and k not in whichnot:  # if also in override dic
                overridemask_val[v] = 1
    else:
        for k in which:
            if k in override.D:  # TODO: if k from which is missing from base.D
                overridemask_val[base.D[k]] = 1
    self.overridemask = q.val(overridemask_val).v
def __init__(self, wordemb, wdic, **kw):
    """
    Adapt a word embedding to a new dictionary ``wdic``.

    New-dictionary words missing from the wrapped embedding's dictionary
    are mapped to its rare-token id (or 0 when no rare token exists).
    """
    D = wordemb.D
    # assert(wordemb.raretoken in D) # must have rareid in D to map extra words to it
    super(AdaptedWordEmb, self).__init__(wdic, **kw)
    self.inner = wordemb
    rareid = D.get(wordemb.raretoken, 0)
    # maps all idx from wdic (new) to idx in wordemb.D (old);
    # words from wdic that are missing in wordemb.D go to the old rare id
    self.ad = {v: D.get(k, rareid) for k, v in wdic.items()}
    numnew = max(self.ad.keys()) + 1
    # dense lookup table: position i holds the old id for new id i
    valval = np.array(
        [self.ad.get(i, rareid) for i in range(numnew)], dtype="int64")
    self.adb = q.val(valval).v
def test_masked_3D_data(self):
    """Masked linout over 3D data must equal a manual matmul then mask."""
    self.linout.data = q.val(
        np.random.random((7, 10, 3)).astype(dtype="float32")).v
    self.linout.computer = q.GRULayer(3, 15).return_final("only")
    x = Variable(torch.randn(3, 15)).float()
    # sparse mask: only these (batch, symbol) positions are kept
    msk_nonzero_batches = [0, 0, 0, 1, 1, 2]
    msk_nonzero_values = [0, 2, 3, 2, 6, 5]
    msk = np.zeros((3, 7)).astype("int32")
    msk[msk_nonzero_batches, msk_nonzero_values] = 1
    # NOTE: removed leftover debug print(msk) from the test body
    msk = Variable(torch.from_numpy(msk))
    out = self.linout(x, mask=msk)
    self.assertEqual(out.size(), (3, 7))
    # expected output: scores of x against computed symbol vectors, masked
    data = self.linout.data
    computer = self.linout.computer
    cout = torch.matmul(x, computer(data).t())
    cout = cout * msk.float()
    self.assertTrue(np.allclose(cout.data.numpy(), out.data.numpy()))