import math as ma   # assumed import: the adam step size below uses ma.sqrt
import numpy as np

import utils        # assumed project-local module providing sample_negative


def _train_neg_adam(self, e, c, epochs, neg_size, C_train, b_train,
                    me_train, mC_train, mb_train,
                    ve_train, vC_train, vb_train,
                    t_train, beta1_train, beta2_train):
    # start epochs
    for i in range(epochs):
        neg = utils.sample_negative(neg_size, {c}, vocab_size=self.V_dash)

        # forward propagation over the true context plus the negative samples
        labels = [c] + neg
        z = np.dot(e, C_train[labels].T) + b_train[labels]
        exp_z = np.exp(z)
        sum_exp_z = np.sum(exp_z)

        # back propagation of the sampled-softmax cross-entropy
        dz = exp_z / sum_exp_z
        dz[0] -= 1  # for true label
        dz = dz / 10000  # scale down the gradient
        dC = np.dot(dz.reshape(-1, 1), e.reshape(1, -1))
        db = dz
        dE = np.dot(dz.reshape(1, -1), C_train[labels]).reshape(-1)

        # adam step: advance the timestep and the decayed beta products
        t_train = t_train + 1
        beta1_train = beta1_train * self.beta1
        beta2_train = beta2_train * self.beta2

        # adam moments and bias-corrected step size (correction folded into lr)
        lr = self.lr * ma.sqrt(1 - beta2_train) / (1 - beta1_train)
        mE = self.beta1 * me_train + (1 - self.beta1) * dE
        mC = self.beta1 * mC_train[labels] + (1 - self.beta1) * dC
        mb = self.beta1 * mb_train[labels] + (1 - self.beta1) * db
        vE = self.beta2 * ve_train + (1 - self.beta2) * dE * dE
        vC = self.beta2 * vC_train[labels] + (1 - self.beta2) * dC * dC
        vb = self.beta2 * vb_train[labels] + (1 - self.beta2) * db * db

        # update weights (e is modified in place for the caller)
        e -= lr * mE / (np.sqrt(vE + self.epsilon))
        C_train[labels] -= lr * mC / (np.sqrt(vC + self.epsilon))
        b_train[labels] -= lr * mb / (np.sqrt(vb + self.epsilon))

        # save status
        me_train = mE
        mC_train[labels] = mC
        mb_train[labels] = mb
        ve_train = vE
        vC_train[labels] = vC
        vb_train[labels] = vb

    # get probability of the true context from a fresh negative sample
    neg = utils.sample_negative(neg_size, {c}, vocab_size=self.V_dash)
    labels = [c] + neg
    z = np.dot(e, C_train[labels].T) + b_train[labels]
    exp_z = np.exp(z)
    prob = exp_z[0] / np.sum(exp_z)
    return prob

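# --- Hedged sketch (not part of the original class): a self-contained numerical
# --- check of the back-propagation formulas used above.  The loss over the
# --- sampled labels is -log softmax(z)[0] with z = e @ C.T + b, and the analytic
# --- gradients mirror the code: dz = softmax(z), dz[0] -= 1, dC = outer(dz, e),
# --- db = dz, dE = dz @ C.  Shapes and names (K, n_labels, rng) are illustrative.
def _sampled_softmax_loss(e, C, b):
    z = e @ C.T + b
    z = z - z.max()                       # stabilise the softmax
    p = np.exp(z) / np.exp(z).sum()
    return -np.log(p[0])                  # index 0 is the true context

def _check_sampled_softmax_grads(K=8, n_labels=5, eps=1e-6, seed=0):
    rng = np.random.default_rng(seed)
    e = rng.normal(size=K)
    C = rng.normal(size=(n_labels, K))
    b = rng.normal(size=n_labels)

    # analytic gradients, as in the back-propagation block above
    z = e @ C.T + b
    p = np.exp(z - z.max())
    p /= p.sum()
    dz = p.copy()
    dz[0] -= 1
    dC = np.outer(dz, e)
    db = dz
    dE = dz @ C

    # finite-difference gradient with respect to e as a spot check
    dE_num = np.zeros_like(e)
    for k in range(K):
        e_plus, e_minus = e.copy(), e.copy()
        e_plus[k] += eps
        e_minus[k] -= eps
        dE_num[k] = (_sampled_softmax_loss(e_plus, C, b)
                     - _sampled_softmax_loss(e_minus, C, b)) / (2 * eps)

    assert np.allclose(dE, dE_num, atol=1e-5)
    return dE, dC, db
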
def _train_update_neg_adam(self, w, c, epochs, neg_size):
    for i in range(epochs):
        neg = utils.sample_negative(neg_size, {c}, vocab_size=self.V_dash)

        # forward propagation
        e = self.E[w]
        labels = [c] + neg
        z = np.dot(e, self.C[labels].T) + self.b[labels]
        exp_z = np.exp(z)
        sum_exp_z = np.sum(exp_z)

        # back propagation
        dz = exp_z / sum_exp_z
        dz[0] -= 1  # for true label
        dz = dz / 100000  # scale down the gradient
        dC = np.dot(dz.reshape(-1, 1), e.reshape(1, -1))
        db = dz
        dE = np.dot(dz.reshape(1, -1), self.C[labels]).reshape(-1)

        # adam step: advance the timestep and the decayed beta products
        self.t = self.t + 1
        self.beta1_t = self.beta1_t * self.beta1
        self.beta2_t = self.beta2_t * self.beta2

        # adam moments and bias-corrected step size (correction folded into lr)
        lr = self.lr * ma.sqrt(1 - self.beta2_t) / (1 - self.beta1_t)
        mE = self.beta1 * self.mE_t[w] + (1 - self.beta1) * dE
        mC = self.beta1 * self.mC_t[labels] + (1 - self.beta1) * dC
        mb = self.beta1 * self.mb_t[labels] + (1 - self.beta1) * db
        vE = self.beta2 * self.vE_t[w] + (1 - self.beta2) * dE * dE
        vC = self.beta2 * self.vC_t[labels] + (1 - self.beta2) * dC * dC
        vb = self.beta2 * self.vb_t[labels] + (1 - self.beta2) * db * db

        # update weights
        self.E[w] -= lr * mE / (np.sqrt(vE + self.epsilon))
        self.C[labels] -= lr * mC / (np.sqrt(vC + self.epsilon))
        self.b[labels] -= lr * mb / (np.sqrt(vb + self.epsilon))

        # save status
        self.mE_t[w] = mE
        self.mC_t[labels] = mC
        self.mb_t[labels] = mb
        self.vE_t[w] = vE
        self.vC_t[labels] = vC
        self.vb_t[labels] = vb

    # get probability of the true context from a fresh negative sample
    neg = utils.sample_negative(neg_size, {c}, vocab_size=self.V_dash)
    labels = [c] + neg
    z = np.dot(self.E[w], self.C[labels].T) + self.b[labels]
    exp_z = np.exp(z)
    prob = exp_z[0] / np.sum(exp_z)
    return prob

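# --- Hedged sketch (not part of the original code): the two Adam-based methods
# --- above fold the bias correction into the step size,
# ---     lr_t = lr * sqrt(1 - beta2**t) / (1 - beta1**t);  param -= lr_t * m / sqrt(v + eps)
# --- which matches the "efficient" form noted in the Adam paper, up to where
# --- epsilon is applied.  This standalone check shows it agrees closely with the
# --- textbook form using explicit m_hat / v_hat.  All values are illustrative.
def adam_step_folded(m, v, t, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    lr_t = lr * ma.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)
    return lr_t * m / ma.sqrt(v + eps)

def adam_step_textbook(m, v, t, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    m_hat = m / (1 - beta1 ** t)
    v_hat = v / (1 - beta2 ** t)
    return lr * m_hat / (ma.sqrt(v_hat) + eps)

_m, _v, _t = 0.05, 0.02, 10                 # example first/second moments and timestep
_s1 = adam_step_folded(_m, _v, _t)
_s2 = adam_step_textbook(_m, _v, _t)
assert abs(_s1 - _s2) / abs(_s2) < 1e-3     # the two forms differ only in epsilon placement
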
def _train_update_neg_adam(self, w, c, epochs, neg_size):  # momentum-based variant
    for i in range(epochs):
        neg = utils.sample_negative(neg_size, {c}, vocab_size=self.V_dash)

        # forward propagation
        e = self.E[w]
        labels = [c] + neg
        z = np.dot(e, self.C[labels].T) + self.b[labels]
        exp_z = np.exp(z)
        sum_exp_z = np.sum(exp_z)

        # back propagation
        dz = exp_z / sum_exp_z
        dz[0] -= 1  # for true label
        # no gradient scaling here, unlike the adam variants above
        dC = np.dot(dz.reshape(-1, 1), e.reshape(1, -1))
        db = dz
        dE = np.dot(dz.reshape(1, -1), self.C[labels]).reshape(-1)

        # momentum things
        vE = self.beta * self.vE[w] + dE
        vC = self.beta * self.vC[labels] + dC
        vb = self.beta * self.vb[labels] + db

        # update weights
        self.E[w] -= self.learning_rate * vE
        self.C[labels] -= self.learning_rate * vC
        self.b[labels] -= self.learning_rate * vb

        # save status
        self.vE[w] = vE
        self.vC[labels] = vC
        self.vb[labels] = vb

    return self.get_prob(w, c)

def get_neg_prob(self, word, context, neg_size=200):
    neg = utils.sample_negative(neg_size, {context}, vocab_size=self.V_dash)

    # forward propagation
    e = self.E[word]  # K-dimensional embedding vector
    labels = [context] + neg
    z = np.dot(e, self.C[labels].T) + self.b[labels]
    exp_z = np.exp(z)
    sum_exp_z = np.sum(exp_z)
    prob = exp_z[0] / sum_exp_z
    return prob

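# --- Hedged sketch (an assumption, not the project's utils module): a uniform
# --- negative sampler with the same call shape as
# --- utils.sample_negative(neg_size, {c}, vocab_size=V_dash).  The real helper
# --- may weight candidates differently (e.g. by unigram frequency); this version
# --- samples with replacement and simply rejects the excluded ids.
import random

def sample_negative(neg_size, exclude, vocab_size):
    """Return `neg_size` label ids drawn uniformly from range(vocab_size),
    skipping anything in `exclude`, as a plain list (so `[c] + neg` concatenates)."""
    neg = []
    while len(neg) < neg_size:
        cand = random.randrange(vocab_size)
        if cand not in exclude:
            neg.append(cand)
    return neg

# example: five negatives for true context id 3 from a 100-word output vocabulary
# sample_negative(5, {3}, vocab_size=100)  ->  e.g. [42, 17, 88, 61, 9]
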
def _train_neg_adam(self, e, c, epochs, neg_size, C_train, b_train,
                    ve_train, vC_train, vb_train):
    # start epochs
    for i in range(epochs):
        neg = utils.sample_negative(neg_size, {c}, vocab_size=self.V_dash)

        # forward propagation
        labels = [c] + neg
        z = np.dot(e, C_train[labels].T) + b_train[labels]
        exp_z = np.exp(z)
        sum_exp_z = np.sum(exp_z)

        # back propagation
        dz = exp_z / sum_exp_z
        dz[0] -= 1  # for true label
        dC = np.dot(dz.reshape(-1, 1), e.reshape(1, -1))
        db = dz
        dE = np.dot(dz.reshape(1, -1), C_train[labels]).reshape(-1)

        # momentum step (this variant uses momentum rather than adam)
        vE = self.beta * ve_train + dE
        vC = self.beta * vC_train[labels] + dC
        vb = self.beta * vb_train[labels] + db

        # update weights (e is modified in place for the caller)
        e -= self.learning_rate * vE
        C_train[labels] -= self.learning_rate * vC
        b_train[labels] -= self.learning_rate * vb

        # save status
        ve_train = vE
        vC_train[labels] = vC
        vb_train[labels] = vb

    # get probability: full softmax over the entire output vocabulary
    z = np.dot(e, C_train.T) + b_train
    exp_z = np.exp(z)
    prob = exp_z[c] / np.sum(exp_z)
    return prob

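# --- Hedged sketch (not part of the original code): the momentum-based methods
# --- above use the classic heavy-ball update
# ---     v <- beta * v + grad;   param <- param - learning_rate * v
# --- This tiny standalone example runs that update on a 1-D quadratic
# --- f(x) = 0.5 * x**2 (gradient = x) to show the velocity accumulating and the
# --- parameter converging; the step count and constants are illustrative.
def momentum_sgd_quadratic(steps=200, x=5.0, v=0.0, beta=0.9, learning_rate=0.05):
    for _ in range(steps):
        grad = x                        # gradient of 0.5 * x**2
        v = beta * v + grad             # accumulate velocity, as in vE/vC/vb above
        x = x - learning_rate * v       # parameter update
    return x

assert abs(momentum_sgd_quadratic()) < 1e-3   # x converges toward the minimum at 0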