def __call__(self, context, sentiment, context_pre, context_fol, neg_context):
    bs = context.shape[1]

    # ----- context -----
    # ---------- positive ----------
    e = self.embed(context)
    e = F.concat(e, axis=1)
    h = self.l1(e)
    th = F.tanh(h)
    pout = self.lc(th)
    pout = F.tile(pout, (args.negative_size, 1))

    # ---------- negative ----------
    # embedding lookups for the preceding and following contexts
    pe = self.embed.W.data[context_pre.T]
    fe = self.embed.W.data[context_fol.T]
    shape = pe.shape
    pe = pe.reshape(shape[0], shape[1] * shape[2])
    fe = fe.reshape(shape[0], shape[1] * shape[2])
    pe = xp.tile(pe, (args.negative_size, 1))
    fe = xp.tile(fe, (args.negative_size, 1))
    ne = xp.array(self.embed.W.data[neg_context])
    # concatenate: [preceding | negative sample | following]
    tmp = xp.concatenate((pe, ne), axis=1)
    ne_in = xp.concatenate((tmp, fe), axis=1)
    # forward pass on the raw arrays (no graph needed for negatives)
    nout = xp.tanh(ne_in.dot(self.l1.W.data.T))
    nout = nout.dot(self.lc.W.data.T)

    # ---------- hinge loss ----------
    loss_c = F.hinge(pout + nout,
                     xp.zeros((args.negative_size * bs,), dtype=np.int32))
    loss_c = loss_c * args.negative_size * bs

    # ----- sentiment -----
    sout = self.ls(th)
    # 0 -> negative, 1 -> positive; flip so the labels act as +-1 signs
    sentiment[sentiment == 1] = -1
    sentiment[sentiment == 0] = 1
    sout = sout[:, 0] * sentiment + sout[:, 1] * sentiment
    loss_s = F.hinge(F.reshape(sout, (bs, 1)),
                     xp.zeros((bs,), dtype=np.int32))
    loss_s = loss_s * bs  # scale back from the mean, mirroring loss_c

    # ----- combined loss -----
    alpha = args.loss_weight
    return (1 - alpha) * loss_c + alpha * loss_s
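# A minimal standalone sketch (not part of the model above) of the label
# trick used for loss_c: with every target set to class 0 and a single
# score column, Chainer's hinge flips the sign of that column and returns
# mean(max(0, 1 - score)) -- a margin loss that rewards scores above 1.
import numpy as np
import chainer.functions as F

scores = np.array([[2.0], [0.5], [-1.0]], dtype=np.float32)  # stand-in for pout + nout
targets = np.zeros((3,), dtype=np.int32)                     # every row labelled class 0
loss = F.hinge(scores, targets)  # defaults: norm='L1', reduce='mean'
assert np.isclose(loss.data, (0.0 + 0.5 + 2.0) / 3)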
def check_forward(self, x_data, t_data):
    x_val = chainer.Variable(x_data)
    t_val = chainer.Variable(t_data)
    loss = functions.hinge(x_val, t_val, self.norm, self.reduce)
    if self.reduce == 'mean':
        self.assertEqual(loss.data.shape, ())
    else:
        self.assertEqual(loss.data.shape, self.x.shape)
    self.assertEqual(loss.data.dtype, numpy.float32)
    loss_value = cuda.to_cpu(loss.data)

    # Compute the expected value: flip the target column, apply the
    # margin, then normalize and reduce
    for i in six.moves.range(self.x.shape[0]):
        self.x[i, self.t[i]] *= -1
    for i in six.moves.range(self.x.shape[0]):
        for j in six.moves.range(self.x.shape[1]):
            self.x[i, j] = max(0, 1.0 + self.x[i, j])
    if self.norm == 'L1':
        loss_expect = self.x
    elif self.norm == 'L2':
        loss_expect = self.x ** 2
    if self.reduce == 'mean':
        loss_expect = numpy.sum(loss_expect) / self.x.shape[0]
    testing.assert_allclose(loss_expect, loss_value)
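# For reference, a vectorized NumPy equivalent of the looped expected-value
# computation above (a standalone sketch, not part of the test class):
import numpy

def expected_hinge(x, t, norm='L1', reduce='mean'):
    x = x.copy()
    x[numpy.arange(x.shape[0]), t] *= -1     # flip the target column
    margins = numpy.maximum(0, 1.0 + x)      # elementwise hinge margins
    loss = margins if norm == 'L1' else margins ** 2
    return numpy.sum(loss) / x.shape[0] if reduce == 'mean' else loss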
def __call__(self, xs: chainer.Variable, crf_pact_structures):
    # crf_pact_structures is a batch of CRFPackageStructure
    xp = chainer.cuda.cupy.get_array_module(xs.data)  # xs is a batch
    # h has shape B x N x D: B is the batch size, N is the total node count
    # of one video, and D is the output vector dimension of each node
    h = self.structural_rnn(xs, crf_pact_structures)
    if self.with_crf:
        # open_crf only supports CPU mode
        # convert_xs = self.bn(self.convert_dim_fc(xs.reshape(-1, xs.shape[-1])))  # note that we remove the batch = 1 dimension
        # h = F.relu(h.reshape(-1, h.shape[-1]) + convert_xs)  # just like ResNet
        # h = F.expand_dims(h, 0)  # add one batch dimension
        h = F.copy(h, -1)  # move h to the CPU
        # The ground-truth labels are hidden inside crf_pact_structure's
        # samples, so this step computes the loss directly.
        loss = self.open_crf(h, crf_pact_structures)
    else:  # structural_rnn only
        ts = self.get_gt_labels(xp, crf_pact_structures,
                                is_bin=False)  # B x N x 1, and B is always 1
        # ts labels range over 0..L, one more class than the ground-truth
        # AUs: label 0 stands for the all-zero (background) annotation
        ts = chainer.Variable(ts.reshape(-1))
        # h must also cover 0..L (= L + 1 classes), including non_AU = 0
        # (the background class)
        h = h.reshape(-1, h.shape[-1])
        assert ts.shape[0] == h.shape[0]
        loss = F.hinge(h, ts, norm='L2', reduce='mean')
        accuracy = F.accuracy(h, ts)
    report_dict = {'loss': loss}
    if not self.with_crf:
        report_dict["accuracy"] = accuracy
    chainer.reporter.report(report_dict, self)
    return loss
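# Shape sketch for the non-CRF branch (toy sizes, not from the model): h is
# flattened to (B*N, L+1) scores and ts to (B*N,) integer labels before the
# squared hinge is applied.
import numpy as np
import chainer.functions as F

B, N, L = 1, 4, 3  # hypothetical batch size, node count, AU label count
h = np.random.randn(B, N, L + 1).astype(np.float32)
ts = np.random.randint(0, L + 1, size=(B, N, 1)).astype(np.int32)
loss = F.hinge(h.reshape(-1, L + 1), ts.reshape(-1), norm='L2', reduce='mean')
print(loss.shape)  # (): one scalar squared-hinge loss over all B*N nodes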
def check_backward(self, x_data, t_data, norm):
    x = chainer.Variable(x_data)
    t = chainer.Variable(t_data)
    loss = functions.hinge(x, t, norm)
    loss.backward()
    self.assertEqual(None, t.grad)

    func = loss.creator

    def f():
        return func.forward((x.data, t.data))

    gx, = gradient_check.numerical_grad(f, (x.data,), (1,), eps=0.01)
    gradient_check.assert_allclose(gx, x.grad, atol=1e-4)
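# Gradient sanity sketch (separate from the test above): for the mean-reduced
# L1 hinge, d loss / d x[i, j] is +-1/N wherever the margin is active, with
# the sign flipped on the target column.
import numpy as np
import chainer
import chainer.functions as F

x = chainer.Variable(np.array([[0.5, -0.2]], dtype=np.float32))
t = np.array([0], dtype=np.int32)
loss = F.hinge(x, t)  # both margins active: (0.5 + 0.8) / 1 = 1.3
loss.backward()
print(x.grad)  # [[-1.,  1.]]: the target column gets the flipped sign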
def __call__(self, context, context_pre, context_fol, neg_context):
    bs = context.shape[1]

    # ----- context -----
    # ---------- positive ----------
    e = self.embed(context)
    e = F.concat(e, axis=1)
    h = self.l1(e)
    th = F.tanh(h)
    pout = self.l2(th)
    pout = F.tile(pout, (args.negative_size, 1))

    # ---------- negative ----------
    # embedding lookups for the preceding and following contexts
    pe = self.embed.W.data[context_pre.T]
    fe = self.embed.W.data[context_fol.T]
    shape = pe.shape
    pe = pe.reshape(shape[0], shape[1] * shape[2])
    fe = fe.reshape(shape[0], shape[1] * shape[2])
    pe = xp.tile(pe, (args.negative_size, 1))
    fe = xp.tile(fe, (args.negative_size, 1))
    ne = xp.array(self.embed.W.data[neg_context])
    # concatenate: [preceding | negative sample | following]
    tmp = xp.concatenate((pe, ne), axis=1)
    ne_in = xp.concatenate((tmp, fe), axis=1)
    # forward pass on the raw arrays (no graph needed for negatives)
    nout = xp.tanh(ne_in.dot(self.l1.W.data.T))
    nout = nout.dot(self.l2.W.data.T)

    # ---------- hinge loss ----------
    loss = F.hinge(pout + nout,
                   xp.zeros((args.negative_size * bs,), dtype=np.int32))
    return loss * args.negative_size * bs
def check_forward(self, x_data, t_data, norm):
    x_val = chainer.Variable(x_data)
    t_val = chainer.Variable(t_data)
    loss = functions.hinge(x_val, t_val, norm)
    self.assertEqual(loss.data.shape, ())
    self.assertEqual(loss.data.dtype, numpy.float32)
    loss_value = float(cuda.to_cpu(loss.data))

    # Compute the expected value
    for i in six.moves.range(self.x.shape[0]):
        self.x[i, self.t[i]] *= -1
    for i in six.moves.range(self.x.shape[0]):
        for j in six.moves.range(self.x.shape[1]):
            self.x[i, j] = max(0, 1.0 + self.x[i, j])
    loss_expect = 0
    if norm == 'L1':
        loss_expect = numpy.sum(self.x) / self.x.shape[0]
    elif norm == 'L2':
        loss_expect = numpy.sum(self.x ** 2) / self.x.shape[0]
    self.assertAlmostEqual(loss_expect, loss_value, places=5)
def f(x, t):
    return functions.hinge(x, t, self.norm)
def forward_chainer(self, inputs):
    x, = inputs
    t = self.t
    norm = self.norm_str
    out = F.hinge(x, t, norm=norm, reduce='no')
    return out,
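# Quick illustration (separate from the wrapper above): reduce='no' keeps
# the elementwise hinge values instead of averaging them over the batch.
import numpy as np
import chainer.functions as F

x = np.array([[0.5, -0.2]], dtype=np.float32)
t = np.array([0], dtype=np.int32)
out = F.hinge(x, t, norm='L1', reduce='no')
print(out.shape)  # (1, 2): one loss term per score; out.data == [[0.5, 0.8]]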
def check_invalid_reduce_option(self, xp):
    x = xp.asarray(self.x)
    t = xp.asarray(self.t)
    with self.assertRaises(ValueError):
        functions.hinge(x, t, 'L1', 'invalid_option')
def check_invalid_norm_option(self, xp):
    x = xp.asarray(self.x)
    t = xp.asarray(self.t)
    with self.assertRaises(NotImplementedError):
        functions.hinge(x, t, 'invalid_norm', 'mean')
def f(x):
    return functions.hinge(x, t_data, self.norm)
def __call__(self, context, sentiment, context_pre, context_fol, neg_context):
    bs = context.shape[1]

    # ----- context -----
    # ---------- positive ----------
    e = self.embed(context)
    e = F.concat(e, axis=1)
    h = self.l1(e)
    th = F.tanh(h)
    pout = self.lc(th)
    pout = F.tile(pout, (args.negative_size, 1))

    # ---------- negative ----------
    # embedding lookups for the preceding and following contexts
    pe = self.embed.W.data[context_pre.T]
    fe = self.embed.W.data[context_fol.T]
    shape = pe.shape
    pe = pe.reshape(shape[0], shape[1] * shape[2])
    fe = fe.reshape(shape[0], shape[1] * shape[2])
    pe = xp.tile(pe, (args.negative_size, 1))
    fe = xp.tile(fe, (args.negative_size, 1))
    ne = xp.array(self.embed.W.data[neg_context])
    # concatenate: [preceding | negative sample | following]
    tmp = xp.concatenate((pe, ne), axis=1)
    ne_in = xp.concatenate((tmp, fe), axis=1)
    # forward pass on the raw arrays (no graph needed for negatives)
    nout = xp.tanh(ne_in.dot(self.l1.W.data.T))
    nout = nout.dot(self.lc.W.data.T)

    # ---------- hinge loss ----------
    loss_c = F.hinge(pout + nout,
                     xp.zeros((args.negative_size * bs,), dtype=np.int32))
    loss_c = loss_c * args.negative_size * bs

    # ----- sentiment -----
    sout = self.ls(th)
    # 0 -> negative, 1 -> positive; flip so the labels act as +-1 signs
    sentiment[sentiment == 1] = -1
    sentiment[sentiment == 0] = 1
    sout = sout[:, 0] * sentiment + sout[:, 1] * sentiment
    loss_s = F.hinge(F.reshape(sout, (bs, 1)),
                     xp.zeros((bs,), dtype=np.int32))
    loss_s = loss_s * bs

    # ----- word-sentiment regularization -----
    swe = self.embed(dict_index)
    ws_regular = F.softmax_cross_entropy(self.lws(swe), dict_label)

    # ----- word-word regularization -----
    rw1 = self.embed.W.data[w_cluster[:, 0]]
    rw2 = self.embed.W.data[w_cluster[:, 1]]
    ww_regular = xp.sum(xp.linalg.norm(rw1 - rw2, axis=1) ** 2)

    # ----- combined loss -----
    alpha = args.loss_weight
    lamb_ww = args.ww_regular
    lamb_ws = args.ws_regular
    return ((1 - alpha) * loss_c + alpha * loss_s
            + lamb_ww * ww_regular - lamb_ws * ws_regular)
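# A toy check of the word-word regularizer above (hypothetical 2-d
# embeddings): it is simply the summed squared Euclidean distance between
# the embeddings of clustered word pairs.
import numpy as np

rw1 = np.array([[1.0, 0.0], [0.0, 2.0]])
rw2 = np.array([[0.0, 0.0], [0.0, 0.0]])
ww_regular = np.sum(np.linalg.norm(rw1 - rw2, axis=1) ** 2)
assert ww_regular == 1.0 + 4.0  # ||(1,0)||^2 + ||(0,2)||^2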
def evaluate(self):
    target = self._targets['main']
    eval_func = self.eval_func or target
    if self.eval_hook:
        self.eval_hook(self)
    summary = reporter_module.DictSummary()

    # test data evaluation
    all_batch_count = sum(self.n_qd_pairs)
    batch_count = 0.0
    print("\ntest set evaluation", file=sys.stderr)
    iterator = self._iterators['test']
    it = copy.copy(iterator)
    score_first = []
    score_second = []
    pre_rate = 0
    for batch in it:
        observation = {}
        with reporter_module.report_scope(observation):
            xs1, xs2, xs3, y = self.converter(batch, device=self.device)
            y_score, first_rel_score, second_rel_score = target.predictor(
                xs1, xs2, xs3)
            loss = F.hinge(x=y_score, t=y).data
            reporter_module.report({'loss_test': loss}, target)
            score_first += first_rel_score.data.flatten().tolist()
            score_second += second_rel_score.data.flatten().tolist()
        batch_count += len(batch)
        rate = int(batch_count / all_batch_count * 10)
        if rate != pre_rate:
            print("{}%".format(rate * 10), end=' ', file=sys.stderr)
            pre_rate = rate
        summary.add(observation)

    # development data evaluation
    print("\ndev set evaluation", file=sys.stderr)
    iterator = self._iterators['dev']
    it = copy.copy(iterator)
    batch_count = 0.0
    pre_rate = 0
    for batch in it:
        observation = {}
        with reporter_module.report_scope(observation):
            xs1, xs2, xs3, y = self.converter(batch, device=self.device)
            y_score, first_rel_score, second_rel_score = target.predictor(
                xs1, xs2, xs3)
            loss = F.hinge(x=y_score, t=y).data
            reporter_module.report({'loss_dev': loss}, target)
        batch_count += len(batch)
        rate = int(batch_count / all_batch_count * 10)
        if rate != pre_rate:
            print("{}%".format(rate * 10), end=' ', file=sys.stderr)
            pre_rate = rate
        summary.add(observation)

    # interleave first- and second-stage scores per query-document group
    current_position = 0
    score = []
    for t in self.n_qd_pairs:
        score.append(score_first[current_position])
        for s in score_second[current_position:current_position + t]:
            score.append(s)
        current_position += t
    with open(self.score_abs_dest
              + "/score_epoch{}.txt".format(self.epoch_num), "w") as fo:
        for s in score:
            fo.write("{}\n".format(s))
    self.epoch_num += 1
    return summary.compute_mean()
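# A toy walk-through of the score interleaving at the end of evaluate()
# (hypothetical values): each group of t query-document pairs contributes
# one first-stage score followed by its t second-stage scores.
n_qd_pairs = [2, 1]
score_first = [0.9, 0.7, 0.5]   # indexed at each group's start position
score_second = [0.1, 0.2, 0.3]
current_position, score = 0, []
for t in n_qd_pairs:
    score.append(score_first[current_position])
    score.extend(score_second[current_position:current_position + t])
    current_position += t
assert score == [0.9, 0.1, 0.2, 0.5, 0.3]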