def __call__(self, x):
    """Evaluate the inversion loss for the current image estimate.

    The loss is a weighted sum of three terms: the negated entry
    ``(0, self.label)`` of the value returned by ``self.getbeforesoftmax``,
    a total-variation term, and an Lp-norm term. The last two are divided
    by the product of all image dimensions except the first.

    Args:
        x: Not referenced by the computation.

    Returns:
        The combined loss variable. The loss and its three components are
        also published through ``chainer.report``.
    """
    self.model.train = False

    weight_h = chainer.Variable(self.Wh_data)
    weight_w = chainer.Variable(self.Ww_data)
    # Constructed for parity with earlier experiments; the active loss terms
    # below do not consume it.
    tp = chainer.Variable(self.truth_probabiliry)
    dummy_target = chainer.Variable(self.t_dummy)

    model_out = self.model(self.img(), dummy_target)
    pre_softmax = self.getbeforesoftmax(model_out)

    # Minimising the negated entry maximises the selected entry.
    activation = -F.get_item(pre_softmax, (0, self.label))

    tv = self.tv_norm(self.img(), weight_h, weight_w)
    tv = tv / np.prod(self.img().data.shape[1:])

    lp = F.sum(self.img() ** self.p) ** (1. / self.p)
    lp = lp / np.prod(self.img().data.shape[1:])

    loss = (self.lambda_a * activation
            + self.lambda_tv * tv
            + self.lambda_lp * lp)

    observation = {
        'inv_loss': loss,
        'activation': activation,
        'tv': tv,
        'lp': lp,
    }
    chainer.report(observation, self)
    return loss
def train(self, n_step=10000, policy=None):
    """Play ``n_step`` transitions and fit the Q-function on them.

    ``n_step + 1`` observations are collected from ``self.game``. The value
    predicted for the action taken at step ``t`` is regressed (mean squared
    error) onto ``reward[t + 1] + discount_factor * value_max[t + 1]``.

    Args:
        n_step: Number of transitions used for the update.
        policy: Passed to ``self.get_policy``; the returned callable maps
            the array of action values to an action index.

    Returns:
        Sum of all rewards observed during the rollout.
    """
    policy = self.get_policy(policy)
    n_obs = n_step + 1

    state = np.empty(shape=(n_obs,) + self.shape, dtype=np.float32)
    action = np.empty(shape=(n_obs,), dtype=np.int32)
    value_max = np.empty(shape=(n_obs,), dtype=np.float32)
    value_act = [None] * n_obs  # list of chainer.Variable
    reward = np.empty(shape=(n_obs,), dtype=np.float32)

    for t in range(n_obs):
        # Always go through ``self.game``: the state is read from it, so the
        # reward and the transition must use the same environment instead
        # of a bare ``game`` name.
        reward[t] = self.game.get_reward()
        state[t] = self.game.get_state()
        value_all = F.reshape(self.q_a_given_s(state[t]), (-1,))
        action[t] = policy(value_all.data)
        # Slice (not scalar index) keeps a length-1 axis for the concat below.
        value_act[t] = F.get_item(value_all, slice(action[t], action[t] + 1))
        # NOTE(review): this is the max over the single chosen action value,
        # i.e. the value of the action taken, not the max over all actions.
        # Confirm whether ``np.max(value_all.data)`` was intended.
        value_max[t] = np.max(value_act[t].data)
        self.game.transit(action[t])

    value_predicted = F.get_item(
        F.concat(value_act, axis=0), (slice(0, n_step),))
    value_actual = reward[1:] + self.discount_factor * value_max[1:]
    self.opt.update(F.mean_squared_error, value_predicted, value_actual)
    return reward.sum()
def check_forward(self, x_data):
    """Check ``get_item`` forward output when index arrays are cupy arrays.

    NumPy arrays and lists found in ``self.slices`` are converted to cupy
    arrays (lists as int32) before indexing; the result is compared with
    indexing the host copy of ``x_data`` by the original ``self.slices``.
    """
    def to_device_index(index):
        # ndarray is tested first, matching the original conversion order.
        if isinstance(index, numpy.ndarray):
            return chainer.backends.cuda.cupy.array(index)
        if isinstance(index, list):
            return chainer.backends.cuda.cupy.array(index, dtype=numpy.int32)
        return index

    device_slices = tuple(to_device_index(s) for s in self.slices)

    x = chainer.Variable(x_data)
    y = functions.get_item(x, device_slices)

    self.assertEqual(y.data.dtype, numpy.float32)
    expected = cuda.to_cpu(x_data)[self.slices]
    numpy.testing.assert_equal(expected, cuda.to_cpu(y.data))
def check_forward(self, x_data):
    """Check that ``get_item`` forward matches plain array indexing.

    Args:
        x_data: Input array; it is wrapped in a ``chainer.Variable`` and
            indexed with ``self.slices``.
    """
    x = chainer.Variable(x_data)
    y = functions.get_item(x, self.slices)
    # ``numpy.float`` was only an alias of the builtin ``float`` (float64)
    # and has been removed from NumPy; spell the dtype explicitly.
    self.assertEqual(y.data.dtype, numpy.float64)
    numpy.testing.assert_equal(cuda.to_cpu(x_data)[self.slices],
                               cuda.to_cpu(y.data))
def forward(self, h_s, c_s, n_speakers=None, to_train=1):
    """Generate per-sample attractor vectors by repeatedly stepping the LSTM.

    Args:
        h_s: Initial hidden state; the original notes give (1, B, F).
        c_s: Initial cell state; the original notes give (1, B, F).
        n_speakers: Per-sample speaker counts (original notes: shape (B,)),
            or ``None`` when the count is unknown (inference).
        to_train: 1 to step the LSTM one extra time beyond the largest
            speaker count; 0 to step exactly that many times.

    Returns:
        ``(A, None)`` where ``A`` is a list with one entry per batch sample.
        With known counts each entry holds the first ``n_spk`` attractors;
        otherwise it holds the attractors produced before the sigmoid of
        ``self.linear`` first dropped below 0.5 (a single zero row if none).
    """
    batch_size = h_s.shape[1]

    # An array-valued ``n_speakers`` has no unambiguous truth value, so test
    # for presence explicitly instead of relying on ``if n_speakers:``.
    if n_speakers is not None and len(n_speakers) > 0:
        # zeros: (B, 1, F)
        zeros = [cp.zeros((1, self.in_size)).astype(cp.float32)
                 for _ in range(batch_size)]
        max_speakers = max(n_speakers).tolist()
        n_steps = max_speakers + to_train

        A = cp.array([])
        for _step in range(n_steps):
            h_s, c_s, _ = self.lstm(h_s, c_s, zeros)
            a_s = h_s[0]
            A = F.vstack((A, a_s)) if A.size else a_s

        # Raw scores; the original notes say sigmoid_cross_entropy is applied
        # by the consumer, hence no sigmoid here.
        P = self.linear(A)

        # Reorder to A: (B, n_steps, F) and P: (B, n_steps, 1).
        A = F.swapaxes(A.reshape(n_steps, batch_size, -1), 0, 1)
        P = F.swapaxes(P.reshape(n_steps, batch_size, -1), 0, 1)

        # Strip each sample down to its own speaker count.
        A = [F.get_item(a, slice(0, n_spk))
             for a, n_spk in zip(A, n_speakers)]
        P = [F.get_item(p, slice(0, n_spk + to_train))
             for p, n_spk in zip(P, n_speakers)]
    else:
        # The speaker count is unknown, so attractors are generated until
        # the existence probability falls below 0.5. This cannot be batched;
        # each sample is processed on its own.
        # zeros: (1, 1, F)
        zeros = [cp.zeros((1, self.in_size)).astype(cp.float32)]
        A = []
        for batch in range(batch_size):
            h_b = h_s[:, batch: batch + 1, :]
            c_b = c_s[:, batch: batch + 1, :]
            a = cp.array([])
            # NOTE(review): there is no upper bound on the iteration count;
            # the loop ends only once the probability drops below 0.5.
            while True:
                h_b, c_b, _ = self.lstm(h_b, c_b, zeros)
                a_s = h_b[0]
                p_s = F.sigmoid(self.linear(a_s))
                if p_s.array[0] < 0.5:
                    break
                a = F.vstack((a, a_s)) if a.size else a_s
            if not a.size:
                a = cp.zeros((1, h_s.shape[2])).astype(cp.float32)
            A.append(a)
        # No probabilities are collected in this branch. Binding the name
        # avoids the NameError the old trailing ``P = P if to_train else None``
        # raised here with the default ``to_train=1``.
        P = None

    # NOTE(review): ``P`` is computed above but the second return value has
    # always been ``None``; kept unchanged for callers. Confirm whether
    # ``P`` was meant to be returned when ``to_train`` is set.
    return A, None
def test_multiple_ellipsis(self):
    """An index holding two ``Ellipsis`` objects must raise ``ValueError``."""
    double_ellipsis = (Ellipsis, Ellipsis)
    self.assertRaises(
        ValueError, functions.get_item, self.x_data, double_ellipsis)
def f(x):
    """Index ``x`` with the ``slices`` taken from the enclosing scope."""
    selected = functions.get_item(x, slices)
    return selected
def forward(self, inputs, device):
    """Apply ``get_item`` to the single input using device-converted slices.

    Args:
        inputs: One-element sequence holding the input variable.
        device: Forwarded to ``self._convert_slices`` with ``self.slices``.

    Returns:
        One-element tuple containing the indexed output.
    """
    (x,) = inputs
    converted = self._convert_slices(self.slices, device)
    return (functions.get_item(x, converted),)
def _subsamplex(x, n):
    """Keep every ``n``-th entry along the first axis of each item in ``x``.

    Args:
        x: Iterable of variables indexable with a two-element index.
        n: Step of the first-axis slice.

    Returns:
        Tuple ``(subsampled, ilens)``: the subsampled items and the size of
        each one's first axis.
    """
    stride_index = (slice(None, None, n), slice(None))
    subsampled = []
    ilens = []
    for seq in x:
        kept = F.get_item(seq, stride_index)
        subsampled.append(kept)
        ilens.append(kept.shape[0])
    return subsampled, ilens
def CalcLoss(self, xs, ys):
    """Return the cross-entropy loss of a batch averaged per target token.

    Each source sentence is encoded twice, as word ids and as character
    ids, each by a forward and a backward encoder. The per-position states
    of both views are concatenated and passed to the decoder together with
    the embedded, EOS-prefixed targets.

    Args:
        xs: Batch of source sentences, each an iterable of string tokens
            (characters are obtained via ``"".join``).
        ys: Batch of target sentences, each an iterable of tokens looked up
            in ``target_ids``.

    Returns:
        Sum of the softmax cross-entropy over all target positions
        (including the appended EOS) divided by the number of positions.
    """
    wxs = [
        np.array([source_word_ids.get(w, UNK) for w in x], dtype=np.int32)
        for x in xs
    ]
    cxs = [
        np.array([source_char_ids.get(c, UNK) for c in "".join(x)],
                 dtype=np.int32)
        for x in xs
    ]

    # Target token can be either a word or a char
    wcys = [
        np.array([target_ids.get(w, UNK) for w in y], dtype=np.int32)
        for y in ys
    ]

    eos = self.xp.array([EOS], 'i')
    ys_in = [F.concat([eos, y], axis=0) for y in wcys]
    ys_out = [F.concat([y, eos], axis=0) for y in wcys]

    # Both the source id lists and ys_in are lists of arrays.
    wexs = sequence_embed(self.embed_xw, wxs)
    cexs = sequence_embed(self.embed_xc, cxs)
    # Reversed copies feed the backward encoders.
    wexs_b = [F.get_item(wex, range(len(wex))[::-1]) for wex in wexs]
    cexs_b = [F.get_item(cex, range(len(cex))[::-1]) for cex in cexs]
    eys = sequence_embed(self.embed_y, ys_in)

    # None represents a zero vector in an encoder.
    _, hfw = self.encoder_fw(None, wexs)
    h1, hbw = self.encoder_bw(None, wexs_b)
    _, hfc = self.encoder_fc(None, cexs)
    h2, hbc = self.encoder_bc(None, cexs_b)

    # Restore the original order of the backward states so that they line
    # up position-by-position with the forward states.
    hbw = [F.get_item(h, range(len(h))[::-1]) for h in hbw]
    hbc = [F.get_item(h, range(len(h))[::-1]) for h in hbc]

    htw = [F.concat([fw, bw], axis=1) for fw, bw in zip(hfw, hbw)]
    htc = [F.concat([fw, bw], axis=1) for fw, bw in zip(hfc, hbc)]
    ht = [F.concat([word, char], axis=0) for word, char in zip(htw, htc)]
    h = F.concat([h1, h2], axis=2)

    h_list, _, _, _ = self.decoder(h, ht, eys)

    # It is faster to concatenate data before calculating loss
    # because only one matrix multiplication is called.
    concat_os_out = self.W(F.concat(h_list, axis=0))
    concat_ys_out = F.concat(ys_out, axis=0)
    n_words = concat_ys_out.shape[0]
    loss = F.sum(
        F.softmax_cross_entropy(concat_os_out, concat_ys_out,
                                reduce='no')) / n_words
    return loss
def translate(self, xs, max_length=100):
    """Greedily decode a batch of source sentences.

    Runs with backprop disabled and ``train`` set to ``False``. The decoder
    is always stepped ``max_length`` times; each output row is truncated at
    its first EOS afterwards.

    Args:
        xs: Batch of source sentences, each an iterable of string tokens.
        max_length: Number of decoding steps.

    Returns:
        List with one integer id array per input sentence, EOS and anything
        after it removed.
    """
    print("Now translating")
    batch = len(xs)
    print("batch", batch)

    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        wxs = [
            np.array([source_word_ids.get(w, UNK) for w in x],
                     dtype=np.int32)
            for x in xs
        ]
        cxs = [
            np.array([source_char_ids.get(c, UNK) for c in "".join(x)],
                     dtype=np.int32)
            for x in xs
        ]

        wexs = sequence_embed(self.embed_xw, wxs)
        cexs = sequence_embed(self.embed_xc, cxs)
        wexs_b = [wex[::-1] for wex in wexs]
        cexs_b = [cex[::-1] for cex in cexs]

        _, hfw = self.encoder_fw(None, wexs)
        h1, hbw = self.encoder_bw(None, wexs_b)
        _, hfc = self.encoder_fc(None, cexs)
        h2, hbc = self.encoder_bc(None, cexs_b)

        # Put the backward states back into source order.
        hbw = [F.get_item(h, range(len(h))[::-1]) for h in hbw]
        hbc = [F.get_item(h, range(len(h))[::-1]) for h in hbc]

        htw = [F.concat([fw, bw], axis=1) for fw, bw in zip(hfw, hbw)]
        htc = [F.concat([fw, bw], axis=1) for fw, bw in zip(hfc, hbc)]
        ht = [F.concat([word, char], axis=0)
              for word, char in zip(htw, htc)]

        ys = self.xp.full(batch, EOS, 'i')
        result = []
        h = F.concat([h1, h2], axis=2)
        for _ in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h_list, _, _, _ = self.decoder(h, ht, eys)
            cys = chainer.functions.concat(h_list, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)
            # Carry the newest decoder states over as the next initial state.
            h = F.transpose_sequence(h_list)[-1]
            h = F.reshape(h, (self.n_layers, h.shape[0], h.shape[1]))

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def calc_loss(self, image_size, predicted_grids, gt_bbox_points,
              objectness_scores, normalize=True):
    """Compute the box regression loss and the objectness loss.

    For every ground-truth box, predictions with IoU >= 0.7 are positives;
    if there are none, the single best-IoU prediction is used instead.
    Predictions with 0.3 < IoU < 0.7 are excluded from the objectness loss.

    Args:
        image_size: Forwarded to ``self.get_corners``.
        predicted_grids: Predicted grids; corners are extracted from them.
        gt_bbox_points: Iterable of ground-truth boxes.
        objectness_scores: Scores with one row per prediction.
        normalize: Not referenced by the computation.

    Returns:
        Tuple ``(bbox_loss, objectness_loss)``. If there is no ground-truth
        box to train on, both entries are zero-valued variables.
    """
    predicted_bbox_points = self.get_corners(
        predicted_grids, image_size, scale_to_image_size=False)

    # 1. transform box coordinates to aabb coordinates for determination of iou
    predicted_bbox_points = F.stack(
        (predicted_bbox_points[0], predicted_bbox_points[4],
         predicted_bbox_points[3], predicted_bbox_points[7]),
        axis=1)

    # 2. find best prediction area for each gt bbox
    gt_bboxes_to_use_for_loss = []
    positive_anchor_indices = self.xp.empty((0,), dtype=self.xp.int32)
    not_contributing_anchors = self.xp.empty((0,), dtype=self.xp.int32)
    n_predictions = len(predicted_bbox_points)

    for gt_bbox in gt_bbox_points:
        # Determine which predictions are positives (high IoU with the
        # ground truth) and which lie in the ignored band; this also
        # drives the objectness classification labels.
        tiled_gt = self.xp.tile(gt_bbox[None, ...], (n_predictions, 1))
        ious = bbox_iou(tiled_gt, predicted_bbox_points.data)
        positive_boxes = self.xp.where(ious[0] >= 0.7)
        not_contributing_boxes = self.xp.where(
            self.xp.logical_and(0.3 < ious[0], ious[0] < 0.7))

        if len(positive_boxes[0]) == 0:
            # No prediction is good enough: fall back to the best one.
            best_iou_index = ious[0, :].argmax()
            positive_anchor_indices = self.xp.concatenate(
                (positive_anchor_indices, best_iou_index[None, ...]), axis=0)
            gt_bboxes_to_use_for_loss.append(tiled_gt[0])
        else:
            positive_anchor_indices = self.xp.concatenate(
                (positive_anchor_indices, positive_boxes[0]), axis=0)
            gt_bboxes_to_use_for_loss.extend(
                tiled_gt[:len(positive_boxes[0])])

        not_contributing_anchors = self.xp.concatenate(
            (not_contributing_anchors, not_contributing_boxes[0]), axis=0)

    if not gt_bboxes_to_use_for_loss:
        # Return a pair, like the regular exit below, so that callers
        # unpacking two losses also work when there is nothing to train on.
        zero = self.xp.array(0, dtype=predicted_grids.dtype)
        return Variable(zero), Variable(zero.copy())

    gt_bboxes_to_use_for_loss = F.stack(gt_bboxes_to_use_for_loss)

    # Keep only the predictions that were matched to a ground-truth box.
    predicted_bbox_points = F.get_item(
        predicted_bbox_points, positive_anchor_indices)

    # 3. calculate the bbox regression loss (Huber with delta 1)
    loss = F.huber_loss(predicted_bbox_points, gt_bboxes_to_use_for_loss, 1)

    # 4. calculate objectness loss
    objectness_labels = self.xp.zeros(
        len(objectness_scores), dtype=self.xp.int32)
    # Positives are written last so they win over the ignore label.
    objectness_labels[not_contributing_anchors] = -1
    objectness_labels[positive_anchor_indices] = 1
    objectness_loss = F.softmax_cross_entropy(
        objectness_scores,
        objectness_labels,
        ignore_label=-1,
    )
    return F.mean(loss), objectness_loss
def act_and_merge_features(self, xs, ws, vs, ms, gcs=None, get_att_score=False):
    """Merge a chunk summary vector into the features of every sequence.

    For each sequence a summary ``a`` is built from ``w`` (weighted average)
    or from ``v`` (weighted concatenation), depending on
    ``self.use_average``, and concatenated to ``x`` along axis 1.

    Args:
        xs: Per-sequence feature arrays; ``xs[0]`` selects the array module.
        ws: Per-sequence ``w`` inputs, or ``None`` entries.
        vs: Per-sequence ``v`` inputs, or ``None`` entries.
        ms: Per-sequence masks; indices 0..3 are read as ``mask_ij``,
            ``mask_i``, ``mask_ik`` and ``ikj_table``.
        gcs: Optional per-sequence values; unpacked in the loop but not
            otherwise read.
        get_att_score: If true, ``ass`` is appended to the return tuple.

    Returns:
        ``(closs, pcs, hs)`` or ``(closs, pcs, hs, ass)``. ``closs`` is never
        modified inside the loop, so it is always returned as ``None``;
        ``ass`` is never appended to, so it is always an empty list.
    """
    hs = []
    pcs = []
    ass = []  # attention scores (never filled; the code that did is disabled)
    xp = cuda.get_array_module(xs[0])
    closs = chainer.Variable(xp.array(0, dtype='f'))

    if gcs is None:
        gcs = [None] * len(xs)

    for x, w, v, gc, mask in zip(xs, ws, vs, gcs, ms):
        if w is None and v is None:  # no words were found for devel/test data
            # Fall back to an all-zero summary and all-zero predictions.
            a = xp.zeros((len(x), self.chunk_embed_out_dim), dtype='f')
            pc = np.zeros(len(x), 'i')
            pcs.append(pc)
            h = F.concat((x, a), axis=1)  # (n, dt) @ (n, dc) => (n, dt+dc)
            hs.append(h)
            continue

        if w is not None:
            w = F.dropout(w, self.embed_dropout)

        ## calculate weight for w
        mask_ij = mask[0]
        if self.use_attention:  # wavg or wcon
            mask_i = mask[1]
            w_scores = self.biaffine(
                F.dropout(x, self.biaffine_dropout),
                F.dropout(w, self.biaffine_dropout))  # (n, m)
            w_scores = w_scores + mask_ij  # a masked element becomes 0 after softmax operation
            w_weight = F.softmax(w_scores)
            w_weight = w_weight * mask_i  # rows of chars without any candidate word become zero vectors

        elif self.chunk_pooling_type == constants.AVG:
            w_weight = self.normalize(mask_ij, xp=xp)

        # NOTE(review): ``w_weight`` is bound only by the two branches above;
        # this presumably relies on use_concat being set whenever neither
        # applies -- confirm against the configuration code.
        if not self.use_concat and self.chunk_vector_dropout > 0:
            # Zero out whole rows of the weight matrix at random.
            mask_drop = xp.ones(w_weight.shape, dtype='f')
            for i in range(w_weight.shape[0]):
                if self.chunk_vector_dropout > np.random.rand():
                    mask_drop[i] = xp.zeros(w_weight.shape[1], dtype='f')
            w_weight = w_weight * mask_drop

        ## calculate weight for v
        if self.use_concat:
            mask_ik = mask[2]
            n = x.shape[0]
            wd = self.chunk_embed_dim_merged  # alternative considered by the author: w.shape[1]
            if self.chunk_pooling_type == constants.WCON:
                ikj_table = mask[3]
                v_weight0 = F.concat(
                    [
                        F.expand_dims(  # (n, m) -> (n, k)
                            F.get_item(w_weight[i], ikj_table[i]), axis=0)
                        for i in range(n)
                    ],
                    axis=0)
                v_weight0 *= mask_ik
            else:
                v_weight0 = mask_ik

            # NOTE(review): the author's shape notes here looked shifted by
            # one statement; the shapes below are presumed from the calls
            # (v_weight0 assumed (n, k)) -- TODO confirm.
            v_weight = F.transpose(v_weight0)  # presumably (k, n)
            v_weight = F.expand_dims(v_weight, 2)  # presumably (k, n, 1)
            v_weight = F.broadcast_to(
                v_weight, (self.chunk_concat_num, n, wd))  # (k, n, wd)
            v_weight = F.concat(v_weight, axis=1)  # presumably (n, k*wd); the author noted (k, n*wd)

            if self.chunk_vector_dropout > 0:
                # Zero out whole rows of the weight matrix at random.
                mask_drop = xp.ones(v_weight.shape, dtype='f')
                for i in range(v_weight.shape[0]):
                    if self.chunk_vector_dropout > np.random.rand():
                        mask_drop[i] = xp.zeros(v_weight.shape[1], dtype='f')
                v_weight *= mask_drop

        ## calculate summary vector a
        if self.use_average:  # avg or wavg
            a = F.matmul(w_weight, w)  # (n, m) * (m, dc) => (n, dc)

        else:  # con or wcon
            v = F.concat(v, axis=1)
            a = v * v_weight

        ## get predicted (attended) chunks
        if self.use_attention:  # wavg or wcon
            if self.chunk_pooling_type == constants.WAVG:
                weight = w_weight
            else:
                weight = v_weight0
            pc = minmax.argmax(weight, axis=1).data
            if xp is cuda.cupy:
                # Predictions are collected on the host.
                pc = cuda.to_cpu(pc)
            pcs.append(pc)

        h = F.concat((x, a), axis=1)  # (n, dt) @ (n, dc) => (n, dt+dc)
        hs.append(h)

    # ``closs`` still holds its initial zero here, see the docstring.
    if closs.data == 0:
        closs = None
    else:
        closs /= len(xs)

    if get_att_score:
        return closs, pcs, hs, ass
    else:
        return closs, pcs, hs
def f(x):
    """Index ``x`` with ``self.slices`` from the enclosing scope."""
    selected = functions.get_item(x, self.slices)
    return selected
def f(x):
    """Index ``x`` with ``self.slices`` and square the result elementwise."""
    selected = functions.get_item(x, self.slices)
    squared = selected * selected
    return squared
def main(id):
    """Write noise-averaged gradient maps of one latent feature to disk.

    Only test samples whose index ``i`` satisfies ``i % 8 == id`` are
    processed. For each of them the gradient of the selected feature with
    respect to the input is averaged over ``resample_size`` noisy copies of
    the input and saved with ``xp.save``.

    Args:
        id: Shard index compared against ``i % 8``.
    """
    model_path = "/efs/fMRI_AE/SimpleFCAE_E32D32/model/model_iter_108858"

    gpu = 0
    get_device_from_id(gpu).use()

    # NibDataset.__init__(self, directory: str, crop: list)
    crop = [[9, 81], [11, 99], [0, 80]]
    test_dataset = NibDataset("/data/test", crop=crop)

    mask = load_mask_nib("/data/mask/average_optthr.nii", crop)

    # SimpleFCAE_E32D32.__init__(self, mask, r: int, in_mask: str, out_mask: str)
    model = Model(mask, 2, "mask", "mask")
    load_npz(model_path, model)
    model.to_gpu()

    # Index into the (1, 9, 11, 10) batch-summed feature map.
    feature_idx = (0, 2, 7, 4)

    resample_size = 100
    batch_size = 10
    noise_level = 0.2

    n_samples = len(test_dataset)
    for i in range(n_samples):
        if i % 8 != id:
            continue
        print("{:4}/{:4}".format(i, n_samples))
        subject = test_dataset.get_subject(i)
        frame = test_dataset.get_frame(i)
        test_img = xp.asarray(test_dataset[i])

        # Does not change between batches, so compute it once per image.
        # NOTE(review): the noise scale is divided by the value range;
        # confirm that multiplying by the range was not intended.
        sigma = noise_level / (xp.max(test_img) - xp.min(test_img))

        resample_remain = resample_size
        resample_processed = 0
        ret = xp.zeros(test_img.shape)
        while resample_remain > 0:
            batch_size_this_loop = min(batch_size, resample_remain)
            resample_remain -= batch_size_this_loop

            batch_shape = (batch_size_this_loop,) + tuple(test_img.shape)
            noise = sigma * xp.random.randn(*batch_shape)
            # ``broadcast_to`` yields a view whose rows share memory, so the
            # noise is added out of place to give every copy its own values.
            batch = (xp.broadcast_to(test_img, batch_shape)
                     + noise).astype(test_img.dtype)

            x = Variable(batch)

            feature = model.extract(x)
            # Compare against the actual batch length so that a short final
            # batch does not trip the check.
            assert feature.shape == (batch_size_this_loop, 1, 9, 11, 10)
            feature = F.sum(feature, axis=0)
            assert feature.shape == (1, 9, 11, 10)
            feature = F.get_item(feature, feature_idx)
            feature.backward()
            grad = xp.mean(x.grad, axis=0)

            # Running mean over all noisy copies processed so far.
            ret = (ret * resample_processed + grad * batch_size_this_loop) / (
                resample_processed + batch_size_this_loop)
            # Without this increment the mean kept only the last batch.
            resample_processed += batch_size_this_loop
            model.cleargrads()

        xp.save(
            "/efs/fMRI_AE/SimpleFCAE_E32D32/grad/sensitivity_map_feature_{}_{}_{}_subject{:03d}_frame{:03d}"
            .format(feature_idx[1], feature_idx[2], feature_idx[3], subject,
                    frame), ret)
def mean_dice_coefficient(dice_coefficients, ret_nan=True):
    """Average dice coefficients, optionally skipping NaN entries.

    Args:
        dice_coefficients: Variable holding the coefficients.
        ret_nan: If true, NaN entries are removed before averaging.

    Returns:
        The mean computed with ``keepdims=True``.
    """
    if not ret_nan:
        return F.mean(dice_coefficients, keepdims=True)

    xp = cuda.get_array_module(dice_coefficients)
    is_valid = ~xp.isnan(dice_coefficients.data)
    valid_only = F.get_item(dice_coefficients, is_valid)
    return F.mean(valid_only, keepdims=True)
def CalcLoss(self, xs, ys):
    """Compute the combined word-level and character-level loss.

    Positions where the word decoder predicts UNK are handled by a
    character decoder: one without attention when the attended source word
    is not UNK, one with attention otherwise. All other positions
    contribute the word-level cross-entropy.

    Args:
        xs: Batch of source sentences (iterables of string tokens).
        ys: Batch of target sentences (iterables of string tokens).

    Returns:
        Tuple ``(loss, count_kno, count_unk_with_no_att,
        count_unk_with_att)``: the loss divided by the number of target
        positions, the number of positions not predicted as UNK, and the
        number of UNK predictions handled without / with attention.
    """
    # Filled by get_unk_hidden_vector (passed as its last argument) and read
    # back below as ``char_hidden[z_s]``.
    char_hidden = []
    ''' wxs = [x[0] for x in xs] unk_xs = [x[1] for x in xs] '''
    wxs = [
        np.array([source_word_ids.get(w, UNK) for w in x], dtype=np.int32)
        for x in xs
    ]
    # Surface forms of the source words that map to UNK ...
    unk_words = list(map(lambda x, y: np.array(y)[x == UNK], wxs, xs))
    # ... their character id sequences ...
    unk_xs = list(
        map(
            lambda x: np.array([
                np.array([source_char_ids.get(c, UNK) for c in list(w)],
                         dtype=np.int32) for w in x
            ]), unk_words))
    # ... and their positions inside each sentence.
    unk_pos = [np.where(x == UNK)[0] for x in wxs]
    concat_wxs = np.concatenate(wxs)
    wys = [
        np.array([target_word_ids.get(w, UNK) for w in y], dtype=np.int32)
        for y in ys
    ]

    eos = self.xp.array([EOS], 'i')
    ys_in = [F.concat([eos, y], axis=0) for y in wys]
    ys_out = [F.concat([y, eos], axis=0) for y in wys]

    # Both xs and ys_in are lists of arrays.
    exs = sequence_embed(self.embed_x, wxs)
    # Convert an UNK word vector into a char-hidden vector
    exs = list(
        map(
            lambda s, t, u: get_unk_hidden_vector(
                s, t, u, self.embed_xc, self.char_encoder, char_hidden),
            exs, unk_pos, unk_xs))

    exs_f = exs
    exs_b = [F.get_item(ex, range(len(ex))[::-1]) for ex in exs]
    eys = sequence_embed(self.embed_y, ys_in)

    batch = len(xs)
    # None represents a zero vector in an encoder.
    _, hf = self.encoder_f(None, exs_f)
    _, hb = self.encoder_b(None, exs_b)
    # Collection of hidden state vectors; the backward ones are put back
    # into source order before being paired with the forward ones.
    hb = [F.get_item(h, range(len(h))[::-1]) for h in hb]
    ht = list(map(lambda x, y: F.concat([x, y], axis=1), hf, hb))

    h_list, h_bar_list, c_s_list, z_s_list = self.decoder(None, ht, eys)

    # It is faster to concatenate data before calculating loss
    # because only one matrix multiplication is called.
    os = h_list
    os_len = [len(s) for s in os]
    os_section = np.cumsum(os_len[:-1])
    concat_os = F.concat(os, axis=0)
    concat_os_out = self.W(concat_os)
    concat_ys_out = F.concat(ys_out, axis=0)

    loss_w = 0
    loss_c1 = 0
    loss_c2 = 0

    # If predicted word is UNK
    concat_pred_w = F.argmax(concat_os_out, axis=1)
    is_unk = concat_pred_w.data == UNK

    count_unk_with_no_att = 0
    if UNK in concat_pred_w.data:
        print(True)
        # Plan 2: concatenate everything
        # (author's size note: total number of words * 2048)
        concat_c_s = F.concat(c_s_list, axis=0)
        concat_h_bar = F.concat(h_bar_list, axis=0)

        c_ss = concat_c_s[is_unk]
        h_bars = concat_h_bar[is_unk]
        c = F.concat([c_ss, h_bars], axis=1)
        ds_hats = F.relu(self.W_hat(c))

        # Shift each sentence's z_s values by a running offset so that they
        # index into the concatenated source ids (``concat_wxs``).
        z_s_len = [len(z_s) - 1 for z_s in z_s_list]
        z_s_section = np.cumsum(z_s_len[:-1])
        valid_z_s_section = np.insert(z_s_section, 0, 0)
        abs_z_s_list = [
            z_s_list[i] + valid_z_s_section[i] for i in range(len(z_s_list))
        ]
        concat_z_s = F.concat(abs_z_s_list, axis=0)
        z_ss = concat_z_s[is_unk]

        true_wys = concat_ys_out[is_unk]
        # Handle every "predicted word == UNK" case individually
        for i, wy in enumerate(true_wys):
            bow = self.xp.array([BOW], 'i')
            wy = int(wy.data)
            print(target_words[wy])
            if wy != UNK and wy != EOS:
                cys = np.array(
                    [[target_char_ids[c] for c in list(target_words[wy])]],
                    np.int32)
            elif wy == UNK:  # should not normally happen
                cys = np.array([[target_char_ids['UNK']]], np.int32)
            elif wy == EOS:
                cys = np.array([[target_char_ids['BOW']]], np.int32)
            cys_in = [F.concat([bow, y], axis=0) for y in cys]
            cys_out = [F.concat([y, bow], axis=0) for y in cys]
            concat_cys_out = F.concat(cys_out, axis=0)
            ceys = sequence_embed(self.embed_yc, cys_in)
            z_s = int(z_ss[i].data)

            ds_hat = F.reshape(ds_hats[i], (1, 1, ds_hats[i].shape[0]))
            if concat_wxs[z_s] != UNK:
                # Character-based decoder without attention
                _, cos = self.char_decoder(ds_hat, ceys)
                print("attなし")
                concat_cos = F.concat(cos, axis=0)
                concat_cos_out = self.W_char(concat_cos)
                loss_c1 = loss_c1 + F.sum(
                    F.softmax_cross_entropy(
                        concat_cos_out, concat_cys_out, reduce='no'))
                count_unk_with_no_att += 1
            else:
                # Character-based decoder with attention
                # NOTE: rebinds ht, h_list, h_bar_list, c_s_list and
                # z_s_list; their word-level values were consumed above.
                ht = char_hidden[z_s]
                h_list, h_bar_list, c_s_list, z_s_list = self.char_att_decoder(
                    ds_hat, ht, ceys)
                print("attあり")
                concat_cos = F.concat(h_list, axis=0)
                concat_cos_out = self.W_char(concat_cos)
                loss_c2 = loss_c2 + F.sum(
                    F.softmax_cross_entropy(
                        concat_cos_out, concat_cys_out, reduce='no'))
    else:
        print(False)

    n_words = concat_ys_out.shape[0]
    n_unk = np.sum(is_unk)
    count_unk_with_att = n_unk - count_unk_with_no_att
    count_kno = n_words - n_unk

    # Word-level loss over the positions that were not predicted as UNK.
    loss_w = F.sum(
        F.softmax_cross_entropy(concat_os_out[is_unk != 1],
                                concat_ys_out[is_unk != 1],
                                reduce='no'))
    loss = F.sum(loss_w + Alpha * loss_c1 + Beta * loss_c2) / n_words
    return loss, count_kno, count_unk_with_no_att, count_unk_with_att
def test_get_item_unsupported_advanced_index(self, slices):
    """Exporting ``get_item`` with these ``slices`` must raise ``ValueError``."""
    def select(x):
        return F.get_item(x, slices=slices)

    model = chainer.Sequential(select)
    sample = input_generator.increasing(2, 3, 4)
    with pytest.raises(ValueError):
        export(model, sample)
def forward(self, inputs, device):
    """Apply ``get_item`` with ``self.slices`` to the single input.

    Args:
        inputs: One-element sequence holding the input variable.
        device: Not referenced by the computation.

    Returns:
        One-element tuple containing the indexed output.
    """
    (x,) = inputs
    return (functions.get_item(x, self.slices),)
def test_advanced_indexing(self):
    """Indexing with a list inside the index tuple must raise ``ValueError``."""
    list_index = ([0, 0, 0],)
    self.assertRaises(
        ValueError, functions.get_item, self.x_data, list_index)
def test_too_many_indices(self):
    """A four-element index on ``self.x_data`` must raise ``InvalidType``."""
    four_indices = (0, 0, 0, 0)
    self.assertRaises(
        type_check.InvalidType, functions.get_item, self.x_data, four_indices)
def forward(enc_words, dec_words, model, ARR):
    """
    Run the forward computation and return the loss.

    :param enc_words: input sentences
    :param dec_words: output sentences
    :param model: the model
    :param ARR: either numpy or cuda.cupy
    :return loss: the loss
    """
    # Record the batch size
    batch_size = len(enc_words[0])
    # Reset the gradients stored in the model (per the original author's note)
    model.reset()
    # Transposed copy used to check which words occur in each input sentence
    enc_key = enc_words.T
    # Convert the sentences fed to the encoder into Variables
    enc_words = [Variable(ARR.array(row, dtype='int32')) for row in enc_words]
    # Run the encoder
    model.encode(enc_words)
    # Initialise the loss
    loss = Variable(ARR.zeros((), dtype='float32'))
    # Feed <eos> (id 0) to the decoder first
    t = Variable(ARR.array([0 for _ in range(batch_size)], dtype='int32'))
    # Run the decoder
    for w in dec_words:
        # Decode one word at a time
        y, att, lambda_ = model.decode(t)
        # Convert the correct words into a Variable
        t = Variable(ARR.array(w, dtype='int32'))
        # Log-softmax of the word scores computed by the generative mode
        s = functions.log_softmax(y)
        # Log-softmax of the attention weights
        att_s = functions.log_softmax(att)
        # Pass lambda through a sigmoid to map it into the range 0..1
        lambda_s = functions.reshape(functions.sigmoid(lambda_), (batch_size, ))
        # Initialise the generative-mode loss
        Pg = Variable(ARR.zeros((), dtype='float32'))
        # Initialise the copy-mode loss
        Pc = Variable(ARR.zeros((), dtype='float32'))
        # Initialise the loss used to learn the balance of lambda
        epsilon = Variable(ARR.zeros((), dtype='float32'))
        # From here the loss is computed word by word within the batch; the
        # original author notes that looping with a for statement is inelegant.
        counter = 0
        for i, words in enumerate(w):
            # -1 is the label attached to words that are not trained on
            # NOTE(review): the nesting of the copy-mode check under this
            # condition is inferred from the statement order -- confirm.
            if words != -1:
                # Generative-mode loss
                Pg += functions.get_item(functions.get_item(
                    s, i), words) * functions.reshape(
                        (1.0 - functions.get_item(lambda_s, i)), ())
                counter += 1
                # If the word to output occurs in the input sentence
                if words in enc_key[i]:
                    # Copy-mode loss
                    Pc += functions.get_item(
                        functions.get_item(att_s, i),
                        list(enc_key[i]).index(words)) * functions.reshape(
                            functions.get_item(lambda_s, i), ())
                    # Train lambda to lean toward the copy mode
                    epsilon += functions.log(functions.get_item(lambda_s, i))
                # If the word to output does not occur in the input sentence
                else:
                    # Train lambda to lean toward the generative mode
                    epsilon += functions.log(1.0 - functions.get_item(lambda_s, i))
        # Scale each term by -1 / max(1, counter), i.e. the number of trained
        # words at this step, and add them up
        Pg *= (-1.0 / np.max([1, counter]))
        Pc *= (-1.0 / np.max([1, counter]))
        epsilon *= (-1.0 / np.max([1, counter]))
        loss += Pg + Pc + epsilon
    return loss