def main():
    parser = argparse.ArgumentParser(description='GraphSAGE')
    parser.add_argument("--dataset", type=str, default='reddit')
    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--dropout", type=float, default=0.5,
                        help="dropout probability")
    parser.add_argument("--lr", type=float, default=1e-2,
                        help="learning rate")
    parser.add_argument("--epochs", type=int, default=200,
                        help="number of training epochs")
    parser.add_argument("--n-hidden", type=int, default=16,
                        help="number of hidden gcn units")
    parser.add_argument("--aggr", type=str, choices=['sum', 'mean'],
                        default='mean', help='Aggregation for messages')
    parser.add_argument("--weight-decay", type=float, default=5e-4,
                        help="Weight for L2 loss")
    parser.add_argument("--eval", action='store_true',
                        help='If not set, we will only do the training part.')
    parser.add_argument("--runs", type=int, default=10)
    args = parser.parse_args()
    print(args)

    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)
    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    # Remove duplicate edges. In PyG, this is a default pre-processing step
    # for Reddit, see
    # https://github.com/rusty1s/pytorch_geometric/blob/master/torch_geometric/datasets/reddit.py#L58
    g = data.graph
    g = g.int().to(device)

    # create GraphSAGE model
    model = GraphSAGE(g, in_feats, args.n_hidden, n_classes, args.aggr,
                      F.relu, args.dropout).to(device)
    loss_fcn = nn.CrossEntropyLoss()

    logger = Logger(args.runs, args)
    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     weight_decay=args.weight_decay)
        for epoch in range(args.epochs):
            model.train()
            if epoch >= 3:
                t0 = time.time()
            # forward
            logits = model(features)
            loss = loss_fcn(logits[train_mask], labels[train_mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue
            train_acc, val_acc, test_acc = evaluate(
                model, features, labels, train_mask, val_mask, test_mask)
            logger.add_result(run, (train_acc, val_acc, test_acc))
            print("Run {:02d} | Epoch {:05d} | Loss {:.4f} | "
                  "Train {:.4f} | Val {:.4f} | Test {:.4f}".format(
                      run, epoch, loss.item(), train_acc, val_acc, test_acc))
        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()

def construct_bucket_vb_wc(word_features, forw_features, fea_len,
                           input_labels, thresholds, pad_word_feature,
                           pad_char_feature, pad_label, label_size):
    """
    Construct bucket by thresholds for viterbi decode, word-level and char-level
    """
    # construct corpus for language model pre-training
    forw_corpus = [pad_char_feature] + list(
        reduce(lambda x, y: x + [pad_char_feature] + y,
               forw_features)) + [pad_char_feature]
    back_corpus = forw_corpus[::-1]
    # two way construct, first build the bucket, then calculate padding
    # length, then do the padding
    buckets = [[[], [], [], [], [], [], [], []]
               for ind in range(len(thresholds))]
    # forw, forw_ind, back, back_in, label, mask
    buckets_len = [0 for ind in range(len(thresholds))]

    # thresholds is the padded length for fea
    # buckets_len is the padded length for char
    for f_f, f_l in zip(forw_features, fea_len):
        cur_len_1 = len(f_l) + 1
        idx = 0
        while thresholds[idx] < cur_len_1:
            idx += 1
        tmp_concat_len = len(f_f) + thresholds[idx] - len(f_l)
        if buckets_len[idx] < tmp_concat_len:
            buckets_len[idx] = tmp_concat_len

    # calc padding
    for f_f, f_l, w_f, i_l in zip(forw_features, fea_len, word_features,
                                  input_labels):
        cur_len = len(f_l)
        idx = 0
        cur_len_1 = cur_len + 1
        while thresholds[idx] < cur_len_1:
            idx += 1
        padded_feature = f_f + [pad_char_feature] * (
            buckets_len[idx] - len(f_f))  # pad feature with <'\n'>, at least one
        padded_feature_len = f_l + [1] * (
            thresholds[idx] - len(f_l))  # pad feature length with <'\n'>, at least one
        padded_feature_len_cum = list(itertools.accumulate(
            padded_feature_len))  # start from 0, but the first is ' ', so the position need not to be -1
        buckets[idx][0].append(padded_feature)  # char
        buckets[idx][1].append(padded_feature_len_cum)
        buckets[idx][2].append(padded_feature[::-1])
        buckets[idx][3].append(
            [buckets_len[idx] - 1] +
            [buckets_len[idx] - 1 - tup
             for tup in padded_feature_len_cum[:-1]])
        buckets[idx][4].append(w_f + [pad_word_feature] *
                               (thresholds[idx] - cur_len))  # word
        buckets[idx][5].append(
            [i_l[ind] * label_size + i_l[ind + 1]
             for ind in range(0, cur_len)] +
            [i_l[cur_len] * label_size + pad_label] +
            [pad_label * label_size + pad_label] *
            (thresholds[idx] - cur_len_1))  # has additional start, label
        buckets[idx][6].append(
            [1] * cur_len_1 +
            [0] * (thresholds[idx] - cur_len_1))  # has additional start, mask
        buckets[idx][7].append(
            [len(f_f) + thresholds[idx] - len(f_l), cur_len_1])
    bucket_dataset = [
        CRFDataset_WC(torch.LongTensor(bucket[0]), torch.LongTensor(bucket[1]),
                      torch.LongTensor(bucket[2]), torch.LongTensor(bucket[3]),
                      torch.LongTensor(bucket[4]), torch.LongTensor(bucket[5]),
                      torch.ByteTensor(bucket[6]), torch.LongTensor(bucket[7]))
        for bucket in buckets
    ]
    return bucket_dataset, forw_corpus, back_corpus

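# A minimal, self-contained sketch (not part of the original corpus) of the
# two ideas construct_bucket_vb_wc relies on: picking the smallest bucket
# threshold that fits a sequence (plus its extra start slot), and turning
# per-word character lengths into cumulative end positions with
# itertools.accumulate. All names below are illustrative only.
import itertools


def pick_bucket(thresholds, seq_len_plus_one):
    # smallest threshold that can hold the sequence plus one extra position
    idx = 0
    while thresholds[idx] < seq_len_plus_one:
        idx += 1
    return idx


thresholds = [5, 10, 20]
fea_len = [3, 1, 4]                                 # chars per word
idx = pick_bucket(thresholds, len(fea_len) + 1)     # -> 0 (bucket of size 5)
padded_len = fea_len + [1] * (thresholds[idx] - len(fea_len))
print(list(itertools.accumulate(padded_len)))       # [3, 4, 8, 9, 10]
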
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if args.self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()

    # create TAGCN model
    model = TAGCN(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                  F.relu, args.dropout)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))

def make_permute(feature, reuse_len, seq_len, perm_size, num_predict):
    inputs = torch.LongTensor(feature.pop("input"))
    target = torch.LongTensor(feature.pop("target"))
    is_masked = torch.ByteTensor(feature.pop("is_masked"))

    non_reuse_len = seq_len - reuse_len
    assert perm_size <= reuse_len and perm_size <= non_reuse_len

    # (reuse, reuse), (reuse,), (reuse,), (reuse,), (reuse,)
    perm_mask_0, target_0, target_mask_0, input_k_0, input_q_0 = _local_perm(
        inputs[:reuse_len],  # inp
        target[:reuse_len],
        is_masked[:reuse_len],
        perm_size,
        reuse_len)

    # (non_reuse, non_reuse), (non_reuse,), (non_reuse,), (non_reuse,), (non_reuse,)
    perm_mask_1, target_1, target_mask_1, input_k_1, input_q_1 = _local_perm(
        inputs[reuse_len:],  # (senA, sep, senB, sep, cls)
        target[reuse_len:],
        is_masked[reuse_len:],
        perm_size,
        non_reuse_len)

    # (reuse, seq) / ones appended last
    perm_mask_0 = torch.cat(
        [perm_mask_0, torch.ones([reuse_len, non_reuse_len])], dim=1)
    # (non_reuse, seq) / zeros prepended first
    perm_mask_1 = torch.cat(
        [torch.zeros([non_reuse_len, reuse_len]), perm_mask_1], dim=1)
    # (seq, seq)
    perm_mask = torch.cat([perm_mask_0, perm_mask_1], dim=0)
    # (seq,)
    target = torch.cat([target_0, target_1], dim=0)
    # (seq,)
    target_mask = torch.cat([target_mask_0, target_mask_1], dim=0)
    # (seq,)
    input_k = torch.cat([input_k_0, input_k_1], dim=0)
    # (seq,)
    input_q = torch.cat([input_q_0, input_q_1], dim=0)

    if num_predict is not None:
        # (0 .. seq-1)
        indices = torch.arange(seq_len, dtype=torch.int64)
        bool_target_mask = target_mask.bool()
        # (actual_predict,)
        indices = indices[bool_target_mask]

        ##### extra padding due to CLS/SEP introduced after prepro
        actual_num_predict = indices.shape[0]
        # pad_len is zero when num_predict == actual_num_predict
        pad_len = num_predict - actual_num_predict
        assert seq_len >= actual_num_predict

        ##### target_mapping
        # (actual_predict, seq)
        target_mapping = torch.eye(seq_len, dtype=torch.float32)[indices]
        # (pad_len, seq)
        paddings = torch.zeros([pad_len, seq_len], dtype=target_mapping.dtype)
        # (predict, seq)
        target_mapping = torch.cat([target_mapping, paddings], dim=0)
        feature["target_mapping"] = torch.reshape(target_mapping,
                                                  [num_predict, seq_len])

        ##### target
        # (actual_predict,)
        target = target[bool_target_mask]
        # (pad_len,)
        paddings = torch.zeros([pad_len], dtype=target.dtype)
        # (predict,)
        target = torch.cat([target, paddings], dim=0)
        feature["target"] = torch.reshape(target, [num_predict])

        ##### target mask
        # (predict,)
        target_mask = torch.cat([
            torch.ones([actual_num_predict], dtype=torch.float32),
            torch.zeros([pad_len], dtype=torch.float32)
        ], dim=0)
        feature["target_mask"] = torch.reshape(target_mask, [num_predict])
    else:
        feature["target"] = torch.reshape(target, [seq_len])
        feature["target_mask"] = torch.reshape(target_mask, [seq_len])

    # reshape back to fixed shape
    # (seq,)
    feature["seg_id"] = torch.IntTensor(feature["seg_id"])
    # (seq, seq)
    feature["perm_mask"] = torch.reshape(perm_mask, [seq_len, seq_len])
    # (seq,)
    feature["input_k"] = torch.reshape(input_k, [seq_len])
    # (seq,)
    feature["input_q"] = torch.reshape(input_q, [seq_len])

    return feature

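# A small illustrative sketch (not from the original file) of how
# make_permute builds target_mapping rows: indexing an identity matrix with
# the positions selected by the target mask yields one one-hot row of length
# seq_len per predicted position.
import torch

seq_len = 6
target_mask = torch.tensor([0., 1., 0., 0., 1., 0.])
indices = torch.arange(seq_len)[target_mask.bool()]  # tensor([1, 4])
target_mapping = torch.eye(seq_len)[indices]         # shape (2, 6), one-hot rows
print(target_mapping)
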
def forward(self,  # type: ignore
            words,
            words_embeds,  # : Dict[str, torch.LongTensor],
            pos_tags: torch.LongTensor = None,
            head_tags: torch.LongTensor = None,
            head_indices: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
    # pylint: disable=arguments-differ
    """
    Parameters
    ----------
    words : Dict[str, torch.LongTensor], required
        The output of ``TextField.as_array()``, which should typically be
        passed directly to a ``TextFieldEmbedder``. This output is a
        dictionary mapping keys to ``TokenIndexer`` tensors. At its most
        basic, using a ``SingleIdTokenIndexer`` this is:
        ``{"tokens": Tensor(batch_size, sequence_length)}``. This dictionary
        will have the same keys as were used for the ``TokenIndexers`` when
        you created the ``TextField`` representing your sequence. The
        dictionary is designed to be passed directly to a
        ``TextFieldEmbedder``, which knows how to combine different word
        representations into a single vector per token in your input.
    pos_tags : ``torch.LongTensor``, required.
        The output of a ``SequenceLabelField`` containing POS tags. POS tags
        are required regardless of whether they are used in the model,
        because they are used to filter the evaluation metric to only
        consider heads of words which are not punctuation.
    head_tags : torch.LongTensor, optional (default = None)
        A torch tensor representing the sequence of integer gold class
        labels for the arcs in the dependency parse. Has shape
        ``(batch_size, sequence_length)``.
    head_indices : torch.LongTensor, optional (default = None)
        A torch tensor representing the sequence of integer indices denoting
        the parent of every word in the dependency parse. Has shape
        ``(batch_size, sequence_length)``.

    Returns
    -------
    An output dictionary consisting of:
    loss : ``torch.FloatTensor``, optional
        A scalar loss to be optimised.
    arc_loss : ``torch.FloatTensor``
        The loss contribution from the unlabeled arcs.
    tag_loss : ``torch.FloatTensor``, optional
        The loss contribution from predicting the dependency tags for the
        gold arcs.
    heads : ``torch.FloatTensor``
        The predicted head indices for each word. A tensor of shape
        (batch_size, sequence_length).
    head_tags : ``torch.FloatTensor``
        The predicted head tags for each arc. A tensor of shape
        (batch_size, sequence_length).
    mask : ``torch.LongTensor``
        A mask denoting the padded elements in the batch.
    """
    # LISA
    # embedded_text_input = words_embeds.view(1, len(words_embeds), -1)  # self.text_field_embedder(words)
    # LISA2
    embedded_text_input = words_embeds  # self.text_field_embedder(words)
    bsz, seqlen, dim = words_embeds.shape
    # mask = get_text_field_mask(words)
    # LISA
    # mask = torch.LongTensor([1 for _ in words]).view(1, -1)
    # LISA2
    # mask = torch.LongTensor(bsz, seqlen).fill_(1)
    mask = torch.ByteTensor(bsz, seqlen).fill_(1).to(self.device)

    embedded_text_input = self._input_dropout(embedded_text_input)
    encoded_text = self.encoder(embedded_text_input, mask)

    batch_size, _, encoding_dim = encoded_text.size()
    head_sentinel = self._head_sentinel.expand(batch_size, 1, encoding_dim)
    # Concatenate the head sentinel onto the sentence representation.
    encoded_text = torch.cat([head_sentinel, encoded_text], 1)
    mask = torch.cat([mask.new_ones(batch_size, 1), mask], 1)
    if head_indices is not None:
        head_indices = torch.cat(
            [head_indices.new_zeros(batch_size, 1), head_indices], 1)
    if head_tags is not None:
        head_tags = torch.cat(
            [head_tags.new_zeros(batch_size, 1), head_tags], 1)
    float_mask = mask.float()

    encoded_text = self._dropout(encoded_text)

    # shape (batch_size, sequence_length, arc_representation_dim)
    head_arc_representation = self._dropout(
        self.head_arc_feedforward(encoded_text))
    child_arc_representation = self._dropout(
        self.child_arc_feedforward(encoded_text))

    # shape (batch_size, sequence_length, tag_representation_dim)
    head_tag_representation = self._dropout(
        self.head_tag_feedforward(encoded_text))
    child_tag_representation = self._dropout(
        self.child_tag_feedforward(encoded_text))
    # shape (batch_size, sequence_length, sequence_length)
    attended_arcs = self.arc_attention(head_arc_representation,
                                       child_arc_representation)

    minus_inf = -1e8
    minus_mask = (1 - float_mask) * minus_inf
    attended_arcs = attended_arcs + minus_mask.unsqueeze(2) + \
        minus_mask.unsqueeze(1)

    if self.training or not self.use_mst_decoding_for_validation:
        predicted_heads, predicted_head_tags = self._greedy_decode(
            head_tag_representation, child_tag_representation,
            attended_arcs, mask)
    else:
        predicted_heads, predicted_head_tags = self._mst_decode(
            head_tag_representation, child_tag_representation,
            attended_arcs, mask)

    if head_indices is not None and head_tags is not None:
        arc_nll, tag_nll = self._construct_loss(
            head_tag_representation=head_tag_representation,
            child_tag_representation=child_tag_representation,
            attended_arcs=attended_arcs,
            head_indices=head_indices,
            head_tags=head_tags,
            mask=mask)
        loss = arc_nll + tag_nll

        evaluation_mask = self._get_mask_for_eval(mask[:, 1:], pos_tags)
        # We calculate attachment scores for the whole sentence
        # but excluding the symbolic ROOT token at the start,
        # which is why we start from the second element in the sequence.
        self._attachment_scores(predicted_heads[:, 1:],
                                predicted_head_tags[:, 1:],
                                head_indices[:, 1:], head_tags[:, 1:],
                                evaluation_mask)
    else:
        arc_nll, tag_nll = self._construct_loss(
            head_tag_representation=head_tag_representation,
            child_tag_representation=child_tag_representation,
            attended_arcs=attended_arcs,
            head_indices=predicted_heads.long(),
            head_tags=predicted_head_tags.long(),
            mask=mask)
        loss = arc_nll + tag_nll

    output_dict = {
        "heads": predicted_heads,
        "head_tags": predicted_head_tags,
        "arc_loss": arc_nll,
        "tag_loss": tag_nll,
        "loss": loss,
        "mask": mask,
    }
    return output_dict

def main(args):
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    multitask_data = set(['ppi'])
    multitask = args.dataset in multitask_data

    # load and preprocess dataset
    data = load_data(args)

    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)

    # Normalize features
    if args.normalize:
        train_feats = data.features[train_nid]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats)
        features = scaler.transform(data.features)
    else:
        features = data.features
    features = torch.FloatTensor(features)
    if not multitask:
        labels = torch.LongTensor(data.labels)
    else:
        labels = torch.FloatTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask).type(torch.bool)
    val_mask = torch.ByteTensor(data.val_mask).type(torch.bool)
    test_mask = torch.ByteTensor(data.test_mask).type(torch.bool)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.sum().item()
    n_val_samples = val_mask.sum().item()
    n_test_samples = test_mask.sum().item()

    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples,
           n_test_samples))

    # create GCN model
    g = data.graph
    if args.self_loop and not args.dataset.startswith('reddit'):
        g.remove_edges_from(g.selfloop_edges())
        g.add_edges_from(zip(g.nodes(), g.nodes()))
        print("adding self-loop edges")
    g = DGLGraph(g, readonly=True)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    print(torch.cuda.get_device_name(0))

    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    print('labels shape:', labels.shape)

    cluster_iterator = ClusterIter(args.dataset, g, args.psize,
                                   args.batch_size, train_nid,
                                   use_pp=args.use_pp)

    print("features shape, ", features.shape)

    model = GraphSAGE(in_feats, args.n_hidden, n_classes, args.n_layers,
                      F.relu, args.dropout, args.use_pp)

    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    writer = SummaryWriter(log_dir)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
    print("current memory after model before training",
          torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)
    start_time = time.time()
    best_f1 = -1

    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(cluster_iterator):
            # sync with upper level training graph
            cluster.copy_from_parent()
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['labels']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # in PPI case, `log_every` is chosen to log one time per epoch.
            # Choose your log freq dynamically when you want more info within
            # one epoch
            if j % args.log_every == 0:
                print(f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                      f"{len(cluster_iterator)}:training loss", loss.item())
                writer.add_scalar('train/loss', loss.item(),
                                  global_step=j + epoch * len(cluster_iterator))
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate
        if epoch % args.val_every == 0:
            val_f1_mic, val_f1_mac = evaluate(model, g, labels, val_mask,
                                              multitask)
            print("Val F1-mic{:.4f}, Val F1-mac{:.4f}".format(
                val_f1_mic, val_f1_mac))
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(),
                           os.path.join(log_dir, 'best_model.pkl'))
            writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
            writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)

    end_time = time.time()
    print(f'training using time {end_time - start_time}')

    # test
    if args.use_val:
        model.load_state_dict(
            torch.load(os.path.join(log_dir, 'best_model.pkl')))
    test_f1_mic, test_f1_mac = evaluate(model, g, labels, test_mask,
                                        multitask)
    print("Test F1-mic{:.4f}, Test F1-mac{:.4f}".format(
        test_f1_mic, test_f1_mac))
    writer.add_scalar('test/f1-mic', test_f1_mic)
    writer.add_scalar('test/f1-mac', test_f1_mac)

def forward(self, predictions, targets):
    """Multibox Loss
    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            priors shape: torch.size(num_priors,4)

        targets (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).
    """
    arm_loc_data, arm_conf_data, trm_loc_data1, trm_conf_data1, \
        trm_loc_data2, trm_conf_data2, trm_loc_data3, trm_conf_data3, \
        priors = predictions
    # print(arm_loc_data.size(), arm_conf_data.size(),
    #       odm_loc_data.size(), odm_conf_data.size(), priors.size())
    # input()
    if self.use_ARM:
        loc_data1, conf_data1 = trm_loc_data1, trm_conf_data1
        loc_data2, conf_data2 = trm_loc_data2, trm_conf_data2
        loc_data3, conf_data3 = trm_loc_data3, trm_conf_data3
    # assert loc_data1.size == loc_data2.size == loc_data3.size
    num = loc_data1.size(0)
    priors = priors[:loc_data1.size(1), :]
    num_priors = (priors.size(0))
    num_classes = self.num_classes
    # print(loc_data.size(), conf_data.size(), priors.size())

    # init valid_scale_index
    pos_for_small = torch.ByteTensor(num, num_priors)
    pos_for_middle = torch.ByteTensor(num, num_priors)
    pos_for_big = torch.ByteTensor(num, num_priors)

    # match priors (default boxes) and ground truth boxes
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        if num_classes == 2:
            labels = labels >= 0
        defaults = priors.data
        if self.use_ARM:
            matches = refine_match_return_matches(
                self.threshold, truths, defaults, self.variance, labels,
                loc_t, conf_t, idx, arm_loc_data[idx].data)
        else:
            matches = refine_match_return_matches(
                self.threshold, truths, defaults, self.variance, labels,
                loc_t, conf_t, idx)
        # matches: using ARM loc as priors to match with pred loc
        pos_for_small[idx], pos_for_middle[idx], pos_for_big[idx] = \
            scaleAssign(matches, conf_t, idx)

    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets
    # loc_t = Variable(loc_t, requires_grad=False)
    # conf_t = Variable(conf_t, requires_grad=False)
    loc_t.requires_grad = False
    conf_t.requires_grad = False
    # print(loc_t.size(), conf_t.size())

    if self.use_ARM:
        P = F.softmax(arm_conf_data, 2)
        arm_conf_tmp = P[:, :, 1]
        object_score_index = arm_conf_tmp <= self.theta
        pos_for_small[object_score_index.data] = 0
        pos_for_middle[object_score_index.data] = 0
        pos_for_big[object_score_index.data] = 0
        pos = conf_t > 0
        pos[object_score_index.data] = 0

    if not self.use_multiscale:
        pos_for_small = pos
        pos_for_middle = pos
        pos_for_big = pos

    pos_for_small = pos_for_small.cuda()
    pos_for_middle = pos_for_middle.cuda()
    pos_for_big = pos_for_big.cuda()
    # print(pos.size())
    # num_pos = pos.sum(dim=1, keepdim=True)

    # Localization Loss (Smooth L1)
    # Shape: [batch,num_priors,4]
    loss_l_for_small = self.computeSmothL1Loss(pos_for_WHAT=pos_for_small,
                                               loc_pred=loc_data1,
                                               loc_thruth=loc_t)
    loss_l_for_middle = self.computeSmothL1Loss(pos_for_WHAT=pos_for_middle,
                                                loc_pred=loc_data2,
                                                loc_thruth=loc_t)
    loss_l_for_big = self.computeSmothL1Loss(pos_for_WHAT=pos_for_big,
                                             loc_pred=loc_data3,
                                             loc_thruth=loc_t)
    '''
    pos_for_middle_idx = pos_for_middle.unsqueeze(pos_for_middle.dim()).expand_as(loc_data2)
    loc_p2 = loc_data2[pos_for_middle_idx].view(-1, 4)
    loc_t = loc_t[pos_for_middle_idx].view(-1, 4)
    loss_l_for_middle = F.smooth_l1_loss(loc_p2, loc_t, reduction='sum')

    pos_for_big_idx = pos_for_big.unsqueeze(pos_for_big.dim()).expand_as(loc_data3)
    loc_p3 = loc_data1[pos_for_big_idx].view(-1, 4)
    loc_t = loc_t[pos_for_big_idx].view(-1, 4)
    loss_l_for_big = F.smooth_l1_loss(loc_p3, loc_t, reduction='sum')
    '''

    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data1.view(-1, self.num_classes)
    loss_c_for_small = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    batch_conf = conf_data2.view(-1, self.num_classes)
    loss_c_for_middle = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    batch_conf = conf_data3.view(-1, self.num_classes)
    loss_c_for_big = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    # print(loss_c.size())

    loss_conf_for_small, num_pos_for_small = self.computeCrossEntropy(
        loss_c_for_WHAT=loss_c_for_small, num_batch=num,
        pos_for_WHAT=pos_for_small, conf_data=conf_data1, conf_truth=conf_t)
    loss_conf_for_middle, num_pos_for_middle = self.computeCrossEntropy(
        loss_c_for_WHAT=loss_c_for_middle, num_batch=num,
        pos_for_WHAT=pos_for_middle, conf_data=conf_data2, conf_truth=conf_t)
    loss_conf_for_big, num_pos_for_big = self.computeCrossEntropy(
        loss_c_for_WHAT=loss_c_for_big, num_batch=num,
        pos_for_WHAT=pos_for_big, conf_data=conf_data3, conf_truth=conf_t)

    # # Hard Negative Mining
    # loss_c_for_small[pos_for_small.view(-1,1)] = 0  # filter out pos boxes for now
    # loss_c_for_small = loss_c_for_small.view(num, -1)
    # _, loss_idx = loss_c_for_small.sort(1, descending=True)
    # _, idx_rank = loss_idx.sort(1)
    # num_pos = pos_for_small.long().sum(1, keepdim=True)
    # num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos_for_small.size(1)-1)
    # neg = idx_rank < num_neg.expand_as(idx_rank)
    # neg = neg.long()
    # # print(num_pos.size(), num_neg.size(), neg.size())
    #
    # # Confidence Loss Including Positive and Negative Examples
    # pos_idx = pos_for_small.unsqueeze(2).expand_as(conf_data1)
    # neg_idx = neg.unsqueeze(2).expand_as(conf_data1)
    # conf_p = conf_data1[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)
    # targets_weighted = conf_t[(pos_for_small+neg).gt(0)]
    # # print(pos_idx.size(), neg_idx.size(), conf_p.size(), targets_weighted.size())
    # loss_c_for_small = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
    # num_pos_for_small = num_pos
    #
    # # Hard Negative Mining
    # loss_c_for_middle[pos_for_middle.view(-1,1)] = 0  # filter out pos boxes for now
    # loss_c_for_middle = loss_c_for_middle.view(num, -1)
    # _, loss_idx = loss_c_for_middle.sort(1, descending=True)
    # _, idx_rank = loss_idx.sort(1)
    # num_pos = pos_for_middle.long().sum(1, keepdim=True)
    # num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos_for_middle.size(1)-1)
    # neg = idx_rank < num_neg.expand_as(idx_rank)
    # neg = neg.long()
    # # print(num_pos.size(), num_neg.size(), neg.size())
    #
    # # Confidence Loss Including Positive and Negative Examples
    # pos_idx = pos_for_middle.unsqueeze(2).expand_as(conf_data2)
    # neg_idx = neg.unsqueeze(2).expand_as(conf_data2).long()
    # conf_p = conf_data2[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)
    # targets_weighted = conf_t[(pos_for_middle+neg).gt(0)]
    # # print(pos_idx.size(), neg_idx.size(), conf_p.size(), targets_weighted.size())
    # loss_c_for_middle = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
    # num_pos_for_middle = num_pos
    #
    # # Hard Negative Mining
    # loss_c_for_big[pos_for_big.view(-1,1)] = 0  # filter out pos boxes for now
    # loss_c_for_big = loss_c_for_big.view(num, -1)
    # _, loss_idx = loss_c_for_big.sort(1, descending=True)
    # _, idx_rank = loss_idx.sort(1)
    # num_pos = pos_for_big.long().sum(1, keepdim=True)
    # num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos_for_big.size(1)-1)
    # neg = idx_rank < num_neg.expand_as(idx_rank)
    # neg = neg.long()
    # # print(num_pos.size(), num_neg.size(), neg.size())
    #
    # # Confidence Loss Including Positive and Negative Examples
    # pos_idx = pos_for_big.unsqueeze(2).expand_as(conf_data3)
    # neg_idx = neg.unsqueeze(2).expand_as(conf_data3).long()
    # conf_p = conf_data3[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)
    # targets_weighted = conf_t[(pos_for_big+neg).gt(0)]
    # # print(pos_idx.size(), neg_idx.size(), conf_p.size(), targets_weighted.size())
    # loss_c_for_big = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
    # num_pos_for_big = num_pos

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    num_pos = pos.long().sum(1, keepdim=True)
    N_for_all = num_pos.data.sum().float()
    N_for_small = num_pos_for_small.data.sum().float()
    N_for_middle = num_pos_for_middle.data.sum().float()
    N_for_big = num_pos_for_big.data.sum().float()
    N_for_small = max(N_for_small, 1.0)
    N_for_middle = max(N_for_middle, 1.0)
    N_for_big = max(N_for_big, 1.0)
    # print('all:{} small:{} middle:{} big:{}'.format(
    #     N_for_all, N_for_small, N_for_middle, N_for_big))
    # N = N_for_small + N_for_middle + N_for_big
    # N = max(num_pos.data.sum().float(), 1)

    # loss_l_for_small /= N_for_small
    # loss_l_for_middle /= N_for_middle
    # loss_l_for_big /= N_for_big
    # loss_l = loss_l_for_small + loss_l_for_middle + loss_l_for_big
    # loss_conf_for_small /= N_for_small
    # loss_conf_for_middle /= N_for_middle
    # loss_conf_for_big /= N_for_big
    # loss_c = loss_conf_for_small + loss_conf_for_middle + loss_conf_for_big
    # print(N, loss_l, loss_c)

    return loss_l_for_small / N_for_small, \
        loss_l_for_middle / N_for_middle, \
        loss_l_for_big / N_for_big, \
        loss_conf_for_small / N_for_small, \
        loss_conf_for_middle / N_for_middle, \
        loss_conf_for_big / N_for_big, \
        N_for_all, N_for_small, N_for_middle, N_for_big


# # predictions
# arm_loc = torch.rand((4,100,4))
# arm_conf = torch.rand((4,100,2))
# odm_loc1 = torch.rand((4,100,4))
# odm_loc2 = torch.rand((4,100,4))
# odm_loc3 = torch.rand((4,100,4))
# odm_conf1 = torch.rand((4,100,21))
# odm_conf2 = torch.rand((4,100,21))
# odm_conf3 = torch.rand((4,100,21))
# anchor = torch.rand((100,4))
# # ground truths
# gt1 = torch.Tensor([[0.56,0.42,0.8,0.5,14.]])
# gt2 = torch.Tensor([[0.23,0.24,0.56,0.34,7.]])
# gt3 = torch.Tensor([[0.4527,0.0516,0.4938,0.1463,15.],
#                     [0.3247,0.0516,0.7708,0.5237,14.0]])
# gt4 = torch.Tensor([[0.4863,0.3579,0.7280,0.8428,11.0]])
# # put them together
# truths = [gt1,gt2,gt3,gt4]
# preds = (arm_loc, arm_conf, odm_loc1, odm_conf1, odm_loc2, odm_conf2,
#          odm_loc3, odm_conf3, anchor)
# # init a loss function
# lossfunction = multitridentMultiBoxLoss(21, 0.0, True, 0, True, 3, 0.5,
#                                         False, False, use_ARM=True)
# loss = lossfunction.forward(preds, truths)
# print(loss)

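# A standalone sketch (assumed, not from the original file) of the per-prior
# confidence score computed above before mining hard negatives: log-sum-exp
# over class scores minus the score of the ground-truth class, written here
# with torch.logsumexp instead of the repo's log_sum_exp helper.
import torch

batch_conf = torch.randn(8, 21)              # 8 priors, 21 classes
conf_t = torch.randint(0, 21, (8, 1))        # ground-truth class per prior
loss_c = torch.logsumexp(batch_conf, dim=1, keepdim=True) \
    - batch_conf.gather(1, conf_t)
print(loss_c.squeeze(1))                     # higher = harder example
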
dataloader = torch.utils.data.DataLoader(dataset,
                                         batch_size=opt.batch_size,
                                         shuffle=False,
                                         num_workers=opt.num_workers)

dataset_val = DataLoader(opt, split=opt.val_split)
dataloader_val = torch.utils.data.DataLoader(dataset_val,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.num_workers)

input_imgs = torch.FloatTensor(1)
input_seqs = torch.LongTensor(1)
input_ppls = torch.FloatTensor(1)
gt_bboxs = torch.FloatTensor(1)
mask_bboxs = torch.ByteTensor(1)
gt_seqs = torch.LongTensor(1)
input_num = torch.LongTensor(1)

if opt.cuda:
    input_imgs = input_imgs.cuda()
    input_seqs = input_seqs.cuda()
    gt_seqs = gt_seqs.cuda()
    input_num = input_num.cuda()
    input_ppls = input_ppls.cuda()
    gt_bboxs = gt_bboxs.cuda()
    mask_bboxs = mask_bboxs.cuda()

input_imgs = Variable(input_imgs)
input_seqs = Variable(input_seqs)
gt_seqs = Variable(gt_seqs)

def cv2_tensor(pic):
    img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
    img = img.view(pic.shape[0], pic.shape[1], 3)
    img = img.transpose(0, 2).transpose(1, 2).contiguous()
    return img.float().div(255)

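# A hedged usage sketch for cv2_tensor: the synthetic array below stands in
# for a real cv2.imread result, which returns an H x W x 3 uint8 BGR image.
import numpy as np

pic = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in for cv2.imread(...)
tensor = cv2_tensor(pic)
print(tensor.shape)  # torch.Size([3, 480, 640]), values in [0, 1]
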
def forward(self, enc_in, enc_out, enc_mask, max_length=None, inputs=None,
            use_teacher_forcing=True):
    if max_length is None:
        max_length = self.dec_max_len
    batch_size = enc_out.size(0)
    enc_in2 = torch.unsqueeze(enc_in, dim=1)
    enc_mask = torch.ByteTensor(enc_mask).cuda()
    decoder_output = torch.empty(batch_size, max_length,
                                 self.output_size).cuda()
    sequence_symbols = torch.empty(batch_size, max_length,
                                   dtype=torch.int32).cuda()
    dec_hidden1, dec_hidden2, elmo_hidden1, elmo_hidden2 = None, None, None, None
    dec_symbol = START * torch.ones(enc_out.size(0), 1,
                                    dtype=torch.long).cuda()
    dec_att_out = torch.zeros(batch_size, 1, self.attn_size).cuda()
    select_read = torch.zeros(batch_size, 1, self.enc_hidden_size).cuda()
    for i in range(max_length):
        # Step 1: concatenate the previous attention output with the
        # current input.
        in_embed, elmo_hidden1, elmo_hidden2 = self.dec_elmo_embed(
            dec_symbol, elmo_hidden1, elmo_hidden2)
        dec_in = self.dropout(
            self.inputlayer(torch.cat((in_embed, dec_att_out, select_read),
                                      dim=2))
            + self.dec_pos_embed[:, i:i + 1, :])
        # Two layers: pass through the RNNs to get the output.
        dec_out, dec_hidden1 = self.rnn(dec_in, dec_hidden1)
        dec_att_out = self.attention(dec_out, enc_out, enc_mask)
        dec_out, dec_hidden2 = self.rnn2(dec_att_out, dec_hidden2)
        dec_att_out = self.attention2(dec_out, enc_out, enc_mask) + dec_att_out
        # Copy score.
        score_c = torch.bmm(
            dec_att_out,
            torch.transpose(torch.tanh(self.W_copy(enc_out)), 1, 2))
        score_c.data.masked_fill_(enc_mask, -float('inf'))
        score_c = F.softmax(score_c, dim=-1)
        score_e = score_c * self.scale * self.scale
        # Project through the vocab layer to get the next output.
        dec_to_vocab = self.outlayer(dec_att_out)
        dec_to_vocab.scatter_add_(dim=-1, index=enc_in2, src=score_e)
        decoder_output[:, i:i + 1, :] = dec_to_vocab
        if use_teacher_forcing:
            dec_symbol = inputs[:, i:i + 1]
            sequence_symbols[:, i:i + 1] = torch.argmax(dec_to_vocab, dim=2)
        else:
            dec_symbol = torch.argmax(dec_to_vocab, dim=2)
            sequence_symbols[:, i:i + 1] = dec_symbol
        score_f = score_c * ((enc_in == dec_symbol).float().unsqueeze(dim=1))
        select_read = torch.bmm(score_f, enc_out)
    return decoder_output, sequence_symbols

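# An illustrative sketch (not from the original): how scatter_add_ folds copy
# scores over source positions into the vocabulary distribution, as in the
# decoder above. Note how the score for token id 5, which appears twice in
# the source, accumulates.
import torch

vocab_size = 10
enc_in2 = torch.tensor([[[3, 5, 5, 7]]])          # source token ids, (B, 1, src_len)
score_e = torch.tensor([[[0.1, 0.2, 0.3, 0.4]]])  # copy scores,      (B, 1, src_len)
dec_to_vocab = torch.zeros(1, 1, vocab_size)      # generation scores
dec_to_vocab.scatter_add_(dim=-1, index=enc_in2, src=score_e)
print(dec_to_vocab)  # ids 3, 5, 7 receive 0.1, 0.5 and 0.4
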
def beam_search(self, enc_in, enc_out, enc_mask, beam_width=5):
    # Only accepts a single example at a time; this is decoding anyway,
    # so it is allowed to be slow.
    max_length = self.dec_max_len
    batch_size = enc_out.size(0)
    assert (batch_size == 1)
    # There will be beam_width outputs in total.
    dec_hidden1, dec_hidden2, elmo_hidden1, elmo_hidden2 = None, None, None, None
    dec_symbol = START * torch.ones(beam_width, 1, dtype=torch.long).cuda()
    dec_att_out = torch.zeros(beam_width, 1, self.attn_size).cuda()
    select_read = torch.zeros(beam_width, 1, self.enc_hidden_size).cuda()
    # Replicate the encoder inputs beam_width times.
    enc_out = enc_out.repeat(beam_width, 1, 1)
    enc_mask = torch.ByteTensor(np.tile(enc_mask, [beam_width, 1, 1])).cuda()
    enc_in = enc_in.repeat(beam_width, 1)
    enc_in2 = torch.unsqueeze(enc_in, dim=1)
    beam_proba = torch.zeros(beam_width, 1).cuda()
    sequence_symbols = []
    length = 0
    for i in range(max_length):
        in_embed, elmo_hidden1, elmo_hidden2 = self.dec_elmo_embed(
            dec_symbol, elmo_hidden1, elmo_hidden2)
        dec_in = self.dropout(
            self.inputlayer(torch.cat((in_embed, dec_att_out, select_read),
                                      dim=2))
            + self.dec_pos_embed[:, i:i + 1, :])
        # Two layers: pass through the RNNs to get the output.
        dec_out, dec_hidden1 = self.rnn(dec_in, dec_hidden1)
        dec_att_out = self.attention(dec_out, enc_out, enc_mask)
        dec_out, dec_hidden2 = self.rnn2(dec_att_out, dec_hidden2)
        dec_att_out = self.attention2(dec_out, enc_out, enc_mask) + dec_att_out
        # Copy score.
        score_c = torch.bmm(
            dec_att_out,
            torch.transpose(torch.tanh(self.W_copy(enc_out)), 1, 2))
        score_c.data.masked_fill_(enc_mask, -float('inf'))
        score_c = F.softmax(score_c, dim=-1)
        score_e = score_c * self.scale * self.scale
        # Project through the vocab layer to get the next output.
        dec_to_vocab = self.outlayer(dec_att_out)
        dec_to_vocab.scatter_add_(dim=-1, index=enc_in2, src=score_e)
        # Find the largest probabilities.
        proba = F.log_softmax(dec_to_vocab, dim=2).squeeze() + beam_proba
        if i == 0:
            select = torch.topk(proba[0], beam_width)[1]
            dec_symbol = select.reshape(beam_width, 1)
            beam_proba = proba[0, select].reshape(beam_width, 1)
            sequence_symbols.append(dec_symbol)
            choose = select // self.output_size
        else:
            if i <= 26:
                maxproba = torch.max(proba, dim=0)
                proba2 = maxproba[0]
                index = maxproba[1]
                select = torch.topk(proba2, beam_width)[1]
                choose = index[select]
                beam_proba = proba2[select].reshape(beam_width, 1)
                dec_symbol = select.reshape(beam_width, 1)
            else:
                proba = proba.reshape(-1)
                select = torch.topk(proba, beam_width)[1]
                choose = select // self.output_size
                beam_proba = proba[select].reshape(beam_width, 1)
                select = select % self.output_size  # which token
                dec_symbol = select.reshape(beam_width, 1)
            # Note: the symbols have to be re-arranged here!
            ls = torch.cat((sequence_symbols[-1][choose, :], dec_symbol),
                           dim=1)
            sequence_symbols.append(ls)
            if dec_symbol[0, 0] == END:
                break
        # TODO: this step needs careful thought and probably a fix, because
        # score_f depends heavily on the sequence chosen at the previous step.
        score_f = score_c[choose, :] * (
            (enc_in == dec_symbol).float().unsqueeze(dim=1))
        select_read = torch.bmm(score_f, enc_out)
        length = i + 1
        elmo_hidden1 = (elmo_hidden1[0][:, choose, :],
                        elmo_hidden1[1][:, choose, :])
        elmo_hidden2 = (elmo_hidden2[0][:, choose, :],
                        elmo_hidden2[1][:, choose, :])
        dec_hidden1 = (dec_hidden1[0][:, choose, :],
                       dec_hidden1[1][:, choose, :])
        dec_hidden2 = (dec_hidden2[0][:, choose, :],
                       dec_hidden2[1][:, choose, :])
        dec_att_out = dec_att_out[choose, :, :]
    return sequence_symbols[-1], beam_proba[0] / length

def reset(self):
    self.first_action = True
    self.state = torch.ByteTensor(1, 84, 84).to(device)

def _demo_mm_inputs(input_shape=(1, 3, 300, 300),
                    num_items=None, num_classes=10,
                    with_semantic=False):  # yapf: disable
    """Create a superset of inputs needed to run test or train batches.

    Args:
        input_shape (tuple): input batch dimensions
        num_items (None | List[int]): specifies the number of boxes in each
            batch item
        num_classes (int): number of different labels a box might have
    """
    from mmdet.core import BitmapMasks

    (N, C, H, W) = input_shape

    rng = np.random.RandomState(0)

    imgs = rng.rand(*input_shape)

    img_metas = [{
        'img_shape': (H, W, C),
        'ori_shape': (H, W, C),
        'pad_shape': (H, W, C),
        'filename': '<demo>.png',
        'scale_factor': np.array([1.1, 1.2, 1.1, 1.2]),
        'flip': False,
        'flip_direction': None,
    } for _ in range(N)]

    gt_bboxes = []
    gt_labels = []
    gt_masks = []

    for batch_idx in range(N):
        if num_items is None:
            num_boxes = rng.randint(1, 10)
        else:
            num_boxes = num_items[batch_idx]

        cx, cy, bw, bh = rng.rand(num_boxes, 4).T

        tl_x = ((cx * W) - (W * bw / 2)).clip(0, W)
        tl_y = ((cy * H) - (H * bh / 2)).clip(0, H)
        br_x = ((cx * W) + (W * bw / 2)).clip(0, W)
        br_y = ((cy * H) + (H * bh / 2)).clip(0, H)

        boxes = np.vstack([tl_x, tl_y, br_x, br_y]).T
        class_idxs = rng.randint(1, num_classes, size=num_boxes)

        gt_bboxes.append(torch.FloatTensor(boxes))
        gt_labels.append(torch.LongTensor(class_idxs))
        mask = np.random.randint(0, 2, (len(boxes), H, W), dtype=np.uint8)
        gt_masks.append(BitmapMasks(mask, H, W))

    mm_inputs = {
        'imgs': torch.FloatTensor(imgs).requires_grad_(True),
        'img_metas': img_metas,
        'gt_bboxes': gt_bboxes,
        'gt_labels': gt_labels,
        'gt_bboxes_ignore': None,
        'gt_masks': gt_masks,
    }

    if with_semantic:
        # assume gt_semantic_seg using scale 1/8 of the img
        gt_semantic_seg = np.random.randint(0, num_classes,
                                            (1, 1, H // 8, W // 8),
                                            dtype=np.uint8)
        mm_inputs.update(
            {'gt_semantic_seg': torch.ByteTensor(gt_semantic_seg)})

    return mm_inputs

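# A hedged usage sketch: _demo_mm_inputs is self-contained apart from the
# mmdet dependency, so (assuming mmdet is installed) a batch of two images
# with 3 and 5 boxes respectively can be faked like this.
mm_inputs = _demo_mm_inputs(input_shape=(2, 3, 128, 128), num_items=[3, 5])
print(mm_inputs['imgs'].shape)                    # torch.Size([2, 3, 128, 128])
print([b.shape for b in mm_inputs['gt_bboxes']])  # [(3, 4), (5, 4)] box tensors
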
def all_gather_list(data, group=None, max_size=16384):
    """Gathers arbitrary data from all nodes into a list.

    Similar to :func:`~torch.distributed.all_gather` but for arbitrary Python
    data. Note that *data* must be picklable and any CUDA tensors will be
    moved to CPU and returned on CPU as well.

    Args:
        data (Any): data from the local worker to be gathered on other workers
        group: group of the collective
        max_size (int, optional): maximum size of the data to be gathered
            across workers
    """
    from fairseq import utils

    if group is None:
        group = get_global_group()
    rank = get_rank(group=group)
    world_size = get_world_size(group=group)

    buffer_size = max_size * world_size
    if (not hasattr(all_gather_list, "_buffer")
            or all_gather_list._buffer.numel() < buffer_size):
        all_gather_list._buffer = torch.cuda.ByteTensor(buffer_size)
        all_gather_list._cpu_buffer = torch.ByteTensor(max_size).pin_memory()
    buffer = all_gather_list._buffer
    buffer.zero_()
    cpu_buffer = all_gather_list._cpu_buffer

    data = utils.move_to_cpu(data)
    enc = pickle.dumps(data)
    enc_size = len(enc)
    header_size = 4  # size of header that contains the length of the encoded data
    size = header_size + enc_size
    if size > max_size:
        raise ValueError(
            "encoded data size ({}) exceeds max_size ({})".format(
                size, max_size))

    header = struct.pack(">I", enc_size)
    cpu_buffer[:size] = torch.ByteTensor(list(header + enc))
    start = rank * max_size
    buffer[start:start + size].copy_(cpu_buffer[:size])

    all_reduce(buffer, group=group)

    buffer = buffer.cpu()
    try:
        result = []
        for i in range(world_size):
            out_buffer = buffer[i * max_size:(i + 1) * max_size]
            (enc_size, ) = struct.unpack(
                ">I", bytes(out_buffer[:header_size].tolist()))
            if enc_size > 0:
                result.append(
                    pickle.loads(
                        bytes(out_buffer[header_size:header_size +
                                         enc_size].tolist())))
        return result
    except pickle.UnpicklingError:
        raise Exception(
            "Unable to unpickle data from other workers. all_gather_list "
            "requires all workers to enter the function together, so this "
            "error usually indicates that the workers have fallen out of "
            "sync somehow. Workers can fall out of sync if one of them runs "
            "out of memory, or if there are other conditions in your "
            "training script that can cause one worker to finish an epoch "
            "while other workers are still iterating over their portions of "
            "the data. Try rerunning with --ddp-backend=legacy_ddp and see "
            "if that helps."
        )

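# A standalone sketch (illustrative only) of the length-prefix framing that
# all_gather_list uses: a 4-byte big-endian size header followed by the
# pickled payload, decoded the same way the receive path above does.
import pickle
import struct

payload = {"rank": 0, "stats": [1, 2, 3]}
enc = pickle.dumps(payload)
frame = struct.pack(">I", len(enc)) + enc      # header + body

(enc_size,) = struct.unpack(">I", frame[:4])   # read the header back
decoded = pickle.loads(frame[4:4 + enc_size])
assert decoded == payload
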
def get_f1(model: BiRecurrentConvCRF4NestedNER, mode: str,
           file_path: str = None) -> float:
    with torch.no_grad():
        model.eval()

        pred_all, pred, recall_all, recall = 0, 0, 0, 0
        gold_cross_num = 0
        pred_cross_num = 0
        if mode == 'dev':
            batch_zip = zip(dev_input_ids_batches, dev_input_mask_batches,
                            dev_first_subtokens_batches,
                            dev_last_subtokens_batches,
                            dev_label_batches, dev_mask_batches)
        elif mode == 'test':
            batch_zip = zip(test_input_ids_batches, test_input_mask_batches,
                            test_first_subtokens_batches,
                            test_last_subtokens_batches,
                            test_label_batches, test_mask_batches)
        else:
            raise ValueError

        f = None
        if file_path is not None:
            f = open(file_path, 'w')

        for input_ids_batch, input_mask_batch, first_subtokens_batch, \
                last_subtokens_batch, label_batch, mask_batch in batch_zip:
            input_ids_batch_var = torch.LongTensor(np.array(input_ids_batch))
            input_mask_batch_var = torch.LongTensor(
                np.array(input_mask_batch))
            mask_batch_var = torch.ByteTensor(
                np.array(mask_batch, dtype=np.uint8))
            if config.if_gpu:
                input_ids_batch_var = input_ids_batch_var.cuda()
                input_mask_batch_var = input_mask_batch_var.cuda()
                mask_batch_var = mask_batch_var.cuda()

            pred_sequence_entities = model.predict(input_ids_batch_var,
                                                   input_mask_batch_var,
                                                   first_subtokens_batch,
                                                   last_subtokens_batch,
                                                   mask_batch_var)
            pred_entities = unpack_prediction(model, pred_sequence_entities)
            p_a, p, r_a, r = evaluate(label_batch, pred_entities)

            gold_cross_num += 0
            pred_cross_num += 0

            pred_all += p_a
            pred += p
            recall_all += r_a
            recall += r

            if file_path is not None:
                for input_ids, input_mask, first_subtokens, last_subtokens, \
                        mask, label, preds \
                        in zip(input_ids_batch, input_mask_batch,
                               first_subtokens_batch, last_subtokens_batch,
                               mask_batch, label_batch, pred_entities):
                    words = []
                    for t, m in zip(input_ids, input_mask):
                        if m == 0:
                            break
                        words.append(voc_dict.get_instance(t))
                    f.write(' '.join(words) + '\n')

                    labels = []
                    for l in sorted(label, key=lambda x: (x[0], x[1], x[2])):
                        s = first_subtokens[l[0]]
                        e = last_subtokens[l[1] - 1]
                        labels.append("{},{} {}".format(
                            s, e, label_dict.get_instance(l[2])))
                    f.write('|'.join(labels) + '\n')

                    labels = []
                    for p in sorted(preds, key=lambda x: (x[0], x[1], x[2])):
                        s = first_subtokens[p[0]]
                        e = last_subtokens[p[1] - 1]
                        labels.append("{},{} {}".format(
                            s, e, label_dict.get_instance(p[2])))
                    f.write('|'.join(labels) + '\n')

                    f.write('\n')

        if file_path is not None:
            f.close()

        pred = pred / pred_all if pred_all > 0 else 1.
        recall = recall / recall_all if recall_all > 0 else 1.
        f1 = 2 / ((1. / pred) + (1. / recall)) \
            if pred > 0. and recall > 0. else 0.
        logger.info(
            "{} precision: {:.2f}%, recall: {:.2f}%, F1: {:.2f}%".format(
                mode, pred * 100., recall * 100., f1 * 100.))
        # logger.info("Prediction Crossing: ", pred_cross_num)
        # logger.info("Gold Crossing: ", gold_cross_num)

        return f1

def __getitem__(self, index):
    A_paths = self.A_paths[index]
    match = re.search(r'(/\d+)?/serie(\d+)', A_paths[0])
    folder_id = self.folder_id_lookup[
        match.group(1)[1:] if match.group(1) else '']
    dir_tag = '_' + match.group(1)[1:] + '_' if match.group(1) else '_'
    series_number = int(match.group(2))

    # Check that all files are of the same series number, as glob doesn't
    # always return the files in the correct order
    s_no = series_number - 100000
    assert all([get_series_number(path) == s_no
                for path in A_paths[1:]]), A_paths

    series = 'serie' + dir_tag + str(series_number)
    data = OrderedDict([(get_file_tag(path), read_geo_file(path))
                        for path in A_paths])

    rows = len(np.unique(data['Vx']['y']))
    cols = len(np.unique(data['Vx']['x']))

    # It is possible to do an interpolation here, but it's really slow
    # and ends up looking about the same
    for key in data.keys():
        data[key]['values'] = data[key]['values'].reshape((rows, cols),
                                                          order='C')
        data[key]['values'] = resize(
            data[key]['values'],
            (self.opt.fineSize, self.opt.fineSize * 2),
            mode='constant')
    rows = 256
    cols = 512

    # Create discrete image before we normalise
    A = create_one_hot(data['DIV']['values'], self.opt.div_threshold)

    # We're done with x/y data now, so discard
    A_data = [data[key]['values'] for key in data.keys() if key != 'cont']

    # Normalise
    A_DIV, A_Vx, A_Vy = A_data
    A_DIV = np.interp(A_DIV,
                      [np.min(A_DIV.ravel()), np.max(A_DIV.ravel())],
                      [-1, 1])
    A_Vx = np.interp(A_Vx,
                     [np.min(A_Vx.ravel()), np.max(A_Vx.ravel())],
                     [-1, 1])
    A_Vy = np.interp(A_Vy,
                     [np.min(A_Vy.ravel()), np.max(A_Vy.ravel())],
                     [-1, 1])

    w_offset, h_offset, layer = self.get_inpaint_region(index, A, rows, cols)

    mask_x1 = w_offset
    mask_x2 = w_offset + 100
    mask_y1 = h_offset
    mask_y2 = h_offset + 100

    mask = np.zeros((rows, cols), dtype=np.uint8)
    mask[mask_y1:mask_y2, mask_x1:mask_x2] = 1

    # B_DIV = A_DIV.copy()
    # B_DIV[mask_y1:mask_y2, mask_x1:mask_x2] = 0
    # B_Vx = A_Vx.copy()
    # B_Vx[mask_y1:mask_y2, mask_x1:mask_x2] = 0
    # B_Vy = A_Vy.copy()
    # B_Vy[mask_y1:mask_y2, mask_x1:mask_x2] = 0
    B_data = [mask_out_inpaint_region(im, mask)
              for im in [A_DIV, A_Vx, A_Vy]]

    B = A.copy()
    if self.opt.inpaint_single_class:
        B[:, :, 1][np.where(np.logical_and(mask, B[:, :, layer]))] = 1
        B[mask_y1:mask_y2, mask_x1:mask_x2, layer] = 0
    else:
        B[np.where(mask)] = [0, 1, 0]

    mask = np.expand_dims(mask, 2)
    mask = torch.ByteTensor(mask.transpose(2, 0, 1)).clone()

    # A_DIV = np.interp(A_DIV, [np.min(A_DIV), np.max(A_DIV)], [-1, 1])
    # A_Vx = np.interp(A_Vx, [np.min(A_Vx), np.max(A_Vx)], [-1, 1])
    # A_Vy = np.interp(A_Vy, [np.min(A_Vy), np.max(A_Vy)], [-1, 1])

    # B_DIV = np.interp(B_DIV, [np.min(B_DIV), np.max(B_DIV)], [-1, 1])
    # B_Vx = np.interp(B_Vx, [np.min(B_Vx), np.max(B_Vx)], [-1, 1])
    # B_Vy = np.interp(B_Vy, [np.min(B_Vy), np.max(B_Vy)], [-1, 1])

    def process_image(A, B, discrete=False):
        if not discrete:
            A = np.expand_dims(A, 0)
            B = np.expand_dims(B, 0)
            A = torch.FloatTensor(A)
            B = torch.FloatTensor(B)
            # A = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(A)
            # B = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(B)
        else:
            A = torch.FloatTensor(A.transpose(2, 0, 1))
            B = torch.FloatTensor(B.transpose(2, 0, 1))
        return A, B

    B_DIV, B_Vx, B_Vy = B_data

    if self.opt.continent_data:
        if 'cont' in data.keys():
            continents = data['cont']['values']
        else:
            continents = np.zeros((rows, cols))

    A, B = process_image(A, B, discrete=True)
    A_DIV, B_DIV = process_image(A_DIV, B_DIV)
    A_Vx, B_Vx = process_image(A_Vx, B_Vx)
    A_Vy, B_Vy = process_image(A_Vy, B_Vy)

    if self.opt.continent_data:
        continents = (continents > 0).astype(np.uint8)
        continents = np.expand_dims(continents, 2)
        continents = continents.transpose(2, 0, 1)
        continents = torch.ByteTensor(continents).clone()

    if (not self.opt.no_flip) and random.random() < 0.5:
        idx = [i for i in range(A.size(2) - 1, -1, -1)]
        idx = torch.LongTensor(idx)
        A = A.index_select(2, idx)
        B = B.index_select(2, idx)
        A_DIV = A_DIV.index_select(2, idx)
        B_DIV = B_DIV.index_select(2, idx)
        A_Vx = A_Vx.index_select(2, idx)
        B_Vx = B_Vx.index_select(2, idx)
        A_Vy = A_Vy.index_select(2, idx)
        B_Vy = B_Vy.index_select(2, idx)
        if self.opt.continent_data:
            continents = continents.index_select(2, idx)

        mask = mask.index_select(2, idx)
        tmp = mask_x1
        mask_x1 = mask.shape[2] - mask_x2
        mask_x2 = mask.shape[2] - tmp

    mask_x1 = torch.LongTensor([mask_x1]).expand(1, -1)
    mask_x2 = torch.LongTensor([mask_x2]).expand(1, -1)
    mask_y1 = torch.LongTensor([mask_y1]).expand(1, -1)
    mask_y2 = torch.LongTensor([mask_y2]).expand(1, -1)

    data = {
        'A': A, 'B': B,
        'A_DIV': A_DIV, 'B_DIV': B_DIV,
        'A_Vx': A_Vx, 'B_Vx': B_Vx,
        'A_Vy': A_Vy, 'B_Vy': B_Vy,
        'mask': mask,
        'mask_x1': mask_x1, 'mask_x2': mask_x2,
        'mask_y1': mask_y1, 'mask_y2': mask_y2,
        'A_paths': os.path.join(self.dir_A, series),
        'B_paths': os.path.join(self.dir_A, series + '_inpainted'),
        'series_number': int(dir_tag[1:-1] + str(series_number)),
        'folder_id': folder_id,
    }

    if self.opt.continent_data:
        data['cont'] = continents

    return data

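# A minimal sketch (an assumption, not from the dataset code) of the
# index_select-based horizontal flip used above, on a tiny tensor.
import torch

A = torch.arange(12.).view(1, 3, 4)          # (C, H, W)
idx = torch.arange(A.size(2) - 1, -1, -1)    # reversed column indices
flipped = A.index_select(2, idx)             # mirror along the width axis
print(flipped)
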
def create_data_loader(loader, batch_size=5000):
    array, lengths = np.array(loader["data"]), np.array(loader["length"])
    data = TensorDataset(
        torch.from_numpy(array).type(torch.LongTensor),
        torch.ByteTensor(lengths))
    return DataLoader(data, batch_size=batch_size, drop_last=False)

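# A hedged usage sketch for create_data_loader, with a fake `loader` dict in
# the shape the function expects: padded token-id rows plus a per-row length.
loader = {
    "data": [[4, 9, 2, 0], [7, 1, 0, 0]],
    "length": [3, 2],
}
for batch, lengths in create_data_loader(loader, batch_size=2):
    print(batch.shape, lengths)  # torch.Size([2, 4]) tensor([3, 2], dtype=torch.uint8)
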
def train(config):
    hidden_size = config["hidden_size"]
    save_dir = config["save_dir"]
    learning_rate = config["learning_rate"]
    batch_size = config["batch_size"]
    epoch_size = config["epoch_size"]

    dataset = DataUtil(config)
    input_vocab, target_vocab, intent_vocab = dataset.get_vocab()
    dataloader = DataLoader(dataset, batch_size, shuffle=True)

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    encoder = Encoder(len(input_vocab), config)
    decoder = Decoder(len(target_vocab), len(intent_vocab), hidden_size * 2)
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    encoder.init_weights()
    decoder.init_weights()

    loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
    loss_function_2 = nn.CrossEntropyLoss()
    enc_optim = optim.Adam(encoder.parameters(), lr=learning_rate)
    dec_optim = optim.Adam(decoder.parameters(), lr=learning_rate)

    for epoch in range(1, epoch_size + 1):
        losses = []
        for i, batch in enumerate(dataloader):
            input_batch, target_batch, intent_batch = batch
            input_batch = input_batch.long()
            target_batch = target_batch.long()
            if USE_CUDA:
                input_batch = input_batch.cuda()
                target_batch = target_batch.cuda()
                intent_batch = intent_batch.cuda()

            input_mask = torch.cat([
                Variable(torch.ByteTensor(
                    tuple(map(lambda s: s == 0, t.data)))).cuda()
                if USE_CUDA else
                Variable(torch.ByteTensor(
                    tuple(map(lambda s: s == 0, t.data))))
                for t in input_batch
            ]).view(batch_size, -1)

            encoder.zero_grad()
            decoder.zero_grad()

            output, hidden_c = encoder(input_batch, input_mask)
            start_decode = Variable(torch.LongTensor(
                [[input_vocab.index('PAD')] * batch_size])).transpose(1, 0)
            if USE_CUDA:
                start_decode = start_decode.cuda()

            tag_score, intent_score = decoder(start_decode, hidden_c, output,
                                              input_mask)

            loss_1 = loss_function_1(tag_score, target_batch.view(-1))
            loss_2 = loss_function_2(intent_score, intent_batch)
            loss = loss_1 + loss_2
            losses.append(loss.data.cpu().numpy()
                          if USE_CUDA else loss.data.numpy())
            loss.backward()

            torch.nn.utils.clip_grad_norm_(encoder.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), 5.0)

            enc_optim.step()
            dec_optim.step()

            if i % 10 == 0:
                print(f"Epoch {epoch}: {np.mean(losses)}")
                losses = []

        if epoch % 100 == 0:
            torch.save(encoder, os.path.join(save_dir, f'encoder-{epoch}.pt'))
            torch.save(decoder, os.path.join(save_dir, f'decoder-{epoch}.pt'))
            print(f"Epoch: {epoch} save model...")

    print("Training Complete!")

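# A hedged aside (not in the original): the padding mask built above with
# map/lambda per row is equivalent to a single elementwise comparison, shown
# here on a dummy batch.
import torch

input_batch = torch.tensor([[4, 7, 2, 0, 0],
                            [9, 1, 0, 0, 0]])
input_mask = (input_batch == 0)  # True where the token id is padding (0)
print(input_mask)
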
def test_model(test_dataset, test_num_each):
    num_test = len(test_dataset)
    test_count = 0
    for i in range(len(test_num_each)):
        test_count += test_num_each[i]

    test_useful_start_idx = get_useful_start_idx(sequence_length,
                                                 test_num_each)

    num_test_we_use = len(test_useful_start_idx)
    # Strictly speaking this should be divided by the number of GPUs and then
    # multiplied back, but to make sure every sample is tested we keep the
    # test count complete.
    # num_test_we_use = 804

    test_we_use_start_idx = test_useful_start_idx[0:num_test_we_use]

    test_idx = []
    for i in range(num_test_we_use):
        for j in range(sequence_length):
            test_idx.append(test_we_use_start_idx[i] + j)

    num_test_all = len(test_idx)

    print('num test start idx : {:6d}'.format(len(test_useful_start_idx)))
    print('last idx test start: {:6d}'.format(test_useful_start_idx[-1]))
    print('num of test dataset: {:6d}'.format(num_test))
    print('num of test we use : {:6d}'.format(num_test_we_use))
    print('num of all test use: {:6d}'.format(num_test_all))

    test_loader = DataLoader(test_dataset,
                             batch_size=test_batch_size,
                             sampler=test_idx,
                             num_workers=1,
                             pin_memory=False)
    model = multi_lstm_p2t()
    model = DataParallel(model)
    model.load_state_dict(torch.load(model_name))
    # model = model.module
    # model = DataParallel(model)

    if use_gpu:
        model = model.cuda()
    # model = DataParallel(model)
    # model = model.module

    criterion_1 = nn.BCEWithLogitsLoss(size_average=False)
    criterion_2 = nn.CrossEntropyLoss(size_average=False)
    sig_f = nn.Sigmoid()

    model.eval()

    test_loss_1 = 0.0
    test_loss_2 = 0.0
    test_corrects_2 = 0

    test_start_time = time.time()

    all_preds_1 = []
    all_labels_1 = []
    all_preds_2 = []

    for data in test_loader:
        inputs, labels_1, labels_2 = data

        # labels_1 = labels_1[(sequence_length - 1)::sequence_length]
        labels_2 = labels_2[(sequence_length - 1)::sequence_length]

        if use_gpu:
            inputs = Variable(inputs.cuda(), volatile=True)
            labels_1 = Variable(labels_1.cuda(), volatile=True)
            labels_2 = Variable(labels_2.cuda(), volatile=True)
        else:
            inputs = Variable(inputs, volatile=True)
            labels_1 = Variable(labels_1, volatile=True)
            labels_2 = Variable(labels_2, volatile=True)

        if crop_type == 0 or crop_type == 1:
            outputs_1, outputs_2, _ = model.forward(inputs)
        elif crop_type == 5:
            inputs = inputs.permute(1, 0, 2, 3, 4).contiguous()
            inputs = inputs.view(-1, 3, 224, 224)
            outputs_1, outputs_2, _ = model.forward(inputs)
            outputs_1 = outputs_1.view(5, -1, 7)
            outputs_1 = torch.mean(outputs_1, 0)
            outputs_2 = outputs_2.view(5, -1, 7)
            outputs_2 = torch.mean(outputs_2, 0)
        elif crop_type == 10:
            inputs = inputs.permute(1, 0, 2, 3, 4).contiguous()
            inputs = inputs.view(-1, 3, 224, 224)
            outputs_1, outputs_2, _ = model.forward(inputs)
            outputs_1 = outputs_1.view(10, -1, 7)
            outputs_1 = torch.mean(outputs_1, 0)
            outputs_2 = outputs_2.view(10, -1, 7)
            outputs_2 = torch.mean(outputs_2, 0)

        # outputs_1 = outputs_1[sequence_length-1::sequence_length]
        outputs_2 = outputs_2[sequence_length - 1::sequence_length]

        _, preds_2 = torch.max(outputs_2.data, 1)

        for i in range(len(outputs_1)):
            all_preds_1.append(outputs_1[i].data.cpu().numpy().tolist())
            all_labels_1.append(labels_1[i].data.cpu().numpy().tolist())
        for i in range(len(preds_2)):
            all_preds_2.append(preds_2[i])
        print('preds_1: {:6d} preds_2: {:6d}'.format(len(all_preds_1),
                                                     len(all_preds_2)))
        # labels_1 = Variable(labels_1.data.float())
        # loss_1 = criterion_1(outputs_1, labels_1)
        # test_loss_1 += loss_1.data[0]

        loss_2 = criterion_2(outputs_2, labels_2)
        test_loss_2 += loss_2.data[0]
        test_corrects_2 += torch.sum(preds_2 == labels_2.data)

    all_preds_1_cor = []
    all_labels_1_cor = []
    cor_count = 0
    for i in range(len(test_num_each)):
        for j in range(cor_count,
                       cor_count + test_num_each[i] - (sequence_length - 1)):
            if j == cor_count:
                for k in range(sequence_length - 1):
                    all_preds_1_cor.append(
                        all_preds_1[sequence_length * j + k])
                    all_labels_1_cor.append(
                        all_labels_1[sequence_length * j + k])
            all_preds_1_cor.append(
                all_preds_1[sequence_length * j + sequence_length - 1])
            all_labels_1_cor.append(
                all_labels_1[sequence_length * j + sequence_length - 1])
        cor_count += test_num_each[i] + 1 - sequence_length

    print('all_preds_1 : {:6d}'.format(len(all_preds_1)))
    print('all_labels_1: {:6d}'.format(len(all_labels_1)))
    print('cor_preds_1 : {:6d}'.format(len(all_preds_1_cor)))
    print('cor_labels_1: {:6d}'.format(len(all_labels_1_cor)))

    pt_preds_1 = torch.from_numpy(np.asarray(all_preds_1_cor,
                                             dtype=np.float32))
    pt_labels_1 = torch.from_numpy(np.asarray(all_labels_1_cor,
                                              dtype=np.float32))
    pt_labels_1 = Variable(pt_labels_1, requires_grad=False)
    pt_preds_1 = Variable(pt_preds_1, requires_grad=False)
    loss_1 = criterion_1(pt_preds_1, pt_labels_1)
    test_loss_1 += loss_1.data[0]

    pt_labels_1 = pt_labels_1.data
    pt_preds_1 = pt_preds_1.data
    sig_out = sig_f(pt_preds_1)
    preds_cor = torch.ByteTensor(sig_out > 0.5)
    preds_cor = preds_cor.long()
    pt_labels_1 = pt_labels_1.long()
    test_corrects_1 = torch.sum(preds_cor == pt_labels_1)

    test_elapsed_time = time.time() - test_start_time
    test_accuracy_1 = test_corrects_1 / num_test / 7
    test_accuracy_2 = test_corrects_2 / num_test_we_use
    test_average_loss_1 = test_loss_1 / num_test / 7
    test_average_loss_2 = test_loss_2 / num_test_we_use

    print('preds_1 num: {:6d} preds_2 num: {:6d}'.format(
        len(all_preds_1_cor), len(all_preds_2)))

    save_test1 = int("{:4.0f}".format(test_accuracy_1 * 10000))
    save_test2 = int("{:4.0f}".format(test_accuracy_2 * 10000))

    pred_1_name = (model_pure_name + '_test1_' + str(save_test1) + '_crop_'
                   + str(crop_type) + '.pkl')
    pred_2_name = (model_pure_name + '_test2_' + str(save_test2) + '_crop_'
                   + str(crop_type) + '.pkl')

    with open(pred_1_name, 'wb') as f:
        pickle.dump(all_preds_1_cor, f)
    with open(pred_2_name, 'wb') as f:
        pickle.dump(all_preds_2, f)

    print('test completed in:'
          ' {:2.0f}m{:2.0f}s'
          ' test loss_1: {:4.4f}'
          ' test loss_2: {:4.4f}'
          ' test accu_1: {:.4f}'
          ' test accu_2: {:.4f}'
          .format(test_elapsed_time // 60,
                  test_elapsed_time % 60,
                  test_average_loss_1,
                  test_average_loss_2,
                  test_accuracy_1,
                  test_accuracy_2))

def main(args):
    # load and preprocess dataset
    data = load_data(args)

    if args.self_loop and not args.dataset.startswith('reddit'):
        data.graph.add_edges_from([(i, i) for i in range(len(data.graph))])

    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)
    test_nid = np.nonzero(data.test_mask)[0].astype(np.int64)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.sum().item()
    n_val_samples = val_mask.sum().item()
    n_test_samples = test_mask.sum().item()

    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           n_train_samples,
           n_val_samples,
           n_test_samples))

    # create GCN model
    g = DGLGraph(data.graph, readonly=True)
    norm = 1. / g.in_degrees().float().unsqueeze(1)

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        norm = norm.cuda()

    g.ndata['features'] = features

    num_neighbors = args.num_neighbors
    n_layers = args.n_layers

    g.ndata['norm'] = norm

    g.update_all(
        fn.copy_src(src='features', out='m'),
        fn.sum(msg='m', out='preprocess'),
        lambda node: {'preprocess': node.data['preprocess'] * node.data['norm']})

    for i in range(n_layers):
        g.ndata['h_{}'.format(i)] = torch.zeros(
            features.shape[0], args.n_hidden).to(device=features.device)

    g.ndata['h_{}'.format(n_layers - 1)] = torch.zeros(
        features.shape[0], 2 * args.n_hidden).to(device=features.device)

    model = GCNSampling(in_feats,
                        args.n_hidden,
                        n_classes,
                        n_layers,
                        F.relu,
                        args.dropout)

    loss_fcn = nn.CrossEntropyLoss()

    infer_model = GCNInfer(in_feats,
                           args.n_hidden,
                           n_classes,
                           n_layers,
                           F.relu)

    if cuda:
        model.cuda()
        infer_model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    for epoch in range(args.n_epochs):
        for nf in dgl.contrib.sampling.NeighborSampler(g, args.batch_size,
                                                       num_neighbors,
                                                       neighbor_type='in',
                                                       shuffle=True,
                                                       num_workers=32,
                                                       num_hops=n_layers,
                                                       seed_nodes=train_nid):
            for i in range(n_layers):
                agg_history_str = 'agg_h_{}'.format(i)
                g.pull(nf.layer_parent_nid(i + 1).long(),
                       fn.copy_src(src='h_{}'.format(i), out='m'),
                       fn.sum(msg='m', out=agg_history_str),
                       lambda node: {agg_history_str: node.data[agg_history_str] * node.data['norm']})

            node_embed_names = [['preprocess', 'h_0']]
            for i in range(1, n_layers):
                node_embed_names.append(['h_{}'.format(i), 'agg_h_{}'.format(i - 1)])
            node_embed_names.append(['agg_h_{}'.format(n_layers - 1)])
            nf.copy_from_parent(node_embed_names=node_embed_names)

            model.train()
            # forward
            pred = model(nf)
            batch_nids = nf.layer_parent_nid(-1).to(device=pred.device).long()
            batch_labels = labels[batch_nids]
            loss = loss_fcn(pred, batch_labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            node_embed_names = [['h_{}'.format(i)] for i in range(n_layers)]
            node_embed_names.append([])
            nf.copy_to_parent(node_embed_names=node_embed_names)

        for infer_param, param in zip(infer_model.parameters(), model.parameters()):
            infer_param.data.copy_(param.data)

        num_acc = 0.

        for nf in dgl.contrib.sampling.NeighborSampler(g, args.test_batch_size,
                                                       g.number_of_nodes(),
                                                       neighbor_type='in',
                                                       num_workers=32,
                                                       num_hops=n_layers,
                                                       seed_nodes=test_nid):
            node_embed_names = [['preprocess']]
            for i in range(n_layers):
                node_embed_names.append(['norm'])
            nf.copy_from_parent(node_embed_names=node_embed_names)

            infer_model.eval()
            with torch.no_grad():
                pred = infer_model(nf)
                batch_nids = nf.layer_parent_nid(-1).to(device=pred.device).long()
                batch_labels = labels[batch_nids]
                num_acc += (pred.argmax(dim=1) == batch_labels).sum().cpu().item()

        print("Test Accuracy {:.4f}".format(num_acc / n_test_samples))
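The `update_all` call above materializes a degree-normalized neighbor sum (`preprocess`) once before sampling starts, which amounts to mean aggregation over in-neighbors. A tiny self-contained sketch of that computation with a dense adjacency matrix (illustrative only, not from the original repo):

import torch

A = torch.tensor([[0., 1., 1.],
                  [1., 0., 0.],
                  [1., 0., 0.]])          # adjacency of a 3-node graph
X = torch.rand(3, 4)                      # node features
norm = 1. / A.sum(dim=1, keepdim=True)    # 1 / in-degree, as in the code above
preprocess = norm * (A @ X)               # normalized neighbor sum == neighbor mean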
def __call__(self, input):
    def _just_resize():
        img = input['img']
        w, h = img.size
        # perform scaling
        input['img'] = img.resize((self.ix, self.iy), Image.ANTIALIAS)
        if np.sum(input['loc']) != 0:
            loc = input['loc']
            loc[0, :] = loc[0, :] * self.ix / w
            loc[1, :] = loc[1, :] * self.iy / h
            input['loc'] = loc

    def _transform():
        angle = self.rangle * (2 * torch.rand(1)[0] - 1)
        grad_angle = angle * math.pi / 180
        scale = 1 + self.rscale * (2 * torch.rand(1)[0] - 1)
        transx = self.rtrans * (2 * torch.rand(1)[0] - 1)
        transy = self.rtrans * (2 * torch.rand(1)[0] - 1)

        img = input['img']
        w, h = img.size
        centerX, centerY = w // 2, h // 2

        # perform rotation
        img = img.rotate(angle, Image.BICUBIC)
        # perform translation
        img = img.transform(img.size, Image.AFFINE, (1, 0, transx, 0, 1, transy))
        # perform scaling
        img = img.resize((int(math.ceil(scale * h)), int(math.ceil(scale * w))), Image.ANTIALIAS)

        w, h = img.size
        x1 = round((w - self.ix) // 2)
        y1 = round((h - self.iy) // 2)
        input['img'] = img.crop((x1, y1, x1 + self.ix, y1 + self.iy))

        if np.sum(input['loc']) != 0:
            loc = input['loc']
            newloc = np.ones((3, loc.shape[1]))
            newloc[0:2, :] = loc

            trans_matrix = np.array([[1, 0, -1 * transx],
                                     [0, 1, -1 * transy],
                                     [0, 0, 1]])
            scale_matrix = np.array([[scale, 0, 0],
                                     [0, scale, 0],
                                     [0, 0, 1]])
            angle_matrix = np.array([
                [math.cos(grad_angle), math.sin(grad_angle), 0],
                [-math.sin(grad_angle), math.cos(grad_angle), 0],
                [0, 0, 1]])

            # perform rotation about the image center
            newloc[0, :] = newloc[0, :] - centerY
            newloc[1, :] = newloc[1, :] - centerX
            newloc = np.dot(angle_matrix, newloc)
            newloc[0, :] = newloc[0, :] + centerY
            newloc[1, :] = newloc[1, :] + centerX
            # perform translation
            newloc = np.dot(trans_matrix, newloc)
            # perform scaling
            newloc = np.dot(scale_matrix, newloc)
            newloc[0, :] = newloc[0, :] - y1
            newloc[1, :] = newloc[1, :] - x1
            input['loc'] = newloc[0:2, :]

            # mark landmarks that fell outside the crop as occluded
            for i in range(input['loc'].shape[1]):
                if not np.isnan(input['loc'][:, i]).any():
                    if np.any(input['loc'][:, i] < 0) or \
                            input['loc'][0, i] > self.iy or \
                            input['loc'][1, i] > self.ix:
                        input['loc'][:, i] = np.nan
                        # TODO: fill the surrounding with normal noise
                        input['occ'][0, i] = 0

    # FIXME: create multiple images for the same sample with different
    # occluded blocks for testing purposes
    # input['im'][:, 10:40, 22:50] = 0

    # add the center of the image as one extra landmark at the end
    h, w = input['img'].size
    input['loc'] = np.hstack((input['loc'], np.array([[w // 2], [h // 2]])))
    input['occ'] = torch.cat((input['occ'], torch.ByteTensor([[1]])), 1)
    input['mask'] = torch.cat((input['mask'], torch.ByteTensor([[1]])), 1)

    orig_img = input['img']
    orig_loc = input['loc']
    orig_occ = input['occ'].clone()
    orig_mask = input['mask'].clone()
    _transform()

    if self.keep_landmarks_visible:
        # train: make sure all landmarks are still visible; if not, perform
        # another transformation
        mask = input['mask']
        mask2D = torch.cat((mask, mask), dim=0)
        landmarks = torch.from_numpy(input['loc'])
        limit = 100
        while not (mask == mask * input['occ']).all() or utils.isnan(landmarks[mask2D]).any():
            input['img'] = orig_img
            input['loc'] = orig_loc
            input['occ'] = orig_occ.clone()
            input['mask'] = orig_mask.clone()
            _transform()
            mask = input['mask']
            mask2D = torch.cat((mask, mask), dim=0)
            landmarks = torch.from_numpy(input['loc'])
            limit -= 1
            if limit == 0:
                input['img'] = orig_img
                input['loc'] = orig_loc
                input['occ'] = orig_occ.clone()
                input['mask'] = orig_mask.clone()
                _just_resize()
                print('using the original data because even after 100 '
                      'transformations there are still occluded landmarks!')
                break

    input['tgt'] = self.toHeatmaps(input['loc'], self.image_resolution)
    return input
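The landmark bookkeeping in `_transform` is a standard homogeneous-coordinate pipeline: shift the rotation center to the origin, rotate, shift back, then translate and scale with 3x3 matrices. A compact standalone sketch of that idea, with hypothetical argument names, assuming the same (row, col) layout for `loc` as above:

import math
import numpy as np

def transform_points(loc, angle_deg, tx, ty, scale, cx, cy):
    """loc: 2xN array of landmark coordinates; returns the transformed 2xN array."""
    g = math.radians(angle_deg)
    pts = np.vstack([loc, np.ones((1, loc.shape[1]))])   # lift to homogeneous 3xN
    R = np.array([[math.cos(g), math.sin(g), 0],
                  [-math.sin(g), math.cos(g), 0],
                  [0, 0, 1]])
    T = np.array([[1, 0, -tx], [0, 1, -ty], [0, 0, 1]])
    S = np.array([[scale, 0, 0], [0, scale, 0], [0, 0, 1]])
    pts[0, :] -= cy
    pts[1, :] -= cx                                       # move rotation center to origin
    pts = R @ pts
    pts[0, :] += cy
    pts[1, :] += cx
    return (S @ (T @ pts))[0:2, :]                        # translate, scale, drop the 1-row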
def build_targets_max(target, anchor_wh, nA, nC, nGh, nGw):
    """
    returns tconf, tbox, tid
    """
    nB = len(target)  # number of images in batch

    txy = torch.zeros(nB, nA, nGh, nGw, 2).cuda()  # batch size, anchors, grid size
    twh = torch.zeros(nB, nA, nGh, nGw, 2).cuda()
    tconf = torch.LongTensor(nB, nA, nGh, nGw).fill_(0).cuda()
    tcls = torch.ByteTensor(nB, nA, nGh, nGw, nC).fill_(0).cuda()  # nC = number of classes
    tid = torch.LongTensor(nB, nA, nGh, nGw, 1).fill_(-1).cuda()
    for b in range(nB):
        t = target[b]
        t_id = t[:, 1].clone().long().cuda()
        t = t[:, [0, 2, 3, 4, 5]]
        nTb = len(t)  # number of targets
        if nTb == 0:
            continue

        # gxy, gwh = t[:, 1:3] * nG, t[:, 3:5] * nG
        gxy, gwh = t[:, 1:3].clone(), t[:, 3:5].clone()
        gxy[:, 0] = gxy[:, 0] * nGw
        gxy[:, 1] = gxy[:, 1] * nGh
        gwh[:, 0] = gwh[:, 0] * nGw
        gwh[:, 1] = gwh[:, 1] * nGh
        gi = torch.clamp(gxy[:, 0], min=0, max=nGw - 1).long()
        gj = torch.clamp(gxy[:, 1], min=0, max=nGh - 1).long()

        # Get grid box indices and prevent overflows (i.e. 13.01 on 13 anchors)
        # gi, gj = torch.clamp(gxy.long(), min=0, max=nG - 1).t()
        # gi, gj = gxy.long().t()

        # iou of targets-anchors (using wh only)
        box1 = gwh
        box2 = anchor_wh.unsqueeze(1)
        inter_area = torch.min(box1, box2).prod(2)
        iou = inter_area / (box1.prod(1) + box2.prod(2) - inter_area + 1e-16)

        # Select best iou_pred and anchor
        iou_best, a = iou.max(0)  # best anchor [0-2] for each target

        # Select best unique target-anchor combinations
        if nTb > 1:
            _, iou_order = torch.sort(-iou_best)  # best to worst

            # Unique anchor selection
            u = torch.stack((gi, gj, a), 0)[:, iou_order]
            # _, first_unique = np.unique(u, axis=1, return_index=True)  # first unique indices
            first_unique = return_torch_unique_index(u, torch.unique(u, dim=1))  # torch alternative
            i = iou_order[first_unique]
            # best anchor must share significant commonality (iou) with target
            i = i[iou_best[i] > 0.60]  # TODO: examine arbitrary threshold
            if len(i) == 0:
                continue

            a, gj, gi, t = a[i], gj[i], gi[i], t[i]
            t_id = t_id[i]
            if len(t.shape) == 1:
                t = t.view(1, 5)
        else:
            if iou_best < 0.60:
                continue

        tc, gxy, gwh = t[:, 0].long(), t[:, 1:3].clone(), t[:, 3:5].clone()
        gxy[:, 0] = gxy[:, 0] * nGw
        gxy[:, 1] = gxy[:, 1] * nGh
        gwh[:, 0] = gwh[:, 0] * nGw
        gwh[:, 1] = gwh[:, 1] * nGh

        # XY coordinates
        txy[b, a, gj, gi] = gxy - gxy.floor()

        # Width and height
        twh[b, a, gj, gi] = torch.log(gwh / anchor_wh[a])  # yolo method
        # twh[b, a, gj, gi] = torch.sqrt(gwh / anchor_wh[a]) / 2  # power method

        # One-hot encoding of label
        tcls[b, a, gj, gi, tc] = 1
        tconf[b, a, gj, gi] = 1
        tid[b, a, gj, gi] = t_id.unsqueeze(1)

    tbox = torch.cat([txy, twh], -1)
    return tconf, tbox, tid
def length2mask(length):
    # one row per sequence: ones over the first `l` positions, zeros after
    mask = torch.ByteTensor(len(length), max(length)).zero_().cuda()
    for i, l in enumerate(length):
        mask[i][:l].fill_(1)
    return Variable(mask)
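For example, `length2mask([2, 4, 1])` yields a 3x4 mask with ones in the first 2, 4, and 1 positions of each row. A loop-free sketch of the same idea, assuming PyTorch >= 1.2 where boolean masks supersede `ByteTensor` (kept on CPU here; `length2mask_vec` is an illustrative name):

import torch

def length2mask_vec(length):
    length = torch.as_tensor(length)
    # broadcast a position index against each length: True where pos < length
    return torch.arange(length.max())[None, :] < length[:, None]  # BoolTensor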
def makeData(srcFile, ldaFile, tgtFile, srcDicts, ldaDicts, tgtDicts):
    src, tgt = [], []
    eq_mask = []
    lda = []
    sizes = []
    count, ignored = 0, 0

    logger.info('Processing %s & %s ...' % (srcFile, tgtFile))
    srcF = open(srcFile, encoding='utf-8')
    ldaF = open(ldaFile, encoding='utf-8')
    tgtF = open(tgtFile, encoding='utf-8')

    while True:
        sline = srcF.readline()
        ldaLine = ldaF.readline()
        tline = tgtF.readline()

        # normal end of file
        if sline == "" and tline == "" and ldaLine == "":
            break

        # source or target does not have same number of lines
        if sline == "" or tline == "" or ldaLine == "":
            logger.info('WARNING: source and target do not have the same number of sentences')
            break

        sline = sline.strip()
        ldaLine = ldaLine.strip()
        tline = tline.strip()

        # source and/or target are empty
        if sline == "" or tline == "" or ldaLine == "":
            # TODO: Fix this, does this affect dev
            logger.info('WARNING: ignoring an empty line (' + str(count + 1) + ')')
            continue

        srcWords = sline.split(' ')
        ldaWords = ldaLine.split(' ')
        tgtWords = tline.split(' ')

        if len(srcWords) <= seq_length and len(tgtWords) <= seq_length:
            src += [srcDicts.convertToIdx(srcWords, s2s.Constants.UNK_WORD)]
            eq_mask += [
                torch.ByteTensor([
                    1 if ((len(x) == 1 and 'a' <= x <= 'z') or x.startswith('[num')) else 0
                    for x in srcWords
                ])
            ]
            tgt += [
                tgtDicts.convertToIdx(tgtWords,
                                      s2s.Constants.UNK_WORD,
                                      s2s.Constants.BOS_WORD,
                                      s2s.Constants.EOS_WORD)
            ]
            lda += [ldaDicts.convertToIdx(ldaWords, s2s.Constants.UNK_WORD)]
            sizes += [len(srcWords)]
        else:
            ignored += 1

        count += 1

        if count % report_every == 0:
            logger.info('... %d sentences prepared' % count)

    srcF.close()
    ldaF.close()
    tgtF.close()

    if shuffle == 1:
        logger.info('... shuffling sentences')
        perm = torch.randperm(len(src))
        src = [src[idx] for idx in perm]
        eq_mask = [eq_mask[idx] for idx in perm]
        lda = [lda[idx] for idx in perm]
        tgt = [tgt[idx] for idx in perm]
        sizes = [sizes[idx] for idx in perm]

    logger.info('... sorting sentences by size')
    _, perm = torch.sort(torch.Tensor(sizes))
    src = [src[idx] for idx in perm]
    eq_mask = [eq_mask[idx] for idx in perm]
    lda = [lda[idx] for idx in perm]
    tgt = [tgt[idx] for idx in perm]

    logger.info('Prepared %d sentences (%d ignored due to length == 0 or > %d)' %
                (len(src), ignored, seq_length))

    return src, eq_mask, lda, tgt
def apply_model(self, ner_model, features):
    """
    apply_model function for LM-LSTM-CRF

    args:
        ner_model: sequence labeling model
        features (list): list of words list
    """
    char_features = encode2char_safe(features, self.c_map)

    if self.caseless:
        word_features = encode_safe(
            list(map(lambda t: list(map(lambda x: x.lower(), t)), features)),
            self.f_map, self.f_map['<unk>'])
    else:
        word_features = encode_safe(features, self.f_map, self.f_map['<unk>'])

    fea_len = [list(map(lambda t: len(t) + 1, f)) for f in char_features]
    forw_features = concatChar(char_features, self.c_map)

    word_len = max(map(lambda t: len(t) + 1, word_features))
    char_len = max(
        map(lambda t: len(t[0]) + word_len - len(t[1]),
            zip(forw_features, word_features)))

    forw_t = list(map(lambda t: t + [self.pad_char] * (char_len - len(t)), forw_features))
    back_t = torch.LongTensor(list(map(lambda t: t[::-1], forw_t)))
    forw_t = torch.LongTensor(forw_t)
    forw_p = torch.LongTensor(
        list(map(lambda t: list(itertools.accumulate(t + [1] * (word_len - len(t)))), fea_len)))
    back_p = torch.LongTensor(
        list(map(lambda t: [char_len - 1] + [char_len - 1 - tup for tup in t[:-1]], forw_p)))

    masks = torch.ByteTensor(
        list(map(lambda t: [1] * (len(t) + 1) + [0] * (word_len - len(t) - 1), word_features)))
    word_t = torch.LongTensor(
        list(map(lambda t: t + [self.pad_word] * (word_len - len(t)), word_features)))

    if self.if_cuda:
        f_f = autograd.Variable(forw_t.transpose(0, 1)).cuda()
        f_p = autograd.Variable(forw_p.transpose(0, 1)).cuda()
        b_f = autograd.Variable(back_t.transpose(0, 1)).cuda()
        b_p = autograd.Variable(back_p.transpose(0, 1)).cuda()
        w_f = autograd.Variable(word_t.transpose(0, 1)).cuda()
        mask_v = masks.transpose(0, 1).cuda()
    else:
        f_f = autograd.Variable(forw_t.transpose(0, 1))
        f_p = autograd.Variable(forw_p.transpose(0, 1))
        b_f = autograd.Variable(back_t.transpose(0, 1))
        b_p = autograd.Variable(back_p.transpose(0, 1))
        w_f = autograd.Variable(word_t.transpose(0, 1))
        mask_v = masks.transpose(0, 1)

    scores = ner_model(f_f, f_p, b_f, b_p, w_f)
    decoded = self.decoder.decode(scores.data, mask_v)

    return decoded
def get_sequence_info(self, seq_id):
    bb_anno = torch.Tensor(self.sequence_list[seq_id]['anno'])
    # a box is valid only if it has positive width and height
    valid = (bb_anno[:, 2] > 0) & (bb_anno[:, 3] > 0)
    visible = torch.ByteTensor(self.sequence_list[seq_id]['target_visible']) & valid.byte()
    return {'bbox': bb_anno, 'valid': valid, 'visible': visible}
def __iter__(self):
    # Random permutation for the context
    idx_perm = range(0, self.context_num)
    if not self.eval:
        idx_perm = np.random.permutation(idx_perm)

    batch_size = self.batch_size
    for batch_i in range((self.context_num + self.batch_size - 1) // self.batch_size):
        batch_idx = idx_perm[self.batch_size * batch_i:self.batch_size * (batch_i + 1)]
        context_batch = [self.data['context'][i] for i in batch_idx]
        batch_size = len(context_batch)
        context_batch = list(zip(*context_batch))

        # Process Context Tokens
        context_len = max(len(x) for x in context_batch[0])
        if not self.eval:
            context_len = min(context_len, self.context_maxlen)
        context_id = torch.LongTensor(batch_size, context_len).fill_(0)
        for i, doc in enumerate(context_batch[0]):
            select_len = min(len(doc), context_len)
            context_id[i, :select_len] = torch.LongTensor(doc[:select_len])

        # Process Context POS Tags
        context_tag = torch.LongTensor(batch_size, context_len).fill_(0)
        for i, doc in enumerate(context_batch[1]):
            select_len = min(len(doc), context_len)
            context_tag[i, :select_len] = torch.LongTensor(doc[:select_len])

        # Process Context Named Entity
        context_ent = torch.LongTensor(batch_size, context_len).fill_(0)
        for i, doc in enumerate(context_batch[2]):
            select_len = min(len(doc), context_len)
            context_ent[i, :select_len] = torch.LongTensor(doc[:select_len])

        if self.precompute_elmo > 0:
            if batch_i % self.precompute_elmo == 0:
                precompute_idx = idx_perm[self.batch_size * batch_i:
                                          self.batch_size * (batch_i + self.precompute_elmo)]
                elmo_tokens = [self.data['context'][i][6] for i in precompute_idx]
                context_cid = batch_to_ids(elmo_tokens)
            else:
                context_cid = torch.LongTensor(1).fill_(0)
        else:
            context_cid = batch_to_ids(context_batch[6])

        # Process Questions (number = batch * Qseq)
        qa_data = self.data['qa']
        question_num, question_len = 0, 0
        question_batch = []
        for first_QID in context_batch[5]:
            i, question_seq = 0, []
            while True:
                # stop when the corresponding context ID is different
                if first_QID + i >= len(qa_data) or \
                        qa_data[first_QID + i][0] != qa_data[first_QID][0]:
                    break
                question_seq.append(first_QID + i)
                question_len = max(question_len, len(qa_data[first_QID + i][1]))
                i += 1
            question_batch.append(question_seq)
            question_num = max(question_num, i)

        question_id = torch.LongTensor(batch_size, question_num, question_len).fill_(0)
        question_tokens = []
        for i, q_seq in enumerate(question_batch):
            for j, id in enumerate(q_seq):
                doc = qa_data[id][1]
                question_id[i, j, :len(doc)] = torch.LongTensor(doc)
                question_tokens.append(qa_data[id][8])
            for j in range(len(q_seq), question_num):
                question_id[i, j, :2] = torch.LongTensor([2, 3])
                question_tokens.append(["<S>", "</S>"])
        question_cid = batch_to_ids(question_tokens)

        # Process Context-Question Features
        feature_len = len(qa_data[0][2][0])
        context_feature = torch.Tensor(
            batch_size, question_num, context_len,
            feature_len + (self.dialog_ctx * (self.use_dialog_act * 3 + 2))).fill_(0)
        for i, q_seq in enumerate(question_batch):
            for j, id in enumerate(q_seq):
                doc = qa_data[id][2]
                select_len = min(len(doc), context_len)
                context_feature[i, j, :select_len, :feature_len] = torch.Tensor(doc[:select_len])

                for prv_ctx in range(0, self.dialog_ctx):
                    if j > prv_ctx:
                        prv_id = id - prv_ctx - 1
                        prv_ans_st, prv_ans_end, prv_ans_choice = \
                            qa_data[prv_id][3], qa_data[prv_id][4], qa_data[prv_id][5]

                        # dialog act: don't follow-up, follow-up, maybe follow-up
                        # (prv_ans_choice // 10)
                        if self.use_dialog_act:
                            context_feature[i, j, :select_len,
                                            feature_len + prv_ctx * (self.use_dialog_act * 3 + 2)
                                            + 2 + (prv_ans_choice // 10)] = 1

                        if prv_ans_choice == 0:
                            # indicating that the previous reply is NO ANSWER
                            context_feature[i, j, :select_len,
                                            feature_len + prv_ctx * (self.use_dialog_act * 3 + 2)
                                            + 1] = 1
                            continue

                        # There is an answer
                        for k in range(prv_ans_st, prv_ans_end + 1):
                            if k >= context_len:
                                break
                            context_feature[i, j, k,
                                            feature_len + prv_ctx * (self.use_dialog_act * 3 + 2)] = 1

        # Process Answer (w/ raw question, answer text)
        answer_s = torch.LongTensor(batch_size, question_num).fill_(0)
        answer_e = torch.LongTensor(batch_size, question_num).fill_(0)
        answer_c = torch.LongTensor(batch_size, question_num).fill_(0)
        overall_mask = torch.ByteTensor(batch_size, question_num).fill_(0)
        question, answer = [], []
        for i, q_seq in enumerate(question_batch):
            question_pack, answer_pack = [], []
            for j, id in enumerate(q_seq):
                answer_s[i, j], answer_e[i, j], answer_c[i, j] = \
                    qa_data[id][3], qa_data[id][4], qa_data[id][5]
                overall_mask[i, j] = 1
                question_pack.append(qa_data[id][6])
                answer_pack.append(qa_data[id][7])
            question.append(question_pack)
            answer.append(answer_pack)

        # Process Masks
        context_mask = torch.eq(context_id, 0)
        question_mask = torch.eq(question_id, 0)

        text = list(context_batch[3])  # raw text
        span = list(context_batch[4])  # character span for each word

        if self.use_bert is None:
            context_bert = None
            context_bert_mask = None
            context_bert_offsets = None
            question_bert = None
            question_bert_mask = None
            question_bert_offsets = None
        else:
            pass  # BERT feature construction omitted in this snippet

        if self.gpu:  # page locked memory for async data transfer
            context_id = context_id.pin_memory()
            context_feature = context_feature.pin_memory()
            context_tag = context_tag.pin_memory()
            context_ent = context_ent.pin_memory()
            context_mask = context_mask.pin_memory()
            question_id = question_id.pin_memory()
            question_mask = question_mask.pin_memory()
            answer_s = answer_s.pin_memory()
            answer_e = answer_e.pin_memory()
            answer_c = answer_c.pin_memory()
            overall_mask = overall_mask.pin_memory()
            context_cid = context_cid.pin_memory()
            question_cid = question_cid.pin_memory()
            if self.use_bert:
                context_bert = context_bert.pin_memory()
                context_bert_mask = context_bert_mask.pin_memory()
                context_bert_offsets = context_bert_offsets.pin_memory()
                question_bert = question_bert.pin_memory()
                question_bert_mask = question_bert_mask.pin_memory()
                question_bert_offsets = question_bert_offsets.pin_memory()

        yield (context_id, context_cid, context_feature, context_tag,
               context_ent, context_mask, question_id, question_cid,
               question_mask, overall_mask, answer_s, answer_e, answer_c,
               text, span, question, answer, context_bert, context_bert_mask,
               context_bert_offsets, question_bert, question_bert_mask,
               question_bert_offsets)
if config.if_shuffle:
    shuffle(train_all_batches)

batch_counter = 0
start_time = time.time()
ner_model.train()
num_back = 0
for input_ids_batch, input_mask_batch, first_subtokens_batch, \
        last_subtokens_batch, label_batch, mask_batch in train_all_batches:
    batch_len = max([len(first_subtokens) for first_subtokens in first_subtokens_batch])

    input_ids_batch_var = torch.LongTensor(np.array(input_ids_batch))
    input_mask_batch_var = torch.LongTensor(np.array(input_mask_batch))
    mask_batch_var = torch.ByteTensor(np.array(mask_batch, dtype=np.uint8))
    if config.if_gpu:
        input_ids_batch_var = input_ids_batch_var.cuda()
        input_mask_batch_var = input_mask_batch_var.cuda()
        mask_batch_var = mask_batch_var.cuda()

    optimizer.zero_grad()
    loss = ner_model.forward(input_ids_batch_var, input_mask_batch_var,
                             first_subtokens_batch, last_subtokens_batch,
                             label_batch, mask_batch_var)
    loss.backward()
    clip_model_grad(ner_model, config.clip_norm)
    batch_counter += 1

    optimizer.step(None)
def build_targets(target, anchor_wh, nA, nC, nG):
    """
    returns txy, twh, tconf, tcls
    """
    nB = len(target)  # number of images in batch
    nT = [len(x) for x in target]
    txy = torch.zeros(nB, nA, nG, nG, 2)  # batch size, anchors, grid size
    twh = torch.zeros(nB, nA, nG, nG, 2)
    tconf = torch.ByteTensor(nB, nA, nG, nG).fill_(0)
    tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0)  # nC = number of classes

    for b in range(nB):
        nTb = nT[b]  # number of targets
        if nTb == 0:
            continue
        t = target[b]
        gxy, gwh = t[:, 1:3] * nG, t[:, 3:5] * nG

        # Get grid box indices and prevent overflows (i.e. 13.01 on 13 anchors)
        gi, gj = torch.clamp(gxy.long(), min=0, max=nG - 1).t()

        # iou of targets-anchors (using wh only)
        box1 = gwh
        box2 = anchor_wh.unsqueeze(1)
        inter_area = torch.min(box1, box2).prod(2)
        iou = inter_area / (box1.prod(1) + box2.prod(2) - inter_area + 1e-16)

        # Select best iou_pred and anchor
        iou_best, a = iou.max(0)  # best anchor [0-2] for each target

        # Select best unique target-anchor combinations
        if nTb > 1:
            iou_order = torch.argsort(-iou_best)  # best to worst

            # Unique anchor selection
            u = torch.cat((gi, gj, a), 0).view((3, -1))
            # u = torch.stack((gi, gj, a), 0)
            _, first_unique = np.unique(u[:, iou_order], axis=1, return_index=True)  # first unique indices
            # _, first_unique = torch.unique(u[:, iou_order], dim=1, return_inverse=True)  # different than numpy?

            i = iou_order[first_unique]
            # best anchor must share significant commonality (iou) with target
            i = i[iou_best[i] > 0.10]  # TODO: arbitrary threshold is problematic
            if len(i) == 0:
                continue

            a, gj, gi, t = a[i], gj[i], gi[i], t[i]
            if len(t.shape) == 1:
                t = t.view(1, 5)
        else:
            if iou_best < 0.10:
                continue

        tc, gxy, gwh = t[:, 0].long(), t[:, 1:3] * nG, t[:, 3:5] * nG

        # XY coordinates
        txy[b, a, gj, gi] = gxy - gxy.floor()

        # Width and height
        twh[b, a, gj, gi] = torch.log(gwh / anchor_wh[a])  # yolo method
        # twh[b, a, gj, gi] = torch.sqrt(gwh / anchor_wh[a]) / 2  # power method

        # One-hot encoding of label
        tcls[b, a, gj, gi, tc] = 1
        tconf[b, a, gj, gi] = 1

    return txy, twh, tconf, tcls
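The anchor matching above compares boxes by width/height only, as if they shared a center, so the intersection reduces to min(w1, w2) * min(h1, h2). A small self-contained sketch of that IoU computation on toy tensors (the values are illustrative):

import torch

gwh = torch.tensor([[4., 6.], [10., 3.]])                  # 2 target boxes (w, h)
anchor_wh = torch.tensor([[3., 5.], [8., 8.], [12., 2.]])  # 3 anchor boxes (w, h)

# intersection of center-aligned boxes: elementwise min, then w * h
inter = torch.min(gwh.unsqueeze(0), anchor_wh.unsqueeze(1)).prod(2)  # shape 3x2
iou = inter / (gwh.prod(1) + anchor_wh.prod(1).unsqueeze(1) - inter + 1e-16)
iou_best, a = iou.max(0)  # best anchor index for each target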
# [torch.FloatTensor of size 4x3]
# out:
# 0.8403 0.1383 0.5636
# 0.1963 0.2446 0.8257
# [torch.FloatTensor of size 2x3]
a1 = a[:, 0]       # all rows, first column
a2 = a[[0, 1], :]  # first two rows, all columns (same as out above)
a3 = a[0:2, 0:2]   # first two rows, first two columns
print(a1, "\n", a2, "\n", a3)

x = torch.Tensor([[1, 2, 3], [3, 4, 5]])
# 1 2 3
# 3 4 5
# [torch.FloatTensor of size 2x3]
mask = torch.ByteTensor([[0, 0, 1], [0, 1, 0]])
# 0 0 1
# 0 1 0
# [torch.ByteTensor of size 2x3]
out = torch.masked_select(x, mask)
# 3
# 4
# [torch.FloatTensor of size 2]
print(x, "\n", mask, "\n", out)

#%%
# 2.2 Joining
x = torch.FloatTensor([[1, 2, 3], [4, 5, 6]])        # 2x3
y = torch.FloatTensor([[-1, -2, -3], [-4, -5, -6]])  # 2x3
z1 = torch.cat([x, y], dim=0)  # stack along the first dimension -> 4x3
# 1 2 3
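# Note (an assumption about newer releases, not part of the original notebook):
# since PyTorch 1.2, boolean masks are preferred and ByteTensor masks emit a
# deprecation warning, so the masked_select example above would be written as:
import torch

x2 = torch.Tensor([[1, 2, 3], [3, 4, 5]])
mask_bool = torch.tensor([[False, False, True], [False, True, False]])
out_bool = torch.masked_select(x2, mask_bool)  # tensor([3., 4.])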