Example 1
def main():
    parser = argparse.ArgumentParser(description='GraphSAGE')
    parser.add_argument("--dataset", type=str, default='reddit')
    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--dropout", type=float, default=0.5,
                        help="dropout probability")
    parser.add_argument("--lr", type=float, default=1e-2,
                        help="learning rate")
    parser.add_argument("--epochs", type=int, default=200,
                        help="number of training epochs")
    parser.add_argument("--n-hidden", type=int, default=16,
                        help="number of hidden gcn units")
    parser.add_argument("--aggr", type=str, choices=['sum', 'mean'], default='mean',
                        help='Aggregation for messages')
    parser.add_argument("--weight-decay", type=float, default=5e-4,
                        help="Weight for L2 loss")
    parser.add_argument("--eval", action='store_true',
                        help='If not set, we will only do the training part.')
    parser.add_argument("--runs", type=int, default=10)
    args = parser.parse_args()
    print(args)
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    # Remove duplicate edges
    # In PyG, this is a default pre-processing step for Reddit, see
    # https://github.com/rusty1s/pytorch_geometric/blob/master/torch_geometric/datasets/reddit.py#L58
    g = data.graph
    g = g.int().to(device)

    # create GraphSAGE model
    model = GraphSAGE(g,
                      in_feats,
                      args.n_hidden,
                      n_classes,
                      args.aggr,
                      F.relu,
                      args.dropout).to(device)

    loss_fcn = nn.CrossEntropyLoss()

    logger = Logger(args.runs, args)
    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        for epoch in range(args.epochs):
            model.train()
            if epoch >= 3:
                t0 = time.time()
            # forward
            logits = model(features)
            loss = loss_fcn(logits[train_mask], labels[train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))

            if not args.eval:
                continue

            train_acc, val_acc, test_acc = evaluate(model, features, labels, train_mask, val_mask, test_mask)
            logger.add_result(run, (train_acc, val_acc, test_acc))

            print("Run {:02d} | Epoch {:05d} | Loss {:.4f} | Train {:.4f} | Val {:.4f} | Test {:.4f}".format(run, epoch, loss.item(), train_acc, val_acc, test_acc))

        if args.eval:
            logger.print_statistics(run)

    if args.eval:
        logger.print_statistics()
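The snippet above calls an evaluate helper and a Logger that are defined elsewhere in the project. A minimal sketch of what the accuracy helper might look like (hypothetical, assuming the model maps the full feature matrix to per-node logits):

import torch

def evaluate(model, features, labels, train_mask, val_mask, test_mask):
    # Hypothetical stand-in for the project's helper: full-batch accuracy
    # on the train/val/test node masks.
    model.eval()
    with torch.no_grad():
        preds = model(features).argmax(dim=1)
        accs = []
        for mask in (train_mask, val_mask, test_mask):
            correct = (preds[mask] == labels[mask]).float().sum()
            accs.append((correct / mask.int().sum()).item())
    return tuple(accs)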
Example 2
def construct_bucket_vb_wc(word_features, forw_features, fea_len, input_labels,
                           thresholds, pad_word_feature, pad_char_feature,
                           pad_label, label_size):
    """
    Construct bucket by thresholds for viterbi decode, word-level and char-level
    """
    # construct corpus for language model pre-training
    forw_corpus = [pad_char_feature] + list(
        reduce(lambda x, y: x + [pad_char_feature] + y,
               forw_features)) + [pad_char_feature]
    back_corpus = forw_corpus[::-1]
    # two way construct, first build the bucket, then calculate padding length, then do the padding
    buckets = [[[], [], [], [], [], [], [], []]
               for ind in range(len(thresholds))]
    # forw, forw_ind, back, back_in, label, mask
    buckets_len = [0 for ind in range(len(thresholds))]

    # thresholds is the padded length for fea
    # buckets_len is the padded length for char
    for f_f, f_l in zip(forw_features, fea_len):
        cur_len_1 = len(f_l) + 1
        idx = 0
        while thresholds[idx] < cur_len_1:
            idx += 1
        tmp_concat_len = len(f_f) + thresholds[idx] - len(f_l)
        if buckets_len[idx] < tmp_concat_len:
            buckets_len[idx] = tmp_concat_len

    # calc padding
    for f_f, f_l, w_f, i_l in zip(forw_features, fea_len, word_features,
                                  input_labels):
        cur_len = len(f_l)
        idx = 0
        cur_len_1 = cur_len + 1
        while thresholds[idx] < cur_len_1:
            idx += 1

        # pad feature with <'\n'>, at least one
        padded_feature = f_f + [pad_char_feature] * (buckets_len[idx] - len(f_f))

        # pad feature length with <'\n'>, at least one
        padded_feature_len = f_l + [1] * (thresholds[idx] - len(f_l))
        # starts from 0, but the first char is ' ', so the positions do not need a -1 shift
        padded_feature_len_cum = list(itertools.accumulate(padded_feature_len))
        buckets[idx][0].append(padded_feature)  # char
        buckets[idx][1].append(padded_feature_len_cum)
        buckets[idx][2].append(padded_feature[::-1])
        buckets[idx][3].append([buckets_len[idx] - 1] + [
            buckets_len[idx] - 1 - tup for tup in padded_feature_len_cum[:-1]
        ])
        buckets[idx][4].append(w_f + [pad_word_feature] *
                               (thresholds[idx] - cur_len))  #word
        buckets[idx][5].append(
            [
                i_l[ind] * label_size + i_l[ind + 1]
                for ind in range(0, cur_len)
            ] + [i_l[cur_len] * label_size + pad_label] +
            [pad_label * label_size + pad_label] *
            (thresholds[idx] - cur_len_1))  # has additional start, label
        buckets[idx][6].append(
            [1] * cur_len_1 + [0] *
            (thresholds[idx] - cur_len_1))  # has additional start, mask
        buckets[idx][7].append(
            [len(f_f) + thresholds[idx] - len(f_l), cur_len_1])
    bucket_dataset = [
        CRFDataset_WC(torch.LongTensor(bucket[0]), torch.LongTensor(bucket[1]),
                      torch.LongTensor(bucket[2]), torch.LongTensor(bucket[3]),
                      torch.LongTensor(bucket[4]), torch.LongTensor(bucket[5]),
                      torch.ByteTensor(bucket[6]), torch.LongTensor(bucket[7]))
        for bucket in buckets
    ]
    return bucket_dataset, forw_corpus, back_corpus
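The bucketing rule above is easiest to see on toy data: each sentence goes into the first bucket whose threshold can hold its word-level length plus the extra start position. A self-contained illustration with made-up lengths (no CRFDataset_WC involved):

thresholds = [4, 8, 16]  # padded word-level length per bucket
for f_l in ([3], [1, 2, 3, 4, 5], [2] * 10):
    cur_len_1 = len(f_l) + 1  # +1 for the additional start position
    idx = 0
    while thresholds[idx] < cur_len_1:
        idx += 1
    print(len(f_l), 'words -> bucket', idx, ', padded to', thresholds[idx])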
Example 3
File: train.py Project: hacors/Drug
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if args.self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()

    # create TAGCN model
    model = TAGCN(g, in_feats, args.n_hidden, n_classes, args.n_layers, F.relu,
                  args.dropout)

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, features, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
Example 4
def make_permute(feature, reuse_len, seq_len, perm_size, num_predict):

    inputs = torch.LongTensor(feature.pop("input"))
    target = torch.LongTensor(feature.pop("target"))
    is_masked = torch.ByteTensor(feature.pop("is_masked"))

    non_reuse_len = seq_len - reuse_len
    assert perm_size <= reuse_len and perm_size <= non_reuse_len

    # (reuse, reuse), (reuse,), (reuse,), (reuse,), (reuse,)
    perm_mask_0, target_0, target_mask_0, input_k_0, input_q_0 = _local_perm(
        inputs[:reuse_len],  # inp
        target[:reuse_len],
        is_masked[:reuse_len],
        perm_size,
        reuse_len)

    # (non_reuse, non_reuse), (non_reuse,), (non_reuse,), (non_reuse,), (non_reuse,)
    perm_mask_1, target_1, target_mask_1, input_k_1, input_q_1 = _local_perm(
        inputs[reuse_len:],  # (senA, sep, senB, sep, cls)
        target[reuse_len:],
        is_masked[reuse_len:],
        perm_size,
        non_reuse_len)

    # (reuse, seq) / one last append
    perm_mask_0 = torch.cat(
        [perm_mask_0, torch.ones([reuse_len, non_reuse_len])], dim=1)
    # (non_reuse, seq) / zero first append
    perm_mask_1 = torch.cat(
        [torch.zeros([non_reuse_len, reuse_len]), perm_mask_1], dim=1)

    # (seq, seq)
    perm_mask = torch.cat([perm_mask_0, perm_mask_1], dim=0)
    # (seq)
    target = torch.cat([target_0, target_1], dim=0)
    # (seq)
    target_mask = torch.cat([target_mask_0, target_mask_1], dim=0)
    # (seq)
    input_k = torch.cat([input_k_0, input_k_1], dim=0)
    # (seq)
    input_q = torch.cat([input_q_0, input_q_1], dim=0)

    if num_predict is not None:
        # (0 .. seq-1)
        indices = torch.arange(seq_len, dtype=torch.int64)
        bool_target_mask = target_mask.bool()
        # (predict,)
        indices = indices[bool_target_mask]

        ##### extra padding due to CLS/SEP introduced after prepro
        actual_num_predict = indices.shape[0]
        # zero when num_predict == actual_num_predict
        pad_len = num_predict - actual_num_predict

        assert seq_len >= actual_num_predict

        ##### target_mapping
        # (predict, seq)
        target_mapping = torch.eye(seq_len, dtype=torch.float32)[indices]
        # (pad_len, seq)
        paddings = torch.zeros([pad_len, seq_len], dtype=target_mapping.dtype)
        # (predict, seq)
        target_mapping = torch.cat([target_mapping, paddings], dim=0)
        feature["target_mapping"] = torch.reshape(target_mapping,
                                                  [num_predict, seq_len])
        ##### target
        # (predict,)
        target = target[bool_target_mask]
        # (pad_len,)
        paddings = torch.zeros([pad_len], dtype=target.dtype)
        # (predict,)
        target = torch.cat([target, paddings], dim=0)
        feature["target"] = torch.reshape(target, [num_predict])

        ##### target mask
        # (predict,)
        target_mask = torch.cat([
            torch.ones([actual_num_predict], dtype=torch.float32),
            torch.zeros([pad_len], dtype=torch.float32)
        ],
                                dim=0)
        feature["target_mask"] = torch.reshape(target_mask, [num_predict])
    else:
        feature["target"] = torch.reshape(target, [seq_len])
        feature["target_mask"] = torch.reshape(target_mask, [seq_len])

    # reshape back to fixed shape
    # (seq,)
    feature["seg_id"] = torch.IntTensor(feature["seg_id"])
    # (seq, seq)
    feature["perm_mask"] = torch.reshape(perm_mask, [seq_len, seq_len])
    # (seq,)
    feature["input_k"] = torch.reshape(input_k, [seq_len])
    # (seq,)
    feature["input_q"] = torch.reshape(input_q, [seq_len])

    return feature
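The target_mapping block above selects rows of an identity matrix, producing a one-hot map from each prediction slot to its position in the sequence. A standalone check with toy sizes (values invented for illustration):

import torch

seq_len, num_predict = 6, 3
target_mask = torch.tensor([0., 1., 0., 1., 1., 0.])
indices = torch.arange(seq_len, dtype=torch.int64)[target_mask.bool()]
target_mapping = torch.eye(seq_len, dtype=torch.float32)[indices]
pad_len = num_predict - indices.shape[0]
target_mapping = torch.cat(
    [target_mapping, torch.zeros([pad_len, seq_len])], dim=0)
print(target_mapping)  # one-hot rows at positions 1, 3 and 4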
Example 5
    def forward(
            self,  # type: ignore
            words,
            words_embeds,  #: Dict[str, torch.LongTensor],
            pos_tags: torch.LongTensor = None,
            head_tags: torch.LongTensor = None,
            head_indices: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        words : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, sequence_length)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        pos_tags : ``torch.LongTensor``, required.
            The output of a ``SequenceLabelField`` containing POS tags.
            POS tags are required regardless of whether they are used in the model,
            because they are used to filter the evaluation metric to only consider
            heads of words which are not punctuation.
        head_tags : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels for the arcs
            in the dependency parse. Has shape ``(batch_size, sequence_length)``.
        head_indices : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer indices denoting the parent of every
            word in the dependency parse. Has shape ``(batch_size, sequence_length)``.

        Returns
        -------
        An output dictionary consisting of:
        loss : ``torch.FloatTensor``, optional
            A scalar loss to be optimised.
        arc_loss : ``torch.FloatTensor``
            The loss contribution from the unlabeled arcs.
        tag_loss : ``torch.FloatTensor``, optional
            The loss contribution from predicting the dependency
            tags for the gold arcs.
        heads : ``torch.FloatTensor``
            The predicted head indices for each word. A tensor
            of shape (batch_size, sequence_length).
        head_types : ``torch.FloatTensor``
            The predicted head types for each arc. A tensor
            of shape (batch_size, sequence_length).
        mask : ``torch.LongTensor``
            A mask denoting the padded elements in the batch.
        """
        # LISA
        # embedded_text_input = words_embeds.view(1, len(words_embeds), -1) # self.text_field_embedder(words)
        # LISA2
        embedded_text_input = words_embeds  # self.text_field_embedder(words)

        bsz, seqlen, dim = words_embeds.shape
        # mask = get_text_field_mask(words)
        # LISA
        # mask = torch.LongTensor([1 for _ in words]).view(1, -1)
        # LISA2
        # mask = torch.LongTensor(bsz, seqlen).fill_(1)
        mask = torch.ByteTensor(bsz, seqlen).fill_(1).to(self.device)

        embedded_text_input = self._input_dropout(embedded_text_input)

        encoded_text = self.encoder(embedded_text_input, mask)

        batch_size, _, encoding_dim = encoded_text.size()

        head_sentinel = self._head_sentinel.expand(batch_size, 1, encoding_dim)
        # Concatenate the head sentinel onto the sentence representation.
        encoded_text = torch.cat([head_sentinel, encoded_text], 1)
        mask = torch.cat([mask.new_ones(batch_size, 1), mask], 1)

        if head_indices is not None:
            head_indices = torch.cat(
                [head_indices.new_zeros(batch_size, 1), head_indices], 1)
        if head_tags is not None:
            head_tags = torch.cat(
                [head_tags.new_zeros(batch_size, 1), head_tags], 1)

        float_mask = mask.float()
        encoded_text = self._dropout(encoded_text)

        # shape (batch_size, sequence_length, arc_representation_dim)
        head_arc_representation = self._dropout(
            self.head_arc_feedforward(encoded_text))
        child_arc_representation = self._dropout(
            self.child_arc_feedforward(encoded_text))

        # shape (batch_size, sequence_length, tag_representation_dim)
        head_tag_representation = self._dropout(
            self.head_tag_feedforward(encoded_text))
        child_tag_representation = self._dropout(
            self.child_tag_feedforward(encoded_text))
        # shape (batch_size, sequence_length, sequence_length)
        attended_arcs = self.arc_attention(head_arc_representation,
                                           child_arc_representation)

        minus_inf = -1e8
        minus_mask = (1 - float_mask) * minus_inf
        attended_arcs = attended_arcs + minus_mask.unsqueeze(
            2) + minus_mask.unsqueeze(1)

        if self.training or not self.use_mst_decoding_for_validation:
            predicted_heads, predicted_head_tags = self._greedy_decode(
                head_tag_representation, child_tag_representation,
                attended_arcs, mask)
        else:
            predicted_heads, predicted_head_tags = self._mst_decode(
                head_tag_representation, child_tag_representation,
                attended_arcs, mask)
        if head_indices is not None and head_tags is not None:

            arc_nll, tag_nll = self._construct_loss(
                head_tag_representation=head_tag_representation,
                child_tag_representation=child_tag_representation,
                attended_arcs=attended_arcs,
                head_indices=head_indices,
                head_tags=head_tags,
                mask=mask)
            loss = arc_nll + tag_nll

            evaluation_mask = self._get_mask_for_eval(mask[:, 1:], pos_tags)
            # We calculate attachment scores for the whole sentence
            # but excluding the symbolic ROOT token at the start,
            # which is why we start from the second element in the sequence.
            self._attachment_scores(predicted_heads[:, 1:],
                                    predicted_head_tags[:, 1:],
                                    head_indices[:, 1:],
                                    head_tags[:, 1:], evaluation_mask)
        else:
            arc_nll, tag_nll = self._construct_loss(
                head_tag_representation=head_tag_representation,
                child_tag_representation=child_tag_representation,
                attended_arcs=attended_arcs,
                head_indices=predicted_heads.long(),
                head_tags=predicted_head_tags.long(),
                mask=mask)
            loss = arc_nll + tag_nll

        output_dict = {
            "heads": predicted_heads,
            "head_tags": predicted_head_tags,
            "arc_loss": arc_nll,
            "tag_loss": tag_nll,
            "loss": loss,
            "mask": mask,
        }

        return output_dict
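The minus_mask construction in the forward pass pushes arc scores involving padded positions toward -1e8, so softmax assigns them near-zero probability. The same pattern in isolation, on toy tensors:

import torch

mask = torch.tensor([[1, 1, 0]])  # batch of one sentence, last position padded
float_mask = mask.float()
attended_arcs = torch.zeros(1, 3, 3)

minus_inf = -1e8
minus_mask = (1 - float_mask) * minus_inf
attended_arcs = attended_arcs + minus_mask.unsqueeze(2) + minus_mask.unsqueeze(1)
# The padded column now gets ~0 attention weight in every valid row.
print(attended_arcs.softmax(dim=-1)[0])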
Example 6
def main(args):
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    multitask_data = set(['ppi'])
    multitask = args.dataset in multitask_data

    # load and preprocess dataset
    data = load_data(args)

    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)

    # Normalize features
    if args.normalize:
        train_feats = data.features[train_nid]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats)
        features = scaler.transform(data.features)
    else:
        features = data.features

    features = torch.FloatTensor(features)
    if not multitask:
        labels = torch.LongTensor(data.labels)
    else:
        labels = torch.FloatTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask).type(torch.bool)
    val_mask = torch.ByteTensor(data.val_mask).type(torch.bool)
    test_mask = torch.ByteTensor(data.test_mask).type(torch.bool)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.sum().item()
    n_val_samples = val_mask.sum().item()
    n_test_samples = test_mask.sum().item()

    print("""----Data statistics------'
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples, n_test_samples))
    # create GCN model
    g = data.graph
    if args.self_loop and not args.dataset.startswith('reddit'):
        g.remove_edges_from(g.selfloop_edges())
        g.add_edges_from(zip(g.nodes(), g.nodes()))
        print("adding self-loop edges")
    g = DGLGraph(g, readonly=True)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    if cuda:
        print(torch.cuda.get_device_name(args.gpu))

    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    print('labels shape:', labels.shape)

    cluster_iterator = ClusterIter(args.dataset,
                                   g,
                                   args.psize,
                                   args.batch_size,
                                   train_nid,
                                   use_pp=args.use_pp)

    print("features shape, ", features.shape)

    model = GraphSAGE(in_feats, args.n_hidden, n_classes, args.n_layers,
                      F.relu, args.dropout, args.use_pp)

    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    writer = SummaryWriter(log_dir)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
    print("current memory after model before training",
          torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)
    start_time = time.time()
    best_f1 = -1

    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(cluster_iterator):
            # sync with upper level training graph
            cluster.copy_from_parent()
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['labels']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # in PPI case, `log_every` is chosen to log one time per epoch.
            # Choose your log freq dynamically when you want more info within one epoch
            if j % args.log_every == 0:
                print(
                    f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                    f"{len(cluster_iterator)}:training loss", loss.item())
                writer.add_scalar('train/loss',
                                  loss.item(),
                                  global_step=j +
                                  epoch * len(cluster_iterator))
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate
        if epoch % args.val_every == 0:
            val_f1_mic, val_f1_mac = evaluate(model, g, labels, val_mask,
                                              multitask)
            print("Val F1-mic{:.4f}, Val F1-mac{:.4f}".format(
                val_f1_mic, val_f1_mac))
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(),
                           os.path.join(log_dir, 'best_model.pkl'))
            writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
            writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)

    end_time = time.time()
    print(f'training time: {end_time - start_time}')

    # test
    if args.use_val:
        model.load_state_dict(
            torch.load(os.path.join(log_dir, 'best_model.pkl')))
    test_f1_mic, test_f1_mac = evaluate(model, g, labels, test_mask, multitask)
    print("Test F1-mic{:.4f}, Test F1-mac{:.4f}".format(
        test_f1_mic, test_f1_mac))
    writer.add_scalar('test/f1-mic', test_f1_mic)
    writer.add_scalar('test/f1-mac', test_f1_mac)
Example 7
    def forward(self, predictions, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)

            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """
        arm_loc_data, arm_conf_data, trm_loc_data1, trm_conf_data1, trm_loc_data2, trm_conf_data2, trm_loc_data3, trm_conf_data3, priors = predictions
        #print(arm_loc_data.size(), arm_conf_data.size(),
        #      odm_loc_data.size(), odm_conf_data.size(), priors.size())
        #input()
        if self.use_ARM:
            loc_data1, conf_data1 = trm_loc_data1, trm_conf_data1
            loc_data2, conf_data2 = trm_loc_data2, trm_conf_data2
            loc_data3, conf_data3 = trm_loc_data3, trm_conf_data3
        # assert loc_data1.size == loc_data2.size == loc_data3.size
        num = loc_data1.size(0)
        priors = priors[:loc_data1.size(1), :]
        num_priors = (priors.size(0))
        num_classes = self.num_classes
        #print(loc_data.size(), conf_data.size(), priors.size())
        # init valid_scale_index
        pos_for_small = torch.ByteTensor(num, num_priors)
        pos_for_middle = torch.ByteTensor(num, num_priors)
        pos_for_big = torch.ByteTensor(num, num_priors)
        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            if num_classes == 2:
                labels = labels >= 0
            defaults = priors.data
            if self.use_ARM:
                matches = refine_match_return_matches(self.threshold, truths,
                                                      defaults, self.variance,
                                                      labels, loc_t, conf_t,
                                                      idx,
                                                      arm_loc_data[idx].data)
            else:
                matches = refine_match_return_matches(self.threshold, truths,
                                                      defaults, self.variance,
                                                      labels, loc_t, conf_t,
                                                      idx)
            pos_for_small[
                idx], pos_for_middle[idx], pos_for_big[idx] = scaleAssign(
                    matches, conf_t, idx
                )  # matches: using ARM loc as priors to match with pred loc
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets
        #loc_t = Variable(loc_t, requires_grad=False)
        #conf_t = Variable(conf_t, requires_grad=False)
        loc_t.requires_grad = False
        conf_t.requires_grad = False
        #print(loc_t.size(), conf_t.size())

        if self.use_ARM:
            P = F.softmax(arm_conf_data, 2)
            arm_conf_tmp = P[:, :, 1]
            object_score_index = arm_conf_tmp <= self.theta
            pos_for_small[object_score_index.data] = 0

            pos_for_middle[object_score_index.data] = 0

            pos_for_big[object_score_index.data] = 0
            pos = conf_t > 0
            pos[object_score_index.data] = 0
            if not self.use_multiscale:
                pos_for_small = pos
                pos_for_middle = pos
                pos_for_big = pos

        pos_for_small = pos_for_small.cuda()
        pos_for_middle = pos_for_middle.cuda()
        pos_for_big = pos_for_big.cuda()

        #print(pos.size())
        #num_pos = pos.sum(dim=1, keepdim=True)

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]

        loss_l_for_small = self.computeSmothL1Loss(pos_for_WHAT=pos_for_small,
                                                   loc_pred=loc_data1,
                                                   loc_thruth=loc_t)

        loss_l_for_middle = self.computeSmothL1Loss(
            pos_for_WHAT=pos_for_middle, loc_pred=loc_data2, loc_thruth=loc_t)
        loss_l_for_big = self.computeSmothL1Loss(pos_for_WHAT=pos_for_big,
                                                 loc_pred=loc_data3,
                                                 loc_thruth=loc_t)
        '''
        pos_for_middle_idx = pos_for_middle.unsqueeze(pos_for_middle.dim()).expand_as(loc_data2)
        loc_p2 = loc_data2[pos_for_middle_idx].view(-1, 4)
        loc_t = loc_t[pos_for_middle_idx].view(-1, 4)
        loss_l_for_middle = F.smooth_l1_loss(loc_p2, loc_t, reduction='sum')

        pos_for_big_idx = pos_for_big.unsqueeze(pos_for_big.dim()).expand_as(loc_data3)
        loc_p3 = loc_data3[pos_for_big_idx].view(-1, 4)
        loc_t = loc_t[pos_for_big_idx].view(-1, 4)
        loss_l_for_big = F.smooth_l1_loss(loc_p3, loc_t, reduction='sum')
        '''

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data1.view(-1, self.num_classes)
        loss_c_for_small = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))
        batch_conf = conf_data2.view(-1, self.num_classes)
        loss_c_for_middle = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))
        batch_conf = conf_data3.view(-1, self.num_classes)
        loss_c_for_big = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))
        #print(loss_c.size())

        loss_conf_for_small, num_pos_for_small = self.computeCrossEntropy(
            loss_c_for_WHAT=loss_c_for_small,
            num_batch=num,
            pos_for_WHAT=pos_for_small,
            conf_data=conf_data1,
            conf_truth=conf_t)
        loss_conf_for_middle, num_pos_for_middle = self.computeCrossEntropy(
            loss_c_for_WHAT=loss_c_for_middle,
            num_batch=num,
            pos_for_WHAT=pos_for_middle,
            conf_data=conf_data2,
            conf_truth=conf_t)
        loss_conf_for_big, num_pos_for_big = self.computeCrossEntropy(
            loss_c_for_WHAT=loss_c_for_big,
            num_batch=num,
            pos_for_WHAT=pos_for_big,
            conf_data=conf_data3,
            conf_truth=conf_t)

        # # Hard Negative Mining
        # loss_c_for_small[pos_for_small.view(-1,1)] = 0  # filter out pos boxes for now
        # loss_c_for_small = loss_c_for_small.view(num, -1)
        # _, loss_idx = loss_c_for_small.sort(1, descending=True)
        # _, idx_rank = loss_idx.sort(1)
        # num_pos = pos_for_small.long().sum(1, keepdim=True)
        # num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos_for_small.size(1)-1)
        # neg = idx_rank < num_neg.expand_as(idx_rank)
        # neg = neg.long()
        # #print(num_pos.size(), num_neg.size(), neg.size())
        #
        # # Confidence Loss Including Positive and Negative Examples
        # pos_idx = pos_for_small.unsqueeze(2).expand_as(conf_data1)
        # neg_idx = neg.unsqueeze(2).expand_as(conf_data1)
        # conf_p = conf_data1[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)
        # targets_weighted = conf_t[(pos_for_small+neg).gt(0)]
        # #print(pos_idx.size(), neg_idx.size(), conf_p.size(), targets_weighted.size())
        # loss_c_for_small = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
        # num_pos_for_small = num_pos
        #
        # # Hard Negative Mining
        # loss_c_for_middle[pos_for_middle.view(-1,1)] = 0  # filter out pos boxes for now
        # loss_c_for_middle = loss_c_for_middle.view(num, -1)
        # _, loss_idx = loss_c_for_middle.sort(1, descending=True)
        # _, idx_rank = loss_idx.sort(1)
        # num_pos = pos_for_middle.long().sum(1, keepdim=True)
        # num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos_for_middle.size(1)-1)
        # neg = idx_rank < num_neg.expand_as(idx_rank)
        # neg = neg.long()
        # #print(num_pos.size(), num_neg.size(), neg.size())
        #
        # # Confidence Loss Including Positive and Negative Examples
        # pos_idx = pos_for_middle.unsqueeze(2).expand_as(conf_data2)
        # neg_idx = neg.unsqueeze(2).expand_as(conf_data2).long()
        # conf_p = conf_data2[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)
        # targets_weighted = conf_t[(pos_for_middle+neg).gt(0)]
        # #print(pos_idx.size(), neg_idx.size(), conf_p.size(), targets_weighted.size())
        # loss_c_for_middle = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
        # num_pos_for_middle = num_pos
        #
        # # Hard Negative Mining
        # loss_c_for_big[pos_for_big.view(-1,1)] = 0  # filter out pos boxes for now
        # loss_c_for_big = loss_c_for_big.view(num, -1)
        # _, loss_idx = loss_c_for_big.sort(1, descending=True)
        # _, idx_rank = loss_idx.sort(1)
        # num_pos = pos_for_big.long().sum(1, keepdim=True)
        # num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos_for_big.size(1)-1)
        # neg = idx_rank < num_neg.expand_as(idx_rank)
        # neg = neg.long()
        # #print(num_pos.size(), num_neg.size(), neg.size())
        #
        # # Confidence Loss Including Positive and Negative Examples
        # pos_idx = pos_for_big.unsqueeze(2).expand_as(conf_data3)
        # neg_idx = neg.unsqueeze(2).expand_as(conf_data3).long()
        # conf_p = conf_data3[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)
        # targets_weighted = conf_t[(pos_for_big+neg).gt(0)]
        # #print(pos_idx.size(), neg_idx.size(), conf_p.size(), targets_weighted.size())
        # loss_c_for_big = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
        # num_pos_for_big = num_pos

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        num_pos = pos.long().sum(1, keepdim=True)
        N_for_all = num_pos.data.sum().float()
        N_for_small = num_pos_for_small.data.sum().float()
        N_for_middle = num_pos_for_middle.data.sum().float()
        N_for_big = num_pos_for_big.data.sum().float()

        N_for_small = max(N_for_small, 1.0)
        N_for_middle = max(N_for_middle, 1.0)
        N_for_big = max(N_for_big, 1.0)

        # print('all:{}  small:{}  middle:{}  big:{}'.format(N_for_all,N_for_small,N_for_middle,N_for_big))
        # N = N_for_small+N_for_middle+N_for_big
        #N = max(num_pos.data.sum().float(), 1)

        # loss_l_for_small /= N_for_small
        # loss_l_for_middle /= N_for_middle
        # loss_l_for_big /= N_for_big

        # loss_l = loss_l_for_small + loss_l_for_middle + loss_l_for_big

        # loss_conf_for_small /= N_for_small
        # loss_conf_for_middle /= N_for_middle
        # loss_conf_for_big /= N_for_big

        # loss_c = loss_conf_for_small + loss_conf_for_middle + loss_conf_for_big

        #print(N, loss_l, loss_c)
        return loss_l_for_small/N_for_small, \
               loss_l_for_middle/N_for_middle, \
               loss_l_for_big/N_for_big, \
               loss_conf_for_small/N_for_small, \
               loss_conf_for_middle/N_for_middle,\
               loss_conf_for_big/N_for_big,  N_for_all, N_for_small, N_for_middle, N_for_big


# # predictions
# arm_loc = torch.rand((4,100,4))
# arm_conf = torch.rand((4,100,2))
# odm_loc1 = torch.rand((4,100,4))
# odm_loc2 = torch.rand((4,100,4))
# odm_loc3 = torch.rand((4,100,4))
# odm_conf1 = torch.rand((4,100,21))
# odm_conf2 = torch.rand((4,100,21))
# odm_conf3 = torch.rand((4,100,21))
# anchor = torch.rand((100,4))
# # ground truths
# gt1 = torch.Tensor([[0.56,0.42,0.8,0.5,14.]])
# gt2 = torch.Tensor([[0.23,0.24,0.56,0.34,7.]])
# gt3 = torch.Tensor([[0.4527,0.0516,0.4938,0.1463,15.],
#                     [0.3247,0.0516,0.7708,0.5237,14.0]])
# gt4 = torch.Tensor([[0.4863,0.3579,0.7280,0.8428,11.0]])
# # put them together
# truths = [gt1,gt2,gt3,gt4]
# preds = (arm_loc, arm_conf, odm_loc1, odm_conf1, odm_loc2, odm_conf2, odm_loc3, odm_conf3, anchor)
# # init a lossfunction
# lossfunction = multitridentMultiBoxLoss(21, 0.0, True, 0, True, 3, 0.5,
#                              False, False, use_ARM=True)
# loss = lossfunction.forward(preds, truths)
# print(loss)
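The confidence losses above are built from log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)), which is per-prior softmax cross-entropy written out by hand. A small sanity check of that identity, using torch.logsumexp in place of the project's log_sum_exp helper:

import torch
import torch.nn.functional as F

batch_conf = torch.randn(6, 21)      # (num * num_priors, num_classes)
conf_t = torch.randint(0, 21, (6,))  # matched class per prior

loss_c = torch.logsumexp(batch_conf, dim=1, keepdim=True) \
    - batch_conf.gather(1, conf_t.view(-1, 1))
reference = F.cross_entropy(batch_conf, conf_t, reduction='none')
print(torch.allclose(loss_c.squeeze(1), reference))  # True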
Example 8
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.num_workers)

    dataset_val = DataLoader(opt, split=opt.val_split)
    dataloader_val = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=opt.batch_size,
                                                 shuffle=False,
                                                 num_workers=opt.num_workers)

    input_imgs = torch.FloatTensor(1)
    input_seqs = torch.LongTensor(1)
    input_ppls = torch.FloatTensor(1)
    gt_bboxs = torch.FloatTensor(1)
    mask_bboxs = torch.ByteTensor(1)
    gt_seqs = torch.LongTensor(1)
    input_num = torch.LongTensor(1)

    if opt.cuda:
        input_imgs = input_imgs.cuda()
        input_seqs = input_seqs.cuda()
        gt_seqs = gt_seqs.cuda()
        input_num = input_num.cuda()
        input_ppls = input_ppls.cuda()
        gt_bboxs = gt_bboxs.cuda()
        mask_bboxs = mask_bboxs.cuda()

    input_imgs = Variable(input_imgs)
    input_seqs = Variable(input_seqs)
    gt_seqs = Variable(gt_seqs)
Example 9
def cv2_tensor(pic):
    img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
    img = img.view(pic.shape[0], pic.shape[1], 3)
    img = img.transpose(0, 2).transpose(1, 2).contiguous()
    return img.float().div(255)
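A possible call, assuming pic is an H x W x 3 uint8 array as returned by cv2.imread (a zero array stands in for a real image here):

import numpy as np

pic = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in for cv2.imread(...)
tensor = cv2_tensor(pic)
print(tensor.shape, tensor.dtype)  # torch.Size([3, 480, 640]) torch.float32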
Example 10
    def forward(self,
                enc_in,
                enc_out,
                enc_mask,
                max_length=None,
                inputs=None,
                use_teacher_forcing=True):

        if max_length is None:
            max_length = self.dec_max_len
        batch_size = enc_out.size(0)
        enc_in2 = torch.unsqueeze(enc_in, dim=1)

        enc_mask = torch.ByteTensor(enc_mask).cuda()
        decoder_output = torch.empty(batch_size, max_length,
                                     self.output_size).cuda()
        sequence_symbols = torch.empty(batch_size,
                                       max_length,
                                       dtype=torch.int32).cuda()
        dec_hidden1, dec_hidden2, elmo_hidden1, elmo_hidden2 = None, None, None, None

        dec_symbol = START * torch.ones(enc_out.size(0), 1,
                                        dtype=torch.long).cuda()
        dec_att_out = torch.zeros(batch_size, 1, self.attn_size).cuda()
        select_read = torch.zeros(batch_size, 1, self.enc_hidden_size).cuda()

        for i in range(max_length):
            '''Step 1: concatenate the previous attention output with the current input'''
            in_embed, elmo_hidden1, elmo_hidden2 = self.dec_elmo_embed(
                dec_symbol, elmo_hidden1, elmo_hidden2)

            dec_in = self.dropout(
                self.inputlayer(
                    torch.cat((in_embed, dec_att_out, select_read), dim=2))
                + self.dec_pos_embed[:, i:i + 1, :])
            '''Two layers: get the output after passing through the RNNs'''
            dec_out, dec_hidden1 = self.rnn(dec_in, dec_hidden1)
            dec_att_out = self.attention(dec_out, enc_out, enc_mask)

            dec_out, dec_hidden2 = self.rnn2(dec_att_out, dec_hidden2)
            dec_att_out = self.attention2(dec_out, enc_out,
                                          enc_mask) + dec_att_out
            '''copyscore'''
            score_c = torch.bmm(
                dec_att_out,
                torch.transpose(torch.tanh(self.W_copy(enc_out)), 1, 2))
            score_c.data.masked_fill_(enc_mask, -float('inf'))
            score_c = F.softmax(score_c, dim=-1)
            score_e = score_c * self.scale * self.scale
            '''Map through the vocab layer to get the next output'''
            dec_to_vocab = self.outlayer(dec_att_out)
            dec_to_vocab.scatter_add_(dim=-1, index=enc_in2, src=score_e)
            decoder_output[:, i:i + 1, :] = dec_to_vocab

            if use_teacher_forcing:
                dec_symbol = inputs[:, i:i + 1]
                sequence_symbols[:, i:i + 1] = torch.argmax(dec_to_vocab,
                                                            dim=2)
            else:
                dec_symbol = torch.argmax(dec_to_vocab, dim=2)
                sequence_symbols[:, i:i + 1] = dec_symbol

            score_f = score_c * (
                (enc_in == dec_symbol).float().unsqueeze(dim=1))
            select_read = torch.bmm(score_f, enc_out)

        return decoder_output, sequence_symbols
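The scatter_add_ call above is the copy mechanism: copy scores over encoder positions are added onto the vocabulary logits at the token ids that occur in the source, accumulating when an id repeats. A minimal standalone version with toy sizes:

import torch

vocab_size = 10
dec_to_vocab = torch.zeros(1, 1, vocab_size)      # decoder vocab scores
enc_in2 = torch.tensor([[[2, 5, 5, 7]]])          # source token ids
score_e = torch.tensor([[[0.1, 0.2, 0.3, 0.4]]])  # copy score per source position

dec_to_vocab.scatter_add_(dim=-1, index=enc_in2, src=score_e)
print(dec_to_vocab)  # ids 2, 5, 7 receive 0.1, 0.5 (= 0.2 + 0.3) and 0.4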
Example 11
    def beam_search(self, enc_in, enc_out, enc_mask, beam_width=5):
        '''Accepts only one input at a time; since this is decoding, it is fine to be a bit slow'''
        max_length = self.dec_max_len
        batch_size = enc_out.size(0)
        assert (batch_size == 1)
        '''There will be beam_width outputs in total'''
        dec_hidden1, dec_hidden2, elmo_hidden1, elmo_hidden2 = None, None, None, None
        dec_symbol = START * torch.ones(beam_width, 1, dtype=torch.long).cuda()
        dec_att_out = torch.zeros(beam_width, 1, self.attn_size).cuda()
        select_read = torch.zeros(beam_width, 1, self.enc_hidden_size).cuda()
        '''Replicate the encoder inputs beam_width times'''
        enc_out = enc_out.repeat(beam_width, 1, 1)
        enc_mask = torch.ByteTensor(np.tile(enc_mask,
                                            [beam_width, 1, 1])).cuda()
        enc_in = enc_in.repeat(beam_width, 1)
        enc_in2 = torch.unsqueeze(enc_in, dim=1)

        beam_proba = torch.zeros(beam_width, 1).cuda()
        sequence_symbols = []
        length = 0

        for i in range(max_length):
            in_embed, elmo_hidden1, elmo_hidden2 = self.dec_elmo_embed(
                dec_symbol, elmo_hidden1, elmo_hidden2)

            dec_in = self.dropout(
                self.inputlayer(
                    torch.cat((in_embed, dec_att_out, select_read), dim=2))
                + self.dec_pos_embed[:, i:i + 1, :])
            '''Two layers: get the output after passing through the RNNs'''
            dec_out, dec_hidden1 = self.rnn(dec_in, dec_hidden1)
            dec_att_out = self.attention(dec_out, enc_out, enc_mask)
            dec_out, dec_hidden2 = self.rnn2(dec_att_out, dec_hidden2)
            dec_att_out = self.attention2(dec_out, enc_out,
                                          enc_mask) + dec_att_out
            '''copyscore'''
            score_c = torch.bmm(
                dec_att_out,
                torch.transpose(torch.tanh(self.W_copy(enc_out)), 1, 2))
            score_c.data.masked_fill_(enc_mask, -float('inf'))
            score_c = F.softmax(score_c, dim=-1)
            score_e = score_c * self.scale * self.scale
            '''Map through the vocab layer to get the next output'''
            dec_to_vocab = self.outlayer(dec_att_out)
            dec_to_vocab.scatter_add_(dim=-1, index=enc_in2, src=score_e)
            '''Find the largest probabilities'''
            proba = F.log_softmax(dec_to_vocab, dim=2).squeeze() + beam_proba

            if i == 0:
                select = torch.topk(proba[0], beam_width)[1]
                dec_symbol = select.reshape(beam_width, 1)
                beam_proba = proba[0, select].reshape(beam_width, 1)
                sequence_symbols.append(dec_symbol)
                choose = select // self.output_size
            else:
                if i <= 26:
                    maxproba = torch.max(proba, dim=0)
                    proba2 = maxproba[0]
                    index = maxproba[1]

                    select = torch.topk(proba2, beam_width)[1]
                    choose = index[select]

                    beam_proba = proba2[select].reshape(beam_width, 1)
                    dec_symbol = select.reshape(beam_width, 1)

                else:
                    proba = proba.reshape(-1)
                    select = torch.topk(proba, beam_width)[1]
                    choose = select // self.output_size

                    beam_proba = proba[select].reshape(beam_width, 1)
                    select = select % self.output_size  # which token
                    dec_symbol = select.reshape(beam_width, 1)
                '''Note: the symbols have to be rearranged here!'''
                ls = torch.cat((sequence_symbols[-1][choose, :], dec_symbol),
                               dim=1)
                sequence_symbols.append(ls)

                if dec_symbol[0, 0] == END:
                    break
            '''TODO: this step needs careful thought and probably a revision, since score_f depends heavily on the sequence from the previous time step'''
            score_f = score_c[choose, :] * (
                (enc_in == dec_symbol).float().unsqueeze(dim=1))
            select_read = torch.bmm(score_f, enc_out)

            length = i + 1

            elmo_hidden1 = (elmo_hidden1[0][:, choose, :],
                            elmo_hidden1[1][:, choose, :])
            elmo_hidden2 = (elmo_hidden2[0][:, choose, :],
                            elmo_hidden2[1][:, choose, :])
            dec_hidden1 = (dec_hidden1[0][:, choose, :],
                           dec_hidden1[1][:, choose, :])
            dec_hidden2 = (dec_hidden2[0][:, choose, :],
                           dec_hidden2[1][:, choose, :])

            dec_att_out = dec_att_out[choose, :, :]
        return sequence_symbols[-1], beam_proba[0] / length
Example 12
    def reset(self):
        self.first_action = True
        self.state = torch.ByteTensor(1, 84, 84).to(device)
Example 13
def _demo_mm_inputs(input_shape=(1, 3, 300, 300),
                    num_items=None, num_classes=10,
                    with_semantic=False):  # yapf: disable
    """Create a superset of inputs needed to run test or train batches.

    Args:
        input_shape (tuple):
            input batch dimensions

        num_items (None | List[int]):
            specifies the number of boxes in each batch item

        num_classes (int):
            number of different labels a box might have
    """
    from mmdet.core import BitmapMasks

    (N, C, H, W) = input_shape

    rng = np.random.RandomState(0)

    imgs = rng.rand(*input_shape)

    img_metas = [{
        'img_shape': (H, W, C),
        'ori_shape': (H, W, C),
        'pad_shape': (H, W, C),
        'filename': '<demo>.png',
        'scale_factor': np.array([1.1, 1.2, 1.1, 1.2]),
        'flip': False,
        'flip_direction': None,
    } for _ in range(N)]

    gt_bboxes = []
    gt_labels = []
    gt_masks = []

    for batch_idx in range(N):
        if num_items is None:
            num_boxes = rng.randint(1, 10)
        else:
            num_boxes = num_items[batch_idx]

        cx, cy, bw, bh = rng.rand(num_boxes, 4).T

        tl_x = ((cx * W) - (W * bw / 2)).clip(0, W)
        tl_y = ((cy * H) - (H * bh / 2)).clip(0, H)
        br_x = ((cx * W) + (W * bw / 2)).clip(0, W)
        br_y = ((cy * H) + (H * bh / 2)).clip(0, H)

        boxes = np.vstack([tl_x, tl_y, br_x, br_y]).T
        class_idxs = rng.randint(1, num_classes, size=num_boxes)

        gt_bboxes.append(torch.FloatTensor(boxes))
        gt_labels.append(torch.LongTensor(class_idxs))

        mask = np.random.randint(0, 2, (len(boxes), H, W), dtype=np.uint8)
        gt_masks.append(BitmapMasks(mask, H, W))

    mm_inputs = {
        'imgs': torch.FloatTensor(imgs).requires_grad_(True),
        'img_metas': img_metas,
        'gt_bboxes': gt_bboxes,
        'gt_labels': gt_labels,
        'gt_bboxes_ignore': None,
        'gt_masks': gt_masks,
    }

    if with_semantic:
        # assume gt_semantic_seg using scale 1/8 of the img
        gt_semantic_seg = np.random.randint(0,
                                            num_classes,
                                            (1, 1, H // 8, W // 8),
                                            dtype=np.uint8)
        mm_inputs.update(
            {'gt_semantic_seg': torch.ByteTensor(gt_semantic_seg)})

    return mm_inputs
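A possible invocation (mmdet must be importable, since BitmapMasks is loaded inside the function):

mm_inputs = _demo_mm_inputs(input_shape=(2, 3, 128, 128),
                            num_items=[3, 5],
                            with_semantic=True)
print(mm_inputs['imgs'].shape)  # torch.Size([2, 3, 128, 128])
print([b.shape for b in mm_inputs['gt_bboxes']])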
Example 14
def all_gather_list(data, group=None, max_size=16384):
    """Gathers arbitrary data from all nodes into a list.

    Similar to :func:`~torch.distributed.all_gather` but for arbitrary Python
    data. Note that *data* must be picklable and any CUDA tensors will be moved
    to CPU and returned on CPU as well.

    Args:
        data (Any): data from the local worker to be gathered on other workers
        group: group of the collective
        max_size (int, optional): maximum size of the data to be gathered
            across workers
    """
    from fairseq import utils

    if group is None:
        group = get_global_group()
    rank = get_rank(group=group)
    world_size = get_world_size(group=group)

    buffer_size = max_size * world_size
    if (not hasattr(all_gather_list, "_buffer")
            or all_gather_list._buffer.numel() < buffer_size):
        all_gather_list._buffer = torch.cuda.ByteTensor(buffer_size)
        all_gather_list._cpu_buffer = torch.ByteTensor(max_size).pin_memory()
    buffer = all_gather_list._buffer
    buffer.zero_()
    cpu_buffer = all_gather_list._cpu_buffer

    data = utils.move_to_cpu(data)
    enc = pickle.dumps(data)
    enc_size = len(enc)
    header_size = 4  # size of header that contains the length of the encoded data
    size = header_size + enc_size
    if size > max_size:
        raise ValueError("encoded data size ({}) exceeds max_size ({})".format(
            size, max_size))

    header = struct.pack(">I", enc_size)
    cpu_buffer[:size] = torch.ByteTensor(list(header + enc))
    start = rank * max_size
    buffer[start:start + size].copy_(cpu_buffer[:size])

    all_reduce(buffer, group=group)

    buffer = buffer.cpu()
    try:
        result = []
        for i in range(world_size):
            out_buffer = buffer[i * max_size:(i + 1) * max_size]
            (enc_size, ) = struct.unpack(
                ">I", bytes(out_buffer[:header_size].tolist()))
            if enc_size > 0:
                result.append(
                    pickle.loads(
                        bytes(out_buffer[header_size:header_size +
                                         enc_size].tolist())))
        return result
    except pickle.UnpicklingError:
        raise Exception(
            "Unable to unpickle data from other workers. all_gather_list requires all "
            "workers to enter the function together, so this error usually indicates "
            "that the workers have fallen out of sync somehow. Workers can fall out of "
            "sync if one of them runs out of memory, or if there are other conditions "
            "in your training script that can cause one worker to finish an epoch "
            "while other workers are still iterating over their portions of the data. "
            "Try rerunning with --ddp-backend=legacy_ddp and see if that helps."
        )
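The wire format used above is a 4-byte big-endian length header followed by the pickled payload. The encoding round-trips locally without any process group:

import pickle
import struct

data = {'rank': 0, 'msg': 'hello'}
enc = pickle.dumps(data)
packet = struct.pack('>I', len(enc)) + enc  # header + payload

(enc_size,) = struct.unpack('>I', packet[:4])
decoded = pickle.loads(packet[4:4 + enc_size])
print(decoded == data)  # True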
Example 15
def get_f1(model: BiRecurrentConvCRF4NestedNER,
           mode: str,
           file_path: str = None) -> float:
    with torch.no_grad():
        model.eval()

        pred_all, pred, recall_all, recall = 0, 0, 0, 0
        gold_cross_num = 0
        pred_cross_num = 0
        if mode == 'dev':
            batch_zip = zip(dev_input_ids_batches, dev_input_mask_batches,
                            dev_first_subtokens_batches,
                            dev_last_subtokens_batches, dev_label_batches,
                            dev_mask_batches)
        elif mode == 'test':
            batch_zip = zip(test_input_ids_batches, test_input_mask_batches,
                            test_first_subtokens_batches,
                            test_last_subtokens_batches, test_label_batches,
                            test_mask_batches)
        else:
            raise ValueError

        f = None
        if file_path is not None:
            f = open(file_path, 'w')

        for input_ids_batch, input_mask_batch, first_subtokens_batch, last_subtokens_batch, label_batch, mask_batch \
                in batch_zip:
            input_ids_batch_var = torch.LongTensor(np.array(input_ids_batch))
            input_mask_batch_var = torch.LongTensor(np.array(input_mask_batch))
            mask_batch_var = torch.ByteTensor(
                np.array(mask_batch, dtype=np.uint8))
            if config.if_gpu:
                input_ids_batch_var = input_ids_batch_var.cuda()
                input_mask_batch_var = input_mask_batch_var.cuda()
                mask_batch_var = mask_batch_var.cuda()

            pred_sequence_entities = model.predict(input_ids_batch_var,
                                                   input_mask_batch_var,
                                                   first_subtokens_batch,
                                                   last_subtokens_batch,
                                                   mask_batch_var)
            pred_entities = unpack_prediction(model, pred_sequence_entities)
            p_a, p, r_a, r = evaluate(label_batch, pred_entities)

            gold_cross_num += 0
            pred_cross_num += 0

            pred_all += p_a
            pred += p
            recall_all += r_a
            recall += r

            if file_path is not None:
                for input_ids, input_mask, first_subtokens, last_subtokens, mask, label, preds \
                        in zip(input_ids_batch, input_mask_batch, first_subtokens_batch, last_subtokens_batch,
                               mask_batch, label_batch, pred_entities):
                    words = []
                    for t, m in zip(input_ids, input_mask):
                        if m == 0:
                            break
                        words.append(voc_dict.get_instance(t))
                    f.write(' '.join(words) + '\n')

                    labels = []
                    for l in sorted(label, key=lambda x: (x[0], x[1], x[2])):
                        s = first_subtokens[l[0]]
                        e = last_subtokens[l[1] - 1]
                        labels.append("{},{} {}".format(
                            s, e, label_dict.get_instance(l[2])))
                    f.write('|'.join(labels) + '\n')

                    labels = []
                    for p in sorted(preds, key=lambda x: (x[0], x[1], x[2])):
                        s = first_subtokens[p[0]]
                        e = last_subtokens[p[1] - 1]
                        labels.append("{},{} {}".format(
                            s, e, label_dict.get_instance(p[2])))
                    f.write('|'.join(labels) + '\n')

                    f.write('\n')

        if file_path is not None:
            f.close()

        pred = pred / pred_all if pred_all > 0 else 1.
        recall = recall / recall_all if recall_all > 0 else 1.
        f1 = 2 / ((1. / pred) +
                  (1. / recall)) if pred > 0. and recall > 0. else 0.
        logger.info(
            "{} precision: {:.2f}%, recall: {:.2f}%, F1: {:.2f}%".format(
                mode, pred * 100., recall * 100., f1 * 100.))
        # logger.info("Prediction Crossing: ", pred_cross_num)
        # logger.info("Gold Crossing: ", gold_cross_num)

        return f1
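Note that the reciprocal form used above, 2 / (1/p + 1/r), is just the usual harmonic-mean F1 = 2pr / (p + r); a quick sanity check:

p, r = 0.8, 0.5
assert abs(2 / ((1. / p) + (1. / r)) - 2 * p * r / (p + r)) < 1e-12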
Example n. 16
    def __getitem__(self, index):
        A_paths = self.A_paths[index]

        match = re.search(r'(/\d+)?/serie(\d+)', A_paths[0])

        folder_id = self.folder_id_lookup[
            match.group(1)[1:] if match.group(1) else '']

        dir_tag = '_' + match.group(1)[1:] + '_' if match.group(1) else '_'
        series_number = int(match.group(2))

        # Check that all files are of the same series number, as glob doesn't always
        # return the files in the correct order

        s_no = series_number - 100000

        assert all([get_series_number(path) == s_no
                    for path in A_paths[1:]]), A_paths

        series = 'serie' + dir_tag + str(series_number)

        data = OrderedDict([(get_file_tag(path), read_geo_file(path))
                            for path in A_paths])

        rows = len(np.unique(data['Vx']['y']))
        cols = len(np.unique(data['Vx']['x']))

        # It is possible to do an interpolation here, but it's really slow
        # and ends up looking about the same
        for key in data.keys():
            data[key]['values'] = data[key]['values'].reshape((rows, cols),
                                                              order='C')
            data[key]['values'] = resize(
                data[key]['values'],
                (self.opt.fineSize, self.opt.fineSize * 2),
                mode='constant')

        # hard-coded output grid; assumes opt.fineSize == 256 so these match
        # the (fineSize, fineSize * 2) resize above
        rows = 256
        cols = 512

        # Create discrete image before we normalise
        A = create_one_hot(data['DIV']['values'], self.opt.div_threshold)

        # We're done with x/y data now, so discard
        A_data = [data[key]['values'] for key in data.keys() if key != 'cont']
        # Normalise
        A_DIV, A_Vx, A_Vy = A_data

        A_DIV = np.interp(A_DIV,
                          [np.min(A_DIV.ravel()),
                           np.max(A_DIV.ravel())], [-1, 1])
        A_Vx = np.interp(
            A_Vx,
            [np.min(A_Vx.ravel()), np.max(A_Vx.ravel())], [-1, 1])
        A_Vy = np.interp(
            A_Vy,
            [np.min(A_Vy.ravel()), np.max(A_Vy.ravel())], [-1, 1])

        w_offset, h_offset, layer = self.get_inpaint_region(
            index, A, rows, cols)

        mask_x1 = w_offset
        mask_x2 = w_offset + 100

        mask_y1 = h_offset
        mask_y2 = h_offset + 100

        mask = np.zeros((rows, cols), dtype=np.uint8)
        mask[mask_y1:mask_y2, mask_x1:mask_x2] = 1

        # B_DIV = A_DIV.copy()
        # B_DIV[mask_y1:mask_y2, mask_x1:mask_x2] = 0

        # B_Vx = A_Vx.copy()
        # B_Vx[mask_y1:mask_y2, mask_x1:mask_x2] = 0

        # B_Vy = A_Vy.copy()
        # B_Vy[mask_y1:mask_y2, mask_x1:mask_x2] = 0

        B_data = [
            mask_out_inpaint_region(im, mask) for im in [A_DIV, A_Vx, A_Vy]
        ]

        B = A.copy()

        if self.opt.inpaint_single_class:
            B[:, :, 1][np.where(np.logical_and(mask, B[:, :, layer]))] = 1
            B[mask_y1:mask_y2, mask_x1:mask_x2, layer] = 0
        else:
            B[np.where(mask)] = [0, 1, 0]

        mask = np.expand_dims(mask, 2)
        mask = torch.ByteTensor(mask.transpose(2, 0, 1)).clone()

        # A_DIV = np.interp(A_DIV, [np.min(A_DIV), np.max(A_DIV)], [-1, 1])
        # A_Vx = np.interp(A_Vx, [np.min(A_Vx), np.max(A_Vx)], [-1, 1])
        # A_Vy = np.interp(A_Vy, [np.min(A_Vy), np.max(A_Vy)], [-1, 1])

        # B_DIV = np.interp(B_DIV, [np.min(B_DIV), np.max(B_DIV)], [-1, 1])
        # B_Vx = np.interp(B_Vx, [np.min(B_Vx), np.max(B_Vx)], [-1, 1])
        # B_Vy = np.interp(B_Vy, [np.min(B_Vy), np.max(B_Vy)], [-1, 1])

        def process_image(A, B, discrete=False):
            if not discrete:
                A = np.expand_dims(A, 0)
                B = np.expand_dims(B, 0)
                A = torch.FloatTensor(A)
                B = torch.FloatTensor(B)

                # A = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(A)
                # B = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(B)
            else:
                A = torch.FloatTensor(A.transpose(2, 0, 1))
                B = torch.FloatTensor(B.transpose(2, 0, 1))

            return A, B

        B_DIV, B_Vx, B_Vy = B_data

        if self.opt.continent_data:
            if 'cont' in data.keys():
                continents = data['cont']['values']
            else:
                continents = np.zeros((rows, cols))

        A, B = process_image(A, B, discrete=True)
        A_DIV, B_DIV = process_image(A_DIV, B_DIV)
        A_Vx, B_Vx = process_image(A_Vx, B_Vx)
        A_Vy, B_Vy = process_image(A_Vy, B_Vy)

        if self.opt.continent_data:
            continents = (continents > 0).astype(np.uint8)
            continents = np.expand_dims(continents, 2)
            continents = continents.transpose(2, 0, 1)

            continents = torch.ByteTensor(continents).clone()

        if (not self.opt.no_flip) and random.random() < 0.5:
            idx = [i for i in range(A.size(2) - 1, -1, -1)]
            idx = torch.LongTensor(idx)
            A = A.index_select(2, idx)
            B = B.index_select(2, idx)
            A_DIV = A_DIV.index_select(2, idx)
            B_DIV = B_DIV.index_select(2, idx)
            A_Vx = A_Vx.index_select(2, idx)
            B_Vx = B_Vx.index_select(2, idx)
            A_Vy = A_Vy.index_select(2, idx)
            B_Vy = B_Vy.index_select(2, idx)

            if self.opt.continent_data:
                continents = continents.index_select(2, idx)

            mask = mask.index_select(2, idx)

            tmp = mask_x1
            mask_x1 = mask.shape[2] - mask_x2
            mask_x2 = mask.shape[2] - tmp

        mask_x1 = torch.LongTensor([mask_x1]).expand(1, -1)
        mask_x2 = torch.LongTensor([mask_x2]).expand(1, -1)
        mask_y1 = torch.LongTensor([mask_y1]).expand(1, -1)
        mask_y2 = torch.LongTensor([mask_y2]).expand(1, -1)

        data = {
            'A': A,
            'B': B,
            'A_DIV': A_DIV,
            'B_DIV': B_DIV,
            'A_Vx': A_Vx,
            'B_Vx': B_Vx,
            'A_Vy': A_Vy,
            'B_Vy': B_Vy,
            'mask': mask,
            'mask_x1': mask_x1,
            'mask_x2': mask_x2,
            'mask_y1': mask_y1,
            'mask_y2': mask_y2,
            'A_paths': os.path.join(self.dir_A, series),
            'B_paths': os.path.join(self.dir_A, series + '_inpainted'),
            'series_number': int(dir_tag[1:-1] + str(series_number)),
            'folder_id': folder_id
        }

        if self.opt.continent_data:
            data['cont'] = continents

        return data
Example n. 17
def create_data_loader(loader, batch_size=5000):
    array, lengths = np.array(loader["data"]), np.array(loader["length"])
    data = TensorDataset(
        torch.from_numpy(array).type(torch.LongTensor), torch.ByteTensor(lengths)
    )
    return DataLoader(data, batch_size=batch_size, drop_last=False)
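A minimal usage sketch for create_data_loader, assuming loader is a dict holding pre-padded integer sequences under 'data' and their true lengths under 'length' (all sample values below are made up):

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

loader = {
    "data": [[5, 9, 2, 0], [7, 1, 0, 0]],  # padded token ids (illustrative)
    "length": [3, 2],                      # true sequence lengths
}
for seqs, lengths in create_data_loader(loader, batch_size=2):
    print(seqs.shape, lengths)  # torch.Size([2, 4]) tensor([3, 2], dtype=torch.uint8)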
Example n. 18
def train(config):
    hidden_size = config["hidden_size"]
    save_dir = config["save_dir"]
    learning_rate = config["learning_rate"]
    batch_size = config["batch_size"]
    epoch_size = config["epoch_size"]

    dataset = DataUtil(config)
    input_vocab, target_vocab, intent_vocab = dataset.get_vocab()
    dataloader = DataLoader(dataset, batch_size, shuffle=True)

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    encoder = Encoder(len(input_vocab), config)
    decoder = Decoder(len(target_vocab), len(intent_vocab), hidden_size * 2)

    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    encoder.init_weights()
    decoder.init_weights()

    loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
    loss_function_2 = nn.CrossEntropyLoss()
    enc_optim = optim.Adam(encoder.parameters(), lr=learning_rate)
    dec_optim = optim.Adam(decoder.parameters(), lr=learning_rate)

    for epoch in range(1, epoch_size+1):
        losses = []
        for i, batch in enumerate(dataloader):
            input_batch, target_batch, intent_batch = batch
            input_batch = input_batch.long()
            target_batch = target_batch.long()
            if USE_CUDA:
                input_batch = input_batch.cuda()
                target_batch = target_batch.cuda()

            # padding mask: 1 wherever the input token id is 0
            input_mask = Variable(
                (input_batch == 0).byte()).view(batch_size, -1)

            encoder.zero_grad()
            decoder.zero_grad()

            output, hidden_c = encoder(input_batch, input_mask)
            start_decode = Variable(torch.LongTensor([[input_vocab.index('PAD')] * batch_size])).transpose(1, 0)
            if USE_CUDA:
                start_decode = start_decode.cuda()

            tag_score, intent_score = decoder(start_decode, hidden_c, output, input_mask)

            loss_1 = loss_function_1(tag_score, target_batch.view(-1))
            if USE_CUDA:
                intent_batch = intent_batch.cuda()
            loss_2 = loss_function_2(intent_score, intent_batch)

            loss = loss_1 + loss_2
            losses.append(loss.data.cpu().numpy() if USE_CUDA else loss.data.numpy())
            loss.backward()

            torch.nn.utils.clip_grad_norm_(encoder.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), 5.0)

            enc_optim.step()
            dec_optim.step()

            if i % 10 == 0:
                print(f"Epoch {epoch}: {np.mean(losses)}")
                losses = []

        if epoch % 100 == 0:
            torch.save(encoder, os.path.join(save_dir, f'encoder-{epoch}.pt'))
            torch.save(decoder, os.path.join(save_dir, f'decoder-{epoch}.pt'))
            print(f"Epoch: {epoch} save model...")

    print("Training Complete!")
Example n. 19
def test_model(test_dataset, test_num_each):
    num_test = len(test_dataset)
    test_count = 0
    for i in range(len(test_num_each)):
        test_count += test_num_each[i]

    test_useful_start_idx = get_useful_start_idx(sequence_length, test_num_each)

    num_test_we_use = len(test_useful_start_idx)
    # strictly we should divide by the number of GPUs and then multiply back,
    # but we keep all start indices so the test set is covered as completely as possible
    # num_test_we_use = 804

    test_we_use_start_idx = test_useful_start_idx[0:num_test_we_use]

    test_idx = []
    for i in range(num_test_we_use):
        for j in range(sequence_length):
            test_idx.append(test_we_use_start_idx[i] + j)

    num_test_all = len(test_idx)

    print('num test start idx : {:6d}'.format(len(test_useful_start_idx)))
    print('last idx test start: {:6d}'.format(test_useful_start_idx[-1]))
    print('num of test dataset: {:6d}'.format(num_test))
    print('num of test we use : {:6d}'.format(num_test_we_use))
    print('num of all test use: {:6d}'.format(num_test_all))

    test_loader = DataLoader(
        test_dataset,
        batch_size=test_batch_size,
        sampler=test_idx,
        num_workers=1,
        pin_memory=False
    )
    model = multi_lstm_p2t()
    model = DataParallel(model)
    model.load_state_dict(torch.load(model_name))
    # model = model.module
    # model = DataParallel(model)

    if use_gpu:
        model = model.cuda()
    # model = DataParallel(model)
    # model = model.module
    criterion_1 = nn.BCEWithLogitsLoss(size_average=False)
    criterion_2 = nn.CrossEntropyLoss(size_average=False)
    sig_f = nn.Sigmoid()

    model.eval()
    test_loss_1 = 0.0
    test_loss_2 = 0.0
    test_corrects_2 = 0

    test_start_time = time.time()
    all_preds_1 = []
    all_labels_1 = []
    all_preds_2 = []

    for data in test_loader:
        inputs, labels_1, labels_2 = data

        # labels_1 = labels_1[(sequence_length - 1)::sequence_length]
        labels_2 = labels_2[(sequence_length - 1)::sequence_length]
        if use_gpu:
            inputs = Variable(inputs.cuda(), volatile=True)
            labels_1 = Variable(labels_1.cuda(), volatile=True)
            labels_2 = Variable(labels_2.cuda(), volatile=True)
        else:
            inputs = Variable(inputs, volatile=True)
            labels_1 = Variable(labels_1, volatile=True)
            labels_2 = Variable(labels_2, volatile=True)

        if crop_type == 0 or crop_type == 1:
            outputs_1, outputs_2, _ = model.forward(inputs)
        elif crop_type == 5:
            inputs = inputs.permute(1, 0, 2, 3, 4).contiguous()
            inputs = inputs.view(-1, 3, 224, 224)
            outputs_1, outputs_2, _ = model.forward(inputs)
            outputs_1 = outputs_1.view(5, -1, 7)
            outputs_1 = torch.mean(outputs_1, 0)
            outputs_2 = outputs_2.view(5, -1, 7)
            outputs_2 = torch.mean(outputs_2, 0)
        elif crop_type == 10:
            inputs = inputs.permute(1, 0, 2, 3, 4).contiguous()
            inputs = inputs.view(-1, 3, 224, 224)
            outputs_1, outputs_2, _ = model.forward(inputs)
            outputs_1 = outputs_1.view(10, -1, 7)
            outputs_1 = torch.mean(outputs_1, 0)
            outputs_2 = outputs_2.view(10, -1, 7)
            outputs_2 = torch.mean(outputs_2, 0)

        # outputs_1 = outputs_1[sequence_length-1::sequence_length]
        outputs_2 = outputs_2[sequence_length - 1::sequence_length]

        _, preds_2 = torch.max(outputs_2.data, 1)

        for i in range(len(outputs_1)):
            all_preds_1.append(outputs_1[i].data.cpu().numpy().tolist())
            all_labels_1.append(labels_1[i].data.cpu().numpy().tolist())
        for i in range(len(preds_2)):
            all_preds_2.append(preds_2[i])
        print('preds_1: {:6d} preds_2: {:6d}'.format(len(all_preds_1), len(all_preds_2)))

        # labels_1 = Variable(labels_1.data.float())
        # loss_1 = criterion_1(outputs_1, labels_1)

        # test_loss_1 += loss_1.data[0]
        loss_2 = criterion_2(outputs_2, labels_2)
        test_loss_2 += loss_2.data[0]
        test_corrects_2 += torch.sum(preds_2 == labels_2.data)

    all_preds_1_cor = []
    all_labels_1_cor = []
    cor_count = 0
    for i in range(len(test_num_each)):
        for j in range(cor_count, cor_count + test_num_each[i] - (sequence_length - 1)):
            if j == cor_count:
                for k in range(sequence_length - 1):
                    all_preds_1_cor.append(all_preds_1[sequence_length * j + k])
                    all_labels_1_cor.append(all_labels_1[sequence_length * j + k])
            all_preds_1_cor.append(all_preds_1[sequence_length * j + sequence_length - 1])
            all_labels_1_cor.append(all_labels_1[sequence_length * j + sequence_length - 1])
        cor_count += test_num_each[i] + 1 - sequence_length

    print('all_preds_1 : {:6d}'.format(len(all_preds_1)))
    print('all_labels_1: {:6d}'.format(len(all_labels_1)))
    print('cor_preds_1 : {:6d}'.format(len(all_preds_1_cor)))
    print('cor_labels_1: {:6d}'.format(len(all_labels_1_cor)))


    pt_preds_1 = torch.from_numpy(np.asarray(all_preds_1_cor, dtype=np.float32))
    pt_labels_1 = torch.from_numpy(np.asarray(all_labels_1_cor, dtype=np.float32))
    pt_labels_1 = Variable(pt_labels_1, requires_grad=False)
    pt_preds_1 = Variable(pt_preds_1, requires_grad=False)
    loss_1 = criterion_1(pt_preds_1, pt_labels_1)
    test_loss_1 += loss_1.data[0]

    pt_labels_1 = pt_labels_1.data
    pt_preds_1 = pt_preds_1.data
    sig_out = sig_f(pt_preds_1)
    preds_cor = torch.ByteTensor(sig_out > 0.5)
    preds_cor = preds_cor.long()
    pt_labels_1 = pt_labels_1.long()
    test_corrects_1 = torch.sum(preds_cor == pt_labels_1)

    test_elapsed_time = time.time() - test_start_time
    test_accuracy_1 = test_corrects_1 / num_test / 7
    test_accuracy_2 = test_corrects_2 / num_test_we_use
    test_average_loss_1 = test_loss_1 / num_test / 7
    test_average_loss_2 = test_loss_2 / num_test_we_use

    print('preds_1 num: {:6d} preds_2 num: {:6d}'.format(len(all_preds_1_cor), len(all_preds_2)))

    save_test1 = int("{:4.0f}".format(test_accuracy_1 * 10000))
    save_test2 = int("{:4.0f}".format(test_accuracy_2 * 10000))

    pred_1_name = model_pure_name + '_test1_' + str(save_test1) + '_crop_' + str(crop_type) + '.pkl'
    pred_2_name = model_pure_name + '_test2_' + str(save_test2) + '_crop_' + str(crop_type) + '.pkl'

    with open(pred_1_name, 'wb') as f:
        pickle.dump(all_preds_1_cor, f)
    with open(pred_2_name, 'wb') as f:
        pickle.dump(all_preds_2, f)

    print('test completed in:'
          ' {:2.0f}m{:2.0f}s'
          ' test loss_1: {:4.4f}'
          ' test loss_2: {:4.4f}'
          ' test accu_1: {:.4f}'
          ' test accu_2: {:.4f}'
          .format(test_elapsed_time // 60,
                  test_elapsed_time % 60,
                  test_average_loss_1,
                  test_average_loss_2,
                  test_accuracy_1,
                  test_accuracy_2))
Example n. 20
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    if args.self_loop and not args.dataset.startswith('reddit'):
        data.graph.add_edges_from([(i, i) for i in range(len(data.graph))])

    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)
    test_nid = np.nonzero(data.test_mask)[0].astype(np.int64)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.sum().item()
    n_val_samples = val_mask.sum().item()
    n_test_samples = test_mask.sum().item()

    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples, n_test_samples))

    # create GCN model
    g = DGLGraph(data.graph, readonly=True)
    norm = 1. / g.in_degrees().float().unsqueeze(1)

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        norm = norm.cuda()

    g.ndata['features'] = features

    num_neighbors = args.num_neighbors
    n_layers = args.n_layers

    g.ndata['norm'] = norm

    g.update_all(
        fn.copy_src(src='features', out='m'),
        fn.sum(msg='m', out='preprocess'),
        lambda node: {'preprocess': node.data['preprocess'] * node.data['norm']})

    for i in range(n_layers):
        g.ndata['h_{}'.format(i)] = torch.zeros(
            features.shape[0], args.n_hidden).to(device=features.device)

    g.ndata['h_{}'.format(n_layers - 1)] = torch.zeros(
        features.shape[0], 2 * args.n_hidden).to(device=features.device)

    model = GCNSampling(in_feats, args.n_hidden, n_classes, n_layers, F.relu,
                        args.dropout)

    loss_fcn = nn.CrossEntropyLoss()

    infer_model = GCNInfer(in_feats, args.n_hidden, n_classes, n_layers,
                           F.relu)

    if cuda:
        model.cuda()
        infer_model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    for epoch in range(args.n_epochs):
        for nf in dgl.contrib.sampling.NeighborSampler(g,
                                                       args.batch_size,
                                                       num_neighbors,
                                                       neighbor_type='in',
                                                       shuffle=True,
                                                       num_workers=32,
                                                       num_hops=n_layers,
                                                       seed_nodes=train_nid):
            for i in range(n_layers):
                agg_history_str = 'agg_h_{}'.format(i)
                g.pull(
                    nf.layer_parent_nid(i + 1).long(),
                    fn.copy_src(src='h_{}'.format(i), out='m'),
                    fn.sum(msg='m', out=agg_history_str),
                    lambda node: {agg_history_str:
                                  node.data[agg_history_str] * node.data['norm']})

            node_embed_names = [['preprocess', 'h_0']]
            for i in range(1, n_layers):
                node_embed_names.append(
                    ['h_{}'.format(i), 'agg_h_{}'.format(i - 1)])
            node_embed_names.append(['agg_h_{}'.format(n_layers - 1)])
            nf.copy_from_parent(node_embed_names=node_embed_names)

            model.train()
            # forward
            pred = model(nf)
            batch_nids = nf.layer_parent_nid(-1).to(device=pred.device).long()
            batch_labels = labels[batch_nids]
            loss = loss_fcn(pred, batch_labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            node_embed_names = [['h_{}'.format(i)] for i in range(n_layers)]
            node_embed_names.append([])
            nf.copy_to_parent(node_embed_names=node_embed_names)

        for infer_param, param in zip(infer_model.parameters(),
                                      model.parameters()):
            infer_param.data.copy_(param.data)

        num_acc = 0.

        for nf in dgl.contrib.sampling.NeighborSampler(g,
                                                       args.test_batch_size,
                                                       g.number_of_nodes(),
                                                       neighbor_type='in',
                                                       num_workers=32,
                                                       num_hops=n_layers,
                                                       seed_nodes=test_nid):
            node_embed_names = [['preprocess']]
            for i in range(n_layers):
                node_embed_names.append(['norm'])
            nf.copy_from_parent(node_embed_names=node_embed_names)

            infer_model.eval()
            with torch.no_grad():
                pred = infer_model(nf)
                batch_nids = nf.layer_parent_nid(-1).to(
                    device=pred.device).long()
                batch_labels = labels[batch_nids]
                num_acc += (pred.argmax(
                    dim=1) == batch_labels).sum().cpu().item()

        print("Test Accuracy {:.4f}".format(num_acc / n_test_samples))
Example n. 21
    def __call__(self, input):

        def _just_resize():
            img = input['img']
            w, h = img.size

            # perform scaling
            input['img'] = img.resize((self.ix, self.iy), Image.ANTIALIAS)

            if np.sum(input['loc']) != 0:
                loc = input['loc']
                loc[0, :] = loc[0, :] * self.ix / w
                loc[1, :] = loc[1, :] * self.iy / h
                input['loc'] = loc

        def _transform():
            angle = self.rangle * (2 * torch.rand(1)[0] - 1)
            grad_angle = angle * math.pi / 180
            scale = 1 + self.rscale * (2 * torch.rand(1)[0] - 1)
            transx = self.rtrans * (2 * torch.rand(1)[0] - 1)
            transy = self.rtrans * (2 * torch.rand(1)[0] - 1)

            img = input['img']
            w, h = img.size
            centerX, centerY = w // 2, h // 2

            # perform rotation
            img = img.rotate(angle, Image.BICUBIC)
            # perform translation
            img = img.transform(img.size, Image.AFFINE,
                                (1, 0, transx, 0, 1, transy))
            # perform scaling (PIL's resize takes (width, height))
            img = img.resize((int(math.ceil(scale * w)),
                              int(math.ceil(scale * h))),
                             Image.ANTIALIAS)

            w, h = img.size
            x1 = round((w - self.ix) // 2)
            y1 = round((h - self.iy) // 2)
            input['img'] = img.crop((x1, y1, x1 + self.ix, y1 + self.iy))

            if np.sum(input['loc']) != 0:
                loc = input['loc']

                newloc = np.ones((3, loc.shape[1]))
                newloc[0:2, :] = loc

                trans_matrix = np.array([[1,0,-1*transx], [0,1,-1*transy], [0,0,1]])
                scale_matrix = np.array([[scale,0,0], [0,scale,0], [0,0,1]])
                angle_matrix = np.array([
                    [math.cos(grad_angle),math.sin(grad_angle),0],
                    [-math.sin(grad_angle),math.cos(grad_angle),0],
                    [0,0,1]])

                # perform rotation
                newloc[0,:] = newloc[0,:] - centerY
                newloc[1,:] = newloc[1,:] - centerX
                newloc = np.dot(angle_matrix, newloc)
                newloc[0,:] = newloc[0,:] + centerY
                newloc[1,:] = newloc[1,:] + centerX
                # perform translation
                newloc = np.dot(trans_matrix, newloc)
                # perform scaling
                newloc = np.dot(scale_matrix, newloc)

                newloc[0,:] = newloc[0,:] - y1
                newloc[1,:] = newloc[1,:] - x1
                input['loc'] = newloc[0:2,:]

                for i in range(input['loc'].shape[1]):
                    if not np.isnan(input['loc'][:, i]).any():
                        if np.any(input['loc'][:, i] < 0) or \
                                        input['loc'][0,i] > self.iy or \
                                        input['loc'][1,i] > self.ix:
                            input['loc'][:, i] = np.nan
                            # TODO: fill the surrounding with normal noise
                            input['occ'][0, i] = 0

        # FIXME: create multiple images for the same sample with different occluded blocks for testing purposes
        # input['im'][:, 10:40, 22:50] = 0

        # adding one more at the end for the center landmark
        # add the center of image as the last landmark
        h, w = input['img'].size
        input['loc'] = np.hstack((input['loc'], np.array([[w // 2], [h // 2]])))

        input['occ'] = torch.cat((input['occ'], torch.ByteTensor([[1]])), 1)
        input['mask'] = torch.cat((input['mask'], torch.ByteTensor([[1]])), 1)
        orig_img = input['img']
        orig_loc = input['loc']
        orig_occ = input['occ'].clone()
        orig_mask = input['mask'].clone()

        _transform()

        if self.keep_landmarks_visible:
            # train: making sure all landmarks are still visible, if not perform
            #        another transformation
            mask = input['mask']
            mask2D = torch.cat((mask, mask), dim=0)
            landmarks = torch.from_numpy(input['loc'])
            limit = 100
            while not (mask == mask * input['occ']).all() or utils.isnan(landmarks[mask2D]).any():
                input['img'] = orig_img
                input['loc'] = orig_loc
                input['occ'] = orig_occ.clone()
                input['mask'] = orig_mask.clone()

                _transform()

                mask = input['mask']
                mask2D = torch.cat((mask, mask), dim=0)
                landmarks = torch.from_numpy(input['loc'])

                limit -= 1
                if limit == 0:
                    input['img'] = orig_img
                    input['loc'] = orig_loc
                    input['occ'] = orig_occ.clone()
                    input['mask'] = orig_mask.clone()
                    _just_resize()
                    print('using the original data because even after 100 transformations, there are still occluded landmarks!!!')
                    break

        input['tgt'] = self.toHeatmaps(input['loc'], self.image_resolution)

        return input
Example n. 22
def build_targets_max(target, anchor_wh, nA, nC, nGh, nGw):
    """
    returns tconf, tbox, tid
    """
    nB = len(target)  # number of images in batch

    txy = torch.zeros(nB, nA, nGh, nGw, 2).cuda()  # batch size, anchors, grid size
    twh = torch.zeros(nB, nA, nGh, nGw, 2).cuda()
    tconf = torch.LongTensor(nB, nA, nGh, nGw).fill_(0).cuda()
    tcls = torch.ByteTensor(nB, nA, nGh, nGw, nC).fill_(0).cuda()  # nC = number of classes
    tid = torch.LongTensor(nB, nA, nGh, nGw, 1).fill_(-1).cuda()
    for b in range(nB):
        t = target[b]
        t_id = t[:, 1].clone().long().cuda()
        t = t[:, [0, 2, 3, 4, 5]]
        nTb = len(t)  # number of targets
        if nTb == 0:
            continue

        #gxy, gwh = t[:, 1:3] * nG, t[:, 3:5] * nG
        gxy, gwh = t[:, 1:3].clone(), t[:, 3:5].clone()
        gxy[:, 0] = gxy[:, 0] * nGw
        gxy[:, 1] = gxy[:, 1] * nGh
        gwh[:, 0] = gwh[:, 0] * nGw
        gwh[:, 1] = gwh[:, 1] * nGh
        gi = torch.clamp(gxy[:, 0], min=0, max=nGw - 1).long()
        gj = torch.clamp(gxy[:, 1], min=0, max=nGh - 1).long()

        # Get grid box indices and prevent overflows (i.e. 13.01 on 13 anchors)
        #gi, gj = torch.clamp(gxy.long(), min=0, max=nG - 1).t()
        #gi, gj = gxy.long().t()

        # iou of targets-anchors (using wh only)
        box1 = gwh
        box2 = anchor_wh.unsqueeze(1)
        inter_area = torch.min(box1, box2).prod(2)
        iou = inter_area / (box1.prod(1) + box2.prod(2) - inter_area + 1e-16)

        # Select best iou_pred and anchor
        iou_best, a = iou.max(0)  # best anchor [0-2] for each target

        # Select best unique target-anchor combinations
        if nTb > 1:
            _, iou_order = torch.sort(-iou_best)  # best to worst

            # Unique anchor selection
            u = torch.stack((gi, gj, a), 0)[:, iou_order]
            # _, first_unique = np.unique(u, axis=1, return_index=True)  # first unique indices
            first_unique = return_torch_unique_index(u, torch.unique(
                u, dim=1))  # torch alternative
            i = iou_order[first_unique]
            # best anchor must share significant commonality (iou) with target
            i = i[iou_best[i] > 0.60]  # TODO: examine arbitrary threshold
            if len(i) == 0:
                continue

            a, gj, gi, t = a[i], gj[i], gi[i], t[i]
            t_id = t_id[i]
            if len(t.shape) == 1:
                t = t.view(1, 5)
        else:
            if iou_best < 0.60:
                continue

        tc, gxy, gwh = t[:, 0].long(), t[:, 1:3].clone(), t[:, 3:5].clone()
        gxy[:, 0] = gxy[:, 0] * nGw
        gxy[:, 1] = gxy[:, 1] * nGh
        gwh[:, 0] = gwh[:, 0] * nGw
        gwh[:, 1] = gwh[:, 1] * nGh

        # XY coordinates
        txy[b, a, gj, gi] = gxy - gxy.floor()

        # Width and height
        twh[b, a, gj, gi] = torch.log(gwh / anchor_wh[a])  # yolo method
        # twh[b, a, gj, gi] = torch.sqrt(gwh / anchor_wh[a]) / 2 # power method

        # One-hot encoding of label
        tcls[b, a, gj, gi, tc] = 1
        tconf[b, a, gj, gi] = 1
        tid[b, a, gj, gi] = t_id.unsqueeze(1)
    tbox = torch.cat([txy, twh], -1)
    return tconf, tbox, tid
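The width/height-only IoU above compares boxes as if they shared a corner, so the intersection is simply min(w1, w2) * min(h1, h2). A standalone check of that formula with made-up sizes:

import torch

gwh = torch.Tensor([[4., 2.]])                  # one target: w=4, h=2
anchor_wh = torch.Tensor([[2., 2.], [6., 3.]])  # two anchors
inter = torch.min(gwh, anchor_wh.unsqueeze(1)).prod(2)
iou = inter / (gwh.prod(1) + anchor_wh.unsqueeze(1).prod(2) - inter + 1e-16)
print(iou)  # tensor([[0.5000], [0.4444]]): 4/(8+4-4) and 8/(8+18-8)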
Example n. 23
def length2mask(length):
    mask = torch.ByteTensor(len(length), max(length)).zero_().cuda()
    for i, l in enumerate(length):
        mask[i][:l].fill_(1)
    return Variable(mask)
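For example, length2mask([3, 1, 2]) yields a 3x3 mask whose i-th row has its first length[i] entries set (a GPU is required, since the buffer is allocated with .cuda()):

mask = length2mask([3, 1, 2])
# 1  1  1
# 1  0  0
# 1  1  0
# [torch.cuda.ByteTensor of size 3x3]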
Example n. 24
def makeData(srcFile, ldaFile, tgtFile, srcDicts, ldaDicts, tgtDicts):
    src, tgt = [], []
    eq_mask = []
    lda = []
    sizes = []
    count, ignored = 0, 0

    logger.info('Processing %s & %s ...' % (srcFile, tgtFile))
    srcF = open(srcFile, encoding='utf-8')
    ldaF = open(ldaFile, encoding='utf-8')
    tgtF = open(tgtFile, encoding='utf-8')

    while True:
        sline = srcF.readline()
        ldaLine = ldaF.readline()
        tline = tgtF.readline()

        # normal end of file
        if sline == "" and tline == "" and ldaLine == "":
            break

        # source or target does not have same number of lines
        if sline == "" or tline == "" or ldaLine == "":
            logger.info(
                'WARNING: source and target do not have the same number of sentences'
            )
            break

        sline = sline.strip()
        ldaLine = ldaLine.strip()
        tline = tline.strip()

        # source and/or target are empty
        if sline == "" or tline == "" or ldaLine == "":
            # TODO: Fix this, does this affect dev
            logger.info('WARNING: ignoring an empty line (' + str(count + 1) +
                        ')')
            continue

        srcWords = sline.split(' ')
        ldaWords = ldaLine.split(' ')
        tgtWords = tline.split(' ')

        if len(srcWords) <= seq_length and len(tgtWords) <= seq_length:
            src += [srcDicts.convertToIdx(srcWords, s2s.Constants.UNK_WORD)]
            eq_mask += [
                torch.ByteTensor([
                    1 if ((len(x) == 1 and 'a' <= x <= 'z')
                          or x.startswith('[num')) else 0 for x in srcWords
                ])
            ]
            tgt += [
                tgtDicts.convertToIdx(tgtWords, s2s.Constants.UNK_WORD,
                                      s2s.Constants.BOS_WORD,
                                      s2s.Constants.EOS_WORD)
            ]
            lda += [ldaDicts.convertToIdx(ldaWords, s2s.Constants.UNK_WORD)]

            sizes += [len(srcWords)]
        else:
            ignored += 1

        count += 1

        if count % report_every == 0:
            logger.info('... %d sentences prepared' % count)

    srcF.close()
    ldaF.close()
    tgtF.close()

    if shuffle == 1:
        logger.info('... shuffling sentences')
        perm = torch.randperm(len(src))
        src = [src[idx] for idx in perm]
        eq_mask = [eq_mask[idx] for idx in perm]
        lda = [lda[idx] for idx in perm]
        tgt = [tgt[idx] for idx in perm]
        sizes = [sizes[idx] for idx in perm]

    logger.info('... sorting sentences by size')
    _, perm = torch.sort(torch.Tensor(sizes))
    src = [src[idx] for idx in perm]
    eq_mask = [eq_mask[idx] for idx in perm]
    lda = [lda[idx] for idx in perm]
    tgt = [tgt[idx] for idx in perm]

    logger.info(
        'Prepared %d sentences (%d ignored due to length == 0 or > %d)' %
        (len(src), ignored, seq_length))
    return src, eq_mask, lda, tgt
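For reference, the eq_mask built above flags source tokens that look like single-letter variables or '[num' placeholders; with made-up tokens:

import torch

srcWords = ['x', '+', '[num1]', 'equals', 'y']
eq_mask = torch.ByteTensor([
    1 if ((len(x) == 1 and 'a' <= x <= 'z') or x.startswith('[num')) else 0
    for x in srcWords
])
print(eq_mask)  # tensor([1, 0, 1, 0, 1], dtype=torch.uint8)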
Example n. 25
    def apply_model(self, ner_model, features):
        """
        apply_model function for LM-LSTM-CRF

        args:
            ner_model: sequence labeling model
            features (list): list of word lists
        """
        char_features = encode2char_safe(features, self.c_map)

        if self.caseless:
            word_features = encode_safe(
                list(map(lambda t: list(map(lambda x: x.lower(), t)),
                         features)), self.f_map, self.f_map['<unk>'])
        else:
            word_features = encode_safe(features, self.f_map,
                                        self.f_map['<unk>'])

        fea_len = [list(map(lambda t: len(t) + 1, f)) for f in char_features]
        forw_features = concatChar(char_features, self.c_map)

        word_len = max(map(lambda t: len(t) + 1, word_features))
        char_len = max(
            map(lambda t: len(t[0]) + word_len - len(t[1]),
                zip(forw_features, word_features)))
        forw_t = list(
            map(lambda t: t + [self.pad_char] * (char_len - len(t)),
                forw_features))
        back_t = torch.LongTensor(list(map(lambda t: t[::-1], forw_t)))
        forw_t = torch.LongTensor(forw_t)
        forw_p = torch.LongTensor(
            list(
                map(
                    lambda t: list(
                        itertools.accumulate(t + [1] * (word_len - len(t)))),
                    fea_len)))
        back_p = torch.LongTensor(
            list(
                map(
                    lambda t: [char_len - 1] +
                    [char_len - 1 - tup for tup in t[:-1]], forw_p)))

        masks = torch.ByteTensor(
            list(
                map(
                    lambda t: [1] * (len(t) + 1) + [0] *
                    (word_len - len(t) - 1), word_features)))
        word_t = torch.LongTensor(
            list(
                map(lambda t: t + [self.pad_word] * (word_len - len(t)),
                    word_features)))

        if self.if_cuda:
            f_f = autograd.Variable(forw_t.transpose(0, 1)).cuda()
            f_p = autograd.Variable(forw_p.transpose(0, 1)).cuda()
            b_f = autograd.Variable(back_t.transpose(0, 1)).cuda()
            b_p = autograd.Variable(back_p.transpose(0, 1)).cuda()
            w_f = autograd.Variable(word_t.transpose(0, 1)).cuda()
            mask_v = masks.transpose(0, 1).cuda()
        else:
            f_f = autograd.Variable(forw_t.transpose(0, 1))
            f_p = autograd.Variable(forw_p.transpose(0, 1))
            b_f = autograd.Variable(back_t.transpose(0, 1))
            b_p = autograd.Variable(back_p.transpose(0, 1))
            w_f = autograd.Variable(word_t.transpose(0, 1))
            mask_v = masks.transpose(0, 1)

        scores = ner_model(f_f, f_p, b_f, b_p, w_f)
        decoded = self.decoder.decode(scores.data, mask_v)

        return decoded
Example n. 26
    def get_sequence_info(self, seq_id):
        bb_anno = torch.Tensor(self.sequence_list[seq_id]['anno'])
        valid = (bb_anno[:, 2] > 0) & (bb_anno[:, 3] > 0)
        visible = torch.ByteTensor(
            self.sequence_list[seq_id]['target_visible']) & valid.byte()
        return {'bbox': bb_anno, 'valid': valid, 'visible': visible}
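The valid/visible logic above can be exercised in isolation with made-up annotations; a degenerate box (zero width or height) is flagged invalid, and visibility is ANDed with validity:

import torch

bb_anno = torch.Tensor([[0., 0., 10., 5.], [3., 3., 0., 7.]])  # x, y, w, h (illustrative)
valid = (bb_anno[:, 2] > 0) & (bb_anno[:, 3] > 0)
visible = torch.ByteTensor([1, 1]) & valid.byte()
print(valid, visible)  # second box has zero width -> invalid, hence not visible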
Example n. 27
    def __iter__(self):
        # Random permutation for the context
        idx_perm = range(0, self.context_num)
        if not self.eval:
            idx_perm = np.random.permutation(idx_perm)

        batch_size = self.batch_size
        for batch_i in range(
            (self.context_num + self.batch_size - 1) // self.batch_size):

            batch_idx = idx_perm[self.batch_size * batch_i:self.batch_size *
                                 (batch_i + 1)]

            context_batch = [self.data['context'][i] for i in batch_idx]
            batch_size = len(context_batch)

            context_batch = list(zip(*context_batch))

            # Process Context Tokens
            context_len = max(len(x) for x in context_batch[0])
            if not self.eval:
                context_len = min(context_len, self.context_maxlen)
            context_id = torch.LongTensor(batch_size, context_len).fill_(0)
            for i, doc in enumerate(context_batch[0]):
                select_len = min(len(doc), context_len)
                context_id[i, :select_len] = torch.LongTensor(doc[:select_len])

            # Process Context POS Tags
            context_tag = torch.LongTensor(batch_size, context_len).fill_(0)
            for i, doc in enumerate(context_batch[1]):
                select_len = min(len(doc), context_len)
                context_tag[i, :select_len] = torch.LongTensor(
                    doc[:select_len])

            # Process Context Named Entity
            context_ent = torch.LongTensor(batch_size, context_len).fill_(0)
            for i, doc in enumerate(context_batch[2]):
                select_len = min(len(doc), context_len)
                context_ent[i, :select_len] = torch.LongTensor(
                    doc[:select_len])

            if self.precompute_elmo > 0:
                if batch_i % self.precompute_elmo == 0:
                    precompute_idx = idx_perm[self.batch_size *
                                              batch_i:self.batch_size *
                                              (batch_i + self.precompute_elmo)]
                    elmo_tokens = [
                        self.data['context'][i][6] for i in precompute_idx
                    ]
                    context_cid = batch_to_ids(elmo_tokens)
                else:
                    context_cid = torch.LongTensor(1).fill_(0)
            else:
                context_cid = batch_to_ids(context_batch[6])

            # Process Questions (number = batch * Qseq)
            qa_data = self.data['qa']

            question_num, question_len = 0, 0
            question_batch = []
            for first_QID in context_batch[5]:
                i, question_seq = 0, []
                while True:
                    # stop when the next QA's context ID differs
                    if (first_QID + i >= len(qa_data) or
                            qa_data[first_QID + i][0] != qa_data[first_QID][0]):
                        break
                    question_seq.append(first_QID + i)
                    question_len = max(question_len,
                                       len(qa_data[first_QID + i][1]))
                    i += 1
                question_batch.append(question_seq)
                question_num = max(question_num, i)

            question_id = torch.LongTensor(batch_size, question_num,
                                           question_len).fill_(0)
            question_tokens = []
            for i, q_seq in enumerate(question_batch):
                for j, id in enumerate(q_seq):
                    doc = qa_data[id][1]
                    question_id[i, j, :len(doc)] = torch.LongTensor(doc)
                    question_tokens.append(qa_data[id][8])

                for j in range(len(q_seq), question_num):
                    question_id[i, j, :2] = torch.LongTensor([2, 3])
                    question_tokens.append(["<S>", "</S>"])

            question_cid = batch_to_ids(question_tokens)

            # Process Context-Question Features
            feature_len = len(qa_data[0][2][0])
            context_feature = torch.Tensor(
                batch_size, question_num, context_len, feature_len +
                (self.dialog_ctx * (self.use_dialog_act * 3 + 2))).fill_(0)
            for i, q_seq in enumerate(question_batch):
                for j, id in enumerate(q_seq):
                    doc = qa_data[id][2]
                    select_len = min(len(doc), context_len)
                    context_feature[
                        i, j, :select_len, :feature_len] = torch.Tensor(
                            doc[:select_len])

                    for prv_ctx in range(0, self.dialog_ctx):
                        if j > prv_ctx:
                            prv_id = id - prv_ctx - 1
                            prv_ans_st, prv_ans_end, prv_ans_choice = qa_data[
                                prv_id][3], qa_data[prv_id][4], qa_data[
                                    prv_id][5]

                            # dialog act: don't follow-up, follow-up, maybe follow-up (prv_ans_choice // 10)
                            if self.use_dialog_act:
                                context_feature[i, j, :select_len,
                                                feature_len + prv_ctx *
                                                (self.use_dialog_act * 3 + 2) +
                                                2 + (prv_ans_choice // 10)] = 1

                            if prv_ans_choice == 0:  # indicating that the previous reply is NO ANSWER
                                context_feature[i, j, :select_len,
                                                feature_len + prv_ctx *
                                                (self.use_dialog_act * 3 + 2) +
                                                1] = 1
                                continue

                            # There is an answer
                            for k in range(prv_ans_st, prv_ans_end + 1):
                                if k >= context_len:
                                    break
                                context_feature[
                                    i, j, k, feature_len + prv_ctx *
                                    (self.use_dialog_act * 3 + 2)] = 1

            # Process Answer (w/ raw question, answer text)
            answer_s = torch.LongTensor(batch_size, question_num).fill_(0)
            answer_e = torch.LongTensor(batch_size, question_num).fill_(0)
            answer_c = torch.LongTensor(batch_size, question_num).fill_(0)
            overall_mask = torch.ByteTensor(batch_size, question_num).fill_(0)
            question, answer = [], []
            for i, q_seq in enumerate(question_batch):
                question_pack, answer_pack = [], []
                for j, id in enumerate(q_seq):
                    answer_s[i, j], answer_e[i, j], answer_c[
                        i, j] = qa_data[id][3], qa_data[id][4], qa_data[id][5]
                    overall_mask[i, j] = 1
                    question_pack.append(qa_data[id][6])
                    answer_pack.append(qa_data[id][7])
                question.append(question_pack)
                answer.append(answer_pack)

            # Process Masks
            context_mask = torch.eq(context_id, 0)
            question_mask = torch.eq(question_id, 0)

            text = list(context_batch[3])  # raw text
            span = list(context_batch[4])  # character span for each words

            if self.use_bert is None:
                context_bert = None
                context_bert_mask = None
                context_bert_offsets = None
                question_bert = None
                question_bert_mask = None
                question_bert_offsets = None
            else:
                # the BERT inputs are never built in this snippet, so the
                # pin_memory block below would hit undefined names; fail fast
                raise NotImplementedError('use_bert path is not implemented here')

            if self.gpu:  # page locked memory for async data transfer
                context_id = context_id.pin_memory()
                context_feature = context_feature.pin_memory()
                context_tag = context_tag.pin_memory()
                context_ent = context_ent.pin_memory()
                context_mask = context_mask.pin_memory()
                question_id = question_id.pin_memory()
                question_mask = question_mask.pin_memory()
                answer_s = answer_s.pin_memory()
                answer_e = answer_e.pin_memory()
                answer_c = answer_c.pin_memory()
                overall_mask = overall_mask.pin_memory()
                context_cid = context_cid.pin_memory()
                question_cid = question_cid.pin_memory()
                if self.use_bert:
                    context_bert = context_bert.pin_memory()
                    context_bert_mask = context_bert_mask.pin_memory()
                    context_bert_offsets = context_bert_offsets.pin_memory()
                    question_bert = question_bert.pin_memory()
                    question_bert_mask = question_bert_mask.pin_memory()
                    question_bert_offsets = question_bert_offsets.pin_memory()

            yield (context_id, context_cid, context_feature, context_tag,
                   context_ent, context_mask, question_id, question_cid,
                   question_mask, overall_mask, answer_s, answer_e, answer_c,
                   text, span, question, answer, context_bert,
                   context_bert_mask, context_bert_offsets, question_bert,
                   question_bert_mask, question_bert_offsets)
Example n. 28
    if config.if_shuffle:
        shuffle(train_all_batches)
    batch_counter = 0
    start_time = time.time()
    ner_model.train()
    num_back = 0
    for input_ids_batch, input_mask_batch, first_subtokens_batch, last_subtokens_batch, label_batch, mask_batch \
            in train_all_batches:
        batch_len = max([
            len(first_subtokens) for first_subtokens in first_subtokens_batch
        ])

        input_ids_batch_var = torch.LongTensor(np.array(input_ids_batch))
        input_mask_batch_var = torch.LongTensor(np.array(input_mask_batch))
        mask_batch_var = torch.ByteTensor(np.array(mask_batch, dtype=np.uint8))
        if config.if_gpu:
            input_ids_batch_var = input_ids_batch_var.cuda()
            input_mask_batch_var = input_mask_batch_var.cuda()
            mask_batch_var = mask_batch_var.cuda()

        optimizer.zero_grad()
        loss = ner_model.forward(input_ids_batch_var, input_mask_batch_var,
                                 first_subtokens_batch, last_subtokens_batch,
                                 label_batch, mask_batch_var)
        loss.backward()
        clip_model_grad(ner_model, config.clip_norm)

        batch_counter += 1

        optimizer.step(None)
Example n. 29
def build_targets(target, anchor_wh, nA, nC, nG):
    """
    returns txy, twh, tconf, tcls
    """
    nB = len(target)  # number of images in batch
    nT = [len(x) for x in target]
    txy = torch.zeros(nB, nA, nG, nG, 2)  # batch size, anchors, grid size
    twh = torch.zeros(nB, nA, nG, nG, 2)
    tconf = torch.ByteTensor(nB, nA, nG, nG).fill_(0)
    tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0)  # nC = number of classes

    for b in range(nB):
        nTb = nT[b]  # number of targets
        if nTb == 0:
            continue
        t = target[b]

        gxy, gwh = t[:, 1:3] * nG, t[:, 3:5] * nG

        # Get grid box indices and prevent overflows (i.e. 13.01 on 13 anchors)
        gi, gj = torch.clamp(gxy.long(), min=0, max=nG - 1).t()

        # iou of targets-anchors (using wh only)
        box1 = gwh
        box2 = anchor_wh.unsqueeze(1)
        inter_area = torch.min(box1, box2).prod(2)
        iou = inter_area / (box1.prod(1) + box2.prod(2) - inter_area + 1e-16)

        # Select best iou_pred and anchor
        iou_best, a = iou.max(0)  # best anchor [0-2] for each target

        # Select best unique target-anchor combinations
        if nTb > 1:
            iou_order = torch.argsort(-iou_best)  # best to worst

            # Unique anchor selection
            u = torch.cat((gi, gj, a), 0).view((3, -1))
            # u = torch.stack((gi, gj, a),0)
            _, first_unique = np.unique(u[:, iou_order], axis=1, return_index=True)  # first unique indices
            # _, first_unique = torch.unique(u[:, iou_order], dim=1, return_inverse=True)  # different than numpy?

            i = iou_order[first_unique]
            # best anchor must share significant commonality (iou) with target
            i = i[iou_best[i] > 0.10]  # TODO: arbitrary threshold is problematic
            if len(i) == 0:
                continue

            a, gj, gi, t = a[i], gj[i], gi[i], t[i]
            if len(t.shape) == 1:
                t = t.view(1, 5)
        else:
            if iou_best < 0.10:
                continue

        tc, gxy, gwh = t[:, 0].long(), t[:, 1:3] * nG, t[:, 3:5] * nG

        # XY coordinates
        txy[b, a, gj, gi] = gxy - gxy.floor()

        # Width and height
        twh[b, a, gj, gi] = torch.log(gwh / anchor_wh[a])  # yolo method
        # twh[b, a, gj, gi] = torch.sqrt(gwh / anchor_wh[a]) / 2 # power method

        # One-hot encoding of label
        tcls[b, a, gj, gi, tc] = 1
        tconf[b, a, gj, gi] = 1

    return txy, twh, tconf, tcls
Example n. 30
import torch

# (the opening lines of this example are missing; `a` is assumed to be a
#  random 4x3 FloatTensor and `out` its first two rows)
a = torch.rand(4, 3)
# [torch.FloatTensor of size 4x3]
out = a[0:2, :]
# out:
# 0.8403  0.1383  0.5636
# 0.1963  0.2446  0.8257
# [torch.FloatTensor of size 2x3]

a1 = a[:, 0]  # all rows, first column
a2 = a[[0, 1], :]  # first two rows, all columns, same as out
a3 = a[0:2, 0:2]  # first two rows, first two columns
print(a1, "\n", a2, "\n", a3)

x = torch.Tensor([[1, 2, 3], [3, 4, 5]])
#  1  2  3
#  3  4  5
# [torch.FloatTensor of size 2x3]
mask = torch.ByteTensor([[0, 0, 1], [0, 1, 0]])
# 0  0  1
# 0  1  0
# [torch.ByteTensor of size 2x3]
out = torch.masked_select(x, mask)
# 3
# 4
# [torch.FloatTensor of size 2]
print(x, "\n", mask, "\n", out)

#%%
# 2.2 Joining
x = torch.FloatTensor([[1, 2, 3], [4, 5, 6]])  #2x3
y = torch.FloatTensor([[-1, -2, -3], [-4, -5, -6]])  #2x3
z1 = torch.cat([x, y], dim=0)  # concatenate along the first dimension, 4x3
#  1  2  3