Example #1
    def __init__(self, node_weight=1.0, edge_weight=1.0, ignore=0):
        super().__init__()
        self.loss_node = nn.CrossEntropyLoss(ignore_index=ignore)
        self.loss_edge = nn.CrossEntropyLoss(ignore_index=-1)
        self.node_weight = node_weight
        self.edge_weight = edge_weight
        self.ignore = ignore
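The snippet only defines the two criteria; a hypothetical forward, assuming node/edge logits and integer label tensors, could combine them as:

    def forward(self, node_logits, node_labels, edge_logits, edge_labels):
        # hypothetical: weighted sum of the node and edge cross-entropy terms
        node_loss = self.loss_node(node_logits, node_labels)
        edge_loss = self.loss_edge(edge_logits, edge_labels)
        return self.node_weight * node_loss + self.edge_weight * edge_loss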
Example #2
    def forward(self,
                input_ids,
                token_type_ids=None,
                attention_mask=None,
                masked_lm_labels=None,
                next_sentence_label=None):
        sequence_output, pooled_output = self.nezha(input_ids, token_type_ids,
                                                    attention_mask)
        prediction_scores, seq_relationship_score = self.cls(
            sequence_output, pooled_output)

        if masked_lm_labels is not None and next_sentence_label is not None:
            loss_fct = nn.CrossEntropyLoss(ignore_index=-1)
            masked_lm_loss = loss_fct(
                prediction_scores.reshape(
                    (-1, self.nezha.config["vocab_size"])),
                masked_lm_labels.reshape((-1, )))
            next_sentence_loss = loss_fct(
                seq_relationship_score.reshape((-1, 2)),
                next_sentence_label.reshape((-1, )))
            total_loss = masked_lm_loss + next_sentence_loss
            return total_loss
        elif masked_lm_labels is not None:
            loss_fct = nn.CrossEntropyLoss(ignore_index=-1)
            masked_lm_loss = loss_fct(
                prediction_scores.reshape(
                    (-1, self.nezha.config["vocab_size"])),
                masked_lm_labels.reshape((-1, )))
            total_loss = masked_lm_loss
            return total_loss
        else:
            return prediction_scores, seq_relationship_score
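For reference, ignore_index=-1 makes positions labeled -1 (the unmasked tokens in MLM) contribute nothing to the loss; a minimal standalone sketch:

import paddle
import paddle.nn as nn

loss_fct = nn.CrossEntropyLoss(ignore_index=-1)
logits = paddle.randn([4, 10])                             # 4 tokens, vocab size 10
labels = paddle.to_tensor([3, -1, -1, 7], dtype='int64')   # -1 marks unmasked tokens
print(loss_fct(logits, labels))                            # averaged over the 2 valid tokens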
Example #3
    def __init__(self, model_config, compound_encoder):
        super(GeoPredModel, self).__init__()
        self.compound_encoder = compound_encoder

        self.hidden_size = model_config['hidden_size']
        self.dropout_rate = model_config['dropout_rate']
        self.act = model_config['act']
        self.pretrain_tasks = model_config['pretrain_tasks']

        # context mask
        if 'Cm' in self.pretrain_tasks:
            self.Cm_vocab = model_config['Cm_vocab']
            self.Cm_linear = nn.Linear(compound_encoder.embed_dim,
                                       self.Cm_vocab + 3)
            self.Cm_loss = nn.CrossEntropyLoss()
        # functional group
        self.Fg_linear = nn.Linear(compound_encoder.embed_dim,
                                   model_config['Fg_size'])  # 494
        self.Fg_loss = nn.BCEWithLogitsLoss()
        # bond angle with regression
        if 'Bar' in self.pretrain_tasks:
            self.Bar_mlp = MLP(2,
                               hidden_size=self.hidden_size,
                               act=self.act,
                               in_size=compound_encoder.embed_dim * 3,
                               out_size=1,
                               dropout_rate=self.dropout_rate)
            self.Bar_loss = nn.SmoothL1Loss()
        # bond length with regression
        if 'Blr' in self.pretrain_tasks:
            self.Blr_mlp = MLP(2,
                               hidden_size=self.hidden_size,
                               act=self.act,
                               in_size=compound_encoder.embed_dim * 2,
                               out_size=1,
                               dropout_rate=self.dropout_rate)
            self.Blr_loss = nn.SmoothL1Loss()
        # atom distance with classification
        if 'Adc' in self.pretrain_tasks:
            self.Adc_vocab = model_config['Adc_vocab']
            self.Adc_mlp = MLP(2,
                               hidden_size=self.hidden_size,
                               in_size=self.compound_encoder.embed_dim * 2,
                               act=self.act,
                               out_size=self.Adc_vocab + 3,
                               dropout_rate=self.dropout_rate)
            self.Adc_loss = nn.CrossEntropyLoss()

        print('[GeoPredModel] pretrain_tasks:%s' % str(self.pretrain_tasks))
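The forward pass of these heads is not shown here; a small standalone sketch of the context-mask ('Cm') branch, with embed_dim=32 and Cm_vocab=100 as assumed values:

import paddle
import paddle.nn as nn

Cm_vocab = 100
Cm_linear = nn.Linear(32, Cm_vocab + 3)            # embed_dim -> Cm_vocab + 3
Cm_loss = nn.CrossEntropyLoss()

node_repr = paddle.randn([16, 32])                 # embeddings of 16 masked nodes
masked_labels = paddle.randint(0, Cm_vocab + 3, [16], dtype='int64')
print(Cm_loss(Cm_linear(node_repr), masked_labels))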
Example #4
def train():
    # enable dygraph mode
    paddle.disable_static()

    dist.init_parallel_env()

    # create network
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)
    loss_fn = nn.CrossEntropyLoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    # print(core._get_device_properties(dist.ParallelEnv().device_id))

    # create data loader
    # loader = paddle.io.DataLoader.from_generator(capacity=5, use_multiprocess=True)
    loader = paddle.io.DataLoader.from_generator(capacity=5)
    loader.set_batch_generator(random_batch_reader())

    for epoch_id in range(EPOCH_NUM):
        for batch_id, (image, label) in enumerate(loader()):
            out = layer(image)
            loss = loss_fn(out, label)

            loss = dp_layer.scale_loss(loss)
            loss.backward()
            dp_layer.apply_collective_grads()

            adam.step()
            adam.clear_grad()
            print("Epoch {} batch {}: loss = {}".format(
                epoch_id, batch_id, np.mean(loss.numpy())))
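The example relies on a random_batch_reader generator (and BATCH_SIZE/BATCH_NUM/EPOCH_NUM constants) that are not shown; a sketch compatible with set_batch_generator, with shapes assumed:

import numpy as np

BATCH_SIZE, BATCH_NUM, EPOCH_NUM = 16, 4, 4
IMAGE_SIZE, CLASS_NUM = 784, 10

def random_batch_reader():
    def _get_random_inputs_and_labels(image_shape, label_shape):
        image = np.random.random(size=image_shape).astype('float32')
        label = np.random.randint(0, CLASS_NUM, size=label_shape).astype('int64')
        return image, label

    def __reader__():
        for _ in range(BATCH_NUM):
            yield _get_random_inputs_and_labels(
                [BATCH_SIZE, IMAGE_SIZE], [BATCH_SIZE, 1])

    return __reader__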
Example #5
def train():
    # init env
    dist.init_parallel_env()

    # create network
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)
    loss_fn = nn.CrossEntropyLoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    # create data loader
    dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
    loader = paddle.io.DataLoader(dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        drop_last=True,
        num_workers=1)

    # train
    for epoch_id in range(EPOCH_NUM):
        for batch_id, (image, label) in enumerate(loader()):
            out = dp_layer(image)
            loss = loss_fn(out, label)

            loss.backward()

            adam.step()
            adam.clear_grad()

            if dist.get_rank() == 0:
                print("Epoch {} batch {}: loss = {}".format(
                    epoch_id, batch_id, np.mean(loss.numpy())))
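RandomDataset is likewise assumed; a sketch in the spirit of the Paddle docs, with IMAGE_SIZE/CLASS_NUM as assumed constants:

import numpy as np
import paddle

IMAGE_SIZE, CLASS_NUM = 784, 10

class RandomDataset(paddle.io.Dataset):
    def __init__(self, num_samples):
        self.num_samples = num_samples

    def __getitem__(self, idx):
        image = np.random.random([IMAGE_SIZE]).astype('float32')
        label = np.random.randint(0, CLASS_NUM, (1, )).astype('int64')
        return image, label

    def __len__(self):
        return self.num_samples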
Example #6
def finetune(args):
    paddle.set_device(args.device)
    if dist.get_world_size() > 1:
        dist.init_parallel_env()

    pos_file = os.path.join(args.data_dir, 'rt-polarity.pos')
    neg_file = os.path.join(args.data_dir, 'rt-polarity.neg')
    x_text, y = load_data_and_labels(pos_file, neg_file)
    x_train, x_test, y_train, y_test = train_test_split(x_text,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=args.seed)

    if not args.init_from_ckpt:
        raise ValueError('`init_from_ckpt` should be set.')
    model = ELMoBowTextClassification(args.init_from_ckpt, args.batch_size,
                                      args.sent_embedding_dim, args.dropout,
                                      args.num_classes)
    if dist.get_world_size() > 1:
        model = paddle.DataParallel(model)
    model.train()

    adam = paddle.optimizer.Adam(parameters=model.parameters(),
                                 learning_rate=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = nn.CrossEntropyLoss()

    vocab = load_vocab()

    train_dataset = SentencePolarityDatasetV1(x_train, y_train, vocab,
                                              args.max_seq_len)
    test_dataset = SentencePolarityDatasetV1(x_test, y_test, vocab,
                                             args.max_seq_len)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              return_list=True,
                              shuffle=True,
                              collate_fn=lambda batch: generate_batch(batch))
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             return_list=True,
                             shuffle=False,
                             collate_fn=lambda batch: generate_batch(batch))

    for epoch in range(args.epochs):
        print('Epoch {}/{}'.format(epoch + 1, args.epochs))
        for step, batch_data in enumerate(train_loader, start=1):
            ids, ids_reverse, label = batch_data

            output = model((ids, ids_reverse))
            loss = criterion(output, label)
            loss.backward()
            adam.step()
            adam.clear_grad()

            if step % args.logging_step == 0:
                print('step {}, loss {}'.format(step, loss.numpy()[0]))

    acc = test(model, test_loader)
    print('\ntest acc {}\n'.format(acc))
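test() is defined elsewhere in the script; a hypothetical accuracy loop consistent with how the model is called above:

import paddle

@paddle.no_grad()
def test(model, test_loader):
    # hypothetical evaluation loop over (ids, ids_reverse, label) batches
    model.eval()
    correct, total = 0, 0
    for ids, ids_reverse, label in test_loader:
        output = model((ids, ids_reverse))
        pred = paddle.argmax(output, axis=-1)
        correct += (pred == label.reshape(pred.shape)).astype('int64').sum().item()
        total += label.shape[0]
    model.train()
    return correct / total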
Example #7
def main():
    # Initialization for the parallel environment
    paddle.set_device(args.device)
    set_seed(args)
    # Define the model and metric
    # In finetune task, bigbird performs better when setting dropout to zero.
    model = BigBirdForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        attn_dropout=args.attn_dropout,
        hidden_dropout_prob=args.hidden_dropout_prob)

    criterion = nn.CrossEntropyLoss()
    metric = paddle.metric.Accuracy()

    # Define the tokenizer and dataloader
    tokenizer = BigBirdTokenizer.from_pretrained(args.model_name_or_path)
    config = getattr(model,
                     BigBirdForSequenceClassification.base_model_prefix).config
    train_data_loader, test_data_loader = \
            create_dataloader(args.batch_size, args.max_encoder_length, tokenizer, config)

    # Define the Adam optimizer
    optimizer = paddle.optimizer.Adam(parameters=model.parameters(),
                                      learning_rate=args.learning_rate,
                                      epsilon=1e-6)

    # Finetune the classification model
    do_train(model, criterion, metric, optimizer, train_data_loader, tokenizer)

    # Evaluate the fine-tuned model
    do_evalute(model, criterion, metric, test_data_loader)
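do_train and do_evalute are defined elsewhere in the script; a hypothetical, stripped-down version of the training loop over the objects created above might look like:

def do_train(model, criterion, metric, optimizer, train_data_loader, tokenizer):
    # hypothetical loop; the real script also handles rand_mask indices, logging, etc.
    model.train()
    for step, (input_ids, labels) in enumerate(train_data_loader, start=1):
        logits = model(input_ids)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        optimizer.clear_grad()
        metric.update(metric.compute(logits, labels))
    print("train acc: %.5f" % metric.accumulate())
    metric.reset()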
Example #8
def main():
    # Initialization for the parallel environment
    assert args.device in [
        "cpu", "gpu", "xpu"
    ], "Invalid device! Available device should be cpu, gpu, or xpu."

    paddle.set_device(args.device)
    set_seed(args)
    # Define the model and metric
    model = BigBirdForSequenceClassification.from_pretrained(
        args.model_name_or_path)
    criterion = nn.CrossEntropyLoss()
    metric = paddle.metric.Accuracy()

    # Define the tokenizer and dataloader
    tokenizer = BigBirdTokenizer.from_pretrained(args.model_name_or_path)
    global config
    config = getattr(model,
                     BigBirdForSequenceClassification.base_model_prefix).config
    train_data_loader, test_data_loader = \
            create_dataloader(args.batch_size, args.max_encoder_length, tokenizer)

    # Define the Adam optimizer
    optimizer = paddle.optimizer.Adam(parameters=model.parameters(),
                                      learning_rate=args.learning_rate,
                                      epsilon=1e-6)

    # Finetune the classification model
    do_train(model, criterion, metric, optimizer, train_data_loader, tokenizer)

    # Evaluate the fine-tuned model
    do_evalute(model, criterion, metric, test_data_loader)
Example #9
    def setUp(self):
        # enable dygraph mode
        place = paddle.CPUPlace()
        paddle.disable_static(place)

        # config seed
        paddle.seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)

        # create network
        self.layer = LinearNet()
        self.loss_fn = nn.CrossEntropyLoss()
        self.sgd = opt.SGD(learning_rate=0.001,
                           parameters=self.layer.parameters())

        # create data loader
        dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
        self.loader = paddle.io.DataLoader(
            dataset,
            places=place,
            batch_size=BATCH_SIZE,
            shuffle=True,
            drop_last=True,
            num_workers=0)

        # train
        train(self.layer, self.loader, self.loss_fn, self.sgd)

        # save
        self.model_path = "linear.example.model"
        paddle.jit.save(self.layer, self.model_path)
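The train helper (and the EPOCH_NUM constant) used in setUp are assumed; a minimal sketch matching the layer/loader/loss_fn/optimizer signature:

EPOCH_NUM = 4  # assumed

def train(layer, loader, loss_fn, opt):
    for epoch_id in range(EPOCH_NUM):
        for batch_id, (image, label) in enumerate(loader()):
            out = layer(image)
            loss = loss_fn(out, label)
            loss.backward()
            opt.step()
            opt.clear_grad()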
Example #10
def main():
    # Initialization for the parallel environment
    paddle.set_device(args.device)
    set_seed(args)
    # Define the model and metric
    model = BigBirdForSequenceClassification.from_pretrained(
        args.model_name_or_path)
    criterion = nn.CrossEntropyLoss()
    metric = paddle.metric.Accuracy()

    # Define the tokenizer and dataloader
    tokenizer = BigBirdTokenizer.from_pretrained(args.model_name_or_path)
    global config
    config = BigBirdModel.pretrained_init_configuration[
        args.model_name_or_path]
    train_data_loader, test_data_loader = \
            create_dataloader(args.batch_size, args.max_encoder_length, tokenizer)

    # Define the Adam optimizer
    optimizer = paddle.optimizer.Adam(parameters=model.parameters(),
                                      learning_rate=args.learning_rate,
                                      epsilon=1e-6)

    # Finetune the classification model
    do_train(model, criterion, metric, optimizer, train_data_loader,
             test_data_loader)

    # Evaluate the fine-tuned model
    do_evalute(model, criterion, metric, test_data_loader)
Example #11
def get_paddle_model(model_path):
    def train(layer, loader, loss_fn, optimizer):
        for _ in range(1):
            for _, (image, label) in enumerate(loader()):
                out = layer(image)
                loss = loss_fn(out, label)
                loss.backward()
                optimizer.step()
                optimizer.clear_grad()

    paddle.disable_static()
    model_layer = _LinearNet()
    loss_func = nn.CrossEntropyLoss()
    adam = opt.Adam(learning_rate=0.001, parameters=model_layer.parameters())

    dataset = _RandomDataset(64)
    data_loader = paddle.io.DataLoader(dataset,
                                       batch_size=16,
                                       shuffle=True,
                                       drop_last=True,
                                       num_workers=2)

    train(model_layer, data_loader, loss_func, adam)
    paddle.jit.save(layer=model_layer,
                    path=os.path.join(model_path, 'model'),
                    input_spec=[InputSpec(shape=[None, 784], dtype='float32')])
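The saved program can later be reloaded for inference with paddle.jit.load; a small usage sketch (directory name assumed):

import os
import numpy as np
import paddle

model_path = "saved_paddle_model"                   # hypothetical output directory
get_paddle_model(model_path)                        # trains _LinearNet and saves it

loaded = paddle.jit.load(os.path.join(model_path, 'model'))
loaded.eval()
x = paddle.to_tensor(np.random.random([1, 784]).astype('float32'))
print(loaded(x).shape)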
Example #12
    def loss(self, embeds):
        """
        Computes the softmax loss according to section 2.1 of GE2E.
        
        :param embeds: the embeddings as a tensor of shape (speakers_per_batch, 
        utterances_per_speaker, embedding_size)
        :return: the loss and the EER for this batch of embeddings.
        """
        speakers_per_batch, utterances_per_speaker = embeds.shape[:2]

        # Loss
        sim_matrix, *_ = self.similarity_matrix(embeds)
        sim_matrix = sim_matrix.reshape(
            [speakers_per_batch * utterances_per_speaker, speakers_per_batch])
        target = paddle.arange(0, speakers_per_batch,
                               dtype="int64").unsqueeze(-1)
        target = paddle.expand(target,
                               [speakers_per_batch, utterances_per_speaker])
        target = paddle.reshape(target, [-1])

        loss = nn.CrossEntropyLoss()(sim_matrix, target)

        # EER (not backpropagated)
        with paddle.no_grad():
            ground_truth = target.numpy()
            inv_argmax = lambda i: np.eye(
                1, speakers_per_batch, i, dtype=np.int64)[0]
            labels = np.array([inv_argmax(i) for i in ground_truth])
            preds = sim_matrix.numpy()

            # Snippet from https://yangcha.github.io/EER-ROC/
            fpr, tpr, thresholds = roc_curve(labels.flatten(), preds.flatten())
            eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)

        return loss, eer
Example #13
    def __init__(self, opt):
        super(MotLoss, self).__init__()
        self.crit = paddle.nn.MSELoss() if opt.mse_loss else FocalLoss()
        self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \
            RegLoss() if opt.reg_loss == 'sl1' else None
        self.crit_wh = paddle.nn.L1Loss(reduction='sum') if opt.dense_wh else \
            NormRegL1Loss() if opt.norm_wh else \
                RegWeightedL1Loss() if opt.cat_spec_wh else self.crit_reg
        self.opt = opt
        self.emb_dim = opt.reid_dim
        self.nID = opt.nID

        # param_attr = paddle.ParamAttr(initializer=KaimingUniform())
        # bound = 1 / math.sqrt(self.emb_dim)
        # bias_attr = paddle.ParamAttr(initializer=Uniform(-bound, bound))
        # self.classifier = nn.Linear(self.emb_dim, self.nID, weight_attr=param_attr, bias_attr=bias_attr)
        self.classifier = nn.Linear(self.emb_dim, self.nID, bias_attr=True)
        if opt.id_loss == 'focal':  # rarely used
            # torch.nn.init.normal_(self.classifier.weight, std=0.01)
            prior_prob = 0.01
            bias_value = -math.log((1 - prior_prob) / prior_prob)
            # torch.nn.init.constant_(self.classifier.bias, bias_value)

            weight_attr = paddle.framework.ParamAttr(initializer=nn.initializer.Normal(std=0.01))
            bias_attr = paddle.framework.ParamAttr(initializer=nn.initializer.Constant(bias_value))
            self.classifier = nn.Linear(self.emb_dim, self.nID, weight_attr=weight_attr, bias_attr=bias_attr)
        self.IDLoss = nn.CrossEntropyLoss(ignore_index=-1)
        self.emb_scale = math.sqrt(2) * math.log(self.nID - 1)
        # self.s_det = nn.Parameter(-1.85 * torch.ones(1))
        # self.s_id = nn.Parameter(-1.05 * torch.ones(1))
        self.s_det = paddle.create_parameter([1], dtype='float32', default_initializer = nn.initializer.Constant(value=-1.85))
        self.s_id = paddle.create_parameter([1], dtype='float32', default_initializer = nn.initializer.Constant(value=-1.05))
Example #14
    def __init__(self, **kwargs):
        super().__init__()
        self.loss_func = nn.CrossEntropyLoss(weight=None,
                                             ignore_index=0,
                                             reduction='none',
                                             soft_label=True,
                                             axis=-1)
Example #15
    def __init__(self, model_config, compound_encoder):
        super(AttrmaskModel, self).__init__()

        self.compound_encoder = compound_encoder

        out_size = CompoundKit.get_atom_feature_size('atomic_num') + 3
        self.linear = nn.Linear(compound_encoder.node_dim, out_size)
        self.criterion = nn.CrossEntropyLoss()
Example #16
    def __init__(self, vocab_size, gen_weight, disc_weight):
        super(ElectraPretrainingCriterion, self).__init__()

        self.vocab_size = vocab_size
        self.gen_weight = gen_weight
        self.disc_weight = disc_weight
        self.gen_loss_fct = nn.CrossEntropyLoss(reduction='none')
        self.disc_loss_fct = nn.BCEWithLogitsLoss(reduction='none')
Example #17
def _softmax_cross_entropy_with_logits(logits, labels):
  # print("++++++++++++++++++++++++++++++++++++START SOFT_CROSS_LOSS++++++++++++++++++++++++++++++++++++++++++++++++")
  param = list(range(len(logits.shape)))
  transpose_param = [0] + [param[-1]] + param[1:-1]
  logits = logits.transpose(transpose_param) # [N, ..., C] -> [N, C, ...]
  # print("++++++++++++++++++++++++++++++++++++START SOFT_CROSS_LOSS++++++++++++++++++++++++++++++++++++++++++++++++")
  loss_ftor = nn.CrossEntropyLoss(reduction="none")
  loss = loss_ftor(logits, paddle.argmax(labels, axis=-1))
  return loss
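A quick sanity check with 2-D logits (where the transpose is a no-op); note that for higher-rank inputs the class axis ends up at dimension 1 after the transpose, so the loss would then likely also need axis=1:

import numpy as np
import paddle

logits = paddle.randn([4, 3])                                          # [N, C]
labels = paddle.to_tensor(np.eye(3)[[0, 2, 1, 0]], dtype='float32')    # one-hot targets
print(_softmax_cross_entropy_with_logits(logits, labels))              # per-sample losses, shape [4]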
Example #18
    def __init__(self, config):
        super(GNN, self).__init__()
        log.info("model_type is %s" % self.__class__.__name__)

        self.config = config
        self.pretrain_tasks = config.pretrain_tasks.split(',')
        self.num_layers = config.num_layers
        self.drop_ratio = config.drop_ratio
        self.JK = config.JK
        self.block_num = config.block_num
        self.emb_dim = config.emb_dim
        self.num_tasks = config.num_tasks
        self.residual = config.residual
        self.graph_pooling = config.graph_pooling

        if self.num_layers < 2:
            raise ValueError("Number of GNN layers must be greater than 1.")

        ### GNN to generate node embeddings
        self.gnn_blocks = paddle.nn.LayerList()
        for i in range(self.config.block_num):
            self.gnn_blocks.append(getattr(CONV, self.config.gnn_type)(config))

        hidden_size = self.emb_dim * self.block_num
        ### Pooling function to generate whole-graph embeddings
        if self.config.graph_pooling == "bisop":
            pass
        else:
            self.pool = MeanGlobalPool()

        if self.config.clf_layers == 3:
            log.info("clf_layers is 3")
            self.graph_pred_linear = nn.Sequential(
                L.Linear(hidden_size, hidden_size // 2),
                L.batch_norm_1d(hidden_size // 2), nn.Swish(),
                L.Linear(hidden_size // 2, hidden_size // 4),
                L.batch_norm_1d(hidden_size // 4), nn.Swish(),
                L.Linear(hidden_size // 4, self.num_tasks))
        elif self.config.clf_layers == 2:
            log.info("clf_layers is 2")
            self.graph_pred_linear = nn.Sequential(
                L.Linear(hidden_size, hidden_size // 2),
                L.batch_norm_1d(hidden_size // 2), nn.Swish(),
                L.Linear(hidden_size // 2, self.num_tasks))
        else:
            self.graph_pred_linear = L.Linear(hidden_size, self.num_tasks)

        if 'Con' in self.pretrain_tasks:
            self.context_loss = nn.CrossEntropyLoss()
            self.contextmlp = nn.Sequential(
                L.Linear(self.emb_dim, self.emb_dim // 2),
                L.batch_norm_1d(self.emb_dim // 2), nn.Swish(),
                L.Linear(self.emb_dim // 2, 5000))
        if 'Ba' in self.pretrain_tasks:
            self.pretrain_bond_angle = PretrainBondAngle(config)
        if 'Bl' in self.pretrain_tasks:
            self.pretrain_bond_length = PretrainBondLength(config)
Example #19
def train_single_epoch(model: MemN2N, lr, data, config):
    """
    train one epoch

    Args:
        model (MemN2N): model to be trained
        lr (float): the learning rate of this epoch
        data: training data
        config: configs

    Returns:
        float: average loss
    """
    model.train()
    N = int(math.ceil(len(data) / config.batch_size))  # N training batches in total

    clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=config.max_grad_norm)
    optimizer = paddle.optimizer.SGD(learning_rate=lr,
                                     parameters=model.parameters(),
                                     grad_clip=clip)
    lossfn = nn.CrossEntropyLoss(reduction='sum')

    total_loss = 0

    if config.show:
        ProgressBar = getattr(import_module('utils'), 'ProgressBar')
        bar = ProgressBar('Train', max=N)

    for batch in range(N):
        if config.show:
            bar.next()

        optimizer.clear_grad()
        context = np.ndarray([config.batch_size, config.mem_size],
                             dtype=np.int64)
        target = np.ndarray([config.batch_size], dtype=np.int64)
        for i in range(config.batch_size):
            m = random.randrange(config.mem_size, len(data))
            target[i] = data[m]
            context[i, :] = data[m - config.mem_size:m]

        batch_data = paddle.to_tensor(context)
        batch_label = paddle.to_tensor(target)

        predict = model(batch_data)
        loss = lossfn(predict, batch_label)
        loss.backward()
        optimizer.step()
        total_loss += loss

    if config.show:
        bar.finish()

    return total_loss / N / config.batch_size
Example #20
    def __init__(self,
                 structure_weight,
                 loc_weight,
                 use_giou=False,
                 giou_weight=1.0,
                 **kwargs):
        super(TableAttentionLoss, self).__init__()
        self.loss_func = nn.CrossEntropyLoss(weight=None, reduction='none')
        self.structure_weight = structure_weight
        self.loc_weight = loc_weight
        self.use_giou = use_giou
        self.giou_weight = giou_weight
Example #21
def eval(model: MemN2N, data, config, mode="Test"):
    """
    evaluate the model performance

    Args:
        model (MemN2N): the model to be evaluated
        data: evaluation data
        config: model and eval configs
        mode: Valid or Test
    
    Returns:
        average loss
    """
    model.eval()
    lossfn = nn.CrossEntropyLoss(reduction='sum')
    N = int(math.ceil(len(data) / config.batch_size))
    total_loss = 0

    context = np.ndarray([config.batch_size, config.mem_size], dtype=np.int64)
    target = np.ndarray([config.batch_size], dtype=np.int64)

    if config.show:
        ProgressBar = getattr(import_module('utils'), 'ProgressBar')
        bar = ProgressBar(mode, max=N - 1)

    m = config.mem_size
    for batch in range(N):
        if config.show:
            bar.next()

        for i in range(config.batch_size):
            if m >= len(data):
                break
            target[i] = data[m]
            context[i, :] = data[m - config.mem_size:m]
            m += 1
        if m >= len(data):
            break

        batch_data = paddle.to_tensor(context)
        batch_label = paddle.to_tensor(target)

        predict = model(batch_data)
        loss = lossfn(predict, batch_label)

        total_loss += loss

    if config.show:
        bar.finish()

    return total_loss / N / config.batch_size
Example #22
    def __init__(self,
                 with_avg_pool=False,
                 in_channels=2048,
                 num_classes=1000):
        super(ClasHead, self).__init__()
        self.with_avg_pool = with_avg_pool
        self.in_channels = in_channels
        self.num_classes = num_classes

        self.criterion = nn.CrossEntropyLoss()

        if self.with_avg_pool:
            self.avg_pool = nn.AdaptiveAvgPool2D((1, 1))
        self.fc_cls = nn.Linear(in_channels, num_classes)
        reset_parameters(self.fc_cls)
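The head's forward and loss methods are not shown; a hypothetical continuation consistent with the attributes defined above:

    def forward(self, x):
        # hypothetical: optionally pool the feature map, then classify
        if self.with_avg_pool:
            x = self.avg_pool(x)
            x = paddle.flatten(x, start_axis=1)
        return self.fc_cls(x)

    def loss(self, cls_score, labels):
        return self.criterion(cls_score, labels)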
Example #23
    def build_and_train_model(self):
        # create network
        layer = LinearNet()
        loss_fn = nn.CrossEntropyLoss()

        adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())

        # create data loader
        # TODO: using new DataLoader cause unknown Timeout on windows, replace it
        loader = random_batch_reader()

        # train
        train(layer, loader, loss_fn, adam)

        return layer, adam
Example #24
    def __init__(self,
                 weight=None,
                 size_average=True,
                 ignore_index=-100,
                 sequence_normalize=False,
                 sample_normalize=True,
                 **kwargs):
        super(AsterLoss, self).__init__()
        self.weight = weight
        self.size_average = size_average
        self.ignore_index = ignore_index
        self.sequence_normalize = sequence_normalize
        self.sample_normalize = sample_normalize
        self.loss_sem = CosineEmbeddingLoss()
        self.is_cosin_loss = True
        self.loss_func_rec = nn.CrossEntropyLoss(weight=None, reduction='none')
Example #25
    def forward(
            self,
            input_ids=None,
            bbox=None,
            image=None,
            attention_mask=None,
            token_type_ids=None,
            position_ids=None,
            head_mask=None,
            labels=None, ):
        outputs = self.layoutxlm(
            input_ids=input_ids,
            bbox=bbox,
            image=image,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask, )
        seq_length = input_ids.shape[1]
        # sequence out and image out
        sequence_output, image_output = outputs[0][:, :seq_length], outputs[
            0][:, seq_length:]
        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        outputs = logits,

        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()

            if attention_mask is not None:
                active_loss = attention_mask.reshape([-1, ]) == 1
                active_logits = logits.reshape(
                    [-1, self.num_classes])[active_loss]
                active_labels = labels.reshape([-1, ])[active_loss]
                loss = loss_fct(active_logits, active_labels)
            else:
                loss = loss_fct(
                    logits.reshape([-1, self.num_classes]),
                    labels.reshape([-1, ]))

            outputs = (loss, ) + outputs

        return outputs
Example #26
    def __init__(self, vocab, hidden_size, latent_size, depthT, depthG):
        super(JTNNVAE, self).__init__()
        self.vocab = vocab
        self.hidden_size = hidden_size
        self.latent_size = latent_size = int(latent_size / 2)

        self.jtnn = JTNNEncoder(hidden_size, depthT, nn.Embedding(vocab.size(), hidden_size))
        self.decoder = JTNNDecoder(vocab, hidden_size, latent_size, nn.Embedding(vocab.size(), hidden_size))

        self.jtmpn = JTMPN(hidden_size, depthG)
        self.mpn = MPN(hidden_size, depthG)

        self.A_assm = nn.Linear(latent_size, hidden_size, bias_attr=False)
        self.assm_loss = nn.CrossEntropyLoss(reduction='sum')

        self.T_mean = nn.Linear(hidden_size, latent_size)
        self.T_var = nn.Linear(hidden_size, latent_size)
        self.G_mean = nn.Linear(hidden_size, latent_size)
        self.G_var = nn.Linear(hidden_size, latent_size)
Example #27
    def __init__(self,
                 balance_loss=True,
                 main_loss_type='DiceLoss',
                 negative_ratio=3,
                 return_origin=False,
                 eps=1e-6,
                 **kwargs):
        """
               The BalanceLoss for Differentiable Binarization text detection
               args:
                   balance_loss (bool): whether balance loss or not, default is True
                   main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
                       'Euclidean','BCELoss', 'MaskL1Loss'], default is  'DiceLoss'.
                   negative_ratio (int|float): float, default is 3.
                   return_origin (bool): whether return unbalanced loss or not, default is False.
                   eps (float): default is 1e-6.
               """
        super(BalanceLoss, self).__init__()
        self.balance_loss = balance_loss
        self.main_loss_type = main_loss_type
        self.negative_ratio = negative_ratio
        self.return_origin = return_origin
        self.eps = eps

        if self.main_loss_type == "CrossEntropy":
            self.loss = nn.CrossEntropyLoss()
        elif self.main_loss_type == "Euclidean":
            self.loss = nn.MSELoss()
        elif self.main_loss_type == "DiceLoss":
            self.loss = DiceLoss(self.eps)
        elif self.main_loss_type == "BCELoss":
            self.loss = BCELoss(reduction='none')
        elif self.main_loss_type == "MaskL1Loss":
            self.loss = MaskL1Loss(self.eps)
        else:
            loss_type = [
                'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss',
                'MaskL1Loss'
            ]
            raise Exception(
                "main_loss_type in BalanceLoss() can only be one of {}".format(
                    loss_type))
Example #28
    def __init__(self, vocab, hidden_size, latent_size, embedding):
        super(JTNNDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.vocab_size = vocab.size()
        self.vocab = vocab
        self.embedding = embedding
        latent_size = int(latent_size)
        self.W_z = nn.Linear(2 * hidden_size, hidden_size)
        self.U_r = nn.Linear(hidden_size, hidden_size, bias_attr=False)
        self.W_r = nn.Linear(hidden_size, hidden_size)
        self.W_h = nn.Linear(2 * hidden_size, hidden_size)

        self.W = nn.Linear(hidden_size + latent_size, hidden_size)

        self.U = nn.Linear(hidden_size + latent_size, hidden_size)
        self.U_i = nn.Linear(2 * hidden_size, hidden_size)

        self.W_o = nn.Linear(hidden_size, self.vocab_size)
        self.U_o = nn.Linear(hidden_size, 1)

        self.pred_loss = nn.CrossEntropyLoss(reduction='sum')
        self.stop_loss = nn.BCEWithLogitsLoss(reduction='sum')
Example #29
def train_ch6(net, train_iter, test_iter, batch_size, optimi, num_epochs):

    loss = nn.CrossEntropyLoss()
    batch_count = 0
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for idx, (X, y) in enumerate(train_iter):
            y_hat = net(X)
            l = loss(y_hat, y)
            optimi.clear_grad()
            l.backward()
            optimi.step()
            train_l_sum += l.numpy()[0]
            train_acc_sum += (y_hat.argmax(
                axis=1) == y.flatten()).astype('float32').sum().numpy()[0]
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print(
            'epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
            % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n,
               test_acc, time.time() - start))
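evaluate_accuracy is assumed by the loop above; a minimal sketch mirroring the snippet's own accuracy computation:

import paddle

@paddle.no_grad()
def evaluate_accuracy(data_iter, net):
    # hypothetical helper matching the call in train_ch6
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        y_hat = net(X)
        acc_sum += (y_hat.argmax(
            axis=1) == y.flatten()).astype('float32').sum().numpy()[0]
        n += y.shape[0]
    return acc_sum / n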
Example #30
    def __init__(self, num_classes=10, **kwargs):
        self.num_classes = num_classes
        decs = [
            LayerDesc(nn.Conv2D, 1, 64, kernel_size=11, stride=4, padding=5),
            LayerDesc(nn.ReLU),
            LayerDesc(nn.MaxPool2D, kernel_size=2, stride=2),
            LayerDesc(nn.Conv2D, 64, 192, kernel_size=5, padding=2),
            F.relu,
            LayerDesc(nn.MaxPool2D, kernel_size=2, stride=2),
            LayerDesc(nn.Conv2D, 192, 384, kernel_size=3, padding=1),
            F.relu,
            LayerDesc(nn.Conv2D, 384, 256, kernel_size=3, padding=1),
            F.relu,
            LayerDesc(nn.Conv2D, 256, 256, kernel_size=3, padding=1),
            F.relu,
            LayerDesc(nn.MaxPool2D, kernel_size=2, stride=2),
            LayerDesc(ReshapeHelp, shape=[-1, 256]),
            LayerDesc(nn.Linear, 256, self.num_classes),  # classifier
        ]
        super(AlexNetPipeDesc, self).__init__(layers=decs,
                                              loss_fn=nn.CrossEntropyLoss(),
                                              **kwargs)