Example #1
0
 def __init__(self, hdfs_host: str = None, device: str = 'cpu'):
     """Build the hate-speech task, the baseline model, and training state.

     hdfs_host is accepted for platform compatibility but is not used here;
     device selects where the model lives (default CPU).
     """
     self.device = device
     # 9:1 train/validation split over the training data.
     self.task = HateSpeech(self.TRAIN_DATA_PATH, (9, 1))
     # The vocabulary size sizes the embedding table of the baseline model.
     vocab_size = self.task.max_vocab_indexes['syllable_contents']
     self.model = BaseLine(256, 3, 0.2, vocab_size, 384)
     self.model.to(self.device)
     # Plain BCE: the model is expected to emit probabilities, not logits.
     self.loss_fn = nn.BCELoss()
     self.batch_size = 128
     self.__test_iter = None
     # Register save/load hooks for the model with the platform
     # (bind_model is defined elsewhere in this project).
     bind_model(self.model)
Example #2
0
 def __init__(self, hdfs_host: str = None):
     """Build the task, the config-driven baseline model, loss, and NSML binding.

     hdfs_host is accepted for platform compatibility but is not used here.
     All hyperparameters come from the module-level `config` object.
     """
     self.device = config.device
     # 9:1 train/validation split over the training data.
     self.task = HateSpeech(self.TRAIN_DATA_PATH, [9, 1])
     self.model = BaseLine(config.hidden_dim, config.filter_size,
                           config.dropout,
                           self.task.max_vocab_indexes['syllable_contents'],
                           config.embedding_dim,
                           config.padding)  ## keep the saved model's hyperparameters in sync with these
     ## Baseline : self.model = BaseLine(256, 3, 0.2, self.task.max_vocab_indexes['syllable_contents'], 384)
     #         print('can use gpu num = ',torch.cuda.device_count())
     # Wrap in DataParallel only when more than one GPU is available.
     if torch.cuda.device_count() > 1:
         self.model = nn.DataParallel(self.model)
     self.model.to(config.device)
     # NOTE(review): the first positional argument of BCEWithLogitsLoss is
     # `weight` (a per-element rescaling), not `pos_weight` — confirm this
     # class-imbalance ratio is being applied as intended.
     self.loss_fn = nn.BCEWithLogitsLoss(
         torch.tensor(config.weights[0] / config.weights[1]))
     self.batch_size = config.batch_size
     self.__test_iter = None
     # Register save/load hooks for the model with the NSML platform.
     bind_model(self.model)
Example #3
0
    # Ensemble hyperparameters. HIDDEN_DIM and DROPOUT_RATE are referenced
    # below but defined above this excerpt (outside the visible lines).
    EMBEDDING_SIZE = 128
    BATCH_SIZE = 512
    BI_RNN_LAYERS = 1
    UNI_RNN_LAYERS = 1
    LEARNING_RATE = 0.001
    ENSEMBLE_SIZE = 5

    parser = ArgumentParser()
    parser.add_argument('--mode', default='train')
    # NOTE(review): a CLI-supplied value arrives as a string, so
    # '--pause 0' would still be truthy below; consider type=int.
    parser.add_argument('--pause', default=0)
    args = parser.parse_args()
    task = HateSpeech()
    # Vocabulary size for the syllable embedding table.
    vocab_size = task.max_vocab_indexes['syllable_contents']
    models = []
    # Build ENSEMBLE_SIZE independent baseline models on the GPU.
    for i in range(ENSEMBLE_SIZE):
        model = BaseLine(HIDDEN_DIM, DROPOUT_RATE, vocab_size, EMBEDDING_SIZE, BI_RNN_LAYERS, UNI_RNN_LAYERS)
        model.to('cuda')
        models.append(model)

    if args.pause:
        # Serving mode: bind the ensemble and hand control to NSML.
        bind_models(models)
        nsml.paused(scope=locals())
    if args.mode == 'train':
        scores = []
        # Train each ensemble member separately and restore its best checkpoint.
        for i, model in enumerate(models):
            trainer = Trainer(model, i, ENSEMBLE_SIZE, device='cuda')
            best_state_dict, best_f1_score = trainer.train()
            model.load_state_dict(best_state_dict)
            scores.append(best_f1_score)
            print('best f1 score:', best_f1_score)
Example #4
0
if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--mode', default='train')
    # type=int so that an explicit '--pause 0' stays falsy: without it a
    # CLI-supplied value arrives as the truthy string '0' even though the
    # default is the integer 0.
    parser.add_argument('--pause', type=int, default=0)
    args = parser.parse_args()

    # Fix all RNG seeds (torch CPU/GPU, numpy, stdlib) for reproducible runs.
    torch.manual_seed(2020)
    torch.cuda.manual_seed_all(2020)
    np.random.seed(2020)
    random.seed(2020)

    if args.pause:
        # Serving mode: rebuild the model exactly as configured, bind it,
        # and hand control to NSML until the platform resumes us.
        task = HateSpeech()
        model = BaseLine(config.hidden_dim, config.filter_size, config.dropout,
                         task.max_vocab_indexes['syllable_contents'],
                         config.embedding_dim, config.padding)
        model.to("cuda")
        bind_model(model)
        nsml.paused(scope=locals())
    if args.mode == 'train':
        # First run with the configured embedding size ...
        print(config)
        trainer = Trainer()
        trainer.train()
        print('-' * 50)
        ##############################################
        # ... then a second run with embedding_dim lowered to 128 to compare.
        config.embedding_dim = 128
        print(config)
        trainer = Trainer()
        trainer.train()
        print('-' * 50)
Example #5
0
            losses = self.loss_fn(preds, batch.eval_reply)
            pred_lst += preds.tolist()
            acc_lst += accs.tolist()
            loss_sum += losses.tolist() * len(batch)
        return pred_lst, loss_sum / total, acc_lst, total

    def save_model(self, model, appendix=None):
        """Persist *model* together with the task's class name via torch.save.

        The checkpoint is written to 'model', or 'model_<appendix>' when an
        appendix is given (e.g. an epoch number or a tag).
        """
        suffix = '_{}'.format(appendix) if appendix else ''
        payload = {
            'model': model,
            'task': type(self.task).__name__,
        }
        torch.save(payload, 'model' + suffix)


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--mode', default='train')
    # type=int keeps an explicit '--pause 0' falsy: without it a
    # CLI-supplied value arrives as the truthy string '0' even though the
    # default is the integer 0.
    parser.add_argument('--pause', type=int, default=0)
    args = parser.parse_args()
    if args.pause:
        # Serving mode: rebuild the baseline model, bind its save/load
        # hooks, and hand control to NSML until the platform resumes us.
        task = HateSpeech()
        model = BaseLine(256, 3, 0.2,
                         task.max_vocab_indexes['syllable_contents'], 384)
        model.to("cuda")
        bind_model(model)
        nsml.paused(scope=locals())
    if args.mode == 'train':
        trainer = Trainer(device='cuda')
        trainer.train()