def create_project_file(config):
    """
    Creates a coregen project with settings for this device

    Args:
        config (dictionary): configuration dictionary

    Returns:
        (string): path to the generated project file

    Raises:
        Nothing
    """
    core_dir = get_coregen_dir(config, absolute = True)
    cp_fn = os.path.join(core_dir, COREGEN_PROJECT_NAME)
    fp = open(cp_fn, "w")

    #Open up the template dictionary
    fn = COREGEN_TEMPLATE
    fn = os.path.join(os.path.dirname(__file__), fn)

    template = json.load(open(fn, "r"))

    template["device"] = utils.get_device(config)
    template["devicefamily"] = utils.get_family(config)
    template["package"] = utils.get_package(config)
    template["speedgrade"] = utils.get_speed_grade(config)
    template["workingdirectory"] = get_coregen_temp_dir(config, absolute = True)
    for t in template:
        fp.write("SET %s = %s%s" % (t, template[t], os.linesep))
    fp.close()

    return cp_fn
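
A hypothetical usage sketch for the helper above; the configuration keys shown are assumptions based on the utils.get_*() accessors, not the project's documented schema.

# Hypothetical usage (the config keys are illustrative assumptions)
example_config = {
    "device": "xc6slx9-tqg144-3",    # assumed Xilinx part string consumed by utils.get_*()
    "base_dir": "/tmp/coregen_demo"  # assumed key for the project's working directory
}
project_file = create_project_file(example_config)
print("Wrote coregen project to " + project_file)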
Example #2
 def post(self):
     # Check arguments
     args = user_parser.parse_args()
     if not args["email"] or not args["password"]:
         abort(406)
     # Check for duplicate emails
     if User.query.filter(User.email == args["email"]).first() is not None:
         abort(409)
     # Add user
     user = User(args["email"], args["password"])
     if args["phone"]:
         user.phone = args["phone"]
     db.session.add(user)
     db.session.commit()
     # Add current device for the user
     device = Device(user.id, get_device())
     device.active = True
     db.session.add(device)
     db.session.commit()
     return user, 201
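
The handler above relies on a module-level user_parser. Below is a minimal sketch of how such a parser is typically defined with Flask-RESTful's reqparse; the field names are taken from the handler, everything else is an assumption rather than the original project's code.

from flask_restful import reqparse

# Assumed request parser backing the post() handler above
user_parser = reqparse.RequestParser()
user_parser.add_argument("email", type=str, location="json")
user_parser.add_argument("password", type=str, location="json")
user_parser.add_argument("phone", type=str, location="json")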
Example #3
parser.add_argument('--with_exploration',
                    action='store_true',
                    help="See data exploration visualization")
parser.add_argument('--download',
                    type=str,
                    default=None,
                    nargs='+',
                    choices=dataset_names,
                    help="Download the specified datasets and quits.")

# Parse the command-line arguments
args = parser.parse_args()

# Process the arguments
CUDA_DEVICE = get_device(args.cuda)

# % of training samples
SAMPLE_PERCENTAGE = args.training_sample
# Data augmentation
FLIP_AUGMENTATION = args.flip_augmentation
RADIATION_AUGMENTATION = args.radiation_augmentation
MIXTURE_AUGMENTATION = args.mixture_augmentation
# Dataset name
DATASET = args.dataset
# Model name
MODEL = args.model
# Number of runs (for cross-validation)
N_RUNS = args.runs
# Spatial context size (number of neighbours in each spatial direction)
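
CUDA_DEVICE = get_device(args.cuda) above maps a command-line ordinal to a torch device. A minimal sketch of such a helper is shown below, assuming the common convention that a negative ordinal means CPU; it is illustrative, not necessarily the project's exact implementation.

import torch

def get_device(ordinal):
    # Negative ordinals select the CPU; otherwise try the requested CUDA device.
    if ordinal < 0:
        return torch.device("cpu")
    if torch.cuda.is_available():
        return torch.device("cuda:{}".format(ordinal))
    print("CUDA device requested but not available, falling back to CPU")
    return torch.device("cpu")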
Example #4
def train_model(args: dict, hparams: dict):
    # Code for this function adopted from https://mccormickml.com/2019/07/22/BERT-fine-tuning/
    
    file = args.dataset_filepath
    # pos_file = args.pos_file
    # neg_file = args.neg_file
    truncation = args.truncation
    # n_samples = args.n_samples
    seed_val = hparams["seed_val"]
    device = utils.get_device(device_no=args.device_no)
    saves_dir = "saves/"

    Path(saves_dir).mkdir(parents=True, exist_ok=True)   
    time = datetime.datetime.now()

    saves_path = os.path.join(saves_dir, utils.get_filename(time))
    Path(saves_path).mkdir(parents=True, exist_ok=True)

    log_path = os.path.join(saves_path, "training.log")

    logging.basicConfig(filename=log_path, filemode='w', format='%(name)s - %(levelname)s - %(message)s', level=logging.DEBUG)
    logger = logging.getLogger()
    # logger.setLevel()

    logger.info("File: "+str(file))
    logger.info("Parameters: "+str(args))
    logger.info("Truncation: "+truncation)

    # Load the BERT tokenizer.
    logger.info('Loading BERT tokenizer...')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
    max_len = 0

    
    # samples = utils.read_and_sample(file
    # # , seed_val=seed_val
    # )
    samples = utils.read_pairwise(file, first=0, second=2)
    

    random.shuffle(samples)
    input_ids = []
    attention_masks = []
    
    samples_text = [val[0] for val in samples]
    samples_label = [val[1] for val in samples]

    print(np.unique(np.array(samples_label)))

    max_len = 0

    # For every sentence...
    for text in samples_text:

        # Tokenize the text and add `[CLS]` and `[SEP]` tokens.
        input_id = tokenizer(text, add_special_tokens=True)

        # Update the maximum sentence length.
        max_len = max(max_len, len(input_id['input_ids']))

    logger.info('Max text length: ' + str(max_len))
    print('Max text length: ' + str(max_len))

    for text in samples_text:        
        input_id = tokenizer(text, add_special_tokens=True)
        # print(len(input_id['input_ids']))
        # if len(input_id['input_ids']) > 512:                        
        #     if truncation == "tail-only":
        #         input_id = [tokenizer.cls_token_id]+input_id[-511:]      
        #     elif truncation == "head-and-tail":
        #         input_id = [tokenizer.cls_token_id]+input_id[1:129]+input_id[-382:]+[tokenizer.sep_token_id]
        #     else:
        #         input_id = input_id[:511]+[tokenizer.sep_token_id]
                
        #     input_ids.append(torch.tensor(input_id).view(1,-1))
        #     attention_masks.append(torch.ones([1,len(input_id)], dtype=torch.long))
        # else:
        encoded_dict = tokenizer(
                            text,                      
                            add_special_tokens = True, 
                            truncation=True,                               
                            max_length = 512,         
                            padding = 'max_length',
                            return_attention_mask = True,
                            return_tensors = 'pt',
                    )
                    
        input_ids.append(encoded_dict['input_ids'])
                    
        attention_masks.append(encoded_dict['attention_mask'])
    
    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    samples_label_tensor = torch.tensor(samples_label)
    # samples_text_tensor = torch.tensor(samples_text)
    
    dataset = TensorDataset(input_ids, attention_masks, samples_label_tensor)
    # dataset = TensorDataset(samples_text_tensor, samples_label_tensor)

    train_size = int(0.9 * len(dataset))
    val_size = len(dataset) - train_size

    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    logger.info('{:>5,} training samples'.format(train_size))
    logger.info('{:>5,} validation samples'.format(val_size))

    batch_size = hparams["batch_size"]

    train_dataloader = DataLoader(
                train_dataset,  # The training samples.
                sampler = RandomSampler(train_dataset), # Select batches randomly
                batch_size = batch_size, # Trains with this batch size.
                # collate_fn = collate_fn
            )

    validation_dataloader = DataLoader(
                val_dataset, # The validation samples.
                sampler = SequentialSampler(val_dataset), # Pull out batches sequentially.
                batch_size = batch_size, # Evaluate with this batch size.
                # collate_fn = collate_fn
            )


    model = BertForSequenceClassification.from_pretrained(        
        "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
        num_labels = 2, # The number of output labels--2 for binary classification.
                        # You can increase this for multi-class tasks.   
        output_attentions = False, # Whether the model returns attentions weights.
        output_hidden_states = False, # Whether the model returns all hidden-states.        
    )
    
    
    model = model.to(device=device)    
    # model.cuda(device=device)

    optimizer = AdamW(model.parameters(),
                    lr = hparams["learning_rate"], # args.learning_rate - default is 5e-5, our notebook had 2e-5
                    eps = hparams["adam_epsilon"] # args.adam_epsilon  - default is 1e-8.
                    )
    epochs = 4

    total_steps = len(train_dataloader) * epochs

    scheduler = get_linear_schedule_with_warmup(optimizer, 
                                                num_warmup_steps = 0, # Default value in run_glue.py
                                                num_training_steps = total_steps)

    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

    training_stats = []

    for epoch_i in range(0, epochs):
        
        logger.info("")
        logger.info('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        logger.info('Training...')

        
        total_train_loss = 0

        model.train()

        for step, batch in enumerate(train_dataloader):
            print(len(train_dataloader))
        
            if step % 40 == 0 and not step == 0:               
                logger.info('  Batch {:>5,}  of  {:>5,}. '.format(step, len(train_dataloader)))

            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)           

            model.zero_grad()        

            loss, logits = model(b_input_ids, 
                                token_type_ids=None, 
                                attention_mask=b_input_mask, 
                                labels=b_labels)

            # print(logits)
            # print(loss)
            
            total_train_loss += loss.detach().cpu().numpy()

            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            optimizer.step()

            scheduler.step()
        
        avg_train_loss = total_train_loss / len(train_dataloader)            

        logger.info("")
        logger.info("Average training loss: {0:.2f}".format(avg_train_loss))

            
        logger.info("")
        logger.info("Running Validation...")

        model.eval()

        total_eval_accuracy = 0
        total_eval_loss = 0

        for batch in validation_dataloader:
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)
            
            with torch.no_grad():        

                (loss, logits) = model(b_input_ids, 
                                    token_type_ids=None, 
                                    attention_mask=b_input_mask,
                                    labels=b_labels)
                
            total_eval_loss += loss.detach().cpu().numpy()

            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            total_eval_accuracy += flat_accuracy(logits, label_ids)
            

        avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
        logger.info("Accuracy: {0:.2f}".format(avg_val_accuracy))

        avg_val_loss = total_eval_loss / len(validation_dataloader)
               
        logger.info("Validation Loss: {0:.2f}".format(avg_val_loss))        

        training_stats.append(
            {
                'epoch': epoch_i + 1,
                'Training Loss': avg_train_loss,
                'Valid. Loss': avg_val_loss,
                'Valid. Accur.': avg_val_accuracy,                
            }
        )

        model_save_path = os.path.join(saves_path, "model_"+str(epoch_i+1)+"epochs")
        torch.save(model, model_save_path)

    logger.info("")
    logger.info("Training complete!")
    handlers = logger.handlers[:]
    for handler in handlers:
        handler.close()
        logger.removeHandler(handler)
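
The validation loop above calls flat_accuracy(logits, label_ids). The definition below follows the BERT fine-tuning tutorial cited at the top of train_model and is included here as an assumed helper.

import numpy as np

def flat_accuracy(preds, labels):
    # Fraction of predictions whose argmax matches the gold label.
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)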
Example #5
# Prepare dataset
transformer = transforms.Compose([
    transforms.Resize(config["image_size"]),
    transforms.CenterCrop(config["image_size"]),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
dataset = dset.ImageFolder(root=data_path, transform=transformer)

# Initialize dataloader
dataloader = torch.utils.data.DataLoader(dataset,
                                         batch_size=config["batch_size"],
                                         shuffle=True,
                                         num_workers=2)

device = get_device()

# Show some images from the trainingset
# show_images(dataloader)

# Initialize the model
generator = Generator(config).to(device)
discriminator = Discriminator(config).to(device)

# Initialize custom weights to model
generator.apply(weight_init)
discriminator.apply(weight_init)

# Loss Functions and Optimizers

# BCELoss for Discriminator
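
generator.apply(weight_init) and discriminator.apply(weight_init) above assume a custom initializer. The sketch below follows the usual DCGAN scheme (conv weights ~ N(0, 0.02), batch-norm scales ~ N(1, 0.02)); it is an assumption about that helper, not its verbatim code.

import torch.nn as nn

def weight_init(m):
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("BatchNorm") != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)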
Example #6
def main():
    device = utils.get_device()
    saves_dir = 'saves'
    compare_models(f'{saves_dir}/cae-F-ConvAutoencoder_21-06-01--10-43-39',
                   f'{saves_dir}/cae-F-ConvAutoencoder_21-06-03--08-16-48',
                   device)
Example #7
def main(task='mrpc',
         train_cfg='config/train_mrpc.json',
         model_cfg='config/bert_base.json',
         data_file='../glue/MRPC/train.tsv',
         model_file=None,
         pretrain_file='../uncased_L-12_H-768_A-12/bert_model.ckpt',
         data_parallel=True,
         vocab='../uncased_L-12_H-768_A-12/vocab.txt',
         save_dir='../exp/bert/mrpc',
         max_len=128,
         mode='train'):

    cfg = train.Config.from_json(train_cfg)
    model_cfg = models.Config.from_json(model_cfg)

    set_seeds(cfg.seed)

    tokenizer = tokenization.FullTokenizer(vocab_file=vocab,
                                           do_lower_case=True)

    pipeline = [
        Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize),
        AddSpecialTokensWithTruncation(max_len),
        TokenIndexing(tokenizer.convert_tokens_to_ids, ('0', '1'), max_len)
    ]
    dataset = CsvDataset(pipeline)
    # print(dataset[0])
    # pdb.set_trace()
    data_iter = DataLoader(dataset, batch_size=1, shuffle=True)

    model = Classifier(model_cfg, 1)
    criterion = nn.CrossEntropyLoss()

    trainer = train.Trainer(cfg, model, data_iter, optim.optim4GPU(cfg, model),
                            save_dir, get_device())

    if mode == 'train':

        def get_loss(model, batch,
                     global_step):  # make sure loss is a scalar tensor
            # pdb.set_trace()
            input_ids, segment_ids, input_mask, label_id = [
                b[0] for b in batch
            ]
            # pdb.set_trace()
            logits = model(input_ids, segment_ids, input_mask)
            # pdb.set_trace()
            loss = neg_logloss(logits)
            # loss = criterion(logits, label_id)
            return loss

        trainer.train(get_loss, model_file, pretrain_file, data_parallel)

    elif mode == 'eval':

        def evaluate(model, batch):
            input_ids, segment_ids, input_mask, label_id = batch
            logits = model(input_ids, segment_ids, input_mask)
            _, label_pred = logits.max(1)
            result = (label_pred == label_id).float()  #.cpu().numpy()
            accuracy = result.mean()
            return accuracy, result

        results = trainer.eval(evaluate, model_file, data_parallel)
        total_accuracy = torch.cat(results).mean().item()
        print('Accuracy:', total_accuracy)
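
set_seeds(cfg.seed) above seeds every RNG involved in training. A typical definition, assumed here rather than copied from the project, is:

import random
import numpy as np
import torch

def set_seeds(seed):
    # Seed Python, NumPy and PyTorch (CPU and all CUDA devices) for reproducibility.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)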
Example #8
def main(train_cfg='config/pretrain.json',
         model_cfg='config/bert_base.json',
         data_file='/root/voucher/dataset/tifu/bert/train.tsv',
         model_file=None,
         pretrain_file=None,
         data_parallel=True,
         word_vocab='/root/voucher/dataset/tifu/bert/word_vocab.txt',
         pos_vocab='/root/voucher/dataset/tifu/bert/pos_vocab.txt',
         dep_vocab='/root/voucher/dataset/tifu/bert/dep_vocab.txt',
         pos_dep_word_vocab='/root/voucher/dataset/tifu/bert/pos_dep_word.pkl',
         save_dir='../exp/bert/pretrain',
         log_dir='../exp/bert/pretrain/runs',
         max_len=384,
         max_pred=20,
         mask_prob=0.15,
         mode='train'):

    if mode == 'train':
        pass
    elif mode == 'eval':
        pass
    #    max_pred = max_len
    #    mask_prob = 1
    else:
        print("please select correct mode")
        exit(1)

    cfg = train.Config.from_json(train_cfg)
    model_cfg = models.Config.from_json(model_cfg)

    set_seeds(cfg.seed)

    custom_tokenizer = CustomVocabTokenizer(word_vocab_file=word_vocab,
                                            pos_vocab_file=pos_vocab,
                                            dep_vocab_file=dep_vocab,
                                            pos_dep_word_vocab_file=pos_dep_word_vocab)
    custom_tokenize = lambda word, pos, dep: custom_tokenizer.tokenize(custom_tokenizer.convert_to_unicode(word),
                                                                       custom_tokenizer.convert_to_unicode(pos),
                                                                       custom_tokenizer.convert_to_unicode(dep))

    pipeline = [Preprocess4Pretrain(max_pred,
                                    mask_prob,
                                    list(custom_tokenizer.word_tokenizer.vocab.keys()),
                                    list(custom_tokenizer.pos_tokenizer.vocab.keys()),
                                    list(custom_tokenizer.dep_tokenizer.vocab.keys()),
                                    custom_tokenizer.convert_tokens_to_ids,
                                    max_len)]
    data_iter = TifuDataLoader(data_file,
                               cfg.batch_size,
                               custom_tokenize,
                               max_len,
                               pipeline=pipeline)

    model = BertModel4Pretrain(model_cfg)

    optimizer = optim.optim4GPU(cfg, model)
    trainer = train.Trainer(cfg, model, data_iter, optimizer, save_dir, get_device())

    if mode == 'eval':
        def evaluate(model, batch):
            input_word_ids,\
            input_segment_ids,\
            input_mask,\
            target_word_ids,\
            target_mask,\
            input_len, \
            target_len = batch

            logits_word = model(input_word_ids,
                                input_segment_ids,
                                input_mask,
                                target_mask)


            input_len = input_len.tolist()
            target_len = target_len.tolist()

            for i in range(len(input_len)):
                logits = torch.squeeze(logits_word.narrow(0, i, 1), dim=0)
                logits_input = logits.narrow(0, 0, input_len[i])
                logits_target = logits.narrow(0, input_len[i], target_len[i])

                _, input_ids = logits_input.max(-1)
                _, target_ids = logits_target.max(-1)
                input_tokens = custom_tokenizer.word_tokenizer.convert_ids_to_tokens(input_ids.tolist())
                target_tokens = custom_tokenizer.word_tokenizer.convert_ids_to_tokens(target_ids.tolist())

                results = []
                input_norm = logits_input / logits_input.norm(dim=1)[:, None]
                target_norm = logits_target / logits_target.norm(dim=1)[:, None]

                #target_len x input_len
                res = torch.mm(target_norm, input_norm.transpose(0, 1))

                #target_len x 1
                _, sim_idxs = res.max(-1)
                for j, sim_idx in enumerate(sim_idxs.tolist()):
                    results.append([target_tokens[j], input_tokens[sim_idx]])

            print(results)
            accuracies = [0]
            results = [0]
            return accuracies, results

        results = trainer.eval(evaluate, None, pretrain_file, data_parallel, eval_kind_names=["Word"])
        print(results)
Example #9
def train(flags):
    data_root = flags.data
    window_size = flags.window_size
    pred_size = flags.pred_size
    batch_size = flags.batch_size
    out_dir = flags.out
    num_epochs = flags.epochs
    val_every = flags.val_every
    classify_thresh = flags.classify_thresh
    # optim args
    lr = flags.lr
    betas = (flags.beta1, flags.beta2)
    eps = flags.eps
    weight_decay = flags.decay
    use_confidence = flags.use_confidence
    joint_set = flags.joint_set

    if not os.path.exists(data_root):
        print('Could not find training data at ' + data_root)
        return
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    weights_out_path = os.path.join(out_dir, 'op_only_weights.pth')
    best_weights_out_path = os.path.join(out_dir, 'op_only_weights_BEST.pth')

    # load training and validation data
    train_dataset = OpenPoseDataset(data_root,
                                    split='train',
                                    window_size=window_size,
                                    contact_size=pred_size,
                                    use_confidence=use_confidence,
                                    joint_set=joint_set)
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=2)
    val_dataset = OpenPoseDataset(data_root,
                                  split='val',
                                  window_size=window_size,
                                  contact_size=pred_size,
                                  use_confidence=use_confidence,
                                  joint_set=joint_set)
    val_loader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=2)

    num_joints = len(
        openpose_dataset.OP_JOINT_SUBSETS[train_dataset.joint_set])

    # create the model and optimizer
    device_str = 'cpu' if flags.cpu else None
    device = get_device(device_str)
    op_model = create_model(window_size,
                            num_joints,
                            pred_size,
                            device,
                            use_confidence=use_confidence)
    op_optim = optim.Adam(op_model.parameters(), lr=lr, betas=betas, \
                                    eps=eps, weight_decay=weight_decay)

    model_parameters = filter(lambda p: p.requires_grad, op_model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print('Num model params: ' + str(params))

    # viz stats
    train_steps = []
    train_losses = []
    train_accs = []
    val_steps = []
    val_losses = []
    val_accs = []

    # train
    loss_sum = 0.0
    loss_count = 0
    best_val_f1 = -float('inf')
    confusion_count = np.zeros((4), dtype=int)
    for epoch_idx in range(num_epochs):
        for batch_idx, batch_data in enumerate(train_loader):
            # prepere the data for this batch
            input_data = batch_data['joint2d'].to(device)
            label_data = batch_data['contacts'].to(device)

            # zero the gradients
            op_optim.zero_grad()
            # forward + backward + optimize
            output_data = op_model(input_data)
            loss = op_model.loss(output_data, label_data)
            n_tp, n_fp, n_fn, n_tn = op_model.accuracy(output_data,
                                                       label_data,
                                                       thresh=classify_thresh)
            loss = torch.mean(loss)
            loss.backward()
            op_optim.step()

            loss_sum += loss.to('cpu').item()
            loss_count += 1
            confusion_count += np.array([n_tp, n_fp, n_fn, n_tn], dtype=int)

        if epoch_idx % 5 == 0:
            print('=================== TRAIN (' + str(epoch_idx + 1) +
                  ' epochs) ================================================')
            mean_loss = loss_sum / loss_count
            print('Mean loss: %0.3f' % (mean_loss))
            loss_sum = 0.0
            loss_count = 0

            metrics = calculate_metrics(confusion_count)
            cur_acc, _, _, _, _ = metrics
            print_metrics(metrics)
            confusion_count = np.zeros((4), dtype=int)
            print(
                '======================================================================================'
            )

            train_steps.append(epoch_idx * len(train_loader) + batch_idx)
            train_losses.append(mean_loss)
            train_accs.append(cur_acc)

            # save plot
            plot_train_stats((train_steps, train_losses, train_accs), \
                             (val_steps, val_losses, val_accs), \
                             out_dir, accuracy_metrics=metrics)

        if epoch_idx % val_every == 0:
            # run on the validation data
            print('==================== VALIDATION (' + str(epoch_idx + 1) +
                  ' epochs) ===========================================')
            val_loss, val_metrics = val_epoch(val_loader, op_model, device,
                                              classify_thresh, pred_size)
            print('Mean Loss: %0.3f' % (val_loss))

            for tgt_frame_idx in range(pred_size):
                print('----- Pred Frame ' + str(tgt_frame_idx) + ' ------')
                print_metrics(val_metrics[tgt_frame_idx])
            val_acc, _, _, _, _ = val_metrics[
                pred_size // 2]  # only want accuracy for middle target
            print(
                '======================================================================================'
            )
            op_model.train()

            val_steps.append(epoch_idx * len(train_loader) + batch_idx)
            val_losses.append(val_loss)
            val_accs.append(val_acc)

            # save confusion matrix
            for tgt_frame_idx in range(pred_size):
                accuracy, precision, recall, f1, cm = val_metrics[
                    tgt_frame_idx]
                plot_confusion_mat(
                    cm,
                    os.path.join(
                        out_dir,
                        'val_confusion_matrix_%d.png' % (tgt_frame_idx)))

            # also save model weights
            print('Saving checkpoint...')
            torch.save(op_model.state_dict(), weights_out_path)

            # check if this is the best so far and save (in terms of f1 score)
            if f1 > best_val_f1:
                best_val_f1 = f1
                print('Saving best model so far...')
                torch.save(op_model.state_dict(), best_weights_out_path)

    # save final model
    print('Saving final checkpoint...')
    torch.save(op_model.state_dict(),
               os.path.join(out_dir, 'op_only_weights_FINAL.pth'))
    # save plot
    metrics = calculate_metrics(confusion_count)
    plot_train_stats((train_steps, train_losses, train_accs), \
                        (val_steps, val_losses, val_accs), \
                        out_dir, accuracy_metrics=metrics)
    print('FINISHED Training!')
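
calculate_metrics(confusion_count) above reduces the running [tp, fp, fn, tn] counts to the five values unpacked by its callers (accuracy, precision, recall, F1, confusion matrix). An assumed implementation, not the repository's verbatim helper:

import numpy as np

def calculate_metrics(confusion_count):
    n_tp, n_fp, n_fn, n_tn = [float(c) for c in confusion_count]
    total = n_tp + n_fp + n_fn + n_tn
    accuracy = (n_tp + n_tn) / total if total > 0 else 0.0
    precision = n_tp / (n_tp + n_fp) if (n_tp + n_fp) > 0 else 0.0
    recall = n_tp / (n_tp + n_fn) if (n_tp + n_fn) > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
    cm = np.array([[n_tp, n_fn], [n_fp, n_tn]])  # assumed 2x2 layout
    return accuracy, precision, recall, f1, cm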
Example #10
def load_sagen(name):
    device = utils.get_device()
    gen = utils.load_model(name,
                           sagenerator.SelfAttentionGenerator,
                           save_attention=True)
    return gen, device
Example #11
def validate(loader: DataLoader, model: nn.Module, criterion: Callable,
             num_classes: int, num_super_classes: int, maf: torch.FloatTensor,
             args: ArgumentParser) -> torch.FloatTensor:

    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('MLM Loss', ':.4e')
    accuracies = AverageMeter('Acc', ':.4e')
    accuracy_deltas = AverageMeter('Acc Delta', ':.4e')
    progress = ProgressMeter(len(loader),
                             [batch_time, losses, accuracies, accuracy_deltas],
                             prefix="Test: ")

    model.eval()

    device = get_device(args)
    with torch.no_grad():
        end = time.time()
        for i, (genotypes, labels, super_labels) in enumerate(loader):

            ### Mask for Masked Language Modeling
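            # The masked fraction cycles with the batch index: (i % 9 + 1) / 10 masks 10%-90% of the SNP positions.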
            mask_num = int((i % 9 + 1) / 10 * genotypes.shape[1])
            mask_scores = torch.rand(genotypes.shape[1])
            mask_indices = mask_scores.argsort(descending=True)[:mask_num]
            masked_genotypes = genotypes[:, mask_indices].reshape(-1)
            targets = (masked_genotypes == 1).float().clone().detach()
            genotypes[:, mask_indices] = 0
            maf_vector = maf[labels[0]]

            genotypes = genotypes.to(device)
            masked_genotypes = masked_genotypes.to(device)
            targets = targets.to(device)
            labels = labels.to(device)
            super_labels = super_labels.to(device)
            maf_vector = maf_vector.to(device)

            logits = model(genotypes, labels, super_labels)
            logits = logits[:, mask_indices].reshape(-1)

            # add weight to nonzero maf snps
            weights = torch.ones_like(logits)
            weight_coefficients = (maf_vector[mask_indices] > 0).repeat(
                genotypes.shape[0]).float() * (args.minor_coefficient - 1) + 1
            weights *= weight_coefficients

            loss = criterion(logits, targets, weight=weights, reduction='mean')

            accuracy = (masked_genotypes * logits.sign()).mean() / 2 + .5
            baseline_accuracy = (
                masked_genotypes *
                (maf_vector[mask_indices].repeat(genotypes.shape[0]) -
                 .5000001).sign()).mean() / 2 + .5
            accuracy_delta = accuracy - baseline_accuracy

            losses.update(loss.item(), genotypes.shape[0])
            accuracies.update(accuracy.item(), genotypes.shape[0])
            accuracy_deltas.update(accuracy_delta.item(), genotypes.shape[0])
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)
        progress.display(i)
    return losses.avg
Example #12
def main() -> None:
    global best_loss

    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    start_epoch = 0

    vcf_reader = VCFReader(args.train_data, args.classification_map,
                           args.chromosome, args.class_hierarchy)
    vcf_writer = vcf_reader.get_vcf_writer()
    train_dataset, validation_dataset = vcf_reader.get_datasets(
        args.validation_split)
    train_sampler = BatchByLabelRandomSampler(args.batch_size,
                                              train_dataset.labels)
    train_loader = DataLoader(train_dataset, batch_sampler=train_sampler)

    if args.validation_split != 0:
        validation_sampler = BatchByLabelRandomSampler(
            args.batch_size, validation_dataset.labels)
        validation_loader = DataLoader(validation_dataset,
                                       batch_sampler=validation_sampler)

    kwargs = {
        'total_size': vcf_reader.positions.shape[0],
        'window_size': args.window_size,
        'num_layers': args.layers,
        'num_classes': len(vcf_reader.label_encoder.classes_),
        'num_super_classes': len(vcf_reader.super_label_encoder.classes_)
    }
    model = WindowedMLP(**kwargs)
    model.to(get_device(args))

    optimizer = AdamW(model.parameters(), lr=args.learning_rate)

    #######
    if args.resume_path is not None:
        if os.path.isfile(args.resume_path):
            print("=> loading checkpoint '{}'".format(args.resume_path))
            checkpoint = torch.load(args.resume_path)
            if kwargs != checkpoint['model_kwargs']:
                raise ValueError(
                    'The checkpoint\'s kwargs don\'t match the ones used to initialize the model'
                )
            if vcf_reader.snps.shape[0] != checkpoint['vcf_writer'].snps.shape[
                    0]:
                raise ValueError(
                    'The data on which the checkpoint was trained had a different number of snp positions'
                )
            start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume_path, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    #############

    if args.validate:
        validate(validation_loader, model,
                 nn.functional.binary_cross_entropy_with_logits,
                 len(vcf_reader.label_encoder.classes_),
                 len(vcf_reader.super_label_encoder.classes_), vcf_reader.maf,
                 args)
        return

    for epoch in range(start_epoch, args.epochs + start_epoch):
        loss = train(train_loader, model,
                     nn.functional.binary_cross_entropy_with_logits, optimizer,
                     len(vcf_reader.label_encoder.classes_),
                     len(vcf_reader.super_label_encoder.classes_),
                     vcf_reader.maf, epoch, args)

        if epoch % args.save_freq == 0 or epoch == args.epochs + start_epoch - 1:
            if args.validation_split != 0:
                validation_loss = validate(
                    validation_loader, model,
                    nn.functional.binary_cross_entropy_with_logits,
                    len(vcf_reader.label_encoder.classes_),
                    len(vcf_reader.super_label_encoder.classes_),
                    vcf_reader.maf, args)
                is_best = validation_loss < best_loss
                best_loss = min(validation_loss, best_loss)
            else:
                is_best = loss < best_loss
                best_loss = min(loss, best_loss)

            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'model_kwargs': kwargs,
                    'best_loss': best_loss,
                    'optimizer': optimizer.state_dict(),
                    'vcf_writer': vcf_writer,
                    'label_encoder': vcf_reader.label_encoder,
                    'super_label_encoder': vcf_reader.super_label_encoder,
                    'maf': vcf_reader.maf
                }, is_best, args.chromosome, args.model_name, args.model_dir)
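
save_checkpoint(...) above persists the training state and marks the best model. A plausible implementation following the common PyTorch pattern is sketched below; the filename scheme is an assumption, not the project's actual one.

import os
import shutil
import torch

def save_checkpoint(state, is_best, chromosome, model_name, model_dir):
    # Persist the full training state; copy it to a *_best file when it improves.
    filename = os.path.join(model_dir, "{}_chr{}_checkpoint.pth.tar".format(model_name, chromosome))
    torch.save(state, filename)
    if is_best:
        best_filename = os.path.join(model_dir, "{}_chr{}_best.pth.tar".format(model_name, chromosome))
        shutil.copyfile(filename, best_filename)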
Example #13
def test():
    # get device
    device = get_device(0)

    # load net
    num_classes = 80
    anchor_size = config.ANCHOR_SIZE_COCO
    if args.dataset == 'COCO':
        cfg = config.coco_ab
        testset = COCODataset(data_dir=args.dataset_root,
                              json_file='instances_val2017.json',
                              name='val2017',
                              img_size=cfg['min_dim'][0],
                              debug=args.debug)
        mean = config.MEANS
    elif args.dataset == 'VOC':
        cfg = config.voc_ab
        testset = VOCDetection(VOC_ROOT, [('2007', 'test')], None,
                               VOCAnnotationTransform())
        mean = config.MEANS

    if args.version == 'yolo_v2':
        from models.yolo_v2 import myYOLOv2
        net = myYOLOv2(device,
                       input_size=cfg['min_dim'],
                       num_classes=num_classes,
                       anchor_size=config.ANCHOR_SIZE_COCO)
        print('Let us test yolo-v2 on the MSCOCO dataset ......')

    elif args.version == 'yolo_v3':
        from models.yolo_v3 import myYOLOv3
        net = myYOLOv3(device,
                       input_size=cfg['min_dim'],
                       num_classes=num_classes,
                       anchor_size=config.MULTI_ANCHOR_SIZE_COCO)

    elif args.version == 'tiny_yolo_v2':
        from models.tiny_yolo_v2 import YOLOv2tiny
        net = YOLOv2tiny(device,
                         input_size=cfg['min_dim'],
                         num_classes=num_classes,
                         anchor_size=config.ANCHOR_SIZE_COCO)

    elif args.version == 'tiny_yolo_v3':
        from models.tiny_yolo_v3 import YOLOv3tiny

        net = YOLOv3tiny(device,
                         input_size=cfg['min_dim'],
                         num_classes=num_classes,
                         anchor_size=config.MULTI_ANCHOR_SIZE_COCO)

    net.load_state_dict(torch.load(args.trained_model, map_location='cuda'))
    net.to(device).eval()
    print('Finished loading model!')

    # evaluation
    test_net(net,
             device,
             testset,
             BaseTransform(net.input_size,
                           mean=(0.406, 0.456, 0.485),
                           std=(0.225, 0.224, 0.229)),
             thresh=args.visual_threshold)
Example #14
def main(with_gui=None, check_stop=None):
    device_idx, model_name, generator_cfg, _, train_cfg = load_config()
    device = get_device()
    if with_gui is None:
        with_gui = train_cfg.get('with_gui')
    model = None
    loss_f = BCELoss(reduction='none')
    pause = False
    images, count, loss_sum, epoch, acc_sum, acc_sum_p = 0, 0, 0, 1, 0, 0
    generator, generator_cfg = None, None
    optimizer, optimizer_cfg = None, None
    best_loss = None
    while check_stop is None or not check_stop():

        # Check config:
        if images == 0:
            cfg = load_config()
            if model_name != cfg.model or model is None:
                model_name = cfg.model
                model, best_loss, epoch = load_model(model_name, train=True, device=device)
                log(model_name, 'Loaded model %s' % model_name)
                optimizer_cfg = None
            if optimizer_cfg != cfg.optimizer:
                optimizer_cfg = cfg.optimizer
                optimizer = create_optimizer(optimizer_cfg, model)
                log(model_name, 'Created optimizer %s' % str(optimizer))
            if generator_cfg != cfg.generator:
                generator_cfg = cfg.generator
                generator = create_generator(generator_cfg, device=device)
                log(model_name, 'Created generator')
            train_cfg = cfg.train

        # Run:
        x, target = next(generator)
        mask, _ = target.max(dim=1, keepdim=True)
        optimizer.zero_grad()
        y = model(x)

        # Save for debug:
        if train_cfg.get('save', False):
            show_images(x, 'input', save_dir='debug')
            show_images(y, 'output', mask=True, save_dir='debug')
            show_images(target, 'target', mask=mask, save_dir='debug')

        # GUI:
        if with_gui:
            if not pause:
                show_images(x, 'input')
                show_images(y, 'output', mask=True, step=2)
                show_images(target, 'target', mask=mask, step=2)
            key = cv2.waitKey(1)
            if key == ord('s'):
                torch.save(model.state_dict(), 'models/unet2.pt')
            elif key == ord('p'):
                pause = not pause
            elif key == ord('q'):
                break

        # Optimize:
        acc_sum += check_accuracy(y, target)
        loss = (loss_f(y, target) * mask).mean()
        loss_item = loss.item()
        loss_sum += loss_item
        count += 1
        images += len(x)
        loss.backward()
        optimizer.step()

        # Complete epoch:
        if images >= train_cfg['epoch_images']:
            acc_total, names = acc_sum, channel_names
            msg = 'Epoch %d: train loss %f, acc %s' % (
                epoch, loss_sum / count,
                acc_to_str(acc_total, names=names)
            )
            log(model_name, msg)
            count = 0
            images = 0
            loss_sum = 0
            epoch += 1
            acc_sum[:] = 0
            save_model(model_name, model, best_loss, epoch)

    log(model_name, 'Stopped\n')
Example #15
def get_default_run_options(model, dataset, runs, sampling_mode):
    """Setup general experiment options, irrespective of the model and data.
    Parameters:
        model (str): name of model to use. Available: 
            SVM (linear),  SVM_grid (grid search on  linear, poly and RBF), 
            baseline (fully connected NN), hu (1D CNN),
            hamida (3D CNN + 1D classifier), lee (3D FCN), chen (3D CNN), 
            li (3D CNN), he (3D CNN), luo (3D CNN), sharma (2D CNN), 
            mou (1D RNN), boulch (1D semi-supervised CNN),
            liu (3D semi-supervised CNN)
        dataset (str): hyperspectral image name.
        runs (int): number of runs.
        sampling_mode ('all' or 'fixed'): how to select pixels for train/test.
    Returns:
        options (dict): set of options.
    """
    options = {
        'model': model,
        'runs': runs,
        'sampling_mode': sampling_mode,
        'dataset': dataset,
        'device': get_device(0),  # GPU 0; a value of -1 would select the CPU
        'dataset_path': PATH_DATA,
        'sample_path': PATH_SAMPLES,
        'rdir': 'work/',
        'preprocessing': {
            'type': 'division'
        }
    }
    if model == 'hu':
        options['batch_size'], options['epoch'] = 50, 400
    elif model == 'li' or model == 'lee':
        options['batch_size'], options['epoch'] = 100, 200
    else:
        options['batch_size'], options['epoch'] = 100, 100
    # DeepHyperX default options:
    options['svm_grid_params'] = [{
        'kernel': ['rbf'],
        'gamma': [1e-1, 1e-2, 1e-3],
        'C': [1, 10, 100, 1000]
    }, {
        'kernel': ['linear'],
        'C': [0.1, 1, 10, 100, 1000]
    }, {
        'kernel': ['poly'],
        'degree': [3],
        'gamma': [1e-1, 1e-2, 1e-3]
    }]
    options.update({
        'class_balancing': False,
        'flip_augmentation': False,
        'mixture_augmentation': False,
        'multi_class': 1,
        'path': './predictions/',
        'radiation_augmentation': False,
        'test_stride': 1,
        'training_sample': 10,
        'with_exploration': False
    })
    # DeepHyperX handy, but unused options
    options.update({
        'checkpoint':
        None,  # option to load state dict instead of train from scratch
        'train_gt': None,  # train GT filename, not used
        'test_gt': None,  # test GT filename, not used
    })
    return options
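
An illustrative call follows; 'hu' comes from the docstring's model list, while the dataset name and run count are arbitrary example values, not ones shipped with the project.

# Hypothetical usage: a 1D CNN ('hu') on some hyperspectral dataset, 5 runs.
options = get_default_run_options('hu', 'PaviaU', runs=5, sampling_mode='fixed')
print(options['batch_size'], options['epoch'])  # -> 50 400 for the 'hu' model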
Example #16
 def test_get_device(self):
     config = utils.read_config(self.env)
     config["device"] = "xc6slx9-tqg144-3"
     device = utils.get_device(config)
     self.assertEqual("xc6slx9", device)
Example #17
from captum.attr import (
    GradientShap,
    DeepLift,
    DeepLiftShap,
    IntegratedGradients,
    LayerConductance,
    NeuronConductance,
    NoiseTunnel,
)

import tqdm

from utils import get_device, isnotebook, ensure_arr, is_numeric
import metrics

DEVICE = get_device()

# Combination of
# https://www.nature.com/articles/s41598-020-59827-1#MOESM1
#  - this is generally a superset of what Seurat uses
# https://satijalab.org/seurat/v3.1/pbmc3k_tutorial.html
PBMC_MARKER_GENES = {
    "CD4+ T cells": ["IL7R", "CD3D", "CD4", "CTLA4"],
    "IL7RCD4+ T Cells": ["CD8A", "IL7R", "CD3D"],  # Nature paper only
    "CD8+ T cells": ["CD8A", "GZMB", "CD3D", "CD8B"],  # Common both
    "B cells": ["CD19", "MS4A1", "CD79A", "CD79B", "BLNK"],  # Common both
    "Natural Killer cells": [
        "FCGR3A",
        "NCAM1",
        "KLRB1",
        "KLRC1",
            params["reg_ratio"] = np.random.rand() * 0.0015
            params["batch_size"] = np.random.randint(26, 256)
            params["bidirectional"] = bool(np.random.randint(0, 2))
            cfg = AcousticLLDConfig(**params)
            model = RNN(cfg)

        elif args.model_type == "acoustic-spectrogram":
            test_features, test_labels, val_features, val_labels, train_features, train_labels = load_spectrogram_dataset()
            params["fc_size"] = np.random.randint(10, 200)
            params["dropout"] = 0.3 + np.random.rand() * 0.6
            cfg = AcousticSpectrogramConfig(**params)
            model = CNN(cfg)

        else:
            raise Exception(
                "model_type parameter has to be one of [linguistic|acoustic-lld|acoustic-spectrogram]"
            )

        print(
            "Subsets sizes: test_features:{}, test_labels:{}, val_features:{}, val_labels:{}, train_features:{}, train_labels:{}"
            .format(test_features.shape[0], test_labels.shape[0],
                    val_features.shape[0], val_labels.shape[0],
                    train_features.shape[0], train_labels.shape[0]))
        """Converting model to specified hardware and format"""
        model.float()
        model = model.to(get_device())

        run_training(model, cfg, test_features, test_labels, train_features,
                     train_labels, val_features, val_labels)
Example #19
from utils import get_args, get_device, get_class_names
from modeler import load_model, predict

## Get the arguments
args = get_args('predict')

## Get the device
device = get_device(args.gpu)

## Get the model saved as checkpoint
trained_validated_model = load_model(model_dir=args.model_dir)

probs, classes = predict(device=device,
                         image_path=args.image_path,
                         model=trained_validated_model,
                         topx=args.topk)

## Get names of the classes
class_names = get_class_names(classes=classes, cat_names=args.cat_names)

## Print prediction(s)
print(('AI Model\'s top {} prediction(s) are:').format(args.topk))
print('Rank'.ljust(5) + 'Predicted Name'.ljust(25) + 'Probability')
for i, (prob, class_name) in enumerate(zip(probs, class_names)):
    print('{}. {} {}%'.format(
        str(i + 1).rjust(3), class_name.ljust(25),
        ("%.2f" % round(prob * 100, 2)).rjust(6)))
Example #20
        param_group['lr'] = lr
    return lr


def warmup_strategy(optimizer, epoch_size, iteration):
    lr = 1e-6 + (args.lr - 1e-6) * iteration / (epoch_size * (args.wp_epoch))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr
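
# Worked example with illustrative values: for args.lr = 1e-3, args.wp_epoch = 2 and an
# epoch_size of 500, the ramp above rises linearly from 1e-6 at iteration 0 to 1e-3 at
# iteration 1000 (= epoch_size * wp_epoch).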


if __name__ == '__main__':
    global hr, cfg

    hr = False
    device = get_device(args.gpu_ind)

    if args.high_resolution == 1:
        hr = True

    cfg = voc_ab

    if args.version == 'fcos_lite':
        from models.fcos_lite import FCOS_LITE

        fcos_lite = FCOS_LITE(device,
                              input_size=cfg['min_dim'],
                              num_classes=args.num_classes,
                              trainable=True,
                              hr=hr)
        print('Let us train FCOS-LITE on the VOC0712 dataset ......')
Example #21
def main(train_cfg='config/pretrain.json',
         model_cfg='config/bert_base.json',
         data_file='/root/voucher/dataset/tifu/bert/train.tsv',
         model_file=None,
         pretrain_file=None,
         data_parallel=True,
         word_vocab='/root/voucher/dataset/tifu/bert/word_vocab.txt',
         pos_vocab='/root/voucher/dataset/tifu/bert/pos_vocab.txt',
         dep_vocab='/root/voucher/dataset/tifu/bert/dep_vocab.txt',
         pos_dep_word_vocab='/root/voucher/dataset/tifu/bert/pos_dep_word.pkl',
         save_dir='../exp/bert/pretrain',
         log_dir='../exp/bert/pretrain/runs',
         max_len=384,
         max_pred=20,
         mask_prob=0.15,
         mode='train'):

    if mode == 'train':
        pass
    elif mode == 'eval':
        pass
    #    max_pred = max_len
    #    mask_prob = 1
    else:
        print("please select correct mode")
        exit(1)

    cfg = train.Config.from_json(train_cfg)
    model_cfg = models.Config.from_json(model_cfg)

    set_seeds(cfg.seed)

    custom_tokenizer = CustomVocabTokenizer(word_vocab_file=word_vocab,
                                            pos_vocab_file=pos_vocab,
                                            dep_vocab_file=dep_vocab,
                                            pos_dep_word_vocab_file=pos_dep_word_vocab)
    custom_tokenize = lambda word, pos, dep: custom_tokenizer.tokenize(custom_tokenizer.convert_to_unicode(word),
                                                                       custom_tokenizer.convert_to_unicode(pos),
                                                                       custom_tokenizer.convert_to_unicode(dep))

    pipeline = [Preprocess4Pretrain(max_pred,
                                    mask_prob,
                                    list(custom_tokenizer.word_tokenizer.vocab.keys()),
                                    list(custom_tokenizer.pos_tokenizer.vocab.keys()),
                                    list(custom_tokenizer.dep_tokenizer.vocab.keys()),
                                    custom_tokenizer.convert_tokens_to_ids,
                                    max_len)]
    data_iter = TifuDataLoader(data_file,
                               cfg.batch_size,
                               custom_tokenize,
                               max_len,
                               pipeline=pipeline)

    model = BertModel4Pretrain(model_cfg)

    criterion3 = nn.CrossEntropyLoss(reduction='none')

    optimizer = optim.optim4GPU(cfg, model)
    trainer = train.Trainer(cfg, model, data_iter, optimizer, save_dir, get_device())

    writer = SummaryWriter(log_dir=log_dir) # for tensorboardX

    if mode == 'train':
        def get_loss(model, batch, global_step): # make sure loss is tensor
            input_word_ids,\
            input_segment_ids,\
            input_mask,\
            target_word_ids,\
            target_mask = batch

            logits_word = model(input_word_ids,
                                input_segment_ids,
                                input_mask,
                                target_mask)

            loss_word = criterion3(logits_word.transpose(1, 2), target_word_ids) # for masked word
            loss_word = (loss_word*target_mask.float()).mean()
            print(loss_word.item())
            writer.add_scalars('data/scalar_group',
                               {'loss_word': loss_word.item(),
                                'loss_total': loss_word.item(),
                                'lr': optimizer.get_lr()[0],
                               },
                               global_step)

            return loss_word

        trainer.train(get_loss, model_file, pretrain_file, data_parallel)
    elif mode == 'eval':
        def evaluate(model, batch):
            input_word_ids,\
            input_segment_ids,\
            input_mask,\
            target_word_ids,\
            target_mask = batch

            logits_word = model(input_word_ids,
                                input_segment_ids,
                                input_mask,
                                target_mask)

            _, label_word = logits_word.max(-1)
            result_word = (label_word == target_word_ids).float()
            word_accuracy = result_word.mean()

            accuracies = [word_accuracy]
            results = [result_word]
            return accuracies, results

        results = trainer.eval(evaluate, model_file, data_parallel, eval_kind_names=["Word"])
        print(results)
Example #22
 def __init__(self, alpha: float = 1.0, use_cuda: bool = True):
     self.alpha = alpha
     self.device = get_device(use_cuda)
Example #23
def train_rank_net(start_epoch=0,
                   additional_epoch=100,
                   lr=0.0001,
                   optim="adam",
                   train_algo=SUM_SESSION,
                   double_precision=False,
                   standardize=False,
                   small_dataset=False,
                   debug=False):
    """

    :param start_epoch: int
    :param additional_epoch: int
    :param lr: float
    :param optim: str
    :param train_algo: str
    :param double_precision: boolean
    :param standardize: boolean
    :param small_dataset: boolean
    :param debug: boolean
    :return:
    """
    print("start_epoch:{}, additional_epoch:{}, lr:{}".format(
        start_epoch, additional_epoch, lr))
    precision = torch.float64 if double_precision else torch.float32

    # get training and validation data:
    data_fold = 'Fold1'
    train_loader, df_train, valid_loader, df_valid = load_train_vali_data(
        data_fold, small_dataset)
    if standardize:
        df_train, scaler = train_loader.train_scaler_and_transform()
        df_valid = valid_loader.apply_scaler(scaler)

    net, net_inference, ckptfile = get_train_inference_net(
        train_algo, train_loader.num_features, start_epoch, double_precision)
    device = get_device()
    net.to(device)
    net_inference.to(device)

    # initialize to make training faster
    net.apply(init_weights)

    if optim == "adam":
        optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    elif optim == "sgd":
        optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9)
    else:
        raise ValueError(
            "Optimization method {} not implemented".format(optim))
    print(optimizer)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=10,
                                                gamma=0.75)

    loss_func = None
    if train_algo == BASELINE:
        loss_func = torch.nn.BCELoss()
        loss_func.to(device)

    losses = []

    for i in range(start_epoch, start_epoch + additional_epoch):

        scheduler.step()
        net.zero_grad()
        net.train()

        if train_algo == BASELINE:
            epoch_loss = baseline_pairwise_training_loop(i,
                                                         net,
                                                         loss_func,
                                                         optimizer,
                                                         train_loader,
                                                         precision=precision,
                                                         device=device,
                                                         debug=debug)
        elif train_algo in [SUM_SESSION, ACC_GRADIENT]:
            epoch_loss = factorized_training_loop(i,
                                                  net,
                                                  None,
                                                  optimizer,
                                                  train_loader,
                                                  training_algo=train_algo,
                                                  precision=precision,
                                                  device=device,
                                                  debug=debug)

        losses.append(epoch_loss)
        print('=' * 20 + '\n', get_time(),
              'Epoch{}, loss : {}'.format(i, losses[-1]), '\n' + '=' * 20)

        # save to checkpoint every 5 step, and run eval
        if i % 5 == 0 and i != start_epoch:
            save_to_ckpt(ckptfile, i, net, optimizer, scheduler)
            net_inference.load_state_dict(net.state_dict())
            eval_model(net_inference, device, df_valid, valid_loader)

    # save the last ckpt
    save_to_ckpt(ckptfile, start_epoch + additional_epoch, net, optimizer,
                 scheduler)

    # final evaluation
    net_inference.load_state_dict(net.state_dict())
    ndcg_result = eval_model(net_inference, device, df_valid, valid_loader)

    # save the final model
    torch.save(net.state_dict(), ckptfile)
    print(
        get_time(), "finish training " + ", ".join(
            ["NDCG@{}: {:.5f}".format(k, ndcg_result[k])
             for k in ndcg_result]), '\n\n')
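
net.apply(init_weights) above assumes a small initializer meant to speed up early training. A common sketch (an assumption, not the repository's exact helper) applies Xavier initialization to the linear layers:

import torch

def init_weights(m):
    if isinstance(m, torch.nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
        torch.nn.init.zeros_(m.bias)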
Example #24
    def __init__(
        self,
        policy,
        env,
        transform_func,
        gamma,
        learning_rate,
        buffer_size,
        exploration_type,
        exploration_frac,
        exploration_ep,
        exploration_initial_eps,
        exploration_final_eps, 
        double_q,
        policy_kwargs, 
        seed, 
        device
        ):

        super(DeepRLModel, self).__init__(
            policy=policy, env=env, 
            policy_kwargs=policy_kwargs,
            seed=seed
            )

        self.gamma = gamma
        self.learning_rate = learning_rate
        self.buffer_size = buffer_size
        self.exploration_type = exploration_type
        self.exploration_frac = exploration_frac
        self.exploration_ep = exploration_ep
        self.exploration_initial_eps = exploration_initial_eps
        self.exploration_final_eps = exploration_final_eps
        self.double_q = double_q
        # self.policy_kwargs = {} if policy_kwargs is None else policy_kwargs
        if device is None:
            self.device = get_device(device)
        else:
            self.device = device

        self.policy_kwargs = get_default_args(self.policy)
        self.policy_kwargs['ob_space'] = self.observation_space
        self.policy_kwargs['ac_space'] = self.action_space
        self.policy_kwargs['device'] = self.device
        self.policy_kwargs['learning_rate'] = self.learning_rate

        if policy_kwargs is not None:
            for key, val in policy_kwargs.items():
                self.policy_kwargs[key] = val
        # self.policy_kwargs['transform_func'] = transform_func

        # if policy_kwargs is None:
        #     self.policy = policy(self.observation_space, self.action_space,
        #                          intent=True, device=self.device)
        # else:
        self.policy = policy(**self.policy_kwargs)


        if self.buffer_size is None:
            self.replay_buffer = None
        else:
            self.replay_buffer = ReplayBuffer(self.buffer_size, device=self.device, torch=True)
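
# A minimal sketch (an assumption, not the repository's actual helper) of how a
# get_default_args-style function can be written with the standard inspect
# module: it collects the keyword defaults declared in a callable's signature,
# which the constructor above then overrides with the caller's policy_kwargs.
# The names get_default_args_sketch and example_policy are hypothetical.
import inspect


def get_default_args_sketch(func):
    """Return {parameter name: default value} for parameters that have defaults."""
    signature = inspect.signature(func)
    return {
        name: param.default
        for name, param in signature.parameters.items()
        if param.default is not inspect.Parameter.empty
    }


# Example usage on a throwaway callable:
def example_policy(ob_space, ac_space, learning_rate=1e-3, device=None):
    pass


print(get_default_args_sketch(example_policy))
# -> {'learning_rate': 0.001, 'device': None}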
Example #25
def main(task_name='qqp',
         base_train_cfg='config/QDElectra_pretrain.json',
         train_cfg='config/train_mrpc.json',
         model_cfg='config/QDElectra_base.json',
         train_data_file='GLUE/glue_data/QQP/train.tsv',
         eval_data_file='GLUE/glue_data/QQP/eval.tsv',
         model_file=None,
         data_parallel=True,
         vocab='../uncased_L-12_H-768_A-12/vocab.txt',
         log_dir='../exp/electra/pretrain/runs',
         save_dir='../exp/bert/mrpc',
         distill=True,
         quantize=True,
         gradually_distill=False,
         imitate_tinybert=False,
         pred_distill=True):

    check_dirs_exist([log_dir, save_dir])

    train_cfg_dict = json.load(open(base_train_cfg, "r"))
    train_cfg_dict.update(json.load(open(train_cfg, "r")))
    train_cfg = ElectraConfig().from_dict(train_cfg_dict)
    model_cfg = ElectraConfig().from_json_file(model_cfg)
    output_mode, train_cfg.n_epochs, max_len = get_task_params(task_name)

    set_seeds(train_cfg.seed)

    tokenizer = tokenization.FullTokenizer(vocab_file=vocab,
                                           do_lower_case=True)
    TaskDataset = dataset_class(
        task_name)  # task dataset class according to the task name
    model_cfg.num_labels = len(TaskDataset.labels)
    pipeline = [
        Tokenizing(task_name, tokenizer.convert_to_unicode,
                   tokenizer.tokenize),
        AddSpecialTokensWithTruncation(max_len),
        TokenIndexing(tokenizer.convert_tokens_to_ids, TaskDataset.labels,
                      output_mode, max_len)
    ]
    train_data_set = TaskDataset(train_data_file, pipeline)
    eval_data_set = TaskDataset(eval_data_file, pipeline)
    train_data_iter = DataLoader(train_data_set,
                                 batch_size=train_cfg.batch_size,
                                 shuffle=True)
    eval_data_iter = DataLoader(eval_data_set,
                                batch_size=train_cfg.batch_size,
                                shuffle=False)

    generator = ElectraForSequenceClassification.from_pretrained(
        'google/electra-small-generator')
    t_discriminator = ElectraForSequenceClassification.from_pretrained(
        'google/electra-base-discriminator')
    s_discriminator = QuantizedElectraForSequenceClassification if quantize else ElectraForSequenceClassification
    s_discriminator = s_discriminator.from_pretrained(
        'google/electra-small-discriminator', config=model_cfg)
    model = DistillElectraForSequenceClassification(generator, t_discriminator,
                                                    s_discriminator, model_cfg)

    optimizer = optim.optim4GPU(train_cfg, model)
    writer = SummaryWriter(log_dir=log_dir)  # for tensorboardX

    base_trainer_args = (train_cfg, model_cfg, model, train_data_iter,
                         eval_data_iter, optimizer, save_dir, get_device())
    trainer = QuantizedDistillElectraTrainer(task_name, output_mode, distill,
                                             gradually_distill,
                                             imitate_tinybert, pred_distill,
                                             len(TaskDataset.labels), writer,
                                             *base_trainer_args)

    trainer.train(model_file, None, data_parallel)
    trainer.eval(model_file, data_parallel)
Example #26
def train_network(model,
                  x,
                  y,
                  x_test=None,
                  y_test=None,
                  epochs=50,
                  batch_size=64,
                  loss_f=BCELoss,
                  optimizer=Adam,
                  lr=0.001,
                  y_postprocessing=utils.y_to_one_hot,
                  weight_decay: float = 0.0000001,
                  verbose: int = 1):
    print("Started training")

    best_epoch = (0, 0, 0)

    if y_postprocessing is not None:
        y = y_postprocessing(y)
        y_test = y_postprocessing(y_test)
    y, y_test = y.astype(np.float64), y_test.astype(np.float64)

    loss_f = loss_f()
    optimizer = optimizer(model.parameters(), lr, weight_decay=weight_decay)
    model.train()
    for epoch in range(epochs):
        for i in range((x.shape[0] // batch_size) + 1):
            x_for_network = x[i * batch_size:(i + 1) * batch_size]
            y_for_network = y[i * batch_size:(i + 1) * batch_size]
            if x_for_network.size == 0:
                break
            if verbose > 0:
                print("batch {} of {}".format(i + 1,
                                              x.shape[0] // batch_size + 1))
            optimizer.zero_grad()
            pred = model(x_for_network)
            loss = loss_f(
                pred,
                from_numpy(y_for_network).float().to(utils.get_device()))
            loss.backward()
            optimizer.step()
        train_performance = evaluation.evaluate_model(
            model,
            x,
            y,
            pred_postprocessing=utils.softmax_to_one_hot,
            out_dim=2,
            batch_size=batch_size)
        print("train performance is {}".format(train_performance))
        if x_test is not None:
            performance = evaluation.evaluate_model(
                model,
                x_test,
                y_test,
                pred_postprocessing=utils.softmax_to_one_hot,
                out_dim=2,
                batch_size=batch_size)
            print("test performance is {}".format(performance))
            if performance > best_epoch[-1]:
                best_epoch = (epoch, train_performance, performance)
        print("Finished epoch {}".format(epoch))
    if x_test is not None:
        print(
            "Finished training. Best epoch is {} with training performance {} "
            "and test performance {}".format(*best_epoch))
Example #27
def main(cfg: DictConfig) -> None:
    "The entry point for testing"

    assert cfg.model_path is not None, "Need to specify model_path for testing."
    log.info("\n" + OmegaConf.to_yaml(cfg))

    # restore the hyperparameters used for training
    model_path = hydra.utils.to_absolute_path(cfg.model_path)
    log.info("Loading the model from %s" % model_path)
    checkpoint = load_model(model_path)
    restore_hyperparams(checkpoint["cfg"], cfg)

    # create dataloaders for validation and testing
    vocabs = checkpoint["vocabs"]
    loader_val, _ = create_dataloader(
        hydra.utils.to_absolute_path(cfg.path_val),
        "val",
        cfg.encoder,
        vocabs,
        cfg.eval_batch_size,
        cfg.num_workers,
    )
    loader_test, _ = create_dataloader(
        hydra.utils.to_absolute_path(cfg.path_test),
        "test",
        cfg.encoder,
        vocabs,
        cfg.eval_batch_size,
        cfg.num_workers,
    )

    # restore the trained model checkpoint
    model = Parser(vocabs, cfg)
    model.load_state_dict(checkpoint["model_state"])
    device, _ = get_device()
    model.to(device)
    log.info("\n" + str(model))
    log.info("#parameters = %d" % sum([p.numel() for p in model.parameters()]))

    # validation
    log.info("Validating..")
    f1_score = validate(loader_val, model, cfg)
    log.info(
        "Validation F1 score: %.03f, Exact match: %.03f, Precision: %.03f, Recall: %.03f"
        % (
            f1_score.fscore,
            f1_score.complete_match,
            f1_score.precision,
            f1_score.recall,
        ))

    # testing
    log.info("Testing..")
    if cfg.beam_size > 1:
        log.info("Performing beam search..")
        f1_score = beam_search(loader_test, model, cfg)
    else:
        log.info("Running without beam search..")
        f1_score = validate(loader_test, model, cfg)
    log.info(
        "Testing F1 score: %.03f, Exact match: %.03f, Precision: %.03f, Recall: %.03f"
        % (
            f1_score.fscore,
            f1_score.complete_match,
            f1_score.precision,
            f1_score.recall,
        ))
Example #28
                                           sampler=train_sampler)

valid_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           num_workers=4,
                                           collate_fn=collate_fn,
                                           sampler=valid_sampler)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=1,
                                          num_workers=4,
                                          shuffle=False)

config = {
    "epochs": 100,
    "device": get_device(),
    "sampling": True,
    "temperature": 1.0,
    "max_sentence_length": 18
}

embedding_dim = 256
hidden_dim = 512
vocab_size = len(vocab)
model = Baseline(embedding_dim, hidden_dim, vocab_size, vanilla=False)

criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=5e-4)

model.cuda()
train(model, optimizer, criterion, train_loader, valid_loader, vocab, config)
Example #29
import re

import torch

import data
import utils

device = utils.get_device()
def evaluate(seq2seq,test_generator,vocab):
    #cnt = 0
    acc_cnt = 0
    total_examples = 0
    for (num1s,ops,num2s,l1s,l2s,mask1s,mask2s),(answers,ans_lens,ans_masks) in test_generator.get_batches():
        batch_size = len(num1s)
        context_vector = seq2seq.encode(num1s, ops, num2s, l1s, l2s, mask1s, mask2s)
        decoder_input = torch.tensor([data.BOS_ID] * batch_size).view(-1, 1).to(device)
        current_input = seq2seq.embeddings(decoder_input).view(-1, 1, seq2seq.embedding_dim)
        current_hidden = context_vector.unsqueeze(1).transpose(0, 1)

        # Greedy decoding: feed each predicted token back in as the next input.
        max_decode_size = answers.size(1)
        predict_each_t = []
        for t in range(max_decode_size):
            ot, ht, probs, predicts = seq2seq.predict_step(current_input, current_hidden)
            predict_each_t.append(predicts)
            current_input = seq2seq.embeddings(predicts)
            current_hidden = ht
        predict_each_t = torch.cat(predict_each_t, 1)

        predict_numbers = seq2Number(predict_each_t, vocab, True)
        answer_numbers = seq2Number(answers, vocab, False)
        answer_numbers = [re.findall(r'[\-0-9]+', n)[0] for n in answer_numbers]

Example #30
def main(task='mrpc',
         train_cfg='./model/config/train_mrpc.json',
         model_cfg='./model/config/bert_base.json',
         data_train_file='total_data/imdbtrain.tsv',
         data_test_file='total_data/IMDB_test.tsv',
         model_file=None,
         pretrain_file='./model/uncased_L-12_H-768_A-12/bert_model.ckpt',
         data_parallel=False,
         vocab='./model/uncased_L-12_H-768_A-12/vocab.txt',
         dataName='IMDB',
         stopNum=250,
         max_len=300,
         mode='train'):

    if mode == 'train':

        def get_loss_CNN(model, batch,
                         global_step):  # make sure loss is a scalar tensor
            input_ids, segment_ids, input_mask, label_id, seq_lengths = batch
            logits = model(input_ids, segment_ids, input_mask)
            loss = criterion(logits, label_id)
            return loss

        def evalute_CNN(model, batch):
            input_ids, segment_ids, input_mask, label_id, seq_lengths = batch
            logits = model(input_ids, segment_ids, input_mask)

            return label_id, logits

        def get_loss_Attn_LSTM(
                model, batch,
                global_step):  # make sure loss is a scalar tensor
            input_ids, segment_ids, input_mask, label_id, seq_lengths = batch

            seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
            input_ids = input_ids[perm_idx]
            label_id = label_id[perm_idx]
            token1 = embedding(input_ids.long())

            logits, attention_score = model(token1.cuda(), input_ids,
                                            segment_ids, input_mask,
                                            seq_lengths)

            loss1 = criterion(logits, label_id)
            return loss1

        def evalute_Attn_LSTM(model, batch, global_step, ls):
            input_ids, segment_ids, input_mask, label_id, seq_lengths = batch

            seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
            input_ids = input_ids[perm_idx]
            label_id = label_id[perm_idx]
            token1 = embedding(input_ids.long())

            logits, attention_score = model(token1.cuda(), input_ids,
                                            segment_ids, input_mask,
                                            seq_lengths)
            logits = F.softmax(logits, dim=1)

            y_pred11, y_pred1 = logits.max(1)

            return label_id, logits

        def generating_lexiocn(model2, batch, global_step, ls, e):
            if (global_step == 0):
                result3.clear()
                result_label.clear()
                bb_11.clear()
                bb_22.clear()

            input_ids, segment_ids, input_mask, label_id, seq_lengths = batch

            seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
            input_ids = input_ids[perm_idx]
            label_id = label_id[perm_idx]
            token1 = embedding(input_ids.long())
            #logits = model(input_ids, segment_ids, input_mask)
            logits2, attention_score2 = model2(token1.cuda(), input_ids,
                                               segment_ids, input_mask,
                                               seq_lengths)

            #logits=F.softmax(logits)
            logits = F.softmax(logits2, dim=1)
            # y_pred11, y_pred1 = logits.max(1)
            y_pred22, y_pred2 = logits2.max(1)
            atten, attn_s1 = attention_score2.max(1)
            atte2, attn_s2 = torch.topk(attention_score2, 4)

            for i in range(0, len(input_ids)):

                split_tokens = []
                att_index = []
                for token in tokenizer.tokenize(data0[global_step * 64 +
                                                      perm_idx[i]]):
                    split_tokens.append(token)

                if (len(split_tokens) <= attn_s1[i].item()):
                    attn_index3 = attention_score2[i][:len(split_tokens) - 1]
                    attn_num, attn_index2 = attn_index3.max(0)
                    attn_index = attn_index2.item()
                else:
                    for j in range(0, 4):
                        att_index.append(attn_s2[i][j].item())

                tok = []
                if (atten[i].item() <= 0):
                    token_ab = split_tokens[0]
                else:
                    for j in range(0, len(att_index)):
                        if (att_index[j] >= len(split_tokens)):
                            continue
                        tok.append(split_tokens[att_index[j]])

                token_temp = data0[global_step * 64 + perm_idx[i]].split(' ')
                token2 = []
                for kk in range(0, len(tok)):
                    token_ab = tok[kk]
                    token_ab = token_ab.replace(".", "")
                    token_ab = token_ab.replace(",", "")
                    token_ab = token_ab.replace("'", "")
                    token_ab = token_ab.replace("!", "")
                    token_ab = token_ab.replace("?", "")
                    token_ab = token_ab.replace("'", "")
                    token_ab = token_ab.replace('"', "")
                    # Skip punctuation and common stopwords (the same words the
                    # original chained comparisons checked for).
                    if token_ab in {
                            '', ' ', ',', '.', 'from', 'are', 'is', 'and',
                            'with', 'may', 'would', 'could', 'have', 'has',
                            'had', 'was', 'were', 'this', 'who', 'that', 'www',
                            'http', 'com', 'those', 'your', 'not', 'seem',
                            'too', 'lol', 'but', 'these', 'their', 'can',
                            'there', 'gave', 'his', 'etc', 'thats', 'though',
                            'off', 'she', 'them', 'huh', 'why', 'wont', 'any',
                            'some', 'its', 'yeah', 'yes', 'you', 'should',
                            'dont', 'anybody', 'than', 'where', 'for', 'more',
                            'will', 'him', 'wii', 'having', 'just', 'help',
                            'helps', 'all', 'they', 'take', 'the', 'what',
                            'need', 'make', 'about', 'then', 'when', 'does',
                            'ask', 'much', 'man', 'know', 'how', 'look',
                            'like', 'one', 'think', 'tell', 'find', 'cant',
                            'now', 'try', 'give', 'answer', 'her', 'out',
                            'get', 'because', 'myself', 'wants', 'movie',
                            'film', 'films'}:
                        continue

                    if (len(token_ab) < 2):
                        continue

                    for gge, input_word in enumerate(token_temp):

                        if (token_ab.lower() in input_word.lower()):
                            input_word = input_word.replace(".", "")
                            input_word = input_word.replace(",", "")
                            input_word = input_word.replace("'", "")
                            input_word = input_word.replace("!", "")
                            input_word = input_word.replace("?", "")
                            input_word = input_word.replace("'", "")
                            input_word = input_word.replace('"', "")

                            token2.append(input_word.lower())
                            break
                token2 = list(set(token2))

                if (len(token2) < 3):
                    continue
            #print(token2)
                sen = ""
                for l in range(0, len(token2) - 1):
                    sen += token2[l] + ' '
                sen += token2[len(token2) - 1]
                if (y_pred2[i] == 0):
                    try:
                        bb_11[sen] += y_pred22[i]
                    except KeyError:
                        bb_11[sen] = y_pred22[i]

                if (y_pred2[i] == 1):
                    try:
                        bb_22[sen] += y_pred22[i]
                    except KeyError:
                        bb_22[sen] = y_pred22[i]

            if (global_step == ls - 1):

                abusive_11.clear()
                abusive_22.clear()

                bb_11_up = sorted(bb_11.items(),
                                  key=lambda x: x[1],
                                  reverse=True)
                bb_22_up = sorted(bb_22.items(),
                                  key=lambda x: x[1],
                                  reverse=True)

                lexicon_size = 50
                bb_11_up = bb_11_up[:lexicon_size]
                bb_22_up = bb_22_up[:lexicon_size]

                for i in bb_11_up:
                    flag = 0
                    for j in bb_22_up:
                        if ((i[0].lower() in j[0].lower())
                                or (j[0].lower() in i[0].lower())):
                            if (i[1] < j[1]):
                                flag = 1
                                break

                    if (flag == 0):
                        abusive_11.append(i[0])

                for i in bb_22_up:
                    flag = 0
                    for j in bb_11_up:
                        if ((i[0].lower() in j[0].lower())
                                or (j[0].lower() in i[0].lower())):
                            if (i[1] < j[1]):
                                flag = 1
                                break

                    if (flag == 0):
                        abusive_22.append(i[0])

                ddf = open("./IMDB_Lexicon/imdbLexicon_1.txt",
                           'w',
                           encoding='UTF8')

                for i in range(0, len(abusive_11)):
                    ddf.write(abusive_11[i] + '\n')

                ddf.close()

                ddf = open("./IMDB_Lexicon/imdbLexicon_2.txt",
                           'w',
                           encoding='UTF8')

                for i in range(0, len(abusive_22)):
                    ddf.write(abusive_22[i] + '\n')

                ddf.close()
            return label_id, logits

        def evalute_CNN_SSL(model, batch, global_step):
            if (global_step == 0):
                result5.clear()

            input_ids, segment_ids, input_mask, label_id, seq_lengths = batch

            logits = model(input_ids, segment_ids, input_mask)

            logits = F.softmax(logits, dim=1)
            y_pred11, y_pred1 = logits.max(1)

            for i in range(0, len(input_ids)):
                result5.append([y_pred1[i].item(), y_pred11[i].item()])

            return label_id, logits

        def pseudo_labeling(model2, batch, global_step, ls, e):
            if (global_step == 0):
                result3.clear()
                result4.clear()

                label_0.clear()
                label_1.clear()

                result_label.clear()

                abusive_11.clear()
                abusive_22.clear()

                abusive_dic_file = open("./IMDB_Lexicon/imdbLexicon_1.txt",
                                        'r',
                                        encoding='UTF8')
                for line in abusive_dic_file.read().split('\n'):
                    if (len(line) <= 3):
                        continue
                    abusive_11.append(line)
                abusive_dic_file.close()

                abusive_dic_file = open("./IMDB_Lexicon/imdbLexicon_2.txt",
                                        'r',
                                        encoding='UTF8')
                for line in abusive_dic_file.read().split('\n'):
                    if (len(line) <= 3):
                        continue
                    abusive_22.append(line)
                abusive_dic_file.close()

            input_ids, segment_ids, input_mask, label_id, seq_lengths = batch

            seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
            input_ids = input_ids[perm_idx]
            label_id = label_id[perm_idx]
            token1 = embedding(input_ids.long())

            logits2, attention_score2 = model2(token1.cuda(), input_ids,
                                               segment_ids, input_mask,
                                               seq_lengths)

            logits2 = F.softmax(logits2, dim=1)

            y_pred22, y_pred2 = logits2.max(1)

            label_id2 = []

            for i in range(0, len(input_ids)):
                input_sentence = data0[global_step * 64 + perm_idx[i]]
                input_sentence = re.sub("[!@#$%^&*().?\"~/<>:;'{}]", "",
                                        input_sentence)

                matching_word1 = 3
                matching_word2 = 4
                abusive_word_list_neg11 = list()
                abusive_word_list_neg11 += matching_blacklist2(
                    abusive_11, input_sentence, matching_word1)
                abusive_word_list_neg11 = list((set(abusive_word_list_neg11)))

                abusive_word_list_neg22 = list()
                abusive_word_list_neg22 += matching_blacklist2(
                    abusive_22, input_sentence, matching_word1)
                abusive_word_list_neg22 = list((set(abusive_word_list_neg22)))

                abusive_word_list_neg111 = list()
                abusive_word_list_neg111 += matching_blacklist2(
                    abusive_11, input_sentence, matching_word2)
                abusive_word_list_neg111 = list(
                    (set(abusive_word_list_neg111)))

                abusive_word_list_neg222 = list()
                abusive_word_list_neg222 += matching_blacklist2(
                    abusive_22, input_sentence, matching_word2)
                abusive_word_list_neg222 = list(
                    (set(abusive_word_list_neg222)))

                a = max(len(abusive_word_list_neg11),
                        len(abusive_word_list_neg22))
                aa = max(len(abusive_word_list_neg111),
                         len(abusive_word_list_neg222))

                if ((len(abusive_word_list_neg11) >
                     len(abusive_word_list_neg22)
                     and result5[global_step * 64 + perm_idx[i]][0] == 0
                     and result5[global_step * 64 + perm_idx[i]][1] >= 0.9) or
                    (len(abusive_word_list_neg11) >
                     len(abusive_word_list_neg22) and y_pred2[i].item() == 0
                     and y_pred22[i].item() >= 0.9)):
                    label_0.append(0)
                    result4.append([
                        global_step * 64 + perm_idx[i], 0,
                        data0[global_step * 64 + perm_idx[i]],
                        label_id[perm_idx[i]].item()
                    ])
                elif ((len(abusive_word_list_neg11) <
                       len(abusive_word_list_neg22)
                       and result5[global_step * 64 + perm_idx[i]][0] == 1
                       and result5[global_step * 64 + perm_idx[i]][1] >= 0.9)
                      or
                      (len(abusive_word_list_neg11) <
                       len(abusive_word_list_neg22) and y_pred2[i].item() == 1
                       and y_pred22[i].item() >= 0.9)):
                    label_1.append(1)
                    result4.append([
                        global_step * 64 + perm_idx[i], 1,
                        data0[global_step * 64 + perm_idx[i]],
                        label_id[perm_idx[i]].item()
                    ])

                elif (aa >= 1 and len(abusive_word_list_neg111) >
                      len(abusive_word_list_neg222)):
                    label_0.append(0)
                    result4.append([
                        global_step * 64 + perm_idx[i], 0,
                        data0[global_step * 64 + perm_idx[i]],
                        label_id[perm_idx[i]].item()
                    ])
                elif (aa >= 1 and len(abusive_word_list_neg111) <
                      len(abusive_word_list_neg222)):
                    label_1.append(1)
                    result4.append([
                        global_step * 64 + perm_idx[i], 1,
                        data0[global_step * 64 + perm_idx[i]],
                        label_id[perm_idx[i]].item()
                    ])
                elif (result5[global_step * 64 + perm_idx[i]][1]
                      and y_pred22[i].item() >= 0.9
                      and result5[global_step * 64 + perm_idx[i]][0]
                      == y_pred2[i].item()):
                    if (result5[global_step * 64 + perm_idx[i]][0] == 0):
                        label_0.append(0)
                        result4.append([
                            global_step * 64 + perm_idx[i], 0,
                            data0[global_step * 64 + perm_idx[i]],
                            label_id[perm_idx[i]].item()
                        ])
                    elif (result5[global_step * 64 + perm_idx[i]][0] == 1):
                        label_1.append(1)
                        result4.append([
                            global_step * 64 + perm_idx[i], 1,
                            data0[global_step * 64 + perm_idx[i]],
                            label_id[perm_idx[i]].item()
                        ])

                else:
                    result4.append([
                        global_step * 64 + perm_idx[i], -1,
                        data0[global_step * 64 + perm_idx[i]],
                        label_id[perm_idx[i]].item()
                    ])

            if (global_step == ls - 1):

                result_label.clear()
                result3.clear()

                print("###result3[i] ###:", len(result3))
                a = min(len(label_0), len(label_1))

                la_0 = 0
                la_1 = 0
                la_2 = 0
                la_3 = 0

                random.shuffle(result4)

                for i in range(0, len(result4)):

                    if (result4[i][1] == 0 and la_0 < a):
                        if (temp_check[result4[i][0]][0] == 0):
                            temp_check[result4[i][0]][0] = 1
                            temp_check[result4[i][0]][1] = 0
                            la_0 += 1
                            continue

                    elif (result4[i][1] == 1 and la_1 < a):
                        if (temp_check[result4[i][0]][0] == 0):
                            temp_check[result4[i][0]][0] = 1
                            temp_check[result4[i][0]][1] = 1
                            la_1 += 1
                            continue

                result_label.clear()
                result3.clear()

                fw = open('./temp_data/temp_train_IMDB.tsv',
                          'a',
                          encoding='utf-8',
                          newline='')
                wr = csv.writer(fw, delimiter='\t')

                fww = open('./temp_data/temp_train_na_IMDB.tsv',
                           'w',
                           encoding='utf-8',
                           newline='')
                wrr = csv.writer(fww, delimiter='\t')

                for i in range(0, len(temp_check)):
                    if (temp_check[i][0] == 1):
                        result_label.append(str(temp_check[i][3]))
                        result3.append(str(temp_check[i][1]))
                        wr.writerow(
                            [str(temp_check[i][1]),
                             str(temp_check[i][2])])
                    else:
                        wrr.writerow(
                            [str(temp_check[i][3]),
                             str(temp_check[i][2])])

                fw.close()
                fww.close()
                data0.clear()
                temp_check.clear()
                with open('./temp_data/temp_train_na_IMDB.tsv',
                          "r",
                          encoding='utf-8') as f:
                    lines = csv.reader(f, delimiter='\t')

                    for i in lines:
                        a = ''
                        lines2 = i[1].split(' ')
                        b = 0
                        for j in range(0, len(lines2)):
                            a += lines2[j] + ' '
                            b += 1

                        data0.append(a)
                        temp_check.append([0, -1, a, i[0]])
                print("################;", len(data0))
                f.close()

                dataset_temp = TaskDataset('./temp_data/temp_train_IMDB.tsv',
                                           pipeline)
                data_iter_temp = DataLoader(dataset_temp,
                                            batch_size=64,
                                            shuffle=True)

                dataset_temp_b = TaskDataset('./temp_data/temp_train_IMDB.tsv',
                                             pipeline1)
                data_iter_temp_b = DataLoader(dataset_temp_b,
                                              batch_size=64,
                                              shuffle=True)

                dataset_temp_na = TaskDataset(
                    './temp_data/temp_train_na_IMDB.tsv', pipeline)
                data_iter_temp_na = DataLoader(dataset_temp_na,
                                               batch_size=64,
                                               shuffle=False)

                dataset_temp_na_b = TaskDataset(
                    './temp_data/temp_train_na_IMDB.tsv', pipeline1)
                data_iter_temp_na_b = DataLoader(dataset_temp_na_b,
                                                 batch_size=64,
                                                 shuffle=False)

            if (global_step != ls - 1):
                dataset_temp = TaskDataset(data_dev_file, pipeline)
                data_iter_temp = DataLoader(dataset_temp,
                                            batch_size=cfg.batch_size,
                                            shuffle=True)

                dataset_temp_b = TaskDataset(data_dev_file, pipeline1)
                data_iter_temp_b = DataLoader(dataset_temp_b,
                                              batch_size=64,
                                              shuffle=True)

                dataset_temp_na = TaskDataset(data_dev_file, pipeline)
                data_iter_temp_na = DataLoader(dataset_temp_na,
                                               batch_size=cfg.batch_size,
                                               shuffle=False)

                dataset_temp_na_b = TaskDataset(data_dev_file, pipeline1)
                data_iter_temp_na_b = DataLoader(dataset_temp_na_b,
                                                 batch_size=64,
                                                 shuffle=False)

            return label_id, logits2, result_label, result3, data_iter_temp, data_iter_temp_b, data_iter_temp_na, data_iter_temp_na_b

        def evalute_Attn_LSTM_SSL(model, batch):

            input_ids, segment_ids, input_mask, label_id, seq_lengths = batch

            seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
            input_ids = input_ids[perm_idx]
            label_id = label_id[perm_idx]
            token1 = embedding(input_ids.long())

            logits, attention_score = model2(token1.cuda(), input_ids,
                                             segment_ids, input_mask,
                                             seq_lengths)

            return label_id, logits

        curNum = 1

        print("###########################################")
        print(model_cfg)

        cfg = train.Config.from_json(train_cfg)
        model_cfg = models.Config.from_json(model_cfg)

        for kkk in range(0, 5):
            print("###########################################")

            tokenizer = tokenization.FullTokenizer(do_lower_case=True)
            tokenizer1 = tokenization.FullTokenizer1(vocab_file=vocab,
                                                     do_lower_case=True)

            TaskDataset = dataset_class(
                task)  # task dataset class according to the task

            pipeline = [
                Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize),
                AddSpecialTokensWithTruncation(max_len),
                TokenIndexing(tokenizer.convert_tokens_to_ids,
                              TaskDataset.labels, max_len)
            ]

            pipeline1 = [
                Tokenizing(tokenizer1.convert_to_unicode, tokenizer1.tokenize),
                AddSpecialTokensWithTruncation(max_len),
                TokenIndexing(tokenizer1.convert_tokens_to_ids1,
                              TaskDataset.labels, max_len)
            ]

            fd = open("./total_data/imdbtrain.tsv", 'r', encoding='utf-8')
            rdr = csv.reader(fd, delimiter='\t')

            res = []
            num_a = 0
            num_b = 0
            for line in rdr:
                #print(line)
                num_a += 1
                res.append([line[0], line[1]])

            print("curNum#:", curNum)
            #print(res)
            fw = open('./data/IMDB_temp_short.tsv',
                      'w',
                      encoding='utf-8',
                      newline='')
            wr = csv.writer(fw, delimiter='\t')

            for i in range(0, curNum):
                random.shuffle(res)
                #print(res[1][0])
                print("########")
            curNum += 100
            num_data = len(res)
            num_data_dev_temp = int(num_data * 0.01)
            num_data_dev = int(num_data_dev_temp * 0.15)
            num_data_short = int(num_data_dev_temp * 0.85)
            num_data_train = num_data - num_data_dev_temp
            fd.close()

            num = 0

            data_train_file = "./data/IMDB_train" + str(kkk + 1) + ".tsv"
            data_dev_file = "./data/IMDB_dev" + str(kkk + 1) + ".tsv"
            data_short_file = "./data/IMDB_short" + str(kkk + 1) + ".tsv"

            print("num_data_dev#:", num_data_dev)
            print("num_data_short#:", num_data_short)
            print("num_data_train#:", num_data_train)
            fw = open('./data/IMDB_temp_short.tsv',
                      'w',
                      encoding='utf-8',
                      newline='')
            wr = csv.writer(fw, delimiter='\t')

            fe = open(data_train_file, 'w', encoding='utf-8', newline='')
            we = csv.writer(fe, delimiter='\t')

            res2 = []
            num_pos = 0
            num_neg = 0
            for line in res:
                #print(line[0])
                #print(line[1])
                if (line[0] == '0' and num_pos <= (num_data_dev_temp / 2)):
                    num_pos += 1
                    wr.writerow(['0', line[1]])
                elif (line[0] == '1' and num_neg <= (num_data_dev_temp / 2)):
                    num_neg += 1
                    wr.writerow(['1', line[1]])
                else:
                    num += 1
                    we.writerow([line[0], line[1]])

            fw.close()
            fe.close()

            print("num_pos #:", num_pos, " num_neg:", num_neg)

            f = open('./data/IMDB_temp_short.tsv', 'r', encoding='utf-8')
            rdr = csv.reader(f, delimiter='\t')
            num_pos = 0
            num_neg = 0
            num = 0

            fw = open(data_dev_file, 'w', encoding='utf-8', newline='')
            wr = csv.writer(fw, delimiter='\t')

            fe = open(data_short_file, 'w', encoding='utf-8', newline='')
            we = csv.writer(fe, delimiter='\t')

            for line in rdr:
                #print(line[0])
                if (line[0] == '0' and num_pos <= (num_data_dev / 2)):
                    num_pos += 1
                    wr.writerow(['0', line[1]])
                elif (line[0] == '1' and num_neg <= (num_data_dev / 2)):
                    num_neg += 1
                    wr.writerow(['1', line[1]])
                else:
                    num += 1
                    we.writerow([line[0], line[1]])

            print("num_pos #:", num_pos, " num_neg:", num_neg)
            f.close()
            fw.close()
            fe.close()

            dataset = TaskDataset(data_train_file, pipeline)
            data_iter = DataLoader(dataset, batch_size=64, shuffle=False)

            dataset_b = TaskDataset(data_train_file, pipeline1)
            data_iter_b = DataLoader(dataset_b, batch_size=64, shuffle=False)

            dataset2 = TaskDataset(data_test_file, pipeline)
            data_iter2 = DataLoader(dataset2, batch_size=64, shuffle=False)

            dataset2_b = TaskDataset(data_test_file, pipeline1)
            data_iter2_b = DataLoader(dataset2_b, batch_size=64, shuffle=False)

            dataset_dev = TaskDataset(data_dev_file, pipeline)
            data_iter_dev = DataLoader(dataset_dev,
                                       batch_size=64,
                                       shuffle=False)

            dataset_dev_b = TaskDataset(data_dev_file, pipeline1)
            data_iter_dev_b = DataLoader(dataset_dev_b,
                                         batch_size=64,
                                         shuffle=False)

            dataset3 = TaskDataset(data_short_file, pipeline)
            data_iter3 = DataLoader(dataset3, batch_size=64, shuffle=True)

            dataset3_b = TaskDataset(data_short_file, pipeline1)
            data_iter3_b = DataLoader(dataset3_b, batch_size=64, shuffle=True)

            print("###########################################")
            print(model_cfg)
            weights = tokenization.embed_lookup2()

            print("#train_set:", len(data_iter))
            print("#test_set:", len(data_iter2))
            print("#short_set:", len(data_iter3))
            print("#dev_set:", len(data_iter_dev))
            curNum += 1

            embedding = nn.Embedding.from_pretrained(weights).cuda()
            criterion = nn.CrossEntropyLoss()

            model = Classifier(model_cfg, 2)
            model2 = Classifier_Attention_LSTM(2)

            trainer = train.Trainer(
                cfg, dataName, stopNum, model, model2, data_iter, data_iter_b,
                data_iter2, data_iter2_b, data_iter3, data_iter3_b,
                data_iter_dev, data_iter_dev_b,
                optim.optim4GPU(cfg, model,
                                len(data_iter) * 10),
                torch.optim.Adam(model2.parameters(),
                                 lr=0.005), get_device(), kkk + 1)

            label_0 = []
            label_1 = []

            result3 = []
            result4 = []
            result5 = []

            bb_11 = {}
            bb_22 = {}

            abusive_11 = []
            abusive_22 = []

            result_label = []

            fw = open('./temp_data/temp_train_IMDB.tsv',
                      'w',
                      encoding='utf-8',
                      newline='')
            wr = csv.writer(fw, delimiter='\t')

            fr = open(data_short_file, 'r', encoding='utf-8')
            rdrr = csv.reader(fr, delimiter='\t')
            for line in rdrr:
                wr.writerow([line[0], line[1]])

            fw.close()
            fr.close()

            data0 = []
            temp_check = []
            temp_label = []

            with open(data_train_file, "r", encoding='utf-8') as f:
                lines = csv.reader(f, delimiter='\t')

                for i in lines:
                    a = ''
                    lines2 = i[1].split(' ')
                    for j in range(0, len(lines2)):
                        a += lines2[j] + ' '

                    data0.append(a)
                    temp_check.append([0, -1, a, i[0]])
                    temp_label.append([0, 0])
            f.close()

            trainer.train(model_file, pretrain_file, get_loss_CNN,
                          get_loss_Attn_LSTM, evalute_CNN_SSL, pseudo_labeling,
                          evalute_Attn_LSTM, evalute_CNN,
                          evalute_Attn_LSTM_SSL, generating_lexiocn,
                          data_parallel)

    elif mode == 'eval':

        def evalute_Attn_LSTM_SSL(model, batch):

            input_ids, segment_ids, input_mask, label_id, seq_lengths = batch

            seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
            input_ids = input_ids[perm_idx]
            label_id = label_id[perm_idx]
            token1 = embedding(input_ids.long())

            logits, attention_score = model2(token1.cuda(), input_ids,
                                             segment_ids, input_mask,
                                             seq_lengths)

            return label_id, logits

        def evalute_CNN_SSL(model, batch):
            input_ids, segment_ids, input_mask, label_id, seq_lengths = batch
            token1 = embedding(input_ids.long())
            logits, attention_score = model(token1.cuda(), input_ids,
                                            segment_ids, input_mask)

            return label_id, logits

        weights = tokenization.embed_lookup2()

        embedding = nn.Embedding.from_pretrained(weights).cuda()
        criterion = nn.CrossEntropyLoss()

        model = Classifier_CNN(2)
        model2 = Classifier_Attention_LSTM(2)

        trainer = train.Eval(cfg, model, model2, data_iter, save_dir,
                             get_device())

        embedding = nn.Embedding.from_pretrained(weights).cuda()
        results = trainer.eval(evalute_CNN_SSL, evalute_Attn_LSTM_SSL,
                               data_parallel)
def customize_core(config, coregen_filename):
    """
    Reads in the xco file from the core directory and customizes it for this
    architecture

    Args:
        config (dictionary): configuration dictionary
        coregen_filename (string): filename of the coregen file to work on

    Returns:
        (string): filename to the custom core path

    Raises:
        Nothing
    """
    #Open the coregen file
    fp = open(coregen_filename)
    core_in = fp.read()
    fp.close()

    #open a reference to the output file
    c_fn = os.path.split(coregen_filename)[1]
    c_fn = os.path.join(get_coregen_dir(config, absolute = True), c_fn)

    #Open up the template dictionary
    fn = COREGEN_TEMPLATE
    fn = os.path.join(os.path.dirname(__file__), fn)

    template = json.load(open(fn, "r"))

    template["device"] = utils.get_device(config)
    template["devicefamily"] = utils.get_family(config)
    template["package"] = utils.get_package(config)
    template["speedgrade"] = utils.get_speed_grade(config)
    template["workingdirectory"] = get_coregen_temp_dir(config, absolute = True)

    #print "Open: %s" % c_fn
    fp = open(c_fn, "w")

    #Break this into lines
    core_in_lines = core_in.splitlines()
    for line in core_in_lines:
        line = line.strip()

        if re.search('BEGIN.*Project.*Options', line, re.I):
            #print "\tFound the beginning of the project"
            fp.write("%s%s" % (line, os.linesep))
            #Copy all the objects into the new file
            for key in template:
                fp.write("SET %s = %s%s" % (key, template[key], os.linesep))

            continue

        if "CRC" in line:
            #Don't write the CRC
            continue

        #if line.startswith("#"):

        #print "line: %s" % line
        items = line.split(' ')
        if "set" == items[0].lower():
            #print "Line: %s" % line
            #Now we have a line we might need to modify
            if items[1].lower() in template.keys():
                #Skip it, cause we already wrote what we wanted into the new xco
                continue

        fp.write("%s%s" % (line, os.linesep))

    fp.close()
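
# A minimal, self-contained sketch of the "SET <name> = <value>" rewriting that
# customize_core performs on an .xco file, working on a list of lines instead
# of files so it can be tried standalone. The helper name rewrite_xco_lines and
# the example values in the usage comment are hypothetical, not part of the
# module above.
import re


def rewrite_xco_lines(core_in_lines, template):
    """Return customized .xco lines, overriding project options from template."""
    out_lines = []
    for line in core_in_lines:
        line = line.strip()

        if re.search('BEGIN.*Project.*Options', line, re.I):
            out_lines.append(line)
            # Inject every template setting right after the project header.
            for key in template:
                out_lines.append("SET %s = %s" % (key, template[key]))
            continue

        if "CRC" in line:
            # The checksum is stale once settings change, so drop it.
            continue

        items = line.split(' ')
        if len(items) > 1 and items[0].lower() == "set" \
                and items[1].lower() in template:
            # Already written from the template above.
            continue

        out_lines.append(line)
    return out_lines

# Example usage (hypothetical device names):
# rewrite_xco_lines(["# BEGIN Project Options",
#                    "SET device = xc6slx9",
#                    "# CRC: 1234abcd"],
#                   {"device": "xc6slx45"})
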
char_to_idx, idx_to_char = char_mapping()

config = {
    "VOCAB_SIZE": len(char_to_idx.keys()),
    "HIDDEN": 100,

    # For songs sampling
    "TEMPERATURE": 1,
    "TAKE_MAX_PROBABLE": False,
    "LIMIT_LEN": 440
}

MODEL_INPUT = "$\nX:3"
model = LSTMSimple(config["VOCAB_SIZE"], config["HIDDEN"],
                   config["VOCAB_SIZE"]).to(get_device())
model.init_state()
model.load_state_dict(
    torch.load("trained_models/model2019-11-26-03-35.pth", map_location='cpu'))

model.eval()

text = """$
X:3
T:Trow Faicstieu
C:Itt
R:polka
Z:id:hn-hornpipe-59
M:C|
K:A
^GG|B2B B2c BGA|B2d c2c d2B|g6 A3|BdB dBA B2d|edf ecA Bdg|gdc AAF |1 dfdd g2ge ||
Example #33
def train(
    start_epoch=0,
    additional_epoch=100,
    lr=0.0001,
    optim="adam",
    leaky_relu=False,
    ndcg_gain_in_train="exp2",
    sigma=1.0,
    double_precision=False,
    standardize=False,
    small_dataset=False,
    debug=False,
    output_dir="/tmp/ranking_output/",
):
    print("start_epoch:{}, additional_epoch:{}, lr:{}".format(
        start_epoch, additional_epoch, lr))
    writer = SummaryWriter(output_dir)

    precision = torch.float64 if double_precision else torch.float32

    # get training and validation data:
    data_fold = 'Fold1'
    train_loader, df_train, valid_loader, df_valid = load_train_vali_data(
        data_fold, small_dataset)
    if standardize:
        df_train, scaler = train_loader.train_scaler_and_transform()
        df_valid = valid_loader.apply_scaler(scaler)

    lambdarank_structure = [136, 64, 16]

    net = LambdaRank(lambdarank_structure,
                     leaky_relu=leaky_relu,
                     double_precision=double_precision,
                     sigma=sigma)
    device = get_device()
    net.to(device)
    net.apply(init_weights)
    print(net)

    ckptfile = get_ckptdir('lambdarank', lambdarank_structure, sigma)

    if optim == "adam":
        optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    elif optim == "sgd":
        optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9)
    else:
        raise ValueError(
            "Optimization method {} not implemented".format(optim))
    print(optimizer)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=10,
                                                gamma=0.75)

    ideal_dcg = NDCG(2**9, ndcg_gain_in_train)

    for i in range(start_epoch, start_epoch + additional_epoch):
        net.train()
        net.zero_grad()

        count = 0
        batch_size = 200
        grad_batch, y_pred_batch = [], []

        for X, Y in train_loader.generate_batch_per_query():
            if np.sum(Y) == 0:
                # negative session, cannot learn useful signal
                continue
            N = 1.0 / ideal_dcg.maxDCG(Y)

            X_tensor = torch.tensor(X, dtype=precision, device=device)
            y_pred = net(X_tensor)
            y_pred_batch.append(y_pred)
            # compute the rank order of each document
            rank_df = pd.DataFrame({"Y": Y, "doc": np.arange(Y.shape[0])})
            rank_df = rank_df.sort_values("Y").reset_index(drop=True)
            rank_order = rank_df.sort_values("doc").index.values + 1

            with torch.no_grad():
                pos_pairs_score_diff = 1.0 + torch.exp(sigma *
                                                       (y_pred - y_pred.t()))

                Y_tensor = torch.tensor(Y, dtype=precision,
                                        device=device).view(-1, 1)
                rel_diff = Y_tensor - Y_tensor.t()
                pos_pairs = (rel_diff > 0).type(precision)
                neg_pairs = (rel_diff < 0).type(precision)
                Sij = pos_pairs - neg_pairs
                if ndcg_gain_in_train == "exp2":
                    gain_diff = torch.pow(2.0, Y_tensor) - torch.pow(
                        2.0, Y_tensor.t())
                elif ndcg_gain_in_train == "identity":
                    gain_diff = Y_tensor - Y_tensor.t()
                else:
                    raise ValueError(
                        "ndcg_gain method not supported yet {}".format(
                            ndcg_gain_in_train))

                rank_order_tensor = torch.tensor(rank_order,
                                                 dtype=precision,
                                                 device=device).view(-1, 1)
                decay_diff = 1.0 / torch.log2(rank_order_tensor +
                                              1.0) - 1.0 / torch.log2(
                                                  rank_order_tensor.t() + 1.0)

                delta_ndcg = torch.abs(N * gain_diff * decay_diff)
                lambda_update = sigma * (0.5 * (1 - Sij) -
                                         1 / pos_pairs_score_diff) * delta_ndcg
                lambda_update = torch.sum(lambda_update, 1, keepdim=True)

                assert lambda_update.shape == y_pred.shape
                check_grad = torch.sum(lambda_update, (0, 1)).item()
                if check_grad == float('inf') or np.isnan(check_grad):
                    import ipdb
                    ipdb.set_trace()
                grad_batch.append(lambda_update)

            # The optimization is similar to RankNetListWise, but maximizes NDCG:
            # lambda_update scales with the gain and rank-decay differences
            # (see the standalone sketch after this function).

            count += 1
            if count % batch_size == 0:
                for grad, y_pred in zip(grad_batch, y_pred_batch):
                    y_pred.backward(grad / batch_size)

                if count % (4 * batch_size) == 0 and debug:
                    net.dump_param()

                optimizer.step()
                net.zero_grad()
                grad_batch, y_pred_batch = [], []  # reset the gradient-accumulation buffers

        # optimizer.step()
        print(
            get_time(),
            "training dataset at epoch {}, total queries: {}".format(i, count))
        if debug:
            eval_cross_entropy_loss(net,
                                    device,
                                    train_loader,
                                    i,
                                    writer,
                                    phase="Train")
        # eval_ndcg_at_k(net, device, df_train, train_loader, 100000, [10, 30, 50])

        if i % 5 == 0 and i != start_epoch:
            print(get_time(), "eval for epoch: {}".format(i))
            eval_cross_entropy_loss(net, device, valid_loader, i, writer)
            eval_ndcg_at_k(net, device, df_valid, valid_loader, 100000,
                           [10, 30], i, writer)
        if i % 10 == 0 and i != start_epoch:
            save_to_ckpt(ckptfile, i, net, optimizer, scheduler)

        scheduler.step()

    # save the last ckpt
    save_to_ckpt(ckptfile, start_epoch + additional_epoch, net, optimizer,
                 scheduler)

    # save the final model
    torch.save(net.state_dict(), ckptfile)
    ndcg_result = eval_ndcg_at_k(net, device, df_valid, valid_loader, 100000,
                                 [10, 30], start_epoch + additional_epoch,
                                 writer)
    print(
        get_time(), "finish training " + ", ".join(
            ["NDCG@{}: {:.5f}".format(k, ndcg_result[k])
             for k in ndcg_result]), '\n\n')
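
# A standalone, self-contained sketch of the lambda computation used in the
# training loop above, run on a single toy query with three documents. It
# repeats the same formulas (Sij from relevance differences, |delta NDCG| from
# the gain and rank-decay differences); the scores, labels, ranks and maxDCG
# below are made-up values for illustration only.
import torch


def toy_lambda_update(y_pred, Y, rank_order, sigma=1.0, max_dcg=1.0):
    """Return per-document lambdas for one query, LambdaRank style."""
    N = 1.0 / max_dcg
    pos_pairs_score_diff = 1.0 + torch.exp(sigma * (y_pred - y_pred.t()))

    rel_diff = Y - Y.t()
    Sij = (rel_diff > 0).float() - (rel_diff < 0).float()

    gain_diff = torch.pow(2.0, Y) - torch.pow(2.0, Y.t())
    decay_diff = 1.0 / torch.log2(rank_order + 1.0) \
        - 1.0 / torch.log2(rank_order.t() + 1.0)
    delta_ndcg = torch.abs(N * gain_diff * decay_diff)

    lambda_update = sigma * (0.5 * (1 - Sij) -
                             1 / pos_pairs_score_diff) * delta_ndcg
    return torch.sum(lambda_update, 1, keepdim=True)


# Toy query: predicted scores, relevance labels and 1-based rank positions,
# all shaped as column vectors just like in the loop above.
scores = torch.tensor([[0.2], [1.5], [-0.3]])
labels = torch.tensor([[0.0], [2.0], [1.0]])
ranks = torch.tensor([[2.0], [1.0], [3.0]])
print(toy_lambda_update(scores, labels, ranks))
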
def train(args):
    # Init wandb
    run = wandb.init(name=args.save_dir[len('../runs/'):],
                     config=args,
                     project='sign-language-recognition')

    # Create directory for model checkpoints and log
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Save args
    with open(os.path.join(args.save_dir, 'args.json'), 'w') as f:
        json.dump(vars(args), f, sort_keys=True, indent=2)

    # Logger
    logger = create_logger(args.save_dir)

    # Set gpu
    if torch.cuda.is_available():
        i = get_free_gpu()
        device = get_device(gpu=i)
    else:
        device = 'cpu'
    logger.info('using device: {}'.format(device))

    # Prepare early stop
    stopped = False
    best_epoch = 0
    best_loss = float('inf')  # plain float: compares and formats cleanly below

    # Data

    if args.freeze_vgg:
        real_batch_size = 3
    else:
        real_batch_size = 2  # can't fit more into gpu memory
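    # real_batch_size is only what fits on the GPU per forward pass; gradients
    # are accumulated in the train loop every args.batch_size // real_batch_size
    # mini-batches, so the effective batch size stays close to args.batch_size.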

    json_file = os.path.join(args.data_path, 'WLASL_v0.3.json')
    videos_folder = os.path.join(args.data_path, 'videos')
    keypoints_folder = os.path.join(args.data_path, 'keypoints')
    train_transforms = transforms.Compose([videotransforms.RandomCrop(224)])
    val_transforms = train_transforms

    # Debug data
    if args.debug_dataset:
        train_dataset = WLASL(json_file=json_file,
                              videos_folder=videos_folder,
                              keypoints_folder=keypoints_folder,
                              transforms=train_transforms,
                              split='train',
                              subset=args.subset)
        train_dl = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=real_batch_size,
                                               sampler=DebugSampler(
                                                   args.debug_dataset,
                                                   len(train_dataset)))
        val_dl = train_dl
    else:
        train_dataset = WLASL(json_file=json_file,
                              videos_folder=videos_folder,
                              keypoints_folder=keypoints_folder,
                              transforms=train_transforms,
                              split='train',
                              subset=args.subset)
        train_dl = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=real_batch_size,
                                               shuffle=True)

        val_dataset = WLASL(json_file=json_file,
                            videos_folder=videos_folder,
                            keypoints_folder=keypoints_folder,
                            transforms=val_transforms,
                            split='val',
                            subset=args.subset)
        val_dl = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=real_batch_size,
                                             shuffle=True)
    logger.info('data loaded')

    # Model, loss, optimizer
    m = Conv2dRNN(args).to(device)
    optimizer = torch.optim.Adam(m.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss()

    # Resume train
    start_epoch = 0
    if args.resume_train:
        checkpoint = torch.load(os.path.join(args.save_dir,
                                             'checkpoint.pt.tar'),
                                map_location=torch.device('cpu'))
        best_epoch = checkpoint['epoch']
        m.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        m = m.to(device)
        best_loss = checkpoint['best_val_loss']
        start_epoch = best_epoch + 1

        # Change learning rate
        for g in optimizer.param_groups:
            g['lr'] = args.lr

        logger.info(
            'Resuming training from epoch {} with best loss {:.4f}'.format(
                start_epoch, best_loss))

    # learning rate scheduler
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=args.lr_schedule_factor,
        patience=args.lr_schedule_patience,
        threshold=args.lr_schedule_threshold)
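    # ReduceLROnPlateau multiplies the LR by lr_schedule_factor once val_loss
    # has not improved (beyond lr_schedule_threshold) for lr_schedule_patience
    # epochs; it is stepped with val_loss at the end of each epoch below when
    # args.use_lr_scheduler is set.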

    # Watch model with wandb
    run.watch(m, log='all', log_freq=5)

    # Print args
    logger.info('using args: \n' +
                json.dumps(vars(args), sort_keys=True, indent=2))

    # Train loop
    for t in range(start_epoch, start_epoch + args.n_epochs):
        # Train
        losses = AverageMeter()
        batch_time = AverageMeter()
        m.train()

        start_t = time.time()
        for i, batch in enumerate(train_dl):

            # Run the forward pass multiple times and accumulate gradient (to be able to use large batch size)
            X = batch['X'].to(device)
            label = batch['label'].to(device)

            # [per frame logits, mean of all frames logits]
            logits = m(X)

            # Create label for each logit
            label = torch.cat([l.repeat(logits.shape[1], 1) for l in label],
                              dim=0)

            # Squeeze time sequence and batch into one dimension
            logits = logits.reshape(logits.shape[0] * logits.shape[1],
                                    logits.shape[2])
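            # Shapes after the two ops above: logits is (batch * time, n_classes)
            # and label is (batch * time, 1); squeeze() turns label into the 1-D
            # class-index vector expected by CrossEntropyLoss.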

            loss = criterion(logits, label.squeeze())
            loss.backward()
            losses.update(loss.item())

            # Step once every args.batch_size // real_batch_size mini-batches
            # so the accumulated gradient corresponds to ~args.batch_size samples.
            if (i + 1) % (args.batch_size // real_batch_size) == 0:
                # Optimize with accumulated gradient
                optimizer.step()
                optimizer.zero_grad()

                batch_time.update(time.time() - start_t)
                start_t = time.time()

        train_loss = losses.avg

        # Validate
        with torch.no_grad():
            top1 = AverageMeter()
            top5 = AverageMeter()
            top10 = AverageMeter()
            losses = AverageMeter()

            m.eval()
            for batch in val_dl:
                X = batch['X'].to(device)
                label = batch['label'].to(device)

                # [per frame logits, mean of all frames logits]
                logits = m(X)

                # Create label for each logit
                label = torch.cat(
                    [l.repeat(logits.shape[1], 1) for l in label], dim=0)

                # Squeeze time sequence and batch into one dimension
                logits = logits.reshape(logits.shape[0] * logits.shape[1],
                                        logits.shape[2])

                losses.update(criterion(logits, label.squeeze()).item())

                # Update metrics
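                # topk_accuracy presumably returns, for each k in topk, the
                # fraction of frames whose true class appears among the k
                # highest logits (its implementation is not shown here).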
                acc1, acc5, acc10 = topk_accuracy(logits,
                                                  label,
                                                  topk=(1, 5, 10))
                top1.update(acc1.item())
                top5.update(acc5.item())
                top10.update(acc10.item())

            val_loss = losses.avg

            # Save best model
            if val_loss < best_loss:
                best_loss, best_epoch = val_loss, t
                save_best(args, t, m, optimizer, best_loss)

            # Check early stop
            if t >= best_epoch + args.early_stop:
                logger.info('EARLY STOP')
                break

        # Log info
        logger.info(
            'epoch: {} train loss: {:.4f} val loss: {:.4f} top1acc {:.4f} top5acc {:.4f} top10acc {:.4f} lr: {:.2e} time per batch {:.1f} s'
            .format(t + 1, train_loss, val_loss, top1.avg, top5.avg, top10.avg,
                    optimizer.param_groups[0]['lr'], batch_time.avg))

        # Wandb log
        run.log({
            'train_loss': train_loss,
            'val_loss': val_loss,
            'top1_acc': top1.avg,
            'top5_acc': top5.avg,
            'top10_acc': top10.avg,
            'lr': optimizer.param_groups[0]['lr']
        })

        # Scheduler step
        if args.use_lr_scheduler:
            scheduler.step(val_loss)