def create_project_file(config):
    """
    Creates a coregen project with settings for this device

    Args:
        config (dictionary): configuration dictionary

    Returns:
        (string): filename to the project file

    Raises:
        Nothing
    """
    core_dir = get_coregen_dir(config, absolute=True)
    cp_fn = os.path.join(core_dir, COREGEN_PROJECT_NAME)
    fp = open(cp_fn, "w")

    # Open up the template dictionary
    fn = COREGEN_TEMPLATE
    fn = os.path.join(os.path.dirname(__file__), fn)
    template = json.load(open(fn, "r"))
    template["device"] = utils.get_device(config)
    template["devicefamily"] = utils.get_family(config)
    template["package"] = utils.get_package(config)
    template["speedgrade"] = utils.get_speed_grade(config)
    template["workingdirectory"] = get_coregen_temp_dir(config, absolute=True)

    for t in template:
        fp.write("SET %s = %s%s" % (t, template[t], os.linesep))
    fp.close()
    return cp_fn
def post(self):
    # Check arguments
    args = user_parser.parse_args()
    if not args["email"] or not args["password"]:
        abort(406)

    # Check for duplicate emails
    if User.query.filter(User.email == args["email"]).first() is not None:
        abort(409)

    # Add user
    user = User(args["email"], args["password"])
    if args["phone"]:
        user.phone = args["phone"]
    db.session.add(user)
    db.session.commit()

    # Add current device for the user
    device = Device(user.id, get_device())
    device.active = True
    db.session.add(device)
    db.session.commit()

    return user, 201
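# The POST handler above assumes a module-level user_parser exposing email,
# password and phone arguments. A minimal Flask-RESTful sketch of such a
# parser (an assumption for illustration, not this app's actual definition):
from flask_restful import reqparse

user_parser = reqparse.RequestParser()
user_parser.add_argument('email', type=str)
user_parser.add_argument('password', type=str)
user_parser.add_argument('phone', type=str)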
parser.add_argument('--with_exploration', action='store_true',
                    help="See data exploration visualization")
parser.add_argument('--download', type=str, default=None, nargs='+',
                    choices=dataset_names,
                    help="Download the specified datasets and quit.")

# Parse arguments
args = parser.parse_args()

# Execution arguments
CUDA_DEVICE = get_device(args.cuda)
# % of training samples
SAMPLE_PERCENTAGE = args.training_sample
# Data augmentation
FLIP_AUGMENTATION = args.flip_augmentation
RADIATION_AUGMENTATION = args.radiation_augmentation
MIXTURE_AUGMENTATION = args.mixture_augmentation
# Dataset name
DATASET = args.dataset
# Model name
MODEL = args.model
# Number of runs (for cross-validation)
N_RUNS = args.runs
# Spatial context size (number of neighbours in each spatial direction)
def train_model(args: dict, hparams:dict): # Code for this function adopted from https://mccormickml.com/2019/07/22/BERT-fine-tuning/ file = args.dataset_filepath # pos_file = args.pos_file # neg_file = args.neg_file truncation = args.truncation # n_samples = args.n_samples seed_val = hparams["seed_val"] device = utils.get_device(device_no=args.device_no) saves_dir = "saves/" Path(saves_dir).mkdir(parents=True, exist_ok=True) time = datetime.datetime.now() saves_path = os.path.join(saves_dir, utils.get_filename(time)) Path(saves_path).mkdir(parents=True, exist_ok=True) log_path = os.path.join(saves_path, "training.log") logging.basicConfig(filename=log_path, filemode='w', format='%(name)s - %(levelname)s - %(message)s', level=logging.DEBUG) logger=logging.getLogger() # logger.setLevel() logger.info("File: "+str(file)) logger.info("Parameters: "+str(args)) logger.info("Truncation: "+truncation) # Load the BERT tokenizer. logger.info('Loading BERT tokenizer...') tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) max_len = 0 # samples = utils.read_and_sample(file # # , seed_val=seed_val # ) samples = utils.read_pairwise(file, first=0, second=2) random.shuffle(samples) input_ids = [] attention_masks = [] samples_text = [val[0] for val in samples] samples_label = [val[1] for val in samples] print(np.unique(np.array(samples_label))) max_len = 0 # For every sentence... for text in samples_text: # Tokenize the text and add `[CLS]` and `[SEP]` tokens. input_id = tokenizer(text, add_special_tokens=True) # Update the maximum sentence length. max_len = max(max_len, len(input_id['input_ids'])) logger.info('Max text length: ' + str(max_len)) print('Max text length: ' + str(max_len)) for text in samples_text: input_id = tokenizer(text, add_special_tokens=True) # print(len(input_id['input_ids'])) # if len(input_id['input_ids']) > 512: # if truncation == "tail-only": # input_id = [tokenizer.cls_token_id]+input_id[-511:] # elif truncation == "head-and-tail": # input_id = [tokenizer.cls_token_id]+input_id[1:129]+input_id[-382:]+[tokenizer.sep_token_id] # else: # input_id = input_id[:511]+[tokenizer.sep_token_id] # input_ids.append(torch.tensor(input_id).view(1,-1)) # attention_masks.append(torch.ones([1,len(input_id)], dtype=torch.long)) # else: encoded_dict = tokenizer( text, add_special_tokens = True, truncation=True, max_length = 512, padding = 'max_length', return_attention_mask = True, return_tensors = 'pt', ) input_ids.append(encoded_dict['input_ids']) attention_masks.append(encoded_dict['attention_mask']) input_ids = torch.cat(input_ids, dim=0) attention_masks = torch.cat(attention_masks, dim=0) samples_label_tensor = torch.tensor(samples_label) # samples_text_tensor = torch.tensor(samples_text) dataset = TensorDataset(input_ids, attention_masks, samples_label_tensor) # dataset = TensorDataset(samples_text_tensor, samples_label_tensor) train_size = int(0.9 * len(dataset)) val_size = len(dataset) - train_size train_dataset, val_dataset = random_split(dataset, [train_size, val_size]) logger.info('{:>5,} training samples'.format(train_size)) logger.info('{:>5,} validation samples'.format(val_size)) batch_size = hparams["batch_size"] train_dataloader = DataLoader( train_dataset, # The training samples. sampler = RandomSampler(train_dataset), # Select batches randomly batch_size = batch_size, # Trains with this batch size. # collate_fn = collate_fn ) validation_dataloader = DataLoader( val_dataset, # The validation samples. 
sampler = SequentialSampler(val_dataset), # Pull out batches sequentially. batch_size = batch_size, # Evaluate with this batch size. # collate_fn = collate_fn ) model = BertForSequenceClassification.from_pretrained( "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab. num_labels = 2, # The number of output labels--2 for binary classification. # You can increase this for multi-class tasks. output_attentions = False, # Whether the model returns attentions weights. output_hidden_states = False, # Whether the model returns all hidden-states. ) model = model.to(device=device) # model.cuda(device=device) optimizer = AdamW(model.parameters(), lr = hparams["learning_rate"], # args.learning_rate - default is 5e-5, our notebook had 2e-5 eps = hparams["adam_epsilon"] # args.adam_epsilon - default is 1e-8. ) epochs = 4 total_steps = len(train_dataloader) * epochs scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps = 0, # Default value in run_glue.py num_training_steps = total_steps) random.seed(seed_val) np.random.seed(seed_val) torch.manual_seed(seed_val) torch.cuda.manual_seed_all(seed_val) training_stats = [] for epoch_i in range(0, epochs): logger.info("") logger.info('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs)) logger.info('Training...') total_train_loss = 0 model.train() for step, batch in enumerate(train_dataloader): print(len(train_dataloader)) if step % 40 == 0 and not step == 0: logger.info(' Batch {:>5,} of {:>5,}. '.format(step, len(train_dataloader))) b_input_ids = batch[0].to(device) b_input_mask = batch[1].to(device) b_labels = batch[2].to(device) model.zero_grad() loss, logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels) # print(logits) # print(loss) total_train_loss += loss.detach().cpu().numpy() loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) optimizer.step() scheduler.step() avg_train_loss = total_train_loss / len(train_dataloader) logger.info("") logger.info("Average training loss: {0:.2f}".format(avg_train_loss)) logger.info("") logger.info("Running Validation...") model.eval() total_eval_accuracy = 0 total_eval_loss = 0 for batch in validation_dataloader: b_input_ids = batch[0].to(device) b_input_mask = batch[1].to(device) b_labels = batch[2].to(device) with torch.no_grad(): (loss, logits) = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels) total_eval_loss += loss.detach().cpu().numpy() logits = logits.detach().cpu().numpy() label_ids = b_labels.to('cpu').numpy() total_eval_accuracy += flat_accuracy(logits, label_ids) avg_val_accuracy = total_eval_accuracy / len(validation_dataloader) logger.info("Accuracy: {0:.2f}".format(avg_val_accuracy)) avg_val_loss = total_eval_loss / len(validation_dataloader) logger.info("Validation Loss: {0:.2f}".format(avg_val_loss)) training_stats.append( { 'epoch': epoch_i + 1, 'Training Loss': avg_train_loss, 'Valid. Loss': avg_val_loss, 'Valid. Accur.': avg_val_accuracy, } ) model_save_path = os.path.join(saves_path, "model_"+str(epoch_i+1)+"epochs") torch.save(model, model_save_path) logger.info("") logger.info("Training complete!") handlers = logger.handlers[:] for handler in handlers: handler.close() logger.removeHandler(handler)
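# The validation loop above calls flat_accuracy(logits, label_ids). A common
# definition (e.g. in the BERT fine-tuning tutorial referenced at the top of
# train_model) compares the argmax of the logits to the labels. A sketch under
# that assumption, not necessarily this project's exact helper:
import numpy as np

def flat_accuracy(preds, labels):
    # preds: (batch, num_labels) array of logits; labels: (batch,) integer labels
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)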
# Prepare dataset
transformer = transforms.Compose([
    transforms.Resize(config["image_size"]),
    transforms.CenterCrop(config["image_size"]),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
dataset = dset.ImageFolder(root=data_path, transform=transformer)

# Initialize dataloader
dataloader = torch.utils.data.DataLoader(dataset,
                                         batch_size=config["batch_size"],
                                         shuffle=True,
                                         num_workers=2)

device = get_device()

# Show some images from the trainingset
# show_images(dataloader)

# Initialize the model
generator = Generator(config).to(device)
discriminator = Discriminator(config).to(device)

# Initialize custom weights to model
generator.apply(weight_init)
discriminator.apply(weight_init)

# Loss Functions and Optimizers
# BCELoss for Discriminator
def main():
    device = utils.get_device()
    saves_dir = 'saves'
    compare_models(f'{saves_dir}/cae-F-ConvAutoencoder_21-06-01--10-43-39',
                   f'{saves_dir}/cae-F-ConvAutoencoder_21-06-03--08-16-48',
                   device)
def main(task='mrpc', train_cfg='config/train_mrpc.json', model_cfg='config/bert_base.json', data_file='../glue/MRPC/train.tsv', model_file=None, pretrain_file='../uncased_L-12_H-768_A-12/bert_model.ckpt', data_parallel=True, vocab='../uncased_L-12_H-768_A-12/vocab.txt', save_dir='../exp/bert/mrpc', max_len=128, mode='train'): cfg = train.Config.from_json(train_cfg) model_cfg = models.Config.from_json(model_cfg) set_seeds(cfg.seed) tokenizer = tokenization.FullTokenizer(vocab_file=vocab, do_lower_case=True) pipeline = [ Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize), AddSpecialTokensWithTruncation(max_len), TokenIndexing(tokenizer.convert_tokens_to_ids, ('0', '1'), max_len) ] dataset = CsvDataset(pipeline) # print(dataset[0]) # pdb.set_trace() data_iter = DataLoader(dataset, batch_size=1, shuffle=True) model = Classifier(model_cfg, 1) criterion = nn.CrossEntropyLoss() trainer = train.Trainer(cfg, model, data_iter, optim.optim4GPU(cfg, model), save_dir, get_device()) if mode == 'train': def get_loss(model, batch, global_step): # make sure loss is a scalar tensor # pdb.set_trace() input_ids, segment_ids, input_mask, label_id = [ b[0] for b in batch ] # pdb.set_trace() logits = model(input_ids, segment_ids, input_mask) # pdb.set_trace() loss = neg_logloss(logits) # loss = criterion(logits, label_id) return loss trainer.train(get_loss, model_file, pretrain_file, data_parallel) elif mode == 'eval': def evaluate(model, batch): input_ids, segment_ids, input_mask, label_id = batch logits = model(input_ids, segment_ids, input_mask) _, label_pred = logits.max(1) result = (label_pred == label_id).float() #.cpu().numpy() accuracy = result.mean() return accuracy, result results = trainer.eval(evaluate, model_file, data_parallel) total_accuracy = torch.cat(results).mean().item() print('Accuracy:', total_accuracy)
def main(train_cfg='config/pretrain.json', model_cfg='config/bert_base.json', data_file='/root/voucher/dataset/tifu/bert/train.tsv', model_file=None, pretrain_file=None, data_parallel=True, word_vocab='/root/voucher/dataset/tifu/bert/word_vocab.txt', pos_vocab='/root/voucher/dataset/tifu/bert/pos_vocab.txt', dep_vocab='/root/voucher/dataset/tifu/bert/dep_vocab.txt', pos_dep_word_vocab='/root/voucher/dataset/tifu/bert/pos_dep_word.pkl', save_dir='../exp/bert/pretrain', log_dir='../exp/bert/pretrain/runs', max_len=384, max_pred=20, mask_prob=0.15, mode=train): if mode == 'train': pass elif mode == 'eval': pass # max_pred = max_len # mask_prob = 1 else: print("please select correct mode") exit(1) cfg = train.Config.from_json(train_cfg) model_cfg = models.Config.from_json(model_cfg) set_seeds(cfg.seed) custom_tokenizer = CustomVocabTokenizer(word_vocab_file=word_vocab, pos_vocab_file=pos_vocab, dep_vocab_file=dep_vocab, pos_dep_word_vocab_file=pos_dep_word_vocab) custom_tokenize = lambda word, pos, dep: custom_tokenizer.tokenize(custom_tokenizer.convert_to_unicode(word), custom_tokenizer.convert_to_unicode(pos), custom_tokenizer.convert_to_unicode(dep)) pipeline = [Preprocess4Pretrain(max_pred, mask_prob, list(custom_tokenizer.word_tokenizer.vocab.keys()), list(custom_tokenizer.pos_tokenizer.vocab.keys()), list(custom_tokenizer.dep_tokenizer.vocab.keys()), custom_tokenizer.convert_tokens_to_ids, max_len)] data_iter = TifuDataLoader(data_file, cfg.batch_size, custom_tokenize, max_len, pipeline=pipeline) model = BertModel4Pretrain(model_cfg) optimizer = optim.optim4GPU(cfg, model) trainer = train.Trainer(cfg, model, data_iter, optimizer, save_dir, get_device()) if mode == 'eval': def evaluate(model, batch): input_word_ids,\ input_segment_ids,\ input_mask,\ target_word_ids,\ target_mask,\ input_len, \ target_len = batch logits_word = model(input_word_ids, input_segment_ids, input_mask, target_mask) input_len = input_len.tolist() target_len = target_len.tolist() for i in range(len(input_len)): logits = torch.squeeze(logits_word.narrow(0, i, 1), dim=0) logits_input = logits.narrow(0, 0, input_len[i]) logits_target = logits.narrow(0, input_len[i], target_len[i]) _, input_ids = logits_input.max(-1) _, target_ids = logits_target.max(-1) input_tokens = custom_tokenizer.word_tokenizer.convert_ids_to_tokens(input_ids.tolist()) target_tokens = custom_tokenizer.word_tokenizer.convert_ids_to_tokens(target_ids.tolist()) results = [] input_norm = logits_input / logits_input.norm(dim=1)[:, None] target_norm = logits_target / logits_target.norm(dim=1)[:, None] #target_len x input_len res = torch.mm(target_norm, input_norm.transpose(0, 1)) #target_len x 1 _, sim_idxs = res.max(-1) for j, sim_idx in enumerate(sim_idxs.tolist()): results.append([target_tokens[j], input_tokens[sim_idx]]) print(results) accuracies = [0] results = [0] return accuracies, results results = trainer.eval(evaluate, None, pretrain_file, data_parallel, eval_kind_names=["Word"]) print(results)
def train(flags): data_root = flags.data window_size = flags.window_size pred_size = flags.pred_size batch_size = flags.batch_size out_dir = flags.out num_epochs = flags.epochs val_every = flags.val_every classify_thresh = flags.classify_thresh # optim args lr = flags.lr betas = (flags.beta1, flags.beta2) eps = flags.eps weight_decay = flags.decay use_confidence = flags.use_confidence joint_set = flags.joint_set if not os.path.exists(data_root): print('Could not find training data at ' + data_root) return if not os.path.exists(out_dir): os.mkdir(out_dir) weights_out_path = os.path.join(out_dir, 'op_only_weights.pth') best_weights_out_path = os.path.join(out_dir, 'op_only_weights_BEST.pth') # load training and validation data train_dataset = OpenPoseDataset(data_root, split='train', window_size=window_size, contact_size=pred_size, use_confidence=use_confidence, joint_set=joint_set) train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2) val_dataset = OpenPoseDataset(data_root, split='val', window_size=window_size, contact_size=pred_size, use_confidence=use_confidence, joint_set=joint_set) val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2) num_joints = len( openpose_dataset.OP_JOINT_SUBSETS[train_dataset.joint_set]) # create the model and optimizer device_str = 'cpu' if flags.cpu else None device = get_device(device_str) op_model = create_model(window_size, num_joints, pred_size, device, use_confidence=use_confidence) op_optim = optim.Adam(op_model.parameters(), lr=lr, betas=betas, \ eps=eps, weight_decay=weight_decay) model_parameters = filter(lambda p: p.requires_grad, op_model.parameters()) params = sum([np.prod(p.size()) for p in model_parameters]) print('Num model params: ' + str(params)) # viz stats train_steps = [] train_losses = [] train_accs = [] val_steps = [] val_losses = [] val_accs = [] # train loss_sum = 0.0 loss_count = 0 best_val_f1 = -float('inf') confusion_count = np.zeros((4), dtype=int) for epoch_idx in range(num_epochs): for batch_idx, batch_data in enumerate(train_loader): # prepere the data for this batch input_data = batch_data['joint2d'].to(device) label_data = batch_data['contacts'].to(device) # zero the gradients op_optim.zero_grad() # forward + backward + optimize output_data = op_model(input_data) loss = op_model.loss(output_data, label_data) n_tp, n_fp, n_fn, n_tn = op_model.accuracy(output_data, label_data, thresh=classify_thresh) loss = torch.mean(loss) loss.backward() op_optim.step() loss_sum += loss.to('cpu').item() loss_count += 1 confusion_count += np.array([n_tp, n_fp, n_fn, n_tn], dtype=int) if epoch_idx % 5 == 0: print('=================== TRAIN (' + str(epoch_idx + 1) + ' epochs) ================================================') mean_loss = loss_sum / loss_count print('Mean loss: %0.3f' % (mean_loss)) loss_sum = 0.0 loss_count = 0 metrics = calculate_metrics(confusion_count) cur_acc, _, _, _, _ = metrics print_metrics(metrics) confusion_count = np.zeros((4), dtype=int) print( '======================================================================================' ) train_steps.append(epoch_idx * len(train_loader) + batch_idx) train_losses.append(mean_loss) train_accs.append(cur_acc) # save plot plot_train_stats((train_steps, train_losses, train_accs), \ (val_steps, val_losses, val_accs), \ out_dir, accuracy_metrics=metrics) if epoch_idx % val_every == 0: # run on the validation data print('==================== VALIDATION (' + str(epoch_idx + 1) + ' epochs) 
===========================================') val_loss, val_metrics = val_epoch(val_loader, op_model, device, classify_thresh, pred_size) print('Mean Loss: %0.3f' % (val_loss)) for tgt_frame_idx in range(pred_size): print('----- Pred Frame ' + str(tgt_frame_idx) + ' ------') print_metrics(val_metrics[tgt_frame_idx]) val_acc, _, _, _, _ = val_metrics[ pred_size // 2] # only want accuracy for middle target print( '======================================================================================' ) op_model.train() val_steps.append(epoch_idx * len(train_loader) + batch_idx) val_losses.append(val_loss) val_accs.append(val_acc) # save confusion matrix for tgt_frame_idx in range(pred_size): accuracy, precision, recall, f1, cm = val_metrics[ tgt_frame_idx] plot_confusion_mat( cm, os.path.join( out_dir, 'val_confusion_matrix_%d.png' % (tgt_frame_idx))) # also save model weights print('Saving checkpoint...') torch.save(op_model.state_dict(), weights_out_path) # check if this is the best so far and save (in terms of f1 score) if f1 > best_val_f1: best_val_f1 = f1 print('Saving best model so far...') torch.save(op_model.state_dict(), best_weights_out_path) # save final model print('Saving final checkpoint...') torch.save(op_model.state_dict(), os.path.join(out_dir, 'op_only_weights_FINAL.pth')) # save plot metrics = calculate_metrics(confusion_count) plot_train_stats((train_steps, train_losses, train_accs), \ (val_steps, val_losses, val_accs), \ out_dir, accuracy_metrics=metrics) print('FINISHED Training!')
def load_sagen(name):
    device = utils.get_device()
    gen = utils.load_model(name, sagenerator.SelfAttentionGenerator, save_attention=True)
    return gen, device
def validate(loader: DataLoader, model: nn.Module, criterion: Callable, num_classes: int, num_super_classes: int, maf: torch.FloatTensor, args: ArgumentParser) -> torch.FloatTensor: batch_time = AverageMeter('Time', ':6.3f') losses = AverageMeter('MLM Loss', ':.4e') accuracies = AverageMeter('Acc', ':.4e') accuracy_deltas = AverageMeter('Acc Delta', ':.4e') progress = ProgressMeter(len(loader), [batch_time, losses, accuracies, accuracy_deltas], prefix="Test: ") model.eval() device = get_device(args) with torch.no_grad(): end = time.time() for i, (genotypes, labels, super_labels) in enumerate(loader): ### Mask for Masked Language Modeling mask_num = int((i % 9 + 1) / 10 * genotypes.shape[1]) mask_scores = torch.rand(genotypes.shape[1]) mask_indices = mask_scores.argsort(descending=True)[:mask_num] masked_genotypes = genotypes[:, mask_indices].reshape(-1) targets = (masked_genotypes == 1).float().clone().detach() genotypes[:, mask_indices] = 0 maf_vector = maf[labels[0]] genotypes = genotypes.to(device) masked_genotypes = masked_genotypes.to(device) targets = targets.to(device) labels = labels.to(device) super_labels = super_labels.to(device) maf_vector = maf_vector.to(device) logits = model(genotypes, labels, super_labels) logits = logits[:, mask_indices].reshape(-1) # add weight to nonzero maf snps weights = torch.ones_like(logits) weight_coefficients = (maf_vector[mask_indices] > 0).repeat( genotypes.shape[0]).float() * (args.minor_coefficient - 1) + 1 weights *= weight_coefficients loss = criterion(logits, targets, weight=weights, reduction='mean') accuracy = (masked_genotypes * logits.sign()).mean() / 2 + .5 baseline_accuracy = ( masked_genotypes * (maf_vector[mask_indices].repeat(genotypes.shape[0]) - .5000001).sign()).mean() / 2 + .5 accuracy_delta = accuracy - baseline_accuracy losses.update(loss.item(), genotypes.shape[0]) accuracies.update(accuracy.item(), genotypes.shape[0]) accuracy_deltas.update(accuracy_delta.item(), genotypes.shape[0]) batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: progress.display(i) progress.display(i) return losses.avg
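# The validate() function above relies on AverageMeter and ProgressMeter for
# bookkeeping. A typical AverageMeter (this pattern usually comes from the
# PyTorch ImageNet example) keeps a running sum and count; a sketch under that
# assumption, not necessarily this repository's exact class:
class AverageMeter:
    """Computes and stores the average and current value."""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        # val is the latest measurement, n the number of samples it covers.
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count if self.count else 0

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)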
def main() -> None: global best_loss args = parser.parse_args() if args.seed is not None: random.seed(args.seed) torch.manual_seed(args.seed) cudnn.deterministic = True warnings.warn('You have chosen to seed training. ' 'This will turn on the CUDNN deterministic setting, ' 'which can slow down your training considerably! ' 'You may see unexpected behavior when restarting ' 'from checkpoints.') start_epoch = 0 vcf_reader = VCFReader(args.train_data, args.classification_map, args.chromosome, args.class_hierarchy) vcf_writer = vcf_reader.get_vcf_writer() train_dataset, validation_dataset = vcf_reader.get_datasets( args.validation_split) train_sampler = BatchByLabelRandomSampler(args.batch_size, train_dataset.labels) train_loader = DataLoader(train_dataset, batch_sampler=train_sampler) if args.validation_split != 0: validation_sampler = BatchByLabelRandomSampler( args.batch_size, validation_dataset.labels) validation_loader = DataLoader(validation_dataset, batch_sampler=validation_sampler) kwargs = { 'total_size': vcf_reader.positions.shape[0], 'window_size': args.window_size, 'num_layers': args.layers, 'num_classes': len(vcf_reader.label_encoder.classes_), 'num_super_classes': len(vcf_reader.super_label_encoder.classes_) } model = WindowedMLP(**kwargs) model.to(get_device(args)) optimizer = AdamW(model.parameters(), lr=args.learning_rate) ####### if args.resume_path is not None: if os.path.isfile(args.resume_path): print("=> loading checkpoint '{}'".format(args.resume_path)) checkpoint = torch.load(args.resume_path) if kwargs != checkpoint['model_kwargs']: raise ValueError( 'The checkpoint\'s kwargs don\'t match the ones used to initialize the model' ) if vcf_reader.snps.shape[0] != checkpoint['vcf_writer'].snps.shape[ 0]: raise ValueError( 'The data on which the checkpoint was trained had a different number of snp positions' ) start_epoch = checkpoint['epoch'] best_loss = checkpoint['best_loss'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume_path, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) ############# if args.validate: validate(validation_loader, model, nn.functional.binary_cross_entropy_with_logits, len(vcf_reader.label_encoder.classes_), len(vcf_reader.super_label_encoder.classes_), vcf_reader.maf, args) return for epoch in range(start_epoch, args.epochs + start_epoch): loss = train(train_loader, model, nn.functional.binary_cross_entropy_with_logits, optimizer, len(vcf_reader.label_encoder.classes_), len(vcf_reader.super_label_encoder.classes_), vcf_reader.maf, epoch, args) if epoch % args.save_freq == 0 or epoch == args.epochs + start_epoch - 1: if args.validation_split != 0: validation_loss = validate( validation_loader, model, nn.functional.binary_cross_entropy_with_logits, len(vcf_reader.label_encoder.classes_), len(vcf_reader.super_label_encoder.classes_), vcf_reader.maf, args) is_best = validation_loss < best_loss best_loss = min(validation_loss, best_loss) else: is_best = loss < best_loss best_loss = min(loss, best_loss) save_checkpoint( { 'epoch': epoch + 1, 'state_dict': model.state_dict(), 'model_kwargs': kwargs, 'best_loss': best_loss, 'optimizer': optimizer.state_dict(), 'vcf_writer': vcf_writer, 'label_encoder': vcf_reader.label_encoder, 'super_label_encoder': vcf_reader.super_label_encoder, 'maf': vcf_reader.maf }, is_best, args.chromosome, args.model_name, args.model_dir)
def test(): # get device device = get_device(0) # load net num_classes = 80 anchor_size = config.ANCHOR_SIZE_COCO if args.dataset == 'COCO': cfg = config.coco_ab testset = COCODataset(data_dir=args.dataset_root, json_file='instances_val2017.json', name='val2017', img_size=cfg['min_dim'][0], debug=args.debug) mean = config.MEANS elif args.dataset == 'VOC': cfg = config.voc_ab testset = VOCDetection(VOC_ROOT, [('2007', 'test')], None, VOCAnnotationTransform()) mean = config.MEANS if args.version == 'yolo_v2': from models.yolo_v2 import myYOLOv2 net = myYOLOv2(device, input_size=cfg['min_dim'], num_classes=num_classes, anchor_size=config.ANCHOR_SIZE_COCO) print('Let us test yolo-v2 on the MSCOCO dataset ......') elif args.version == 'yolo_v3': from models.yolo_v3 import myYOLOv3 net = myYOLOv3(device, input_size=cfg['min_dim'], num_classes=num_classes, anchor_size=config.MULTI_ANCHOR_SIZE_COCO) elif args.version == 'tiny_yolo_v2': from models.tiny_yolo_v2 import YOLOv2tiny net = YOLOv2tiny(device, input_size=cfg['min_dim'], num_classes=num_classes, anchor_size=config.ANCHOR_SIZE_COCO) elif args.version == 'tiny_yolo_v3': from models.tiny_yolo_v3 import YOLOv3tiny net = YOLOv3tiny(device, input_size=cfg['min_dim'], num_classes=num_classes, anchor_size=config.MULTI_ANCHOR_SIZE_COCO) net.load_state_dict(torch.load(args.trained_model, map_location='cuda')) net.to(device).eval() print('Finished loading model!') # evaluation test_net(net, device, testset, BaseTransform(net.input_size, mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229)), thresh=args.visual_threshold)
def main(with_gui=None, check_stop=None): device_idx, model_name, generator_cfg, _, train_cfg = load_config() device = get_device() if with_gui is None: with_gui = train_cfg.get('with_gui') model = None loss_f = BCELoss(reduction='none') pause = False images, count, loss_sum, epoch, acc_sum, acc_sum_p = 0, 0, 0, 1, 0, 0 generator, generator_cfg = None, None optimizer, optimizer_cfg = None, None best_loss = None while check_stop is None or not check_stop(): # Check config: if images == 0: cfg = load_config() if model_name != cfg.model or model is None: model_name = cfg.model model, best_loss, epoch = load_model(model_name, train=True, device=device) log(model_name, 'Loaded model %s' % model_name) optimizer_cfg = None if optimizer_cfg != cfg.optimizer: optimizer_cfg = cfg.optimizer optimizer = create_optimizer(optimizer_cfg, model) log(model_name, 'Created optimizer %s' % str(optimizer)) if generator_cfg != cfg.generator: generator_cfg = cfg.generator generator = create_generator(generator_cfg, device=device) log(model_name, 'Created generator') train_cfg = cfg.train # Run: x, target = next(generator) mask, _ = target.max(dim=1, keepdim=True) optimizer.zero_grad() y = model(x) # Save for debug: if train_cfg.get('save', False): show_images(x, 'input', save_dir='debug') show_images(y, 'output', mask=True, save_dir='debug') show_images(target, 'target', mask=mask, save_dir='debug') # GUI: if with_gui: if not pause: show_images(x, 'input') show_images(y, 'output', mask=True, step=2) show_images(target, 'target', mask=mask, step=2) key = cv2.waitKey(1) if key == ord('s'): torch.save(model.state_dict(), 'models/unet2.pt') elif key == ord('p'): pause = not pause elif key == ord('q'): break # Optimize: acc_sum += check_accuracy(y, target) loss = (loss_f(y, target) * mask).mean() loss_item = loss.item() loss_sum += loss_item count += 1 images += len(x) loss.backward() optimizer.step() # Complete epoch: if images >= train_cfg['epoch_images']: acc_total, names = acc_sum, channel_names msg = 'Epoch %d: train loss %f, acc %s' % ( epoch, loss_sum / count, acc_to_str(acc_total, names=names) ) log(model_name, msg) count = 0 images = 0 loss_sum = 0 epoch += 1 acc_sum[:] = 0 save_model(model_name, model, best_loss, epoch) log(model_name, 'Stopped\n')
def get_default_run_options(model, dataset, runs, sampling_mode):
    """Setup general experiment options, irrespective of the model and data.

    Parameters:
        model (str): name of model to use. Available: SVM (linear),
            SVM_grid (grid search on linear, poly and RBF), baseline
            (fully connected NN), hu (1D CNN), hamida (3D CNN + 1D classifier),
            lee (3D FCN), chen (3D CNN), li (3D CNN), he (3D CNN), luo (3D CNN),
            sharma (2D CNN), mou (1D RNN), boulch (1D semi-supervised CNN),
            liu (3D semi-supervised CNN)
        dataset (str): hyperspectral image name.
        runs (int): number of runs.
        sampling_mode ('all' or 'fixed'): how to select pixels for train/test.

    Returns:
        options (dict): set of options.
    """
    options = {
        'model': model,
        'runs': runs,
        'sampling_mode': sampling_mode,
        'dataset': dataset,
        'device': get_device(0),  # (defaults to -1, which learns on CPU)
        'dataset_path': PATH_DATA,
        'sample_path': PATH_SAMPLES,
        'rdir': 'work/',
        'preprocessing': {
            'type': 'division'
        }
    }

    if model == 'hu':
        options['batch_size'], options['epoch'] = 50, 400
    elif model == 'li' or model == 'lee':
        options['batch_size'], options['epoch'] = 100, 200
    else:
        options['batch_size'], options['epoch'] = 100, 100

    # DeepHyperX default options:
    options['svm_grid_params'] = [{
        'kernel': ['rbf'],
        'gamma': [1e-1, 1e-2, 1e-3],
        'C': [1, 10, 100, 1000]
    }, {
        'kernel': ['linear'],
        'C': [0.1, 1, 10, 100, 1000]
    }, {
        'kernel': ['poly'],
        'degree': [3],
        'gamma': [1e-1, 1e-2, 1e-3]
    }]
    options.update({
        'class_balancing': False,
        'flip_augmentation': False,
        'mixture_augmentation': False,
        'multi_class': 1,
        'path': './predictions/',
        'radiation_augmentation': False,
        'test_stride': 1,
        'training_sample': 10,
        'with_exploration': False
    })

    # DeepHyperX handy, but unused options
    options.update({
        'checkpoint': None,  # option to load state dict instead of train from scratch
        'train_gt': None,    # train GT filename, not used
        'test_gt': None,     # test GT filename, not used
    })

    return options
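# Several snippets here pass a CUDA ordinal to get_device (e.g. get_device(0)
# above, with the note that -1 learns on CPU, and get_device(args.cuda)
# earlier). A minimal sketch of such an ordinal-based helper; this is an
# illustrative assumption, not the project's actual implementation:
import torch

def get_device(ordinal):
    # Negative ordinals fall back to the CPU; otherwise pick the CUDA device.
    if ordinal < 0:
        print("Computation on CPU")
        return torch.device("cpu")
    if torch.cuda.is_available():
        print("Computation on CUDA GPU device {}".format(ordinal))
        return torch.device("cuda:{}".format(ordinal))
    print("CUDA was requested but is not available, falling back to CPU")
    return torch.device("cpu")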
def test_get_device(self):
    config = utils.read_config(self.env)
    config["device"] = "xc6slx9-tqg144-3"
    device = utils.get_device(config)
    self.assertEqual("xc6slx9", device)
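# A hedged sketch of the config-based utils.get_device that the test above
# exercises: the "device" entry looks like "<part>-<package>-<speedgrade>"
# (e.g. "xc6slx9-tqg144-3") and the helper returns just the part name. This is
# an assumption inferred from the assertion, not the project's actual code.
def get_device(config):
    # Split off the package and speed grade, keep only the part name.
    return config["device"].split("-")[0]

# Example: get_device({"device": "xc6slx9-tqg144-3"}) -> "xc6slx9"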
from captum.attr import (
    GradientShap,
    DeepLift,
    DeepLiftShap,
    IntegratedGradients,
    LayerConductance,
    NeuronConductance,
    NoiseTunnel,
)
import tqdm

from utils import get_device, isnotebook, ensure_arr, is_numeric
import metrics

DEVICE = get_device()

# Combination of
# https://www.nature.com/articles/s41598-020-59827-1#MOESM1
# - this is generally a superset of what Seurat uses
# https://satijalab.org/seurat/v3.1/pbmc3k_tutorial.html
PBMC_MARKER_GENES = {
    "CD4+ T cells": ["IL7R", "CD3D", "CD4", "CTLA4"],
    "IL7RCD4+ T Cells": ["CD8A", "IL7R", "CD3D"],  # Nature paper only
    "CD8+ T cells": ["CD8A", "GZMB", "CD3D", "CD8B"],  # Common both
    "B cells": ["CD19", "MS4A1", "CD79A", "CD79B", "BLNK"],  # Common both
    "Natural Killer cells": [
        "FCGR3A",
        "NCAM1",
        "KLRB1",
        "KLRC1",
    params["reg_ratio"] = np.random.rand() * 0.0015
    params["batch_size"] = np.random.randint(26, 256)
    params["bidirectional"] = bool(np.random.randint(0, 2))
    cfg = AcousticLLDConfig(**params)
    model = RNN(cfg)
elif args.model_type == "acoustic-spectrogram":
    test_features, test_labels, val_features, val_labels, train_features, train_labels = load_spectrogram_dataset()
    params["fc_size"] = np.random.randint(10, 200)
    params["dropout"] = 0.3 + np.random.rand() * 0.6
    cfg = AcousticSpectrogramConfig(**params)
    model = CNN(cfg)
else:
    raise Exception(
        "model_type parameter has to be one of [linguistic|acoustic-lld|acoustic-spectrogram]")

print("Subsets sizes: test_features:{}, test_labels:{}, val_features:{}, val_labels:{}, train_features:{}, train_labels:{}"
      .format(test_features.shape[0], test_labels.shape[0], val_features.shape[0],
              val_labels.shape[0], train_features.shape[0], train_labels.shape[0]))

"""Converting model to specified hardware and format"""
model.float()
model = model.to(get_device())

run_training(model, cfg, test_features, test_labels, train_features, train_labels,
             val_features, val_labels)
from utils import get_args, get_device, get_class_names
from modeler import load_model, predict

## Get the arguments
args = get_args('predict')

## Get the device
device = get_device(args.gpu)

## Get the model saved as checkpoint
trained_validated_model = load_model(model_dir=args.model_dir)

probs, classes = predict(device=device,
                         image_path=args.image_path,
                         model=trained_validated_model,
                         topx=args.topk)

## Get names of the classes
class_names = get_class_names(classes=classes, cat_names=args.cat_names)

## Print prediction(s)
print(('AI Model\'s top {} prediction(s) are:').format(args.topk))
print('Rank'.ljust(5) + 'Predicted Name'.ljust(25) + 'Probability')
for i, (prob, class_name) in enumerate(zip(probs, class_names)):
    print('{}. {} {}%'.format(
        str(i + 1).rjust(3),
        class_name.ljust(25),
        ("%.2f" % round(prob * 100, 2)).rjust(6)))
        param_group['lr'] = lr
    return lr


def warmup_strategy(optimizer, epoch_size, iteration):
    lr = 1e-6 + (args.lr - 1e-6) * iteration / (epoch_size * (args.wp_epoch))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr


if __name__ == '__main__':
    global hr, cfg
    hr = False
    device = get_device(args.gpu_ind)

    if args.high_resolution == 1:
        hr = True

    cfg = voc_ab

    if args.version == 'fcos_lite':
        from models.fcos_lite import FCOS_LITE
        fcos_lite = FCOS_LITE(device,
                              input_size=cfg['min_dim'],
                              num_classes=args.num_classes,
                              trainable=True,
                              hr=hr)
        print('Let us train FCOS-LITE on the VOC0712 dataset ......')
def main(train_cfg='config/pretrain.json', model_cfg='config/bert_base.json', data_file='/root/voucher/dataset/tifu/bert/train.tsv', model_file=None, pretrain_file=None, data_parallel=True, word_vocab='/root/voucher/dataset/tifu/bert/word_vocab.txt', pos_vocab='/root/voucher/dataset/tifu/bert/pos_vocab.txt', dep_vocab='/root/voucher/dataset/tifu/bert/dep_vocab.txt', pos_dep_word_vocab='/root/voucher/dataset/tifu/bert/pos_dep_word.pkl', save_dir='../exp/bert/pretrain', log_dir='../exp/bert/pretrain/runs', max_len=384, max_pred=20, mask_prob=0.15, mode=train): if mode == 'train': pass elif mode == 'eval': pass # max_pred = max_len # mask_prob = 1 else: print("please select correct mode") exit(1) cfg = train.Config.from_json(train_cfg) model_cfg = models.Config.from_json(model_cfg) set_seeds(cfg.seed) custom_tokenizer = CustomVocabTokenizer(word_vocab_file=word_vocab, pos_vocab_file=pos_vocab, dep_vocab_file=dep_vocab, pos_dep_word_vocab_file=pos_dep_word_vocab) custom_tokenize = lambda word, pos, dep: custom_tokenizer.tokenize(custom_tokenizer.convert_to_unicode(word), custom_tokenizer.convert_to_unicode(pos), custom_tokenizer.convert_to_unicode(dep)) pipeline = [Preprocess4Pretrain(max_pred, mask_prob, list(custom_tokenizer.word_tokenizer.vocab.keys()), list(custom_tokenizer.pos_tokenizer.vocab.keys()), list(custom_tokenizer.dep_tokenizer.vocab.keys()), custom_tokenizer.convert_tokens_to_ids, max_len)] data_iter = TifuDataLoader(data_file, cfg.batch_size, custom_tokenize, max_len, pipeline=pipeline) model = BertModel4Pretrain(model_cfg) criterion3 = nn.CrossEntropyLoss(reduction='none') optimizer = optim.optim4GPU(cfg, model) trainer = train.Trainer(cfg, model, data_iter, optimizer, save_dir, get_device()) writer = SummaryWriter(log_dir=log_dir) # for tensorboardX if mode == 'train': def get_loss(model, batch, global_step): # make sure loss is tensor input_word_ids,\ input_segment_ids,\ input_mask,\ target_word_ids,\ target_mask = batch logits_word = model(input_word_ids, input_segment_ids, input_mask, target_mask) loss_word = criterion3(logits_word.transpose(1, 2), target_word_ids) # for masked word loss_word = (loss_word*target_mask.float()).mean() print(loss_word.item()) writer.add_scalars('data/scalar_group', {'loss_word': loss_word.item(), 'loss_total': loss_word.item(), 'lr': optimizer.get_lr()[0], }, global_step) return loss_word trainer.train(get_loss, model_file, pretrain_file, data_parallel) elif mode == 'eval': def evaluate(model, batch): input_word_ids,\ input_segment_ids,\ input_mask,\ target_word_ids,\ target_mask = batch logits_word = model(input_word_ids, input_segment_ids, input_mask, target_mask) _, label_word = logits_word.max(-1) result_word = (label_word == target_word_ids).float() word_accuracy = result_word.mean() accuracies = [word_accuracy] results = [result_word] return accuracies, results results = trainer.eval(evaluate, model_file, data_parallel, eval_kind_names=["Word"]) print(results)
def __init__(self, alpha: float = 1.0, use_cuda: bool = True):
    self.alpha = alpha
    self.device = get_device(use_cuda)
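# This constructor passes a boolean to get_device, yet another signature used
# in these snippets. A minimal sketch, assuming it simply prefers CUDA when
# requested and available (illustrative, not the library's actual helper):
import torch

def get_device(use_cuda: bool = True) -> torch.device:
    return torch.device("cuda" if use_cuda and torch.cuda.is_available() else "cpu")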
def train_rank_net(start_epoch=0, additional_epoch=100, lr=0.0001, optim="adam", train_algo=SUM_SESSION, double_precision=False, standardize=False, small_dataset=False, debug=False): """ :param start_epoch: int :param additional_epoch: int :param lr: float :param optim: str :param train_algo: str :param double_precision: boolean :param standardize: boolean :param small_dataset: boolean :param debug: boolean :return: """ print("start_epoch:{}, additional_epoch:{}, lr:{}".format( start_epoch, additional_epoch, lr)) precision = torch.float64 if double_precision else torch.float32 # get training and validation data: data_fold = 'Fold1' train_loader, df_train, valid_loader, df_valid = load_train_vali_data( data_fold, small_dataset) if standardize: df_train, scaler = train_loader.train_scaler_and_transform() df_valid = valid_loader.apply_scaler(scaler) net, net_inference, ckptfile = get_train_inference_net( train_algo, train_loader.num_features, start_epoch, double_precision) device = get_device() net.to(device) net_inference.to(device) # initialize to make training faster net.apply(init_weights) if optim == "adam": optimizer = torch.optim.Adam(net.parameters(), lr=lr) elif optim == "sgd": optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9) else: raise ValueError( "Optimization method {} not implemented".format(optim)) print(optimizer) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.75) loss_func = None if train_algo == BASELINE: loss_func = torch.nn.BCELoss() loss_func.to(device) losses = [] for i in range(start_epoch, start_epoch + additional_epoch): scheduler.step() net.zero_grad() net.train() if train_algo == BASELINE: epoch_loss = baseline_pairwise_training_loop(i, net, loss_func, optimizer, train_loader, precision=precision, device=device, debug=debug) elif train_algo in [SUM_SESSION, ACC_GRADIENT]: epoch_loss = factorized_training_loop(i, net, None, optimizer, train_loader, training_algo=train_algo, precision=precision, device=device, debug=debug) losses.append(epoch_loss) print('=' * 20 + '\n', get_time(), 'Epoch{}, loss : {}'.format(i, losses[-1]), '\n' + '=' * 20) # save to checkpoint every 5 step, and run eval if i % 5 == 0 and i != start_epoch: save_to_ckpt(ckptfile, i, net, optimizer, scheduler) net_inference.load_state_dict(net.state_dict()) eval_model(net_inference, device, df_valid, valid_loader) # save the last ckpt save_to_ckpt(ckptfile, start_epoch + additional_epoch, net, optimizer, scheduler) # final evaluation net_inference.load_state_dict(net.state_dict()) ndcg_result = eval_model(net_inference, device, df_valid, valid_loader) # save the final model torch.save(net.state_dict(), ckptfile) print( get_time(), "finish training " + ", ".join( ["NDCG@{}: {:.5f}".format(k, ndcg_result[k]) for k in ndcg_result]), '\n\n')
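# train_rank_net above calls net.apply(init_weights) before training to speed
# up convergence. A common choice is Xavier initialisation of the linear
# layers; this is an assumed sketch, not necessarily the repository's own
# init_weights:
import torch.nn as nn

def init_weights(m):
    # Applied recursively to every submodule by net.apply(...).
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)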
def __init__( self, policy, env, transform_func, gamma, learning_rate, buffer_size, exploration_type, exploration_frac, exploration_ep, exploration_initial_eps, exploration_final_eps, double_q, policy_kwargs, seed, device ): super(DeepRLModel, self).__init__( policy=policy, env=env, policy_kwargs=policy_kwargs, seed=seed ) self.gamma = gamma self.learning_rate = learning_rate self.buffer_size = buffer_size self.exploration_type = exploration_type self.exploration_frac = exploration_frac self.exploration_ep = exploration_ep self.exploration_initial_eps = exploration_initial_eps self.exploration_final_eps = exploration_final_eps self.double_q = double_q # self.policy_kwargs = {} if policy_kwargs is None else policy_kwargs if device is None: self.device = get_device(device) else: self.device = device self.policy_kwargs = get_default_args(self.policy) self.policy_kwargs['ob_space'] = self.observation_space self.policy_kwargs['ac_space'] = self.action_space self.policy_kwargs['device'] = self.device self.policy_kwargs['learning_rate'] = self.learning_rate if policy_kwargs is not None: for key, val in policy_kwargs.items(): self.policy_kwargs[key] = val # self.policy_kwargs['transform_func'] = transform_func # if policy_kwargs is None: # self.policy = policy(self.observation_space, self.action_space, # intent=True, device=self.device) # else: self.policy = policy(**self.policy_kwargs) if self.buffer_size is None: self.replay_buffer = None else: self.replay_buffer = ReplayBuffer(self.buffer_size, device=self.device, torch=True)
def main(task_name='qqp', base_train_cfg='config/QDElectra_pretrain.json', train_cfg='config/train_mrpc.json', model_cfg='config/QDElectra_base.json', train_data_file='GLUE/glue_data/QQP/train.tsv', eval_data_file='GLUE/glue_data/QQP/eval.tsv', model_file=None, data_parallel=True, vocab='../uncased_L-12_H-768_A-12/vocab.txt', log_dir='../exp/electra/pretrain/runs', save_dir='../exp/bert/mrpc', distill=True, quantize=True, gradually_distill=False, imitate_tinybert=False, pred_distill=True): check_dirs_exist([log_dir, save_dir]) train_cfg_dict = json.load(open(base_train_cfg, "r")) train_cfg_dict.update(json.load(open(train_cfg, "r"))) train_cfg = ElectraConfig().from_dict(train_cfg_dict) model_cfg = ElectraConfig().from_json_file(model_cfg) output_mode, train_cfg.n_epochs, max_len = get_task_params(task_name) set_seeds(train_cfg.seed) tokenizer = tokenization.FullTokenizer(vocab_file=vocab, do_lower_case=True) TaskDataset = dataset_class( task_name) # task dataset class according to the task name model_cfg.num_labels = len(TaskDataset.labels) pipeline = [ Tokenizing(task_name, tokenizer.convert_to_unicode, tokenizer.tokenize), AddSpecialTokensWithTruncation(max_len), TokenIndexing(tokenizer.convert_tokens_to_ids, TaskDataset.labels, output_mode, max_len) ] train_data_set = TaskDataset(train_data_file, pipeline) eval_data_set = TaskDataset(eval_data_file, pipeline) train_data_iter = DataLoader(train_data_set, batch_size=train_cfg.batch_size, shuffle=True) eval_data_iter = DataLoader(eval_data_set, batch_size=train_cfg.batch_size, shuffle=False) generator = ElectraForSequenceClassification.from_pretrained( 'google/electra-small-generator') t_discriminator = ElectraForSequenceClassification.from_pretrained( 'google/electra-base-discriminator') s_discriminator = QuantizedElectraForSequenceClassification if quantize else ElectraForSequenceClassification s_discriminator = s_discriminator.from_pretrained( 'google/electra-small-discriminator', config=model_cfg) model = DistillElectraForSequenceClassification(generator, t_discriminator, s_discriminator, model_cfg) optimizer = optim.optim4GPU(train_cfg, model) writer = SummaryWriter(log_dir=log_dir) # for tensorboardX base_trainer_args = (train_cfg, model_cfg, model, train_data_iter, eval_data_iter, optimizer, save_dir, get_device()) trainer = QuantizedDistillElectraTrainer(task_name, output_mode, distill, gradually_distill, imitate_tinybert, pred_distill, len(TaskDataset.labels), writer, *base_trainer_args) trainer.train(model_file, None, data_parallel) trainer.eval(model_file, data_parallel)
def train_network(model, x, y, x_test=None, y_test=None, epochs=50, batch_size=64, loss_f=BCELoss, optimizer=Adam, lr=0.001, y_postprocessing=utils.y_to_one_hot, weight_decay: float = 0.0000001, verbose: int = 1): print("Started training") best_epoch = (0, 0, 0) if y_postprocessing is not None: y = y_postprocessing(y) y_test = y_postprocessing(y_test) y, y_test = y.astype(np.float), y_test.astype(np.float) loss_f = loss_f() optimizer = optimizer(model.parameters(), lr, weight_decay=weight_decay) model.train() for epoch in range(epochs): for i in range((x.shape[0] // batch_size) + 1): x_for_network = x[i * batch_size:(i + 1) * batch_size] y_for_network = y[i * batch_size:(i + 1) * batch_size] if x_for_network.size == 0: break if verbose > 0: print("batch {} of {}".format(i + 1, x.shape[0] // batch_size + 1)) optimizer.zero_grad() pred = model(x_for_network) loss = loss_f( pred, from_numpy(y_for_network).float().to(utils.get_device())) loss.backward() optimizer.step() train_performance = evaluation.evaluate_model( model, x, y, pred_postprocessing=utils.softmax_to_one_hot, out_dim=2, batch_size=batch_size) print("train performance is {}".format(train_performance)) if x_test is not None: performance = evaluation.evaluate_model( model, x_test, y_test, pred_postprocessing=utils.softmax_to_one_hot, out_dim=2, batch_size=batch_size) print("test performance is {}".format(performance)) if performance > best_epoch[-1]: best_epoch = (epoch, train_performance, performance) print("Finished epoch {}".format(epoch)) if x_test is not None: print( "Finished training. Best epoch is {} with training performance {} " "and test performance {}".format(*best_epoch))
def main(cfg: DictConfig) -> None: "The entry point for testing" assert cfg.model_path is not None, "Need to specify model_path for testing." log.info("\n" + OmegaConf.to_yaml(cfg)) # restore the hyperparameters used for training model_path = hydra.utils.to_absolute_path(cfg.model_path) log.info("Loading the model from %s" % model_path) checkpoint = load_model(model_path) restore_hyperparams(checkpoint["cfg"], cfg) # create dataloaders for validation and testing vocabs = checkpoint["vocabs"] loader_val, _ = create_dataloader( hydra.utils.to_absolute_path(cfg.path_val), "val", cfg.encoder, vocabs, cfg.eval_batch_size, cfg.num_workers, ) loader_test, _ = create_dataloader( hydra.utils.to_absolute_path(cfg.path_test), "test", cfg.encoder, vocabs, cfg.eval_batch_size, cfg.num_workers, ) # restore the trained model checkpoint model = Parser(vocabs, cfg) model.load_state_dict(checkpoint["model_state"]) device, _ = get_device() model.to(device) log.info("\n" + str(model)) log.info("#parameters = %d" % sum([p.numel() for p in model.parameters()])) # validation log.info("Validating..") f1_score = validate(loader_val, model, cfg) log.info( "Validation F1 score: %.03f, Exact match: %.03f, Precision: %.03f, Recall: %.03f" % ( f1_score.fscore, f1_score.complete_match, f1_score.precision, f1_score.recall, )) # testing log.info("Testing..") if cfg.beam_size > 1: log.info("Performing beam search..") f1_score = beam_search(loader_test, model, cfg) else: log.info("Running without beam search..") f1_score = validate(loader_test, model, cfg) log.info( "Testing F1 score: %.03f, Exact match: %.03f, Precision: %.03f, Recall: %.03f" % ( f1_score.fscore, f1_score.complete_match, f1_score.precision, f1_score.recall, ))
                                           sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           num_workers=4,
                                           collate_fn=collate_fn,
                                           sampler=valid_sampler)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=1,
                                          num_workers=4,
                                          shuffle=False)

config = {
    "epochs": 100,
    "device": get_device(),
    "sampling": True,
    "temperature": 1.0,
    "max_sentence_length": 18
}

embedding_dim = 256
hidden_dim = 512
vocab_size = len(vocab)

model = Baseline(embedding_dim, hidden_dim, vocab_size, vanilla=False)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=5e-4)
model.cuda()

train(model, optimizer, criterion, train_loader, valid_loader, vocab, config)
import re

import torch
import data
import utils

device = utils.get_device()


def evaluate(seq2seq, test_generator, vocab):
    # cnt = 0
    acc_cnt = 0
    total_examples = 0
    for (num1s, ops, num2s, l1s, l2s, mask1s, mask2s), (answers, ans_lens, ans_masks) in test_generator.get_batches():
        batch_size = len(num1s)
        context_vector = seq2seq.encode(num1s, ops, num2s, l1s, l2s, mask1s, mask2s)
        decoder_input = torch.tensor([data.BOS_ID] * batch_size).view(-1, 1).to(device)
        current_input = seq2seq.embeddings(decoder_input).view(-1, 1, seq2seq.embedding_dim)
        current_hidden = context_vector.unsqueeze(1).transpose(0, 1)
        max_decode_size = answers.size(1)

        # Greedy decoding: feed each prediction back in as the next input.
        predict_each_t = []
        for t in range(max_decode_size):
            ot, ht, probs, predicts = seq2seq.predict_step(current_input, current_hidden)
            predict_each_t.append(predicts)
            current_input = seq2seq.embeddings(predicts)
            current_hidden = ht
        predict_each_t = torch.cat(predict_each_t, 1)

        predict_numbers = seq2Number(predict_each_t, vocab, True)
        answer_numbers = seq2Number(answers, vocab, False)
        answer_numbers = [re.findall(r'[\-0-9]+', n)[0] for n in answer_numbers]
def main(task='mrpc', train_cfg='./model/config/train_mrpc.json', model_cfg='./model/config/bert_base.json', data_train_file='total_data/imdbtrain.tsv', data_test_file='total_data/IMDB_test.tsv', model_file=None, pretrain_file='./model/uncased_L-12_H-768_A-12/bert_model.ckpt', data_parallel=False, vocab='./model/uncased_L-12_H-768_A-12/vocab.txt', dataName='IMDB', stopNum=250, max_len=300, mode='train'): if mode == 'train': def get_loss_CNN(model, batch, global_step): # make sure loss is a scalar tensor input_ids, segment_ids, input_mask, label_id, seq_lengths = batch logits = model(input_ids, segment_ids, input_mask) loss = criterion(logits, label_id) return loss def evalute_CNN(model, batch): input_ids, segment_ids, input_mask, label_id, seq_lengths = batch logits = model(input_ids, segment_ids, input_mask) return label_id, logits def get_loss_Attn_LSTM( model, batch, global_step): # make sure loss is a scalar tensor input_ids, segment_ids, input_mask, label_id, seq_lengths = batch seq_lengths, perm_idx = seq_lengths.sort(0, descending=True) input_ids = input_ids[perm_idx] label_id = label_id[perm_idx] token1 = embedding(input_ids.long()) logits, attention_score = model(token1.cuda(), input_ids, segment_ids, input_mask, seq_lengths) loss1 = criterion(logits, label_id) return loss1 def evalute_Attn_LSTM(model, batch, global_step, ls): input_ids, segment_ids, input_mask, label_id, seq_lengths = batch seq_lengths, perm_idx = seq_lengths.sort(0, descending=True) input_ids = input_ids[perm_idx] label_id = label_id[perm_idx] token1 = embedding(input_ids.long()) logits, attention_score = model(token1.cuda(), input_ids, segment_ids, input_mask, seq_lengths) logits = F.softmax(logits) y_pred11, y_pred1 = logits.max(1) return label_id, logits def generating_lexiocn(model2, batch, global_step, ls, e): if (global_step == 0): result3.clear() result_label.clear() bb_11.clear() bb_22.clear() input_ids, segment_ids, input_mask, label_id, seq_lengths = batch seq_lengths, perm_idx = seq_lengths.sort(0, descending=True) input_ids = input_ids[perm_idx] label_id = label_id[perm_idx] token1 = embedding(input_ids.long()) #logits = model(input_ids, segment_ids, input_mask) logits2, attention_score2 = model2(token1.cuda(), input_ids, segment_ids, input_mask, seq_lengths) #logits=F.softmax(logits) logits = F.softmax(logits2) # y_pred11, y_pred1 = logits.max(1) y_pred22, y_pred2 = logits2.max(1) atten, attn_s1 = attention_score2.max(1) atte2, attn_s2 = torch.topk(attention_score2, 4) for i in range(0, len(input_ids)): split_tokens = [] att_index = [] for token in tokenizer.tokenize(data0[global_step * 64 + perm_idx[i]]): split_tokens.append(token) if (len(split_tokens) <= attn_s1[i].item()): attn_index3 = attention_score2[i][:len(split_tokens) - 1] attn_num, attn_index2 = attn_index3.max(0) attn_index = attn_index2.item() else: for j in range(0, 4): att_index.append(attn_s2[i][j].item()) tok = [] if (atten[i].item() <= 0): token_ab = split_tokens[0] else: for j in range(0, len(att_index)): if (att_index[j] >= len(split_tokens)): continue tok.append(split_tokens[att_index[j]]) token_temp = data0[global_step * 64 + perm_idx[i]].split(' ') token2 = [] for kk in range(0, len(tok)): token_ab = tok[kk] token_ab = token_ab.replace(".", "") token_ab = token_ab.replace(",", "") token_ab = token_ab.replace("'", "") token_ab = token_ab.replace("!", "") token_ab = token_ab.replace("?", "") token_ab = token_ab.replace("'", "") token_ab = token_ab.replace('"', "") if (token_ab == '' or token_ab == ' ' or token_ab == ',' or 
token_ab == '.' or token_ab == 'from' or token_ab == 'are' or token_ab == 'is' or token_ab == 'and' or token_ab == 'with' or token_ab == 'may' or token_ab == 'would' or token_ab == 'could' or token_ab == 'have' or token_ab == 'has' or token_ab == 'had' or token_ab == 'was' or token_ab == 'were' or token_ab == 'this' or token_ab == 'who' or token_ab == 'that' or token_ab == 'www' or token_ab == 'http' or token_ab == 'com' or token_ab == 'those' or token_ab == 'your' or token_ab == 'not' or token_ab == 'seem' or token_ab == 'too' or token_ab == 'lol' or token_ab == 'but' or token_ab == 'these' or token_ab == 'their' or token_ab == 'can' or token_ab == 'there' or token_ab == 'gave' or token_ab == 'his' or token_ab == 'etc' or token_ab == 'thats' or token_ab == 'though' or token_ab == 'off' or token_ab == 'she' or token_ab == 'them' or token_ab == 'huh' or token_ab == 'why' or token_ab == 'wont' or token_ab == 'any' or token_ab == 'some' or token_ab == 'its' or token_ab == 'yeah' or token_ab == 'yes' or token_ab == 'you' or token_ab == 'should' or token_ab == 'dont' or token_ab == 'anybody' or token_ab == 'than' or token_ab == 'where' or token_ab == 'for' or token_ab == 'more' or token_ab == 'will' or token_ab == 'him' or token_ab == 'its' or token_ab == 'your' or token_ab == 'wii' or token_ab == 'having' or token_ab == 'just' or token_ab == 'help' or token_ab == 'helps' or token_ab == 'all' or token_ab == 'they' or token_ab == 'take' or token_ab == 'the' or token_ab == 'what' or token_ab == 'need' or token_ab == 'make' or token_ab == 'about' or token_ab == 'then' or token_ab == 'when' or token_ab == 'does' or token_ab == 'ask' or token_ab == 'much' or token_ab == 'man' or token_ab == 'know' or token_ab == 'how' or token_ab == 'look' or token_ab == 'like' or token_ab == 'one' or token_ab == 'think' or token_ab == 'tell' or token_ab == 'find' or token_ab == 'cant' or token_ab == 'now' or token_ab == 'try' or token_ab == 'give' or token_ab == 'answer' or token_ab == 'her' or token_ab == 'out' or token_ab == 'get' or token_ab == 'because' or token_ab == 'myself' or token_ab == 'wants' or token_ab == 'movie' or token_ab == 'film' or token_ab == 'films'): continue if (len(token_ab) < 2): continue for gge, input_word in enumerate(token_temp): if (token_ab.lower() in input_word.lower()): input_word = input_word.replace(".", "") input_word = input_word.replace(",", "") input_word = input_word.replace("'", "") input_word = input_word.replace("!", "") input_word = input_word.replace("?", "") input_word = input_word.replace("'", "") input_word = input_word.replace('"', "") token2.append(input_word.lower()) break token2 = list(set(token2)) if (len(token2) < 3): continue #print(token2) sen = "" for l in range(0, len(token2) - 1): sen += token2[l] + ' ' sen += token2[len(token2) - 1] if (y_pred2[i] == 0): try: bb_11[sen] += y_pred22[i] except KeyError: bb_11[sen] = y_pred22[i] if (y_pred2[i] == 1): try: bb_22[sen] += y_pred22[i] except KeyError: bb_22[sen] = y_pred22[i] if (global_step == ls - 1): abusive_11.clear() abusive_22.clear() bb_11_up = sorted(bb_11.items(), key=lambda x: x[1], reverse=True) bb_22_up = sorted(bb_22.items(), key=lambda x: x[1], reverse=True) lexicon_size = 50 bb_11_up = bb_11_up[:lexicon_size] bb_22_up = bb_22_up[:lexicon_size] for i in bb_11_up: flag = 0 for j in bb_22_up: if ((i[0].lower() in j[0].lower()) or (j[0].lower() in i[0].lower())): if (i[1] < j[1]): flag = 1 break if (flag == 0): abusive_11.append(i[0]) for i in bb_22_up: flag = 0 for j in bb_11_up: if ((i[0].lower() in 
j[0].lower()) or (j[0].lower() in i[0].lower())): if (i[1] < j[1]): flag = 1 break if (flag == 0): abusive_22.append(i[0]) ddf = open("./IMDB_Lexicon/imdbLexicon_1.txt", 'w', encoding='UTF8') for i in range(0, len(abusive_11)): ddf.write(abusive_11[i] + '\n') ddf.close() ddf = open("./IMDB_Lexicon/imdbLexicon_2.txt", 'w', encoding='UTF8') for i in range(0, len(abusive_22)): ddf.write(abusive_22[i] + '\n') ddf.close() return label_id, logits def evalute_CNN_SSL(model, batch, global_step): if (global_step == 0): result5.clear() input_ids, segment_ids, input_mask, label_id, seq_lengths = batch logits = model(input_ids, segment_ids, input_mask) logits = F.softmax(logits) y_pred11, y_pred1 = logits.max(1) for i in range(0, len(input_ids)): result5.append([y_pred1[i].item(), y_pred11[i].item()]) return label_id, logits def pseudo_labeling(model2, batch, global_step, ls, e): if (global_step == 0): result3.clear() result4.clear() label_0.clear() label_1.clear() result_label.clear() abusive_11.clear() abusive_22.clear() abusive_dic_file = open("./IMDB_Lexicon/imdbLexicon_1.txt", 'r', encoding='UTF8') for line in abusive_dic_file.read().split('\n'): if (len(line) <= 3): continue abusive_11.append(line) abusive_dic_file.close() abusive_dic_file = open("./IMDB_Lexicon/imdbLexicon_2.txt", 'r', encoding='UTF8') for line in abusive_dic_file.read().split('\n'): if (len(line) <= 3): continue abusive_22.append(line) abusive_dic_file.close() input_ids, segment_ids, input_mask, label_id, seq_lengths = batch seq_lengths, perm_idx = seq_lengths.sort(0, descending=True) input_ids = input_ids[perm_idx] label_id = label_id[perm_idx] token1 = embedding(input_ids.long()) logits2, attention_score2 = model2(token1.cuda(), input_ids, segment_ids, input_mask, seq_lengths) logits2 = F.softmax(logits2) y_pred22, y_pred2 = logits2.max(1) label_id2 = [] for i in range(0, len(input_ids)): input_sentence = data0[global_step * 64 + perm_idx[i]] input_sentence = re.sub("[!@#$%^&*().?\"~/<>:;'{}]", "", input_sentence) matching_word1 = 3 matching_word2 = 4 abusive_word_list_neg11 = list() abusive_word_list_neg11 += matching_blacklist2( abusive_11, input_sentence, matching_word1) abusive_word_list_neg11 = list((set(abusive_word_list_neg11))) abusive_word_list_neg22 = list() abusive_word_list_neg22 += matching_blacklist2( abusive_22, input_sentence, matching_word1) abusive_word_list_neg22 = list((set(abusive_word_list_neg22))) abusive_word_list_neg111 = list() abusive_word_list_neg111 += matching_blacklist2( abusive_11, input_sentence, matching_word2) abusive_word_list_neg111 = list( (set(abusive_word_list_neg111))) abusive_word_list_neg222 = list() abusive_word_list_neg222 += matching_blacklist2( abusive_22, input_sentence, matching_word2) abusive_word_list_neg222 = list( (set(abusive_word_list_neg222))) a = max(len(abusive_word_list_neg11), len(abusive_word_list_neg22)) aa = max(len(abusive_word_list_neg111), len(abusive_word_list_neg222)) if ((len(abusive_word_list_neg11) > len(abusive_word_list_neg22) and result5[global_step * 64 + perm_idx[i]][0] == 0 and result5[global_step * 64 + perm_idx[i]][1] >= 0.9) or (len(abusive_word_list_neg11) > len(abusive_word_list_neg22) and y_pred2[i].item() == 0 and y_pred22[i].item() >= 0.9)): label_0.append(0) result4.append([ global_step * 64 + perm_idx[i], 0, data0[global_step * 64 + perm_idx[i]], label_id[perm_idx[i]].item() ]) elif ((len(abusive_word_list_neg11) < len(abusive_word_list_neg22) and result5[global_step * 64 + perm_idx[i]][0] == 1 and result5[global_step * 64 + 
perm_idx[i]][1] >= 0.9) or (len(abusive_word_list_neg11) < len(abusive_word_list_neg22) and y_pred2[i].item() == 1 and y_pred22[i].item() >= 0.9)): label_1.append(1) result4.append([ global_step * 64 + perm_idx[i], 1, data0[global_step * 64 + perm_idx[i]], label_id[perm_idx[i]].item() ]) elif (aa >= 1 and len(abusive_word_list_neg111) > len(abusive_word_list_neg222)): label_0.append(0) result4.append([ global_step * 64 + perm_idx[i], 0, data0[global_step * 64 + perm_idx[i]], label_id[perm_idx[i]].item() ]) elif (aa >= 1 and len(abusive_word_list_neg111) < len(abusive_word_list_neg222)): label_1.append(1) result4.append([ global_step * 64 + perm_idx[i], 1, data0[global_step * 64 + perm_idx[i]], label_id[perm_idx[i]].item() ]) elif (result5[global_step * 64 + perm_idx[i]][1] and y_pred22[i].item() >= 0.9 and result5[global_step * 64 + perm_idx[i]][0] == y_pred2[i].item()): if (result5[global_step * 64 + perm_idx[i]][0] == 0): label_0.append(0) result4.append([ global_step * 64 + perm_idx[i], 0, data0[global_step * 64 + perm_idx[i]], label_id[perm_idx[i]].item() ]) elif (result5[global_step * 64 + perm_idx[i]][0] == 1): label_1.append(1) result4.append([ global_step * 64 + perm_idx[i], 1, data0[global_step * 64 + perm_idx[i]], label_id[perm_idx[i]].item() ]) else: result4.append([ global_step * 64 + perm_idx[i], -1, data0[global_step * 64 + perm_idx[i]], label_id[perm_idx[i]].item() ]) if (global_step == ls - 1): result_label.clear() result3.clear() print("###result3[i] ###:", len(result3)) a = min(len(label_0), len(label_1)) la_0 = 0 la_1 = 0 la_2 = 0 la_3 = 0 random.shuffle(result4) for i in range(0, len(result4)): if (result4[i][1] == 0 and la_0 < a): if (temp_check[result4[i][0]][0] == 0): temp_check[result4[i][0]][0] = 1 temp_check[result4[i][0]][1] = 0 la_0 += 1 continue elif (result4[i][1] == 1 and la_1 < a): if (temp_check[result4[i][0]][0] == 0): temp_check[result4[i][0]][0] = 1 temp_check[result4[i][0]][1] = 1 la_1 += 1 continue result_label.clear() result3.clear() fw = open('./temp_data/temp_train_IMDB.tsv', 'a', encoding='utf-8', newline='') wr = csv.writer(fw, delimiter='\t') fww = open('./temp_data/temp_train_na_IMDB.tsv', 'w', encoding='utf-8', newline='') wrr = csv.writer(fww, delimiter='\t') for i in range(0, len(temp_check)): if (temp_check[i][0] == 1): result_label.append(str(temp_check[i][3])) result3.append(str(temp_check[i][1])) wr.writerow( [str(temp_check[i][1]), str(temp_check[i][2])]) else: wrr.writerow( [str(temp_check[i][3]), str(temp_check[i][2])]) fw.close() fww.close() data0.clear() temp_check.clear() with open('./temp_data/temp_train_na_IMDB.tsv', "r", encoding='utf-8') as f: lines = csv.reader(f, delimiter='\t') for i in lines: a = '' lines2 = i[1].split(' ') b = 0 for j in range(0, len(lines2)): a += lines2[j] + ' ' b += 1 data0.append(a) temp_check.append([0, -1, a, i[0]]) print("################;", len(data0)) f.close() dataset_temp = TaskDataset('./temp_data/temp_train_IMDB.tsv', pipeline) data_iter_temp = DataLoader(dataset_temp, batch_size=64, shuffle=True) dataset_temp_b = TaskDataset('./temp_data/temp_train_IMDB.tsv', pipeline1) data_iter_temp_b = DataLoader(dataset_temp_b, batch_size=64, shuffle=True) dataset_temp_na = TaskDataset( './temp_data/temp_train_na_IMDB.tsv', pipeline) data_iter_temp_na = DataLoader(dataset_temp_na, batch_size=64, shuffle=False) dataset_temp_na_b = TaskDataset( './temp_data/temp_train_na_IMDB.tsv', pipeline1) data_iter_temp_na_b = DataLoader(dataset_temp_na_b, batch_size=64, shuffle=False) if (global_step != ls - 1): 
dataset_temp = TaskDataset(data_dev_file, pipeline) data_iter_temp = DataLoader(dataset_temp, batch_size=cfg.batch_size, shuffle=True) dataset_temp_b = TaskDataset(data_dev_file, pipeline1) data_iter_temp_b = DataLoader(dataset_temp_b, batch_size=64, shuffle=True) dataset_temp_na = TaskDataset(data_dev_file, pipeline) data_iter_temp_na = DataLoader(dataset_temp_na, batch_size=cfg.batch_size, shuffle=False) dataset_temp_na_b = TaskDataset(data_dev_file, pipeline1) data_iter_temp_na_b = DataLoader(dataset_temp_na_b, batch_size=64, shuffle=False) return label_id, logits2, result_label, result3, data_iter_temp, data_iter_temp_b, data_iter_temp_na, data_iter_temp_na_b def evalute_Attn_LSTM_SSL(model, batch): input_ids, segment_ids, input_mask, label_id, seq_lengths = batch seq_lengths, perm_idx = seq_lengths.sort(0, descending=True) input_ids = input_ids[perm_idx] label_id = label_id[perm_idx] token1 = embedding(input_ids.long()) logits, attention_score = model2(token1.cuda(), input_ids, segment_ids, input_mask, seq_lengths) return label_id, logits curNum = 1 print("###########################################") print(model_cfg) print(model_cfg) #kkk+=1 cfg = train.Config.from_json(train_cfg) model_cfg = models.Config.from_json(model_cfg) for kkk in range(0, 5): print("###########################################") tokenizer = tokenization.FullTokenizer(do_lower_case=True) tokenizer1 = tokenization.FullTokenizer1(vocab_file=vocab, do_lower_case=True) TaskDataset = dataset_class( task) # task dataset class according to the task pipeline = [ Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize), AddSpecialTokensWithTruncation(max_len), TokenIndexing(tokenizer.convert_tokens_to_ids, TaskDataset.labels, max_len) ] pipeline1 = [ Tokenizing(tokenizer1.convert_to_unicode, tokenizer1.tokenize), AddSpecialTokensWithTruncation(max_len), TokenIndexing(tokenizer1.convert_tokens_to_ids1, TaskDataset.labels, max_len) ] fd = open("./total_data/imdbtrain.tsv", 'r', encoding='utf-8') rdr = csv.reader(fd, delimiter='\t') res = [] num_a = 0 num_b = 0 for line in rdr: #print(line) num_a += 1 res.append([line[0], line[1]]) print("curNum#:", curNum) #print(res) fw = open('./data/IMDB_temp_short.tsv', 'w', encoding='utf-8', newline='') wr = csv.writer(fw, delimiter='\t') for i in range(0, curNum): random.shuffle(res) #print(res[1][0]) print("########") curNum += 100 num_data = len(res) num_data_dev_temp = int(num_data * 0.01) num_data_dev = int(num_data_dev_temp * 0.15) num_data_short = int(num_data_dev_temp * 0.85) num_data_train = num_data - num_data_dev_temp fd.close() num = 0 data_train_file = "./data/IMDB_train" + str(kkk + 1) + ".tsv" data_dev_file = "./data/IMDB_dev" + str(kkk + 1) + ".tsv" data_short_file = "./data/IMDB_short" + str(kkk + 1) + ".tsv" print("num_data_dev#:", num_data_dev) print("num_data_short#:", num_data_short) print("num_data_train#:", num_data_train) fw = open('./data/IMDB_temp_short.tsv', 'w', encoding='utf-8', newline='') wr = csv.writer(fw, delimiter='\t') fe = open(data_train_file, 'w', encoding='utf-8', newline='') we = csv.writer(fe, delimiter='\t') res2 = [] num_pos = 0 num_neg = 0 for line in res: #print(line[0]) #print(line[1]) if (line[0] == '0' and num_pos <= (num_data_dev_temp / 2)): num_pos += 1 wr.writerow(['0', line[1]]) elif (line[0] == '1' and num_neg <= (num_data_dev_temp / 2)): num_neg += 1 wr.writerow(['1', line[1]]) else: num += 1 we.writerow([line[0], line[1]]) fw.close() fe.close() print("num_pos #:", num_pos, " num_neg:", num_neg) f = 
open('./data/IMDB_temp_short.tsv', 'r', encoding='utf-8') rdr = csv.reader(f, delimiter='\t') num_pos = 0 num_neg = 0 num = 0 fw = open(data_dev_file, 'w', encoding='utf-8', newline='') wr = csv.writer(fw, delimiter='\t') fe = open(data_short_file, 'w', encoding='utf-8', newline='') we = csv.writer(fe, delimiter='\t') for line in rdr: #print(line[0]) if (line[0] == '0' and num_pos <= (num_data_dev / 2)): num_pos += 1 wr.writerow(['0', line[1]]) elif (line[0] == '1' and num_neg <= (num_data_dev / 2)): num_neg += 1 wr.writerow(['1', line[1]]) else: num += 1 we.writerow([line[0], line[1]]) print("num_pos #:", num_pos, " num_neg:", num_neg) f.close() fw.close() fe.close() dataset = TaskDataset(data_train_file, pipeline) data_iter = DataLoader(dataset, batch_size=64, shuffle=False) dataset_b = TaskDataset(data_train_file, pipeline1) data_iter_b = DataLoader(dataset_b, batch_size=64, shuffle=False) dataset2 = TaskDataset(data_test_file, pipeline) data_iter2 = DataLoader(dataset2, batch_size=64, shuffle=False) dataset2_b = TaskDataset(data_test_file, pipeline1) data_iter2_b = DataLoader(dataset2_b, batch_size=64, shuffle=False) dataset_dev = TaskDataset(data_dev_file, pipeline) data_iter_dev = DataLoader(dataset_dev, batch_size=64, shuffle=False) dataset_dev_b = TaskDataset(data_dev_file, pipeline1) data_iter_dev_b = DataLoader(dataset_dev_b, batch_size=64, shuffle=False) dataset3 = TaskDataset(data_short_file, pipeline) data_iter3 = DataLoader(dataset3, batch_size=64, shuffle=True) dataset3_b = TaskDataset(data_short_file, pipeline1) data_iter3_b = DataLoader(dataset3_b, batch_size=64, shuffle=True) print("###########################################") print(model_cfg) weights = tokenization.embed_lookup2() print("#train_set:", len(data_iter)) print("#test_set:", len(data_iter2)) print("#short_set:", len(data_iter3)) print("#dev_set:", len(data_iter_dev)) curNum += 1 embedding = nn.Embedding.from_pretrained(weights).cuda() criterion = nn.CrossEntropyLoss() model = Classifier(model_cfg, 2) model2 = Classifier_Attention_LSTM(2) trainer = train.Trainer( cfg, dataName, stopNum, model, model2, data_iter, data_iter_b, data_iter2, data_iter2_b, data_iter3, data_iter3_b, data_iter_dev, data_iter_dev_b, optim.optim4GPU(cfg, model, len(data_iter) * 10), torch.optim.Adam(model2.parameters(), lr=0.005), get_device(), kkk + 1) label_0 = [] label_1 = [] result3 = [] result4 = [] result5 = [] bb_11 = {} bb_22 = {} abusive_11 = [] abusive_22 = [] result_label = [] fw = open('./temp_data/temp_train_IMDB.tsv', 'w', encoding='utf-8', newline='') wr = csv.writer(fw, delimiter='\t') fr = open(data_short_file, 'r', encoding='utf-8') rdrr = csv.reader(fr, delimiter='\t') for line in rdrr: wr.writerow([line[0], line[1]]) fw.close() fr.close() data0 = [] temp_check = [] temp_label = [] with open(data_train_file, "r", encoding='utf-8') as f: lines = csv.reader(f, delimiter='\t') for i in lines: a = '' lines2 = i[1].split(' ') for j in range(0, len(lines2)): a += lines2[j] + ' ' data0.append(a) temp_check.append([0, -1, a, i[0]]) temp_label.append([0, 0]) f.close() trainer.train(model_file, pretrain_file, get_loss_CNN, get_loss_Attn_LSTM, evalute_CNN_SSL, pseudo_labeling, evalute_Attn_LSTM, evalute_CNN, evalute_Attn_LSTM_SSL, generating_lexiocn, data_parallel) elif mode == 'eval': def evalute_Attn_LSTM_SSL(model, batch): input_ids, segment_ids, input_mask, label_id, seq_lengths = batch seq_lengths, perm_idx = seq_lengths.sort(0, descending=True) input_ids = input_ids[perm_idx] label_id = label_id[perm_idx] token1 = 
embedding(input_ids.long())
        logits, attention_score = model2(token1.cuda(), input_ids, segment_ids,
                                         input_mask, seq_lengths)
        return label_id, logits

    def evalute_CNN_SSL(model, batch):
        input_ids, segment_ids, input_mask, label_id, seq_lengths = batch
        token1 = embedding(input_ids.long())
        logits, attention_score = model(token1.cuda(), input_ids, segment_ids,
                                        input_mask)
        return label_id, logits

    weights = tokenization.embed_lookup2()
    embedding = nn.Embedding.from_pretrained(weights).cuda()
    criterion = nn.CrossEntropyLoss()
    model = Classifier_CNN(2)
    model2 = Classifier_Attention_LSTM(2)
    trainer = train.Eval(cfg, model, model2, data_iter, save_dir, get_device())
    embedding = nn.Embedding.from_pretrained(weights).cuda()
    results = trainer.eval(evalute_CNN_SSL, evalute_Attn_LSTM_SSL,
                           data_parallel)
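A minimal sketch of the pseudo-labeling decision rule applied per example in pseudo_labeling above, pulled out of the batch loop for readability. The function name lexicon_pseudo_label and its argument names are illustrative only; the original code additionally consults a looser second match count (matching_word2) before falling back to classifier agreement, which this sketch omits.

def lexicon_pseudo_label(n_match_neg, n_match_pos,
                         cnn_label, cnn_conf,
                         lstm_label, lstm_conf,
                         conf_threshold=0.9):
    """Return 0, 1, or None when no confident pseudo-label can be assigned."""
    # Lexicon evidence plus at least one confident classifier vote
    if n_match_neg > n_match_pos:
        if (cnn_label == 0 and cnn_conf >= conf_threshold) or \
           (lstm_label == 0 and lstm_conf >= conf_threshold):
            return 0
    elif n_match_pos > n_match_neg:
        if (cnn_label == 1 and cnn_conf >= conf_threshold) or \
           (lstm_label == 1 and lstm_conf >= conf_threshold):
            return 1
    # No lexicon signal: require both classifiers to agree with high confidence
    if cnn_label == lstm_label and lstm_conf >= conf_threshold:
        return cnn_label
    return None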
def customize_core(config, coregen_filename):
    """
    Reads in the xco from the core directory and customizes it for this
    architecture

    Args:
        config (dictionary): configuration dictionary
        coregen_filename (string): filename of the coregen file to work on

    Returns:
        (string): filename of the customized core

    Raises:
        Nothing
    """
    # Open the coregen file
    fp = open(coregen_filename)
    core_in = fp.read()
    fp.close()

    # Open a reference to the output file
    c_fn = os.path.split(coregen_filename)[1]
    c_fn = os.path.join(get_coregen_dir(config, absolute=True), c_fn)

    # Open up the template dictionary
    fn = COREGEN_TEMPLATE
    fn = os.path.join(os.path.dirname(__file__), fn)
    template = json.load(open(fn, "r"))
    template["device"] = utils.get_device(config)
    template["devicefamily"] = utils.get_family(config)
    template["package"] = utils.get_package(config)
    template["speedgrade"] = utils.get_speed_grade(config)
    template["workingdirectory"] = get_coregen_temp_dir(config, absolute=True)

    fp = open(c_fn, "w")
    # Break the input into lines
    core_in_lines = core_in.splitlines()
    for line in core_in_lines:
        line = line.strip()
        if re.search('BEGIN.*Project.*Options', line, re.I):
            # Found the beginning of the project options: copy the line,
            # then write all the template settings into the new file
            fp.write("%s%s" % (line, os.linesep))
            for key in template:
                fp.write("SET %s = %s%s" % (key, template[key], os.linesep))
            continue
        if "CRC" in line:
            # Don't write the CRC
            continue
        items = line.split(' ')
        if "set" == items[0].lower():
            # This is a line we might need to modify
            if items[1].lower() in template.keys():
                # Skip it, because the new value was already written above
                continue
        fp.write("%s%s" % (line, os.linesep))
    fp.close()
    # Return the path to the customized core, as documented above
    return c_fn
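A possible way to drive the two helpers together and regenerate a core once the .xco has been customized. This is only a sketch: gen_core(), the subprocess invocation, and the coregen command-line flags shown here are assumptions for illustration and are not part of the module above.

import subprocess

def gen_core(config, xco_filename):
    # Write the coregen project file for this device, then rewrite the .xco
    # so its device/package/speed settings match the project settings.
    cgp_file = create_project_file(config)
    custom_xco = customize_core(config, xco_filename)
    # Hand both files to coregen in batch mode (flags assumed, not verified).
    subprocess.check_call(["coregen", "-b", custom_xco, "-p", cgp_file])
    return custom_xco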
char_to_idx, idx_to_char = char_mapping()

config = {
    "VOCAB_SIZE": len(char_to_idx.keys()),
    "HIDDEN": 100,
    # For songs sampling
    "TEMPERATURE": 1,
    "TAKE_MAX_PROBABLE": False,
    "LIMIT_LEN": 440
}

MODEL_INPUT = "$\nX:3"

model = LSTMSimple(config["VOCAB_SIZE"], config["HIDDEN"],
                   config["VOCAB_SIZE"]).to(get_device())
model.init_state()
model.load_state_dict(
    torch.load("trained_models/model2019-11-26-03-35.pth",
               map_location='cpu'))
model.eval()

text = """$
X:3
T:Trow Faicstieu
C:Itt
R:polka
Z:id:hn-hornpipe-59
M:C|
K:A
^GG|B2B B2c BGA|B2d c2c d2B|g6 A3|BdB dBA B2d|edf ecA Bdg|gdc AAF |1 dfdd g2ge ||
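A minimal sketch of how the sampling settings above (TEMPERATURE, TAKE_MAX_PROBABLE, LIMIT_LEN) would typically be applied when generating a tune character by character. LSTMSimple's forward interface is not shown in this snippet, so the one-hot step input and the model(x) call signature are assumptions, as is treating '$' as the song delimiter suggested by MODEL_INPUT.

import torch
import torch.nn.functional as F

def sample_song(model, seed=MODEL_INPUT, cfg=config):
    model.eval()
    model.init_state()
    out = seed
    with torch.no_grad():
        for _ in range(cfg["LIMIT_LEN"]):
            # One-hot encode the last generated character and run one step
            # (assumed forward signature).
            x = torch.zeros(1, 1, cfg["VOCAB_SIZE"],
                            device=next(model.parameters()).device)
            x[0, 0, char_to_idx[out[-1]]] = 1.0
            logits = model(x).view(-1)
            # Temperature-scaled softmax over the vocabulary
            probs = F.softmax(logits / cfg["TEMPERATURE"], dim=0)
            if cfg["TAKE_MAX_PROBABLE"]:
                idx = int(torch.argmax(probs))
            else:
                idx = int(torch.multinomial(probs, 1))
            ch = idx_to_char[idx]
            if ch == "$":  # assumed song delimiter, see MODEL_INPUT above
                break
            out += ch
    return out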
def train( start_epoch=0, additional_epoch=100, lr=0.0001, optim="adam", leaky_relu=False, ndcg_gain_in_train="exp2", sigma=1.0, double_precision=False, standardize=False, small_dataset=False, debug=False, output_dir="/tmp/ranking_output/", ): print("start_epoch:{}, additional_epoch:{}, lr:{}".format( start_epoch, additional_epoch, lr)) writer = SummaryWriter(output_dir) precision = torch.float64 if double_precision else torch.float32 # get training and validation data: data_fold = 'Fold1' train_loader, df_train, valid_loader, df_valid = load_train_vali_data( data_fold, small_dataset) if standardize: df_train, scaler = train_loader.train_scaler_and_transform() df_valid = valid_loader.apply_scaler(scaler) lambdarank_structure = [136, 64, 16] net = LambdaRank(lambdarank_structure, leaky_relu=leaky_relu, double_precision=double_precision, sigma=sigma) device = get_device() net.to(device) net.apply(init_weights) print(net) ckptfile = get_ckptdir('lambdarank', lambdarank_structure, sigma) if optim == "adam": optimizer = torch.optim.Adam(net.parameters(), lr=lr) elif optim == "sgd": optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9) else: raise ValueError( "Optimization method {} not implemented".format(optim)) print(optimizer) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.75) ideal_dcg = NDCG(2**9, ndcg_gain_in_train) for i in range(start_epoch, start_epoch + additional_epoch): net.train() net.zero_grad() count = 0 batch_size = 200 grad_batch, y_pred_batch = [], [] for X, Y in train_loader.generate_batch_per_query(): if np.sum(Y) == 0: # negative session, cannot learn useful signal continue N = 1.0 / ideal_dcg.maxDCG(Y) X_tensor = torch.tensor(X, dtype=precision, device=device) y_pred = net(X_tensor) y_pred_batch.append(y_pred) # compute the rank order of each document rank_df = pd.DataFrame({"Y": Y, "doc": np.arange(Y.shape[0])}) rank_df = rank_df.sort_values("Y").reset_index(drop=True) rank_order = rank_df.sort_values("doc").index.values + 1 with torch.no_grad(): pos_pairs_score_diff = 1.0 + torch.exp(sigma * (y_pred - y_pred.t())) Y_tensor = torch.tensor(Y, dtype=precision, device=device).view(-1, 1) rel_diff = Y_tensor - Y_tensor.t() pos_pairs = (rel_diff > 0).type(precision) neg_pairs = (rel_diff < 0).type(precision) Sij = pos_pairs - neg_pairs if ndcg_gain_in_train == "exp2": gain_diff = torch.pow(2.0, Y_tensor) - torch.pow( 2.0, Y_tensor.t()) elif ndcg_gain_in_train == "identity": gain_diff = Y_tensor - Y_tensor.t() else: raise ValueError( "ndcg_gain method not supported yet {}".format( ndcg_gain_in_train)) rank_order_tensor = torch.tensor(rank_order, dtype=precision, device=device).view(-1, 1) decay_diff = 1.0 / torch.log2(rank_order_tensor + 1.0) - 1.0 / torch.log2( rank_order_tensor.t() + 1.0) delta_ndcg = torch.abs(N * gain_diff * decay_diff) lambda_update = sigma * (0.5 * (1 - Sij) - 1 / pos_pairs_score_diff) * delta_ndcg lambda_update = torch.sum(lambda_update, 1, keepdim=True) assert lambda_update.shape == y_pred.shape check_grad = torch.sum(lambda_update, (0, 1)).item() if check_grad == float('inf') or np.isnan(check_grad): import ipdb ipdb.set_trace() grad_batch.append(lambda_update) # optimization is to similar to RankNetListWise, but to maximize NDCG. 
# lambda_update scales with gain and decay count += 1 if count % batch_size == 0: for grad, y_pred in zip(grad_batch, y_pred_batch): y_pred.backward(grad / batch_size) if count % (4 * batch_size) == 0 and debug: net.dump_param() optimizer.step() net.zero_grad() grad_batch, y_pred_batch = [], [ ] # grad_batch, y_pred_batch used for gradient_acc # optimizer.step() print( get_time(), "training dataset at epoch {}, total queries: {}".format(i, count)) if debug: eval_cross_entropy_loss(net, device, train_loader, i, writer, phase="Train") # eval_ndcg_at_k(net, device, df_train, train_loader, 100000, [10, 30, 50]) if i % 5 == 0 and i != start_epoch: print(get_time(), "eval for epoch: {}".format(i)) eval_cross_entropy_loss(net, device, valid_loader, i, writer) eval_ndcg_at_k(net, device, df_valid, valid_loader, 100000, [10, 30], i, writer) if i % 10 == 0 and i != start_epoch: save_to_ckpt(ckptfile, i, net, optimizer, scheduler) scheduler.step() # save the last ckpt save_to_ckpt(ckptfile, start_epoch + additional_epoch, net, optimizer, scheduler) # save the final model torch.save(net.state_dict(), ckptfile) ndcg_result = eval_ndcg_at_k(net, device, df_valid, valid_loader, 100000, [10, 30], start_epoch + additional_epoch, writer) print( get_time(), "finish training " + ", ".join( ["NDCG@{}: {:.5f}".format(k, ndcg_result[k]) for k in ndcg_result]), '\n\n')
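For reference, the pairwise weight accumulated into lambda_update in the loop above corresponds to the standard LambdaRank gradient, written here with $s_i$ for the predicted score of document $i$ and $S_{ij} \in \{-1, 0, 1\}$ as in the code:

$$\lambda_{ij} = \sigma \left[ \tfrac{1}{2}\bigl(1 - S_{ij}\bigr) - \frac{1}{1 + e^{\sigma (s_i - s_j)}} \right] \bigl|\Delta \mathrm{NDCG}_{ij}\bigr|, \qquad \lambda_i = \sum_j \lambda_{ij}.$$

Each $\lambda_i$ is then fed back through y_pred.backward(grad / batch_size) as the gradient of the score, which is why gradients are collected per query in grad_batch and applied only every batch_size queries.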
def train(args): # Init wandb run = wandb.init(name=args.save_dir[len('../runs/'):], config=args, project='sign-language-recognition') # Create directory for model checkpoints and log if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) # Save args with open(os.path.join(args.save_dir, 'args.json'), 'w') as f: json.dump(vars(args), f, sort_keys=True, indent=2) # Logger logger = create_logger(args.save_dir) # Set gpu if torch.cuda.is_available(): i = get_free_gpu() device = get_device(gpu=i) else: device = 'cpu' logger.info('using device: {}'.format(device)) # Prepare early stop stopped = False best_epoch = 0 best_loss = torch.Tensor([float('Inf')]) # Data if args.freeze_vgg: real_batch_size = 3 else: real_batch_size = 2 # can't fit more into gpu memory json_file = os.path.join(args.data_path, 'WLASL_v0.3.json') videos_folder = os.path.join(args.data_path, 'videos') keypoints_folder = os.path.join(args.data_path, 'keypoints') train_transforms = transforms.Compose([videotransforms.RandomCrop(224)]) val_transforms = train_transforms # Debug data if args.debug_dataset: train_dataset = WLASL(json_file=json_file, videos_folder=videos_folder, keypoints_folder=keypoints_folder, transforms=train_transforms, split='train', subset=args.subset) train_dl = torch.utils.data.DataLoader(train_dataset, batch_size=real_batch_size, sampler=DebugSampler( args.debug_dataset, len(train_dataset))) val_dl = train_dl else: train_dataset = WLASL(json_file=json_file, videos_folder=videos_folder, keypoints_folder=keypoints_folder, transforms=train_transforms, split='train', subset=args.subset) train_dl = torch.utils.data.DataLoader(train_dataset, batch_size=real_batch_size, shuffle=True) val_dataset = WLASL(json_file=json_file, videos_folder=videos_folder, keypoints_folder=keypoints_folder, transforms=val_transforms, split='val', subset=args.subset) val_dl = torch.utils.data.DataLoader(val_dataset, batch_size=real_batch_size, shuffle=True) logger.info('data loaded') # Model, loss, optimizer m = Conv2dRNN(args).to(device) optimizer = torch.optim.Adam(m.parameters(), lr=args.lr) criterion = nn.CrossEntropyLoss() # Resume train start_epoch = 0 if args.resume_train: checkpoint = torch.load(os.path.join(args.save_dir, 'checkpoint.pt.tar'), map_location=torch.device('cpu')) best_epoch = checkpoint['epoch'] m.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) m = m.to(device) best_loss = checkpoint['best_val_loss'] start_epoch = best_epoch + 1 # Change learning rate for g in optimizer.param_groups: g['lr'] = args.lr logger.info( 'Resuming training from epoch {} with best loss {:.4f}'.format( start_epoch, best_loss)) # learning rate scheduler scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, factor=args.lr_schedule_factor, patience=args.lr_schedule_patience, threshold=args.lr_schedule_threshold) # Watch model with wandb run.watch(m, log='all', log_freq=5) # Print args logger.info('using args: \n' + json.dumps(vars(args), sort_keys=True, indent=2)) # Train loop for t in range(args.n_epochs): t += start_epoch # Train losses = AverageMeter() batch_time = AverageMeter() m.train() start_t = time.time() for i, batch in enumerate(train_dl): # Run the forward pass multiple times and accumulate gradient (to be able to use large batch size) X = batch['X'].to(device) label = batch['label'].to(device) # [per frame logits, mean of all frames logits] logits = m(X) # Create label for each logit label = torch.cat([l.repeat(logits.shape[1], 1) for l in label], dim=0) # Squeeze 
time sequence and batch into one dimension logits = logits.reshape(logits.shape[0] * logits.shape[1], logits.shape[2]) loss = criterion(logits, label.squeeze()) loss.backward() losses.update(loss.item()) if (i % (args.batch_size // real_batch_size)) == 0: # Optimize with accumulated gradient optimizer.step() optimizer.zero_grad() batch_time.update(time.time() - start_t) start_t = time.time() train_loss = losses.avg # Validate with torch.no_grad(): top1 = AverageMeter() top5 = AverageMeter() top10 = AverageMeter() losses = AverageMeter() m.eval() for batch in val_dl: X = batch['X'].to(device) label = batch['label'].to(device) # [per frame logits, mean of all frames logits] logits = m(X) # Create label for each logit label = torch.cat( [l.repeat(logits.shape[1], 1) for l in label], dim=0) # Squeeze time sequence and batch into one dimension logits = logits.reshape(logits.shape[0] * logits.shape[1], logits.shape[2]) losses.update(criterion(logits, label.squeeze()).item()) # Update metrics acc1, acc5, acc10 = topk_accuracy(logits, label, topk=(1, 5, 10)) top1.update(acc1.item()) top5.update(acc5.item()) top10.update(acc10.item()) val_loss = losses.avg # Save best model if val_loss < best_loss: best_loss, best_epoch = val_loss, t save_best(args, t, m, optimizer, best_loss) # Check early stop if t >= best_epoch + args.early_stop: logger.info('EARLY STOP') break # Log info logger.info( 'epoch: {} train loss: {:.4f} val loss: {:.4f} top1acc {:.4f} top5acc {:.4f} top10acc {:.4f} lr: {:.2e} time per batch {:.1f} s' .format(t + 1, train_loss, val_loss, top1.avg, top5.avg, top10.avg, optimizer.param_groups[0]['lr'], batch_time.avg)) # Wandb log run.log({ 'train_loss': train_loss, 'val_loss': val_loss, 'top1_acc': top1.avg, 'top5_acc': top5.avg, 'top10_acc': top10.avg, 'lr': optimizer.param_groups[0]['lr'] }) # Scheduler step if args.use_lr_scheduler: scheduler.step(val_loss)
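A minimal, generic sketch of the gradient-accumulation pattern used in the training loop above: forward/backward passes on small real batches, with an optimizer step every accumulation_steps batches. The names model, loader, criterion and accumulation_steps are placeholders rather than objects from this script, and unlike the loop above this sketch divides the loss by accumulation_steps so the update approximates a large-batch average instead of a sum.

def train_one_epoch(model, loader, criterion, optimizer, device,
                    accumulation_steps):
    model.train()
    optimizer.zero_grad()
    for i, (x, y) in enumerate(loader):
        x, y = x.to(device), y.to(device)
        loss = criterion(model(x), y)
        # Scale so the accumulated gradient matches a large-batch average.
        (loss / accumulation_steps).backward()
        if (i + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()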