def fine_tune_stage_2(model):
    label_dict = get_TNEWS_label_dict()
    train = Mydataset("data_split/TNEWS_dev.csv", label_dict)
    eval = Mydataset("data_split/TNEWS_train_small.csv", label_dict)
    training_args = TrainingArguments(
        output_dir='exp/TNEWS/model',    # output directory
        num_train_epochs=5,              # total # of training epochs
        per_device_train_batch_size=16,  # batch size per device during training
        per_device_eval_batch_size=16,   # batch size for evaluation
        warmup_steps=500,                # number of warmup steps for learning rate scheduler
        weight_decay=0.01,               # strength of weight decay
        save_total_limit=2,
        eval_steps=200,
        learning_rate=1e-5,
        logging_dir='exp/TNEWS/logs',    # directory for storing logs
        evaluation_strategy='steps',
        load_best_model_at_end=True,
        metric_for_best_model="marco_f1_score",
    )
    logging.info(model)
    trainer = Trainer(
        model=model,              # the instantiated 🤗 Transformers model to be trained
        args=training_args,       # training arguments, defined above
        train_dataset=train,      # training dataset
        eval_dataset=eval,        # evaluation dataset
        data_collator=data_collator,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
    )
    trainer.train()
    return trainer.model
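# The Trainer call above references a module-level `data_collator` whose definition is not
# part of this excerpt. A minimal sketch of what it could look like, assuming the datasets
# yield tokenizer-style feature dicts and that dynamic padding via transformers'
# DataCollatorWithPadding is the intended behaviour (an assumption, not the original code):
from transformers import BertTokenizer, DataCollatorWithPadding

tokenizer = BertTokenizer.from_pretrained("hfl/chinese-roberta-wwm-ext-large")
data_collator = DataCollatorWithPadding(tokenizer)  # pads each batch to its longest sequence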
def __init__(self, root, img_size):
    dataset = Mydataset(root, img_size)
    self.dataloader = DataLoader(dataset, batch_size=1024, shuffle=True)
    torch.cuda.empty_cache()
    if img_size == 12:
        self.net = PNet()
        self.net.load_state_dict(torch.load("../param/test_param/net.pt"))
    elif img_size == 24:
        self.net = RNet()
        # self.net.load_state_dict(torch.load("../param/0_pnet.pt"))
    elif img_size == 48:
        self.net = ONet()
        # self.net.load_state_dict(torch.load("../param/0_pnet.pt"))
    else:
        print("img_size error!", img_size)
        exit()
    self.img_size = img_size
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.net.to(self.device)
    self.opt = optim.Adam(self.net.parameters())
    self.off_loss_fn = torch.nn.MSELoss()
    self.conf_loss_fn = torch.nn.BCEWithLogitsLoss()
    self.summary = SummaryWriter("./logs")
def inference(model):
    import json

    index = 0
    task_name = "OCEMOTION"
    task_label_dict = get_OCEMOTION_label_dict()
    file = open("%s_predict.json" % task_name.lower(), "w+")
    test_dataset = Mydataset("/tcdata/%s_test_B.csv" % task_name.lower(),
                             task_label_dict,
                             is_test=True)
    dataloader = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=16,
                                             collate_fn=data_collator)
    device = torch.device("cuda")
    model.to(device)
    model.eval()
    inverse_label_dict = {v: k for k, v in task_label_dict.items()}
    for batch in dataloader:
        logits = model(batch["input_ids"].to(device),
                       batch["attention_mask"].to(device))[0]
        predict_label = logits.argmax(-1)
        labels = batch["labels"].flatten().cpu().numpy()
        predict_label = predict_label.flatten().cpu().numpy()
        for i in range(labels.shape[0]):
            json.dump(
                {
                    "id": int(index),
                    "label": inverse_label_dict[int(predict_label[i])]
                }, file)
            file.write("\n")
            index += 1
    print("task %s complete" % task_name)
    file.close()
model_dict = {}
file_name = os.walk("exp/TNEWS/model/").__next__()[1][-1]
model_dict["TNEWS"] = "exp/TNEWS/model/%s" % file_name
file_name = os.walk("exp/OCNLI/model/").__next__()[1][-1]
model_dict["OCNLI"] = "exp/OCNLI/model/%s" % file_name
file_name = os.walk("exp/OCEMOTION/model/").__next__()[1][-1]
model_dict["OCEMOTION"] = "exp/OCEMOTION/model/%s" % file_name

for task_name in task_name_list:
    index = 0
    file = open("%s_predict.json" % task_name.lower(), "w+")
    if task_name == "OCNLI":
        test_dataset = OCNLI_dataset("/tcdata/ocnli_test_B.csv",
                                     task_label_dict["OCNLI"],
                                     is_test=True)
    else:
        test_dataset = Mydataset("/tcdata/%s_test_B.csv" % task_name.lower(),
                                 task_label_dict[task_name],
                                 is_test=True)
    dataloader = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=16,
                                             collate_fn=data_collator)
    print("loading model from %s" % model_dict[task_name])
    model = BertForSequenceClassification.from_pretrained(
        model_dict[task_name], num_labels=len(task_label_dict[task_name]))
    model.to(device)
    model.eval()
    inverse_label_dict = {v: k for k, v in task_label_dict[task_name].items()}
    for batch in dataloader:
        logits = model(batch["input_ids"].to(device),
                       batch["attention_mask"].to(device)).logits
        predict_label = logits.argmax(-1)
        labels = batch["labels"].flatten().cpu().numpy()
        predict_label = predict_label.flatten().cpu().numpy()
        for i in range(labels.shape[0]):
            json.dump({"id": int(index),
                       "label": inverse_label_dict[int(predict_label[i])]},
                      file)
            file.write("\n")
            index += 1
        if epoch % 5 == 0:
            torch.save(net.state_dict(), "./ckpt/{}.pkl".format(epoch))


if __name__ == "__main__":
    net = TinySSD()
    net.cuda()
    loss_fn = MultiBoxLoss(3.)
    transform = transforms.Compose([
        transforms.Resize((512, 512)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    # transform = Compose([
    #     ConvertFromInts(),
    #     PhotometricDistort(),
    #     Expand([123, 117, 104]),
    #     RandomSampleCrop(),
    #     RandomMirror(),
    #     ToPercentCoords(),
    #     Resize(300),
    #     SubtractMeans([123, 117, 104]),
    #     ToTensor(),
    # ])
    optm = optim.Adam(net.parameters(), lr=1e-3)
    dtset = Mydataset(img_path="./dataset", transform=transform)
    dataloader = DataLoader(dtset, batch_size=8, shuffle=True)
    train(dataloader, net, loss_fn, optm)
def main():
    global args, best_prec1
    print('parsing args...')
    args = parser.parse_args()

    # create model
    print("=> creating model '{}'".format(args.arch))
    if args.arch.lower().startswith('wideresnet'):
        # a customized resnet model with last feature map size as 14x14 for better class activation mapping
        model = wideresnet.resnet50(num_classes=args.num_classes)
    else:
        model = models.__dict__[args.arch](num_classes=args.num_classes)

    if args.arch.lower().startswith('alexnet') or args.arch.lower().startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()
    print(model)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    # train_loader = Provider(phase='train', batch_size=args.batch_size, workers=args.workers)
    # val_loader = Provider(phase='test', batch_size=args.batch_size)
    train_loader = torch.utils.data.DataLoader(Mydataset(phase='train'),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(Mydataset(phase='test'),
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best, './snapshot/' + args.arch.lower() + '_' + str(epoch))
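# `adjust_learning_rate` is called in the epoch loop above but is not defined in this
# excerpt. A minimal sketch, assuming the conventional step decay from the PyTorch
# ImageNet example (divide the initial learning rate by 10 every 30 epochs); the actual
# schedule used here may differ.
def adjust_learning_rate(optimizer, epoch):
    lr = args.lr * (0.1 ** (epoch // 30))  # assumed schedule, not taken from the original code
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr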
    loss_obj_center = center_loss_fn(output_obj[:, 0:2], target_obj[:, 0:2])
    loss_obj_wh = wh_loss_fn(output_obj[:, 2:4], target_obj[:, 2:4])
    loss_obj_cls = cls_loss_fn(output_obj[:, 5:], target_obj[:, 5].long())
    loss_obj = loss_obj_conf + loss_obj_center + loss_obj_wh + loss_obj_cls

    # for negative samples, only the confidence loss needs to be computed
    mask_noobj = target[..., 4] == 0
    output_noobj, target_noobj = output[mask_noobj], target[mask_noobj]
    loss_noobj = conf_loss_fn(output_noobj[:, 4], target_noobj[:, 4])

    loss = alpha * loss_obj + (1 - alpha) * loss_noobj
    return loss


if __name__ == '__main__':
    datas = Mydataset()
    imageDataloader = data.DataLoader(dataset=datas, batch_size=1, shuffle=True)
    net = MainNet(14).to(device)
    if os.path.exists(savepath):
        net.load_state_dict(torch.load(savepath))
    optim = torch.optim.Adam(net.parameters(), weight_decay=4e-4)
    base_opt = torch.optim.Adam(net.parameters(), lr=1e-3, betas=(0.9, 0.999))  # Any optimizer
    lookahead = Lookahead(base_opt, k=5, alpha=0.5)  # Initialize Lookahead
    losses = []
def main():
    global args, best_prec1
    args = parser.parse_args()

    final = pd.read_csv(
        '/home/cytuser/code/panorama/pretrained-models.pytorch-master/classification_data/final.csv'
    )
    final['label'] = final['label'].apply(lambda x: classes(x))
    y = final['label'].values
    skf = StratifiedKFold(n_splits=5, shuffle=True)
    kfold = []
    for train_index, test_index in skf.split(y, y):
        # print(len(train_index))
        # print(len(test_index))
        kfold.append([train_index, test_index])

    # create model
    print("=> creating model '{}'".format(args.arch))
    # if args.pretrained.lower() not in ['false', 'none', 'not', 'no', '0']:
    #     print("=> using pre-trained parameters '{}'".format(args.pretrained))
    #     model = pretrainedmodels.__dict__[args.arch](num_classes=1000,
    #                                                  pretrained=args.pretrained)
    # else:
    #     model = pretrainedmodels.__dict__[args.arch]()
    model = pretrainedmodels.__dict__['se_resnet50'](num_classes=1000,
                                                     pretrained='imagenet')
    n_inputs = model.last_linear.in_features
    classifier = nn.Sequential(
        OrderedDict([('classifier', nn.Linear(n_inputs, 596))]))
    # model.conv0.conv = nn.Conv2d(1, 96, kernel_size=(3, 3), stride=(2, 2), bias=False)
    model.layer0.conv1 = nn.Conv2d(1, 64,
                                   kernel_size=(7, 7),
                                   stride=(2, 2),
                                   padding=(3, 3),
                                   bias=False)
    model.last_linear = classifier
    # model = EfficientNetB0()
    # model.conv1 = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'classification_data')
    # print(traindir)
    # valdir = os.path.join(args.data, 'val')

    # transform = transforms.Compose([
    #     transforms.Resize((224, 224)),
    #     transforms.ToTensor(),
    #     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])
    transform = transforms.Compose([
        # transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ])

    # if 'scale' in pretrainedmodels.pretrained_settings[args.arch][args.pretrained]:
    #     scale = pretrainedmodels.pretrained_settings[args.arch][args.pretrained]['scale']
    # else:
    #     scale = 0.875
    scale = 0.875
    # print('Images transformed from size {} to {}'.format(
    #     int(round(max(model.input_size) / scale)),
    #     model.input_size))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    model = torch.nn.DataParallel(model).cuda()

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        i = np.random.choice(5)
        train_idx, val_idx = kfold[i][0], kfold[i][1]
        train_dataset = Mydataset(traindir, train_idx, transform)
        # print('batch size is :', args.batch_size)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True)
        val_dataset = Mydataset(traindir, val_idx, transform)
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)
        if epoch % 5 == 0:
            validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1[0] > best_prec1
        best_prec1 = max(prec1[0], best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)
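# `save_checkpoint` is likewise external to these excerpts. A minimal sketch of the common
# pattern (serialize the state dict, and copy it aside when `is_best` is set); the default
# filenames below are illustrative assumptions, not the original code.
import shutil
import torch

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)                          # always keep the latest checkpoint
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')  # keep a separate copy of the best one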
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s'
)

import os

os.environ["CUDA_VISIBLE_DEVICES"] = "None"
task_name = ["OCEMOTION", "OCNLI", "TNEWS"]
label_len = [3, 7, 17]
task_label_dict = {}
task_label_dict["OCEMOTION"] = {
    0: "100",
}
train = Mydataset(path="OCEMOTION_train1128.csv")
label_dict = {0: "O", 1: ",", 2: "。", 3: ":", 4: "、"}
model = BertForSequenceClassification.from_pretrained("bert-base-chinese")
training_args = TrainingArguments(
    output_dir='exp/bert_base/model',  # output directory
    num_train_epochs=25,               # total # of training epochs
    per_device_train_batch_size=2,     # batch size per device during training
    per_device_eval_batch_size=2,      # batch size for evaluation
    warmup_steps=500,                  # number of warmup steps for learning rate scheduler
    weight_decay=0.01,                 # strength of weight decay
    save_steps=20000,
    eval_steps=5,
    logging_dir='exp/bert_base/logs',  # directory for storing logs
    evaluation_strategy='steps',
    # prediction_loss_only=True,
    level=logging.INFO,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s'
)

import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
label_dict = {
    'sadness': 0,
    'happiness': 1,
    'disgust': 2,
    'anger': 3,
    'like': 4,
    'surprise': 5,
    'fear': 6
}
train = Mydataset("data/OCEMOTION_train1128.csv", label_dict)
eval = Mydataset("data/OCEMOTION_train1128.csv", label_dict)
logging.info(torch.cuda.is_available())
model = BertForSequenceClassification.from_pretrained(
    "hfl/chinese-roberta-wwm-ext-large", num_labels=len(label_dict))
training_args = TrainingArguments(
    output_dir='exp/OCEMOTION/model',  # output directory
    num_train_epochs=6,                # total # of training epochs
    per_device_train_batch_size=8,     # batch size per device during training
    per_device_eval_batch_size=8,      # batch size for evaluation
    warmup_steps=500,                  # number of warmup steps for learning rate scheduler
    weight_decay=0.01,                 # strength of weight decay
    save_steps=1000,
    save_total_limit=1,
    pred = torch.argmax(pred, dim=1)
    return pred


data_dir = 'data/data1.txt'
model_dir = r'model\scnn_0_512_32fr_1.pkl'  # raw string avoids the invalid "\s" escape

if __name__ == "__main__":
    scnn = SignalCNN(16, 32, 2)
    scnn.cuda()
    optimizer = optim.Adam(scnn.parameters(), lr=LR, weight_decay=LR_DECAY)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1, last_epoch=-1)
    loss_func = nn.CrossEntropyLoss()
    dataSet = Mydataset(data_dir)
    # testSet = Mydataset('data/sptestdata.txt')
    # test_total = len(testSet)
    total = len(dataSet)
    print(total)
    train_size = int(0.8 * total)
    valid_size = total - train_size
    train_dataset, valid_dataset = torch.utils.data.random_split(
        dataSet, [train_size, valid_size])
    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    valid_loader = DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE, shuffle=True)
    # test_loader = DataLoader(dataset=testSet, batch_size=60, shuffle=True)
    for epoch in range(N_EPOCH):
        scnn.zero_grad()
        # scheduler.step()
        for batch, (x, y, c, index) in enumerate(train_loader):
def fine_tune_stage():
    label_dict = {}
    for i in range(17):
        if i < 10:
            label_dict["10%d" % i] = i
        else:
            label_dict["1%d" % i] = i
    train = Mydataset("data_split/TNEWS_train.csv", label_dict)
    eval = Mydataset("data_split/TNEWS_dev.csv", label_dict)
    model = BertClassification("hfl/chinese-roberta-wwm-ext-large",
                               num_labels=len(label_dict))
    for name, param in model.named_parameters():
        if param.requires_grad:
            logging.info(name)
    # model = BertForSequenceClassification.from_pretrained("exp/TNEWS_pretrain/model/checkpoint-6000", num_labels=len(label_dict))
    # model = BertForSequenceClassification.from_pretrained("hfl/chinese-roberta-wwm-ext-large", num_labels=len(label_dict))
    training_args = TrainingArguments(
        output_dir='exp/TNEWS/model',    # output directory
        num_train_epochs=1,              # total # of training epochs
        per_device_train_batch_size=16,  # batch size per device during training
        per_device_eval_batch_size=16,   # batch size for evaluation
        warmup_steps=500,                # number of warmup steps for learning rate scheduler
        weight_decay=0.01,               # strength of weight decay
        save_total_limit=1,
        save_steps=1000,
        eval_steps=1000,
        learning_rate=1e-5,
        logging_dir='exp/TNEWS/logs',    # directory for storing logs
        evaluation_strategy='steps',
        # load_best_model_at_end=True,
        # metric_for_best_model="marco_f1_score",
        # prediction_loss_only=True,
        do_eval=False,
    )
    # logging.info(model)

    from sklearn.metrics import precision_recall_fscore_support, f1_score, confusion_matrix, classification_report

    def compute_metrics(pred):
        labels = pred.label_ids
        preds = pred.predictions.argmax(-1)
        labels = labels.flatten()
        preds = preds.flatten()
        marco_f1_score = f1_score(labels, preds, average='macro')
        logging.info(marco_f1_score)
        # logging.info(f"{'confusion_matrix':*^80}")
        # logging.info(confusion_matrix(labels, preds))
        # logging.info(f"{'classification_report':*^80}")
        # logging.info(classification_report(labels, preds))
        res = {"marco_f1_score": marco_f1_score}
        return res

    trainer = Trainer(
        model=model,           # the instantiated 🤗 Transformers model to be trained
        args=training_args,    # training arguments, defined above
        train_dataset=train,   # training dataset
        eval_dataset=eval,     # evaluation dataset
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )
    trainer.train()