def init():
    global model, device
    # Resolve the registered model path when running under Azure ML;
    # fall back to a local file otherwise.
    try:
        model_path = Model.get_model_path('pytorch_mnist')
    except Exception:
        model_path = 'model.pth'
    device = torch.device('cpu')
    model = CNN()
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()
    print('Initialized model "{}" at {}'.format(model_path,
                                                datetime.datetime.now()))
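# Azure ML scoring scripts pair init() with a run() entry point, which is not
# shown here. A minimal sketch of one, assuming the request body carries a
# flattened 28x28 grayscale image under a 'data' key (the key name and JSON
# schema are assumptions, not part of the original script):
import json

import numpy as np
import torch


def run(raw_data):
    # Parse the assumed request schema into a (1, 1, 28, 28) float tensor
    data = np.array(json.loads(raw_data)['data'], dtype=np.float32)
    tensor = torch.from_numpy(data).reshape(1, 1, 28, 28).to(device)
    with torch.no_grad():
        output = model(tensor)
        prediction = int(output.argmax(dim=1).item())
    return json.dumps({'prediction': prediction})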
def main():
    torch.manual_seed(42)

    # Random embeddings:
    # params = {'batch_size': 32, 'dropout': 0, 'hidden_dim': 128, 'learning_rate': 0.01,
    #           'num_epochs': 5, 'num_layers': 2, 'oversample': False, 'soft_labels': False}
    # Glove embeddings:
    params = {
        'batch_size': 32,
        'dropout': 0,
        'hidden_dim': 128,
        'learning_rate': 0.001,
        'num_epochs': 5,
        'num_layers': 2,
        'oversample': False,
        'soft_labels': False
    }
    # Random embeddings (alternative):
    # params = {'batch_size': 32, 'dropout': 0, 'hidden_dim': 256, 'learning_rate': 0.0001,
    #           'num_epochs': 5, 'num_layers': 3, 'oversample': False, 'soft_labels': False}

    # General settings
    experiment_number = 1
    test_percentage = 0.1
    val_percentage = 0.2
    batch_size = params["batch_size"]
    num_epochs = 5  # params["num_epochs"]
    dropout = params["dropout"]
    embedding_dim = 300
    model_name = "CNN"  # or "LSTM", "Bert"
    unsupervised = True
    embedding = "Glove"  # or "Random", "Both"
    soft_labels = False
    combine = embedding == "Both"

    # LSTM parameters
    if model_name == "LSTM":
        hidden_dim = params["hidden_dim"]
        num_layers = params["num_layers"]

    # Bert parameters
    num_warmup_steps = 100
    num_total_steps = 1000
    if model_name == "Bert":
        embedding = "None"

    if embedding == "Both":
        combine = True
        embedding = "Random"
    else:
        combine = False

    learning_rate = params["learning_rate"]  # e.g. 5e-5, 3e-5, 2e-5 for Bert
    oversample_bool = False
    weighted_loss = True

    # Load data
    dataset = Dataset("../data/cleaned_tweets_orig.csv",
                      use_embedding=embedding,
                      embedd_dim=embedding_dim,
                      combine=combine,
                      for_bert=(model_name == "Bert"))
    # dataset.oversample()
    train_data, val_test_data = split_dataset(dataset,
                                              test_percentage + val_percentage)
    val_data, test_data = split_dataset(
        val_test_data, test_percentage / (test_percentage + val_percentage))
    # save_data(train_data, 'train')
    # save_data(test_data, 'test')

    # Define loaders
    if oversample_bool:
        weights, targets = get_loss_weights(train_data, return_targets=True)
        # Per-class sample counts, manually rescaled to bias the sampler
        # towards the minority classes.
        class_sample_count = [1024 / 20, 13426, 2898 / 2]
        oversample_weights = 1 / torch.Tensor(class_sample_count)
        oversample_weights = oversample_weights[targets]
        # oversample_weights = torch.tensor([0.9414, 0.2242, 0.8344])
        sampler = torch.utils.data.sampler.WeightedRandomSampler(
            oversample_weights, len(oversample_weights))
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=batch_size,
                                                   collate_fn=my_collate,
                                                   sampler=sampler)
    else:
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=batch_size,
                                                   collate_fn=my_collate)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=batch_size,
                                             collate_fn=my_collate)

    # Define model
    if model_name == "CNN":
        vocab_size = len(dataset.vocab)
        model = CNN(vocab_size, embedding_dim, combine=combine)
    elif model_name == "LSTM":
        vocab_size = len(dataset.vocab)
        model = LSTM(vocab_size,
                     embedding_dim,
                     batch_size=batch_size,
                     hidden_dim=hidden_dim,
                     lstm_num_layers=num_layers,
                     combine=combine,
                     dropout=dropout)
    elif model_name == "Bert":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", num_labels=3)
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=batch_size,
                                                   collate_fn=bert_collate)
        val_loader = torch.utils.data.DataLoader(val_data,
                                                 batch_size=batch_size,
                                                 collate_fn=bert_collate)

    # Device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Loss: cross entropy, optionally weighted by inverse class counts
    if weighted_loss:
        weights = torch.tensor([0.9414, 0.2242, 0.8344], device=device)
    else:
        weights = torch.ones(3, device=device)
    criterion = nn.CrossEntropyLoss(weight=weights)
    if soft_labels:
        criterion = weighted_soft_cross_entropy

    # Latent rationale model
    if unsupervised:
        vocab_size = len(dataset.vocab)
        criterion = nn.CrossEntropyLoss(weight=weights, reduction='none')
        model = Rationalisation_model(vocab_size,
                                      embedding_dim=embedding_dim,
                                      model=model_name,
                                      batch_size=batch_size,
                                      combine=combine,
                                      criterion=criterion)

    if not model_name == "Bert":
        model.embedding.weight.data.copy_(dataset.vocab.vectors)
        if combine:
            model.embedding_glove.weight.data.copy_(dataset.glove.vectors)

    # Model to device
    model.to(device)

    # Optimiser
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    if model_name == "Bert":
        optimizer = AdamW(model.parameters(),
                          lr=learning_rate,
                          correct_bias=False)
        # Linear scheduler for adaptive lr
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=num_warmup_steps,
                                         t_total=num_total_steps)
    else:
        scheduler = None

    plot_log = defaultdict(list)
    for epoch in range(num_epochs):
        # Train and validate
        epoch_loss, epoch_acc = train_epoch(model,
                                            train_loader,
                                            optimizer,
                                            criterion,
                                            device,
                                            soft_labels=soft_labels,
                                            weights=weights,
                                            scheduler=scheduler,
                                            unsupervised=unsupervised)
        val_loss, val_acc = evaluate_epoch(model,
                                           val_loader,
                                           criterion,
                                           device,
                                           soft_labels=soft_labels,
                                           weights=weights,
                                           unsupervised=unsupervised)

        # Append to the per-metric history for plotting; assigning instead
        # of appending would overwrite the curve with a scalar each epoch.
        for name, point in zip(
                ["train_loss", "train_accuracy", "val_loss", "val_accuracy"],
                [epoch_loss, epoch_acc, val_loss, val_acc]):
            plot_log[name].append(point)

        # Real-time progress
        print(f'Epoch: {epoch+1}')
        print(f'\tTrain Loss: {epoch_loss:.5f} | Train Acc: {epoch_acc*100:.2f}%')
        print(f'\t Val. Loss: {val_loss:.5f} | Val. Acc: {val_acc*100:.2f}%')
        sample_sentences_and_z(model, train_loader, device, dataset.vocab)

    # Save plots
    results_directory = f'plots/{experiment_number}'
    os.makedirs(results_directory, exist_ok=True)
    for name, data in plot_log.items():
        save_plot(data, name, results_directory)

    # Save model
    torch.save(model, os.path.join(results_directory, 'model_cnn.pth'))

    # Confusion matrix and classification report
    loss, acc, predictions, ground_truth = evaluate_epoch(
        model,
        val_loader,
        criterion,
        device,
        is_final=True,
        soft_labels=soft_labels,
        weights=weights,
        unsupervised=unsupervised)
    conf_matrix = confusion_matrix(ground_truth, predictions)
    class_report = classification_report(ground_truth, predictions)
    print('\nFinal Loss and Accuracy\n----------------\n')
    print(f'\t Val. Loss: {loss:.5f} | Val. Acc: {acc*100:.2f}%')
    print('\nCONFUSION MATRIX\n----------------\n')
    print(conf_matrix)
    print('\nCLASSIFICATION REPORT\n----------------------\n')
    print(class_report)
    plot_confusion_matrix(ground_truth,
                          predictions,
                          classes=["Hate speech", "Offensive", "Neither"],
                          normalize=False,
                          title='Confusion matrix')
    plt.show()
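# main() swaps in weighted_soft_cross_entropy when soft_labels is set, but its
# definition is not shown here. A minimal sketch of such a loss, assuming the
# targets are per-example class distributions of shape (batch, num_classes)
# and `weights` is a (num_classes,) tensor like the one passed to
# nn.CrossEntropyLoss above:
import torch.nn.functional as F


def weighted_soft_cross_entropy(logits, soft_targets, weights=None):
    log_probs = F.log_softmax(logits, dim=-1)
    if weights is not None:
        # Rescale each class's contribution before taking the expectation
        log_probs = log_probs * weights
    return -(soft_targets * log_probs).sum(dim=-1).mean()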
def train(model_name="LSTM", params=None, embedding="Random"):
    # Parameters to tune
    print(params)
    batch_size = params["batch_size"]
    num_epochs = params["num_epochs"]
    oversample = params["oversample"]
    soft_labels = params["soft_labels"]
    if model_name == "LSTM":
        learning_rate = params["learning_rate"]
        hidden_dim = params["hidden_dim"]
        num_layers = params["num_layers"]
        dropout = params["dropout"]

    # `combine` and `embedding_dim` are used for every model type below,
    # so they are set outside the LSTM branch.
    combine = embedding == "Both"
    embedding_dim = 300
    if combine:
        embedding = "Random"

    if model_name == "Bert":
        learning_rate = params["learning_rate"]
        num_warmup_steps = params["num_warmup_steps"]
        num_total_steps = params["num_total_steps"]
        embedding = "None"

    # Constants
    test_percentage = 0.1
    val_percentage = 0.2

    # Load data
    torch.manual_seed(42)
    dataset = Dataset("../data/cleaned_tweets_orig.csv",
                      use_embedding=embedding,
                      embedd_dim=embedding_dim,
                      for_bert=(model_name == "Bert"),
                      combine=combine)
    train_data, val_test_data = split_dataset(dataset,
                                              test_percentage + val_percentage)
    val_data, test_data = split_dataset(
        val_test_data, test_percentage / (test_percentage + val_percentage))
    train_loader, val_loader, weights = load_data(oversample, train_data,
                                                  val_data, batch_size)

    # Define model
    if model_name == "CNN":
        vocab_size = len(dataset.vocab)
        model = CNN(vocab_size,
                    embedding_dim=embedding_dim,
                    combine=params["combine"],
                    n_filters=params["filters"])
    elif model_name == "LSTM":
        vocab_size = len(dataset.vocab)
        model = LSTM(vocab_size,
                     embedding_dim,
                     batch_size=batch_size,
                     hidden_dim=hidden_dim,
                     lstm_num_layers=num_layers,
                     combine=combine,
                     dropout=dropout)
    elif model_name == "Bert":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", num_labels=3)
        train_loader, val_loader, weights = load_data(oversample,
                                                      train_data,
                                                      val_data,
                                                      batch_size,
                                                      collate_fn=bert_collate)

    if not model_name == "Bert":
        model.embedding.weight.data.copy_(dataset.vocab.vectors)
        if combine:
            model.embedding_glove.weight.data.copy_(dataset.glove.vectors)

    # Device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Optimiser
    scheduler = None
    optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"])
    if model_name == "Bert":
        optimizer = AdamW(model.parameters(),
                          lr=learning_rate,
                          correct_bias=False)
        # Linear scheduler for adaptive lr
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=num_warmup_steps,
                                         t_total=num_total_steps)

    # Cross entropy loss, weighted by inverse class counts
    weights = torch.tensor([0.9414, 0.2242, 0.8344], device=device)
    if soft_labels:
        criterion = weighted_soft_cross_entropy
    else:
        criterion = nn.CrossEntropyLoss(weight=weights)
    eval_criterion = nn.CrossEntropyLoss(weight=weights)

    for epoch in range(num_epochs):
        # Train
        epoch_loss, epoch_acc = train_epoch(model,
                                            train_loader,
                                            optimizer,
                                            criterion,
                                            device,
                                            scheduler=scheduler,
                                            weights=weights)
        # Real-time progress
        print(f'Epoch: {epoch+1}')
        print(f'\tTrain Loss: {epoch_loss:.5f} | Train Acc: {epoch_acc*100:.2f}%')

    # Compute the F1 score on the validation set - this is what we optimise
    # during tuning
    loss, acc, predictions, ground_truth = evaluate_epoch(model,
                                                          val_loader,
                                                          eval_criterion,
                                                          device,
                                                          is_final=True)
    val_f1 = f1_score(y_true=ground_truth, y_pred=predictions, average="macro")
    print("Done")
    return val_f1
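# train() returns the validation macro-F1, which suggests it is driven by a
# hyperparameter search. A hypothetical grid-search driver (the real search
# space and tuning loop are not part of this file, and the values below are
# illustrative only):
from itertools import product

search_space = {
    'batch_size': [32],
    'num_epochs': [5],
    'oversample': [False],
    'soft_labels': [False],
    'learning_rate': [1e-3, 1e-4],
    'hidden_dim': [128, 256],
    'num_layers': [2, 3],
    'dropout': [0.0, 0.2],
}

best_f1, best_params = 0.0, None
for values in product(*search_space.values()):
    candidate = dict(zip(search_space.keys(), values))
    f1 = train(model_name="LSTM", params=candidate, embedding="Glove")
    if f1 > best_f1:
        best_f1, best_params = f1, candidate
print(f'Best macro-F1: {best_f1:.4f} with {best_params}')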
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Training_options, assemble_predictors_predictands and ENSODataset are
# assumed to come from this project's own modules, alongside models.CNN.

if __name__ == '__main__':
    opt = Training_options().parse()
    if opt.model == 'cnn':
        from models import CNN
        net = CNN(opt)

    train_predictors, train_predictands = assemble_predictors_predictands(
        opt, train=True)
    train_dataset = ENSODataset(train_predictors, train_predictands)
    trainloader = DataLoader(train_dataset, batch_size=opt.batch_size)

    optimizer = optim.Adam(net.parameters(), lr=opt.lr)
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    net = net.to(device)

    best_loss = np.infty
    train_losses = []
    net.train()
    criterion = nn.MSELoss()
    for epoch in range(opt.epoch):
        running_loss = 0.0
        for i, data in enumerate(trainloader):
            batch_predictors, batch_predictands = data
            batch_predictands = batch_predictands.to(device)
            batch_predictors = batch_predictors.to(device)

            optimizer.zero_grad()
            predictions = net(batch_predictors).squeeze()
            loss = criterion(predictions, batch_predictands.squeeze())
            loss.backward()
            optimizer.step()
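            # Hypothetical continuation (the original snippet ends at
            # optimizer.step()): the otherwise-unused running_loss,
            # train_losses and best_loss variables suggest per-epoch loss
            # tracking and best-model checkpointing, roughly like this.
            running_loss += loss.item()
        train_losses.append(running_loss / len(trainloader))
        print(f'Epoch {epoch + 1}: train loss {train_losses[-1]:.4f}')
        if train_losses[-1] < best_loss:
            best_loss = train_losses[-1]
            torch.save(net.state_dict(), 'best_cnn.pt')  # hypothetical path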
def training_run_cnn(combination, criterion, train_loader, valid_loader, run):
    n_featuremap_1, n_featuremap_2, mode = combination
    model_path = "CNN_run_{}.pt".format(run)
    results[model_path] = dict()

    # Initialize the network with the given configuration
    my_net = CNN(n_featuremap_1=n_featuremap_1, n_featuremap_2=n_featuremap_2)
    # Initialize weights with the given mode
    my_net.apply(partial(init_weights, mode=mode))
    my_net.to(device)
    optimizer = torch.optim.Adam(my_net.parameters())

    for epoch in range(10):  # loop over the training dataset multiple times
        training_loss = .0
        pbar = tqdm(total=len(train_loader))  # one tick per mini-batch
        for batch_idx, (x, target) in enumerate(train_loader):
            x, target = x.to(device), target.to(device)
            # Zero the parameter gradients
            optimizer.zero_grad()
            # Forward + backward + optimize
            outputs = my_net(x).view(-1, 1)
            loss = criterion(outputs, target.view(-1, 1))
            loss.backward()
            optimizer.step()
            if epoch == 9:
                # Accumulate training loss in the last epoch
                training_loss += loss.item() * len(x)
            if batch_idx % 100 == 99:  # print every 100 mini-batches
                print("[Epoch %d, Batch %2d] loss: %.3f" %
                      (epoch + 1, batch_idx + 1, loss.item()))
            pbar.update(1)

    # Update results, normalised by the number of training samples
    results[model_path]["training_loss"] = (training_loss /
                                            len(train_loader.dataset))
    print("Finished Training!")

    print("Start Evaluating!")
    # Validation loss and accuracy
    valid_loss = .0
    correct = 0
    thres = 0.5
    with torch.no_grad():
        for batch_idx, (x, target) in enumerate(valid_loader):
            x, target = x.to(device), target.to(device)
            outputs = my_net(x).view(-1, 1)
            prediction = outputs >= thres
            correct += prediction.eq(target.view(-1, 1)).sum().item()
            loss = criterion(outputs, target.view(-1, 1))
            valid_loss += loss.item() * len(x)

    # Update results, normalised by the number of validation samples
    results[model_path]["validation_loss"] = (valid_loss /
                                              len(valid_loader.dataset))
    results[model_path]["accuracy"] = correct / len(valid_loader.dataset)

    # Save model to disk
    torch.save(my_net.state_dict(), "./models/" + model_path)
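# A hypothetical driver for training_run_cnn (the actual sweep is not shown).
# It assumes `results`, `device`, `train_loader` and `valid_loader` are
# module-level globals, that init_weights accepts the mode strings below, and
# that the 0.5 threshold above implies sigmoid outputs, making nn.BCELoss()
# a plausible criterion.
combinations = [(16, 32, 'xavier'), (32, 64, 'kaiming')]  # example configs
criterion = torch.nn.BCELoss()
for run, combination in enumerate(combinations):
    training_run_cnn(combination, criterion, train_loader, valid_loader, run)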
app = Flask(__name__,
            static_folder="./backend/static",
            template_folder="./backend/templates")
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
bootstrap = Bootstrap(app)

# Handle cross-origin requests
cors = CORS(app, resources={r"/*": {"origins": "*"}})

class2index = json.load(open("class_index.json"))
index2class = {v: k for k, v in class2index.items()}

# Declared here so the model is loaded only once, at app initialization
device = torch.device('cpu')
model = CNN()
model.load_state_dict(torch.load('pickles/cnn.pkl', map_location=device))
model.to(device=device)
model.eval()

trans = Compose([ToTensor(), PaddingSame2d(seq_len=224, value=0)])


def allowed_file(filename):
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def transform_audio(audio_bytes):
    feat = get_mfcc(audio_bytes)
    feat = trans(feat)
    return feat
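# A hypothetical inference route (the app's actual endpoints are not shown in
# this snippet). It assumes uploads arrive as multipart files, that
# transform_audio returns a tensor, and that `request` and `jsonify` are
# imported from flask at the top of the app:
from flask import request, jsonify


@app.route('/predict', methods=['POST'])
def predict():
    file = request.files.get('file')
    if file is None or not allowed_file(file.filename):
        return jsonify({'error': 'invalid or missing file'}), 400
    feat = transform_audio(file.read())
    with torch.no_grad():
        logits = model(feat.unsqueeze(0).to(device))  # add a batch dimension
        pred = logits.argmax(dim=1).item()
    return jsonify({'class': index2class[pred]})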