def create_thread(slug):
    json_data = request.get_json()
    user = User().get_by_nickname(json_data['author'], hide_id=False)
    forum = Forum().get_by_slug_with_id(slug)
    if not (user and forum):
        return json_response({"message": "Can't find user or forum"}, 404)
    if 'slug' in json_data:
        thread_exists = Thread.get_serialised_with_forum_user_by_id_or_slug(slug=json_data['slug'])
        if thread_exists:
            thread_exists['created'] = format_time(thread_exists['created'])
            return json_response(thread_exists, 409)
    thread = Thread.create_and_get_serialized(
        user_id=user['id'],
        forum_id=forum['id'],
        title=json_data['title'],
        message=json_data['message'],
        user_nickname=user['nickname'],
        forum_slug=forum['slug'],
        created=json_data.get('created'),
        slug=json_data.get('slug'),
    )
    thread['author'] = user['nickname']
    thread['forum'] = forum['slug']
    thread['created'] = format_time(thread['created'])
    return Response(
        response=json.dumps(thread),
        status=201,
        mimetype="application/json"
    )
def create_posts(slug_or_id):
    posts_data = request.get_json()
    thread = Thread.get_by_slug_or_id_with_forum_id(slug_or_id)
    # The thread must exist before any of its posts can be referenced or created.
    if not thread:
        return json_response({'message': 'thread not found'}, 404)
    usernames = [i['author'] for i in posts_data]
    authors = User.get_user_ids_by_slug(usernames)
    parent_ids = {i['parent'] for i in posts_data if 'parent' in i}
    if parent_ids:
        # Every referenced parent post has to live in the same thread.
        parents = Post.get_posts_by_id_in_thread(thread['id'], parent_ids)
        if len(parents) != len(parent_ids):
            return json_response({'message': 'Parent in other thread'}, 409)
    if len(authors) != len(set(usernames)):
        return json_response({'message': 'user not found'}, 404)
    for post in posts_data:
        for author in authors:
            if author['nickname'] == post['author'].lower():
                post['author_id'] = author['id']
                post['author_nickname'] = author['nickname']
        post['parent_id'] = post.get('parent', 0)
    posts = Post.create_posts(posts_data, thread['id'], thread['forum_id'])
    for item in posts:
        item['created'] = format_time(item['created'])
    return json_response(posts, 201)
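# Illustrative request body for the posts-creation handler above. The field
# names (author, message, optional parent) are the ones the handler reads;
# the concrete values are made up for this example.
example_posts_payload = [
    {"author": "some_user", "message": "First reply in the thread"},
    {"author": "another_user", "message": "A nested reply", "parent": 42},
]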
@classmethod
def get_post(cls, post_id):
    sql = """
        SELECT m.nickname as author, p.created as created, f.slug as forum,
               p.id as id, p.message as message, p.thread_id as thread,
               p.parent_id as parent, p.is_edited as isEdited
        FROM {tbl_name} AS p
        JOIN {u_tbl_name} AS m ON m.id = p.user_id
        JOIN {t_tbl_name} AS t ON t.id = p.thread_id
        JOIN {f_tbl_name} AS f ON f.id = t.forum_id
        WHERE p.id = %(post_id)s
    """.format_map({
        'tbl_name': cls.tbl_name,
        'u_tbl_name': User.tbl_name,
        't_tbl_name': Thread.tbl_name,
        'f_tbl_name': Forum.tbl_name,
    })
    post = DbConnector.execute_get(sql, {'post_id': post_id})
    if post:
        post[0]['created'] = format_time(post[0]['created'])
        return post[0]
    return []
def update_thread(slug_or_id):
    json_data = request.get_json()
    thread = Thread.get_by_slug_or_id(slug_or_id)
    if not thread:
        return json_response({'message': 'Thread not found'}, 404)
    thread = Thread.update_thread(thread['id'], json_data, thread)
    thread['created'] = format_time(thread['created'])
    return json_response(thread, 200)
def vote_thread(slug_or_id):
    post_data = request.get_json()
    thread_id, user_id = Thread.check_user_and_thread(
        thread_slug_or_id=slug_or_id, nickname=post_data['nickname'])
    # Both the thread and the voting user have to exist.
    if not (user_id and thread_id):
        return json_response({'message': 'Thread or user not found'}, 404)
    thread = Vote.vote_for_thread(user_id, post_data['voice'], thread_id)
    thread['created'] = format_time(thread['created'])
    return json_response(thread, 200)
@classmethod
def get_info(cls, post_id, related):
    sql = """
        SELECT m.nickname as author, p.created as created, f.slug as forum,
               p.id as id, p.message as message, p.thread_id as thread,
               p.parent_id as parent, p.is_edited as isEdited
        FROM {tbl_name} AS p
        JOIN {u_tbl_name} AS m ON m.id = p.user_id
        JOIN {t_tbl_name} AS t ON t.id = p.thread_id
        JOIN {f_tbl_name} AS f ON f.id = t.forum_id
        WHERE p.id = %(post_id)s
    """.format_map({
        'tbl_name': cls.tbl_name,
        'u_tbl_name': User.tbl_name,
        't_tbl_name': Thread.tbl_name,
        'f_tbl_name': Forum.tbl_name,
    })
    post = DbConnector.execute_get(sql, {'post_id': post_id})
    if post:
        # The driver lowercases column aliases, so restore the camelCase key.
        post[0]['isEdited'] = post[0]['isedited']
        post[0]['created'] = format_time(post[0]['created'])
    data = {'post': post[0] if post else None}
    if related and post:
        related = related.split(',')
        if 'user' in related:
            data['author'] = User().get_by_nickname(post[0]['author'])
        if 'thread' in related:
            data['thread'] = Thread.get_serialised_with_forum_user_by_id_or_slug(
                id=post[0]['thread'])
            data['thread']['created'] = format_time(data['thread']['created'])
        if 'forum' in related:
            data['forum'] = Forum().get_by_slug(post[0]['forum'])
    return data
def get_thread_messages(slug_or_id):
    sort = request.args.get('sort')
    since = request.args.get('since')
    limit = request.args.get('limit')
    desc = request.args.get('desc')
    thread = Thread.get_by_slug_or_id(slug_or_id)
    posts = []
    if not thread:
        return json_response({'message': 'Thread not found'}, 404)
    if sort == "flat" or sort is None:
        posts = Thread.get_posts_flat_sorted(thread['id'], since, limit, desc)
    elif sort == "tree":
        posts = Thread.get_posts_tree_sorted(thread['id'], since, limit, desc)
    elif sort == "parent_tree":
        posts = Thread.get_posts_parent_tree_sorter(thread['id'], since, limit, desc)
    for post in posts:
        post['created'] = format_time(post['created'])
    return json_response(posts, 200)
def get_threads_list(slug):
    limit = request.args.get('limit')
    since = request.args.get('since')
    desc = request.args.get('desc')
    # Check that the forum exists before listing its threads.
    forum = Forum().get_by_slug(slug)
    if not forum:
        return json_response({'message': 'not found'}, 404)
    threads = Thread.get_threads_list(slug, limit, since, desc)
    for thread in threads:
        thread['created'] = format_time(thread['created'])
    return Response(
        response=json.dumps(threads),
        status=200,
        mimetype="application/json"
    )
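# The route handlers above rely on two small helpers defined elsewhere in the
# project: json_response and format_time. A minimal sketch of what they are
# assumed to do, based on how they are called here: json_response wraps
# json.dumps in a Flask Response with the JSON mimetype, and format_time
# serialises a 'created' timestamp into a string. The exact timestamp format
# (ISO 8601 below) is an assumption.
import json

from flask import Response


def json_response(data, status):
    # Serialise `data` and return it with the given HTTP status and JSON mimetype.
    return Response(response=json.dumps(data),
                    status=status,
                    mimetype="application/json")


def format_time(created):
    # Assumed behaviour: render a datetime 'created' value as an ISO-8601 string;
    # strings are passed through unchanged.
    if isinstance(created, str):
        return created
    return created.isoformat()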
def bert_train_val(model, dataloaders, starting_epoch, optimizer, scheduler, epochs, device):
    print("\n\n" + "-" * 15)
    print("| TRAINING... |")
    print("-" * 15)
    set_seed()
    start_training_time = time.time()

    # Running history for train and val
    train_loss_history = []
    val_loss_history = []
    train_acc_history = []
    val_acc_history = []

    # Training loop
    for epoch in range(starting_epoch, epochs):
        train_loss = 0
        train_acc = 0
        model.train()
        for step, batch in tqdm(enumerate(dataloaders['train_dataloader']),
                                total=len(dataloaders['train_dataloader'])):
            # Load and feed data to the model
            input_ids = batch[0].to(device)
            attention_masks = batch[1].to(device)
            labels = batch[2].to(device)
            model.zero_grad()
            outputs = model(input_ids, labels=labels, attention_mask=attention_masks)
            loss = outputs.loss
            logits = outputs.logits
            batch_loss = loss.item()
            train_loss += batch_loss

            # Batch accuracy from the argmax of the logits
            logits = logits.detach().cpu().numpy()
            labels = labels.to('cpu').numpy()
            predictions = np.argmax(logits, axis=1).flatten()
            correct = 0
            for i in range(len(predictions)):
                if predictions[i] == labels[i]:
                    correct += 1
            batch_accuracy = correct / len(labels)
            train_acc += batch_accuracy

            if step % 100 == 0:
                print("Epoch: ", epoch + 1, "/", epochs,
                      "Batch: ", step + 1, "/", len(dataloaders['train_dataloader']),
                      "Loss: ", train_loss / (step + 1),
                      "Accuracy: ", batch_accuracy)

            loss.backward()
            # Apply gradient clipping
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            # Optimizer / learning-rate scheduler step
            optimizer.step()
            scheduler.step()
            torch.cuda.empty_cache()

        # Loss and accuracy results by epoch
        end_epoch_time = time.time()
        epoch_train_accuracy = train_acc / len(dataloaders['train_dataloader'])
        epoch_train_loss = train_loss / len(dataloaders['train_dataloader'])
        epoch_train_time = format_time(start_training_time, end_epoch_time)
        train_loss_history.append(epoch_train_loss)
        train_acc_history.append(epoch_train_accuracy)
        print(
            f' epoch: {epoch + 1}, train loss: {epoch_train_loss:.6f}, '
            f'train accuracy: {epoch_train_accuracy:.6f}, train time: {epoch_train_time}'
        )

        # Switch to evaluation mode and run validation
        print("Validating...")
        start_val_time = time.time()
        model.eval()
        val_loss = 0
        val_acc = 0
        with torch.no_grad():
            for step, batch in tqdm(enumerate(dataloaders['val_dataloader']),
                                    total=len(dataloaders['val_dataloader'])):
                # Load and feed data to the model
                input_ids = batch[0].to(device)
                attention_masks = batch[1].to(device)
                labels = batch[2].to(device)
                outputs = model(input_ids, labels=labels, attention_mask=attention_masks)
                loss = outputs.loss
                logits = outputs.logits
                batch_loss = loss.item()
                val_loss += batch_loss

                logits = logits.detach().cpu().numpy()
                labels = labels.to('cpu').numpy()
                predictions = np.argmax(logits, axis=1).flatten()
                correct = 0
                for i in range(len(predictions)):
                    if predictions[i] == labels[i]:
                        correct += 1
                batch_accuracy = correct / len(labels)
                val_acc += batch_accuracy
                torch.cuda.empty_cache()

        end_val_time = time.time()
        epoch_val_time = format_time(start_val_time, end_val_time)
        epoch_val_loss = val_loss / len(dataloaders['val_dataloader'])
        epoch_val_acc = val_acc / len(dataloaders['val_dataloader'])
        val_loss_history.append(epoch_val_loss)
        val_acc_history.append(epoch_val_acc)
        print(
            f' epoch: {epoch + 1}, val loss: {epoch_val_loss:.6f}, '
            f'val accuracy: {epoch_val_acc:.6f}, val_time: {epoch_val_time}'
        )

        # Record results to a dictionary to return
        performance_history = {
            'train_loss': train_loss_history,
            'val_loss': val_loss_history,
            'train_accuracy': train_acc_history,
            'val_accuracy': val_acc_history,
            'num_epochs': epochs
        }

        # Save a model checkpoint (including the performance history) at the end of the run
        if epoch == epochs - 1:
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'performance_history': performance_history,
                'epoch': epoch + 1,
            }
            save_checkpoint(checkpoint, f"./BERTcheckpoint_{checkpoint['epoch']}.pth.tar")

    print("")
    print("Training Finished")
    return performance_history
def gpt2_train_val(model, dataloaders, tokenizer, starting_epoch, optimizer, scheduler, epochs, device):
    print("\n\n" + "-" * 15)
    print("| TRAINING... |")
    print("-" * 15)
    set_seed()
    start_training_time = time.time()

    # Running history for train and val
    train_loss_history = []
    val_loss_history = []
    train_perplexity_history = []
    val_perplexity_history = []

    # Training loop
    for epoch in range(starting_epoch, epochs):
        train_loss = 0
        running_loss = 0.0
        model.train()
        for step, batch in tqdm(enumerate(dataloaders['train_dataloader']),
                                total=len(dataloaders['train_dataloader'])):
            # Load and feed data to the model (language modelling: labels are the inputs)
            input_ids = batch.to(device)
            model.zero_grad()
            outputs = model(input_ids, labels=input_ids)
            loss = outputs[0]
            batch_loss = loss.item()
            train_loss += batch_loss
            # Keep a separate running loss for periodic logging so the epoch average stays correct
            running_loss += batch_loss
            if step % 200 == 199:
                print("Epoch:", epoch + 1, "/", epochs,
                      "Batch:", step + 1, "/", len(dataloaders['train_dataloader']),
                      "Loss", running_loss / 200)
                running_loss = 0.0

            # Periodically generate a sample (special tokens included) to visualise what the model is learning
            if step % 100 == 0 and step != 0:
                model.eval()
                samples = model.generate(
                    bos_token_id=50257,
                    do_sample=True,
                    top_k=50,
                    max_length=50,
                    min_length=15,
                    top_p=0.95,
                    num_return_sequences=1,
                    repetition_penalty=1.1,
                    no_repeat_ngram_size=2,
                    temperature=1.1)
                for i, sample in enumerate(samples):
                    print("{}".format(tokenizer.decode(sample, skip_special_tokens=False)))
                # Return to train mode before back-propagating the loss
                model.train()

            loss.backward()
            # Apply gradient clipping
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            # Optimizer / learning-rate scheduler step
            optimizer.step()
            scheduler.step()
            torch.cuda.empty_cache()

        # Loss and perplexity results by epoch
        end_epoch_time = time.time()
        epoch_train_loss = train_loss / len(dataloaders['train_dataloader'])
        epoch_train_perplexity = torch.exp(torch.tensor(epoch_train_loss))
        epoch_train_time = format_time(start_training_time, end_epoch_time)
        train_loss_history.append(epoch_train_loss)
        train_perplexity_history.append(epoch_train_perplexity)
        print(
            f' epoch: {epoch + 1}, train loss: {epoch_train_loss:.6f}, '
            f'train ppl: {epoch_train_perplexity:.6f}, train time: {epoch_train_time}'
        )

        # Switch to evaluation mode and run validation
        print("Validating...")
        start_val_time = time.time()
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for step, batch in tqdm(enumerate(dataloaders['val_dataloader']),
                                    total=len(dataloaders['val_dataloader'])):
                input_ids = batch[0].to(device)
                outputs = model(input_ids, labels=input_ids)
                loss = outputs[0]
                batch_loss = loss.item()
                val_loss += batch_loss
                torch.cuda.empty_cache()

        end_val_time = time.time()
        epoch_val_time = format_time(start_val_time, end_val_time)
        epoch_val_loss = val_loss / len(dataloaders['val_dataloader'])
        epoch_val_perplexity = torch.exp(torch.tensor(epoch_val_loss))
        val_loss_history.append(epoch_val_loss)
        val_perplexity_history.append(epoch_val_perplexity)
        print(
            f' epoch: {epoch + 1}, val loss: {epoch_val_loss:.6f}, '
            f'val ppl: {epoch_val_perplexity:.6f}, val_time: {epoch_val_time}'
        )

        # Record results to a dictionary to return
        performance_history = {
            'train_loss': train_loss_history,
            'val_loss': val_loss_history,
            'train_perplexity': train_perplexity_history,
            'val_perplexity': val_perplexity_history,
            'num_epochs': epochs
        }

        # Save a model checkpoint (including the performance history) at the end of the run
        if epoch == epochs - 1:
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'performance_history': performance_history,
                'epoch': epoch + 1,
            }
            save_checkpoint(checkpoint, f"./checkpoint_{checkpoint['epoch']}.pth.tar")

    print("")
    print("Training Finished")
    return performance_history
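# The two training loops above rely on set_seed, format_time(start, end) and
# save_checkpoint, which are defined elsewhere in the project. A minimal sketch
# of plausible implementations, under the assumption that format_time here
# returns an elapsed hh:mm:ss string (note it takes two time.time() readings,
# unlike the API helper of the same name) and that save_checkpoint is a thin
# wrapper around torch.save.
import datetime
import random

import numpy as np
import torch


def set_seed(seed=42):
    # Seed Python, NumPy and PyTorch RNGs for reproducible runs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


def format_time(start, end):
    # Elapsed wall-clock time between two time.time() readings, as hh:mm:ss.
    elapsed = int(round(end - start))
    return str(datetime.timedelta(seconds=elapsed))


def save_checkpoint(state, filename):
    # Persist model/optimizer/scheduler state and the performance history to disk.
    print(f"Saving checkpoint to {filename}")
    torch.save(state, filename)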
def get_thread_details(slug_or_id):
    thread = Thread.get_by_slug_or_id(slug_or_id)
    if thread:
        thread['created'] = format_time(thread['created'])
        return json_response(thread, 200)
    return json_response({'message': 'thread not found'}, 404)