def train_models_2(train_sentences, filter) -> tuple:
    # Prune the train data set
    train_words = prune_sentences(train_sentences, filter, balance=False)
    # Train on pruned sentences
    print(f"Number of datapoints at training identifier: {len(train_words)}")
    id_clf, id_report = train_classifier(train_words, bool_result=True, prob=False)

    # # Filter the words using the identification classifier similarly to testing
    # for chunk in chunks(train_words, 4):
    #     test_classifier(id_clf, chunk, bool_result=True)
    # train_words = []
    # for sentence in train_sentences:
    #     words = sentence.getTreeNodesOrdered()
    #     for w in words:
    #         prediction = w.getPrediction()
    #         if prediction == 1:
    #             # add the word to the list to be labeled
    #             train_words.append(w)

    filter["prune"] = 2
    train_words = prune_sentences(train_sentences, filter, balance=False)
    print(f"Number of datapoints at training labeler: {len(train_words)}")
    label_clf, label_report = train_classifier(train_words, bool_result=False, prob=True)

    return id_clf, label_clf, f"{id_report}\n{label_report}"
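# --- Hedged usage sketch (illustrative, not from the original source) ---
# train_models_2 mutates the filter dict it is given (it sets filter["prune"] = 2 for
# the labeling pass), so a fresh dict should be passed per call. The initial
# {"prune": 1} value and the name `sentences` below are assumptions for illustration.
def example_train_models_2(sentences):
    pruning_filter = {"prune": 1}  # assumed initial pruning stage; real keys may differ
    identifier_clf, labeler_clf, report = train_models_2(sentences, pruning_filter)
    print(report)
    return identifier_clf, labeler_clf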
def train_classifiers(data_file, cache_dir=os.path.curdir):
    data = read_data(data_file)
    # Train and cache one classifier per tissue class: white matter (WM) and gray matter (GM)
    for matter in ["WM", "GM"]:
        classifier_file = os.path.join(
            cache_dir, "Classifier", "{0}_matter_classifier.pkl".format(matter))
        if not os.path.exists(os.path.dirname(classifier_file)):
            os.makedirs(os.path.dirname(classifier_file))
        train_classifier(data["Features"].values, data["Truth"][matter].values, classifier_file)
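# --- Hedged companion sketch (illustrative, not from the original source) ---
# train_classifiers caches one model per tissue class under
# <cache_dir>/Classifier/<matter>_matter_classifier.pkl. Assuming train_classifier
# serializes the fitted model with pickle (the .pkl suffix suggests this, but the
# actual format is not shown), a cached model could be reloaded like so:
import os
import pickle

def load_cached_classifier(matter, cache_dir=os.path.curdir):
    classifier_file = os.path.join(
        cache_dir, "Classifier", "{0}_matter_classifier.pkl".format(matter))
    with open(classifier_file, "rb") as handle:
        return pickle.load(handle)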
def save_suggestion_feedback(sessionId, context, feedback):
    # Look up the user for this session and their stored (pickled) classifier
    user = User.query.filter_by(session_id=sessionId).first()
    gauss_object = Classifiers.query.filter_by(user_id=user.id).first()
    gauss_clf = gauss_object.pickled_classifier

    # Convert the boolean feedback into a 0/1 training label and record the visited listing
    classified = 1 if feedback else 0
    visited = UserVisitedListings(user_id=user.id, listing=context['id'], like=feedback)

    # Update the user's classifier with the new labelled example and persist it
    train_classifier([context['description']], [classified], gauss_clf)
    Classifiers.query.filter_by(user_id=user.id).update(dict(pickled_classifier=gauss_clf))
    db.session.add(visited)
    db.session.commit()
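# --- Hedged usage sketch (illustrative, not from the original source) ---
# The context dict only needs the keys the function reads ('id' and 'description');
# the session id and listing values below are made up for illustration.
def example_record_feedback():
    listing_context = {'id': 42, 'description': 'Sunny two-bedroom flat near the park'}
    save_suggestion_feedback('demo-session-id', listing_context, feedback=True)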
def get_sentiment(comments, comments_ratio):
    #comments = youtube_comments.get_youtube_comments(video_id)
    #comments = download_comment.commentExtract(video_id)
    pos = 0
    neg = 0
    comments_list = []
    positive_comments = []
    negative_comments = []
    mixed_comments = []

    # Train the classifier once and load the pickled model once, rather than per comment
    training.train_classifier()
    with open("classifier.pickle", "rb") as classifier_f:
        classifier = pickle.load(classifier_f)

    for com in comments:
        filtered_comments = filter.filter_comments(com)
        for comment in filtered_comments:
            result = classifier.classify(bag_of_words(comment))
            if result == "pos":
                pos += 1
            else:
                neg += 1

        result, score = calculate_score(pos, neg, comments_ratio)
        if result == "positive":
            positive_comments.append(com)
            temp_dict = {'Comment': com, 'Score': score, 'Sentiment': 'positive'}
        elif result == "negative":
            negative_comments.append(com)
            temp_dict = {'Comment': com, 'Score': score, 'Sentiment': 'negative'}
        elif result == "mixed":
            mixed_comments.append(com)
            temp_dict = {'Comment': com, 'Score': score, 'Sentiment': 'mixed'}
        comments_list.append(temp_dict)

    #no_of_likes, no_of_dislikes = youtube_stats.get_likes_dislikes(video_id)
    dictionary_comments = {'Positive Comments': positive_comments,
                           'Negative Comments': negative_comments,
                           'Mixed Comments': mixed_comments}
    #overall_result = calculate_score(pos, neg, no_of_likes, no_of_dislikes, comments_ratio)
    return comments_list
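# --- Hedged helper sketch (illustrative, not from the original source) ---
# bag_of_words is not defined in this snippet. classifier.classify(...) above looks like
# an NLTK-style classifier, which expects a feature dict; a minimal bag-of-words feature
# extractor under that assumption could look like the following (whether the filtered
# comment arrives as a string or a token list is not shown, so both are handled):
def bag_of_words(comment):
    # Split raw strings into tokens (assumption); token lists are used as-is
    tokens = comment.split() if isinstance(comment, str) else comment
    # Map each token to True so the classifier sees presence/absence features
    return {token: True for token in tokens}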
def train_models(train_sentences, filter) -> tuple:
    # Prune the train data set
    train_words = prune_sentences(train_sentences, filter, balance=False)
    # Train on pruned sentences
    print(f"Number of datapoints at training identifier: {len(train_words)}")
    id_clf, id_report = train_classifier(train_words, bool_result=True, prob=False)

    train_words = prune_sentences(train_sentences, filter, balance=False)
    print(f"Number of datapoints at training labeler: {len(train_words)}")
    label_clf, label_report = train_classifier(train_words, bool_result=False, prob=True)

    return id_clf, label_clf, f"{id_report}\n{label_report}"
options = vars(parser.parse_args())

sys.path.append(os.path.dirname(os.path.dirname(__file__)))
from dataloader import CustomDataloader, FlexibleCustomDataloader
from training import train_classifier
from networks import build_networks, save_networks, get_optimizers
from options import load_options, get_current_epoch
from comparison import evaluate_with_comparison
from evaluation import save_evaluation

options = load_options(options)
dataloader = FlexibleCustomDataloader(fold='train', **options)
networks = build_networks(dataloader.num_classes, **options)
optimizers = get_optimizers(networks, finetune=True, **options)
eval_dataloader = CustomDataloader(last_batch=True, shuffle=False, fold='test', **options)

start_epoch = get_current_epoch(options['result_dir']) + 1
for epoch in range(start_epoch, start_epoch + options['epochs']):
    train_classifier(networks, optimizers, dataloader, epoch=epoch, **options)
    #print(networks['classifier_kplusone'])
    #weights = networks['classifier_kplusone'].fc1.weight
    eval_results = evaluate_with_comparison(networks, eval_dataloader, **options)
    pprint(eval_results)
    save_evaluation(eval_results, options['result_dir'], epoch)
    save_networks(networks, epoch, options['result_dir'])
from options import save_options, load_options, get_current_epoch
from locking import acquire_lock, release_lock
from imutil import encode_video

if os.path.exists(options['result_dir']):
    options = load_options(options)

dataloader = FlexibleCustomDataloader(fold='train', **options)
networks = build_networks(dataloader.num_classes, **options)
optimizers = get_optimizers(networks, **options)
save_options(options)

start_epoch = get_current_epoch(options['result_dir']) + 1
acquire_lock(options['result_dir'])
try:
    for epoch in range(start_epoch, start_epoch + options['epochs']):
        # Apply learning rate decay
        """
        for name, optimizer in optimizers.items():
            MAX_EPOCH = 100
            optimizer.param_groups[0]['lr'] = options['lr'] * (options['decay'] ** min(epoch, MAX_EPOCH))
        """
        video_filename = train_classifier(networks, optimizers, dataloader, epoch=epoch, **options)
        save_networks(networks, epoch, options['result_dir'])
finally:
    release_lock(options['result_dir'])
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=options['batch_size'], shuffle=True,
    num_workers=8, pin_memory=True, drop_last=True)
val_loader = torch.utils.data.DataLoader(
    OpenSetImageFolder(valdir, val_transforms, seed=options['seed'],
                       num_classes=options['num_classes']),
    batch_size=options['batch_size'], shuffle=True,
    num_workers=8, pin_memory=True)

start_epoch = get_current_epoch(options['result_dir']) + 1
for epoch in range(start_epoch, start_epoch + options['epochs']):
    train_results = train_classifier(networks, optimizers, train_loader, epoch=epoch, **options)
    eval_results = evaluate_with_comparison(networks, val_loader, **options)
    print('[Epoch {}] errC {} errOpenSet {} ClosedSetAcc {}'.format(
        epoch, train_results['errC'], train_results['errOpenSet'],
        eval_results['classifier_closed_set_accuracy']))
    save_evaluation(eval_results, options['result_dir'], epoch)
    save_networks(networks, epoch, options['result_dir'])