def refresh_user_level(current_user_id):
    with app.app_context():
        db.init_app(app)
        db.create_all()

        # Get all articles in the curated table for this user.
        user_curated_article_levels = UserSortedContent.query.filter_by(
            user_id=current_user_id).all()

        # Calculate the root-mean-square of the curated article levels.
        # (The original called this the "standard deviation", but no mean
        # is subtracted, so it is really an RMS skill estimate.)
        sqd_deviations = 0
        for entry in user_curated_article_levels:
            level = Content.query.filter_by(id=entry.content_id).first().level
            sqd_deviations += level ** 2
        if len(user_curated_article_levels) > 0:
            std_deviation = (sqd_deviations / len(user_curated_article_levels)) ** 0.5
        else:
            std_deviation = 0

        # Store the user's skill in the user_lang_skill table.
        user = UserLangSkill.query.filter_by(user_id=current_user_id).first()
        if std_deviation > 0:
            if user is None:
                # No existing row for this user: create one. 815 is the
                # language id used in the original commented-out code.
                user = UserLangSkill(user_id=current_user_id,
                                     language_id=815, skill=std_deviation)
            else:
                user.skill = std_deviation
            db.session.add(user)
            db.session.commit()
            return 0
        else:
            return -1
def refresh_content_level(current_user_id):
    with app.app_context():
        db.init_app(app)
        db.create_all()

        # Clear this user's rows from the user_sorted_content table.
        user_content = UserSortedContent.query.filter_by(
            user_id=current_user_id).all()
        for item in user_content:
            db.session.delete(item)
        db.session.commit()

        # Curate across all articles.
        content_text = Content.query.all()
        classifier = get_classifier(current_user_id)
        feature_set = get_feature_set(current_user_id)

        # Classify each article and create a new entry; only the suitable
        # ones (result == 0) are added.
        for content_item in content_text:
            result = classify(content_item.body, classifier, feature_set)
            if result == 0:
                article_entry = UserSortedContent(user_id=current_user_id,
                                                  content_id=content_item.id,
                                                  sortedSkill=result)
                db.session.add(article_entry)

        # Commit the change.
        db.session.commit()
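# The two refresh functions above assume a classifier has already been
# trained by create_classifier() (defined later in this file). A minimal
# driver chaining the three steps might look like this sketch; the name
# refresh_user_pipeline and the default word_ct are assumptions, not part
# of the original codebase.
def refresh_user_pipeline(current_user_id, word_ct=2000):
    create_classifier(current_user_id, word_ct)  # train + pickle into User row
    refresh_content_level(current_user_id)       # rebuild UserSortedContent
    if refresh_user_level(current_user_id) == -1:
        print("No curated articles for user %d; skill left unchanged"
              % current_user_id)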
def get_db():
    """Connect to the application's configured database.

    The connection is unique for each request and will be reused if this
    is called again.
    """
    if 'db' not in g:
        db.init_app(current_app)
        g.db = db
    return g.db
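# Illustrative only: get_db() is meant to be called inside a request, where
# Flask's g is available. The route and query below are hypothetical.
@app.route('/article-count')
def article_count():
    database = get_db()  # reused if called again within the same request
    return str(database.session.query(Content).count())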
def get_feature_set(user_id):
    # Retrieve the pickled feature set from the user's row in the DB.
    # (The original comment said "classifier"; this loads the feature set.)
    with app.app_context():
        db.init_app(app)
        db.create_all()
        pickled_feature_set = User.query.filter_by(
            id=user_id).first().feature_set
        feature_set = pickle.loads(pickled_feature_set)
        return feature_set
def get_classifier(user_id):
    # Retrieve the pickled classifier from the user's row in the DB.
    with app.app_context():
        db.init_app(app)
        db.create_all()
        pickled_classifier = User.query.filter_by(
            id=user_id).first().classifier
        classifier = pickle.loads(pickled_classifier)
        return classifier
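# classify() is used by refresh_content_level() but not defined in this
# file. Given that create_classifier() (below) trains an
# nltk.NaiveBayesClassifier, a plausible sketch follows the standard NLTK
# convention of a {word: present?} feature dict; the body handling and the
# meaning of the labels are assumptions.
def classify(body, classifier, feature_set):
    # Content bodies are stored as binary elsewhere in this codebase.
    text = body.decode('utf8') if isinstance(body, bytes) else str(body)
    words = set(text.split())
    features = {word: (word in words) for word in feature_set}
    return classifier.classify(features)  # e.g. 0 == "just right"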
def delete_entries():
    with app.app_context():
        db.init_app(app)
        db.create_all()
        user = User.query.filter_by(username='******').first()
        if user:
            user_id = user.id
            # Query the DB for the user's Progress entries and delete them
            # one at a time until none remain.
            entry = Progress.query.filter_by(user_id=user_id).first()
            while entry:
                db.session.delete(entry)
                entry = Progress.query.filter_by(user_id=user_id).first()
            db.session.delete(user)
            db.session.commit()
def create_entries():
    with app.app_context():
        db.init_app(app)
        db.create_all()

        # Create the test user.
        password = global_bcrypt.generate_password_hash('password').decode(
            'utf-8')
        user = User(username='******', email='*****@*****.**',
                    password=password)
        db.session.add(user)
        db.session.commit()

        # Get the new user's id.
        user = User.query.filter_by(username='******').first()
        user_id = user.id
        print("New user's id: " + str(user_id))

        # Select everything from the content table.
        articles = Content.query.all()

        # Create 500 progress entries for the test user from the first 500
        # entries in the Content table.
        # Rating: -1 == too easy, 0 == just right, 1 == too difficult.
        for i in range(ARTICLE_CT):
            content = articles[i]
            score = content.level
            # Add the appropriate Progress entry. (The original hard-coded
            # 5 in the middle branch; EASY is used here for consistency
            # with the first branch.)
            if score >= 0 and score < EASY:
                entry = Progress(user_id=user_id, content_id=content.id,
                                 rating=-1, read_ct=1)
            elif score >= EASY and score < JUST_RIGHT:
                entry = Progress(user_id=user_id, content_id=content.id,
                                 rating=0, read_ct=1)
            else:
                entry = Progress(user_id=user_id, content_id=content.id,
                                 rating=1, read_ct=1)
            db.session.add(entry)

        # Commit transactions to the DB.
        db.session.commit()
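# The constants used by create_entries() live at module scope and are not
# shown in this snippet. ARTICLE_CT is 500 per the comment above; EASY
# matches the 5 originally hard-coded in the middle branch; JUST_RIGHT is a
# pure placeholder value.
ARTICLE_CT = 500
EASY = 5
JUST_RIGHT = 10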
def assign_levels():
    with app.app_context():
        db.init_app(app)
        db.create_all()

        # Select everything from the content table, loop through it to
        # calculate each score, then write it back to the database.
        articles = Content.query.all()
        for article in articles:
            text = str(article.body)
            score = cal_diff(text)
            article.level = score
            db.session.add(article)

        # Commit the transactions.
        db.session.commit()
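# cal_diff() is defined elsewhere. Purely as an illustrative stand-in, a
# difficulty score could be as simple as mean word length; the real
# implementation may use an entirely different metric.
def cal_diff(text):
    words = text.split()
    if not words:
        return 0
    return sum(len(word) for word in words) / len(words)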
def create_classifier(user_id, word_ct):
    # Load data.
    user_ratings = get_ratings(user_id)
    all_words = get_words(user_ratings)

    # Convert all_words to a frequency distribution.
    all_words = nltk.FreqDist(all_words)

    # Create a list of the n most frequent words, where n = word_ct.
    # (The original sliced all_words.keys(), which is insertion-ordered,
    # not frequency-ordered; most_common() matches the stated intent.)
    feature_words = [word for word, count in all_words.most_common(word_ct)]

    # Create featuresets and train the Naive Bayes classifier.
    featuresets = create_featuresets(feature_words, user_ratings)
    classifier = nltk.NaiveBayesClassifier.train(featuresets)

    # Pickle the feature set and classifier for later use.
    saved_dictionary = pickle.dumps(feature_words)
    saved_classifier = pickle.dumps(classifier)

    # Save both in the user's row in the DB.
    with app.app_context():
        db.init_app(app)
        db.create_all()
        user = User.query.filter_by(id=user_id).first()
        user.feature_set = saved_dictionary
        user.classifier = saved_classifier
        db.session.commit()
    return
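# get_words() and create_featuresets() are not defined in this file. Given
# the [[word_list], rating] shape returned by get_ratings() (below), and the
# (feature_dict, label) pairs nltk.NaiveBayesClassifier.train() expects,
# minimal sketches might look like this; both bodies are assumptions.
def get_words(user_ratings):
    # Flatten every tokenized article into a single word list for FreqDist.
    return [word for word_list, rating in user_ratings for word in word_list]

def create_featuresets(feature_words, user_ratings):
    # One ({word: present?}, rating) pair per rated article.
    return [({word: (word in set(word_list)) for word in feature_words}, rating)
            for word_list, rating in user_ratings]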
def get_ratings(user_id):
    with app.app_context():
        db.init_app(app)
        db.create_all()

        # Grab all of the user's ratings from the database.
        user_entries = Progress.query.filter_by(user_id=user_id).all()

        # Tokenize each piece of content's words and create a
        # [[word_list], rating] entry per article.
        # ***NOTE: could be refined further, e.g. accepting alpha chars only.
        tokenized_entries = []
        for entry in user_entries:
            # The body is stored as binary, so split first, then decode each
            # word from binary to utf8 and strip surrounding punctuation.
            tokenized_words_b = Content.query.filter_by(
                id=entry.content_id).first().body.split()
            tokenized_words = []
            for word in tokenized_words_b:
                word = word.decode('utf8')
                word = word.strip('.,!?-*();:\'\"[]{}\\')
                tokenized_words.append(word)
            tokenized_entries.append([tokenized_words, entry.rating])
        return tokenized_entries
import os
import json
import sys

proj_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
sys.path.insert(0, proj_dir)

from cassiopeia.models.models import db, Country, Language
from flask import Flask
from flask_sqlalchemy import SQLAlchemy

# Custom app to load into the cassiopeia_prod database; utf8mb4 is needed
# for emoji support.
app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'mysql+mysqlconnector://[email protected]/cassiopeia_prod?charset=utf8mb4'
db.init_app(app)

# From https://restcountries.eu/#api-endpoints-language
f = open('all.json', 'r')
buf = f.read()
obj_dict = json.loads(buf)

with app.app_context():
    with db.session.no_autoflush:
        db.create_all()
        language_hash = set()  # unused in this snippet
        for c in obj_dict:
            country = Country(name=c['name'], alpha2code=c['alpha2Code'],
                              alpha3code=c['alpha3Code'], flag_image=c['flag'])
            for l in c['languages']:
                new_language = Language.query.filter(
                    Language.name == l['name']).first()
                if new_language is None:
                    new_language = Language(name=l['name'],
                                            iso639_1=l['iso639_1'],
                                            iso639_2=l['iso639_2'])
                    db.session.add(new_language)
                # The original snippet ends above; the association and
                # commit below assume a Country.languages relationship on
                # the model (not shown in this file).
                country.languages.append(new_language)
            db.session.add(country)
        db.session.commit()