def choice():
    """
    Endpoint: build (key, value) pairs from a user's analysis.

    Reads 'username' from the request query string, runs UserAnalysis.process
    on it (caching the result in the module-level Q_LIST), and returns a JSON
    list of [key, value] pairs where each pair comes from splitting the first
    element of each entry on its first '/'.
    """
    global Q_LIST
    res = []
    username = request.args.get('username')
    ua = UserAnalysis()
    Q_LIST = ua.process(username)
    for qa in Q_LIST:
        # Split once on the first '/' and unpack, instead of splitting twice.
        # NOTE(review): assumes every qa[0] contains a '/' — same requirement
        # as the original (which raised IndexError otherwise).
        key, val = qa[0].split('/', 1)
        res.append((key, val))
    return json.dumps(res)
def proceed_analysis(user, d, f):
    """
    Run a usage analysis for one user and append one CSV row to
    result/analysis.csv.

    params:
        user - identifier of the user being analysed
        d    - data object exposing identity_dict, consumed by UserAnalysis
        f    - unused; kept for backward compatibility (the original body
               immediately shadowed it with its own file handle)

    NOTE(review): analysis_interval and year_analysis are read from the
    enclosing/global scope — confirm they are defined before this is called.
    """
    ua = UserAnalysis(d, analysis_interval=analysis_interval)
    ise = ua.ISE(year_analysis=year_analysis)
    ave_m_e = ua.average_month_effect(ise)
    ave_v = ua.average_visit()
    u_exp = ua.usage_expectation(ave_m_e, ave_v)
    name = d.identity_dict.get('user_name')
    email = d.identity_dict.get('email')
    cu_number = d.identity_dict.get('CU_number')
    geo = d.identity_dict.get('geo')
    # 'with' guarantees the file is closed even if _writer raises;
    # the original leaked the handle on any exception before f.close().
    with open('result/analysis.csv', 'a') as out:
        _writer(csv.writer(out, delimiter=','),
                user, (ave_v, u_exp), name, email, cu_number, geo)
def __init__(self):
    """
    PUBLIC: Constructor
    -------------------
    Builds the collaborator objects this instance delegates to. Inference is
    deferred (set to None) until the trained models have been loaded.
    """
    # Collaborators, in dependency order.
    self.preprocess = Preprocess()
    self.storage_delegate = StorageDelegate()
    self.semantic_analysis = SemanticAnalysis()
    self.user_analysis = UserAnalysis()
    # Filled in later (requires loaded LDA model).
    self.inference = None
class SpotOn:

    def __init__(self):
        """
        PUBLIC: Constructor
        -------------------
        constructs member objects
        """
        #=====[ Step 1: create member objects ]=====
        self.preprocess = Preprocess()
        self.storage_delegate = StorageDelegate()
        self.semantic_analysis = SemanticAnalysis()
        self.user_analysis = UserAnalysis()
        # set by load() once the trained LDA model is available
        self.inference = None

    def load(self):
        """
        PUBLIC: load
        ------------
        loads in all parameters and constructs the Inference instance
        from the loaded LDA model
        """
        #=====[ Step 1: load in semantic analysis ]=====
        print_status("Initialization", "Loading ML parameters (Begin)")
        self.semantic_analysis.load()
        print_status("Initialization", "Loading ML parameters (End)")

        #=====[ Step 2: transfer over models to inference ]=====
        print_status("Initialization", "Constructing Inference instance (Begin)")
        self.inference = Inference(self.semantic_analysis.lda_model,
                                   self.semantic_analysis.lda_model_topics)
        print_status("Initialization", "Constructing Inference instance (End)")

    ####################################################################################################
    ######################[ --- Getting Users --- ]#####################################################
    ####################################################################################################

    def get_users(self):
        """
        PUBLIC: get_users
        -----------------
        constructs self.u_df from all available calendar dataframes
        """
        self.u_df = self.user_analysis.extract_users(self.storage_delegate.iter_calendar_dfs)
        self.u_df = self.semantic_analysis.analyze(self.u_df, 'all_event_names')

    def load_users(self, filepath='../data/pandas/users/users.df'):
        """
        PUBLIC: load_users
        ------------------
        constructs self.u_df from a saved file
        """
        self.u_df = pd.read_pickle(filepath)

    ####################################################################################################
    ######################[ --- Training --- ]##########################################################
    ####################################################################################################

    def extract_text(self, activity_row):
        """
        PRIVATE: extract_text
        ---------------------
        given a row representing an activity, this returns a list of words
        representing it as a 'text' (name words followed by description words)
        """
        text = []
        # isinstance instead of type(...) == list; non-list cells
        # (e.g. NaN from pandas) are skipped, as before
        if isinstance(activity_row['name'], list):
            text += activity_row['name']
        if isinstance(activity_row['words'], list):
            text += activity_row['words']
        return text

    def get_corpus_dictionary(self):
        """
        PRIVATE: get_corpus_dictionary
        ------------------------------
        Assembles a gensim corpus and dictionary from activities_df,
        where each text is name || words.
        """
        #=====[ Step 1: iterate through all activity dataframes ]=====
        print_status("get_corpus", "assembling texts")
        texts = []
        for df in self.storage_delegate.iter_activity_dfs():
            print_inner_status("assembling texts", "next df")
            texts += list(df.apply(self.extract_text, axis=1))

        #=====[ Step 2: get dictionary ]=====
        print_status("get_corpus", "assembling dictionary")
        dictionary = gensim.corpora.Dictionary(texts)

        #=====[ Step 3: get corpus ]=====
        print_status("get_corpus", "assembling corpus")
        corpus = [dictionary.doc2bow(text) for text in texts]

        return corpus, dictionary

    def train_semantic_analysis(self):
        """
        PUBLIC: train_semantic_analysis
        -------------------------------
        finds parameters for self.semantic_analysis
        """
        #=====[ Step 1: get the corpus ]=====
        print_status("train_semantic_analysis", "getting corpus/dictionary")
        corpus, dictionary = self.get_corpus_dictionary()

        #=====[ Step 2: train ]=====
        print_status("train_semantic_analysis", "training semantic analysis")
        self.semantic_analysis.train(corpus, dictionary)

    ####################################################################################################
    ######################[ --- Inference --- ]#########################################################
    ####################################################################################################

    def score_activities_old(self, user_activities, recommend_activities):
        """
        PUBLIC: score_activities_old
        ----------------------------
        Given a user and a list of activities, both represented as json,
        this will return (activity, score) pairs in a sorted list.
        """
        #=====[ Step 1: preprocess json inputs ]=====
        user_events_df = self.preprocess.preprocess_a(user_activities)
        activities_df = self.preprocess.preprocess_a(recommend_activities)

        #=====[ Step 2: construct a user from user_events_df ]=====
        def f():
            yield user_events_df
        users = self.user_analysis.extract_users(f)
        assert len(users) == 1
        user = users.iloc[0]

        #=====[ Step 3: get scores for each one ]=====
        # fixed: original referenced bare 'inference' (NameError) instead
        # of self.inference
        scores = [self.inference.score_match(user, activities_df.iloc[i])
                  for i in range(len(activities_df))]

        #=====[ Step 4: return sorted list of (activity, score) ]=====
        # fixed: original referenced undefined 'activities_json'; pair the
        # scores with the json activities that were passed in
        return sorted(zip(recommend_activities, scores), key=lambda x: x[1], reverse=True)

    def score_activities(self, user_activities, recommend_activities):
        """
        PUBLIC: score_activities
        ------------------------
        Given a user and a list of activities, both represented as json,
        this will return (scores, activities) via self.inference.
        """
        #=====[ Step 1: preprocess user_activities and recommend_activities ]=====
        user_activities = self.preprocess.preprocess_a(user_activities)
        recommend_activities = self.preprocess.preprocess_a(recommend_activities)

        #=====[ Step 2: get scores for each one ]=====
        scores, act = self.inference.score_activities(user_activities, recommend_activities)
        return scores, act

    ####################################################################################################
    ######################[ --- Interface --- ]#########################################################
    ####################################################################################################

    def print_lda_topics(self):
        """
        PUBLIC: print_lda_topics
        ------------------------
        prints out a representation of the lda topics
        found in self.semantic_analysis
        """
        self.semantic_analysis.print_lda_topics()
class SpotOn:

    def __init__(self):
        """
        PUBLIC: Constructor
        -------------------
        constructs member objects
        """
        #=====[ Step 1: create member objects ]=====
        self.preprocess = Preprocess()
        self.storage_delegate = StorageDelegate()
        self.semantic_analysis = SemanticAnalysis()
        self.user_analysis = UserAnalysis()
        # set by load() once the trained LDA model is available
        self.inference = None
        # optional cache of activities to recommend from; see
        # load_activities_corpus()
        self.activities_corpus = None

    def load(self):
        """
        PUBLIC: load
        ------------
        loads in all parameters and constructs the Inference instance
        from the loaded LDA model
        """
        #=====[ Step 1: load in semantic analysis ]=====
        print_status("Initialization", "Loading ML parameters (Begin)")
        self.semantic_analysis.load()
        print_status("Initialization", "Loading ML parameters (End)")

        #=====[ Step 2: transfer over models to inference ]=====
        print_status("Initialization", "Constructing Inference instance (Begin)")
        self.inference = Inference(self.semantic_analysis.lda_model,
                                   self.semantic_analysis.lda_model_topics)
        print_status("Initialization", "Constructing Inference instance (End)")

    ####################################################################################################
    ######################[ --- Getting Users --- ]#####################################################
    ####################################################################################################

    def get_users(self):
        """
        PUBLIC: get_users
        -----------------
        constructs self.u_df from all available calendar dataframes
        """
        self.u_df = self.user_analysis.extract_users(self.storage_delegate.iter_calendar_dfs)
        self.u_df = self.semantic_analysis.analyze(self.u_df, 'all_event_names')

    def load_users(self, filepath='../data/pandas/users/users.df'):
        """
        PUBLIC: load_users
        ------------------
        constructs self.u_df from a saved file
        """
        self.u_df = pd.read_pickle(filepath)

    ####################################################################################################
    ######################[ --- Training --- ]##########################################################
    ####################################################################################################

    def extract_text(self, activity_row):
        """
        PRIVATE: extract_text
        ---------------------
        given a row representing an activity, this returns a list of words
        representing it as a 'text' (name words followed by description words)
        """
        text = []
        # isinstance instead of type(...) == list; non-list cells
        # (e.g. NaN from pandas) are skipped, as before
        if isinstance(activity_row['name'], list):
            text += activity_row['name']
        if isinstance(activity_row['words'], list):
            text += activity_row['words']
        return text

    def get_corpus_dictionary(self):
        """
        PRIVATE: get_corpus_dictionary
        ------------------------------
        Assembles a gensim corpus and dictionary from activities_df,
        where each text is name || words.
        """
        #=====[ Step 1: iterate through all activity dataframes ]=====
        print_status("get_corpus", "assembling texts")
        documents = []
        for df in self.storage_delegate.iter_activity_dfs():
            # concatenates the two word lists element-wise per row
            df['lda_doc'] = df['name'] + df['words']
            documents += list(df['lda_doc'])

        #=====[ Step 2: get dictionary ]=====
        print_status("get_corpus", "assembling dictionary")
        dictionary = gensim.corpora.Dictionary(documents)

        #=====[ Step 3: get corpus ]=====
        print_status("get_corpus", "assembling corpus")
        corpus = [dictionary.doc2bow(d) for d in documents]

        return corpus, dictionary

    def print_lda_topics(self):
        """
        PUBLIC: print_lda_topics
        ------------------------
        prints out a representation of the lda topics
        found in self.semantic_analysis
        """
        print_header("LDA TOPICS: ")
        self.semantic_analysis.print_lda_topics()

    def train_semantic_analysis(self):
        """
        PUBLIC: train_semantic_analysis
        -------------------------------
        finds parameters for self.semantic_analysis
        """
        #=====[ Step 1: get the corpus ]=====
        print_status("train_semantic_analysis", "getting corpus/dictionary")
        corpus, dictionary = self.get_corpus_dictionary()

        #=====[ Step 2: train ]=====
        print_status("train_semantic_analysis", "training semantic analysis")
        self.semantic_analysis.train(corpus, dictionary)

        #####[ DEBUG: print out lda topics ]#####
        self.print_lda_topics()

    ####################################################################################################
    ######################[ --- Processing --- ]########################################################
    ####################################################################################################

    def activities_json_to_df(self, a_json):
        """
        PRIVATE: activities_json_to_df
        ------------------------------
        given: list of json dicts representing activities
        returns: dataframe with preprocessing, semantic analysis
        """
        a_df = self.preprocess.preprocess_a(a_json)
        a_df = self.semantic_analysis.add_lda_vec_column(a_df)
        a_df = self.semantic_analysis.add_w2v_sum_column(a_df)
        return a_df

    def calendar_events_json_to_df(self, ce_json):
        """
        PRIVATE: calendar_events_json_to_df
        -----------------------------------
        given: list of json dicts representing calendar events
        returns: dataframe with preprocessing, semantic analysis
        """
        ce_df = self.preprocess.preprocess_ce(ce_json)
        ce_df = self.semantic_analysis.add_lda_vec_column(ce_df)
        ce_df = self.semantic_analysis.add_w2v_sum_column(ce_df)
        return ce_df

    def calendar_events_to_user_representation(self, ce_json):
        """
        PUBLIC: calendar_events_to_user_representation
        ----------------------------------------------
        given a list containing json dicts representing calendar events
        belonging to a single user, this will return a representation that
        can be passed to score_activity_for_user and recommend_for_user
        """
        user_df = self.calendar_events_json_to_df(ce_json)
        lda_vec = self.semantic_analysis.get_user_lda_vec(user_df)
        return {'events_df': user_df, 'lda_vec': lda_vec}

    def load_activities_corpus(self, activities):
        """
        PUBLIC: load_activities_corpus
        ------------------------------
        params: activities - list of activities to recommend
        returns: none

        notes: use this function to load a big activities corpus into the
        SpotOn object; later calls to recommend_for_user will pull activities
        to recommend from this corpus. Can be called multiple times to update
        to different activities.
        """
        self.activities_corpus = self.activities_json_to_df(activities)

    ####################################################################################################
    ######################[ --- Recommending --- ]######################################################
    ####################################################################################################

    def score_activity_for_user(self, user_representation, activity):
        """
        PUBLIC: score_activity_for_user
        -------------------------------
        params: user_representation - representation of the user to score for
                                      (created by calendar_events_to_user_representation)
                activity - json of the activity to score

        notes: goes from the representation of the user + one activity
        -> a score for how much they'd like it
        """
        #=====[ Step 1: get activity dataframe ]=====
        activity_df = self.activities_json_to_df([activity])

        #=====[ Step 2: get scored dataframe ]=====
        activity_df = self.inference.infer_scores(user_representation, activity_df)

        #=====[ Step 3: extract and return score ]=====
        return activity_df.iloc[0]['score']

    def recommend_for_user(self, user_representation, activities=None, topn=10):
        """
        PUBLIC: recommend_for_user
        --------------------------
        params: user_representation - representation of the user to recommend for
                activities - either a list of json activities, or None if
                             .load_activities_corpus has been called
                topn - number of recommendations to return

        raises: ValueError if activities is None and no corpus was loaded
        """
        #=====[ Step 1: get activities_df, df of activities to recommend ]=====
        if activities is not None:
            activities_df = self.activities_json_to_df(activities)
        elif self.activities_corpus is not None:
            activities_df = self.activities_corpus
        else:
            # fixed: the original called self.load_activities_corpus() with
            # no arguments here, which is a guaranteed TypeError since that
            # method requires an activities list; fail with a clear message
            raise ValueError("No activities given and no corpus loaded; "
                             "call load_activities_corpus() first.")

        #=====[ Step 2: get scores, return sorted ]=====
        activity_ranks = self.inference.rank_activities(user_representation, activities_df)
        return list(activity_ranks)

    def recommend_users_for_activity(self, activity, list_of_users, topn=10):
        """
        PUBLIC: recommend_users_for_activity
        ------------------------------------
        params: activity - activity to recommend users for
                list_of_users - list of users to filter
                topn - number of users to return

        notes: goes from an activity and a list of users -> (up to) topn
        users best matched to that activity
        """
        scores = [self.score_activity_for_user(user, activity) for user in list_of_users]
        # slice before indexing so topn > len(list_of_users) no longer
        # raises IndexError (original indexed range(topn) unconditionally)
        top_ix = np.argsort(scores)[::-1][:topn]
        return [list_of_users[i] for i in top_ix]