# Tail of the LR scoring task: fit a regularized logistic-regression model on
# the training split, score the full prediction matrix, and write the
# per-class probabilities back through update_lr_rat.
# (Removed a large block of commented-out ad-hoc test input that obscured the
# live code; reconstruct such probes in a separate test script if needed.)
lr = LogisticRegression(C=0.1, penalty='l2')
lr.fit(train_X_lr, train_y)

# predict_proba returns one probability column per class, ordered as in
# lr.classes_ (printed below so the columns can be interpreted in the logs).
lr_predict_final = lr.predict_proba(X_predict)
update_lr_rat(df_data, lr_predict_final.tolist())
print(lr.classes_)

if __name__ == '__main__':
    # Re-run the task on a fixed interval of 10 -- the unit (seconds vs ms)
    # is defined by sched_util.schedule_ and is not visible here; confirm.
    sched_util.schedule_(10, task_func=process_task)
# Tail of the movie-based recall task: for every new comment row, look up
# item-based similar movies and insert/update a recommendation for the user
# unless that user already commented on the similar movie.
start_time = datetime.datetime.now()
df_comment_new_data_ldim = get_comment_data()
# One shared connection for the whole pass. (Fix: the original called
# common.get_connection() twice and leaked the first connection.)
conn = common.get_connection()
for i in df_comment_new_data_ldim.index:
    # Hoist the repeated .iloc[i] lookups into a single row fetch.
    row = df_comment_new_data_ldim.iloc[i]
    print(row['MOVIEID'], row['USERID'])
    ibmovie_list = get_ibmovie_by_movieid(row['MOVIEID'], conn)
    for j in ibmovie_list:
        # j[0] is the candidate (similar) movie id; skip movies the user
        # has already commented on.
        if exist_in_comment(j[0], row['USERID'], conn):
            print('exist in comment')
        else:
            insert_or_update_recmovie(j[0], row['USERID'], row['MOVIEID'], conn)
    # update_comment_new_data_flag(row['ID'], conn)
    # Commit after each source row so progress is persisted incrementally.
    # NOTE(review): the mangled original does not show whether the commit was
    # per-row or once at the end -- confirm against the pristine file.
    conn.commit()
end_time = datetime.datetime.now()
print(end_time - start_time)
print('finish process movie based recall task:' + str(datetime.datetime.now()))

if __name__ == '__main__':
    # Interval unit is defined by sched_util.schedule_ -- confirm (a sibling
    # copy of this script schedules with 10 * 60 * 1000 instead of 60).
    sched_util.schedule_(60, func_main)
# Tail of the comment-to-libsvm export task: build timestamped train/test
# file paths, dump the feature matrices in svmlight format, record the file
# locations in config, and pickle the fitted transformers for predict time.

# NOTE(review): this chunk opens mid-statement -- `conn)` closes a
# cfg.get_config_property(...) call whose opening line is outside this view
# (presumably fetching 'train_file_lr_t_url' into train_file_lr_base_url,
# mirroring the test-file line below -- confirm against the pristine file).
conn)
test_file_lr_base_url = cfg.get_config_property('test_file_lr_t_url', conn)

# Expand the %-style URL templates with the current timestamp string.
train_file_lr = train_file_lr_base_url % time_now_str
test_file_lr = test_file_lr_base_url % time_now_str

# Convert to libsvm (svmlight) format files on disk.
dump_svmlight_file(train_X_lr, train_y_lr_, train_file_lr)
dump_svmlight_file(test_X_lr, test_y_lr_, test_file_lr)

# Persist the generated file locations (value first, key second -- the
# argument order is consistent across all four calls) so downstream tasks
# can locate the current train/test files.
cfg.set_config_property(train_file_fm, 'train_file_fm_url', conn)
cfg.set_config_property(test_file_fm, 'test_file_fm_url', conn)
cfg.set_config_property(train_file_lr, 'train_file_lr_url', conn)
cfg.set_config_property(test_file_lr, 'test_file_lr_url', conn)

# Pickle `v` (presumably the dict-vectorizer, per the 'dict2vec' config key)
# and `scaler` so the predict-side task can reuse the fitted transformers.
dict2vec_save_url = cfg.get_config_property('dict2vec', conn)
with open(dict2vec_save_url, 'wb') as f:
    pkl.dump(v, f)
scaler_save_url = cfg.get_config_property('scaler', conn)
with open(scaler_save_url, 'wb') as f:
    pkl.dump(scaler, f)

end_time = datetime.datetime.now()
print(end_time - start_time)
print('finish process comment to libsvm task:' + str(datetime.datetime.now()))

if __name__ == '__main__':
    # Interval unit is defined by sched_util.schedule_ -- confirm.
    sched_util.schedule_(60, process_task)
# Tail of the movie-based recall task (second copy): walk the new comment
# rows, fetch item-based similar movies, and upsert a recommendation for each
# one the user has not already commented on.
start_time = datetime.datetime.now()
df_comment_new_data_ldim = get_comment_data()
# Acquire the connection exactly once. (Fix: the original obtained a second
# connection immediately after the first, leaking the first one.)
conn = common.get_connection()
for idx in df_comment_new_data_ldim.index:
    # Single row fetch instead of repeated .iloc lookups per field.
    rec = df_comment_new_data_ldim.iloc[idx]
    print(rec['MOVIEID'], rec['USERID'])
    similar_movies = get_ibmovie_by_movieid(rec['MOVIEID'], conn)
    for cand in similar_movies:
        # cand[0] is the candidate movie id from the item-based list.
        if exist_in_comment(cand[0], rec['USERID'], conn):
            print('exist in comment')
        else:
            insert_or_update_recmovie(cand[0], rec['USERID'], rec['MOVIEID'], conn)
    # update_comment_new_data_flag(rec['ID'], conn)
    # Persist after each processed row.
    # NOTE(review): the mangled original does not show whether the commit was
    # per-row or once at the end -- confirm against the pristine file.
    conn.commit()
end_time = datetime.datetime.now()
print(end_time - start_time)
print('finish process movie based recall task:' + str(datetime.datetime.now()))

if __name__ == '__main__':
    # NOTE(review): a sibling copy of this script schedules with 60 while
    # this one uses 10 * 60 * 1000 -- the units disagree by several orders of
    # magnitude; confirm sched_util.schedule_'s expected unit and unify.
    sched_util.schedule_(10 * 60 * 1000, func_main)