import os

import pandas as pd

from utils.parse_yaml import global_config


def get_predict_result(test_json=None):
    if test_json is None:
        test_json = [1, '本科', '北京', '全职', 0, '物业经理']
    model_pkl_dir = global_config.get('machine_learn').get('model_pkl_dir')
    if model_pkl_dir and os.path.exists(model_pkl_dir):
        model = pd.read_pickle(model_pkl_dir)  # faster than rebuilding the model
    else:
        model = pd.read_pickle('../../Data/model.pkl.bz2.001', compression='bz2')
    label = model.predict([test_json])
    prob = model.predict_proba([test_json])
    log_prob = model.predict_log_proba([test_json])
    print('Predict class for X: {}, Predict class probabilities for X: {}, '
          'Predict class log-probabilities for X: {}'.format(label, prob, log_prob))
    return label, prob, log_prob
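# For context, a minimal sketch of how a pickle compatible with the loader
# above could be produced. This assumes a scikit-learn classifier and
# already-encoded numeric features; the estimator, sample rows, and output
# path are illustrative, not taken from this project.
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

X = [[1, 0, 0, 0, 0, 0], [3, 1, 1, 1, 2, 1]]  # hypothetical encoded feature rows
y = [0, 1]                                     # hypothetical labels

clf = DecisionTreeClassifier().fit(X, y)
# pandas can pickle arbitrary Python objects; compression is inferred from
# the file suffix on write and can be forced with compression= on read.
pd.to_pickle(clf, 'model_example.pkl.bz2')
restored = pd.read_pickle('model_example.pkl.bz2')
print(restored.predict([[1, 0, 0, 0, 0, 0]]))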
import pickle

import pandas as pd
import tflearn

from utils.parse_yaml import global_config


def get_dnn_predict_result(test_json=None):
    if test_json is None:
        test_json = [1, '本科', '哈尔滨', '全职', 0, '物业经理']
    # Rebuild the network graph, then load the trained weights.
    input_layer = tflearn.input_data(shape=[None, 6], name='input')
    dense1 = tflearn.fully_connected(input_layer, 128, name='dense1')
    dense2 = tflearn.fully_connected(dense1, 256, name='dense2')
    softmax = tflearn.fully_connected(dense2, 4, activation='softmax')
    regression = tflearn.regression(softmax, optimizer='adam',
                                    learning_rate=0.001,
                                    loss='categorical_crossentropy')
    # Define the classifier, with model checkpointing (autosave).
    model = tflearn.DNN(regression, checkpoint_path='model.tfl.ckpt')
    model.load(global_config.get('machine_learn').get('deep_learn').get(
        'dnn_model_tfl') + 'model.tfl')
    # Wrap the single sample in a list so it becomes one row, not six.
    pd_json = pd.DataFrame([test_json],
                           columns=['公司规模', '学历', '工作城市', '用工制', '经验', 'kw'])
    # Reuse the category vocabularies saved at training time so the integer
    # codes match the ones the network was trained on.
    with open(global_config.get('machine_learn').get('deep_learn').get(
            'dnn_catagorical'), 'rb') as f:
        _CATEGORICAL_TYPES_ = pickle.load(f)
    cat_columns = pd_json.select_dtypes(['object']).columns
    pd_json[cat_columns] = pd_json[cat_columns].apply(lambda x: x.astype(
        pd.api.types.CategoricalDtype(categories=_CATEGORICAL_TYPES_[x.name],
                                      ordered=True)))
    for col in cat_columns:
        pd_json[col] = pd_json[col].cat.codes
    pd_json = pd_json.fillna(0)  # fill unknown attributes with 0
    result = model.predict([pd_json.loc[0].to_list()])
    print(f'Prediction result: {result}')
    return result
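# The dnn_catagorical pickle loaded above presumably maps each categorical
# column to the category list learned at training time, so predict-time
# codes match training-time codes. A minimal sketch of building and saving
# such a mapping; the training row and output path are illustrative.
import pickle

import pandas as pd

train_df = pd.DataFrame(
    [[1, '本科', '北京', '全职', 0, '物业经理']],
    columns=['公司规模', '学历', '工作城市', '用工制', '经验', 'kw'])
cat_columns = train_df.select_dtypes(['object']).columns
# Freeze each column's vocabulary; values unseen at predict time become NaN
# under the CategoricalDtype and therefore -1 after .cat.codes.
categorical_types = {
    col: train_df[col].astype('category').cat.categories for col in cat_columns
}
with open('dnn_categorical_example.pkl', 'wb') as f:
    pickle.dump(categorical_types, f)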
import json

from utils.parse_yaml import global_config

original_job_detail_data_dir = 'G:\\数据集\\zhaopincom\\jobDetails\\jobDetailslinux\\'
# original_job_detail_data_dir = 'G:\\数据集\\zhaopincom\\jobDetails\\test\\'  # for testing
stage_job_info_file_one = '保存第一阶段.json'
stage_job_info_file_two = '第二阶段文件.json'
finally_job_detail_storage_drop_nan_file = 'finally_job_detail_storage_drop_nan.json'
finally_recommend_storage_drop_nan_file = 'finally_recommend_storage_drop_nan.json'
# --------------------------------------------------
# Which job_info columns to keep:
#   description-style columns
#   number
#   lat / lon
# ----------------- industry categories -------------
# genre = '行业'
if global_config.get('global_data_process').get('genres'):
    with open(global_config.get('global_data_process').get('genres'),
              'r', encoding='utf-8') as f:
        genres = json.loads(f.readline())
else:  # fall back to the bundled default
    with open('E:\\WorkSpacePyCharm\\PythonLearn\\ERDAV\\Data\\category.json',
              'r', encoding='utf-8') as f:
        genres = json.loads(f.readline())
# Directory and file names for merging the detail-page and summary-page tables.
storage_dir_merge = 'G:\\数据集\\zhaopincom\\DP\\storage_merge\\'
finally_elastic_merge_nan_file = 'finally_elastic_merge_nan_file.json'
finally_mongo_merge_nan_file = 'finally_mongo_merge_nan_file.json'
finally_mongo_merge_line_file = 'finally_mongo_merge_line_file.json'
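# Note that only the first line of the genres file is parsed above, so the
# category JSON has to be written as a single line. A minimal sketch of
# producing a compatible file; the mapping and path are illustrative.
import json

genres_example = {'互联网/IT': ['Java开发', 'Python开发'], '房地产': ['物业经理']}
with open('category_example.json', 'w', encoding='utf-8') as f:
    # ensure_ascii=False keeps the Chinese labels readable; without indent=,
    # json.dump writes one line, which matches the readline() above.
    json.dump(genres_example, f, ensure_ascii=False)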
Including another URLconf
    1. Import the include() function: from django.urls import include, path
    2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
"""
from django.conf.urls import url, include
from django.contrib import admin
from django.urls import path
from django.views.decorators.cache import cache_page

from ERDAV import settings
from dataView import views
from utils.parse_yaml import global_config

cache_age = 60 * 2  # cache TTL in seconds
if global_config.get('django'):  # global config file
    cache_age = 60 * int(global_config.get('django').get('cache_age'))

urlpatterns = [
    url(r'^admin/', admin.site.urls),
    # To use cache_page inside the URLconf, wrap the view function like this.
    path('job/getJobsInfo', cache_page(cache_age)(views.getJobInfos)),
    path('job/AvgSalaryEveryCity', cache_page(cache_age)(views.getAvgSalaryEveryCity)),
    path('job/jobCountsEveryCity', cache_page(cache_age)(views.getJobCountsByEveryCity)),
    path('job/avgWage', cache_page(cache_age)(views.getAvgSalaryByCityAndJobType)),
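# For reference, wrapping a view as cache_page(cache_age)(view) in the
# URLconf is equivalent to decorating the view itself. A minimal sketch;
# my_view is a hypothetical view, not one from dataView.
from django.http import HttpResponse
from django.views.decorators.cache import cache_page


@cache_page(60 * 2)  # same effect as cache_page(60 * 2)(my_view) in urls.py
def my_view(request):
    return HttpResponse('cached for two minutes')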
from utils.parse_yaml import global_config

reserve_word_file = 'E:\\WorkSpacePyCharm\\PythonLearn\\ERDAV\\analysis\\保留字.txt'  # user dictionary
stop_words_dir = 'E:\\WorkSpacePyCharm\\PythonLearn\\ERDAV\\analysis\\stopWord\\'
custom_stop_word_file = 'E:\\WorkSpacePyCharm\\PythonLearn\\ERDAV\\analysis\\mystopWord\\自定义停用词.txt'

# --------------------- global config overrides ------------------------------
if global_config.get('analysis'):  # if the section exists, override the defaults above
    for k, v in global_config.get('analysis').items():
        globals()[k] = v  # rebind the module-level name k to the configured value
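# The globals() trick above rebinds module-level names by their string keys,
# so any key in the YAML 'analysis' section silently overrides (or creates)
# a module constant. A self-contained sketch of the same pattern; the dict
# below stands in for the parsed YAML and is illustrative.
stop_words_example = 'default/stopWord/'                   # module-level default

override_cfg = {'stop_words_example': 'custom/stopWord/'}  # e.g. parsed from YAML
for k, v in override_cfg.items():
    globals()[k] = v                                       # rebind by name

print(stop_words_example)  # -> custom/stopWord/
# Note this only takes effect at import time, and unknown keys create new
# names instead of raising, so config typos pass silently.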
""" 配置文件 """ from utils.parse_yaml import global_config seleniumChrome_category_dir = 'E:\\WorkSpacePyCharm\\PythonLearn\\ERDAV\\Data\\' + 'category.txt' # 项目数据目录,绝对路径好 zhaopin_projectDataDir = 'G:\\数据集\\\zhaopincom\\zhaopinData' # 职位数据抓取保存的位置 ,绝对路径好 job_details_handle_data_dir = "G:\\数据集\\\zhaopincom\\zhaopinData" # 从这里目录取出详细页的url job_info_projectDataDir = 'G:\\数据集\\zhaopincom\jobDetails' # job详细信息 html文件解析后 存储在这个目录下 chrome_user_dir = 'E:\\WorkSpacePyCharm\\PythonLearn\\ERDAV\\myspider\\AutomationProfile' # 登录保存信息用户目录 chromedriver_dir = 'E:\\WorkSpacePyCharm\\PythonLearn\\ERDAV\\myspider\\chromedriver.exe' if global_config.get('global_spider'): # 如果配置存在,覆盖 for k, v in global_config.get('global_spider').items(): globals( )[k] = v # eval(x) How to get the value of a variable given its name in a string? [duplicate]
import jieba.analyse
import numpy
import pymongo
from bokeh.plotting import figure
from efficient_apriori import apriori
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

from analysis.config import reserve_word_file
from utils.parse_yaml import global_config

jieba.load_userdict(reserve_word_file)  # user dictionary

db_config = global_config.get('global_database').get('db').get(
    global_config.get('global_data_source'))  # config node for the active data source
myclient = pymongo.MongoClient(db_config.get('mongo_url'))
mydb = myclient[db_config.get('db_name')]
mycol = mydb[db_config.get('col')]


def db_mon(kw, max_rows=1000):
    """Return the concatenated job-description text for a keyword.

    :param kw: job keyword to match
    :param max_rows: maximum number of documents to read
    :return: all matching 职位描述 fields joined with newlines
    """
    # Query documents, projecting only the 职位描述 (job description) field.
    posts = mycol.find({'kw': kw}, {'职位描述': 1, '_id': 0}).limit(max_rows)
    # Join the descriptions, skipping documents that lack one.
    return '\n'.join(v.get('职位描述') for v in posts if v.get('职位描述') is not None)
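# A minimal usage sketch: feed db_mon() output into jieba's TF-IDF keyword
# extraction. The keyword and topK are illustrative choices, and this
# assumes the MongoDB collection above is populated.
text = db_mon('物业经理', max_rows=500)
# extract_tags ranks words by TF-IDF; withWeight=True also returns the scores.
for word, weight in jieba.analyse.extract_tags(text, topK=20, withWeight=True):
    print(word, round(weight, 4))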
# ----------------------------------------
from utils.parse_yaml import global_config

jobs_json_file = 'G:\\数据集\\zhaopincom\\DP\\storage_jobs\\finally_storage_drop_nan_file.json'
job_detail_json_file = 'G:\\数据集\\zhaopincom\\DP\\storage_job_info\\finally_job_detail_storage_drop_nan.json'
job_recommend_json_file = 'G:\\数据集\\zhaopincom\\DP\\storage_job_info\\finally_recommend_storage_drop_nan.json'
# ----------------------------------------
mongo_url = 'mongodb://localhost:27017/'
mongo_DB = 'ERADV'
mongo_col_jobs = 'jobs'
mongo_col_job_info = 'job_info'
mongo_col_job_recommend = 'recommend_job'
# During data processing the number column is renamed to _id, Mongo's default id.
# ------------------------------------------
elastic_index_jobs = 'jobs'
elastic_index_job_detail = 'job_info'  # index name
elastic_index_job_recommend_data = 'recommend_job'
id_column_name = 'number'  # column to use as the id

if global_config.get('global_data_source'):  # if configured, override the defaults above
    db_config = global_config.get('global_database').get('db').get(
        global_config.get('global_data_source'))
    if global_config.get('global_data_source') == 'mongo':
        mongo_url = db_config.get('mongo_url')
        mongo_DB = db_config.get('db_name')
        mongo_col_jobs = db_config.get('col')
    else:
        elastic_index_jobs = db_config.get('col')
    id_column_name = db_config.get('id_column_name')
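# For orientation, the .get() chains above imply a parsed-YAML shape like
# the following; this dict is a hypothetical illustration, not the
# project's actual configuration file.
global_config_example = {
    'global_data_source': 'mongo',
    'global_database': {
        'db': {
            'mongo': {
                'mongo_url': 'mongodb://localhost:27017/',
                'db_name': 'ERADV',
                'col': 'jobs',
                'id_column_name': 'number',
            },
            'elastic': {
                'col': 'jobs',
                'id_column_name': 'number',
            },
        },
    },
}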