Example #1
import os

import pandas as pd

from utils.parse_yaml import global_config

def get_predict_result(test_json=None):
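    """Load the pickled model (path from global_config when set, otherwise the
    bundled fallback file) and return the predicted label, class probabilities
    and class log-probabilities for a single sample."""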
    if test_json is None:
        test_json = [
            1,
            '本科',
            '北京',
            '全职',
            0,
            '物业经理',
        ]
    if global_config.get('machine_learn').get('model_pkl_dir'):
        if os.path.exists(
                global_config.get('machine_learn').get('model_pkl_dir')):
            model = pd.read_pickle(
                global_config.get('machine_learn').get('model_pkl_dir'))  # this is faster
        else:
            model = pd.read_pickle('../../Data/model.pkl.bz2.001', 'bz2')
    else:
        model = pd.read_pickle('../../Data/model.pkl.bz2.001', 'bz2')

    label = model.predict([test_json])
    prob = model.predict_proba([test_json])
    log_prob = model.predict_log_proba([test_json])
    print(
        'Predict class for X: {}, Predict class probabilities for X: {}, Predict class log-probabilities for X: {}'
        .format(label, prob, log_prob))
    return (label, prob, log_prob)
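
A minimal usage sketch for the function above; the record layout follows the default test_json (company size, education, city, employment type, experience, keyword), and the city value '上海' is only an illustrative substitution:

if __name__ == '__main__':
    label, prob, log_prob = get_predict_result([1, '本科', '上海', '全职', 0, '物业经理'])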
Example #2
import pickle

import pandas as pd
import tflearn

from utils.parse_yaml import global_config

def get__dnn_predict_result(test_json=None):
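    """Rebuild the TFLearn DNN architecture, load the saved weights, encode the
    categorical features with the pickled category mappings, and return the
    model's prediction for a single sample."""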
    if test_json is None:
        test_json = [1, '本科', '哈尔滨', '全职', 0, '物业经理']

    # Load a model
    input_layer = tflearn.input_data(shape=[None, 6], name='input')
    dense1 = tflearn.fully_connected(input_layer, 128, name='dense1')
    dense2 = tflearn.fully_connected(dense1, 256, name='dense2')
    softmax = tflearn.fully_connected(dense2, 4, activation='softmax')
    regression = tflearn.regression(softmax,
                                    optimizer='adam',
                                    learning_rate=0.001,
                                    loss='categorical_crossentropy')
    # Define classifier, with model checkpoint (autosave)
    model = tflearn.DNN(regression, checkpoint_path='model.tfl.ckpt')
    model.load(
        global_config.get('machine_learn').get('deep_learn').get(
            'dnn_model_tfl') + "model.tfl")
    pd_json = pd.DataFrame([test_json],
                           columns=['公司规模', '学历', '工作城市', '用工制', '经验', 'kw'])

    with open(
            global_config.get('machine_learn').get('deep_learn').get(
                'dnn_catagorical'), 'rb') as f:
        _CATEGORICAL_TYPES_ = pickle.load(f)
    cat_columns = pd_json.select_dtypes(['object']).columns
    pd_json[cat_columns] = pd_json[cat_columns].apply(lambda x: x.astype(
        pd.api.types.CategoricalDtype(categories=_CATEGORICAL_TYPES_[x.name],
                                      ordered=True)))

    for col in cat_columns:
        pd_json[col] = pd_json[col].cat.codes

    pd_json = pd_json.fillna(0)  # fill unknown attributes with 0
    result = model.predict([pd_json.loc[0].to_list()])
    print(f'Prediction result: {result}')
    return result
Example #3
import json

from utils.parse_yaml import global_config

original_job_detail_data_dir = "G:\\数据集\\zhaopincom\\jobDetails\\jobDetailslinux\\"
# original_job_detail_data_dir = "G:\\数据集\\zhaopincom\\jobDetails\\test\\"  # for testing

stage_job_info_file_one = '保存第一阶段.json'
stage_job_info_file_two = '第二阶段文件.json'
finally_job_detail_storage_drop_nan_file = 'finally_job_detail_storage_drop_nan.json'
finally_recommend_storage_drop_nan_file = 'finally_recommend_storage_drop_nan.json'
# --------------------------------------------------
#  which job_info columns to keep
# desc-type columns
# number
# lat lon

# ----------------- industry categories -------------
# genre='行业'
if global_config.get('global_data_process').get('genres'):
    genres = json.loads(
        open(global_config.get('global_data_process').get('genres'),
             'r',
             encoding='utf-8').readline())
else:  # use the default
    genres = json.loads(
        open('E:\\WorkSpacePyCharm\\PythonLearn\\ERDAV\\Data\\category.json',
             'r',
             encoding='utf-8').readline())

#   directory and file names for storing the merge of the detail-page and summary-page tables
storage_dir_merge = 'G:\\数据集\\zhaopincom\\DP\\storage_merge\\'
finally_elastic_merge_nan_file = 'finally_elastic_merge_nan_file.json'
finally_mongo_merge_nan_file = 'finally_mongo_merge_nan_file.json'
finally_mongo_merge_line_file = 'finally_mongo_merge_line_file.json'
Example #4
Including another URLconf
    1. Import the include() function: from django.urls import include, path
    2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
"""
from django.conf.urls import url, include
from django.contrib import admin
from django.urls import path
from django.views.decorators.cache import cache_page

from ERDAV import settings
from dataView import views
from utils.parse_yaml import global_config

cache_age = 60 * 2  # cache duration in seconds

if global_config.get('django'):
    # from the global config file
    cache_age = 60 * int(global_config.get('django').get('cache_age'))

urlpatterns = [
    url(r'^admin/', admin.site.urls),

    # When using cache_page in a URLconf, you can wrap the view function like this.
    path('job/getJobsInfo',
         cache_page(cache_age)(views.getJobInfos)),
    path('job/AvgSalaryEveryCity',
         cache_page(cache_age)(views.getAvgSalaryEveryCity)),
    path('job/jobCountsEveryCity',
         cache_page(cache_age)(views.getJobCountsByEveryCity)),
    path('job/avgWage',
         cache_page(cache_age)(views.getAvgSalaryByCityAndJobType)),
Example #5
from utils.parse_yaml import global_config

reserve_word_file = 'E:\\WorkSpacePyCharm\\PythonLearn\\ERDAV\\analysis\\保留字.txt'  # user dictionary
stop_words_dir = 'E:\\WorkSpacePyCharm\\PythonLearn\\ERDAV\\analysis\\stopWord\\'
custom_stop_word_file = 'E:\\WorkSpacePyCharm\\PythonLearn\\ERDAV\\analysis\\mystopWord\\自定义停用词.txt'
# --------------------- global config override ------------------------------------
if global_config.get('analysis'):
    # if this config section exists, override the defaults above
    for k, v in global_config.get('analysis').items():
        globals()[k] = v  # override the module-level variable named k
Example #6
"""
Configuration file
"""
from utils.parse_yaml import global_config

seleniumChrome_category_dir = 'E:\\WorkSpacePyCharm\\PythonLearn\\ERDAV\\Data\\' + 'category.txt'  # project data directory; absolute paths are safest
zhaopin_projectDataDir = 'G:\\数据集\\zhaopincom\\zhaopinData'  # where scraped job posting data is saved; absolute path
job_details_handle_data_dir = "G:\\数据集\\zhaopincom\\zhaopinData"  # detail-page URLs are read from this directory
job_info_projectDataDir = 'G:\\数据集\\zhaopincom\\jobDetails'  # parsed job-detail HTML files are stored in this directory
chrome_user_dir = 'E:\\WorkSpacePyCharm\\PythonLearn\\ERDAV\\myspider\\AutomationProfile'  # Chrome user-data directory that keeps the login session
chromedriver_dir = 'E:\\WorkSpacePyCharm\\PythonLearn\\ERDAV\\myspider\\chromedriver.exe'

if global_config.get('global_spider'):
    # if this config section exists, override the defaults above
    for k, v in global_config.get('global_spider').items():
        globals()[k] = v  # override the module-level variable named k
Example #7
import jieba.analyse
import numpy
import pymongo
from bokeh.plotting import figure
from efficient_apriori import apriori
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

from analysis.config import reserve_word_file
from utils.parse_yaml import global_config

jieba.load_userdict(reserve_word_file)  # user dictionary

db_config = global_config.get('global_database').get('db').get(
    global_config.get('global_data_source'))  # get the config node for the active data source
myclient = pymongo.MongoClient(db_config.get('mongo_url'))
mydb = myclient[db_config.get('db_name')]
mycol = mydb[db_config.get('col')]


def db_mon(kw, max_rows=1000):
    """
    Return the concatenated job-description ('职位描述') fields for a keyword.
    :param max_rows: maximum number of documents to read
    :param kw: job keyword to match
    :return: all matching descriptions joined with newlines
    """
    # query the matching documents, projecting only the description field
    posts = mycol.find({'kw': kw}, {'职位描述': 1, '_id': 0}).limit(max_rows)
    # join the descriptions together
    return '\n'.join(v.get('职位描述') for v in posts if v.get('职位描述') is not None)
Example #8
# ----------------------------------------
from utils.parse_yaml import global_config

jobs_json_file = 'G:\\数据集\\zhaopincom\\DP\\storage_jobs\\finally_storage_drop_nan_file.json'
job_detail_json_file = 'G:\\数据集\\zhaopincom\\DP\\storage_job_info\\finally_job_detail_storage_drop_nan.json'
job_recommend_json_file = 'G:\\数据集\\zhaopincom\\DP\\storage_job_info\\finally_recommend_storage_drop_nan.json'
# ----------------------------------------
mongo_url = "mongodb://localhost:27017/"
mongo_DB = 'ERADV'
mongo_col_jobs = "jobs"
mongo_col_job_info = 'job_info'
mongo_col_job_recommend = 'recommend_job'
# mongo id: during data processing, 'number' is renamed to '_id' by default
# ------------------------------------------
elastic_index_jobs = 'jobs'
elastic_index_job_detail = 'job_info'  # index name
elastic_index_job_recommend_data = 'recommend_job'
id_column_name = 'number'  # column to use as the id

if global_config.get('global_data_source'):
    # if a data source is configured, override the defaults above
    db_config = global_config.get('global_database').get('db').get(
        global_config.get('global_data_source'))
    if global_config.get('global_data_source') == 'mongo':
        mongo_url = db_config.get('mongo_url')
        mongo_DB = db_config.get('db_name')
        mongo_col_jobs = db_config.get('col')
    else:
        elastic_index_jobs = db_config.get('col')
        id_column_name = db_config.get('id_column_name')