Example #1
async def process_send_to_magic(callback_query: types.CallbackQuery,
                                state: FSMContext):
    await callback_query.answer()
    # Pull the b_date and pub_date values out of FSM storage into a dict
    my_dict = await state.get_data()
    magic_dates = [my_dict['b_date'],
                   my_dict['pub_date']]  # Collect them into a list
    # To be replaced with sending to the Classic model:
    keywords = get_keywords(*magic_dates)
    title = generate_text(keywords[0], 10, model,
                          random.randint(8, max_sequence_len - 1))
    describe = predict_quantile(title)
    response = f"A suitable headline for your post:\n{title.title()}\n\n" + describe
    await bot.send_message(callback_query.from_user.id, response)
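
For context, a minimal sketch of how this handler might be registered, assuming aiogram 2.x; the token, the state group, and the 'magic' callback_data are illustrative, not from the original:

# Hypothetical wiring for the handler above (aiogram 2.x style).
from aiogram import Bot, Dispatcher
from aiogram.contrib.fsm_storage.memory import MemoryStorage
from aiogram.dispatcher.filters.state import State, StatesGroup

bot = Bot(token='...')  # token elided
dp = Dispatcher(bot, storage=MemoryStorage())

class MagicForm(StatesGroup):  # illustrative state group holding b_date/pub_date
    dates = State()

# Run the handler when a button with callback_data='magic' is pressed
dp.register_callback_query_handler(process_send_to_magic,
                                   text='magic', state=MagicForm.dates)
# from aiogram.utils import executor; executor.start_polling(dp)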
Example #2
def get_graph(word="Дерево", n=3):
    """Recursively expand `word` through its definition, recording
    (parent, child) edge weights in the module-level `tree` dict."""
    count = 0
    word = word.lower()

    if n > 0:
        definition = get_definition(word)
        with open('/tmp/rez.txt', 'a') as f:  # debug dump of each definition
            f.write("=\n%s\n=\n" % definition)
        for word_i in unwiki(filter_keywords(get_keywords(definition))):
            if word_i != word and not is_stop_word(word_i):
                count += 1
                if get_graph(word_i, n - 1):
                    # Bump the edge weight, creating the edge on first sight
                    tree[(word, word_i)] = tree.get((word, word_i), 0) + 1
    return count
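
The tree dict that get_graph fills maps (parent, child) pairs to edge weights, but the snippet never shows it being created or consumed. A minimal sketch, assuming the helper functions above are importable and networkx is an acceptable renderer:

# Hypothetical consumer of the `tree` edge-weight dict built by get_graph.
import networkx as nx

tree = {}  # must exist at module level before get_graph() runs
get_graph("Дерево", n=2)

G = nx.DiGraph()
for (parent, child), weight in tree.items():
    G.add_edge(parent, child, weight=weight)
print(G.number_of_nodes(), "nodes,", G.number_of_edges(), "edges")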
Example #3
import os
import unicodedata


def get_vocabulary_dic():
    files = [
        os.path.join('livechat', f) for f in os.listdir('livechat')
        if os.path.isfile(os.path.join('livechat', f))
    ]

    json_files = [f for f in files if os.path.splitext(f)[1] == '.json']

    dic = {}
    ignore_set = set()
    for file in json_files:
        vocabulary = {}

        # m and sw_set are module-level; the POS list restricts keywords to
        # nouns (名詞), adjectives (形容詞), and interjections (感動詞)
        keywords = get_keywords(m, file, ['名詞', '形容詞', '感動詞'])

        for keyword in keywords:
            # Skip stop words
            if keyword in sw_set:
                continue

            if keyword in vocabulary:
                vocabulary[keyword] += 1
            else:
                if keyword in ignore_set:
                    continue

                # For length-1 keywords, allow only kanji.
                # Coined words can get split into single kanji; if those were
                # excluded here, the word would vanish entirely, so single
                # kanji characters are allowed. (In principle non-kanji have
                # the same problem, but allowing everything would add far too
                # much noise, so this is a compromise.)
                if len(keyword) == 1:
                    if not unicodedata.name(keyword,
                                            '').startswith('CJK UNIFIED'):
                        ignore_set.add(keyword)
                        continue

                vocabulary[keyword] = 1

        dic[file] = vocabulary

    return dic
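
The length-1 filter hinges on Unicode character names: kanji names start with 'CJK UNIFIED'. A quick standalone illustration (the names come straight from the Unicode database):

import unicodedata

# Kanji carry 'CJK UNIFIED IDEOGRAPH-...' names; kana and Latin do not.
print(unicodedata.name('漢'))  # CJK UNIFIED IDEOGRAPH-6F22 -> kept
print(unicodedata.name('あ'))  # HIRAGANA LETTER A          -> ignored
print(unicodedata.name('a'))   # LATIN SMALL LETTER A       -> ignored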
Example #4
async def process_send_to_magic(callback_query: types.CallbackQuery,
                                state: FSMContext):
    await callback_query.answer()
    # Pull the b_date and pub_date values out of FSM storage into a dict
    my_dict = await state.get_data()
    magic_dates = [my_dict['b_date'],
                   my_dict['pub_date']]  # Collect them into a list
    # To be replaced with sending to the DDF model:
    keywords = get_keywords(*magic_dates)
    # state = random.randint(1, 2)
    # if state == 1:
    #     keywords = keywords[0] + ' ' + random.choice(gachi_tags)
    # else:
    #     keywords = keywords[1] + ' ' + random.choice(gachi_tags)
    title = generate_text(keywords[0], 10, model,
                          random.randint(8, max_sequence_len - 1))
    title = replace_text(title, dict_gachi)
    describe = predict_quantile_ddf(title)
    response = f"A suitable headline for your post:\n{title.title()}\n\n" + describe
    await bot.send_message(callback_query.from_user.id,
                           response)  # Right now the bot sends the resulting list
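
replace_text and dict_gachi are not defined in the snippet; a plausible minimal stand-in, assuming a plain word-for-word substitution dict (the names are kept from the original, the body is a guess):

# Hypothetical stand-in for the replace_text helper used above.
def replace_text(text, mapping):
    """Replace each whole word found in `mapping` with its substitute."""
    return ' '.join(mapping.get(word, word) for word in text.split())

dict_gachi = {'boss': 'champ'}  # illustrative mapping
print(replace_text('the boss appears', dict_gachi))  # -> the champ appears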
Example #5
def main():
    list_count = []

    time_TP_start = time.time()
    list_txt = TP.main_pretreat()  # pre-process the source text into a token list
    time_TP_end = time.time()
    list_keywords = gk.get_keywords()
    for _ in list_keywords:
        list_count.append(0)
    time_match_start = time.time()

    # Count how many tokens in the text match each keyword
    for idx, keyword in enumerate(list_keywords):
        for token in list_txt:
            if token == keyword:
                list_count[idx] += 1
    time_match_end = time.time()
    data_output(list_keywords, list_count)
    print('Preprocessing time: ' + str(time_TP_end - time_TP_start) + '\n')
    print('Matching time: ' + str(time_match_end - time_match_start) + '\n')
    print('Total time: ' + str(time_match_end + time_TP_end - time_match_start - time_TP_start) + '\n')
    return True
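
The nested matching loop above is O(len(keywords) x len(tokens)); the same counts can be had in one pass with collections.Counter. A minimal sketch, assuming the TP and gk modules from the snippet:

# One-pass keyword counting equivalent to the nested loop above.
from collections import Counter

token_counts = Counter(TP.main_pretreat())
list_keywords = gk.get_keywords()
list_count = [token_counts[k] for k in list_keywords]  # 0 for unseen keywords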
Example #6
def cluster_score(user_desc, user_skills):
    user_keywords = list(get_keywords.get_keywords(user_desc))
    user_keywords.extend([x.lower() for x in user_skills])

    # Load the pickled clustering dump from the scripts directory
    cwd = os.getcwd()
    os.chdir('../scripts')
    cluster_file = 'cluster_dump.txt'
    with open('data/' + cluster_file, 'rb') as f:  # binary mode for pickle
        clusterDump = cPickle.load(f)
    os.chdir(cwd)

    # Build the binary token vector for the current user
    token_lists = clusterDump['tokens']
    token_vector = array([0.0] * len(token_lists))
    for n, token in enumerate(token_lists):
        if token in user_keywords:
            token_vector[n] = 1.0

    centroids = clusterDump['centroids']
    titles = clusterDump['job_titles']
    recommendations = clusterDump['recos']

    # Find the centroid nearest to the user's token vector
    # (Euclidean distance between the vectors)
    minV = centroids[0] - token_vector
    minMag = sqrt(minV.dot(minV))
    cluster_value = 0
    for i in range(1, len(centroids)):
        diff = centroids[i] - token_vector
        mag = sqrt(diff.dot(diff))
        if mag < minMag:
            minMag = mag
            cluster_value = i
    return recommendations[cluster_value], titles[cluster_value]
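
A minimal usage sketch for cluster_score, assuming the pickled cluster dump exists under ../scripts/data; the sample description and skills are illustrative:

# Hypothetical call site for cluster_score.
recos, title = cluster_score(
    user_desc='Built data pipelines and search infrastructure in Python',
    user_skills=['Python', 'Hadoop', 'Elasticsearch'],
)
print(title, recos)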
Example #7
from collections import defaultdict, Counter
import operator as op
import cPickle  # Python 2 pickle

import get_keywords   # project module wrapping keyword extraction
import scrapy_reader  # project module providing profile_dump

kc = 8  # number of k-means clusters
tokenList = []
vectorList = []
profileVector = []
profileList = scrapy_reader.profile_dump

# Collect every skill and description keyword across all profiles
for profile in profileList.itervalues():
    tokenList.extend([x.lower() for x in profile['skills']])
    if len(profile['experience']) != 0:
        exp = profile['experience'][0]
        if exp.desc is not None:
            descTokens = get_keywords.get_keywords(exp.desc)
            tokenList.extend(descTokens)
tokenList = list(set(tokenList))  # deduplicate

# Forming token vectors and profiling of users
for profile in profileList.itervalues():
    tokens = []
    tokens.extend([x.lower() for x in profile['skills']])
    if len(profile['experience']) != 0:
        exp = profile['experience'][0]
        if exp.desc is not None:
            descTokens = get_keywords.get_keywords(exp.desc)
            tokens.extend(descTokens)
        profileVector.append([exp.company.lower(), exp.postitle.lower()])
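
The snippet sets kc = 8 but the clustering step itself is cut off, and vectorList is never filled. A sketch of what presumably follows, using scipy's k-means (everything here is an assumption, not the original code):

# Hypothetical continuation: cluster one 0/1 token vector per user.
from numpy import array
from scipy.cluster.vq import kmeans

# vectorList would hold one binary vector per profile, e.g.:
#   vectorList.append([1.0 if t in tokens else 0.0 for t in tokenList])
obs = array(vectorList)
centroids, distortion = kmeans(obs, kc)  # kc = 8 clusters, as set above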
Example #8
def Auto_PushReply(getvalue_dict, pushper_dict, threadcount):
    appid = getvalue_dict['appid']
    authtype = getvalue_dict['authtype']

    # SubData.initData(appid)  # per-appid stats are initialised centrally in Autoreply-Monitor
    try:
        reply_dict, get_keywords_url = get_keywords.get_keywords(getvalue_dict)

        if reply_dict == 'NULL':
            print 'totalCount=0, no keyword configuration!'
            writelog.errorlog(get_keywords_url, 'check whether this appid has any keyword configuration', 'totalCount=0, no keyword configuration!')
            static_result = SubData.data
        elif reply_dict == 'FAIL':
            send_emails.send_failEmail('Failed to fetch keywords; check the API parameters or whether the request works:\n%s' % get_keywords_url)
            writelog.errorlog(get_keywords_url, 'fetch the keywords for this appid', 'Failed to fetch keywords; check the API parameters or whether the request works!')
            static_result = SubData.Key_static(appid, '500')

        else:
            threads = []
            for key in reply_dict.keys():
                reply_relation = reply_dict[key]
                openid = getvalue_dict['openid']
                to_xml = MesType.runtext_reply(key, openid)
                MENUNAME = reply_relation['MENUNAME']
                inputword = '%s,\tmenu name: %s' % (str(key), str(MENUNAME))
                getvalue_dict['type'] = 1
                getvalue_dict['content'] = key
                service_result, service_url = get_service.get_service(getvalue_dict)
                # print service_result
                # '进入客服' ("entered customer service") and '请求失败'
                # ("request failed") are status strings returned by
                # get_service, so the comparisons keep the original values.
                if service_result == '进入客服':
                    close_result, close_url = get_service.exitservice(getvalue_dict)
                    print '\n%s\nThis keyword triggered customer-service mode' % service_url
                    writelog.errorlog(appid, reply_relation['MENUNAME'], 'standalone manual customer-service feature; not validated for now')
                    # Retry the exit once if we are still in customer-service mode
                    while close_result == '进入客服':
                        close_result, close_url = get_service.exitservice(getvalue_dict)
                        break
                    if close_result == '请求失败':
                        close_result, close_url = get_service.exitservice(getvalue_dict)
                        MENUNAME = reply_relation['MENUNAME']
                        # print "key:%s" % str(key)
                        inputword = '%s,\tmenu name: %s' % (str(key), str(MENUNAME))
                        print '\nFailed to determine the customer-service state for this keyword. Entered-state check: %s\nExit succeeded: %s' % (close_url, close_result)
                        send_emails.send_failEmail('This keyword failed to exit customer-service mode:\n%s' % close_url)
                        writelog.errorlog(str(service_url), 'keyword: %s -- this menu failed to exit customer-service mode; check whether the request works\n' % str(inputword),
                                          str(reply_relation['reply_content']))
                        SubData.Menu_static(appid, '500')
                    else:
                        SubData.Key_static(appid, '200')

                elif service_result == '请求失败':
                    MENUNAME = reply_relation['MENUNAME']
                    # print "key:%s" % str(key)
                    inputword = '%s,\tmenu name: %s' % (str(key), str(MENUNAME))
                    print '\n%s\nFailed to determine the customer-service state for this keyword' % service_url
                    send_emails.send_failEmail('Check whether this keyword entered customer-service mode: %s\n' % service_url)
                    writelog.errorlog(str(service_url), 'keyword: %s -- failed to check entry into customer-service mode; check whether the request works' % str(inputword),
                                      str(reply_relation['reply_content']))
                    SubData.Key_static(appid, '500')

                else:

                    # Content types 2/3/4 reference stored material that must exist
                    if reply_relation['reply_content_type'] in ['2', '3', '4']:
                        material_result, material_url = get_material.get_material(getvalue_dict, reply_relation['reply_content'])
                        if material_result == 'NULL':
                            SubData.Key_static(appid, '200')
                            print 'No material found for this material_id: %s' % str(material_url)

                            # sendEmail(request_url, request_xml, result_decrypt, str(inputword), str(reply_relation['reply_content']), 'No material found for this material_id!\n%s' % str(material_url))
                            writelog.errorlog(str(material_url), 'keyword: %s -- no material found for this material_id' % str(inputword), str(reply_relation['reply_content']))
                            continue
                        elif material_result == 'FAIL':
                            SubData.Key_static(appid, '500')
                            print 'Failed to fetch material for this material_id; check whether the request works: %s' % str(material_url)
                            print reply_relation
                            send_emails.send_failEmail('Failed to fetch material; check whether the request works: %s\n' % str(material_url))
                            writelog.errorlog(str(material_url), 'menu name: %s -- failed to fetch material; check whether the request works' % str(inputword), str(reply_relation['reply_content']))
                            continue

                    # print '\nauth type:', authtype
                    if authtype == '3':
                        pushper_dict['General'] = {}
                        pushper_dict['token'] = 'irPNHoJN'
                    elif authtype == '1':
                        pushper_dict['token'] = getvalue_dict['authtoken']
                        General = {}
                        General['uniqueid'] = getvalue_dict['uniqueid']
                        General['appid'] = getvalue_dict['appid']
                        pushper_dict['General'] = General
                    # Pushdata.pushdata(pushper_dict, getvalue_dict, key, reply_relation, to_xml)
                    t = threading.Thread(target=Pushdata.pushdata,
                                         args=(pushper_dict, getvalue_dict, key, reply_relation, to_xml))
                    threads.append(t)

            for t in threads:
                t.setDaemon(True)
                t.start()
                # Crude throttle: busy-wait until the number of active
                # threads drops below the configured limit
                while True:
                    if threading.activeCount() < threadcount:
                        break

            for t in threads:
                t.join()
                # time.sleep(0.02)

            static_result = SubData.data
            # print static_result

        return static_result


    except Exception, e:
        print Exception, ":", e
        static_result = SubData.data
        send_emails.send_failEmail('\n%s :%s' % (str(Exception), str(e)))
        return static_result
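
The busy-wait throttle above spins on activeCount; a blocking alternative sketch using a bounded semaphore (the all_push_args list and the wrapper are illustrative, not from the original):

# Hypothetical semaphore-based throttle replacing the busy-wait loop.
import threading

sem = threading.BoundedSemaphore(threadcount)  # at most `threadcount` workers

def throttled_push(*args):
    try:
        Pushdata.pushdata(*args)
    finally:
        sem.release()

for push_args in all_push_args:  # one argument tuple per keyword (illustrative)
    sem.acquire()                # blocks instead of spinning
    threading.Thread(target=throttled_push, args=push_args).start()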