Exemple #1
0
    def __init__(self, use_redis=False, debug=True, file_name_head=''):
        """Initialise the parent analyser, build file paths and load the mood data.

        :param use_redis: forwarded to ``QQZoneAnalysis.__init__``
        :param debug: forwarded to ``QQZoneAnalysis.__init__``
        :param file_name_head: prefix used in every generated file name; it is
            also handed to the parent initialiser as ``username``
        """
        QQZoneAnalysis.__init__(self, use_redis=use_redis, debug=debug,
                                username=file_name_head, analysis_friend=False)

        # Files that live under BASE_DIR share this "<dir>/<file_name_head>" prefix.
        train_prefix = BASE_DIR + 'data/train/' + file_name_head
        self.MOOD_DATA_SCORE_FILE_NAME = train_prefix + '_score_mood_data.csv'
        self.RE_DO_SENTIMENT_FILE_NAME = train_prefix + '_re_do_mood_data.csv'
        self.TEXT_LABEL_TRAIN_DATA = train_prefix + '_mood_text.csv'
        self.TRAIN_DATA_AFTER_CLASSIFIC = train_prefix + '_mood_classific.csv'

        # The remaining paths are relative to the working directory, not BASE_DIR.
        relative_train_dir = '../data/train/'
        relative_train3_dir = '../data/train3/'
        self.TEXT_LABEL_RESULT_TRAIN_DATA = relative_train3_dir + 'text_' + file_name_head + '_label.csv'
        self.TEXT_CLASSIFICATION_DATA_SET = relative_train_dir
        self.FINAL_RESULT_TRAIN_DATA = relative_train_dir + file_name_head + '_final_train.csv'
        # MOOD_DATA_FILE_NAME is not assigned here; presumably the parent
        # initialiser provides it -- TODO confirm.
        self.mood_data_df = pd.read_csv(self.MOOD_DATA_FILE_NAME)
        self.IMAGE_OBJECT_FILE_NAME = relative_train3_dir + file_name_head + '_image_object.csv'
        self.MOOD_DATA_AFTER_OBJECT = relative_train_dir + file_name_head + '_after_object.csv'

        self.sc = SentimentClassify()

        # Every row starts out with the string '-1' in its 'score' column.
        self.mood_data_df['score'] = '-1'

        # Label ids are the strings '1'..'7', in the order listed here.
        label_names = (
            '旅游与运动',
            '爱情与家庭',
            '学习与工作',
            '广告',
            '生活日常',
            '其他',
            '人生感悟',
        )
        self.label_dict = {
            str(number): name for number, name in enumerate(label_names, start=1)
        }
        # Inverse lookup: label text -> label id.
        self.label_dict_reverse = dict(
            zip(self.label_dict.values(), self.label_dict.keys())
        )
Exemple #2
0
 def test_generate_friend_info(self):
     """Fetch a friend's moods, clean them and draw the like/comment clouds."""
     analyzer = QQZoneAnalysis(mood_num=200, use_redis=False, debug=True)
     # The friend QQ numbers to crawl are best configured in
     # resource/config/friend_info.json.
     # A QQ number can also be passed directly here; it takes priority over the
     # configuration file, but the configuration file allows QQ numbers to be
     # supplied in bulk.
     analyzer.get_friend_mood(friend_qq='')
     analyzer.get_useful_info_from_json()
     mood_frame = analyzer.mood_data_df
     analyzer.draw_like_cloud(mood_frame)
     analyzer.draw_cmt_cloud(analyzer.mood_data_df)
Exemple #3
0
    def test_get_most_people(self):
        """Run get_most_people(), print the collected fields and save the user info."""
        analyzer = QQZoneAnalysis(use_redis=True)
        analyzer.get_most_people()

        profile = analyzer.user_info
        for field in (profile.total_like_list,
                      profile.like_friend_name,
                      profile.cmt_friend_name):
            print(field)
        profile.save_user()

        print("只点赞的好友数量:", profile.like_friend_num)
Exemple #4
0
def generate_friend_info():
    """
    Fetch a friend's QZone data and run the data analysis on it.

    :return: None
    """
    analyzer = QQZoneAnalysis(
        use_redis=False,
        debug=False,
        analysis_friend=False,
    )
    # The friend QQ numbers to crawl are best configured in
    # resource/config/friend_info.json.
    # A QQ number can also be passed directly here; it takes priority over the
    # configuration file, but the configuration file allows QQ numbers to be
    # supplied in bulk.
    analyzer.get_friend_mood(friend_qq='')
    do_analysis_for_all(analyzer)
Exemple #5
0
 def test_draw_like_cloud(self):
     """Call get_useful_info_from_json(), then draw_like_cloud() on mood_data_df."""
     analyzer = QQZoneAnalysis(use_redis=True)
     analyzer.get_useful_info_from_json()
     mood_frame = analyzer.mood_data_df
     analyzer.draw_like_cloud(mood_frame)
Exemple #6
0
 def test_get_content_top_words(self):
     """Call get_useful_info_from_json(), then get_top_words() on mood_data_df."""
     analyzer = QQZoneAnalysis(use_redis=True)
     analyzer.get_useful_info_from_json()
     mood_frame = analyzer.mood_data_df
     analyzer.get_top_words(mood_frame)
Exemple #7
0
def web_interface(username, nickname, stop_time, mood_num, cookie_text,
                  no_delete, password, pool_flag):
    """Run one crawl-and-analyse job on behalf of the web front end.

    Progress and failure messages are appended to the Redis list
    ``WEB_SPIDER_INFO + username``. ``CLEAN_DATA_KEY + username`` is set to 1
    after the post-processing calls have run.

    :param username: account identifier; also the suffix of the Redis keys
    :param nickname: forwarded unchanged to ``QQZoneAnalysis``
    :param stop_time: forwarded unchanged to ``QQZoneAnalysis``
    :param mood_num: forwarded unchanged to ``QQZoneAnalysis``
    :param cookie_text: forwarded unchanged to ``QQZoneAnalysis``
    :param no_delete: forwarded unchanged to ``QQZoneAnalysis``
    :param password: stored in the ``USER_MAP_KEY`` hash after a successful login
    :param pool_flag: forwarded unchanged to ``QQZoneAnalysis``
    :raises SystemExit: with code 1 when the crawler threads cannot be run
    """
    sp = QQZoneAnalysis(use_redis=True,
                        debug=False,
                        username=username,
                        analysis_friend=True,
                        from_web=True,
                        nickname=nickname,
                        stop_time=stop_time,
                        mood_num=mood_num,
                        no_delete=no_delete,
                        cookie_text=cookie_text,
                        pool_flag=pool_flag)
    status_key = WEB_SPIDER_INFO + username

    try:
        sp.login()
        sp.re.rpush(status_key, "用户" + str(sp.username) + "登陆成功")
        # Store the user's password.
        sp.re.hset(USER_MAP_KEY, username, password)
    except BaseException:
        # A failed login is reported as a login failure (the two failure
        # messages used to be swapped with the main-page step below).
        sp.re.rpush(status_key, LOGIN_FAILED)

    try:
        sp.get_main_page_info()
        sp.re.rpush(status_key, "获取主页信息成功")
        sp.re.rpush(status_key, MOOD_NUM_PRE + ":" + str(sp.mood_num))
    except BaseException:
        sp.re.rpush(status_key, GET_MAIN_PAGE_FAILED)

    try:
        # One thread fetches the mood data, the other the friend data.
        # ``daemon=False`` replaces the deprecated ``Thread.setDaemon(False)``.
        mood_thread = threading.Thread(target=sp.get_mood_list, daemon=False)
        friend_thread = threading.Thread(target=sp.get_friend_detail, daemon=False)
        mood_thread.start()
        friend_thread.start()
        # Wait until both threads have finished.
        mood_thread.join()
        friend_thread.join()
    except BaseException:
        sp.re.rpush(status_key, GET_MOOD_FAILED)
        # Raise SystemExit directly: the ``exit()`` helper comes from ``site``
        # and is meant for the interactive shell only.
        raise SystemExit(1)

    # Clean the friend data.
    sp.clean_friend_data()
    # Fetch the data of the first friend.
    sp.get_first_friend_info()
    # Clean the mood data and work out who liked and who commented the most.
    sp.get_most_people()
    # Save the mood data.
    sp.export_mood_df()

    sp.calculate_history_like_agree()
    sp.re.set(CLEAN_DATA_KEY + username, 1)
Exemple #8
0
def web_interface(username, nickname, stop_time, mood_num, cookie_text, no_delete, password, pool_flag):
    """Run one QR-code login, crawl and analysis job for the web front end.

    Status messages go to the Redis list ``WEB_SPIDER_INFO + username``; the
    login state is tracked in ``USER_LOGIN_STATE + username`` (0 before login,
    1 after a successful login). When the job is done the finished-user counter
    is incremented and the user is removed from ``WAITING_USER_LIST``.

    :param username: account identifier; also the suffix of the Redis keys
    :param nickname: forwarded unchanged to ``QQZoneAnalysis``
    :param stop_time: forwarded unchanged to ``QQZoneAnalysis``
    :param mood_num: forwarded unchanged to ``QQZoneAnalysis``
    :param cookie_text: forwarded unchanged to ``QQZoneAnalysis``
    :param no_delete: forwarded unchanged to ``QQZoneAnalysis``
    :param password: stored in the ``USER_MAP_KEY`` hash before logging in
    :param pool_flag: forwarded unchanged to ``QQZoneAnalysis``
    :raises SystemExit: with code 1 when login fails or the crawler threads
        cannot be run
    """
    sp = QQZoneAnalysis(use_redis=True, debug=False, username=username, analysis_friend=True, from_web=True,
                        nickname=nickname, stop_time=stop_time, mood_num=mood_num, no_delete=no_delete,
                        cookie_text=cookie_text, pool_flag=pool_flag)

    sp.re.hset(USER_MAP_KEY, username, password)
    sp.re.set(USER_LOGIN_STATE + username, 0)
    sp.logging_info(username + "init success")

    # The failure is reported in exactly one place, after the try block.
    # Previously ``exit(1)`` ran inside the ``try`` and its SystemExit was caught
    # by the ``except BaseException`` below, so a failed login was logged and
    # pushed to Redis twice.
    logged_in = False
    try:
        logged_in = bool(sp.login_with_qr_code())
        sp.remove_qr_code()
        if logged_in:
            # Record the login state.
            sp.logging_info(username + "logging success")
            sp.re.rpush(WEB_SPIDER_INFO + username, "用户" + str(sp.username) + "登陆成功")
            sp.re.set(USER_LOGIN_STATE + username, 1)
    except BaseException as e:
        sp.format_error(e, "logging failed")
        logged_in = False
    if not logged_in:
        # Leave this worker when the login failed.
        sp.logging_info(username + "logging failed")
        sp.re.rpush(WEB_SPIDER_INFO + username, LOGIN_FAILED)
        # ``exit()`` is a ``site`` helper for the interactive shell; raise directly.
        raise SystemExit(1)

    sp.get_main_page_info()
    sp.logging_info("get main page success")
    try:
        # One thread fetches the mood data, the other the friend data.
        # ``daemon=False`` replaces the deprecated ``Thread.setDaemon(False)``.
        mood_thread = threading.Thread(target=sp.get_mood_list, daemon=False)
        friend_thread = threading.Thread(target=sp.get_friend_detail, daemon=False)
        mood_thread.start()
        friend_thread.start()
        # Wait until both threads have finished.
        mood_thread.join()
        friend_thread.join()
    except BaseException:
        sp.re.rpush(WEB_SPIDER_INFO + username, GET_MOOD_FAILED)
        raise SystemExit(1)
    sp.re.set(MOOD_FINISH_KEY + str(username), 1)
    sp.logging_info("finish to capture data")
    sp.logging_info("begin to analysis...")

    # Analyse all data once the crawl has finished.
    do_analysis_for_all(sp)

    sp.user_info.save_user()
    sp.logging_info("finish to analysis")
    sp.re.set(CLEAN_DATA_KEY + username, 1)
    # INCR is atomic and treats a missing key as 0, so concurrent jobs cannot
    # lose an update the way a separate GET followed by SET could.
    sp.re.incr(FINISH_USER_NUM_KEY)
    # Remove the current user from the waiting list. Note that the argument
    # order of this command differs between redis client versions.
    sp.re.lrem(WAITING_USER_LIST, 0, username)
    sp.logging_info("finish to delete user from waiting list")
    sp.logging_info("Success!")
Exemple #9
0
 def test_most_common_friend(self):
     """Call get_most_common_friend() on an analyser created with export_csv=True."""
     analyzer = QQZoneAnalysis(
         use_redis=True,
         export_csv=True,
     )
     analyzer.get_most_common_friend()
Exemple #10
0
 def test_load_data(self):
     """Call load_file_from_redis() and print the length of the loaded content."""
     analyzer = QQZoneAnalysis(use_redis=True)
     analyzer.load_file_from_redis()
     content_length = len(analyzer.content)
     print("data len:", content_length)
Exemple #11
0
 def test_get_most_people(self):
     """Run get_most_people() and print the like/comment friend names."""
     analyzer = QQZoneAnalysis(use_redis=True)
     analyzer.get_most_people()
     profile = analyzer.user_info
     print(profile.like_friend_name)
     print(profile.cmt_friend_name)
Exemple #12
0
 def setUp(self) -> None:
     """Create a Redis-backed analyser and switch it to the fixed test account."""
     analyzer = QQZoneAnalysis(use_redis=True)
     analyzer.change_username("458246290", "fuyuko")
     self.qa = analyzer
Exemple #13
0
class FriendAnalysisTest(unittest.TestCase):

    def setUp(self) -> None:
        self.qa = QQZoneAnalysis(use_redis=True)
        self.qa.change_username("458246290", "fuyuko")

    def tearDown(self) -> None:
        pass

    def test_init(self) -> None:
        pass

    def test_get_friend_data(self):
        self.qa.reset_username()
        self.qa.get_friend_mood("458246290", "fuyuko", mood_num=20)

    def test_clean_friend_data(self):
        self.qa.get_useful_info_from_json()
        assert self.qa.has_clean_data == True

    def test_draw_friend_cmt_cloud(self):
        self.qa.get_useful_info_from_json()
        self.qa.draw_cmt_cloud(self.qa.mood_data_df)

    # 计算点赞和评论最多的人
    def test_get_friend_most_people(self):
        self.qa.get_most_people()
        print(self.qa.user_info.like_friend_name)
        print(self.qa.user_info.cmt_friend_name)

    # 绘制说说关键字词云图
    def test_draw_content_cloud(self):
        self.qa.get_useful_info_from_json()
        self.qa.draw_content_cloud(self.qa.mood_data_df)

    # 绘制点赞的人的词云图
    def test_draw_like_cloud(self):
        self.qa.get_useful_info_from_json()
        self.qa.draw_like_cloud(self.qa.mood_data_df)

    def test_get_history(self):
        self.qa.calculate_history_like_agree()
        print(len(self.qa.re.get(self.qa.history_like_agree_file_name)))

    def test_export_data_df(self):
        self.qa.export_mood_df()
Exemple #14
0
def web_interface(username, nickname, stop_time, mood_num, cookie_text, no_delete, password, pool_flag):
    """Run one QR-code login, crawl and analysis job for the web front end.

    Status messages go to the Redis list ``WEB_SPIDER_INFO + username``; the
    login state is tracked in ``USER_LOGIN_STATE + username`` (0 before login,
    1 after a successful login). Friend analysis only runs when
    ``clean_friend_data()`` returns a truthy value, and mood analysis only when
    ``mood_data_df`` is not empty. When the job is done the finished-user
    counter is incremented and the user is removed from ``WAITING_USER_LIST``.

    :param username: account identifier; also the suffix of the Redis keys
    :param nickname: forwarded unchanged to ``QQZoneAnalysis``
    :param stop_time: forwarded unchanged to ``QQZoneAnalysis``
    :param mood_num: forwarded unchanged to ``QQZoneAnalysis``
    :param cookie_text: forwarded unchanged to ``QQZoneAnalysis``
    :param no_delete: forwarded unchanged to ``QQZoneAnalysis``
    :param password: stored in the ``USER_MAP_KEY`` hash before logging in
    :param pool_flag: forwarded unchanged to ``QQZoneAnalysis``
    :raises SystemExit: with code 1 when login fails or the crawler threads
        cannot be run
    """
    sp = QQZoneAnalysis(use_redis=True, debug=False, username=username, analysis_friend=True, from_web=True,
                        nickname=nickname, stop_time=stop_time, mood_num=mood_num, no_delete=no_delete,
                        cookie_text=cookie_text, pool_flag=pool_flag)

    sp.re.hset(USER_MAP_KEY, username, password)
    sp.re.set(USER_LOGIN_STATE + username, 0)
    sp.logging_info(username + "init success")

    # The failure is reported in exactly one place, after the try block.
    # Previously ``exit(1)`` ran inside the ``try`` and its SystemExit was caught
    # by the ``except BaseException`` below, so a failed login was logged and
    # pushed to Redis twice.
    logged_in = False
    try:
        logged_in = bool(sp.login_with_qr_code())
        sp.remove_qr_code()
        if logged_in:
            # Record the login state.
            sp.logging_info(username + "logging success")
            sp.re.rpush(WEB_SPIDER_INFO + username, "用户" + str(sp.username) + "登陆成功")
            sp.re.set(USER_LOGIN_STATE + username, 1)
    except BaseException as e:
        sp.format_error(e, "logging failed")
        logged_in = False
    if not logged_in:
        # Leave this worker when the login failed.
        sp.logging_info(username + "logging failed")
        sp.re.rpush(WEB_SPIDER_INFO + username, LOGIN_FAILED)
        # ``exit()`` is a ``site`` helper for the interactive shell; raise directly.
        raise SystemExit(1)

    sp.get_main_page_info()
    sp.logging_info("get main page success")
    try:
        # One thread fetches the mood data, the other the friend data.
        # ``daemon=False`` replaces the deprecated ``Thread.setDaemon(False)``.
        mood_thread = threading.Thread(target=sp.get_mood_list, daemon=False)
        friend_thread = threading.Thread(target=sp.get_friend_detail, daemon=False)
        mood_thread.start()
        friend_thread.start()
        # Wait until both threads have finished.
        mood_thread.join()
        friend_thread.join()
    except BaseException:
        sp.re.rpush(WEB_SPIDER_INFO + username, GET_MOOD_FAILED)
        raise SystemExit(1)
    sp.re.set(MOOD_FINISH_KEY + str(username), 1)

    # Clean the friend data.
    friend_data_state = sp.clean_friend_data()
    if friend_data_state:
        # Fetch the data of the first friend.
        sp.get_first_friend_info()
        # Work out who shares the most common friends.
        sp.get_most_common_friend()
        # Work out the common groups.
        sp.get_most_group()
    sp.get_useful_info_from_json()
    if not sp.mood_data_df.empty:
        # Clean the mood data and work out who liked and who commented the most.
        sp.get_most_people()
        # Work out when moods were posted.
        sp.calculate_send_time()
        sp.draw_cmt_cloud(sp.mood_data_df)
        sp.draw_like_cloud(sp.mood_data_df)
        # The keyword cloud of the mood text is skipped because it is slow;
        # it would be drawn with sp.draw_content_cloud(sp.mood_data_df).
        # Save the mood data.
        sp.export_mood_df()
        sp.calculate_history_like_agree()

    sp.user_info.save_user()
    sp.re.set(CLEAN_DATA_KEY + username, 1)
    # INCR is atomic and treats a missing key as 0, so concurrent jobs cannot
    # lose an update the way a separate GET followed by SET could.
    sp.re.incr(FINISH_USER_NUM_KEY)
    # Remove the current user from the waiting list. Note that the way the
    # arguments are passed to this command differs from redis-cli.
    sp.re.lrem(WAITING_USER_LIST, username)
Exemple #15
0
 def test_get_history(self):
     """Call calculate_history_like_agree() and print the stored value's length."""
     analyzer = QQZoneAnalysis(use_redis=True)
     analyzer.calculate_history_like_agree()
     stored = analyzer.re.get(analyzer.history_like_agree_file_name)
     print(len(stored))
Exemple #16
0
 def test_init(self):
     """Constructing QQZoneAnalysis with default arguments must not raise."""
     # The instance itself is not needed; only the constructor call matters.
     QQZoneAnalysis()
Exemple #17
0
 def test_load_data_from_json(self):
     """Call load_all_data_from_json() on an analyser created without Redis."""
     analyzer = QQZoneAnalysis(use_redis=False)
     analyzer.load_all_data_from_json()
Exemple #18
0
 def test_most_common_groups(self):
     """Call get_most_group() on an analyser created with export_csv=True."""
     analyzer = QQZoneAnalysis(
         use_redis=True,
         export_csv=True,
     )
     analyzer.get_most_group()
Exemple #19
0
 def test_clean_data(self):
     """After get_useful_info_from_json(), has_clean_data must equal True."""
     analyzer = QQZoneAnalysis(use_redis=False)
     analyzer.get_useful_info_from_json()
     cleaned = analyzer.has_clean_data
     assert cleaned == True
Exemple #20
0
def capture_main_data_and_analysis():
    """
    Start the crawler and analyse the captured data.

    Logs in with a QR code, fetches the main page info and the mood list,
    optionally fetches friend details, then runs the full analysis and saves
    the user info.

    :return: None
    """
    analyzer = QQZoneAnalysis(
        use_redis=False,
        debug=True,
        stop_time='2011-11-11',
        mood_num=20,
        analysis_friend=False,
    )
    analyzer.login_with_qr_code()
    analyzer.get_main_page_info()
    analyzer.get_mood_list()

    # Friend details are only fetched when friend analysis is switched on.
    if analyzer.analysis_friend:
        analyzer.thread_num = 20
        analyzer.get_friend_detail()

    do_analysis_for_all(analyzer)
    analyzer.user_info.save_user()
Exemple #21
0
 def test_calculate_send_time(self):
     """Call calculate_send_time() and print user_info.is_night as a bool."""
     analyzer = QQZoneAnalysis(use_redis=True)
     analyzer.calculate_send_time()
     night_flag = bool(analyzer.user_info.is_night)
     print("TEST IS NIGHT:", night_flag)
Exemple #22
0
def web_interface(username, nickname, stop_time, mood_num, cookie_text,
                  no_delete, password, pool_flag):
    """Run one login, crawl and analysis job on behalf of the web front end.

    Status messages go to the Redis list ``WEB_SPIDER_INFO + username``. The
    password is stored in the ``USER_MAP_KEY`` hash before logging in and is
    deleted again when the login step raises. At the end the user is removed
    from ``WAITING_USER_LIST``.

    :param username: account identifier; also the suffix of the Redis keys
    :param nickname: forwarded unchanged to ``QQZoneAnalysis``
    :param stop_time: forwarded unchanged to ``QQZoneAnalysis``
    :param mood_num: forwarded unchanged to ``QQZoneAnalysis``
    :param cookie_text: forwarded unchanged to ``QQZoneAnalysis``
    :param no_delete: forwarded unchanged to ``QQZoneAnalysis``
    :param password: stored in the ``USER_MAP_KEY`` hash under ``username``
    :param pool_flag: forwarded unchanged to ``QQZoneAnalysis``
    :raises SystemExit: with code 1 when the crawler threads cannot be run
    """
    sp = QQZoneAnalysis(use_redis=True,
                        debug=False,
                        username=username,
                        analysis_friend=True,
                        from_web=True,
                        nickname=nickname,
                        stop_time=stop_time,
                        mood_num=mood_num,
                        no_delete=no_delete,
                        cookie_text=cookie_text,
                        pool_flag=pool_flag)
    try:
        # Store the user's password.
        sp.re.hset(USER_MAP_KEY, username, password)
        sp.logging_info(username + "init success")
        sp.login()
        sp.logging_info(username + "logging success")
        sp.re.rpush(WEB_SPIDER_INFO + username,
                    "用户" + str(sp.username) + "登陆成功")
    except BaseException as e:
        sp.format_error(e, "logging failed")
        sp.re.rpush(WEB_SPIDER_INFO + username, LOGIN_FAILED)
        # Delete the user's password again.
        sp.re.hdel(USER_MAP_KEY, username)
        # NOTE(review): execution continues with the main-page fetch even
        # though the login step failed -- confirm this is intended.
    sp.get_main_page_info()
    sp.logging_info("get main page success")

    try:
        # One thread fetches the mood data, the other the friend data.
        # ``daemon=False`` replaces the deprecated ``Thread.setDaemon(False)``.
        mood_thread = threading.Thread(target=sp.get_mood_list, daemon=False)
        friend_thread = threading.Thread(target=sp.get_friend_detail, daemon=False)
        mood_thread.start()
        friend_thread.start()
        # Wait until both threads have finished.
        mood_thread.join()
        friend_thread.join()
    except BaseException:
        sp.re.rpush(WEB_SPIDER_INFO + username, GET_MOOD_FAILED)
        # Raise SystemExit directly: the ``exit()`` helper comes from ``site``
        # and is meant for the interactive shell only.
        raise SystemExit(1)

    # Clean the friend data.
    sp.clean_friend_data()
    # Fetch the data of the first friend.
    sp.get_first_friend_info()
    # Clean the mood data and work out who liked and who commented the most.
    sp.get_most_people()
    sp.calculate_send_time()
    sp.user_info.save_user()
    # Save the mood data.
    sp.export_mood_df()
    sp.re.set(MOOD_FINISH_KEY + str(username), 1)
    sp.calculate_history_like_agree()
    sp.re.set(CLEAN_DATA_KEY + username, 1)
    # Remove the current user from the waiting list. Note that the way the
    # arguments are passed to this command differs from redis-cli.
    sp.re.lrem(WAITING_USER_LIST, username)
Exemple #23
0
 def test_calculate_early_send_time(self):
     """Call calculate_early_send_time() and print user_info.early_mood_content."""
     analyzer = QQZoneAnalysis(use_redis=True)
     analyzer.calculate_early_send_time()
     early_content = analyzer.user_info.early_mood_content
     print(early_content)