def clear_cache(QQ, password):
    """Delete the cached Redis entries and generated files of one user.

    Args:
        QQ: Account identifier; it is concatenated into Redis key names and
            file names, so it must be a string.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string. ``finish`` is a mismatch message when the password
        check fails, ``1`` when every deletion succeeded, and ``0`` (together
        with an ``info`` message) when any deletion raised.
    """
    conn = redis.Redis(connection_pool=get_pool())
    if not check_password(conn, QQ, password):
        return json.dumps(dict(finish="QQ号与识别码不匹配"), ensure_ascii=False)

    try:
        data_prefix = BASE_DIR + 'data/' + QQ
        # NOTE(review): the friend-detail key
        # (BASE_DIR + 'friend/' + QQ + '_friend_detail.json') is not deleted
        # here -- confirm whether it should be cleared as well.
        redis_keys = (
            data_prefix + '_QQ_content.json',
            data_prefix + '_QQ_like_list_name.json',
            data_prefix + '_QQ_mood_detail.json',
            data_prefix + '_QQ_like_detail.json',
        )
        for redis_key in redis_keys:
            conn.delete(redis_key)

        # os.remove raises if a file is missing; that is reported as finish=0.
        os.remove(os.path.join(RESULT_BASE_DIR, QQ + '_mood_data.xlsx'))
        os.remove(os.path.join(RESULT_BASE_DIR, QQ + '_mood_data.csv'))
        os.remove(os.path.join(BASE_DIR + 'temp', QQ + '.json'))
        os.remove(os.path.join(BASE_DIR + 'log', QQ + '.log'))
        return json.dumps(dict(finish=1), ensure_ascii=False)
    except Exception as e:
        # Exception (not BaseException) so KeyboardInterrupt/SystemExit
        # still propagate instead of being turned into an error response.
        print(e)
        return json.dumps(dict(info="未知错误:" + str(e), finish=0), ensure_ascii=False)
def query_spider_info(QQ, password):
    """Pop the next spider progress message for a user and classify it.

    The message is popped before the password check so that a login-failure
    message can still be reported when the check does not pass.

    Args:
        QQ: Account identifier appended to ``WEB_SPIDER_INFO`` to form the
            list key.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string with ``info`` and ``finish``; for authenticated calls
        it also carries ``mood_num`` and ``friend_num``.
    """
    conn = get_redis_conn(session.get(POOL_FLAG))
    message = conn.lpop(WEB_SPIDER_INFO + QQ)

    if not check_password(conn, QQ, password):
        if message is not None and "登陆失败" in message:
            return json.dumps(dict(finish=FAILED_STATE, info=message))
        return json.dumps(dict(finish=INVALID_LOGIN, info=0))

    state, moods, friends = 0, -1, 0
    if message is None:
        # Nothing queued: report an empty message with the default counters.
        message = ''
    elif FRIEND_INFO_PRE in message:
        state = FINISH_FRIEND
        friends = int(message.split(':')[1])
    elif MOOD_NUM_PRE in message:
        state = SUCCESS_STATE
        moods = int(message.split(':')[1])
    elif "失败" in message:
        state = FAILED_STATE
        moods = FAILED_STATE

    payload = dict(info=message, finish=state, mood_num=moods, friend_num=friends)
    return json.dumps(payload, ensure_ascii=False)
def download_excel(QQ, password, file_type):
    """Send one of the user's generated workbooks as an attachment.

    Args:
        QQ: Account identifier; it prefixes the file names.
        password: Verification code checked through ``check_password``.
        file_type: ``'xlsx'`` selects ``<QQ>_mood_data.xlsx`` under
            ``RESULT_BASE_DIR``; ``'csv'`` selects
            ``<QQ>_friend_detail_list.xlsx`` under ``FRIEND_BASE_DIR``.

    Returns:
        The result of ``send_from_directory`` when the file exists, otherwise
        a JSON string whose ``finish`` field holds an error message.
    """
    conn = redis.Redis(connection_pool=get_pool())
    if not check_password(conn, QQ, password):
        return json.dumps(dict(finish="QQ号与识别码不匹配"), ensure_ascii=False)

    if file_type == 'xlsx':
        directory = RESULT_BASE_DIR
        filename = QQ + '_mood_data.xlsx'
        if os.path.isfile(os.path.join(directory, filename)):
            print(os.path.join(directory, filename))
            return send_from_directory(directory, filename, as_attachment=True)
    elif file_type == 'csv':
        directory = FRIEND_BASE_DIR
        filename = QQ + '_friend_detail_list.xlsx'
        if os.path.isfile(os.path.join(directory, filename)):
            return send_from_directory(directory, filename, as_attachment=True)

    # Reached when the file is missing or file_type is not recognised;
    # previously an unrecognised file_type fell through and returned None.
    return json.dumps(dict(finish="文件不存在"), ensure_ascii=False)
def clear_cache(QQ, password):
    """Remove a user's data directory and every Redis key containing the QQ.

    Args:
        QQ: Account identifier; names the directory ``BASE_DIR + QQ + '/'``
            and is used as a substring pattern for Redis keys.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string. ``finish`` is a mismatch message when the password
        check fails, ``1`` after a cleanup, ``2`` when the user directory
        does not exist, and ``0`` (with ``info``) when an error was raised.
    """
    import shutil

    conn = get_redis_conn(session.get(POOL_FLAG))
    if not check_password(conn, QQ, password):
        return json.dumps(dict(finish="QQ号与识别码不匹配"), ensure_ascii=False)

    try:
        user_dir = BASE_DIR + QQ + '/'
        if os.path.exists(user_dir):
            # Remove the directory tree without going through a shell: QQ is
            # caller-supplied, so "rm -rf " + path would allow command
            # injection. ignore_errors mirrors the unchecked exit status of
            # the former os.system call.
            shutil.rmtree(user_dir, ignore_errors=True)
            conn.hdel(USER_MAP_KEY, QQ)
            # Redis DEL does not accept patterns, so matching keys are
            # enumerated and deleted one at a time (redis-cli is not
            # available inside the docker container).
            # NOTE(review): the "*QQ*" pattern also matches keys of any other
            # account whose number contains this QQ as a substring -- confirm
            # against the key naming scheme.
            for key in conn.keys("*" + QQ + "*"):
                conn.delete(key)
            finish = 1
        else:
            finish = 2
        return json.dumps(dict(finish=finish), ensure_ascii=False)
    except Exception as e:
        # Exception (not BaseException) so KeyboardInterrupt/SystemExit
        # still propagate.
        print(e)
        return json.dumps(dict(info="未知错误:" + str(e), finish=0), ensure_ascii=False)
def query_spider_info(QQ, password):
    """Pop the next spider progress message for a user and classify it.

    Args:
        QQ: Account identifier appended to ``WEB_SPIDER_INFO`` to form the
            list key.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string. ``finish`` is ``-2`` when the password check fails;
        otherwise the payload holds ``info`` (the popped message, possibly
        ``None``), ``finish`` (2 friend info, 1 mood count, -1 failure,
        0 otherwise), ``mood_num`` and ``friend_num``.
    """
    conn = get_redis_conn(session.get(POOL_FLAG))
    if not check_password(conn, QQ, password):
        return json.dumps({'finish': -2})

    message = conn.lpop(WEB_SPIDER_INFO + QQ)
    state, moods, friends = 0, -1, 0
    if message is not None:
        if FRIEND_INFO_PRE in message:
            state, friends = 2, int(message.split(':')[1])
        elif MOOD_NUM_PRE in message:
            state, moods = 1, int(message.split(':')[1])
        elif "失败" in message:
            state, moods = -1, -1

    payload = dict(info=message, finish=state, mood_num=moods, friend_num=friends)
    return json.dumps(payload, ensure_ascii=False)
def query_spider_num(QQ, mood_num, password):
    """Report the stored mood counter and whether it reached ``mood_num``.

    Args:
        QQ: Account identifier appended to ``MOOD_COUNT_KEY``.
        mood_num: Target count; converted with ``int``.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string. ``finish`` is ``-2`` when the password check fails;
        otherwise ``num`` is the stored counter (``null`` when the key does
        not exist) and ``finish`` is ``1`` once the counter is at least
        ``mood_num``, else ``0``.
    """
    conn = get_redis_conn(session.get(POOL_FLAG))
    if not check_password(conn, QQ, password):
        return json.dumps(dict(finish=-2))

    info = conn.get(MOOD_COUNT_KEY + str(QQ))
    # GET yields None while the counter key does not exist; treat that as
    # "not finished" instead of raising TypeError from int(None).
    finish = 1 if info is not None and int(info) >= int(mood_num) else 0
    return json.dumps(dict(num=info, finish=finish))
def query_spider_num(QQ, mood_num, password):
    """Report the stored mood counter and whether it reached ``mood_num``.

    Args:
        QQ: Account identifier appended to ``MOOD_COUNT_KEY``.
        mood_num: Target count; converted with ``int``.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string. ``finish`` is ``-2`` when the password check fails;
        otherwise ``num`` is the stored counter (``null`` when the key does
        not exist) and ``finish`` is ``1`` once the counter is at least
        ``mood_num``, else ``0``.
    """
    conn = redis.Redis(connection_pool=get_pool())
    if not check_password(conn, QQ, password):
        return json.dumps(dict(finish=-2))

    info = conn.get(MOOD_COUNT_KEY + str(QQ))
    # GET yields None while the counter key does not exist; treat that as
    # "not finished" instead of raising TypeError from int(None).
    finish = 1 if info is not None and int(info) >= int(mood_num) else 0
    return json.dumps(dict(num=info, finish=finish))
def query_friend_info_num(QQ, friend_num, password):
    """Report the stored friend-info counter and whether it reached the target.

    Args:
        QQ: Account identifier appended to ``FRIEND_INFO_COUNT_KEY``.
        friend_num: Target count; converted with ``int``.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string. ``finish`` is ``-2`` when the password check fails;
        otherwise ``num`` is the stored counter (``null`` when the key does
        not exist) and ``finish`` is ``1`` once the counter is at least
        ``friend_num``, else ``0``.
    """
    conn = redis.Redis(connection_pool=get_pool())
    if not check_password(conn, QQ, password):
        return json.dumps(dict(finish=-2))

    info = conn.get(FRIEND_INFO_COUNT_KEY + str(QQ))
    # GET yields None while the counter key does not exist; treat that as
    # "not finished" instead of raising TypeError from int(None).
    finish = 1 if info is not None and int(info) >= int(friend_num) else 0
    return json.dumps(dict(num=info, finish=finish))
def userinfo(QQ, name, password):
    """Return the stored profile of a user as JSON.

    Args:
        QQ: Account identifier passed to ``UserInfo.load``.
        name: Accepted but not used by this function.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string: ``finish`` is ``0`` when the password check fails,
        otherwise ``1`` with ``user`` set to ``UserInfo.to_dict()``.
    """
    conn = redis.Redis(connection_pool=get_pool())
    if not check_password(conn, QQ, password):
        return json.dumps({'finish': 0}, ensure_ascii=False)

    profile = UserInfo()
    profile.load(QQ)
    payload = {'finish': 1, 'user': profile.to_dict()}
    return json.dumps(payload, ensure_ascii=False)
def userinfo(QQ, name, password):
    """Return the stored profile of a user as JSON.

    Args:
        QQ: Account identifier passed to the ``UserInfo`` constructor.
        name: Accepted but not used by this function.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string: ``finish`` is ``0`` when the password check fails,
        otherwise ``1`` with ``user`` set to ``UserInfo.to_dict()``.
    """
    conn = get_redis_conn(session.get(POOL_FLAG))
    if not check_password(conn, QQ, password):
        return json.dumps({'finish': 0}, ensure_ascii=False)

    profile = UserInfo(QQ)
    profile.load()
    payload = {'finish': 1, 'user': profile.to_dict()}
    return json.dumps(payload, ensure_ascii=False)
def query_friend_info_num(QQ, friend_num, password):
    """Report the stored friend-info counter and whether it reached the target.

    Args:
        QQ: Account identifier appended to ``FRIEND_INFO_COUNT_KEY``.
        friend_num: Target count; converted with ``int``.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string. When no connection is available, ``finish`` is ``0``
        and ``num`` holds a "database not connected" message. ``finish`` is
        ``-2`` when the password check fails. Otherwise ``num`` is the stored
        counter (``null`` when the key does not exist) and ``finish`` is
        ``1`` once the counter is at least ``friend_num``, else ``0``.
    """
    conn = get_redis_conn(session.get(POOL_FLAG))
    if conn is None:
        return json.dumps(dict(num="数据库未连接", finish=0))
    if not check_password(conn, QQ, password):
        return json.dumps(dict(finish=-2))

    info = conn.get(FRIEND_INFO_COUNT_KEY + str(QQ))
    # GET yields None while the counter key does not exist; treat that as
    # "not finished" instead of raising TypeError from int(None).
    finish = 1 if info is not None and int(info) >= int(friend_num) else 0
    return json.dumps(dict(num=info, finish=finish))
def query_friend_info_num(QQ, friend_num, password):
    """Report the stored friend-info counter and whether it reached the target.

    Args:
        QQ: Account identifier appended to ``FRIEND_INFO_COUNT_KEY``.
        friend_num: Target count; the literal string ``"null"`` is treated
            as ``0``, anything else is converted with ``int``.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string. ``finish`` is ``INVALID_LOGIN`` when the password
        check fails; otherwise ``num`` is the stored counter (``null`` when
        the key does not exist) and ``finish`` is ``1`` once the counter is
        at least ``friend_num``, else ``0``.
    """
    conn = get_redis_conn(session.get(POOL_FLAG))
    if not check_password(conn, QQ, password):
        return json.dumps(dict(finish=INVALID_LOGIN))

    info = conn.get(FRIEND_INFO_COUNT_KEY + str(QQ))
    if friend_num == "null":
        friend_num = 0
    # GET yields None while the counter key does not exist; treat that as
    # "not finished" instead of raising TypeError from int(None).
    finish = 1 if info is not None and int(info) >= int(friend_num) else 0
    return json.dumps(dict(num=info, finish=finish))
def query_spider_num(QQ, mood_num, password):
    """Report the stored mood counter and whether crawling is finished.

    Args:
        QQ: Account identifier appended to ``MOOD_COUNT_KEY`` and
            ``MOOD_FINISH_KEY``.
        mood_num: Target count; converted with ``int``.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string. ``finish`` is ``INVALID_LOGIN`` when the password
        check fails; otherwise the payload holds ``num`` (stored counter,
        ``null`` when missing), ``finish_key`` (stored finish flag) and
        ``finish``, which is ``SUCCESS_STATE`` when the flag equals ``"1"``
        or the counter reached ``mood_num``, else ``0``.
    """
    conn = get_redis_conn(session.get(POOL_FLAG))
    if not check_password(conn, QQ, password):
        return json.dumps(dict(finish=INVALID_LOGIN))

    info = conn.get(MOOD_COUNT_KEY + str(QQ))
    # Forced-finish flag: lets polling stop normally even when network
    # problems or similar leave the number of crawled moods short.
    finish_key = conn.get(MOOD_FINISH_KEY + str(QQ))

    # GET yields None while the counter key does not exist; skip the numeric
    # comparison in that case instead of raising TypeError from int(None).
    count_reached = info is not None and int(info) >= int(mood_num)
    finish = SUCCESS_STATE if finish_key == "1" or count_reached else 0
    return json.dumps(dict(num=info, finish=finish, finish_key=finish_key))
def stop_spider_force(QQ, password):
    """Flag a user's spider to stop and drop the user from the queues.

    Args:
        QQ: Account identifier appended to ``STOP_SPIDER_KEY`` and removed
            from ``WAITING_USER_LIST`` and ``USER_MAP_KEY``.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string whose ``finish`` is ``INVALID_LOGIN`` when the password
        check fails and ``1`` otherwise.
    """
    redis_conn = get_redis_conn(session.get(POOL_FLAG))
    if not check_password(redis_conn, QQ, password):
        return json.dumps({'finish': INVALID_LOGIN})

    # Update the flag so the spider stops.
    stop_key = STOP_SPIDER_KEY + QQ
    redis_conn.set(stop_key, STOP_SPIDER_FLAG)
    # Remove the user from the waiting list.
    # NOTE(review): the two-argument lrem form matches the legacy redis-py
    # Redis.lrem(name, value, num=0); redis-py 3+ expects
    # (name, count, value) -- confirm the installed client version.
    redis_conn.lrem(WAITING_USER_LIST, QQ)
    redis_conn.hdel(USER_MAP_KEY, QQ)
    return json.dumps({'finish': 1})
def query_clean_data(QQ, password):
    """Block until the user's clean-data flag in Redis equals ``'1'``.

    Args:
        QQ: Account identifier appended to ``CLEAN_DATA_KEY``.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string whose ``finish`` is ``-2`` when the password check
        fails, otherwise the flag value read from Redis (``'1'``).
    """
    conn = redis.Redis(connection_pool=get_pool())
    if not check_password(conn, QQ, password):
        return json.dumps({'finish': -2}, ensure_ascii=False)

    flag_name = CLEAN_DATA_KEY + QQ
    state = conn.get(flag_name)
    # Poll every 0.1 s; there is no timeout, so this waits indefinitely.
    while state != '1':
        sleep(0.1)
        state = conn.get(flag_name)
    return json.dumps({'finish': state}, ensure_ascii=False)
def stop_spider_force(QQ, password):
    """Clear a user's data and set the stop and force-stop flags.

    Args:
        QQ: Account identifier passed to ``do_clear_data_by_user`` and
            appended to the flag key names.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string whose ``finish`` is ``INVALID_LOGIN`` when the password
        check fails, otherwise the value returned by
        ``do_clear_data_by_user``.
    """
    redis_conn = get_redis_conn(session.get(POOL_FLAG))
    if not check_password(redis_conn, QQ, password):
        return json.dumps({'finish': INVALID_LOGIN})

    # Delete the data related to this user first ...
    outcome = do_clear_data_by_user(QQ, redis_conn)
    # ... then set the flags again, after the cleanup.
    flags = (
        (STOP_SPIDER_KEY + QQ, STOP_SPIDER_FLAG),
        (FORCE_STOP_SPIDER_FLAG + QQ, FORCE_STOP_SPIDER_FLAG),
    )
    for flag_key, flag_value in flags:
        redis_conn.set(flag_key, flag_value)
    return json.dumps({'finish': outcome})
def query_clean_data(QQ, password):
    """Block until the user's clean-data flag in Redis equals ``'1'``.

    Args:
        QQ: Account identifier appended to ``CLEAN_DATA_KEY``.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string whose ``finish`` is ``INVALID_LOGIN`` when the password
        check fails, otherwise the flag value read from Redis (``'1'``).
    """
    conn = get_redis_conn(session.get(POOL_FLAG))
    if not check_password(conn, QQ, password):
        return json.dumps({'finish': INVALID_LOGIN}, ensure_ascii=False)

    flag_name = CLEAN_DATA_KEY + QQ
    state = conn.get(flag_name)
    # Poll every 0.1 s; there is no timeout, so this waits indefinitely.
    while state != '1':
        sleep(0.1)
        state = conn.get(flag_name)
    return json.dumps({'finish': state}, ensure_ascii=False)
def clear_cache(QQ, password):
    """Clear a user's data through ``do_clear_data_by_user``.

    Args:
        QQ: Account identifier passed to ``do_clear_data_by_user``.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string. ``finish`` is a mismatch message when the password
        check fails, the value returned by ``do_clear_data_by_user`` on
        success, and ``0`` (with ``info``) when the cleanup raised.
    """
    conn = get_redis_conn(session.get(POOL_FLAG))
    if not check_password(conn, QQ, password):
        return json.dumps(dict(finish="QQ号与校验码不匹配"), ensure_ascii=False)

    try:
        finish = do_clear_data_by_user(QQ, conn)
    except Exception as e:
        # Exception (not BaseException) so KeyboardInterrupt/SystemExit
        # still propagate instead of being turned into an error response.
        print(e)
        return json.dumps(dict(info="未知错误:" + str(e), finish=0), ensure_ascii=False)
    return json.dumps(dict(finish=finish), ensure_ascii=False)
def get_history(QQ, name, password):
    """Return the user's stored like-history JSON, if any.

    Args:
        QQ: Account identifier used to build the Redis key.
        name: Accepted but not used by this function.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string: ``finish`` is ``0`` when the password check fails,
        ``-1`` when no (or an empty) history value is stored, and ``1`` with
        the decoded history under ``data`` otherwise.
    """
    conn = redis.Redis(connection_pool=get_pool())
    if not check_password(conn, QQ, password):
        return json.dumps({'finish': 0})

    raw_history = conn.get(BASE_DIR + 'friend/' + QQ + '_history_like_list.json')
    if not raw_history:
        payload = {'finish': -1}
    else:
        payload = {'finish': 1, 'data': json.loads(raw_history)}
    return json.dumps(payload, ensure_ascii=False)
def get_history(QQ, name, password):
    """Return the user's stored like-history JSON, if any.

    Args:
        QQ: Account identifier used to build the Redis key.
        name: Accepted but not used by this function.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string: ``finish`` is ``0`` when the password check fails,
        ``-1`` when no (or an empty) history value is stored, and ``1`` with
        the decoded history under ``data`` otherwise.
    """
    conn = get_redis_conn(session.get(POOL_FLAG))
    if not check_password(conn, QQ, password):
        return json.dumps({'finish': 0})

    raw_history = conn.get(BASE_DIR + QQ + '/friend/' + 'history_like_list.json')
    if not raw_history:
        payload = {'finish': -1}
    else:
        payload = {'finish': 1, 'data': json.loads(raw_history)}
    return json.dumps(payload, ensure_ascii=False)
def stop_spider(QQ, password):
    """Ask a user's spider to stop and wait until it reports completion.

    Args:
        QQ: Account identifier appended to ``STOP_SPIDER_KEY`` and
            ``MOOD_COUNT_KEY``.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string: ``finish`` is ``-2`` when the password check fails;
        otherwise ``finish`` is ``1`` and ``num`` is the stored mood counter.
    """
    conn = get_redis_conn(session.get(POOL_FLAG))
    if not check_password(conn, QQ, password):
        return json.dumps({'finish': -2})

    # Update the flag so the spider stops.
    stop_key = STOP_SPIDER_KEY + QQ
    conn.set(stop_key, STOP_SPIDER_FLAG)

    # Wait for the data to be saved: poll every 0.1 s until the same key
    # holds FINISH_ALL_INFO. There is no timeout.
    while conn.get(stop_key) != FINISH_ALL_INFO:
        sleep(0.1)

    mood_count = conn.get(MOOD_COUNT_KEY + str(QQ))
    return json.dumps({'num': mood_count, 'finish': 1})
def userinfo(QQ, name, password):
    """Return the user's profile loaded from Redis as JSON.

    Args:
        QQ: Account identifier passed to ``UserInfo`` and
            ``load_from_redis``.
        name: Accepted but not used by this function.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string: ``finish`` is ``1`` with ``user`` set to
        ``UserInfo.to_dict()`` when the password matches and
        ``load_from_redis`` returned something other than ``None``;
        otherwise ``finish`` is ``0``.
    """
    pool_flag = session.get(POOL_FLAG)
    if not pool_flag:
        # When the front end and back end run on different host:port origins
        # the cookie is not sent cross-site and the session is empty, so the
        # pool has to be determined again.
        conn = get_redis_conn(judge_pool())
    else:
        conn = get_redis_conn(pool_flag)

    if check_password(conn, QQ, password):
        user = UserInfo(QQ)
        # Identity comparison: "!= None" depends on the returned object's
        # __ne__ and is not a reliable None test.
        if user.load_from_redis(QQ) is not None:
            return json.dumps(dict(finish=1, user=user.to_dict()), ensure_ascii=False)

    return json.dumps(dict(finish=0), ensure_ascii=False)
def download_excel(QQ, password, file_type):
    """Send one of the user's generated workbooks as an attachment.

    Args:
        QQ: Account identifier; names the per-user directory under
            ``BASE_DIR``.
        password: Verification code checked through ``check_password``.
        file_type: ``'mood'`` selects ``mood_data.xlsx`` in
            ``<BASE_DIR><QQ>/data/result``; ``'friend'`` selects
            ``friend_detail_list.xlsx`` in ``<BASE_DIR><QQ>/friend``.

    Returns:
        The result of ``send_from_directory`` when the file exists, otherwise
        a JSON string whose ``finish`` field holds an error message.
    """
    conn = get_redis_conn(session.get(POOL_FLAG))
    if not check_password(conn, QQ, password):
        return json.dumps(dict(finish="QQ号与校验码不匹配"), ensure_ascii=False)

    if file_type == 'mood':
        directory = BASE_DIR + QQ + "/data/result"
        if os.path.isfile(os.path.join(directory, 'mood_data.xlsx')):
            print(os.path.join(directory, 'mood_data.xlsx'))
            return send_from_directory(directory, 'mood_data.xlsx', as_attachment=True)
    elif file_type == 'friend':
        directory = BASE_DIR + QQ + "/friend"
        if os.path.isfile(os.path.join(directory, 'friend_detail_list.xlsx')):
            return send_from_directory(directory, 'friend_detail_list.xlsx', as_attachment=True)

    # Reached when the file is missing or file_type is not recognised;
    # previously an unrecognised file_type fell through and returned None.
    return json.dumps(dict(finish="文件不存在"), ensure_ascii=False)
def clear_cache(QQ, password):
    """Remove a user's data directory and the Redis keys stored under it.

    Args:
        QQ: Account identifier; names the directory ``BASE_DIR + QQ + '/'``,
            which is also the prefix of the Redis keys to delete.
        password: Verification code checked through ``check_password``.

    Returns:
        A JSON string. ``finish`` is a mismatch message when the password
        check fails, ``1`` after a cleanup, ``2`` when the user directory
        does not exist, and ``0`` (with ``info``) when an error was raised.
    """
    import shutil

    conn = get_redis_conn(session.get(POOL_FLAG))
    if not check_password(conn, QQ, password):
        return json.dumps(dict(finish="QQ号与识别码不匹配"), ensure_ascii=False)

    try:
        user_dir = BASE_DIR + QQ + '/'
        if os.path.exists(user_dir):
            # Delete every key that starts with user_dir. DEL takes literal
            # key names, so passing "prefix*" to delete() removes nothing;
            # the matching keys have to be enumerated first.
            for key in conn.keys(user_dir + '*'):
                conn.delete(key)
            # Remove the directory tree without going through a shell: QQ is
            # caller-supplied, so "rm -rf " + path would allow command
            # injection. ignore_errors mirrors the unchecked exit status of
            # the former os.system call.
            shutil.rmtree(user_dir, ignore_errors=True)
            finish = 1
        else:
            finish = 2
        return json.dumps(dict(finish=finish), ensure_ascii=False)
    except Exception as e:
        # Exception (not BaseException) so KeyboardInterrupt/SystemExit
        # still propagate.
        print(e)
        return json.dumps(dict(info="未知错误:" + str(e), finish=0), ensure_ascii=False)