'accept-encoding': 'gzip, deflate, br', 'accept-language': 'zh-CN,zh;q=0.9', 'cookie': '_T_WM=51449543537; WEIBOCN_FROM=1110003030; ALF=1574402457; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WFVwy_5W1_NYGmdgVwWBcfN5JpX5K-hUgL.FoeN1hBpSK27SK-2dJLoI7DNIPS.dcfb9g8X; MLOGIN=1; SCF=AggyYma_GtwsdhDBuBkInChFzyrf4h-c6xIyt9HoFg9MZfRq8WQgL1pXQxVYwwayYLv_vu6gicNVt2KDE6ofZcM.; SUB=_2A25wq547DeRhGeVJ41YQ9S_MzjmIHXVQVyJzrDV6PUJbktANLVn4kW1NT8Wm8Dq_jO_PosHk-r88d5dazQKGDJ1n; SUHB=0IIVtU8bMn5Rc2; SSOLoginState=1571810923; XSRF-TOKEN=204527; M_WEIBOCN_PARAMS=luicode%3D10000011%26lfid%3D1076032393886580%26fid%3D1005052393886580%26uicode%3D10000011', 'MWeibo-Pwa': '1', 'sec-fetch-mode': 'cors', 'sec-fetch-site': 'same-origin', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.9 Safari/537.36', 'x-requested-with': 'XMLHttpRequest', 'x-xsrf-token': '1751a7' } data = {'name': 'username', 'password': '******'} filesolver = conj.FileSolve() crawler = conj.Crawler(headers, data) ticks = datetime.date.today().strftime('%y%m%d') filesolver.dir_jdg('../qa_txt/qa_users/update') user_filename = '../qa_txt/qa_users/update/' + ticks + '.pkl' user_dta = filesolver.file_jdg( user_filename, pd.DataFrame(columns=[ 'uid', 'name', 'follow_count', 'followers_count', 'urank', 'gender', 'verified_type' ])) users = user_dta['uid'] user_info = filesolver.read_pkl('../qa_txt/user_info.pkl') user_uid = user_info.keys() print('___Begin___') for uid in user_uid:
def data_group(data, group_list, beta):
    """
    Sum the data per group and keep only the groups above a threshold.

    :param data: DataFrame to aggregate; after the grouped sum it must still
        expose the column 'the number of questions answered'.
    :param group_list: column label(s) passed to ``DataFrame.groupby``. List
    :param beta: threshold; a group is kept only when its summed
        'the number of questions answered' is strictly greater than beta. Int
    :return: the grouped-and-summed DataFrame restricted to the kept groups.
    """
    totals = data.groupby(group_list).sum()
    # Boolean mask over the groups (rows of the summed frame).
    enough_answers = totals['the number of questions answered'] > beta
    return totals[enough_answers]


# Load the pickled inputs through the project's FileSolve helper.
filesovler = conj.FileSolve()
dta = filesovler.read_pkl('../qa_txt/question_all.pkl')
# User snapshot: drop rows with missing values, then keep one row per uid.
user_dta = (
    filesovler.read_pkl('../qa_txt/qa_users/update/191024.pkl')
    .dropna()
    .drop_duplicates(subset='uid')
)

# Pickle paths of the eight "prepare_free" datasets.
dta1_name = '../qa_txt/prepare_free_dta1.pkl'
dta2_name = '../qa_txt/prepare_free_dta2.pkl'
dta3_name = '../qa_txt/prepare_free_dta3.pkl'
dta4_name = '../qa_txt/prepare_free_dta4.pkl'
dta5_name = '../qa_txt/prepare_free_dta5.pkl'
dta6_name = '../qa_txt/prepare_free_dta6.pkl'
dta7_name = '../qa_txt/prepare_free_dta7.pkl'
dta8_name = '../qa_txt/prepare_free_dta8.pkl'

# Only when the first dataset file is absent is the author info loaded
# (presumably to build that dataset -- the remainder is outside this view).
if not path.exists(dta1_name):
    answer_dta = filesovler.read_pkl('../qa_txt/author_info.pkl')