Esempio n. 1
0
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cookie':
    '_T_WM=51449543537; WEIBOCN_FROM=1110003030; ALF=1574402457; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WFVwy_5W1_NYGmdgVwWBcfN5JpX5K-hUgL.FoeN1hBpSK27SK-2dJLoI7DNIPS.dcfb9g8X; MLOGIN=1; SCF=AggyYma_GtwsdhDBuBkInChFzyrf4h-c6xIyt9HoFg9MZfRq8WQgL1pXQxVYwwayYLv_vu6gicNVt2KDE6ofZcM.; SUB=_2A25wq547DeRhGeVJ41YQ9S_MzjmIHXVQVyJzrDV6PUJbktANLVn4kW1NT8Wm8Dq_jO_PosHk-r88d5dazQKGDJ1n; SUHB=0IIVtU8bMn5Rc2; SSOLoginState=1571810923; XSRF-TOKEN=204527; M_WEIBOCN_PARAMS=luicode%3D10000011%26lfid%3D1076032393886580%26fid%3D1005052393886580%26uicode%3D10000011',
    'MWeibo-Pwa': '1',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent':
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.9 Safari/537.36',
    'x-requested-with': 'XMLHttpRequest',
    'x-xsrf-token': '1751a7'
}

# Script setup: build the crawler and load the inputs for the per-user loop
# that follows.

# Payload handed to the crawler together with `headers`.
# NOTE(review): the values look like placeholders; real credentials should not
# be committed to source — confirm how they are meant to be supplied.
data = {'name': 'username', 'password': '******'}

# Project helpers from `conj`; their behaviour is not visible in this file.
filesolver = conj.FileSolve()
crawler = conj.Crawler(headers, data)
# Today's date as a yymmdd stamp, used to name the output file for this run.
ticks = datetime.date.today().strftime('%y%m%d')
# presumably makes sure the output directory exists — TODO confirm in conj
filesolver.dir_jdg('../qa_txt/qa_users/update')
user_filename = '../qa_txt/qa_users/update/' + ticks + '.pkl'
# The second argument is an empty frame with the expected user columns.
# presumably file_jdg returns the stored data when `user_filename` already
# exists and this empty frame otherwise — TODO confirm in conj
user_dta = filesolver.file_jdg(
    user_filename,
    pd.DataFrame(columns=[
        'uid', 'name', 'follow_count', 'followers_count', 'urank', 'gender',
        'verified_type'
    ]))
# The 'uid' column of whatever file_jdg returned.
users = user_dta['uid']
# The loop below iterates over the keys of this unpickled object.
# NOTE(review): its concrete type (dict, DataFrame, ...) is not visible here.
user_info = filesolver.read_pkl('../qa_txt/user_info.pkl')
user_uid = user_info.keys()
print('___Begin___')
for uid in user_uid:
Esempio n. 2
0

def data_group(data, group_list, beta):
    """
    Sum the data per group and keep only the groups above a threshold.

    :param data: the data to aggregate; it must contain the column 'the number of questions answered'. DataFrame
    :param group_list: the column(s) to group by. List
    :param beta: exclusive lower bound for the summed 'the number of questions answered' of a group. Int
    :return: the summed groups whose total is strictly greater than beta. DataFrame
    """
    totals = data.groupby(group_list).sum()
    # Boolean mask over the groups: True where the summed count exceeds beta.
    above_beta = totals['the number of questions answered'] > beta
    return totals[above_beta]


# Script setup: load the pickled inputs and name the intermediate files.

# NOTE(review): `filesovler` is misspelled, but the name is used by the code
# below, so it must not be renamed in isolation.
filesovler = conj.FileSolve()

dta = filesovler.read_pkl('../qa_txt/question_all.pkl')
# Hard-coded input file; the name looks like a yymmdd date stamp — TODO confirm
# and update when a newer user file should be used.
user_dta = filesovler.read_pkl('../qa_txt/qa_users/update/191024.pkl')
# assumes user_dta is a pandas DataFrame — TODO confirm. Under that assumption
# this removes rows with missing values and keeps one row per 'uid'.
user_dta = user_dta.dropna().drop_duplicates(subset='uid')
# Paths of eight pickle files; the code below checks whether the first one
# already exists before reading further inputs.
# NOTE(review): presumably these cache intermediate results — confirm against
# the code that writes them.
dta1_name = '../qa_txt/prepare_free_dta1.pkl'
dta2_name = '../qa_txt/prepare_free_dta2.pkl'
dta3_name = '../qa_txt/prepare_free_dta3.pkl'
dta4_name = '../qa_txt/prepare_free_dta4.pkl'
dta5_name = '../qa_txt/prepare_free_dta5.pkl'
dta6_name = '../qa_txt/prepare_free_dta6.pkl'
dta7_name = '../qa_txt/prepare_free_dta7.pkl'
dta8_name = '../qa_txt/prepare_free_dta8.pkl'

if not path.exists(dta1_name):
    answer_dta = filesovler.read_pkl('../qa_txt/author_info.pkl')