def fourtree(session, coordinate, starttime, geo_range, inter_lat, inter_lon):
    """Recursively subdivide a search area into four quadrants while it yields data.

    A probe request is issued for ``coordinate`` with radius ``geo_range``.
    If the probe returns nothing, the area is reported as a single view.
    Otherwise the probe's results are saved, the offsets are halved, the
    radius is reduced to 65% (never below 100) and the four diagonal
    neighbours of ``coordinate`` are explored recursively.

    Args:
        session: Passed through unchanged to ``get_weibo_by_coordinate``.
        coordinate: Mapping with ``'latitude'`` and ``'longitude'`` entries
            that ``float()`` can parse.
        starttime: Passed through unchanged to ``get_weibo_by_coordinate``.
        geo_range: Search radius for this node (anything ``float()`` accepts).
        inter_lat: Latitude distance to the child centres before halving.
        inter_lon: Longitude distance to the child centres before halving.

    Returns:
        A list of ``{'coordinate': ..., 'geo_range': ...}`` dicts: the views
        of the four sub-trees followed by this node's own entry. Note that
        for a subdivided node the entry carries the *reduced* radius.
    """
    wait_time(random.randint(2, 5))
    # The probe asks for page 20 at 50 items per page (argument positions
    # inferred from the sibling call in get_info_history -- TODO confirm).
    info_list = get_weibo_by_coordinate(session, coordinate, starttime, 0,
                                        geo_range, 0, 50, 20, 0)
    if not info_list:
        print(0)
        return [{'coordinate': coordinate, 'geo_range': geo_range}]

    print(geo_range)
    save_data_by_db(info_list)

    inter_lat = round(inter_lat / 2, 6)
    inter_lon = round(inter_lon / 2, 6)
    # Shrink the radius to half * 1.3 so neighbouring children overlap,
    # but never go below 100.
    geo_range = max(int(round(float(geo_range) / 2 * 1.3)), 100)

    lat = float(coordinate['latitude'])
    lon = float(coordinate['longitude'])

    # NOTE(review): recursion stops only when a probe returns nothing;
    # there is no explicit depth limit.
    views = []
    for lat_sign, lon_sign in ((1, 1), (-1, 1), (1, -1), (-1, -1)):
        # Latitude is shifted by the latitude step and longitude by the
        # longitude step (the two were previously mixed up).
        child = {
            'latitude': str(round(lat + lat_sign * inter_lat, 6)),
            'longitude': str(round(lon + lon_sign * inter_lon, 6)),
        }
        views += fourtree(session, child, starttime, geo_range,
                          inter_lat, inter_lon)

    views.append({'coordinate': coordinate, 'geo_range': geo_range})
    return views
def search_info_by_id(session, keyword="", start_time="", end_time="", num=1, location=0):
    """Walk the keyword search result pages and store the posts they list.

    For every page index in ``[START_PAGE, START_PAGE + TOTAL_PAGE)`` the
    search URL is fetched after a random 10-30 second pause, the page is
    handed to ``save_catch_page``, and the ids extracted from it are
    resolved and saved. The loop stops early as soon as ``out_page``
    returns a falsy value for a page.

    Args:
        session: Object exposing ``get(url)`` whose result has ``.text``.
        keyword: Search keyword, concatenated into the URL as given.
        start_time: Lower bound of the custom time scope (string).
        end_time: Upper bound of the custom time scope (string).
        num: Returned unchanged.
        location: When not ``0``, ``&haslink=1`` is added to the query.

    Returns:
        ``num``, unmodified.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import time

    haslink = "&haslink=1" if location != 0 else ""

    for page in range(START_PAGE, START_PAGE + TOTAL_PAGE):
        # The parameter is "&timescope"; the previous literal had been
        # mangled into a multiplication sign followed by "cope", which
        # dropped the date filter from the request.
        url = ('http://s.weibo.com/weibo/' + keyword + '&scope=ori' + haslink
               + '&timescope=custom:' + start_time + ':' + end_time
               + '&page=' + str(page) + '&rd=newTips')

        # Sleep in-process instead of spawning a shell `sleep`, which is
        # not available on every platform.
        time.sleep(random.randint(10, 30))

        get_text = session.get(url).text
        get_text = u'' + get_text
        get_text = get_text.encode('utf-8')
        content_text = save_catch_page(get_text)

        if not out_page(content_text):
            return num

        id_list = get_id_list(content_text, session)
        info_list = get_weibo_by_ids(id_list, session)
        status = save_data_by_db(info_list)
        lg_info(status)

    return num
def get_info_history(session):
    """Crawl posts around the first 63 entries of ``QUERY_COORDINATE_LIST``.

    Each seed coordinate is first expanded by ``fourtree`` into a list of
    views (coordinate + radius). Every view is then paged through, pages
    1 to 19, until a page comes back empty or ``save_data_by_db`` returns
    a falsy value.

    Args:
        session: Passed through unchanged to ``fourtree`` and
            ``get_weibo_by_coordinate``.

    Returns:
        None.
    """
    geo_num = 63
    page_count = 50

    # The converted time is stringified and its last two characters dropped;
    # the same value is used as both start and end time.
    starttime = str(convert_time('2015', '1', '1', '0'))[:-2]
    endtime = starttime

    inter_lat = INTER_LAT
    inter_lon = INTER_LON

    for p_id in range(geo_num):
        view_list = fourtree(session, QUERY_COORDINATE_LIST[p_id], starttime,
                             DISTANCE, inter_lat, inter_lon)
        for view in view_list:
            coordinate = view['coordinate']
            geo_range = view['geo_range']
            for p in range(1, 20):
                wait_time(random.randint(10, 15))
                info_list = get_weibo_by_coordinate(session, coordinate,
                                                    starttime, endtime,
                                                    geo_range, 0, page_count,
                                                    p, 0)
                if not info_list:
                    break
                if not save_data_by_db(info_list):
                    break
        # NOTE(review): this pause is applied once per seed coordinate;
        # confirm it was not meant to run once per view instead.
        wait_time(random.randint(10, 20))
def fourtree(session, coordinate, starttime, geo_range, inter_lat, inter_lon):
    """Recursively subdivide a search area into four quadrants while it yields data.

    A probe request is issued for ``coordinate`` with radius ``geo_range``.
    If the probe returns nothing, the area is reported as a single view.
    Otherwise the probe's results are saved, the offsets are halved, the
    radius is reduced to 65% (never below 100) and the four diagonal
    neighbours of ``coordinate`` are explored recursively.

    Args:
        session: Passed through unchanged to ``get_weibo_by_coordinate``.
        coordinate: Mapping with ``'latitude'`` and ``'longitude'`` entries
            that ``float()`` can parse.
        starttime: Passed through unchanged to ``get_weibo_by_coordinate``.
        geo_range: Search radius for this node (anything ``float()`` accepts).
        inter_lat: Latitude distance to the child centres before halving.
        inter_lon: Longitude distance to the child centres before halving.

    Returns:
        A list of ``{'coordinate': ..., 'geo_range': ...}`` dicts: the views
        of the four sub-trees followed by this node's own entry. Note that
        for a subdivided node the entry carries the *reduced* radius.
    """
    wait_time(random.randint(2, 5))
    # The probe asks for page 20 at 50 items per page (argument positions
    # inferred from the sibling call in get_info_history -- TODO confirm).
    info_list = get_weibo_by_coordinate(session, coordinate, starttime, 0,
                                        geo_range, 0, 50, 20, 0)
    if not info_list:
        print(0)
        return [{'coordinate': coordinate, 'geo_range': geo_range}]

    print(geo_range)
    save_data_by_db(info_list)

    inter_lat = round(inter_lat / 2, 6)
    inter_lon = round(inter_lon / 2, 6)
    # Shrink the radius to half * 1.3 so neighbouring children overlap,
    # but never go below 100.
    geo_range = max(int(round(float(geo_range) / 2 * 1.3)), 100)

    lat = float(coordinate['latitude'])
    lon = float(coordinate['longitude'])

    # NOTE(review): recursion stops only when a probe returns nothing;
    # there is no explicit depth limit.
    views = []
    for lat_sign, lon_sign in ((1, 1), (-1, 1), (1, -1), (-1, -1)):
        # Latitude is shifted by the latitude step and longitude by the
        # longitude step (the two were previously mixed up).
        child = {
            'latitude': str(round(lat + lat_sign * inter_lat, 6)),
            'longitude': str(round(lon + lon_sign * inter_lon, 6)),
        }
        views += fourtree(session, child, starttime, geo_range,
                          inter_lat, inter_lon)

    views.append({'coordinate': coordinate, 'geo_range': geo_range})
    return views
def search_info_by_id(session, keyword="", start_time="", end_time="", num=1, location=0):
    """Walk the keyword search result pages and store the posts they list.

    For every page index in ``[START_PAGE, START_PAGE + TOTAL_PAGE)`` the
    search URL is fetched after a random 10-30 second pause, the page is
    handed to ``save_catch_page``, and the ids extracted from it are
    resolved and saved. The loop stops early as soon as ``out_page``
    returns a falsy value for a page.

    Args:
        session: Object exposing ``get(url)`` whose result has ``.text``.
        keyword: Search keyword, concatenated into the URL as given.
        start_time: Lower bound of the custom time scope (string).
        end_time: Upper bound of the custom time scope (string).
        num: Returned unchanged.
        location: When not ``0``, ``&haslink=1`` is added to the query.

    Returns:
        ``num``, unmodified.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import time

    haslink = "&haslink=1" if location != 0 else ""

    for page in range(START_PAGE, START_PAGE + TOTAL_PAGE):
        # The parameter is "&timescope"; the previous literal had been
        # mangled into a multiplication sign followed by "cope", which
        # dropped the date filter from the request.
        url = ('http://s.weibo.com/weibo/' + keyword + '&scope=ori' + haslink
               + '&timescope=custom:' + start_time + ':' + end_time
               + '&page=' + str(page) + '&rd=newTips')

        # Sleep in-process instead of spawning a shell `sleep`, which is
        # not available on every platform.
        time.sleep(random.randint(10, 30))

        get_text = session.get(url).text
        get_text = u'' + get_text
        get_text = get_text.encode('utf-8')
        content_text = save_catch_page(get_text)

        if not out_page(content_text):
            return num

        id_list = get_id_list(content_text, session)
        info_list = get_weibo_by_ids(id_list, session)
        status = save_data_by_db(info_list)
        lg_info(status)

    return num
def get_info_history(session):
    """Crawl posts around the first 63 entries of ``QUERY_COORDINATE_LIST``.

    Each seed coordinate is first expanded by ``fourtree`` into a list of
    views (coordinate + radius). Every view is then paged through, pages
    1 to 19, until a page comes back empty or ``save_data_by_db`` returns
    a falsy value.

    Args:
        session: Passed through unchanged to ``fourtree`` and
            ``get_weibo_by_coordinate``.

    Returns:
        None.
    """
    geo_num = 63
    page_count = 50

    # The converted time is stringified and its last two characters dropped;
    # the same value is used as both start and end time.
    starttime = str(convert_time('2015', '1', '1', '0'))[:-2]
    endtime = starttime

    inter_lat = INTER_LAT
    inter_lon = INTER_LON

    for p_id in range(geo_num):
        view_list = fourtree(session, QUERY_COORDINATE_LIST[p_id], starttime,
                             DISTANCE, inter_lat, inter_lon)
        for view in view_list:
            coordinate = view['coordinate']
            geo_range = view['geo_range']
            for p in range(1, 20):
                wait_time(random.randint(10, 15))
                info_list = get_weibo_by_coordinate(session, coordinate,
                                                    starttime, endtime,
                                                    geo_range, 0, page_count,
                                                    p, 0)
                if not info_list:
                    break
                if not save_data_by_db(info_list):
                    break
        # NOTE(review): this pause is applied once per seed coordinate;
        # confirm it was not meant to run once per view instead.
        wait_time(random.randint(10, 20))
# Repeatedly poll every query coordinate until the fixed deadline below has
# passed, saving whatever each request returns.
# NOTE(review): `session` is read here but not defined in this fragment;
# presumably it is supplied by an enclosing scope -- confirm.
count_time = datetime(2016, 9, 21, 18, 20)
geo_num = 63
page_count = 50
geo_range = 10000
# One slot per coordinate: the 'mid' of the first item seen on the previous
# poll of that coordinate ('0' until it has been polled once).
index_num = ['0'] * geo_num
while datetime.now() < count_time:
    for p_id in range(0, geo_num):
        p = 1
        starttime = convert_time('2015', '7', '1', '0')
        # endtime keeps the raw convert_time result; starttime is the
        # stringified value with its last two characters removed.
        endtime = starttime
        starttime = str(starttime)
        starttime = starttime[:-2]
        info_list = get_weibo_by_coordinate(session, QUERY_COORDINATE_LIST[p_id], starttime, endtime, geo_range, 0, page_count, p, 0)
        # The result is handed to the saver before the emptiness check.
        save_data_by_db(info_list)
        if not info_list:
            continue
        else:
            pass
        length = len(info_list)
        # Compare the last item's 'mid' on this page with the marker stored
        # for this coordinate, then move the marker to this page's first item.
        cmpstr1 = info_list[length - 1]['mid']
        cmpstr2 = index_num[p_id]
        index_num[p_id] = info_list[0]['mid']
        # Keep fetching further pages while the comparison yields 1.
        # NOTE(review): neither cmpstr1 nor cmpstr2 is updated in the visible
        # loop body, so the loop only ends when a page comes back empty.
        while arbitrary_precision_compare(cmpstr1, cmpstr2) == 1:
            p += 1
            info_list = get_weibo_by_coordinate(session, QUERY_COORDINATE_LIST[p_id], starttime, endtime, geo_range, 0, page_count, p, 0)
            save_data_by_db(info_list)
            if not info_list:
                break