Ejemplo n.º 1
0
def fourtree(session, coordinate, starttime, geo_range, inter_lat, inter_lon):
    """Recursively query a point and, while it yields data, its four diagonal neighbours.

    The point is queried through ``get_weibo_by_coordinate``. When the query
    returns nothing, the point is reported as a leaf. Otherwise the results
    are stored with ``save_data_by_db``, both offsets are halved, the range
    is shrunk (``geo_range / 2 * 1.3``, never below 100) and the function
    recurses into the four points ``(lat +/- inter_lat, lon +/- inter_lon)``.

    Args:
        session: Passed through unchanged to ``get_weibo_by_coordinate``.
        coordinate: Mapping with ``'latitude'`` and ``'longitude'`` entries
            that ``float()`` can convert.
        starttime: Passed through unchanged to ``get_weibo_by_coordinate``.
        geo_range: Range value for the query; anything ``float()`` accepts.
        inter_lat: Latitude offset of the child points before halving.
        inter_lon: Longitude offset of the child points before halving.

    Returns:
        A list of ``{'coordinate': ..., 'geo_range': ...}`` dicts: the entries
        of the four sub-trees followed by the entry for this point.
    """
    # Randomised pause before every request.
    wait_time(random.randint(2, 5))
    info_list = get_weibo_by_coordinate(session, coordinate, starttime, 0,
                                        geo_range, 0, 50, 20, 0)
    if not info_list:
        print(0)
        return [{'coordinate': coordinate, 'geo_range': geo_range}]

    print(geo_range)
    save_data_by_db(info_list)

    inter_lat = round(inter_lat / 2, 6)
    inter_lon = round(inter_lon / 2, 6)
    # The floor of 100 keeps the range from collapsing towards zero.
    geo_range = max(int(round(float(geo_range) / 2 * 1.3)), 100)

    lat = float(coordinate['latitude'])
    lon = float(coordinate['longitude'])

    # NOTE(review): recursion only ends when a query comes back empty; there
    # is no explicit depth limit.
    views = []
    # Latitude moves by inter_lat and longitude by inter_lon. The previous
    # code applied inter_lat to longitude (children 1 and 3) and inter_lon to
    # latitude (children 2 and 4), which skews the grid whenever the two
    # offsets differ.
    for lat_sign, lon_sign in ((1, 1), (-1, 1), (1, -1), (-1, -1)):
        child = {
            'latitude': str(round(lat + lat_sign * inter_lat, 6)),
            'longitude': str(round(lon + lon_sign * inter_lon, 6)),
        }
        views += fourtree(session, child, starttime, geo_range, inter_lat,
                          inter_lon)

    # NOTE(review): this entry carries the already-reduced geo_range, as the
    # original code did; confirm that is intended for the parent point.
    views.append({'coordinate': coordinate, 'geo_range': geo_range})
    return views
Ejemplo n.º 2
0
def search_info_by_id(session,
                      keyword="",
                      start_time="",
                      end_time="",
                      num=1,
                      location=0):
    """Walk the search result pages for ``keyword`` and store what they list.

    For every page index in ``range(START_PAGE, START_PAGE + TOTAL_PAGE)`` the
    search URL is fetched through ``session``, the page is handed to
    ``save_catch_page``, ids are extracted with ``get_id_list``, the matching
    posts are loaded with ``get_weibo_by_ids`` and saved with
    ``save_data_by_db``. Paging stops early as soon as ``out_page`` returns a
    falsy value for a page.

    Args:
        session: Object providing ``get(url)`` whose result has a ``text``
            attribute; also passed to the id/post helpers.
        keyword: Search term, inserted into the URL as-is.
        start_time: Lower bound of the ``timescope`` URL parameter.
        end_time: Upper bound of the ``timescope`` URL parameter.
        num: Returned unchanged.
        location: Any value other than 0 adds ``&haslink=1`` to the URL.

    Returns:
        ``num``, exactly as passed in.
    """
    # Function-scope import so the block does not rely on a module-level one.
    import time

    haslink = "&haslink=1" if location != 0 else ""
    for i in range(START_PAGE, START_PAGE + TOTAL_PAGE):
        url = ('http://s.weibo.com/weibo/' + keyword + '&scope=ori' + haslink +
               '&timescope=custom:' + start_time + ':' + end_time +
               '&page=' + str(i) + '&rd=newTips')
        # Randomised pause between requests. time.sleep replaces the former
        # os.system('sleep N'), which spawned a shell and only worked where a
        # `sleep` executable exists.
        time.sleep(random.randint(10, 30))
        # The u'' prefix is kept from the original; the helpers receive
        # UTF-8 encoded bytes.
        get_text = (u'' + session.get(url).text).encode('utf-8')
        content_text = save_catch_page(get_text)
        if not out_page(content_text):
            return num
        id_list = get_id_list(content_text, session)
        info_list = get_weibo_by_ids(id_list, session)
        lg_info(save_data_by_db(info_list))
    return num
Ejemplo n.º 3
0
def get_info_history(session):
    """Collect posts around the first 63 entries of QUERY_COORDINATE_LIST.

    Each entry is expanded into a list of views by ``fourtree``. For every
    view, pages 1 through 19 are requested from ``get_weibo_by_coordinate``
    and stored with ``save_data_by_db``; paging of a view ends at the first
    empty page or the first falsy save result. Returns nothing.
    """
    geo_num = 63
    page_count = 50
    # Stringify the converted time and drop its last two characters.
    starttime = str(convert_time('2015', '1', '1', '0'))[:-2]
    endtime = starttime
    inter_lat = INTER_LAT
    inter_lon = INTER_LON

    for p_id in range(geo_num):
        view_list = fourtree(session, QUERY_COORDINATE_LIST[p_id], starttime,
                             DISTANCE, inter_lat, inter_lon)
        for view in view_list:
            coordinate = view['coordinate']
            geo_range = view['geo_range']
            for page in range(1, 20):
                # Randomised pause before each page request.
                wait_time(random.randint(10, 15))
                info_list = get_weibo_by_coordinate(
                    session, coordinate, starttime, endtime, geo_range, 0,
                    page_count, page, 0)
                if not info_list:
                    break
                if not save_data_by_db(info_list):
                    break
        # Longer randomised pause once all views of this entry are done.
        wait_time(random.randint(10, 20))
Ejemplo n.º 4
0
def fourtree(session, coordinate, starttime, geo_range, inter_lat, inter_lon):
    """Recursively query a point and, while it yields data, its four diagonal neighbours.

    The point is queried through ``get_weibo_by_coordinate``. When the query
    returns nothing, the point is reported as a leaf. Otherwise the results
    are stored with ``save_data_by_db``, both offsets are halved, the range
    is shrunk (``geo_range / 2 * 1.3``, never below 100) and the function
    recurses into the four points ``(lat +/- inter_lat, lon +/- inter_lon)``.

    Args:
        session: Passed through unchanged to ``get_weibo_by_coordinate``.
        coordinate: Mapping with ``'latitude'`` and ``'longitude'`` entries
            that ``float()`` can convert.
        starttime: Passed through unchanged to ``get_weibo_by_coordinate``.
        geo_range: Range value for the query; anything ``float()`` accepts.
        inter_lat: Latitude offset of the child points before halving.
        inter_lon: Longitude offset of the child points before halving.

    Returns:
        A list of ``{'coordinate': ..., 'geo_range': ...}`` dicts: the entries
        of the four sub-trees followed by the entry for this point.
    """
    # Randomised pause before every request.
    wait_time(random.randint(2, 5))
    info_list = get_weibo_by_coordinate(session, coordinate, starttime, 0, geo_range, 0, 50, 20, 0)
    if not info_list:
        print(0)
        return [{'coordinate': coordinate, 'geo_range': geo_range}]

    print(geo_range)
    save_data_by_db(info_list)

    inter_lat = round(inter_lat / 2, 6)
    inter_lon = round(inter_lon / 2, 6)
    # The floor of 100 keeps the range from collapsing towards zero.
    geo_range = max(int(round(float(geo_range) / 2 * 1.3)), 100)

    lat = float(coordinate['latitude'])
    lon = float(coordinate['longitude'])

    # NOTE(review): recursion only ends when a query comes back empty; there
    # is no explicit depth limit.
    views = []
    # Latitude moves by inter_lat and longitude by inter_lon. The previous
    # code applied inter_lat to longitude (children 1 and 3) and inter_lon to
    # latitude (children 2 and 4), which skews the grid whenever the two
    # offsets differ.
    for lat_sign, lon_sign in ((1, 1), (-1, 1), (1, -1), (-1, -1)):
        child = {
            'latitude': str(round(lat + lat_sign * inter_lat, 6)),
            'longitude': str(round(lon + lon_sign * inter_lon, 6)),
        }
        views += fourtree(session, child, starttime, geo_range, inter_lat, inter_lon)

    # NOTE(review): this entry carries the already-reduced geo_range, as the
    # original code did; confirm that is intended for the parent point.
    views.append({'coordinate': coordinate, 'geo_range': geo_range})
    return views
Ejemplo n.º 5
0
def search_info_by_id(session, keyword="", start_time="", end_time="",  num=1, location=0):
    """Walk the search result pages for ``keyword`` and store what they list.

    For every page index in ``range(START_PAGE, START_PAGE + TOTAL_PAGE)`` the
    search URL is fetched through ``session``, the page is handed to
    ``save_catch_page``, ids are extracted with ``get_id_list``, the matching
    posts are loaded with ``get_weibo_by_ids`` and saved with
    ``save_data_by_db``. Paging stops early as soon as ``out_page`` returns a
    falsy value for a page.

    Args:
        session: Object providing ``get(url)`` whose result has a ``text``
            attribute; also passed to the id/post helpers.
        keyword: Search term, inserted into the URL as-is.
        start_time: Lower bound of the ``timescope`` URL parameter.
        end_time: Upper bound of the ``timescope`` URL parameter.
        num: Returned unchanged.
        location: Any value other than 0 adds ``&haslink=1`` to the URL.

    Returns:
        ``num``, exactly as passed in.
    """
    # Function-scope import so the block does not rely on a module-level one.
    import time

    haslink = "&haslink=1" if location != 0 else ""
    for i in range(START_PAGE, START_PAGE + TOTAL_PAGE):
        url = ('http://s.weibo.com/weibo/' + keyword + '&scope=ori' + haslink
               + '&timescope=custom:' + start_time + ':' + end_time
               + '&page=' + str(i) + '&rd=newTips')
        # Randomised pause between requests. time.sleep replaces the former
        # os.system('sleep N'), which spawned a shell and only worked where a
        # `sleep` executable exists.
        time.sleep(random.randint(10, 30))
        # The u'' prefix is kept from the original; the helpers receive
        # UTF-8 encoded bytes.
        get_text = (u'' + session.get(url).text).encode('utf-8')
        content_text = save_catch_page(get_text)
        if not out_page(content_text):
            return num
        id_list = get_id_list(content_text, session)
        info_list = get_weibo_by_ids(id_list, session)
        lg_info(save_data_by_db(info_list))
    return num
Ejemplo n.º 6
0
def get_info_history(session):
    """Collect posts around the first 63 entries of QUERY_COORDINATE_LIST.

    Each entry is expanded into a list of views by ``fourtree``. For every
    view, pages 1 through 19 are requested from ``get_weibo_by_coordinate``
    and stored with ``save_data_by_db``; paging of a view ends at the first
    empty page or the first falsy save result. Returns nothing.
    """
    geo_num = 63
    page_count = 50
    # Stringify the converted time and drop its last two characters.
    starttime = str(convert_time('2015', '1', '1', '0'))[:-2]
    endtime = starttime
    inter_lat = INTER_LAT
    inter_lon = INTER_LON

    for p_id in range(geo_num):
        view_list = fourtree(session, QUERY_COORDINATE_LIST[p_id], starttime,
                             DISTANCE, inter_lat, inter_lon)
        for view in view_list:
            coordinate = view['coordinate']
            geo_range = view['geo_range']
            for page in range(1, 20):
                # Randomised pause before each page request.
                wait_time(random.randint(10, 15))
                info_list = get_weibo_by_coordinate(session, coordinate,
                                                    starttime, endtime,
                                                    geo_range, 0, page_count,
                                                    page, 0)
                if not info_list:
                    break
                if not save_data_by_db(info_list):
                    break
        # Longer randomised pause once all views of this entry are done.
        wait_time(random.randint(10, 20))
Ejemplo n.º 7
0
    count_time = datetime(2016, 9, 21, 18, 20)
    geo_num = 63
    page_count = 50
    geo_range = 10000
    index_num = ['0'] * geo_num
    while datetime.now() < count_time:

        for p_id in range(0, geo_num):
            p = 1
            starttime = convert_time('2015', '7', '1', '0')
            endtime = starttime
            starttime = str(starttime)
            starttime = starttime[:-2]
            info_list = get_weibo_by_coordinate(session, QUERY_COORDINATE_LIST[p_id],
                                                starttime, endtime, geo_range, 0, page_count, p, 0)
            save_data_by_db(info_list)
            if not info_list:
                continue
            else:
                pass
            length = len(info_list)
            cmpstr1 = info_list[length - 1]['mid']
            cmpstr2 = index_num[p_id]
            index_num[p_id] = info_list[0]['mid']
            while arbitrary_precision_compare(cmpstr1, cmpstr2) == 1:
                p += 1
                info_list = get_weibo_by_coordinate(session, QUERY_COORDINATE_LIST[p_id],
                                                    starttime, endtime, geo_range, 0, page_count, p, 0)
                save_data_by_db(info_list)
                if not info_list:
                    break