Exemple #1
0
def get_gacha_record_to_sql(num=5):
    """Crawl sample gacha records ``num`` times and store the valid items.

    Each round fetches one sample with ``get_sample_gacha_record()``. A sample
    whose JSON dump has the same MD5 as one already seen in this run is
    skipped immediately (no insert, no sleep). Otherwise every item accepted
    by ``is_valid_data`` is passed to ``insert_data`` and the batch is
    committed, after which the function sleeps before the next round. The
    delay starts at 300 seconds and shrinks by 1.5 seconds per valid item.

    Args:
        num: Number of crawl rounds to perform.
    """
    # PyMySQL 1.0+ only accepts keyword arguments for connect().
    db = pymysql.connect(host=host, user=username, password=password,
                         database=database)
    # A set gives O(1) duplicate checks; only membership is ever needed.
    seen_hashes = set()
    print("一共进行{}次爬取".format(num))
    try:
        for i in range(num):
            valid_times = 0
            started_at = time.time()
            print("正在进行第{}次爬取..".format(i + 1))
            sample_gacha_record = get_sample_gacha_record()
            record_hash = get_md5(json.dumps(sample_gacha_record))
            if record_hash in seen_hashes:
                # Same payload as an earlier round: nothing new to store.
                continue
            seen_hashes.add(record_hash)

            # TODO: should check whether the item already exists in the
            # database; how do we tell whether two items are the same pull?
            for item in sample_gacha_record:
                if is_valid_data(db, item):
                    valid_times += 1
                    insert_data(db, item)
            db.commit()

            print("第{}次爬取成功,本次共爬取有效数据{}条,用时{}秒".format(
                i + 1, valid_times, time.time() - started_at))
            # More than 200 valid items would make the delay negative, and
            # time.sleep() raises ValueError on negative values, so clamp at 0.
            next_time = max(0, int(60 * ((-valid_times/40)+5)))
            print("下次爬取时间为{}秒后".format(next_time))
            time.sleep(next_time)
    finally:
        # Release the connection even if a crawl or insert raises.
        db.close()
Exemple #2
0
def do_request(url, oauth, carcel, payload, geocode):
    """Fetch one page of search results and store the tweets close to ``geocode``.

    The profile image of every returned status is downloaded. A status is
    passed to ``insert_data`` only when it carries geo coordinates and
    ``calc_distance`` to the reference point is below 1.1 (presumably
    kilometres -- confirm against ``calc_distance``).

    Args:
        url: Endpoint to query.
        oauth: Auth object handed to ``requests.get``.
        carcel: Value stored under the ``'carcel'`` key of each saved record.
        payload: Query parameters for the request, or ``None`` for none.
        geocode: Comma-separated string whose first two fields are parsed as
            floats and used as the reference latitude and longitude.

    Returns:
        ``data['search_metadata']['next_results']`` when the response has it,
        otherwise the string ``"None"``.
    """
    # ``params=None`` is the default of requests.get, so a single call covers
    # both the "payload" and the "no payload" case.
    response = requests.get(url=url, auth=oauth, params=payload)
    data = response.json()

    try:
        next_results = data['search_metadata']['next_results']
    except (KeyError, TypeError):
        # Only a missing key / unexpected response shape means "no next
        # page"; anything else should propagate.
        print("There are not next_results")
        next_results = "None"
    else:
        print(next_results)

    for status in data['statuses']:
        user = status['user']
        screen_name = user['screen_name']
        obj = {
            'carcel': carcel,
            'status_id': status['id'],
            'text': status['text'],
            'screen_name': screen_name,
        }

        # Downloaded for every status, including those not stored below.
        download_profile_image(user['profile_image_url_https'], screen_name)

        obj['utc_offset'] = user['utc_offset']
        obj['user_id'] = user['id']
        obj['created_at'] = status['created_at']

        geo = status.get('geo')
        if geo is None:
            # Statuses without a location are never stored.
            continue

        latitude = geo['coordinates'][0]
        longitude = geo['coordinates'][1]
        obj['latitude'] = latitude
        obj['longitude'] = longitude
        obj['retuited'] = "no"
        obj['in_jail'] = "no"

        # Parse the reference point and measure the distance once, then
        # reuse the value for both the check and the log line.
        geocode_parts = geocode.split(",")
        distance = calc_distance(latitude, longitude,
                                 float(geocode_parts[0]),
                                 float(geocode_parts[1]))
        if distance < 1.1:
            print("{} {}".format(obj['status_id'], distance))
            insert_data(obj)

    return next_results
def do_request(url, oauth, carcel, payload, geocode):
    """Fetch one page of search results and store the tweets close to ``geocode``.

    The profile image of every returned status is downloaded. A status is
    passed to ``insert_data`` only when it carries geo coordinates and
    ``calc_distance`` to the reference point is below 1.1 (presumably
    kilometres -- confirm against ``calc_distance``).

    Args:
        url: Endpoint to query.
        oauth: Auth object handed to ``requests.get``.
        carcel: Value stored under the ``'carcel'`` key of each saved record.
        payload: Query parameters for the request, or ``None`` for none.
        geocode: Comma-separated string whose first two fields are parsed as
            floats and used as the reference latitude and longitude.

    Returns:
        ``data['search_metadata']['next_results']`` when the response has it,
        otherwise the string ``"None"``.
    """
    # ``params=None`` is the default of requests.get, so a single call covers
    # both the "payload" and the "no payload" case.
    response = requests.get(url=url, auth=oauth, params=payload)
    data = response.json()

    try:
        next_results = data['search_metadata']['next_results']
    except (KeyError, TypeError):
        # Only a missing key / unexpected response shape means "no next
        # page"; anything else should propagate.
        print("There are not next_results")
        next_results = "None"
    else:
        print(next_results)

    for status in data['statuses']:
        user = status['user']
        screen_name = user['screen_name']
        obj = {
            'carcel': carcel,
            'status_id': status['id'],
            'text': status['text'],
            'screen_name': screen_name,
        }

        # Downloaded for every status, including those not stored below.
        download_profile_image(user['profile_image_url_https'], screen_name)

        obj['utc_offset'] = user['utc_offset']
        obj['user_id'] = user['id']
        obj['created_at'] = status['created_at']

        geo = status.get('geo')
        if geo is None:
            # Statuses without a location are never stored.
            continue

        latitude = geo['coordinates'][0]
        longitude = geo['coordinates'][1]
        obj['latitude'] = latitude
        obj['longitude'] = longitude
        obj['retuited'] = "no"

        # Parse the reference point and measure the distance once, then
        # reuse the value for both the check and the log line.
        geocode_parts = geocode.split(",")
        distance = calc_distance(latitude, longitude,
                                 float(geocode_parts[0]),
                                 float(geocode_parts[1]))
        if distance < 1.1:
            print("{} {}".format(obj['status_id'], distance))
            insert_data(obj)

    return next_results
Exemple #4
0
def run(username, flow):
    """Run each action in the action list and return the results as JSON.

    Args:
        username: Echoed back in the response under ``username``.
        flow: Echoed back in the response under ``flow``.

    Returns:
        The ``jsonify`` response holding the list of ``lib.run_action``
        results (``data``), ``username``, ``flow`` and the current local
        time (``time``).
    """
    # NOTE(review): the action list is hard-coded; it looks like a stand-in
    # for ``lib.get_mixedtables(username, flow)`` -- confirm before relying
    # on this endpoint.
    data = [(u'news:5', 1), (u'norris:6', 2)]
    print(data)

    action_results = [lib.run_action(line) for line in data]

    return jsonify(data=action_results,
                   username=username,
                   flow=flow,
                   time=datetime.datetime.now())
Exemple #5
0
import lib

# Insert a single tuit into the database.
# argv[1] is the status id to fetch, argv[2] the jail name stored with it.
if len(sys.argv) < 3:
    print("This script inserts 1 tuit into our database using the status_id value and a string for the jail")
    print("Usage python insert_tuit.py 123208309281908 'Penal Diroes'")
    # Missing arguments are a usage error: exit non-zero so callers notice.
    sys.exit(1)


oauth = api.get_oauth()

# get tuit data
url = "https://api.twitter.com/1.1/statuses/show.json"
payload = {'id': sys.argv[1].strip()}

r = requests.get(url=url, auth=oauth, params=payload)
data = r.json()

carcel = sys.argv[2].strip()
# Python 2 delivers argv as bytes, so decode it to text there; on Python 3
# argv is already text and is used unchanged.
if isinstance(carcel, bytes):
    carcel = carcel.decode("utf-8")

obj = {}
obj['carcel'] = carcel
obj['created_at'] = data['created_at']
obj['screen_name'] = data['user']['screen_name']
obj['status_id'] = data['id']
obj['text'] = data['text']
obj['user_id'] = data['user']['id']
obj['utc_offset'] = data['user']['utc_offset']

# 'geo' is null for tuits published without a location; stop with a clear
# message instead of failing with a TypeError on the subscript below.
geo = data.get('geo')
if geo is None:
    sys.exit("Tuit {} has no geo coordinates; nothing was inserted".format(
        payload['id']))

coords = geo['coordinates']
obj['latitude'] = coords[0]
obj['longitude'] = coords[1]

lib.insert_data(obj)