Example #1
 def run(self):
     while True:
         time.sleep(30)
         videos = set()
         users = set()
         post_counts = {}
         cutoff = time.time() - 86400
         # Rebuild stat_data in place so other threads keep seeing the same
         # list object; only entries from the last 24 hours survive.
         stat_data[:] = [entry for entry in stat_data if entry[2] >= cutoff]
         jdata = {"total_posts": len(stat_data)}
         for user_id, video_id, status_time in stat_data:
             videos.add(video_id)
             users.add(user_id)
             post_counts[video_id] = post_counts.get(video_id, 0) + 1
         jdata["unique_users"] = len(users)
         jdata["unique_videos"] = len(videos)
         sorted_counts = sorted(post_counts.items(), key=operator.itemgetter(1), reverse=True)
         jdata["top_100"] = sorted_counts[:100]
         lock.acquire()
         sql_data = {"type": "facebook_posts", "added": str(time.time()), "data": json.dumps(jdata)}
         sql.insertRow(cursor, "parsed_data" + tableSuffix(), sql_data, True)
         cursor.connection.commit()
         lock.release()
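The prune-and-tally loop above is a rolling 24-hour top-N counter. The same idea can be sketched more compactly with collections.Counter; this is an illustrative standalone version assuming the [user_id, video_id, timestamp] row shape used by stat_data, not a drop-in replacement:

import time
from collections import Counter

def summarize(rows, window=86400, top_n=100):
    # Keep rows from the last `window` seconds, then count posts per video.
    cutoff = time.time() - window
    fresh = [r for r in rows if r[2] >= cutoff]
    counts = Counter(video_id for _, video_id, _ in fresh)
    return {
        "total_posts": len(fresh),
        "unique_users": len(set(r[0] for r in fresh)),
        "unique_videos": len(counts),
        "top_100": counts.most_common(top_n),
    }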
Example #2
    def run(self):
        while True:
            time.sleep(30)
            t = int(time.time())
            compiled_data = {
                60: {},
                3600: {},
                86400: {}
            }  # compile data for one-minute, one-hour and one-day intervals
            # Snapshot the keys so day-old entries can be popped mid-loop.
            for k in list(self.status_data.keys()):
                for limit in compiled_data:
                    if int(k) >= (t - limit):
                        for event_name, count in self.status_data[k].items():
                            compiled_data[limit][event_name] = (
                                compiled_data[limit].get(event_name, 0) + count)
                if int(k) < (t - 86400): self.status_data.pop(k)
            compiled_data["start_time"] = self.start_time
            compiled_data["compiled"] = t

            sql_data = {
                "script": self.file,
                "added": time.time(),
                "data": json.dumps(compiled_data)
            }
            conn = sql.slytics1().connection
            cursor = conn.cursor()
            sql.insertRow(cursor, "script_statuses" + tableSuffix(), sql_data,
                          False, True)
            # Release the cursor first, then its connection.
            cursor.close()
            conn.close()
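For context, self.status_data evidently maps an epoch-second key to a dict of per-event counts, which is what the compile step consumes. A minimal sketch of a compatible recorder (hypothetical; the real status.event implementation is not shown here):

import time

class StatusRecorder:
    def __init__(self):
        self.start_time = int(time.time())
        self.status_data = {}  # epoch second -> {event_name: count}

    def event(self, name):
        # Bucket the event under the current second, matching the int(k)
        # keys the compile loop expects.
        k = int(time.time())
        bucket = self.status_data.setdefault(k, {})
        bucket[name] = bucket.get(name, 0) + 1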
Example #3
 def run(self):
     while True:
         lock.acquire()
         if not q.empty():
             jdata = q.get()
             lock.release()
             status_id = jdata["id"]
             text = jdata["text"]
             status.event("statuses_parsed")
             urls = re.findall(r"(?P<url>https?://[^\s]+)", text)
             videos = []
             for url in urls:
                 u = url
                 if needsExpansion(url):
                     u = expandURL(url)
                 video_id = getVideoID(u)
                 if video_id is not None:
                     if video_id not in videos:
                         videos.append(video_id)
                     lock.acquire()
                     sql_data = {"original_url": url[:200], "expanded_url": u[:1000], "video_id": video_id}
                     sql.insertRow(cursor, "youtube_urls", sql_data, True)
                     cursor.connection.commit()
                     lock.release()
                 status.event("urls_found")
             lock.acquire()
             for video in videos:
                 sql_data = {"id": video}
                 sql.insertRow(cursor, "youtube_ids", sql_data, True)
                 status.event("videos_found")
             cursor.connection.commit()
             lock.release()
         else:
             lock.release()
         time.sleep(0.1)
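needsExpansion, expandURL and getVideoID are project helpers whose definitions are not shown. Purely as an illustration of what getVideoID plausibly does (an assumption, not the original implementation), it could pull the v parameter out of a YouTube watch URL:

try:
    from urllib.parse import urlparse, parse_qs  # Python 3
except ImportError:
    from urlparse import urlparse, parse_qs  # Python 2, which this codebase targets

def getVideoID(url):
    # Return the YouTube video id for a watch or youtu.be URL, else None.
    parts = urlparse(url)
    if "youtube.com" in parts.netloc and parts.path == "/watch":
        ids = parse_qs(parts.query).get("v")
        return ids[0] if ids else None
    if parts.netloc == "youtu.be":
        return parts.path.lstrip("/") or None
    return None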
Example #4
 def run(self):
     while True:
         lock.acquire()
         if not q.empty():
             jdata = q.get()
             lock.release()
             status_id = jdata["id"]
             user_id = status_id.split("_")[0]
             text = ""
             for key in ["message", "link", "description", "source"]:
                 if key in jdata: text += " " + jdata[key]
             status.event("statuses_parsed")
             urls = re.findall(r"(?P<url>https?://[^\s]+)", text)
             videos = []
             for url in urls:
                 video_id = getVideoID(url)
                 if video_id is not None:
                     if video_id not in videos:
                         videos.append(video_id)
                     lock.acquire()
                     sql_data = {"original_url": url[:200], "expanded_url": url[:1000], "video_id": video_id}
                     sql.insertRow(cursor, "youtube_urls", sql_data, True)
                     cursor.connection.commit()
                     lock.release()
                 status.event("urls_found")
             lock.acquire()
             for video in videos:
                 sql_data = {"id": video}
                 sql.insertRow(cursor, "youtube_ids", sql_data, True)
                 status.event("videos_found")
                 stat_data.append([user_id, video, time.time()])
             cursor.connection.commit()
             lock.release()
         else:
             lock.release()
         time.sleep(0.1)
Example #5
 def run(self):
     base_url = "/method/links.getStats?format=json&urls="
     youtube_base = "youtube.com%2Fwatch%3Fv%3D"
     while True:
         lock.acquire()
         req_ids = []
         while len(req_ids) < 850 and not q.empty():
             req_ids.append(q.get())
         if q.empty():
             # Queue drained: reload the full id list for the next pass.
             for ytid in get_ids.ids:
                 q.put(ytid)
         lock.release()

         if len(req_ids) > 0:
             req_url = base_url
             for req_id in req_ids:
                 req_url += youtube_base + req_id + ","
             error_thrown = False
             try:
                 time.sleep(0.01)
                 self.httpconn.request("GET", req_url)
                 res = self.httpconn.getresponse().read()
             except socket.error:
                 error_thrown = True
                 res = "{}"
                 self.httpconn = httplib.HTTPSConnection("api.facebook.com")
             status.event("requests")
             retrieved = time.time()
             jdata = {}
             parsed = json.loads(res)
             for video in parsed:
                 try:
                     video_id = getVideoID(video["normalized_url"])
                     total_count = video["total_count"]
                     # "with" guarantees the lock is released even if an
                     # exception escapes to the enclosing handler.
                     with lock:
                         if video_id in video_ids_seen and total_counts[video_id] == total_count:
                             # Unchanged since the last poll of this video.
                             status.event("data_unchanged")
                         else:
                             video_ids_seen.add(video_id)
                             total_counts[video_id] = total_count
                             jdata = video
                             status.event("data_changed")
                     status.event("urls_polled")
                     jdata.update({"retrieved": retrieved})
                     sql_data = {"data": json.dumps(jdata)}
                     sql.insertRow(self.cursor, "facebook_polldata" + tableSuffix(), sql_data)
                     self.conn.commit()
                 except Exception:
                     error_thrown = True
             if error_thrown:
                 # Put the whole batch back so failed ids get retried.
                 lock.acquire()
                 for req_id in req_ids:
                     q.put(req_id)
                 lock.release()
                 status.event("request_errors")
Example #8
cursor = conn.cursor()

c = sql.slytics1().connection
ccursor = c.cursor()

max_id = 0
table_suffix = tableSuffix()
while True:
    cursor.execute("select id, data from facebook_polldata" + table_suffix +
                   " where id > %s limit 500", (max_id,))
    res = cursor.fetchone()
    if res is None and table_suffix != tableSuffix():
        # tableSuffix() rolled over to a new table; restart the scan from id 0.
        table_suffix = tableSuffix()
        max_id = 0
    while res:
        max_id = res[0]
        data = json.loads(res[1])
        status.event("rows_processed")
        if "normalized_url" in data:
            vid = getVideoID(data["normalized_url"])
            extant_data = sql.scalar(ccursor, "facebook_pollcount", "data", "video", vid)
            str_data = str(data["retrieved"]) + " " + str(data["like_count"]) + " " + str(data["share_count"]) + " "
            if extant_data is None:
                sql_data = {"video": vid, "data": str_data}
                sql.insertRow(ccursor, "facebook_pollcount", sql_data)
            else:
                # Parameterized query (MySQLdb-style %s placeholders) keeps
                # quoting safe instead of concatenating values into the SQL.
                ccursor.execute("update facebook_pollcount set data = concat(data, %s) where video = %s",
                                (str_data, vid))
        res = cursor.fetchone()
    cursor.connection.commit()
    ccursor.connection.commit()
    time.sleep(0.1)
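The facebook_pollcount.data column grows by appended "retrieved like_count share_count " triples. A small reader for that format (illustrative, not part of the original code):

def parse_pollcounts(blob):
    # "1300000000.5 12 3 1300000600.5 14 3 " -> list of sample dicts
    fields = blob.split()
    return [
        {
            "retrieved": float(fields[i]),
            "like_count": int(fields[i + 1]),
            "share_count": int(fields[i + 2]),
        }
        for i in range(0, len(fields) - 2, 3)
    ]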
Example #9
            except Exception:
                conn = httplib.HTTPSConnection("graph.facebook.com")

            if "data" in parsed:
                if "paging" in parsed:
                    locales[l]["since"] = int(urlparse.parse_qs(
                        urlparse.urlparse(parsed["paging"]["previous"])[4]
                    )["since"][0]) - 1
                # Aim for roughly 100 posts per request: the fuller the last
                # response, the sooner the next poll.
                delay = 100 / ((len(parsed["data"]) + 5) / (time.time() - locale["last_retrieve"]))
                if delay > 300: delay = 300
                if l == "en_US": delay = 10

                locales[l]["next_retrieve"] = time.time() + delay
                locales[l]["last_retrieve"] = time.time()

                for post in parsed["data"]:
                    status.event(l + "posts")
                    if "from" in post: post["from"].update({"locale": l})
                    sql_data = {"id": post["id"], "data": json.dumps(post)}
                    if post["id"] not in locales[l]["skip"]:
                        sql.insertRow(cursor, "facebook_statuses" + tableSuffix(), sql_data, True)
                    if post["updated_time"] == parsed["data"][0]["updated_time"]:
                        locales[l]["skip"][post["id"]] = locale["since"]

                # Snapshot the keys so stale entries can be popped mid-loop.
                for key in list(locales[l]["skip"].keys()):
                    if locales[l]["skip"][key] != locales[l]["since"]:
                        locales[l]["skip"].pop(key)

                if len(parsed["data"]) > 480: status.event(l + "pegged_requests")

            result_count = 0
            if "data" in parsed:
                result_count = len(parsed["data"])
            sql_data = {"time": str(time.time()), "locale": l, "since": locale["since"], "status_code": str(res.status), "results": str(result_count)}
            sql.insertRow(cursor, "facebook_requests" + tableSuffix(), sql_data)
            cursor.connection.commit()
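The delay computation above is adaptive polling: it estimates the feed's post rate from the last response and schedules the next request so that roughly 100 new posts should be waiting. The formula in isolation (the +5 pads the rate so an empty response still gives a finite delay; the 300-second cap is carried over from the code):

def next_delay(result_count, seconds_since_last, target=100, cap=300):
    # Observed posts per second, padded for near-empty responses.
    rate = (result_count + 5) / float(seconds_since_last)
    return min(target / rate, cap)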
Example #10
    resp = "{}"
    try:
        conn.request("GET", "/search.json?q=youtube.com&rpp=100&result_type=recent&filter=links&since_id="+str(since_id), None, {"User-Agent":"VideoMuffin"})
        status.event("requests")
        res = conn.getresponse()
        if str(res.status) !=  "200": status.event("non_200_responses")
        resp = res.read()
    except (socket.error, httplib.error) as ex:
        status.event(str(ex))

    try:
        parsed = json.loads(resp)
    except ValueError:
        parsed = {}
        status.event("json_value_errors")
        
    if parsed.has_key("results"):
        tweets_this_request = 0 
        for tweet in parsed["results"]:
            tweets_this_request +=1
            status.event("tweets")
            if since_id < tweet["id"]: since_id = tweet["id"]
            sql_data = {"id":tweet["id"], "data":json.dumps(tweet)}
            sql.insertRow(cursor, "twitter_statuses"+tableSuffix(), sql_data, True)
        if tweets_this_request == 100: status.event("pegged_requests")
    sql_data = {"time":str(time.time()), "since_id":str(since_id), "status_code":str(res.status), "results":str(tweets_this_request)} 
    sql.insertRow(cursor, "twitter_requests"+tableSuffix(), sql_data)
    cursor.connection.commit()   
    conn.close()
    time.sleep(1)
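Example #10 keeps a since_id high-water mark so each search request only returns tweets newer than any already stored. The pattern reduced to its essentials (fetch and handle are hypothetical stand-ins for the HTTP call and the insert):

import time

def poll_newest(fetch, handle, since_id=0):
    # fetch(since_id) -> list of {"id": int, ...} items newer than since_id.
    while True:
        for item in fetch(since_id):
            # Advance the cursor past every id already processed.
            since_id = max(since_id, item["id"])
            handle(item)
        time.sleep(1)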