def __init__(self):
    threading.Thread.__init__(self)
    self.ids = []
    self.ll = self.ul = "1970-01-01 10:10:10" #initial timestamp upper and lower limits
    self.conn = sql.slytics1().connection
    self.cursor = self.conn.cursor()
    self.start()
def run(self):
    while True:
        time.sleep(30)
        t = int(time.time())
        compiled_data = {60: {}, 3600: {}, 86400: {}} #compile data for intervals of a minute, hour and day
        for k in self.status_data.keys():
            for limit in compiled_data.keys():
                if int(k) >= (t - limit):
                    for event_name in self.status_data[k].keys():
                        if not compiled_data[limit].has_key(event_name):
                            compiled_data[limit][event_name] = 0
                        compiled_data[limit][event_name] += self.status_data[k][event_name]
            if int(k) < (t - 86400):
                self.status_data.pop(k)
        compiled_data["start_time"] = self.start_time
        compiled_data["compiled"] = t
        sql_data = {"script": self.file, "added": time.time(), "data": json.dumps(compiled_data)}
        cursor = sql.slytics1().connection.cursor()
        sql.insertRow(cursor, "script_statuses" + tableSuffix(), sql_data, False, True)
        cursor.connection.close()
        cursor.close()
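# The run() loop above relies on self.status_data, self.start_time and self.file,
# none of which are set in the __init__ fragments shown here; they presumably come
# from the rest of the status class in util. The sketch below is a guess at that
# missing piece -- it is not the original util.status implementation, only an
# illustration of the event() counter that the scripts further down call
# (status.event("rows_processed"), status.event("statuses_parsed"), etc.).
import sys, time, threading

class status(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
        self.status_data = {}               #{unix_timestamp: {event_name: count}}, read by run()
        self.start_time = int(time.time())  #when the reporting script started
        self.file = sys.argv[0]             #which script is reporting
        self.daemon = True
        self.start()

    def event(self, event_name):
        """Count one occurrence of event_name in the current one-second bucket."""
        t = int(time.time())
        bucket = self.status_data.setdefault(t, {})
        bucket[event_name] = bucket.get(event_name, 0) + 1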
def __init__(self):
    threading.Thread.__init__(self)
    self.httpconn = httplib.HTTPSConnection("api.facebook.com")
    self.conn = sql.slytics1().connection
    self.cursor = self.conn.cursor()
    self.start()
import sql, json, time, re, urllib2, threading, Queue, urlparse, httplib, operator
from util import * #same imports as facebook-parser....they are not all necessary. prune later

status = status()
conn = sql.slytics1().connection
cursor = conn.cursor()
c = sql.slytics1().connection
ccursor = c.cursor()
max_id = 0
table_suffix = tableSuffix()

while True:
    cursor.execute("select id, data from facebook_polldata" + table_suffix + " where id > " + str(max_id) + " limit 500")
    res = cursor.fetchone()
    if res == None and table_suffix != tableSuffix():
        table_suffix = tableSuffix()
        max_id = 0
    while res:
        max_id = res[0]
        data = json.loads(res[1])
        status.event("rows_processed")
        if "normalized_url" in data.keys():
            vid = getVideoID(data["normalized_url"])
            extant_data = sql.scalar(ccursor, "facebook_pollcount", "data", "video", vid)
            str_data = str(data["retrieved"]) + " " + str(data["like_count"]) + " " + str(data["share_count"]) + " "
            if extant_data == None:
                sql_data = {"video": vid, "data": str_data}
                sql.insertRow(ccursor, "facebook_pollcount", sql_data)
            else:
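# tableSuffix() comes from util and is not shown in this dump. The rollover check
# above (table_suffix != tableSuffix()) only requires that its return value change
# when a fresh polldata table starts, which suggests a date-based suffix. The
# definition below is an illustrative guess, not the original helper.
import time

def tableSuffix():
    return time.strftime("_%Y_%m")  #e.g. "facebook_polldata_2011_01" -- assumed rollover period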
import httplib, socket, time, json, MySQLdb, sql, xml.etree.ElementTree, urllib, urlparse, threading
from util import *

cursor = sql.slytics1().connection.cursor()
status = status()
access_token = "189798971066603|486f5fac1bb43befc78b7e14.1-1357950042|LOONKF6Zp8yVXff-Ck5i1sC2hk0"

def getLocales():
    """returns list of all supported facebook locales found at facebook.com/translations/FacebookLocales.xml"""
    conn = httplib.HTTPConnection("www.facebook.com")
    conn.request("GET", "/translations/FacebookLocales.xml")
    tree = xml.etree.ElementTree.fromstring(conn.getresponse().read())
    conn.close()
    res = []
    locales = tree.findall("locale")
    for locale in locales:
        representation = locale.find("codes").find("code").find("standard").find("representation").text
        if representation != "tl_ST" and representation != "ck_US":
            res.append(representation)
            #it's a little bizarre, but the above two locales appear to return search results for _all_ locales
    return res

locales_list = getLocales() #a straight-up list of all the locales
locales = {} #a dict that will store, for each locale, time since last retrieval, time until next retrieval, records to skip, etc.
for locale in locales_list:
    locales[locale] = {"since": int(time.time()), "skip": {}, "last_retrieve": time.time(), "next_retrieve": time.time()}

conn = httplib.HTTPSConnection("graph.facebook.com")

while True:
    for l in locales: #l is the name of the locale
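# The body of the per-locale loop is cut off above. For orientation only, the
# request below is a guess at the kind of call it makes over the
# graph.facebook.com connection, based on the "since"/"skip" bookkeeping that the
# locales dict sets up; the endpoint, the search term and the parameters are all
# assumptions, not the original code.
import json, urllib

l = "en_US"  #stand-in for the current locale from the loop above
params = urllib.urlencode({
    "q": "youtube.com",          #assumed search term
    "type": "post",
    "locale": l,
    "since": locales[l]["since"],
    "access_token": access_token,
})
conn.request("GET", "/search?" + params)
results = json.loads(conn.getresponse().read())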
import sql, json, time, re, urllib2, threading, Queue, urlparse, httplib
from util import *

lock = threading.Lock()
conn = sql.slytics1().connection
cursor = conn.cursor()
status = status()
q = Queue.Queue()

class worker(threading.Thread):
    def run(self):
        while True:
            lock.acquire()
            if not q.empty():
                jdata = q.get()
                lock.release()
                status_id = jdata["id"]
                text = jdata["text"]
                status.event("statuses_parsed")
                urls = re.findall("(?P<url>https?://[^\s]+)", text)
                videos = []
                for url in urls:
                    u = url
                    if needsExpansion(url) == True:
                        u = expandURL(url)
                    video_id = getVideoID(u)
                    if video_id != None and not video_id in videos:
                        videos.append(video_id)
                    if video_id != None:
                        lock.acquire()
                        sql_data = {"original_url": url[:200], "expanded_url": u[:1000], "video_id": video_id}
                        sql.insertRow(cursor, "youtube_urls", sql_data, True)
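# needsExpansion(), expandURL() and getVideoID() also live in util and are not
# shown. Roughly, the worker resolves shortened links and then pulls the YouTube
# video id out of the final URL. The sketch below illustrates that behaviour; the
# shortener list, the timeout and the parsing rules are assumptions, not the
# original helpers.
import urllib2, urlparse

SHORTENERS = ("bit.ly", "t.co", "goo.gl", "ow.ly", "tinyurl.com") #assumed list of hosts that need resolving

def needsExpansion(url):
    return urlparse.urlparse(url).netloc.lower() in SHORTENERS

def expandURL(url):
    try:
        return urllib2.urlopen(url, timeout=10).geturl() #follow redirects to the final URL
    except Exception:
        return url

def getVideoID(url):
    parsed = urlparse.urlparse(url)
    host = parsed.netloc.lower()
    if host.endswith("youtu.be"):
        return parsed.path.lstrip("/") or None
    if "youtube.com" in host:
        return urlparse.parse_qs(parsed.query).get("v", [None])[0]
    return None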