Example #1
    def __init__(self):
        threading.Thread.__init__(self)
        self.ids = []
        self.ll = self.ul = "1970-01-01 10:10:10"  # initial timestamp upper and lower limits
        self.conn = sql.slytics1().connection
        self.cursor = self.conn.cursor()
        self.start()
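For orientation, this constructor follows the old self-starting-thread pattern: the object opens its own MySQL connection and begins running the moment it is built. A minimal sketch of the enclosing class (the class name and the final line are assumptions; only the seven lines above come from the source):

import threading
import sql  # the project's local MySQL helper module

class PollerThread(threading.Thread):  # hypothetical name; the enclosing class is not shown
    def __init__(self):
        threading.Thread.__init__(self)
        self.ids = []
        self.ll = self.ul = "1970-01-01 10:10:10"  # initial timestamp upper and lower limits
        self.conn = sql.slytics1().connection  # each thread owns its own connection
        self.cursor = self.conn.cursor()
        self.start()  # construction starts the thread; no separate .start() call is needed

poller = PollerThread()  # begins executing run() immediately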
Example #3
    def run(self):
        while True:
            time.sleep(30)
            t = int(time.time())
            compiled_data = {
                60: {},
                3600: {},
                86400: {}
            }  #compile data for intervals of a minute, hour and day
            for k in self.status_data.keys():  # .keys() returns a list in Python 2, so the pop() below is safe
                for limit in compiled_data.keys():
                    if int(k) >= (t - limit):
                        for event_name in self.status_data[k].keys():
                            if event_name not in compiled_data[limit]:
                                compiled_data[limit][event_name] = 0
                            compiled_data[limit][event_name] += self.status_data[k][event_name]
                if int(k) < (t - 86400): self.status_data.pop(k)
            compiled_data["start_time"] = self.start_time
            compiled_data["compiled"] = t

            sql_data = {
                "script": self.file,
                "added": time.time(),
                "data": json.dumps(compiled_data)
            }
            conn = sql.slytics1().connection
            cursor = conn.cursor()
            sql.insertRow(cursor, "script_statuses" + tableSuffix(), sql_data, False, True)
            cursor.close()
            conn.close()  # close the cursor before its connection, not the other way around
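run() reads self.status_data as a dict mapping second-resolution timestamp keys (passed through int() above) to per-event counters, and the other examples record events via status.event(...). A sketch of what that event() method would look like, inferred from how status_data is consumed rather than recovered from the source:

    def event(self, event_name):
        t = str(int(time.time()))  # bucket events by whole second, keyed as a string
        if t not in self.status_data:
            self.status_data[t] = {}
        if event_name not in self.status_data[t]:
            self.status_data[t][event_name] = 0
        self.status_data[t][event_name] += 1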
Example #7
import sql, json, time, re, urllib2, threading, Queue, urlparse, httplib
from util import *

lock = threading.Lock()
conn = sql.slytics1().connection
cursor = conn.cursor()
status = status()
q = Queue.Queue()


class worker(threading.Thread):
    def run(self):
        while True:
            lock.acquire()
            if not q.empty():
                jdata = q.get()
                lock.release()
                status_id = jdata["id"]
                text = jdata["text"]
                status.event("statuses_parsed")
                urls = re.findall("(?P<url>https?://[^\s]+)", text)
                videos = []
                for url in urls:
                    u = url
                    if needsExpansion(url):
                        u = expandURL(url)
                    video_id = getVideoID(u)
                    if video_id is not None and video_id not in videos:
                        videos.append(video_id)
                    if video_id is not None:
                        lock.acquire()
                        sql_data = {"original_url": url[:200], "expanded_url": u[:1000], "video_id": video_id}
                        sql.insertRow(cursor, "youtube_urls", sql_data, True)
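Note a hazard in the loop above: when the queue is empty, the lock acquired at the top of the iteration is never released, so the next acquire() deadlocks (the example is truncated before any code that might release it). Queue.Queue is already thread-safe, so the outer lock around the empty()/get() pair is unnecessary; a sketch of the same loop using a blocking get, keeping the lock only to serialize writes on the shared MySQL cursor (an alternative rewrite, not the original author's code):

class worker(threading.Thread):
    def run(self):
        while True:
            jdata = q.get()  # blocks until an item arrives; Queue does its own locking
            text = jdata["text"]
            status.event("statuses_parsed")
            for url in re.findall("(?P<url>https?://[^\s]+)", text):
                u = expandURL(url) if needsExpansion(url) else url
                video_id = getVideoID(u)
                if video_id is not None:
                    sql_data = {"original_url": url[:200], "expanded_url": u[:1000], "video_id": video_id}
                    with lock:  # MySQLdb cursors are not thread-safe; serialize writes
                        sql.insertRow(cursor, "youtube_urls", sql_data, True)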
Example #10
import sql, json, time, re, urllib2, threading, Queue, urlparse, httplib, operator
from util import *  #same imports as facebook-parser....they are not all necessary.  prune later

status = status()

conn = sql.slytics1().connection
cursor = conn.cursor()

c = sql.slytics1().connection
ccursor = c.cursor()

max_id = 0
table_suffix = tableSuffix()
while True:
    cursor.execute("select id, data from facebook_polldata" + table_suffix + " where id > " + str(max_id) + " limit 500")
    res = cursor.fetchone()
    if res is None and table_suffix != tableSuffix():
        table_suffix = tableSuffix()
        max_id = 0
    while res:
        max_id = res[0]
        data = json.loads(res[1])
        status.event("rows_processed")
        if "normalized_url" in data.keys():
            vid = getVideoID(data["normalized_url"])
            extant_data = sql.scalar(ccursor, "facebook_pollcount", "data", "video", vid)
            str_data = str(data["retrieved"]) + " " + str(data["like_count"]) + " " + str(data["share_count"]) + " "
            if extant_data is None:
                sql_data = {"video": vid, "data": str_data}
                sql.insertRow(ccursor, "facebook_pollcount", sql_data)
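            # The source example is truncated here: the branch that updates an
            # existing row and the fetch of the next row are both missing. Under
            # the assumption that the data column accumulates space-separated
            # "retrieved like_count share_count" triples, the continuation would
            # plausibly be:
            else:
                ccursor.execute(
                    "update facebook_pollcount set data = %s where video = %s",
                    (extant_data + str_data, vid))
        res = cursor.fetchone()  # assumption: advance so the inner while loop can end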
Example #11
import httplib, socket, time, json, MySQLdb, sql, xml.etree.ElementTree, urllib, urlparse, threading
from util import *

cursor = sql.slytics1().connection.cursor()
status = status()
access_token = "189798971066603|486f5fac1bb43befc78b7e14.1-1357950042|LOONKF6Zp8yVXff-Ck5i1sC2hk0"


def getLocales():
    """returns list of all supported facebook locales found at facebook.com/translations/FacebookLocales.xml"""
    conn = httplib.HTTPConnection("www.facebook.com")
    conn.request("GET", "/translations/FacebookLocales.xml")

    tree = xml.etree.ElementTree.fromstring(conn.getresponse().read())
    conn.close()

    res = []
    locales = tree.findall("locale")
    for locale in locales:
        representation = locale.find("codes").find("code").find("standard").find("representation").text
        if representation != "tl_ST" and representation != "ck_US":
            res.append(representation)
        #it's a little bizarre, but the above two locales appear to return search results for _all_ locales
    return res


locales_list = getLocales()  #a straight-up list of all the locales
locales = {}  #a dict that will store, for each locale, time since last retrieval, time until next retrieval, records to skip, etc.
for locale in locales_list:
    locales[locale] = {"since": int(time.time()), "skip": {}, "last_retrieve": time.time(), "next_retrieve": time.time()}

conn = httplib.HTTPSConnection("graph.facebook.com")
while True:
    for l in locales:  #l is the name of the locale
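        # The loop body is cut off in the source. The bookkeeping fields above
        # ("since", "last_retrieve", "next_retrieve", "skip") suggest each locale
        # is polled on its own schedule; a hypothetical sketch follows (the
        # endpoint, query, and interval are assumptions, not recovered code):
        if time.time() < locales[l]["next_retrieve"]:
            continue  # this locale is not yet due for another poll
        conn.request("GET", "/search?type=post&q=youtube&locale=" + l +
                     "&since=" + str(locales[l]["since"]) + "&access_token=" + access_token)
        results = json.loads(conn.getresponse().read())
        # results["data"] would then be scanned for video links
        locales[l]["since"] = int(time.time())
        locales[l]["last_retrieve"] = time.time()
        locales[l]["next_retrieve"] = time.time() + 60  # assumed per-locale poll interval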
Example #12
    def __init__(self):
        threading.Thread.__init__(self)
        self.httpconn = httplib.HTTPSConnection("api.facebook.com")
        self.conn = sql.slytics1().connection
        self.cursor = self.conn.cursor()
        self.start()
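For completeness, a sketch of a run() method that could sit beside this constructor and reuse the persistent connection to api.facebook.com, the host of Facebook's legacy REST API. The FQL query, the example URL, and the 30-second interval are illustrative assumptions (the original run() is not part of this example), and json, time, and urllib are assumed imported as in the other examples:

    def run(self):
        while True:
            fql = "SELECT url, total_count FROM link_stat WHERE url = 'http://example.com'"
            self.httpconn.request("GET", "/method/fql.query?query=" + urllib.quote(fql) + "&format=json")
            stats = json.loads(self.httpconn.getresponse().read())
            # stats is a list of {"url": ..., "total_count": ...} rows that could
            # be written out through self.cursor
            time.sleep(30)  # assumed polling interval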