Esempio n. 1
0
 def log_event(self, request, event, source=None, enable_logging=False):
     """Report ``event`` through ``self.make_request("events", ...)``.

     ``source`` falls back to ``self.default_source`` when falsy. The user
     id is derived from ``request.user`` via ``castle_userid``; without a
     request the placeholder "<no-id>" is sent instead. When both
     ``request`` and ``enable_logging`` are truthy, the pretty-printed
     response is handed to ``logmessage``.
     """
     if not source:
         source = self.default_source
     headers = self.get_headers_from_request(request, source=source)

     if request:
         user_id = castle_userid(request.user)
     else:
         user_id = "<no-id>"

     payload = {"name": event, "user_id": user_id}
     resp = self.make_request("events", data=payload, headers=headers)

     # Nothing more to do unless logging was requested for a real request.
     if not (request and enable_logging):
         return
     logmessage(request, pprint.pformat(resp))
Esempio n. 2
0
def geocode():
    """Geocode the IPs listed in ip.csv and append the hits to ip_geo.csv.

    The input file is read line by line as tab-separated text; the second
    field of each line, stripped of surrounding double quotes, is passed to
    ``utils.geocode_ip``. Results whose first element is not "NA" are
    collected and, once the whole input has been processed, appended to the
    output file as tab-separated, UTF-8 encoded rows.

    Lines that have no second field (blank lines, lines without a tab) are
    logged and skipped. Previously such a line raised IndexError, and since
    results are only written at the end, every row geocoded so far was lost.
    """
    filename = "/mnt/nfs6/wikipedia.proj/wikibaseball/rawdata/stash/ip.csv"
    outfilename = "/mnt/nfs6/wikipedia.proj/wikibaseball/rawdata/stash/ip_geo.csv"

    output = []

    with open(filename) as f:
        # count is the 1-based input line number shown in the log messages.
        for count, line in enumerate(f, 1):
            items = line.strip().split("\t")
            if len(items) < 2:
                # No IP column on this line: report it and keep going rather
                # than aborting the whole run.
                utils.logmessage(str(count) + " Skipped malformed line", "getdata", 1)
                continue

            ip = items[1].strip('"')
            data = utils.geocode_ip(ip)

            # A first element of "NA" is treated as a failed lookup.
            if data[0] != "NA":
                output.append(data)
                utils.logmessage(str(count) + " Got: " + str(ip) + " " + data[4], "getdata", 1)
            else:
                utils.logmessage(str(count) + " Failed: " + str(ip) + "", "getdata", 1)

    # Append mode: rows from earlier runs are kept.
    with open(outfilename, "a") as f:
        for row in output:
            line = "\t".join([unicode(x).encode('utf8') for x in row]) + "\n"
            f.write(line)
Esempio n. 3
0
def parse_traffic(wikihandles, years):
    """Print one tab-separated traffic row per (wikihandle, year) pair.

    For every handle a log message is emitted, then for every year the list
    returned by ``utils.parse_traf`` is extended with the handle and the
    year, each cell is converted to a UTF-8 byte string, and the row is
    printed to standard output.
    """
    for handle in wikihandles:
        utils.logmessage("Parsing traffic for: " + handle, "getdata", 0)
        for yr in years:
            row = utils.parse_traf(handle, yr) + [handle, yr]
            cells = [unicode(cell).encode('utf8') for cell in row]
            # Single-argument form prints the same text under Python 2.
            print("\t".join(cells))
Esempio n. 4
0
def get_traffic(wikihandles, years):
    """Call ``utils.get_traf`` for every (wikihandle, year) combination.

    A log message is written once per handle, before its years are
    processed. Nothing is returned.
    """
    for handle in wikihandles:
        message = "Downloading traffic for: " + handle
        utils.logmessage(message, "getdata", 1)

        for yr in years:
            utils.get_traf(handle, yr)
Esempio n. 5
0
 def log_event(self, request, event, source=None, enable_logging=False):
     """Submit ``event`` by calling ``self.make_request("events", ...)``.

     A falsy ``source`` is replaced by ``self.default_source`` before the
     headers are built from the request. The payload carries the event
     name and a user id: ``castle_userid(request.user)`` when a request is
     given, otherwise the literal "<no-id>". The pretty-printed response is
     passed to ``logmessage`` only when ``request`` and ``enable_logging``
     are both truthy.
     """
     source = source or self.default_source
     headers = self.get_headers_from_request(request, source=source)

     payload = {"name": event}
     payload["user_id"] = castle_userid(request.user) if request else "<no-id>"

     resp = self.make_request("events", data=payload, headers=headers)

     if request and enable_logging:
         formatted = pprint.pformat(resp)
         logmessage(request, formatted)
Esempio n. 6
0
def get_revs(wikihandles, years):
    """Fetch and store revision XML for each (wikihandle, year) pair.

    The target file name is "<wikihandle>_<year>"; if the matching .xml
    file already exists under the module-level ``root`` prefix the pair is
    skipped with a log message. Otherwise the XML is obtained with
    ``utils.get_xml`` and stored with ``utils.write_xml``. After all years
    of one handle have been handled, the function sleeps for 0.01 seconds.
    """
    for handle in wikihandles:
        for yr in years:
            filename = "_".join([handle, str(yr)])
            path = "".join([root, "rawdata/wiki/revdata/", filename, ".xml"])

            if os.path.exists(path):
                utils.logmessage("Not fetching again: " + filename, "getdata", 1)
                continue

            xml_data = utils.get_xml(handle, yr)
            utils.write_xml(xml_data, filename)
            utils.logmessage("Getting: " + filename, "getdata", 1)

        # Short pause between handles.
        sleep(0.01)
Esempio n. 7
0
def get_sport(sport):
    """Download and unzip the raw data archive for one sport.

    Supported values of ``sport`` are "baseball" and "basketball". The
    archive is saved below the module-level ``root`` prefix and extracted
    into the same folder it was saved in.

    Raises:
        ValueError: if ``sport`` is not a supported name. The check runs
            before anything is logged or downloaded; previously an unknown
            sport failed later with UnboundLocalError.
    """
    # sport -> (download URL, folder relative to root, archive file name)
    sources = {
        "baseball": (
            "http://seanlahman.com/files/database/lahman-csv_2014-02-14.zip",
            "rawdata/lahman/",
            "lahman.zip",
        ),
        "basketball": (
            "http://www.databasebasketball.com/databasebasketball_2009_v1.zip",
            "rawdata/basketball/",
            "basketball.zip",
        ),
    }

    if sport not in sources:
        raise ValueError(
            "Unknown sport: %r (expected one of: %s)"
            % (sport, ", ".join(sorted(sources)))
        )

    url, folder, saveas = sources[sport]

    utils.logmessage("Getting: " + sport, "getdata", 1)
    path = root + folder

    # download file
    utils.logmessage("Downloading zip file", "getdata", 1)
    utils.download_file(url, path + saveas)

    # unzip file (announced before the step, like the download above)
    utils.logmessage("Unzipping file", "getdata", 1)
    utils.unzip(path + saveas, path)
Esempio n. 8
0
def get_revlist_all(wikihandles):
    """Run ``utils.get_revlist`` for every handle and log its result.

    The value returned by ``utils.get_revlist`` is reported in the log
    message as the number of rows obtained for that handle.
    """
    for handle in wikihandles:
        rows = utils.get_revlist(handle)
        message = "".join(["got ", str(rows), " rows for ", handle])
        utils.logmessage(message, "getdata", 1)