def log_event(self, request, event, source=None, enable_logging=False):
    """Report a named event through ``self.make_request`` on the "events" path.

    Args:
        request: Request object the headers and user are taken from. May be
            falsy, in which case the user id sent is the literal "<no-id>".
        event: Value sent under the "name" key of the payload.
        source: Passed to ``self.get_headers_from_request``; when falsy,
            ``self.default_source`` is used instead.
        enable_logging: When truthy (and ``request`` is truthy too), the
            pretty-printed response is handed to ``logmessage``.

    Returns:
        None. The response from ``make_request`` is only logged, never returned.
    """
    effective_source = source or self.default_source
    request_headers = self.get_headers_from_request(request, source=effective_source)

    if request:
        user_id = castle_userid(request.user)
    else:
        # No request means no user object to derive an id from.
        user_id = "<no-id>"

    payload = {"name": event, "user_id": user_id}
    response = self.make_request("events", data=payload, headers=request_headers)

    if not (request and enable_logging):
        return
    logmessage(request, pprint.pformat(response))
def geocode(filename="/mnt/nfs6/wikipedia.proj/wikibaseball/rawdata/stash/ip.csv",
            outfilename="/mnt/nfs6/wikipedia.proj/wikibaseball/rawdata/stash/ip_geo.csv"):
    """Geocode the IPs listed in a tab-separated file and append the results.

    Each input line is split on tabs and the second field (with surrounding
    double quotes removed) is passed to ``utils.geocode_ip``. Results whose
    first element is not "NA" are collected and, once the whole input has
    been read, appended to ``outfilename`` as tab-separated UTF-8 rows.

    Args:
        filename: Path of the tab-separated input file. Defaults to the
            original hard-coded location, so ``geocode()`` behaves as before.
        outfilename: Path of the file the rows are appended to (opened in
            "a" mode, so existing content is kept).

    Returns:
        None.
    """
    output = []
    with open(filename) as f:
        # The number used in log messages is the 1-based input line number.
        for count, line in enumerate(f, 1):
            stripped = line.strip()
            if not stripped:
                # A blank (e.g. trailing) line used to raise IndexError and
                # throw away every result gathered so far.
                continue
            items = stripped.split("\t")
            if len(items) < 2:
                utils.logmessage(str(count) + " Skipped malformed line", "getdata", 1)
                continue
            ip = items[1].strip('"')
            data = utils.geocode_ip(ip)
            if data[0] != "NA":
                output.append(data)
                utils.logmessage(str(count) + " Got: " + str(ip) + " " + data[4], "getdata", 1)
            else:
                utils.logmessage(str(count) + " Failed: " + str(ip) + "", "getdata", 1)

    # Results are written only after the full input has been processed.
    with open(outfilename, "a") as f:
        for row in output:
            f.write("\t".join([unicode(x).encode('utf8') for x in row]) + "\n")
def parse_traffic(wikihandles, years):
    """Print one tab-separated row of parsed traffic per (handle, year) pair.

    For every handle a "Parsing traffic for: ..." message is logged, then for
    each year the list returned by ``utils.parse_traf`` is extended with the
    handle and the year and printed to standard output as UTF-8 text.

    Args:
        wikihandles: Iterable of handle strings.
        years: Iterable of years, passed through to ``utils.parse_traf``.
    """
    for handle in wikihandles:
        utils.logmessage("Parsing traffic for: " + handle, "getdata", 0)
        for yr in years:
            row = utils.parse_traf(handle, yr) + [handle, yr]
            encoded_fields = [unicode(field).encode('utf8') for field in row]
            # A single parenthesised argument prints exactly like the
            # Python 2 print statement does.
            print("\t".join(encoded_fields))
def get_traffic(wikihandles, years):
    """Call ``utils.get_traf`` for every combination of handle and year.

    A "Downloading traffic for: ..." message is logged once per handle
    before its years are processed.

    Args:
        wikihandles: Iterable of handle strings,
            e.g. ["Michael_Jordan", "Mahmoud_Abdul-Rauf"].
        years: Iterable of years, e.g. [2012].
    """
    for handle in wikihandles:
        utils.logmessage("Downloading traffic for: " + handle, "getdata", 1)
        for yr in years:
            utils.get_traf(handle, yr)
def log_event(self, request, event, source=None, enable_logging=False):
    """Post an event record via ``self.make_request("events", ...)``.

    The payload carries the event under "name" and a user id under
    "user_id". The user id comes from ``castle_userid(request.user)`` when a
    request is given and is the placeholder "<no-id>" otherwise.

    Args:
        request: Request used for header extraction and user lookup; may be
            falsy.
        event: Event name placed in the payload.
        source: Source handed to ``self.get_headers_from_request``; falls
            back to ``self.default_source`` when falsy.
        enable_logging: If truthy and a request is present, the response is
            pretty-printed and passed to ``logmessage``.

    Returns:
        None.
    """
    if not source:
        source = self.default_source
    headers = self.get_headers_from_request(request, source=source)

    body = {
        "name": event,
        "user_id": castle_userid(request.user) if request else "<no-id>",
    }
    reply = self.make_request("events", data=body, headers=headers)

    if request and enable_logging:
        formatted_reply = pprint.pformat(reply)
        logmessage(request, formatted_reply)
def get_revs(wikihandles, years):
    """Fetch and store revision XML for each (handle, year) not yet on disk.

    The target file is ``root + "rawdata/wiki/revdata/<handle>_<year>.xml"``.
    When that path already exists the pair is skipped with a "Not fetching
    again" log line; otherwise the XML is obtained with ``utils.get_xml`` and
    saved with ``utils.write_xml`` under the name ``<handle>_<year>``.

    Args:
        wikihandles: Iterable of handle strings.
        years: Iterable of years.
    """
    # ``root`` is a module-level name that is only read here.
    for handle in wikihandles:
        for yr in years:
            stem = "_".join((handle, str(yr)))
            xml_path = root + "rawdata/wiki/revdata/" + stem + ".xml"

            if os.path.exists(xml_path):
                utils.logmessage("Not fetching again: " + stem, "getdata", 1)
                continue

            xml_data = utils.get_xml(handle, yr)
            utils.write_xml(xml_data, stem)
            utils.logmessage("Getting: " + stem, "getdata", 1)
            # NOTE(review): short pause after each actual fetch; presumably a
            # throttle between requests - confirm it belongs only here.
            sleep(0.01)
def get_sport(sport):
    """Download and unzip the raw data archive for a supported sport.

    Supported values are "baseball" and "basketball". The archive is saved
    under a sport-specific directory below the module-level ``root`` and
    extracted into that same directory.

    Args:
        sport: Name of the sport whose archive should be fetched.

    Raises:
        ValueError: If ``sport`` is not one of the supported names. The
            previous code fell through and failed with UnboundLocalError.
    """
    # sport -> (archive URL, directory relative to root, local file name)
    archives = {
        "baseball": (
            "http://seanlahman.com/files/database/lahman-csv_2014-02-14.zip",
            "rawdata/lahman/",
            "lahman.zip",
        ),
        "basketball": (
            "http://www.databasebasketball.com/databasebasketball_2009_v1.zip",
            "rawdata/basketball/",
            "basketball.zip",
        ),
    }
    if sport not in archives:
        raise ValueError(
            "Unsupported sport %r; expected one of: %s"
            % (sport, ", ".join(sorted(archives)))
        )
    url, subdir, saveas = archives[sport]
    path = root + subdir

    utils.logmessage("Getting: " + sport, "getdata", 1)

    # download file
    utils.logmessage("Downloading zip file", "getdata", 1)
    utils.download_file(url, path + saveas)

    # unzip file (logged before the work starts, like the download step)
    utils.logmessage("Unzipping file", "getdata", 1)
    utils.unzip(path + saveas, path)
def get_revlist_all(wikihandles):
    """Run ``utils.get_revlist`` for every handle and log what it returned.

    The value returned for each handle is reported in a
    "got <value> rows for <handle>" log message.

    Args:
        wikihandles: Iterable of handle strings.
    """
    for handle in wikihandles:
        row_count = utils.get_revlist(handle)
        message = "got " + str(row_count) + " rows for " + handle
        utils.logmessage(message, "getdata", 1)