Example #1
0
    def mapper(self, _, line):
        """Remember the 50 smallest (datetime, line) pairs for "Main_Page".

        A line that Weblog rejects with ValueError is reported on stderr and
        skipped.  This method emits nothing itself; matching requests are
        accumulated in self.lowest.
        """
        try:
            entry = Weblog(line)
        except ValueError:
            sys.stderr.write("Invalid logfile line: {}\n".format(line))
            return

        # Requests for any other page are ignored.
        if entry.wikipage() != "Main_Page":
            return

        self.lowest.append((entry.datetime, line))
        # Re-sort after every insertion and trim back to 50 entries.
        ranked = sorted(self.lowest)
        self.lowest = ranked[:50]
Example #2
0
    def mapper(self, _, line):
        """Collect "Main_Page" requests, retaining only the first 50 in order.

        Each retained item is a (datetime, line) tuple stored in self.lowest.
        Unparseable lines (Weblog raises ValueError) are logged to stderr.
        """
        try:
            record = Weblog(line)
        except ValueError:
            sys.stderr.write("Invalid logfile line: {}\n".format(line))
        else:
            # Only the desired URL contributes to the running list.
            if record.wikipage() == "Main_Page":
                candidate = (record.datetime, line)
                self.lowest.append(candidate)
                # Keep the list bounded: sort, then cut to 50 items.
                self.lowest = sorted(self.lowest)[:50]
Example #3
0
 def mapper(self, _, line):
     """Tag one input line by its source file and key it for the join.

     Lines from the input whose name contains "top1000ips_to_country.txt"
     are split on tabs and emitted as (field 0, ("Country", field 1)).
     Every other line is parsed with Weblog and emitted as
     (ipaddr, ("Weblog", (ipaddr, datetime, url, wikipage))).
     A counter in the "Info" group is bumped for each line handled.
     """
     # The input file name tells the two kinds of records apart.
     source = mrjob.compat.jobconf_from_env("map.input.file")
     if "top1000ips_to_country.txt" not in source:
         entry = Weblog(line)
         record = (entry.ipaddr, entry.datetime, entry.url, entry.wikipage())
         self.increment_counter("Info", "weblog Count", 1)
         yield record[0], ("Weblog", record)
     else:
         columns = line.split("\t")
         self.increment_counter("Info", "top1000_ips_to_country Count", 1)
         yield columns[0], ("Country", columns[1])
Example #4
0
 def mapper(self, _, line):
     """Emit a source-tagged value keyed by the record's first field.

     Country-file lines yield (fields[0], ("Country", fields[1])); weblog
     lines yield (ipaddr, ("Weblog", (ipaddr, datetime, url, wikipage))).
     Each branch increments its own counter in the "Info" group.
     """
     origin = mrjob.compat.jobconf_from_env("map.input.file")
     from_country_file = "top1000ips_to_country.txt" in origin

     if from_country_file:
         parts = line.split("\t")
         self.increment_counter("Info", "top1000_ips_to_country Count", 1)
         yield parts[0], ("Country", parts[1])
     else:
         entry = Weblog(line)
         ipaddr = entry.ipaddr
         summary = (ipaddr, entry.datetime, entry.url, entry.wikipage())
         self.increment_counter("Info", "weblog Count", 1)
         yield ipaddr, ("Weblog", summary)
Example #5
0
    def mapper(self, _, line):
        """Emit join records keyed by IP address.

        Lines read from the input whose name contains
        "top1000ips_to_country.txt" are expected to hold two tab-separated
        fields (ipaddr, country) and are emitted as (ipaddr, "+" + country);
        lines that do not split into exactly two fields are skipped.

        All other lines are parsed with Weblog.  Lines it rejects with
        ValueError are reported on stderr; requests for "Main_Page" are
        emitted as (ipaddr, line).
        """
        # Is this a weblog file, or a MaxMind GeoLite2 file?
        filename = mrjob.compat.jobconf_from_env("map.input.file")
        if "top1000ips_to_country.txt" in filename:
            self.increment_counter("Status", "top1000_ips_to_country file found", 1)
            try:
                (ipaddr, country) = line.strip().split("\t")
            except ValueError:
                # Wrong number of fields: deliberately skip the line.
                return
            yield ipaddr, "+" + country
        else:
            try:
                o = Weblog(line)
            except ValueError:
                sys.stderr.write("Invalid Logfile line :{}\n".format(line))
                return
            if o.wikipage() == "Main_Page":
                yield o.ipaddr, line
Example #6
0
    def mapper(self, _, line):
        """Emit <input file name, datetime> for one weblog line."""
        # The name of the current input file is read from the
        # "map.input.file" jobconf variable (mrjob v0.4.6 documentation:
        # https://pythonhosted.org/mrjob/utils-compat.html).
        source = mrjob.compat.jobconf_from_env("map.input.file")

        # Parse the line and pair its datetime with the file it came from.
        yield source, Weblog(line).datetime
Example #7
0
 def mapper(self, _, line):
     """Emit IP-keyed records from either the country file or a weblog.

     Country-file lines (two tab-separated fields) become
     (ipaddr, "+" + country); lines with a different field count are
     dropped.  Weblog lines are emitted as (ipaddr, line) only when the
     request is for "Main_Page"; lines Weblog rejects with ValueError are
     written to stderr.
     """
     input_name = mrjob.compat.jobconf_from_env("map.input.file")
     is_country_file = "top1000ips_to_country.txt" in input_name

     if not is_country_file:
         try:
             entry = Weblog(line)
         except ValueError:
             sys.stderr.write("Invalid Logfile line :{}\n".format(line))
             return
         if entry.wikipage() == "Main_Page":
             yield entry.ipaddr, line
         return

     self.increment_counter("Status", "top1000_ips_to_country file found", 1)
     try:
         address, nation = line.strip().split("\t")
         yield address, "+" + nation
     except ValueError:
         # Unpacking failed (not exactly two fields): ignore the line.
         pass
Example #8
0
 def reducer(self, key, values):
     """Attach the country to every weblog line that shares this key.

     A value beginning with '+' carries the country (the text after the
     '+').  Any other value is treated as a weblog line: it is emitted as
     ("Geolocated", [date, country, line]) once a country is known, and
     otherwise only counted under "Warning" / "No Country Found".
     """
     location = None
     for value in values:
         if value[0:1] == '+':
             location = value[1:]
         elif location:
             parsed = Weblog(value)
             yield "Geolocated", [parsed.date, location, value]
         else:
             # A weblog line arrived before any usable country value.
             self.increment_counter("Warning", "No Country Found", 1)
Example #9
0
 def mapper(self, _, line):
     """Emit tagged join records keyed by IP address.

     Country-file lines yield (ipaddr, ("country", country)); lines that
     do not split into exactly two tab-separated fields are dropped.
     Weblog lines for "Main_Page" yield (ipaddr, ("ip", line)); lines that
     Weblog rejects with ValueError are reported on stderr.
     """
     import sys

     # Is this a weblog file, or a MaxMind GeoLite2 file?
     source = mrjob.compat.jobconf_from_env("map.input.file")
     if "top1000ips_to_country.txt" in source:
         # Handle as a GeoLite2 file
         try:
             address, nation = line.strip().split("\t")
             yield address, ("country", nation)
         except ValueError:
             pass
         return

     # Handle as a weblog file
     try:
         entry = Weblog(line)
     except ValueError:
         sys.stderr.write("Invalid logfile line: {}\n".format(line))
         return
     if entry.wikipage() == "Main_Page":
         yield entry.ipaddr, ("ip", line)
Example #10
0
 def mapper(self, _, line):
     """Produce (ipaddr, (tag, payload)) pairs for the IP/country join.

     The tag is "country" for lines of the country file and "ip" for
     weblog lines requesting "Main_Page".  Malformed country lines are
     skipped silently; malformed weblog lines are logged to stderr.
     """
     import sys

     input_file = mrjob.compat.jobconf_from_env("map.input.file")
     is_weblog = "top1000ips_to_country.txt" not in input_file

     if is_weblog:
         # Handle as a weblog file
         try:
             parsed = Weblog(line)
         except ValueError:
             sys.stderr.write("Invalid logfile line: {}\n".format(line))
             return
         if parsed.wikipage() == "Main_Page":
             yield parsed.ipaddr, ("ip", line)
     else:
         # Handle as a GeoLite2 file
         try:
             ip, country_name = line.strip().split("\t")
             yield ip, ("country", country_name)
         except ValueError:
             pass
 def reducer(self, key, values):
     """Join the country onto each weblog line received for this key.

     Values starting with "+" supply the country; every other value is a
     logfile line.  Logfile lines seen while no country is known are
     counted under "Warning" / "No Country Found" and dropped; the rest
     are emitted as ("Geolocated", [date, country, line]).
     """
     location = None
     for item in values:
         is_location = item[0:1] == "+"
         if is_location:
             # Found the location; remember it for the lines that follow.
             location = item[1:]
             continue
         if location:
             # item is a logfile line, so parse it again to get its date.
             entry = Weblog(item)
             yield "Geolocated", [entry.date, location, item]
             continue
         self.increment_counter("Warning", "No Country Found", 1)
Example #12
0
 def reducer(self, key, values):
     """Record geolocated weblog lines, keeping the 50 smallest entries.

     Values starting with "+" supply the country.  Each other value is a
     logfile line: when a country is known it is stored in self.lowest as
     (datetime, country, line); otherwise it is counted under
     "Warning" / "No Country Found".  Nothing is emitted by this method.
     """
     country = None
     for item in values:
         if item[0:1] == "+":
             # Found the location for this key.
             country = item[1:]
         elif not country:
             self.increment_counter("Warning", "No Country Found", 1)
         else:
             # item is a logfile line; parse it again for its datetime.
             entry = Weblog(item)
             self.lowest.append((entry.datetime, country, item))
             # Sort and trim so at most 50 entries are retained.
             self.lowest = sorted(self.lowest)[:50]
Example #13
0
 def mapper(self, _, line):
     """Emit a tagged record keyed by the line's IP address / first field.

     Country-file lines yield (fields[0], ("country", fields)) where
     fields is the tab-split line; weblog lines yield
     (ipaddr, ("ip", line)).  Each branch bumps a counter in "Info".
     """
     # Is this a weblog file, or a MaxMind GeoLite2 file?
     input_path = mrjob.compat.jobconf_from_env("map.input.file")
     if "top1000ips_to_country.txt" not in input_path:
         # Handle as a weblog file
         self.increment_counter("Info", "Name Count", 1)
         entry = Weblog(line)
         yield entry.ipaddr, ("ip", line)
     else:
         # Handle as a GeoLite2 file
         columns = line.split('\t')
         self.increment_counter("Info", "Obs Count", 1)
         yield columns[0], ("country", columns)
Example #14
0
 def mapper(self, _, line):
     """Emit (date, 1) for every line that does not contain "Special:"."""
     if "Special:" in line:
         # Lines mentioning "Special:" are excluded from the count.
         return
     entry = Weblog(line)
     yield entry.date, 1
Example #15
0
 def mapper(self, _, line):
     """Emit (date, 1) for the weblog line so dates can be counted."""
     yield Weblog(line).date, 1
Example #16
0
 def mapper(self, _, line):
     """Emit every line under the single key "first50" with its datetime."""
     parsed = Weblog(line)
     stamped = (parsed.datetime, line)
     yield "first50", stamped
Example #17
0
 def mapper(self, _, line):
     """Emit (wikipage, 1) for the weblog line so pages can be counted."""
     page = Weblog(line).wikipage()
     yield page, 1
    def mapper(self, _, line):
        """Emit (wikipage, 1) for the weblog line.

        The line is parsed with Weblog; any exception it raises propagates
        to the caller.
        """
        # Indentation uses spaces only: a tab-indented body under a
        # space-indented ``def`` is rejected by Python 3 (TabError).
        # The input file name was looked up but never used, so that
        # lookup has been dropped.
        log = Weblog(line)
        yield (log.wikipage(), 1)
Example #19
0
 def mapper(self, _, line):
     """Emit one (wikipage, 1) pair per weblog line."""
     entry = Weblog(line)
     page_name = entry.wikipage()
     yield page_name, 1