def submit(environ, start_response):
    """WSGI handler that stores submitted filter hit statistics.

    The request must be a POST whose body is a JSON object. The parsed data
    is first written to a log file via ``log_filterhits`` and then passed to
    ``geometrical_mean.update`` so the result can be written to the database.

    Args:
        environ: WSGI environment; ``wsgi.errors``, ``REQUEST_METHOD``,
            ``wsgi.input``, ``CONTENT_LENGTH`` and (optionally)
            ``QUERY_STRING`` are read from it.
        start_response: WSGI ``start_response`` callable.

    Returns:
        The result of ``common.show_error`` when the request is rejected or
        the log file could not be written, otherwise an empty list after a
        ``204 No Content`` response has been started.
    """
    setupStderr(environ["wsgi.errors"])
    config = get_config()

    # Check that this is a POST request
    if environ["REQUEST_METHOD"] != "POST":
        return common.show_error("Unsupported request method", start_response)

    # Parse the submitted JSON. A missing or non-numeric CONTENT_LENGTH and
    # malformed JSON all end up in the same error response.
    try:
        data = json.loads(environ["wsgi.input"].read(
            int(environ["CONTENT_LENGTH"])))
    except (KeyError, IOError, ValueError):
        return common.show_error("Error while parsing JSON data.",
                                 start_response)

    # Make sure the submitted data was contained within an object at least
    if not isinstance(data, dict):
        return common.show_error(
            "Error, data must be contained within an object.", start_response)

    # Log the data to a file
    log_dir = config.get("filterhitstats", "log_dir")
    try:
        log_file = log_filterhits(data, log_dir,
                                  environ.get("QUERY_STRING", ""))
    except (OSError, IOError):
        traceback.print_exc()
        return common.show_error("Failed to write data to log file!",
                                 start_response, "500 Logging error")

    # Update the geometrical_mean aggregations in the database
    interval = config.get("filterhitstats", "interval")
    try:
        db_connection = db.connect()
        try:
            db.write(db_connection, geometrical_mean.update(interval, data))
        finally:
            db_connection.close()
    except Exception:
        # Updating the aggregations in the database failed for whatever reason,
        # log the details but continue to return 204 to the client to avoid the
        # re-transmission of data. Only Exception is caught so that
        # KeyboardInterrupt / SystemExit are not silently swallowed here.
        processing_error_log = os.path.join(log_dir, "processing-errors.log")
        with open(processing_error_log, "a+") as f:
            message = "Problem processing data file %s:\n%s" % (
                log_file, traceback.format_exc())
            # NOTE(review): datetime.now() is naive, so %z presumably renders
            # as an empty string - confirm whether an offset is wanted here.
            print >> f, "[%s] %s" % (
                datetime.now().strftime("%d/%b/%Y:%H:%M:%S %z"), message)

    # Send back a 204 No Content
    start_response("204 No Content", [])
    return []
def submit(environ, start_response):
    """WSGI handler that stores submitted filter hit statistics.

    The request must be a POST whose body is a JSON object. The parsed data
    is first written to a log file via ``log_filterhits`` and then passed to
    ``geometrical_mean.update`` so the result can be written to the database.

    Args:
        environ: WSGI environment; ``wsgi.errors``, ``REQUEST_METHOD``,
            ``wsgi.input``, ``CONTENT_LENGTH`` and (optionally)
            ``QUERY_STRING`` are read from it.
        start_response: WSGI ``start_response`` callable.

    Returns:
        The result of ``common.show_error`` when the request is rejected or
        the log file could not be written, otherwise an empty list after a
        ``204 No Content`` response has been started.
    """
    setupStderr(environ["wsgi.errors"])
    config = get_config()

    # Check that this is a POST request
    if environ["REQUEST_METHOD"] != "POST":
        return common.show_error("Unsupported request method", start_response)

    # Parse the submitted JSON. A missing or non-numeric CONTENT_LENGTH and
    # malformed JSON all end up in the same error response.
    try:
        data = json.loads(environ["wsgi.input"].read(
            int(environ["CONTENT_LENGTH"])))
    except (KeyError, IOError, ValueError):
        return common.show_error("Error while parsing JSON data.",
                                 start_response)

    # Make sure the submitted data was contained within an object at least
    if not isinstance(data, dict):
        return common.show_error(
            "Error, data must be contained within an object.", start_response)

    # Log the data to a file
    log_dir = config.get("filterhitstats", "log_dir")
    try:
        log_file = log_filterhits(data, log_dir,
                                  environ.get("QUERY_STRING", ""))
    except (OSError, IOError):
        traceback.print_exc()
        return common.show_error("Failed to write data to log file!",
                                 start_response, "500 Logging error")

    # Update the geometrical_mean aggregations in the database
    interval = config.get("filterhitstats", "interval")
    try:
        db_connection = db.connect()
        try:
            db.write(db_connection, geometrical_mean.update(interval, data))
        finally:
            db_connection.close()
    except Exception:
        # Updating the aggregations in the database failed for whatever reason,
        # log the details but continue to return 204 to the client to avoid the
        # re-transmission of data. Only Exception is caught so that
        # KeyboardInterrupt / SystemExit are not silently swallowed here.
        processing_error_log = os.path.join(log_dir, "processing-errors.log")
        with open(processing_error_log, "a+") as f:
            message = "Problem processing data file %s:\n%s" % (
                log_file, traceback.format_exc())
            # NOTE(review): datetime.now() is naive, so %z presumably renders
            # as an empty string - confirm whether an offset is wanted here.
            print >> f, "[%s] %s" % (
                datetime.now().strftime("%d/%b/%Y:%H:%M:%S %z"), message)

    # Send back a 204 No Content
    start_response("204 No Content", [])
    return []
def read_update(f):
    """Read *f* with ``read_data`` and hand the result to ``geometrical_mean.update``.

    ``interval`` is not a parameter; it is resolved from the surrounding scope.
    """
    parsed = read_data(f)
    return geometrical_mean.update(interval, parsed)
def test_calculations(self): interval = 86400 # Tables should be empty to start with self.assertEqual(db.query(self.db, "SELECT * FROM filters"), ()) self.assertEqual(db.query(self.db, "SELECT * FROM frequencies"), ()) # First batch db.write(self.db, geometrical_mean.update(interval, test_data[0])) self.assertEqual(db.query(self.db, "SELECT * FROM filters"), (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"##.top-box-right-ad"),)) self.assertEqual( db.query(self.db, "SELECT * FROM frequencies"), (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"acxiom-online.com", 6L, datetime.utcfromtimestamp(1414817340948 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"demdex.net", 36L, datetime.utcfromtimestamp(1414838712373 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"doubleclick.net", 26L, datetime.utcfromtimestamp(1414823430333 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"google.com", 50L, datetime.utcfromtimestamp(1414849084678 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"yahoo.com", 14L, datetime.utcfromtimestamp(1414859271125 / 1000)))) # Second batch db.write(self.db, geometrical_mean.update(interval, test_data[1])) self.assertEqual(db.query(self.db, "SELECT * FROM filters"), (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"##.top-box-right-ad"),)) self.assertEqual( db.query(self.db, "SELECT * FROM frequencies"), (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"acxiom-online.com", 6L, datetime.utcfromtimestamp(1414817340948 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"adsymptotic.com", 49L, datetime.utcfromtimestamp(1414953943015 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"amazon.com", 2L, datetime.utcfromtimestamp(1414913563746 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"demdex.net", 36L, datetime.utcfromtimestamp(1414838712373 / 1000)), 
("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"doubleclick.net", 26L, datetime.utcfromtimestamp(1414823430333 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"google.com", self.geometrical(interval, 21, 1414953920364, 50, 1414849084678), datetime.utcfromtimestamp(1414953920364 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"live.com", 34L, datetime.utcfromtimestamp(1414916268769 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"yahoo.com", self.geometrical(interval, 27, 1414917270343, 14, 1414859271125), datetime.utcfromtimestamp(1414917270343 / 1000)))) # Third batch db.write(self.db, geometrical_mean.update(interval, test_data[2])) self.assertEqual(db.query(self.db, "SELECT * FROM filters"), (("22de8d2ba8429eb170a0ece6ea7a426f7b22e574".decode("hex"), u"stevedeace.com##.topAddHolder"), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"##.top-box-right-ad"))) self.assertEqual( db.query(self.db, "SELECT * FROM frequencies"), (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"acxiom-online.com", 6L, datetime.utcfromtimestamp(1414817340948 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"adsymptotic.com", self.geometrical(interval, 15, 1414994112862, 49, 1414953943015), datetime.utcfromtimestamp(1414994112862 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"amazon.com", 2L, datetime.utcfromtimestamp(1414913563746 / 1000)), ("22de8d2ba8429eb170a0ece6ea7a426f7b22e574".decode("hex"), u"amazonaws.com", 18L, datetime.utcfromtimestamp(1414977342966 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"demdex.net", 36L, datetime.utcfromtimestamp(1414838712373 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"doubleclick.net", 26L, datetime.utcfromtimestamp(1414823430333 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"google.com", self.geometrical(interval, 14, 
1415008533089, self.geometrical(interval, 21, 1414953920364, 50, 1414849084678), 1414953920364), datetime.utcfromtimestamp(1415008533089 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"live.com", 34L, datetime.utcfromtimestamp(1414916268769 / 1000)), ("22de8d2ba8429eb170a0ece6ea7a426f7b22e574".decode("hex"), u"mathtag.com", 14L, datetime.utcfromtimestamp(1415032601175 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"yahoo.com", self.geometrical(interval, 43, 1415045194098, self.geometrical(interval, 27, 1414917270343, 14, 1414859271125), 1414917270343), datetime.utcfromtimestamp(1415045194098 / 1000))))
def read_update(f):
    """Read *f* with ``read_data`` and hand the result to ``geometrical_mean.update``.

    ``interval`` is not a parameter; it is resolved from the surrounding scope.
    """
    parsed = read_data(f)
    return geometrical_mean.update(interval, parsed)
def test_calculations(self): interval = 86400 # Tables should be empty to start with self.assertEqual(db.query(self.db, "SELECT * FROM filters"), ()) self.assertEqual(db.query(self.db, "SELECT * FROM frequencies"), ()) # First batch db.write(self.db, geometrical_mean.update(interval, test_data[0])) self.assertEqual(db.query(self.db, "SELECT * FROM filters"), (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"##.top-box-right-ad"),)) self.assertEqual( db.query(self.db, "SELECT * FROM frequencies"), (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"acxiom-online.com", 6L, datetime.utcfromtimestamp(1414817340948 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"demdex.net", 36L, datetime.utcfromtimestamp(1414838712373 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"doubleclick.net", 26L, datetime.utcfromtimestamp(1414823430333 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"google.com", 50L, datetime.utcfromtimestamp(1414849084678 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"yahoo.com", 14L, datetime.utcfromtimestamp(1414859271125 / 1000)))) # Second batch db.write(self.db, geometrical_mean.update(interval, test_data[1])) self.assertEqual(db.query(self.db, "SELECT * FROM filters"), (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"##.top-box-right-ad"),)) self.assertEqual( db.query(self.db, "SELECT * FROM frequencies"), (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"acxiom-online.com", 6L, datetime.utcfromtimestamp(1414817340948 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"adsymptotic.com", 49L, datetime.utcfromtimestamp(1414953943015 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"amazon.com", 2L, datetime.utcfromtimestamp(1414913563746 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"demdex.net", 36L, datetime.utcfromtimestamp(1414838712373 / 1000)), 
("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"doubleclick.net", 26L, datetime.utcfromtimestamp(1414823430333 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"google.com", self.geometrical(interval, 21, 1414953920364, 50, 1414849084678), datetime.utcfromtimestamp(1414953920364 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"live.com", 34L, datetime.utcfromtimestamp(1414916268769 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"yahoo.com", self.geometrical(interval, 27, 1414917270343, 14, 1414859271125), datetime.utcfromtimestamp(1414917270343 / 1000)))) # Third batch db.write(self.db, geometrical_mean.update(interval, test_data[2])) self.assertEqual(db.query(self.db, "SELECT * FROM filters"), (("22de8d2ba8429eb170a0ece6ea7a426f7b22e574".decode("hex"), u"stevedeace.com##.topAddHolder"), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"##.top-box-right-ad"))) self.assertEqual( db.query(self.db, "SELECT * FROM frequencies"), (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"acxiom-online.com", 6L, datetime.utcfromtimestamp(1414817340948 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"adsymptotic.com", self.geometrical(interval, 15, 1414994112862, 49, 1414953943015), datetime.utcfromtimestamp(1414994112862 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"amazon.com", 2L, datetime.utcfromtimestamp(1414913563746 / 1000)), ("22de8d2ba8429eb170a0ece6ea7a426f7b22e574".decode("hex"), u"amazonaws.com", 18L, datetime.utcfromtimestamp(1414977342966 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"demdex.net", 36L, datetime.utcfromtimestamp(1414838712373 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"doubleclick.net", 26L, datetime.utcfromtimestamp(1414823430333 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"google.com", self.geometrical(interval, 14, 
1415008533089, self.geometrical(interval, 21, 1414953920364, 50, 1414849084678), 1414953920364), datetime.utcfromtimestamp(1415008533089 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"live.com", 34L, datetime.utcfromtimestamp(1414916268769 / 1000)), ("22de8d2ba8429eb170a0ece6ea7a426f7b22e574".decode("hex"), u"mathtag.com", 14L, datetime.utcfromtimestamp(1415032601175 / 1000)), ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"yahoo.com", self.geometrical(interval, 43, 1415045194098, self.geometrical(interval, 27, 1414917270343, 14, 1414859271125), 1414917270343), datetime.utcfromtimestamp(1415045194098 / 1000))))