Esempio n. 1
0
def submit(environ, start_response):
    """WSGI handler that records submitted filter hit statistics.

    The request must be a POST whose body is a JSON object. The data is
    first written to a log file via log_filterhits(), then the
    geometrical_mean aggregations are written to the database.

    Returns the result of common.show_error() when the request method is
    not POST, when the body cannot be read or parsed as JSON, when the
    parsed value is not a dict, or when the log file cannot be written.
    Otherwise responds with "204 No Content" and an empty body, even if
    updating the database failed.
    """
    setupStderr(environ["wsgi.errors"])
    config = get_config()

    # Check that this is a POST request
    if environ["REQUEST_METHOD"] != "POST":
        return common.show_error("Unsupported request method", start_response)

    # Parse the submitted JSON. A missing or non-numeric CONTENT_LENGTH is
    # reported the same way as malformed JSON (KeyError / ValueError).
    try:
        data = json.loads(environ["wsgi.input"].read(
            int(environ["CONTENT_LENGTH"])))
    except (KeyError, IOError, ValueError):
        return common.show_error("Error while parsing JSON data.",
                                 start_response)

    # Make sure the submitted data was contained within an object at least
    if not isinstance(data, dict):
        return common.show_error(
            "Error, data must be contained within an object.", start_response)

    # Log the data to a file
    log_dir = config.get("filterhitstats", "log_dir")
    try:
        log_file = log_filterhits(data, log_dir,
                                  environ.get("QUERY_STRING", ""))
    except (OSError, IOError):
        traceback.print_exc()
        return common.show_error("Failed to write data to log file!",
                                 start_response, "500 Logging error")

    # Update the geometrical_mean aggregations in the database
    interval = config.get("filterhitstats", "interval")
    try:
        db_connection = db.connect()
        try:
            db.write(db_connection, geometrical_mean.update(interval, data))
        finally:
            db_connection.close()
    except Exception:
        # Updating the aggregations in the database failed for whatever reason,
        # log the details but continue to return 204 to the client to avoid the
        # re-transmission of data. Only Exception is caught, so SystemExit and
        # KeyboardInterrupt still propagate.
        #
        # Format the traceback first, before any further failure can replace
        # the exception that is currently being handled.
        message = "Problem processing data file %s:\n%s" % (
            log_file, traceback.format_exc())
        # NOTE: datetime.now() returns a naive datetime, for which %z expands
        # to an empty string.
        timestamp = datetime.now().strftime("%d/%b/%Y:%H:%M:%S %z")
        processing_error_log = os.path.join(log_dir, "processing-errors.log")
        try:
            with open(processing_error_log, "a") as f:
                f.write("[%s] %s\n" % (timestamp, message))
        except (OSError, IOError):
            # The error log could not be written either; report that on
            # stderr and still answer 204 as intended above.
            traceback.print_exc()

    # Send back a 204 No Content
    start_response("204 No Content", [])
    return []
Esempio n. 2
0
def submit(environ, start_response):
  """WSGI handler that records submitted filter hit statistics.

  The request must be a POST whose body is a JSON object. The data is first
  written to a log file via log_filterhits(), then the geometrical_mean
  aggregations are written to the database.

  Returns the result of common.show_error() when the request method is not
  POST, when the body cannot be read or parsed as JSON, when the parsed value
  is not a dict, or when the log file cannot be written. Otherwise responds
  with "204 No Content" and an empty body, even if updating the database
  failed.
  """
  setupStderr(environ["wsgi.errors"])
  config = get_config()

  # Check that this is a POST request
  if environ["REQUEST_METHOD"] != "POST":
    return common.show_error("Unsupported request method", start_response)

  # Parse the submitted JSON. A missing or non-numeric CONTENT_LENGTH is
  # reported the same way as malformed JSON (KeyError / ValueError).
  try:
    data = json.loads(environ["wsgi.input"].read(int(environ["CONTENT_LENGTH"])))
  except (KeyError, IOError, ValueError):
    return common.show_error("Error while parsing JSON data.", start_response)

  # Make sure the submitted data was contained within an object at least
  if not isinstance(data, dict):
    return common.show_error("Error, data must be contained within an object.", start_response)

  # Log the data to a file
  log_dir = config.get("filterhitstats", "log_dir")
  try:
    log_file = log_filterhits(data, log_dir, environ.get("QUERY_STRING", ""))
  except (OSError, IOError):
    traceback.print_exc()
    return common.show_error("Failed to write data to log file!", start_response,
                             "500 Logging error")

  # Update the geometrical_mean aggregations in the database
  interval = config.get("filterhitstats", "interval")
  try:
    db_connection = db.connect()
    try:
      db.write(db_connection, geometrical_mean.update(interval, data))
    finally:
      db_connection.close()
  except Exception:
    # Updating the aggregations in the database failed for whatever reason,
    # log the details but continue to return 204 to the client to avoid the
    # re-transmission of data. Only Exception is caught, so SystemExit and
    # KeyboardInterrupt still propagate.
    #
    # Format the traceback first, before any further failure can replace the
    # exception that is currently being handled.
    message = "Problem processing data file %s:\n%s" % (
      log_file, traceback.format_exc()
    )
    # NOTE: datetime.now() returns a naive datetime, for which %z expands to
    # an empty string.
    timestamp = datetime.now().strftime("%d/%b/%Y:%H:%M:%S %z")
    processing_error_log = os.path.join(log_dir, "processing-errors.log")
    try:
      with open(processing_error_log, "a") as f:
        f.write("[%s] %s\n" % (timestamp, message))
    except (OSError, IOError):
      # The error log could not be written either; report that on stderr and
      # still answer 204 as intended above.
      traceback.print_exc()

  # Send back a 204 No Content
  start_response("204 No Content", [])
  return []
Esempio n. 3
0
 def read_update(f):
     """Return geometrical_mean.update() applied to the data read from f.

     `interval` is not defined in this function; it is looked up in the
     surrounding scope at call time.
     """
     parsed = read_data(f)
     return geometrical_mean.update(interval, parsed)
  def test_calculations(self):
    interval = 86400

    # Tables should be empty to start with
    self.assertEqual(db.query(self.db, "SELECT * FROM filters"), ())
    self.assertEqual(db.query(self.db, "SELECT * FROM frequencies"), ())
    # First batch
    db.write(self.db, geometrical_mean.update(interval, test_data[0]))
    self.assertEqual(db.query(self.db, "SELECT * FROM filters"),
                     (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"),
                       u"##.top-box-right-ad"),))
    self.assertEqual(
      db.query(self.db, "SELECT * FROM frequencies"),
      (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"acxiom-online.com",
        6L, datetime.utcfromtimestamp(1414817340948 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"demdex.net",
        36L, datetime.utcfromtimestamp(1414838712373 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"doubleclick.net",
        26L, datetime.utcfromtimestamp(1414823430333 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"google.com",
        50L, datetime.utcfromtimestamp(1414849084678 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"yahoo.com",
        14L, datetime.utcfromtimestamp(1414859271125 / 1000))))
    # Second batch
    db.write(self.db, geometrical_mean.update(interval, test_data[1]))
    self.assertEqual(db.query(self.db, "SELECT * FROM filters"),
                     (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"),
                       u"##.top-box-right-ad"),))
    self.assertEqual(
      db.query(self.db, "SELECT * FROM frequencies"),
      (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"acxiom-online.com",
        6L, datetime.utcfromtimestamp(1414817340948 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"adsymptotic.com",
        49L, datetime.utcfromtimestamp(1414953943015 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"amazon.com",
        2L, datetime.utcfromtimestamp(1414913563746 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"demdex.net",
        36L, datetime.utcfromtimestamp(1414838712373 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"doubleclick.net",
        26L, datetime.utcfromtimestamp(1414823430333 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"google.com",
        self.geometrical(interval, 21, 1414953920364, 50, 1414849084678),
        datetime.utcfromtimestamp(1414953920364 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"live.com",
        34L, datetime.utcfromtimestamp(1414916268769 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"yahoo.com",
        self.geometrical(interval, 27, 1414917270343, 14, 1414859271125),
        datetime.utcfromtimestamp(1414917270343 / 1000))))
    # Third batch
    db.write(self.db, geometrical_mean.update(interval, test_data[2]))
    self.assertEqual(db.query(self.db, "SELECT * FROM filters"),
                     (("22de8d2ba8429eb170a0ece6ea7a426f7b22e574".decode("hex"),
                       u"stevedeace.com##.topAddHolder"),
                      ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"),
                       u"##.top-box-right-ad")))
    self.assertEqual(
      db.query(self.db, "SELECT * FROM frequencies"),
      (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"acxiom-online.com",
        6L, datetime.utcfromtimestamp(1414817340948 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"adsymptotic.com",
        self.geometrical(interval, 15, 1414994112862, 49, 1414953943015),
        datetime.utcfromtimestamp(1414994112862 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"amazon.com",
        2L, datetime.utcfromtimestamp(1414913563746 / 1000)),
       ("22de8d2ba8429eb170a0ece6ea7a426f7b22e574".decode("hex"), u"amazonaws.com",
        18L, datetime.utcfromtimestamp(1414977342966 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"demdex.net",
        36L, datetime.utcfromtimestamp(1414838712373 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"doubleclick.net",
        26L, datetime.utcfromtimestamp(1414823430333 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"google.com",
        self.geometrical(interval, 14, 1415008533089,
                         self.geometrical(interval, 21, 1414953920364,
                                          50, 1414849084678),
                         1414953920364),
        datetime.utcfromtimestamp(1415008533089 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"live.com",
        34L, datetime.utcfromtimestamp(1414916268769 / 1000)),
       ("22de8d2ba8429eb170a0ece6ea7a426f7b22e574".decode("hex"), u"mathtag.com",
        14L, datetime.utcfromtimestamp(1415032601175 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"yahoo.com",
        self.geometrical(interval, 43, 1415045194098,
                         self.geometrical(interval, 27, 1414917270343,
                                          14, 1414859271125),
                         1414917270343),
        datetime.utcfromtimestamp(1415045194098 / 1000))))
Esempio n. 5
0
 def read_update(f):
   """Return geometrical_mean.update() applied to the data read from f.

   `interval` is not defined in this function; it is looked up in the
   surrounding scope at call time.
   """
   parsed = read_data(f)
   return geometrical_mean.update(interval, parsed)
  def test_calculations(self):
    interval = 86400

    # Tables should be empty to start with
    self.assertEqual(db.query(self.db, "SELECT * FROM filters"), ())
    self.assertEqual(db.query(self.db, "SELECT * FROM frequencies"), ())
    # First batch
    db.write(self.db, geometrical_mean.update(interval, test_data[0]))
    self.assertEqual(db.query(self.db, "SELECT * FROM filters"),
                     (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"),
                       u"##.top-box-right-ad"),))
    self.assertEqual(
      db.query(self.db, "SELECT * FROM frequencies"),
      (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"acxiom-online.com",
        6L, datetime.utcfromtimestamp(1414817340948 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"demdex.net",
        36L, datetime.utcfromtimestamp(1414838712373 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"doubleclick.net",
        26L, datetime.utcfromtimestamp(1414823430333 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"google.com",
        50L, datetime.utcfromtimestamp(1414849084678 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"yahoo.com",
        14L, datetime.utcfromtimestamp(1414859271125 / 1000))))
    # Second batch
    db.write(self.db, geometrical_mean.update(interval, test_data[1]))
    self.assertEqual(db.query(self.db, "SELECT * FROM filters"),
                     (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"),
                       u"##.top-box-right-ad"),))
    self.assertEqual(
      db.query(self.db, "SELECT * FROM frequencies"),
      (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"acxiom-online.com",
        6L, datetime.utcfromtimestamp(1414817340948 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"adsymptotic.com",
        49L, datetime.utcfromtimestamp(1414953943015 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"amazon.com",
        2L, datetime.utcfromtimestamp(1414913563746 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"demdex.net",
        36L, datetime.utcfromtimestamp(1414838712373 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"doubleclick.net",
        26L, datetime.utcfromtimestamp(1414823430333 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"google.com",
        self.geometrical(interval, 21, 1414953920364, 50, 1414849084678),
        datetime.utcfromtimestamp(1414953920364 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"live.com",
        34L, datetime.utcfromtimestamp(1414916268769 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"yahoo.com",
        self.geometrical(interval, 27, 1414917270343, 14, 1414859271125),
        datetime.utcfromtimestamp(1414917270343 / 1000))))
    # Third batch
    db.write(self.db, geometrical_mean.update(interval, test_data[2]))
    self.assertEqual(db.query(self.db, "SELECT * FROM filters"),
                     (("22de8d2ba8429eb170a0ece6ea7a426f7b22e574".decode("hex"),
                       u"stevedeace.com##.topAddHolder"),
                      ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"),
                       u"##.top-box-right-ad")))
    self.assertEqual(
      db.query(self.db, "SELECT * FROM frequencies"),
      (("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"acxiom-online.com",
        6L, datetime.utcfromtimestamp(1414817340948 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"adsymptotic.com",
        self.geometrical(interval, 15, 1414994112862, 49, 1414953943015),
        datetime.utcfromtimestamp(1414994112862 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"amazon.com",
        2L, datetime.utcfromtimestamp(1414913563746 / 1000)),
       ("22de8d2ba8429eb170a0ece6ea7a426f7b22e574".decode("hex"), u"amazonaws.com",
        18L, datetime.utcfromtimestamp(1414977342966 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"demdex.net",
        36L, datetime.utcfromtimestamp(1414838712373 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"doubleclick.net",
        26L, datetime.utcfromtimestamp(1414823430333 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"google.com",
        self.geometrical(interval, 14, 1415008533089,
                         self.geometrical(interval, 21, 1414953920364,
                                          50, 1414849084678),
                         1414953920364),
        datetime.utcfromtimestamp(1415008533089 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"live.com",
        34L, datetime.utcfromtimestamp(1414916268769 / 1000)),
       ("22de8d2ba8429eb170a0ece6ea7a426f7b22e574".decode("hex"), u"mathtag.com",
        14L, datetime.utcfromtimestamp(1415032601175 / 1000)),
       ("8c5ea548436c61f05536e205a29ada6204f603b0".decode("hex"), u"yahoo.com",
        self.geometrical(interval, 43, 1415045194098,
                         self.geometrical(interval, 27, 1414917270343,
                                          14, 1414859271125),
                         1414917270343),
        datetime.utcfromtimestamp(1415045194098 / 1000))))