Ejemplo n.º 1
0
  def testBadFormatNotationRaisesException(self):
    """Verify that parseDatetime raises ValueError, carrying the expected
    message, when the timestamp does not match the supplied format string.
    """
    badTimestamp = "01-29-2016 11:01:59.01 AM"
    badFormat = "%m-%d-%Y %I:%M:%S.%f %W"
    expectedMessage = (
      "time data '01-29-2016 11:01:59.01 AM' does not match format "
      "'%m-%d-%Y %I:%M:%S.%f %W'")

    with self.assertRaises(ValueError) as raised:
      date_time_utils.parseDatetime(badTimestamp, badFormat)

    # The first exception argument carries the human-readable message
    self.assertEqual(raised.exception.args[0], expectedMessage)
Ejemplo n.º 2
0
    def testBadFormatNotationRaisesException(self):
        """Check that a timestamp which does not match the given format makes
        parseDatetime raise ValueError with the expected message.
        """
        timestampText = "01-29-2016 11:01:59.01 AM"
        formatText = "%m-%d-%Y %I:%M:%S.%f %W"

        with self.assertRaises(ValueError) as caught:
            date_time_utils.parseDatetime(timestampText, formatText)

        # Compare against the first positional argument of the exception
        actualMessage = caught.exception.args[0]
        self.assertEqual(
            actualMessage,
            "time data '01-29-2016 11:01:59.01 AM' does not match format "
            "'%m-%d-%Y %I:%M:%S.%f %W'")
Ejemplo n.º 3
0
  def run(self):
    """ Run the model over the input CSV rows.

    Rows are read from the csv reader; the first `rowOffset` rows are skipped.
    Each remaining row is parsed into a (timestamp, value) pair and fed to the
    aggregator. Whenever the aggregator returns a row, an output message is
    emitted with that row and its computed anomaly probability. After the
    input is exhausted, the aggregator is called one last time to reap any
    remaining data.
    """
    spec = self._inputSpec
    rowsLeftToSkip = spec["rowOffset"]
    timestampFormat = spec["datetimeFormat"]
    timestampColumn = spec["timestampIndex"]
    valueColumn = spec["valueIndex"]

    g_log.info("Processing model=%s", self._modelId)

    for inputRow in self._csvReader:
      g_log.debug("Got inputRow=%r", inputRow)

      # Leading rows (e.g., headers) are consumed without being processed
      if rowsLeftToSkip > 0:
        rowsLeftToSkip -= 1
        g_log.debug("Skipping header row %s; %s rows left to skip",
                    inputRow, rowsLeftToSkip)
        continue

      # Extract timestamp and value
      # NOTE: the order must match the `inputFields` that we passed to the
      # Aggregator constructor
      timestamp = date_time_utils.parseDatetime(inputRow[timestampColumn],
                                                timestampFormat)
      value = float(inputRow[valueColumn])
      fields = [timestamp, value]

      # Aggregate; the aggregator yields None until it has a row to report
      aggRow, _ = self._aggregator.next(fields, None)
      g_log.debug("Aggregator returned %s for %s", aggRow, fields)
      if aggRow is None:
        continue

      probability = self._computeAnomalyProbability(aggRow)
      self._emitOutputMessage(dataRow=aggRow, anomalyProbability=probability)

    # Reap remaining data from aggregator
    aggRow, _ = self._aggregator.next(None, curInputBookmark=None)
    g_log.debug("Aggregator reaped %s in final call", aggRow)
    if aggRow is None:
      return

    probability = self._computeAnomalyProbability(aggRow)
    self._emitOutputMessage(dataRow=aggRow, anomalyProbability=probability)
Ejemplo n.º 4
0
def _readCSVFile(fileName, rowOffset, timestampIndex, valueIndex,
                 datetimeFormat):
    """
    Load (timestamp, value) samples from a csv data file.

    The first `rowOffset` rows are skipped. Reading stops once MAX_NUM_ROWS
    data rows have been collected. Timestamps that carry no timezone
    information are tagged as UTC.

    :param str fileName: path to input csv file
    :param int rowOffset: index of first data row in csv
    :param int timestampIndex: column index of the timestamp
    :param int valueIndex: column index of the value
    :param str datetimeFormat: datetime format string handed to
      date_time_utils.parseDatetime
    :returns: list of two-tuples (timestamp, value), where timestamp is the
      parsed (timezone-aware) timestamp and value is a float
    """
    with open(fileName, "rU") as csvFile:
        reader = _createCsvReader(csvFile)

        # Discard the rows that precede the first data row
        for _ in xrange(rowOffset):
            reader.next()

        samples = []
        for rowCount, row in enumerate(reader, 1):
            parsedTime = date_time_utils.parseDatetime(row[timestampIndex],
                                                       datetimeFormat)

            # Fall back to UTC when the timestamp has no timezone of its own
            if parsedTime.tzinfo is None:
                parsedTime = parsedTime.replace(tzinfo=tz.tzutc())

            samples.append((parsedTime, float(row[valueIndex])))

            # Cap the amount of data read from the file
            if rowCount >= MAX_NUM_ROWS:
                break

    return samples
Ejemplo n.º 5
0
  def testBadTimezoneRaisesException(self):
    """Check that parseDatetime raises ValueError, with the exact expected
    message, for each timestamp whose trailing UTC offset is not accepted
    under the "%z"-terminated format.
    """
    fmt = "%Y-%m-%dT%H:%M:%S.%f%z"

    # Pairs of (timestamp, expected first exception argument), checked in the
    # listed order
    badOffsetCases = (
      ("2016-01-29T23:00:00.123+000",
       "time data '2016-01-29T23:00:00.123+000' does not match format "
       "'%Y-%m-%dT%H:%M:%S.%f%z'"),

      ("2016-01-29T23:00:00.123+00:60",
       "time data '2016-01-29T23:00:00.123+00:60' does not match format "
       "'%Y-%m-%dT%H:%M:%S.%f%z': UTC offset minutes exceed 59"),

      ("2016-01-29T23:00:00.123+25:00",
       "time data '2016-01-29T23:00:00.123+25:00' does not match format "
       "'%Y-%m-%dT%H:%M:%S.%f%z': UTC offset +25:0 is out of bounds; must be in "
       "-24:59 .. +24:59"),

      ("2016-01-29T23:00:00.123+00:0",
       "time data '2016-01-29T23:00:00.123+00:0' does not match format "
       "'%Y-%m-%dT%H:%M:%S.%f%z'"),

      ("2016-01-29T23:00:00.123+0",
       "time data '2016-01-29T23:00:00.123+0' does not match format "
       "'%Y-%m-%dT%H:%M:%S.%f%z'"),

      ("2016-01-29T23:00:00.123+:00",
       "time data '2016-01-29T23:00:00.123+:00' does not match format "
       "'%Y-%m-%dT%H:%M:%S.%f%z'"),

      ("2016-01-29T23:00:00.123+:",
       "time data '2016-01-29T23:00:00.123+:' does not match format "
       "'%Y-%m-%dT%H:%M:%S.%f%z'"),

      ("2016-01-29T23:00:00.123+",
       "time data '2016-01-29T23:00:00.123+' does not match format "
       "'%Y-%m-%dT%H:%M:%S.%f%z'"),
    )

    for timestamp, expectedMessage in badOffsetCases:
      with self.assertRaises(ValueError) as excCtx:
        date_time_utils.parseDatetime(timestamp, fmt)

      self.assertEqual(excCtx.exception.args[0], expectedMessage)
Ejemplo n.º 6
0
  def testGoodSamples(self):
    """Parse every entry of _GOOD_SAMPLES and compare its ISO encoding with
    the expected value, then confirm that every format listed in
    momentjs_to_datetime_strptime.json is exercised by at least one sample.
    """
    samples = self._GOOD_SAMPLES

    # The sample table must not contain repeated entries
    repeated = set(item for item in samples if samples.count(item) > 1)
    self.assertEqual(
      len(samples),
      len(set(samples)),
      msg="There are duplicate test cases: {}".format(repeated))

    # Run each sample through the parser, remembering which formats were used
    coveredFormats = set()

    for fmt, timestamp, expectedIso in samples:
      coveredFormats.add(fmt)

      try:
        parsed = date_time_utils.parseDatetime(timestamp, fmt)
      except (TypeError, ValueError) as exc:
        parseFailure = (
          "Failed to parse ts={!r} using fmt={!r}; exc={!r}".format(
            timestamp, fmt, exc))
        self.fail(parseFailure)

      try:
        isoEncoded = parsed.isoformat()
      except ValueError as exc:
        isoFailure = (
          "Failed to isoformat parsed datetime={!r}; ts={!r} using fmt={!r}; "
          "exc={!r}".format(parsed, timestamp, fmt, exc))
        self.fail(isoFailure)

      mismatch = (
        "ISO result {!r} didn't match expected {!r}; ts={!r} using fmt={!r}"
        .format(isoEncoded, expectedIso, timestamp, fmt))
      self.assertEqual(isoEncoded, expectedIso, msg=mismatch)

    # Locate the format mappings file: four directory levels above this
    # module, then js/config/momentjs_to_datetime_strptime.json
    thisDir = os.path.abspath(os.path.dirname(__file__))
    pathParts = (
      [thisDir] +
      [os.path.pardir] * 4 +
      ["js", "config", "momentjs_to_datetime_strptime.json"])
    mappingsPath = os.path.join(*pathParts)

    with open(mappingsPath) as mappingsFile:
      bundles = json.load(mappingsFile)

    # Map each format to its category; a format must not appear twice
    formatsToCategoryMap = {}
    for bundle in bundles:
      for mappedFormat in bundle["mappings"].itervalues():
        self.assertNotIn(mappedFormat, formatsToCategoryMap)
        formatsToCategoryMap[mappedFormat] = bundle["category"]

    # Both sides of the coverage comparison must be non-empty
    self.assertGreater(len(formatsToCategoryMap), 0)
    self.assertGreater(len(coveredFormats), 0)

    # Every mapped format needs at least one sample
    untestedFormats = set(formatsToCategoryMap) - coveredFormats
    uncovered = [(fmt, formatsToCategoryMap[fmt]) for fmt in untestedFormats]

    self.assertFalse(
      untestedFormats,
      msg="{} format(s) not covered by GOOD SAMPLES test cases: {}".format(
        len(untestedFormats), uncovered))
Ejemplo n.º 7
0
    def testBadTimezoneRaisesException(self):
        """Verify that each timestamp with an unacceptable trailing UTC offset
        makes parseDatetime raise ValueError with the exact expected message.
        """
        offsetFormat = "%Y-%m-%dT%H:%M:%S.%f%z"

        # Each entry pairs a timestamp with the message expected in the first
        # exception argument; entries are verified in the listed order
        expectations = [
            ("2016-01-29T23:00:00.123+000",
             "time data '2016-01-29T23:00:00.123+000' does not match format "
             "'%Y-%m-%dT%H:%M:%S.%f%z'"),
            ("2016-01-29T23:00:00.123+00:60",
             "time data '2016-01-29T23:00:00.123+00:60' does not match format "
             "'%Y-%m-%dT%H:%M:%S.%f%z': UTC offset minutes exceed 59"),
            ("2016-01-29T23:00:00.123+25:00",
             "time data '2016-01-29T23:00:00.123+25:00' does not match format "
             "'%Y-%m-%dT%H:%M:%S.%f%z': UTC offset +25:0 is out of bounds; must be in "
             "-24:59 .. +24:59"),
            ("2016-01-29T23:00:00.123+00:0",
             "time data '2016-01-29T23:00:00.123+00:0' does not match format "
             "'%Y-%m-%dT%H:%M:%S.%f%z'"),
            ("2016-01-29T23:00:00.123+0",
             "time data '2016-01-29T23:00:00.123+0' does not match format "
             "'%Y-%m-%dT%H:%M:%S.%f%z'"),
            ("2016-01-29T23:00:00.123+:00",
             "time data '2016-01-29T23:00:00.123+:00' does not match format "
             "'%Y-%m-%dT%H:%M:%S.%f%z'"),
            ("2016-01-29T23:00:00.123+:",
             "time data '2016-01-29T23:00:00.123+:' does not match format "
             "'%Y-%m-%dT%H:%M:%S.%f%z'"),
            ("2016-01-29T23:00:00.123+",
             "time data '2016-01-29T23:00:00.123+' does not match format "
             "'%Y-%m-%dT%H:%M:%S.%f%z'"),
        ]

        for badTimestamp, expectedText in expectations:
            with self.assertRaises(ValueError) as excCtx:
                date_time_utils.parseDatetime(badTimestamp, offsetFormat)

            self.assertEqual(excCtx.exception.args[0], expectedText)
Ejemplo n.º 8
0
    def testGoodSamples(self):
        """Check that every _GOOD_SAMPLES entry parses and ISO-encodes to its
        expected value, and that all formats listed in
        momentjs_to_datetime_strptime.json are covered by the samples.
        """
        goodSamples = self._GOOD_SAMPLES

        # Reject duplicated sample entries up front
        duplicated = set(
            item for item in goodSamples if goodSamples.count(item) > 1)
        self.assertEqual(
            len(goodSamples),
            len(set(goodSamples)),
            msg="There are duplicate test cases: {}".format(duplicated))

        # Parse each sample and record the format it exercised
        testedFormatSet = set()

        for fmt, timestamp, expectedIso in goodSamples:
            testedFormatSet.add(fmt)

            try:
                parsed = date_time_utils.parseDatetime(timestamp, fmt)
            except (TypeError, ValueError) as exc:
                parseError = (
                    "Failed to parse ts={!r} using fmt={!r}; exc={!r}".format(
                        timestamp, fmt, exc))
                self.fail(parseError)

            try:
                isoEncoded = parsed.isoformat()
            except ValueError as exc:
                encodeError = (
                    "Failed to isoformat parsed datetime={!r}; ts={!r} using fmt={!r}; "
                    "exc={!r}".format(parsed, timestamp, fmt, exc))
                self.fail(encodeError)

            mismatchText = (
                "ISO result {!r} didn't match expected {!r}; ts={!r} using fmt={!r}"
                .format(isoEncoded, expectedIso, timestamp, fmt))
            self.assertEqual(isoEncoded, expectedIso, msg=mismatchText)

        # The mappings file lives four directory levels above this module,
        # under js/config
        moduleDir = os.path.abspath(os.path.dirname(__file__))
        pathSegments = [moduleDir]
        pathSegments.extend([os.path.pardir] * 4)
        pathSegments.extend(
            ["js", "config", "momentjs_to_datetime_strptime.json"])
        mappingsPath = os.path.join(*pathSegments)

        with open(mappingsPath) as mappingsFile:
            mapList = json.load(mappingsFile)

        # Collect format -> category, insisting that no format is repeated
        formatsToCategoryMap = {}
        for bundle in mapList:
            for mappedFmt in bundle["mappings"].itervalues():
                self.assertNotIn(mappedFmt, formatsToCategoryMap)
                formatsToCategoryMap[mappedFmt] = bundle["category"]

        # Neither the mapped formats nor the tested formats may be empty
        self.assertGreater(len(formatsToCategoryMap), 0)
        self.assertGreater(len(testedFormatSet), 0)

        # Any mapped format without a sample is a coverage gap
        untestedFormats = set(formatsToCategoryMap) - testedFormatSet
        gaps = [(fmt, formatsToCategoryMap[fmt]) for fmt in untestedFormats]

        self.assertFalse(
            untestedFormats,
            msg="{} format(s) not covered by GOOD SAMPLES test cases: {}".
            format(len(untestedFormats), gaps))