def testBadFormatNotationRaisesException(self):
  """An unsupported strptime directive (%W combined with this data) must
  surface as ValueError with the standard strptime mismatch message.
  """
  badTimestamp = "01-29-2016 11:01:59.01 AM"
  badFormat = "%m-%d-%Y %I:%M:%S.%f %W"

  with self.assertRaises(ValueError) as excCtx:
    date_time_utils.parseDatetime(badTimestamp, badFormat)

  self.assertEqual(
    excCtx.exception.args[0],
    "time data '01-29-2016 11:01:59.01 AM' does not match format "
    "'%m-%d-%Y %I:%M:%S.%f %W'")
def run(self):
  """ Run the model: ingest and process the input metric data and emit output
  messages containing anomaly scores

  Reads rows from self._csvReader, skips the configured number of leading
  rows, aggregates (timestamp, value) pairs via self._aggregator, and emits
  an output message with an anomaly probability for each aggregated row.
  """
  # Input-parsing parameters supplied via the input spec
  numRowsToSkip = self._inputSpec["rowOffset"]
  datetimeFormat = self._inputSpec["datetimeFormat"]
  inputRowTimestampIndex = self._inputSpec["timestampIndex"]
  inputRowValueIndex = self._inputSpec["valueIndex"]

  g_log.info("Processing model=%s", self._modelId)

  for inputRow in self._csvReader:
    g_log.debug("Got inputRow=%r", inputRow)

    # Skip the configured number of leading (e.g., header) rows
    if numRowsToSkip > 0:
      numRowsToSkip -= 1
      g_log.debug("Skipping header row %s; %s rows left to skip",
                  inputRow, numRowsToSkip)
      continue

    # Extract timestamp and value
    # NOTE: the order must match the `inputFields` that we passed to the
    # Aggregator constructor
    fields = [
      date_time_utils.parseDatetime(inputRow[inputRowTimestampIndex],
                                    datetimeFormat),
      float(inputRow[inputRowValueIndex])
    ]

    # Aggregate; aggRow is None until the aggregator completes a window
    aggRow, _ = self._aggregator.next(fields, None)
    g_log.debug("Aggregator returned %s for %s", aggRow, fields)

    if aggRow is not None:
      self._emitOutputMessage(
        dataRow=aggRow,
        anomalyProbability=self._computeAnomalyProbability(aggRow))

  # Reap remaining data from aggregator (flush the final partial window)
  aggRow, _ = self._aggregator.next(None, curInputBookmark=None)
  g_log.debug("Aggregator reaped %s in final call", aggRow)

  if aggRow is not None:
    self._emitOutputMessage(
      dataRow=aggRow,
      anomalyProbability=self._computeAnomalyProbability(aggRow))
def _readCSVFile(fileName, rowOffset, timestampIndex, valueIndex,
                 datetimeFormat):
  """ Read csv data file; the file must contain a timestamp column and a
  numeric value column at the given indices.

  At most MAX_NUM_ROWS data rows are read.

  :param str fileName: path to input csv file
  :param int rowOffset: index of first data row in csv (rows before it are
    skipped)
  :param int timestampIndex: column index of the timestamp
  :param int valueIndex: column index of the value
  :param str datetimeFormat: datetime format string for python's
    datetime.strptime
  :returns: Sequence of two tuples (timestamp, value), where timestamp is of
    type datetime.datetime and value is a number (int or float)
  """
  with open(fileName, "rU") as csvFile:
    fileReader = _createCsvReader(csvFile)

    # Skip header/offset rows preceding the data
    for _ in xrange(rowOffset):
      fileReader.next()

    samples = []
    for row in fileReader:
      timestamp = date_time_utils.parseDatetime(row[timestampIndex],
                                                datetimeFormat)

      # Use UTC timezone if timezone information is not provided
      if timestamp.tzinfo is None:
        timestamp = timestamp.replace(tzinfo=tz.tzutc())

      samples.append((timestamp, float(row[valueIndex])))

      # Cap the number of rows read; was tracked via a redundant numRows
      # counter that simply mirrored len(samples)
      if len(samples) >= MAX_NUM_ROWS:
        break

    return samples
def testBadTimezoneRaisesException(self):
  """Malformed %z UTC-offset suffixes must raise ValueError with the
  expected diagnostic message.
  """
  fmt = "%Y-%m-%dT%H:%M:%S.%f%z"

  # (timestamp, expected ValueError message) pairs covering truncated,
  # out-of-range, and empty offset variants
  badTimezoneCases = [
    ("2016-01-29T23:00:00.123+000",
     "time data '2016-01-29T23:00:00.123+000' does not match format "
     "'%Y-%m-%dT%H:%M:%S.%f%z'"),
    ("2016-01-29T23:00:00.123+00:60",
     "time data '2016-01-29T23:00:00.123+00:60' does not match format "
     "'%Y-%m-%dT%H:%M:%S.%f%z': UTC offset minutes exceed 59"),
    ("2016-01-29T23:00:00.123+25:00",
     "time data '2016-01-29T23:00:00.123+25:00' does not match format "
     "'%Y-%m-%dT%H:%M:%S.%f%z': UTC offset +25:0 is out of bounds; must be in "
     "-24:59 .. +24:59"),
    ("2016-01-29T23:00:00.123+00:0",
     "time data '2016-01-29T23:00:00.123+00:0' does not match format "
     "'%Y-%m-%dT%H:%M:%S.%f%z'"),
    ("2016-01-29T23:00:00.123+0",
     "time data '2016-01-29T23:00:00.123+0' does not match format "
     "'%Y-%m-%dT%H:%M:%S.%f%z'"),
    ("2016-01-29T23:00:00.123+:00",
     "time data '2016-01-29T23:00:00.123+:00' does not match format "
     "'%Y-%m-%dT%H:%M:%S.%f%z'"),
    ("2016-01-29T23:00:00.123+:",
     "time data '2016-01-29T23:00:00.123+:' does not match format "
     "'%Y-%m-%dT%H:%M:%S.%f%z'"),
    ("2016-01-29T23:00:00.123+",
     "time data '2016-01-29T23:00:00.123+' does not match format "
     "'%Y-%m-%dT%H:%M:%S.%f%z'"),
  ]

  for timestamp, expectedMessage in badTimezoneCases:
    with self.assertRaises(ValueError) as excCtx:
      date_time_utils.parseDatetime(timestamp, fmt)
    self.assertEqual(excCtx.exception.args[0], expectedMessage)
def testGoodSamples(self):
  """Parse every _GOOD_SAMPLES entry and compare against its expected ISO
  rendering; also verify the sample table has no duplicates and covers every
  format declared in momentjs_to_datetime_strptime.json.
  """
  # Any sample appearing more than once is a copy/paste mistake in the table
  duplicates = set(item for item in self._GOOD_SAMPLES
                   if self._GOOD_SAMPLES.count(item) > 1)
  self.assertEqual(
    len(self._GOOD_SAMPLES), len(set(self._GOOD_SAMPLES)),
    msg="There are duplicate test cases: {}".format(duplicates))

  # Exercise the parser on each (format, timestamp, expected-ISO) triple
  testedFormatSet = set()
  for fmt, timestamp, expectedIso in self._GOOD_SAMPLES:
    testedFormatSet.add(fmt)

    try:
      parsed = date_time_utils.parseDatetime(timestamp, fmt)
    except (TypeError, ValueError) as exc:
      self.fail(
        "Failed to parse ts={!r} using fmt={!r}; exc={!r}".format(
          timestamp, fmt, exc))

    try:
      isoEncoded = parsed.isoformat()
    except ValueError as exc:
      self.fail(
        "Failed to isoformat parsed datetime={!r}; ts={!r} using fmt={!r}; "
        "exc={!r}".format(parsed, timestamp, fmt, exc))

    self.assertEqual(
      isoEncoded, expectedIso,
      msg=("ISO result {!r} didn't match expected {!r}; ts={!r} using fmt={!r}"
           .format(isoEncoded, expectedIso, timestamp, fmt)))

  # Make sure all timestamp formats from
  # unicorn/app/config/momentjs_to_datetime_strptime.json are covered by our
  # test cases
  mappingsPath = os.path.join(
    os.path.abspath(os.path.dirname(__file__)),
    os.path.pardir, os.path.pardir, os.path.pardir, os.path.pardir,
    "js", "config", "momentjs_to_datetime_strptime.json")

  with open(mappingsPath) as mappingsFile:
    mapList = json.load(mappingsFile)

  formatsToCategoryMap = dict()
  for bundle in mapList:
    for fmt in bundle["mappings"].itervalues():
      # Each format may belong to only one category bundle
      self.assertNotIn(fmt, formatsToCategoryMap)
      formatsToCategoryMap[fmt] = bundle["category"]

  # Guard against a vacuously-passing coverage check
  self.assertGreater(len(formatsToCategoryMap), 0)
  self.assertGreater(len(testedFormatSet), 0)

  untestedFormats = set(formatsToCategoryMap) - testedFormatSet
  self.assertFalse(
    untestedFormats,
    msg="{} format(s) not covered by GOOD SAMPLES test cases: {}".format(
      len(untestedFormats),
      [(fmt, formatsToCategoryMap[fmt]) for fmt in untestedFormats]))
def testGoodSamples(self):
  """Validate the parser against every entry in _GOOD_SAMPLES and check that
  those entries cover all formats declared in
  momentjs_to_datetime_strptime.json, with no duplicate sample rows.
  """
  # Reject tables containing the same (format, timestamp, iso) entry twice
  self.assertEqual(
    len(self._GOOD_SAMPLES),
    len(set(self._GOOD_SAMPLES)),
    msg="There are duplicate test cases: {}".format(
      set(item for item in self._GOOD_SAMPLES
          if self._GOOD_SAMPLES.count(item) > 1)))

  coveredFormats = set()
  for timestampFormat, rawTimestamp, expectedIso in self._GOOD_SAMPLES:
    coveredFormats.add(timestampFormat)

    try:
      parsedDatetime = date_time_utils.parseDatetime(rawTimestamp,
                                                     timestampFormat)
    except (TypeError, ValueError) as exc:
      self.fail(
        "Failed to parse ts={!r} using fmt={!r}; exc={!r}".format(
          rawTimestamp, timestampFormat, exc))

    try:
      actualIso = parsedDatetime.isoformat()
    except ValueError as exc:
      self.fail(
        "Failed to isoformat parsed datetime={!r}; ts={!r} using fmt={!r}; "
        "exc={!r}".format(parsedDatetime, rawTimestamp, timestampFormat, exc))

    self.assertEqual(
      actualIso, expectedIso,
      msg=("ISO result {!r} didn't match expected {!r}; ts={!r} using fmt={!r}"
           .format(actualIso, expectedIso, rawTimestamp, timestampFormat)))

  # Load the moment.js-to-strptime mapping shipped with the app
  testDir = os.path.abspath(os.path.dirname(__file__))
  mappingsPath = os.path.join(testDir, os.path.pardir, os.path.pardir,
                              os.path.pardir, os.path.pardir, "js", "config",
                              "momentjs_to_datetime_strptime.json")
  with open(mappingsPath) as mappingsFile:
    bundles = json.load(mappingsFile)

  categoryByFormat = {}
  for bundle in bundles:
    for timestampFormat in bundle["mappings"].itervalues():
      # A format must not appear in more than one bundle
      self.assertNotIn(timestampFormat, categoryByFormat)
      categoryByFormat[timestampFormat] = bundle["category"]

  # Ensure the coverage comparison below is not vacuous
  self.assertGreater(len(categoryByFormat), 0)
  self.assertGreater(len(coveredFormats), 0)

  untestedFormats = set(categoryByFormat) - coveredFormats
  self.assertFalse(
    untestedFormats,
    msg="{} format(s) not covered by GOOD SAMPLES test cases: {}".format(
      len(untestedFormats),
      [(timestampFormat, categoryByFormat[timestampFormat])
       for timestampFormat in untestedFormats]))