def testGetLabels(self):
    """
    Labels produced by CorpusLabel should agree with the label windows.

    Writes one data file and two label windows, builds a CorpusLabel from
    them, and asserts that every timestamp lying inside one of a file's
    windows carries the label 1.
    """
    dataFileName = "test_data_file.csv"
    data = pandas.DataFrame({"timestamp": generateTimestamps(
        strp("2014-01-01"), datetime.timedelta(minutes=5), 10)})
    windows = [["2014-01-01 00:00", "2014-01-01 00:10"],
               ["2014-01-01 00:10", "2014-01-01 00:15"]]

    writeCorpus(self.tempCorpusPath, {dataFileName: data})
    writeCorpusLabel(self.tempCorpusLabelPath, {dataFileName: windows})
    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    corpusLabel = nab.labeler.CorpusLabel(self.tempCorpusLabelPath, corpus)

    for relativePath, labelFrame in corpusLabel.labels.iteritems():
        fileWindows = corpusLabel.windows[relativePath]
        # Check the frame that belongs to this relativePath (rather than a
        # hard-coded file name) so labels and windows always come from the
        # same file. Each row unpacks as (timestamp, label).
        for t, lab in labelFrame.values:
            if any(w[0] <= t <= w[1] for w in fileWindows):
                self.assertEqual(
                    lab, 1, "Incorrect label value for timestamp %r" % t)
def testRowsLabeledAnomalousWithinAWindow(self):
    """
    All timestamps labeled as anomalous should be within a label window.

    Every row whose "label" column equals 1 must fall inside at least one
    of the windows registered for its file.
    """
    dataFileName = "test_data_file.csv"
    data = pandas.DataFrame({"timestamp": generateTimestamps(
        strp("2014-01-01"), datetime.timedelta(minutes=5), 10)})
    windows = [["2014-01-01 00:15", "2014-01-01 00:30"]]

    writeCorpus(self.tempCorpusPath, {dataFileName: data})
    writeCorpusLabel(self.tempCorpusLabelPath, {dataFileName: windows})
    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    corpusLabel = nab.labeler.CorpusLabel(self.tempCorpusLabelPath, corpus)

    for relativePath, lab in corpusLabel.labels.iteritems():
        fileWindows = corpusLabel.windows[relativePath]
        for _, row in lab[lab["label"] == 1].iterrows():
            timestamp = row["timestamp"]
            # any(), not all(): a labeled row only has to sit inside one
            # window; requiring every window would wrongly fail as soon as
            # a file has two disjoint windows.
            self.assertTrue(
                any(w[0] <= timestamp <= w[1] for w in fileWindows),
                "The label at %s of file %s is not within a label window"
                % (timestamp, relativePath))
def testNonexistentDatafileOrLabelsThrowsError(self):
    """
    Building a CorpusLabel should raise a KeyError when the corpus and the
    label file disagree about which data files exist, in either direction.
    """
    knownFile = "test_data_file.csv"
    strayFile = "non_existent_data_file.csv"
    timestamps = generateTimestamps(
        strp("2014-01-01"), datetime.timedelta(minutes=5), 10)
    data = pandas.DataFrame({"timestamp": timestamps})
    windows = [["2014-01-01 00:15", "2014-01-01 00:30"]]

    # Case 1: the label file names a data file the corpus does not contain.
    writeCorpus(self.tempCorpusPath, {knownFile: data})
    writeCorpusLabel(self.tempCorpusLabelPath,
                     {knownFile: windows, strayFile: windows})
    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    with self.assertRaises(KeyError):
        nab.labeler.CorpusLabel(self.tempCorpusLabelPath, corpus)

    # Case 2: the corpus contains a data file the label file does not name.
    writeCorpus(self.tempCorpusPath, {knownFile: data, strayFile: data})
    writeCorpusLabel(self.tempCorpusLabelPath, {knownFile: windows})
    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    with self.assertRaises(KeyError):
        nab.labeler.CorpusLabel(self.tempCorpusLabelPath, corpus)
def testBucketMerge(self):
    """
    Raw labels written to several label files should be bucketed and merged
    by LabelCombiner into the expected combined label timestamps.
    """
    dataFileName = "test_data_file.csv"
    timestamps = generateTimestamps(
        strp("2015-12-01"), datetime.timedelta(days=1), 31)
    writeCorpus(self.tempCorpusPath,
                {dataFileName: pandas.DataFrame({"timestamp": timestamps})})

    # One list of raw label timestamps per label file.
    rawLabels = (
        ["2015-12-24 00:00:00", "2015-12-31 00:00:00"],
        ["2015-12-01 00:00:00", "2015-12-25 00:00:00", "2015-12-31 00:00:00"],
        ["2015-12-25 00:00:00"],
    )

    sep = os.path.sep
    for index, fileLabels in enumerate(rawLabels):
        rawLabelFile = "label{}.json".format(index)
        # Redirect ".../label.json" to ".../raw/label<index>.json".
        labelsPath = self.tempCorpusLabelPath.replace(
            sep + "label.json", sep + "raw" + sep + rawLabelFile)
        writeCorpusLabel(labelsPath, {dataFileName: fileLabels})

    # Drop the file name of the last label file written to obtain the
    # directory that holds all the raw label files.
    labelsDir = labelsPath.replace(sep + rawLabelFile, "")

    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    labelCombiner = nab.labeler.LabelCombiner(
        labelsDir, corpus, 0.5, 0.10, 0.15, 0)
    labelCombiner.getRawLabels()
    combined = labelCombiner.combineLabels()
    labelTimestamps = combined[0]

    expectedLabels = ["2015-12-25 00:00:00", "2015-12-31 00:00:00"]
    self.assertEqual(
        expectedLabels,
        labelTimestamps[dataFileName],
        "The combined labels did not bucket and merge as expected.")
def testNonexistentDatafileForLabelsThrowsError(self):
    """
    Building a CorpusLabel should raise a KeyError when the label file
    names a data file that was never written to the corpus.
    """
    presentFile = "test_data_file.csv"
    missingFile = "non_existent_data_file.csv"
    timestamps = generateTimestamps(
        strp("2014-01-01"), datetime.timedelta(minutes=5), 10)
    data = pandas.DataFrame({"timestamp": timestamps})
    windows = [["2014-01-01 00:15", "2014-01-01 00:30"]]

    # Only presentFile goes into the corpus; the labels mention both files.
    writeCorpus(self.tempCorpusPath, {presentFile: data})
    writeCorpusLabel(self.tempCorpusLabelPath,
                     {presentFile: windows, missingFile: windows})
    corpus = nab.corpus.Corpus(self.tempCorpusPath)

    with self.assertRaises(KeyError):
        nab.labeler.CorpusLabel(self.tempCorpusLabelPath, corpus)
def testWindowTimestampsNotInDataFileThrowsError(self):
    """
    A ValueError should be thrown when label windows contain timestamps
    that do not exist in the data file.
    """
    fileName = "test_data_file.csv"
    # The data is generated from 2014-01-01, while the only window sits in 2015.
    timestamps = generateTimestamps(strp("2014-01-01"), None, 1)
    frame = pandas.DataFrame({"timestamp": timestamps})
    labelWindows = [["2015-01-01", "2015-01-01"]]

    writeCorpus(self.tempCorpusPath, {fileName: frame})
    writeCorpusLabel(self.tempCorpusLabelPath, {fileName: labelWindows})
    corpus = nab.corpus.Corpus(self.tempCorpusPath)

    self.assertRaises(
        ValueError,
        nab.labeler.CorpusLabel,
        self.tempCorpusLabelPath,
        corpus)
def testWindowTimestampsNonChronologicalThrowsError(self):
    """
    A ValueError should be thrown when the start and end times of a label
    window are not in chronological order.
    """
    fileName = "test_data_file.csv"
    timestamps = generateTimestamps(
        strp("2014-01-01"), datetime.timedelta(minutes=5), 10)
    frame = pandas.DataFrame({"timestamp": timestamps})

    # The first window is reversed (its end precedes its start); the second
    # window is in order.
    reversedWindow = ["2014-01-01 00:45", "2014-01-01 00:00"]
    orderedWindow = ["2014-01-01 10:15", "2014-01-01 11:15"]
    labelWindows = [reversedWindow, orderedWindow]

    writeCorpus(self.tempCorpusPath, {fileName: frame})
    writeCorpusLabel(self.tempCorpusLabelPath, {fileName: labelWindows})
    corpus = nab.corpus.Corpus(self.tempCorpusPath)

    self.assertRaises(
        ValueError,
        nab.labeler.CorpusLabel,
        self.tempCorpusLabelPath,
        corpus)
def testRedundantTimestampsRaiseException(self):
    """
    LabelCombiner.combine should raise a ValueError for the raw label file
    written by this test.

    NOTE(review): the test name attributes the error to redundant
    timestamps; the exact trigger lives in LabelCombiner and is not visible
    here -- confirm against that class.
    """
    dataFileName = "test_data_file.csv"
    data = pandas.DataFrame({"timestamp": generateTimestamps(
        strp("2015-01-01"), datetime.timedelta(days=1), 365)})
    writeCorpus(self.tempCorpusPath, {dataFileName: data})

    labels = ["2015-12-25 00:00:00",
              "2015-12-26 00:00:00",
              "2015-12-31 00:00:00"]

    # Build the paths with os.path.sep (as testBucketMerge does) instead of
    # a literal "/": with a hard-coded slash the substitutions silently
    # match nothing on platforms whose separator differs, so the label file
    # would not land in the "raw" directory and labDir would name a file.
    sep = os.path.sep
    labelFileSuffix = sep + "label.json"
    labelsPath = self.tempCorpusLabelPath.replace(
        labelFileSuffix, sep + "raw" + labelFileSuffix)
    writeCorpusLabel(labelsPath, {dataFileName: labels})

    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    # Strip the file name to get the directory holding the raw label file.
    labDir = labelsPath.replace(labelFileSuffix, "")
    labelCombiner = nab.labeler.LabelCombiner(
        labDir, corpus, 0.5, 0.10, 0.15, 0)

    self.assertRaises(ValueError, labelCombiner.combine)