Beispiel #1
0
  def testGetLabels(self):
    """
    Labels dictionary generated by CorpusLabel.getLabels() should match the
    label windows.

    Writes one data file and two label windows to the temporary corpus and
    label paths, builds a CorpusLabel from them, and checks that every
    timestamp falling inside one of that file's windows carries label 1.
    """
    data = pandas.DataFrame({"timestamp" :
      generateTimestamps(strp("2014-01-01"),
      datetime.timedelta(minutes=5), 10)})

    windows = [["2014-01-01 00:00", "2014-01-01 00:10"],
               ["2014-01-01 00:10", "2014-01-01 00:15"]]

    writeCorpus(self.tempCorpusPath, {"test_data_file.csv" : data})
    writeCorpusLabel(self.tempCorpusLabelPath, {"test_data_file.csv": windows})

    corpus = nab.corpus.Corpus(self.tempCorpusPath)

    corpusLabel = nab.labeler.CorpusLabel(self.tempCorpusLabelPath, corpus)

    for relativePath, fileLabels in corpusLabel.labels.iteritems():
      # Compare against the windows CorpusLabel holds for this same file.
      fileWindows = corpusLabel.windows[relativePath]

      # Walk the labels of the file being iterated rather than a hard-coded
      # key, so the check stays correct if more files are ever added.
      for t, lab in fileLabels.values:
        if any(w[0] <= t <= w[1] for w in fileWindows):
          self.assertEqual(lab, 1,
            "Incorrect label value for timestamp %r" % t)
Beispiel #2
0
  def testRowsLabeledAnomalousWithinAWindow(self):
    """
    All timestamps labeled as anomalous should be within a label window.

    Writes one data file and one label window, builds a CorpusLabel, and for
    every row whose "label" column equals 1 asserts that its timestamp lies
    inside at least one of the windows recorded for that file.
    """
    data = pandas.DataFrame({"timestamp" :
      generateTimestamps(strp("2014-01-01"),
      datetime.timedelta(minutes=5), 10)})

    windows = [["2014-01-01 00:15", "2014-01-01 00:30"]]

    writeCorpus(self.tempCorpusPath, {"test_data_file.csv": data})
    writeCorpusLabel(self.tempCorpusLabelPath, {"test_data_file.csv": windows})

    corpus = nab.corpus.Corpus(self.tempCorpusPath)

    corpusLabel = nab.labeler.CorpusLabel(self.tempCorpusLabelPath, corpus)

    for relativePath, lab in corpusLabel.labels.iteritems():
      windows = corpusLabel.windows[relativePath]

      for _, row in lab[lab["label"] == 1].iterrows():
        timestamp = row["timestamp"]
        # A label only has to sit inside *one* window. Requiring all() would
        # fail as soon as a file has two disjoint windows, and would pass
        # vacuously when a file has no windows at all.
        self.assertTrue(
          any(w[0] <= timestamp <= w[1] for w in windows),
          "The label at %s of file %s is not within a label window"
          % (timestamp, relativePath))
  def testRowsLabeledAnomalousWithinAWindow(self):
    """
    All timestamps labeled as anomalous should be within a label window.

    The check is per file: each row with label 1 must fall inside at least
    one of the windows that CorpusLabel holds for that file.
    """
    data = pandas.DataFrame({"timestamp" :
      generateTimestamps(strp("2014-01-01"),
      datetime.timedelta(minutes=5), 10)})

    windows = [["2014-01-01 00:15", "2014-01-01 00:30"]]

    writeCorpus(self.tempCorpusPath, {"test_data_file.csv": data})
    writeCorpusLabel(self.tempCorpusLabelPath, {"test_data_file.csv": windows})

    corpus = nab.corpus.Corpus(self.tempCorpusPath)

    corpusLabel = nab.labeler.CorpusLabel(self.tempCorpusLabelPath, corpus)

    for relativePath, lab in corpusLabel.labels.iteritems():
      windows = corpusLabel.windows[relativePath]

      anomalousRows = lab[lab["label"] == 1]
      for _, row in anomalousRows.iterrows():
        timestamp = row["timestamp"]
        # Membership in any single window is sufficient; all() would demand
        # the timestamp be inside every window simultaneously.
        insideSomeWindow = any(w[0] <= timestamp <= w[1] for w in windows)
        self.assertTrue(
          insideSomeWindow,
          "The label at %s of file %s is not within a label window"
          % (timestamp, relativePath))
Beispiel #4
0
  def testNonexistentDatafileOrLabelsThrowsError(self):
    """
    Building a CorpusLabel should raise a KeyError whenever the data files in
    the corpus and the files named in the window labels do not pair up.
    """
    timestamps = generateTimestamps(
      strp("2014-01-01"), datetime.timedelta(minutes=5), 10)
    data = pandas.DataFrame({"timestamp": timestamps})
    windows = [["2014-01-01 00:15", "2014-01-01 00:30"]]

    # Case 1: the labels name a file that the corpus does not contain
    writeCorpus(self.tempCorpusPath, {"test_data_file.csv": data})
    labelsWithExtraFile = {
      "test_data_file.csv": windows,
      "non_existent_data_file.csv": windows,
    }
    writeCorpusLabel(self.tempCorpusLabelPath, labelsWithExtraFile)
    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    with self.assertRaises(KeyError):
      nab.labeler.CorpusLabel(self.tempCorpusLabelPath, corpus)

    # Case 2: the corpus contains a file that the labels do not name
    corpusWithExtraFile = {
      "test_data_file.csv": data,
      "non_existent_data_file.csv": data,
    }
    writeCorpus(self.tempCorpusPath, corpusWithExtraFile)
    writeCorpusLabel(self.tempCorpusLabelPath, {"test_data_file.csv": windows})
    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    with self.assertRaises(KeyError):
      nab.labeler.CorpusLabel(self.tempCorpusLabelPath, corpus)
  def testBucketMerge(self):
    """
    Raw labels from several label files should be bucketed and merged into
    the expected combined labels.

    Writes one data file plus three raw label files (label0.json,
    label1.json, label2.json) into a "raw" directory next to the corpus label
    file, runs LabelCombiner over that directory, and compares the combined
    timestamps for the data file with the expected two labels.
    """
    data = pandas.DataFrame({"timestamp" :
      generateTimestamps(strp("2015-12-01"),
      datetime.timedelta(days=1), 31)})
    dataFileName = "test_data_file.csv"
    writeCorpus(self.tempCorpusPath, {dataFileName : data})

    rawLabels = (["2015-12-24 00:00:00",
                  "2015-12-31 00:00:00"],
                 ["2015-12-01 00:00:00",
                  "2015-12-25 00:00:00",
                  "2015-12-31 00:00:00"],
                 ["2015-12-25 00:00:00"])

    # Derive the raw-labels directory once, up front, instead of recovering
    # it after the loop from whatever the last iteration left behind.
    labelsDir = self.tempCorpusLabelPath.replace(
      os.path.sep + "label.json", os.path.sep + "raw")

    for i, labels in enumerate(rawLabels):
      labelsPath = os.path.join(labelsDir, "label{}.json".format(i))
      writeCorpusLabel(labelsPath, {dataFileName: labels})

    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    # NOTE(review): the meaning of the positional arguments 0.5, 0.10, 0.15, 0
    # is defined by LabelCombiner, which is not visible here — confirm there.
    labelCombiner = nab.labeler.LabelCombiner(
      labelsDir, corpus, 0.5, 0.10, 0.15, 0)
    labelCombiner.getRawLabels()
    labelTimestamps, _ = labelCombiner.combineLabels()

    expectedLabels = ['2015-12-25 00:00:00', '2015-12-31 00:00:00']
    self.assertEqual(expectedLabels, labelTimestamps[dataFileName],
      "The combined labels did not bucket and merge as expected.")
  def testGetLabels(self):
    """
    Labels dictionary generated by CorpusLabel.getLabels() should match the
    label windows.

    For each file known to the CorpusLabel, every (timestamp, label) row
    whose timestamp lies inside one of that file's windows must have label 1.
    """
    data = pandas.DataFrame({"timestamp" :
      generateTimestamps(strp("2014-01-01"),
      datetime.timedelta(minutes=5), 10)})

    windows = [["2014-01-01 00:00", "2014-01-01 00:10"],
               ["2014-01-01 00:10", "2014-01-01 00:15"]]

    writeCorpus(self.tempCorpusPath, {"test_data_file.csv" : data})
    writeCorpusLabel(self.tempCorpusLabelPath, {"test_data_file.csv": windows})

    corpus = nab.corpus.Corpus(self.tempCorpusPath)

    corpusLabel = nab.labeler.CorpusLabel(self.tempCorpusLabelPath, corpus)

    for relativePath, fileLabels in corpusLabel.labels.iteritems():
      fileWindows = corpusLabel.windows[relativePath]

      # Iterate the labels bound by the loop instead of re-fetching a
      # hard-coded file name, so labels and windows always belong together.
      for t, lab in fileLabels.values:
        inWindow = any(w[0] <= t <= w[1] for w in fileWindows)
        if inWindow:
          self.assertEqual(lab, 1,
            "Incorrect label value for timestamp %r" % t)
Beispiel #7
0
  def testNonexistentDatafileOrLabelsThrowsError(self):
    """
    A KeyError is expected from the CorpusLabel constructor when a data file
    has no matching window labels, or window labels have no matching data
    file.
    """
    existingFile = "test_data_file.csv"
    missingFile = "non_existent_data_file.csv"

    data = pandas.DataFrame({
      "timestamp": generateTimestamps(
        strp("2014-01-01"), datetime.timedelta(minutes=5), 10)})
    windows = [["2014-01-01 00:15", "2014-01-01 00:30"]]

    # Case 1: nonexistent datafile for window labels
    writeCorpus(self.tempCorpusPath, {existingFile: data})
    writeCorpusLabel(
      self.tempCorpusLabelPath, {existingFile: windows, missingFile: windows})
    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    self.assertRaises(
      KeyError, nab.labeler.CorpusLabel, self.tempCorpusLabelPath, corpus)

    # Case 2: nonexistent window labels for datafile
    writeCorpus(
      self.tempCorpusPath, {existingFile: data, missingFile: data})
    writeCorpusLabel(self.tempCorpusLabelPath, {existingFile: windows})
    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    self.assertRaises(
      KeyError, nab.labeler.CorpusLabel, self.tempCorpusLabelPath, corpus)
  def testNonexistentDatafileForLabelsThrowsError(self):
    """
    A KeyError is expected from the CorpusLabel constructor when the window
    labels name a data file that was not written to the corpus.
    """
    timestamps = generateTimestamps(
      strp("2014-01-01"), datetime.timedelta(minutes=5), 10)
    data = pandas.DataFrame({"timestamp": timestamps})
    windows = [["2014-01-01 00:15", "2014-01-01 00:30"]]

    # The corpus holds one file; the labels refer to that file and one more.
    writeCorpus(self.tempCorpusPath, {"test_data_file.csv": data})
    labelsByFile = {
      "test_data_file.csv": windows,
      "non_existent_data_file.csv": windows,
    }
    writeCorpusLabel(self.tempCorpusLabelPath, labelsByFile)

    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    with self.assertRaises(KeyError):
      nab.labeler.CorpusLabel(self.tempCorpusLabelPath, corpus)
Beispiel #9
0
  def testWindowTimestampsNotInDataFileThrowsError(self):
    """
    A ValueError should be thrown when label windows contain timestamps
    that do not exist in the data file.
    """
    fileName = "test_data_file.csv"

    # Data is generated from 2014-01-01; the window below is dated 2015-01-01.
    timestamps = generateTimestamps(strp("2014-01-01"), None, 1)
    data = pandas.DataFrame({"timestamp": timestamps})
    windows = [["2015-01-01", "2015-01-01"]]

    writeCorpus(self.tempCorpusPath, {fileName: data})
    writeCorpusLabel(self.tempCorpusLabelPath, {fileName: windows})

    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    with self.assertRaises(ValueError):
      nab.labeler.CorpusLabel(self.tempCorpusLabelPath, corpus)
  def testWindowTimestampsNotInDataFileThrowsError(self):
    """
    Constructing a CorpusLabel should raise a ValueError when a label window
    refers to timestamps that are absent from the data file.
    """
    data = pandas.DataFrame({
      "timestamp": generateTimestamps(strp("2014-01-01"), None, 1)})

    # Window endpoints are dated a year after the data's start date.
    windowsByFile = {"test_data_file.csv": [["2015-01-01", "2015-01-01"]]}

    writeCorpus(self.tempCorpusPath, {"test_data_file.csv": data})
    writeCorpusLabel(self.tempCorpusLabelPath, windowsByFile)

    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    self.assertRaises(
      ValueError, nab.labeler.CorpusLabel, self.tempCorpusLabelPath, corpus)
Beispiel #11
0
  def testWindowTimestampsNonChronologicalThrowsError(self):
    """
    A ValueError should be thrown when a label window's start and end
    times are not in chronological order.
    """
    fileName = "test_data_file.csv"
    timestamps = generateTimestamps(
      strp("2014-01-01"), datetime.timedelta(minutes=5), 10)
    data = pandas.DataFrame({"timestamp": timestamps})

    # The first window ends before it starts; the second is in order.
    # NOTE(review): the second window (10:15-11:15) looks like it may lie
    # outside the generated timestamps — confirm the ValueError asserted
    # below comes from the ordering check.
    reversedWindow = ["2014-01-01 00:45", "2014-01-01 00:00"]
    orderedWindow = ["2014-01-01 10:15", "2014-01-01 11:15"]
    windows = [reversedWindow, orderedWindow]

    writeCorpus(self.tempCorpusPath, {fileName: data})
    writeCorpusLabel(self.tempCorpusLabelPath, {fileName: windows})

    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    with self.assertRaises(ValueError):
      nab.labeler.CorpusLabel(self.tempCorpusLabelPath, corpus)
  def testWindowTimestampsNonChronologicalThrowsError(self):
    """
    Constructing a CorpusLabel should raise a ValueError when a label
    window lists its end time before its start time.
    """
    data = pandas.DataFrame({
      "timestamp": generateTimestamps(
        strp("2014-01-01"), datetime.timedelta(minutes=5), 10)})

    # One window with start after end, followed by one in the right order.
    # NOTE(review): the in-order window (10:15-11:15) appears to fall outside
    # the generated data range — verify which check raises the ValueError.
    windows = [
      ["2014-01-01 00:45", "2014-01-01 00:00"],
      ["2014-01-01 10:15", "2014-01-01 11:15"],
    ]

    writeCorpus(self.tempCorpusPath, {"test_data_file.csv": data})
    writeCorpusLabel(self.tempCorpusLabelPath, {"test_data_file.csv": windows})

    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    self.assertRaises(
      ValueError, nab.labeler.CorpusLabel, self.tempCorpusLabelPath, corpus)
  def testRedundantTimestampsRaiseException(self):
    """
    LabelCombiner.combine is expected to raise a ValueError for the raw
    label file written by this test.
    """
    fileName = "test_data_file.csv"
    dailyTimestamps = generateTimestamps(
      strp("2015-01-01"), datetime.timedelta(days=1), 365)
    data = pandas.DataFrame({"timestamp": dailyTimestamps})
    writeCorpus(self.tempCorpusPath, {fileName: data})

    # The raw labels go into a "raw" directory beside the corpus label file.
    rawLabelsPath = self.tempCorpusLabelPath.replace(
      "/label.json", "/raw/label.json")
    rawLabels = [
      "2015-12-25 00:00:00",
      "2015-12-26 00:00:00",
      "2015-12-31 00:00:00",
    ]
    writeCorpusLabel(rawLabelsPath, {fileName: rawLabels})

    corpus = nab.corpus.Corpus(self.tempCorpusPath)
    # LabelCombiner is given the directory, so strip the file name again.
    rawLabelsDir = rawLabelsPath.replace("/label.json", "")
    combiner = nab.labeler.LabelCombiner(
      rawLabelsDir, corpus, 0.5, 0.10, 0.15, 0)

    self.assertRaises(ValueError, combiner.combine)