Esempio n. 1
0
def filter(log, dictionary=None):
    """Return the log entries that are not near-duplicates of an earlier one.

    Each entry that is not already marked ``'CAT' == "ignore"`` is kept and
    compared against the following entries whose ``EVENT_TIME`` is at most
    20 minutes later.  A later entry whose correlation with the kept entry
    exceeds ``getRequiredCorrelation(timeDiff)`` is marked as ignored and is
    therefore left out of the result.

    Args:
        log: indexable sequence of dict entries providing the keys
            ``'MESSAGE'``, ``'CAT'`` and ``'EVENT_TIME'`` (the latter in
            ``%Y-%m-%d-%H.%M.%S.%f`` format).  The scan stops at the first
            later entry outside the 20-minute window, so the entries are
            presumably sorted by ascending ``EVENT_TIME`` -- confirm with
            the caller.
        dictionary: set of words handed to
            ``PearsonCorrelation.correlation``.  When ``None`` it is built
            from every space-separated token of every ``'MESSAGE'``.

    Returns:
        A list with the kept entries (the same dict objects as in ``log``),
        in their original order.

    Side effects:
        Suppressed entries get ``'CAT'`` overwritten with ``"ignore"`` in
        place, and progress lines are printed to standard output.
    """
    if dictionary is None:
        dictionary = set()
        for line in log:
            dictionary.update(line["MESSAGE"].split(" "))

    TIMESTAMP_FORMAT = "%Y-%m-%d-%H.%M.%S.%f"
    MAX_GAP_SECONDS = 20 * 60

    total = len(log)
    # Report progress about every 0.1% of the log.  The step is clamped to 1
    # because logs shorter than 1000 entries would otherwise yield a step of
    # 0 and a ZeroDivisionError in the modulo below.
    step = max(1, total // 1000)
    result = []

    # Every entry is visited, including the last one (the previous bounds of
    # len(log) - 1 silently dropped it from both the result and the scan).
    for i, entry in enumerate(log):
        if i % step == 0:
            # Derive the percentage from the index so it stays accurate for
            # any log length.
            print("%s %% complete" % round(100.0 * i / total, 1))

        if entry['CAT'] == "ignore":
            continue
        result.append(entry)

        entry_date = datetime.strptime(entry['EVENT_TIME'], TIMESTAMP_FORMAT)

        # Plain index walk: avoids copying the tail of the log and behaves
        # the same on Python 2 and Python 3.
        j = i + 1
        while j < total:
            candidate = log[j]
            j += 1  # advance first so the `continue` below cannot stall

            if candidate['CAT'] == "ignore":
                continue

            candidate_date = datetime.strptime(candidate['EVENT_TIME'],
                                               TIMESTAMP_FORMAT)
            time_diff = (candidate_date - entry_date).total_seconds()
            if time_diff > MAX_GAP_SECONDS:
                break

            required_corr = getRequiredCorrelation(time_diff)
            corr = PearsonCorrelation.correlation(entry, candidate, dictionary)
            if corr > required_corr:
                # Too similar to the kept entry: suppress it.
                candidate['CAT'] = "ignore"

    return result
Esempio n. 2
0
 def testSomewhatSimilarLogs(self):
     # Two messages that share their first three words but end differently
     # are expected to have a strictly positive correlation.
     vocabulary = {
         "hello", "I", "am", "a", "log", "file", "these", "are", "some",
         "other", "words", "something", "else",
     }
     first = {'MESSAGE': "hello I am a log file"}
     second = {'MESSAGE': "hello I am something else"}
     self.assertGreater(
         PearsonCorrelation.correlation(first, second, vocabulary), 0.0)
Esempio n. 3
0
 def testUnequalLogs(self):
     # Two messages with no word in common are expected to have a strictly
     # negative correlation.
     vocabulary = {
         "hello", "I", "am", "a", "log", "file", "these", "are", "some",
         "other", "words", "Bobby", "likes", "ice", "cream",
     }
     first = {'MESSAGE': "hello I am a log file"}
     second = {'MESSAGE': "Bobby likes ice cream"}
     self.assertLess(
         PearsonCorrelation.correlation(first, second, vocabulary), 0.0)
Esempio n. 4
0
 def testEqualUnorderedLogs(self):
     # The second message uses exactly the same words as the first, in a
     # different order and with "log" and "file" repeated; the correlation
     # is expected to be 1.
     log1 = {'MESSAGE': "hello I am a log file"}
     log2 = {'MESSAGE': "I log a file hello am log file"}
     dictionary = {"hello", "I", "am", "a", "log", "file", "these", "are",
                   "some", "other", "words"}
     corr = PearsonCorrelation.correlation(log1, log2, dictionary)
     # The correlation is a computed float, so compare with a tolerance
     # instead of exact equality to avoid spurious failures from rounding.
     self.assertAlmostEqual(corr, 1.0)