Beispiel #1
0
    def histoWeighted(matrix):
        """Build a per-key average of the second column of a matrix.

        Rows are grouped by their first element. For every distinct key the
        second elements are summed and the number of rows is counted; the
        sums are then divided by the counts through
        ``NumColumn.scalarDivision`` (presumably an element-wise division --
        confirm against NumColumn).

        Args:
            matrix: a Matrix (or subclass) with at most two columns.

        Returns:
            A new Matrix with one row per distinct key: the key in the first
            column and the divided sum in the second.
        """
        if not isinstance(matrix, Matrix):
            HandleError.exit("In Histogram.histoWeighted: incorrect type: " +
                             str(type(matrix)))
        if matrix.cols > 2:
            HandleError.exit("In Histogram.histoWeighted: matrix cols > 2")

        sums = {}
        counts = {}
        for r in range(matrix.rows):
            row = matrix.getRow(r)
            key = row[0]
            if key in sums:
                sums[key] = sums[key] + row[1]
                counts[key] = counts[key] + 1
            else:
                sums[key] = row[1]
                counts[key] = 1

        # Walk one explicit key list so that keys, sums and counts are
        # guaranteed to line up, and hand real lists to the column classes.
        keys = list(sums)
        m = Matrix(len(keys), 2)
        m.addColumn(keys)

        n1 = NumColumn([sums[k] for k in keys])
        n2 = NumColumn([counts[k] for k in keys])
        m.addColumn(n1.scalarDivision(n2))

        return m
Beispiel #2
0
    def euclideanDistance(self, win2):
        """Return the Euclidean distance between this window and *win2*.

        The distance is the square root of the dot product of the difference
        between the two windows' ``colB`` columns with itself.

        Args:
            win2: the Window to compare against; anything else is reported
                through HandleError.exit.
        """
        if not isinstance(win2, Window):
            HandleError.exit("In euclideanDistance: argument is not a window")

        delta = self.colB.substract(win2.colB)
        return math.sqrt(delta.dotProduct(delta))
Beispiel #3
0
def createHyperSphere(matrix, winSize, winType='SLIDING'):
    """Cut the first two columns of *matrix* into a set of Window objects.

    Args:
        matrix: object exposing ``getCol(i)`` and ``rows``.
        winSize: number of rows per window.
        winType: ``'REGULAR'`` creates a window at every start index in
            ``range(rows - winSize - 1)``; ``'SLIDING'`` (the default)
            creates ``floor(rows / winSize)`` back-to-back windows starting
            at multiples of ``winSize``. Any other value is reported through
            HandleError.exit.

    Returns:
        A set of Window objects, each built from the sub-columns
        ``getSubColumn(start, start + winSize - 1)`` of columns 0 and 1.
    """
    c1 = matrix.getCol(0)
    c2 = matrix.getCol(1)
    rows = matrix.rows

    # Compare by value: identity ('is') only works for interned strings and
    # silently rejects an equal string built at run time.
    if winType == 'REGULAR':
        # NOTE(review): if getSubColumn bounds are inclusive, this range
        # leaves the last two possible start positions unused -- confirm
        # that this is intended.
        starts = range(rows - winSize - 1)
    elif winType == 'SLIDING':
        numWins = int(math.floor(float(rows) / winSize))
        starts = (i * winSize for i in range(numWins))
    else:
        HandleError.exit("In createHyperSphere: incorrect window type")
        # Only reached if exit() returns; yields an empty result as before.
        starts = ()

    s = set()
    for x in starts:
        y = x + winSize - 1
        subCol1 = c1.getSubColumn(x, y)
        subCol2 = c2.getSubColumn(x, y)
        s.add(Window(subCol1, subCol2))

    return s
Beispiel #4
0
def getMetric(options):
    """Return the 1-based position of the metric named by options.METRIC.

    The metric names are taken from the first line of the file at
    ``options.NFILE``: the line is split on commas and its first field is
    skipped. The first name equal to ``options.METRIC`` determines the
    result.

    Args:
        options: object with ``METRIC`` (metric name or None) and ``NFILE``
            (path of the file whose header lists the metrics).

    Returns:
        The 1-based index of the metric among the header fields that follow
        the first one. A missing or unknown metric is reported through
        HandleError.exit; if that call returns, ``''`` is returned.
    """
    metric_id = ''
    if options.METRIC is None:
        HandleError.exit('No abnormal metric is given.\nUse -h option for help.')
        return metric_id

    metric_name = options.METRIC

    # Find metric number in the file. Strip only line terminators so the
    # last metric name survives when the header has no trailing newline.
    with open(options.NFILE, 'r') as header_file:
        header = header_file.readline().rstrip('\r\n')
    metrics = header.split(',')[1:]

    try:
        metric_id = metrics.index(metric_name) + 1
    except ValueError:
        HandleError.exit('Unknown metric name.')

    return metric_id
Beispiel #5
0
def getMetric(options):
    """Return the 1-based position of the metric named by options.METRIC.

    The metric names are taken from the first line of the file at
    ``options.NFILE``: the line is split on commas and its first field is
    skipped. The first name equal to ``options.METRIC`` determines the
    result.

    Args:
        options: object with ``METRIC`` (metric name or None) and ``NFILE``
            (path of the file whose header lists the metrics).

    Returns:
        The 1-based index of the metric among the header fields that follow
        the first one. A missing or unknown metric is reported through
        HandleError.exit; if that call returns, ``''`` is returned.
    """
    metric_id = ''
    if options.METRIC is None:
        HandleError.exit('No abnormal metric is given.\nUse -h option for help.')
        return metric_id

    metric_name = options.METRIC

    # Find metric number in the file. Strip only line terminators so the
    # last metric name survives when the header has no trailing newline.
    with open(options.NFILE, 'r') as header_file:
        header = header_file.readline().rstrip('\r\n')
    metrics = header.split(',')[1:]

    try:
        metric_id = metrics.index(metric_name) + 1
    except ValueError:
        HandleError.exit('Unknown metric name.')

    return metric_id
Beispiel #6
0
    def normalizeAggregates(self, averageList, stdList):
        """Standardize ``self.aggr`` in place, position by position.

        Each entry becomes ``(entry - average) / std`` using the values at
        the same position of the two lists; where the std entry is zero the
        entry is set to 0 instead.

        Args:
            averageList: averages, one per entry of ``self.aggr``.
            stdList: deviations, one per entry of ``self.aggr``.

        A length mismatch is reported through HandleError.exit.
        """
        expected = len(self.aggr)
        if not (len(averageList) == expected and len(stdList) == expected):
            HandleError.exit("In normalizeAggregates: incorrect list sizes")

        for pos, value in enumerate(self.aggr):
            spread = stdList[pos]
            if spread != 0.0:
                self.aggr[pos] = (value - averageList[pos]) / spread
            else:
                # A zero deviation cannot be divided by; use a neutral 0.
                self.aggr[pos] = 0
Beispiel #7
0
def getMatricsFromCorrelationIndex(index, n):
    """Map a linear correlation index back to its pair ``(x, y)``.

    Pairs with ``0 <= x < y < n`` are numbered from 0 in row-major order:
    (0, 1), (0, 2), ..., (0, n-1), (1, 2), ... The pair whose number equals
    *index* is returned.

    Args:
        index: position of the wanted pair in that ordering.
        n: number of items being paired.

    Returns:
        The tuple ``(x, y)``. If no pair has that number the failure is
        reported through HandleError.exit.
    """
    pairs = ((x, y) for x in range(n - 1) for y in range(x + 1, n))
    for position, pair in enumerate(pairs):
        if index == position:
            return pair
    HandleError.exit('Could not find correlation index')
Beispiel #8
0
 def __init__(self, col1, col2):
     """Create a window from a first and a second column.

     When *col1* is a StringColumn and *col2* a NumColumn they are stored
     as given; otherwise both are treated as plain sequences and wrapped
     in StringColumn / NumColumn. In either case the two must have the
     same size, else the mismatch is reported through HandleError.exit.
     The aggregates of the second column are cached in ``self.aggr``.
     """
     if not (isinstance(col1, StringColumn) and isinstance(col2, NumColumn)):
         # Raw sequences: compare lengths, then wrap them.
         if len(col1) != len(col2):
             HandleError.exit("In window: cols of different sizes")
         first = StringColumn(col1)
         second = NumColumn(col2)
     else:
         # Ready-made columns: compare their reported sizes.
         if col1.size() != col2.size():
             HandleError.exit("In window: cols of different sizes")
         first = col1
         second = col2
     self.colA = first
     self.colB = second
     self.aggr = self.colB.getAggregates()
Beispiel #9
0
    def removeColumnsButKeep(self, list=[]):
        """Drop every column whose index is not listed in *list*.

        An empty *list* leaves the matrix untouched. Indices above the last
        column are reported through HandleError.exit. The kept columns
        retain their original relative order, and ``self.cols`` is updated
        to the new column count.

        Args:
            list: indices of the columns to keep.
        """
        if not len(list):
            return

        last_index = self.cols - 1
        for wanted in list:
            if wanted > last_index:
                HandleError.exit("Incorrect column index: " + str(wanted))

        self.listOfCols = [self.listOfCols[pos]
                           for pos in range(self.cols)
                           if pos in list]
        self.cols = len(self.listOfCols)
Beispiel #10
0
def findAnomalousPoints(windowsList, method):
    """Count observation names across windows and rank them by frequency.

    Every observation returned by ``getUniqueObservations()`` of every
    window is reduced to a name and counted once.

    Args:
        windowsList: iterable of objects exposing ``getUniqueObservations()``.
        method: how an observation string is reduced to a name:
            ``'CLASSNAME_ONLY'`` takes the second '-'-separated field up to
            its first '$'; ``'CLASSNAME_AND_METHOD'`` takes the whole second
            '-'-separated field. Observations without '-' are used as-is.
            Any other value is reported through HandleError.exit.

    Returns:
        A list of ``(name, count)`` tuples, highest count first.
    """
    occurrenceNumber = {}
    for win in windowsList:
        for o in win.getUniqueObservations():
            # Parse observation
            if method == 'CLASSNAME_ONLY':
                # Only split by '-' in Java applications.
                if '-' in o:
                    name = o.split('-')[1].split('$')[0]
                else:
                    name = o
            elif method == 'CLASSNAME_AND_METHOD':
                if '-' in o:
                    name = o.split('-')[1]
                else:
                    name = o
            else:
                HandleError.exit('in findAnomalousPoints: unknown method')

            occurrenceNumber[name] = occurrenceNumber.get(name, 0) + 1

    # Sort ascending and then reverse (rather than reverse=True) so that
    # names with equal counts keep the order callers have always seen.
    ranked = sorted(occurrenceNumber, key=occurrenceNumber.get)
    ranked.reverse()
    return [(e, occurrenceNumber[e]) for e in ranked]
Beispiel #11
0
def getMode(options):
    """Return the operational mode selected on the command line.

    Args:
        options: object with boolean ``SELECT_METRICS`` and
            ``SELECT_REGIONS`` attributes.

    Returns:
        ``'SELECT_METRICS'`` or ``'SELECT_REGIONS'``. Selecting both flags
        or neither is reported through HandleError.exit.
    """
    want_metrics = options.SELECT_METRICS
    want_regions = options.SELECT_REGIONS

    # Can only specify one operational mode
    both_chosen = want_metrics is True and want_regions is True
    none_chosen = want_metrics is False and want_regions is False

    if both_chosen:
        HandleError.exit('Cannot use these options together:\n --select-metrics & --select-regions. \nUse -h option for help.')

    if none_chosen:
        HandleError.exit('Please use one of these options:\n--select-metrics OR --select-regions. \nUse -h option for help.')

    if want_metrics is True:
        mode = 'SELECT_METRICS'
    elif want_regions is True:
        mode = 'SELECT_REGIONS'

    return mode
Beispiel #12
0
    def normalize(matrix):
        """Rescale the second column of a matrix to percentages of a maximum.

        Each second-column value is divided by ``matrix.max(1)`` (presumably
        the maximum of column 1 -- confirm against Matrix.max) and
        multiplied by 100.

        Args:
            matrix: a Matrix (or subclass) with at most two columns.

        Returns:
            A new two-column Matrix holding the original first column and
            the rescaled values.
        """
        if not isinstance(matrix, Matrix):
            HandleError.exit("In Histogram.normalize: incorrect type: " +
                             str(type(matrix)))
        if matrix.cols > 2:
            HandleError.exit("In Histogram.normalize: matrix cols > 2")

        # NOTE(review): a maximum of 0 makes the division below fail for
        # plain Python numbers -- confirm callers never pass such data.
        maximum = float(matrix.max(1))
        v = [matrix.getRow(r)[1] / maximum * 100 for r in range(matrix.rows)]

        ret = Matrix(matrix.rows, 2)
        ret.addColumn(matrix.getCol(0))
        ret.addColumn(v)
        return ret
Beispiel #13
0
def calculateCorrelation(listX, listY):
    """Return the Pearson correlation coefficient of two equal-length lists.

    The coefficient is the sum of the products of the standardized values
    divided by ``n - 1``. The standardization uses the sample standard
    deviation (``ddof=1``) so that it matches that divisor and perfectly
    correlated data yields exactly 1 (or -1).

    Args:
        listX: sequence of numbers.
        listY: sequence of numbers of the same length as *listX*.

    Returns:
        The coefficient as a float, or 0 when it is undefined (fewer than
        two points, a constant input, or a NaN result). A length mismatch
        is reported through HandleError.exit.
    """
    if len(listX) != len(listY):
        HandleError.exit("In calculateCorrelation: lists of different sizes")

    n = len(listX)
    if n < 2:
        # No spread can be measured from fewer than two points.
        return 0

    xs = numpy.asarray(listX, dtype=float)
    ys = numpy.asarray(listY, dtype=float)
    std_x = xs.std(ddof=1)
    std_y = ys.std(ddof=1)
    if std_x == 0 or std_y == 0:
        # A constant series has no defined correlation.
        return 0

    z_x = (xs - xs.mean()) / std_x
    z_y = (ys - ys.mean()) / std_y
    ret = float(numpy.sum(z_x * z_y) / (n - 1))
    if math.isnan(ret):
        ret = 0
    return ret
Beispiel #14
0
    def addColumn(self, col):
        """Append a column to the matrix.

        A Column instance is appended as-is, without further checks. Any
        other value is treated as a sequence: it must have ``self.rows``
        entries and the matrix must still have room for another column
        (violations are reported through HandleError.exit). The first
        sequence added is wrapped in a StringColumn, later ones in a
        NumColumn.

        Args:
            col: a Column instance or a sequence of row values.
        """
        # Ready-made Column objects bypass the size checks below.
        if isinstance(col, Column):
            self.listOfCols.append(col)
            return

        # Plain sequence from here on.
        if len(col) != self.rows:
            HandleError.exit("Invalid size of column")

        already_stored = len(self.listOfCols)
        if already_stored + 1 > self.cols:
            HandleError.exit("Too many columns")

        wrapped = StringColumn(col) if already_stored == 0 else NumColumn(col)
        self.listOfCols.append(wrapped)
Beispiel #15
0
def getMode(options):
    """Return the single operational mode selected on the command line.

    Args:
        options: object with ``SELECT_METRICS``, ``SELECT_REGIONS`` and
            ``SELECT_CLASSNAME`` flag attributes.

    Returns:
        ``'SELECT_METRICS'``, ``'SELECT_REGIONS'`` or ``'SELECT_CLASSNAME'``
        for the one flag that is True. Selecting more than one flag, or
        none, is reported through HandleError.exit.
    """
    select_metrics = options.SELECT_METRICS
    select_regions = options.SELECT_REGIONS
    select_classname = options.SELECT_CLASSNAME

    flags = (select_metrics, select_regions, select_classname)
    chosen = sum(1 for flag in flags if flag is True)

    # Can only specify one operational mode. Counting the flags covers every
    # pairing, including --select-regions together with --select-classname.
    if chosen > 1:
        HandleError.exit('Cannot use these options together:\n --select-metrics & --select-regions & --select-classname. \nUse -h option for help.')

    # Also covers flags left unset (e.g. None), which would otherwise leave
    # the mode undefined.
    if chosen == 0:
        HandleError.exit('Please use one of these options:\n--select-metrics OR --select-regions OR --select-classname. \nUse -h option for help.')

    mode = None
    if select_metrics is True:
        mode = 'SELECT_METRICS'
    elif select_regions is True:
        mode = 'SELECT_REGIONS'
    elif select_classname is True:
        mode = 'SELECT_CLASSNAME'

    return mode
Beispiel #16
0
    def histo(matrix):
        """Sum the second-column values of a matrix per first-column key.

        Args:
            matrix: a Matrix with at most two columns; other types or wider
                matrices are reported through HandleError.exit.

        Returns:
            A new two-column Matrix with one row per distinct key, built
            from the keys and their accumulated totals.
        """
        if matrix.__class__ != Matrix:
            HandleError.exit("In Histogram.histo: incorrect type: " +
                             str(type(matrix)))
        if matrix.cols > 2:
            HandleError.exit("In Histogram.histo: matrix cols > 2")

        totals = {}
        for index in range(matrix.rows):
            record = matrix.getRow(index)
            label = record[0]
            if label not in totals:
                # First sighting of this key: start its total.
                totals[label] = record[1]
            else:
                totals[label] = totals[label] + record[1]

        result = Matrix(len(totals), 2)
        result.addColumn(totals.keys())
        result.addColumn(totals.values())

        return result
Beispiel #17
0
    def aggregatesDistance(self, win2):
        """Return the distance between the aggregates of *win2* and this window.

        The value is whatever ``Window.listDistance`` computes for the two
        ``aggr`` lists (the other window's first, this window's second).

        Args:
            win2: the Window to compare against; anything else is reported
                through HandleError.exit.
        """
        if not isinstance(win2, Window):
            HandleError.exit("In aggregateDistance: argument is not a window")

        theirs = win2.aggr
        ours = self.aggr
        return Window.listDistance(theirs, ours)
Beispiel #18
0
def getPrintAbnormal(options):
    """Return ``options.PRINT_ABNORMAL``.

    A value of None is reported through HandleError.exit instead.
    """
    preference = options.PRINT_ABNORMAL
    if preference is not None:
        return preference
    HandleError.exit('No print preference given.\nUse -h option for help.')
Beispiel #19
0
def getClassName(options):
    """Return ``options.CLASSNAME``.

    A value of None is reported through HandleError.exit instead.
    """
    class_name = options.CLASSNAME
    if class_name is not None:
        return class_name
    HandleError.exit('No classname given.\nUse -h option for help.')
Beispiel #20
0
def getNormalFile(options):
    """Return ``options.NFILE``, the normal-traces file.

    A value of None is reported through HandleError.exit instead.
    """
    normal_file = options.NFILE
    if normal_file is not None:
        return normal_file
    HandleError.exit('No normal-traces file given.\nUse -h option for help.')
Beispiel #21
0
def getNormalFile(options):
    """Return ``options.NFILE``, the normal-traces file.

    A value of None is reported through HandleError.exit instead.
    """
    traces_path = options.NFILE
    if traces_path is not None:
        return traces_path
    HandleError.exit('No normal-traces file given.\nUse -h option for help.')