Example #1
    def get_avg_vectors(self):
        '''
        (self) -> None
        Create a 30-day rolling average of log statistics to date
        '''
        month_vecs = []
        grouped_vecs = {}

        # create a list of esconstants.NUM_DAYS dates, starting the day
        # before yesterday since yesterday is the test data set
        start = (datetime.now(pytz.timezone(esconstants.TIMEZONE)) -
                 timedelta(days=esconstants.TEST_DAY + 1))
        dates = [start]
        # Monday = 0, Sunday = 6; take the weekday from the same moment
        # so the two values cannot straddle midnight
        dayofweek = start.weekday()
        for i in range(1, esconstants.NUM_DAYS):
            dates.append(start - timedelta(days=i))

        # create argument namespace for ESQuery
        args = argparse.Namespace()

        # perform elasticsearch query
        query = ESQuery()
        args.action = 'stringquery'
        args.count = False
        args.fields = None
        args.host = esconstants.ES_HOST
        args.index = 'logstash*'
        args.list = False
        args.query = None
        args.size = esconstants.ES_QUERY_SIZE
        args.terms = ['*']

        # get 30 days' worth of logs
        for adate in dates:
            # create a date range covering this single day, pass to esquery
            day_str = adate.isoformat()[:10]
            args.range = [esconstants.KEY_ES_TIMESTAMP, day_str, day_str]
            query.post(args)
            result = query.response.json()
            # skip days with no results
            total = result[esconstants.KEY_ES_SEARCH][esconstants.KEY_ES_TOTAL]
            if total == 0:
                print('Dropped day: ' + day_str)
                # step back one day of the week
                dayofweek = (dayofweek - 1) % 7
                continue
            self._result_data = result
            self._count_day()
            # append the day's results to the month as a
            # (weekday, vectors) pair
            print('Got day: ' + day_str)
            month_vecs.append((dayofweek, self._result_vectors))
            # step back one day of the week
            dayofweek = (dayofweek - 1) % 7
        print()

        # generate a single structure containing the average of each day in
        # the past 30; first group all like vectors by day, type, and subtype
        for day, vecs in month_vecs:
            if day not in grouped_vecs:
                grouped_vecs[day] = {}
            # organize the structure by type and subtype
            grouped_vecs[day] = self._organize_vectors(vecs, grouped_vecs[day])
        # calculate average scalars by day, type, subtype
        for day_vecs in grouped_vecs.values():
            for type_vecs in day_vecs.values():
                for subtype, vecs in type_vecs.items():
                    # a 1-D array holds only a single line, so there is
                    # nothing to average
                    if len(vecs.shape) > 1:
                        type_vecs[subtype] = numpy.average(vecs, axis=0)
        self.grouped_vecs = grouped_vecs
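
The core technique above is worth seeing in isolation: vectors are bucketed by (weekday, type, subtype) and each multi-row bucket is collapsed to its element-wise mean with numpy.average(..., axis=0). Below is a minimal standalone sketch of that grouping-and-averaging step; the sample data, type names, and the stacking via numpy.vstack are made up for illustration (the real method builds its buckets with the project-local _organize_vectors helper, which is not shown here):

import numpy

# made-up samples: one (weekday, type, subtype, vector) tuple per log line
samples = [
    (0, 'syslog', 'sshd', numpy.array([1.0, 2.0, 3.0])),
    (0, 'syslog', 'sshd', numpy.array([3.0, 2.0, 1.0])),
    (1, 'syslog', 'cron', numpy.array([5.0, 5.0, 5.0])),
]

grouped = {}
for day, type_key, subtype, vec in samples:
    bucket = grouped.setdefault(day, {}).setdefault(type_key, {})
    if subtype in bucket:
        # stack same-bucket vectors into a 2-D array, one row per sample
        bucket[subtype] = numpy.vstack([bucket[subtype], vec])
    else:
        bucket[subtype] = vec

for day_vecs in grouped.values():
    for type_vecs in day_vecs.values():
        for subtype, vecs in type_vecs.items():
            # a 1-D array is a single sample, so there is nothing to average
            if len(vecs.shape) > 1:
                type_vecs[subtype] = numpy.average(vecs, axis=0)

print(grouped[0]['syslog']['sshd'])  # [2. 2. 2.]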
Example #2
    args = argparse.Namespace()

    args.action = 'stringquery'
    args.count = False
    args.fields = None
    args.host = esconstants.ES_HOST
    args.index = 'logstash*'
    args.list = False
    args.query = None
    args.range = [esconstants.KEY_ES_TIMESTAMP, str(day), str(day)]
    # get all the things
    args.size = 1000000
    args.terms = ['*']

    # perform elasticsearch query
    query = ESQuery()
    query.post(args)
    response = query.response

    # stuff the data into the normalization class
    r = response.json()
    # load the vectorize object with the day's results (named 'vectors'
    # to avoid shadowing the 'day' date string used in the range above)
    vectors = ESVectorize(r)
    # obtain vectors for the day and for the training data
    vectors.get_day_vectors()
    vectors.get_avg_vectors()

    # now analyze and compare
    result = ESAnalyze(vectors.day_vecs, vectors.grouped_vecs, plot=False)
    result.data_clean()
    result.outlier_detection()
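
ESAnalyze's data_clean() and outlier_detection() internals are not shown in this example, so purely as a hedged illustration, here is one common way such a comparison could work: score each component of the day's vector against the mean and standard deviation of the 30-day baseline and flag large deviations. The function name, threshold, and toy arrays below are assumptions for the sketch, not part of the ESAnalyze API:

import numpy

def flag_outliers(day_vec, baseline, threshold=3.0):
    # hypothetical z-score check, not necessarily what ESAnalyze does;
    # baseline is a 2-D array with one row per historical day
    mean = numpy.average(baseline, axis=0)
    std = numpy.std(baseline, axis=0)
    # guard components that never vary against division by zero
    std[std == 0] = 1.0
    zscores = numpy.abs((day_vec - mean) / std)
    return numpy.nonzero(zscores > threshold)[0]

baseline = numpy.array([[10.0, 5.0], [11.0, 5.0], [9.0, 5.0]])
today = numpy.array([10.5, 50.0])
print(flag_outliers(today, baseline))  # [1]: the second component deviates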