def run(self, **kwargs):
        """Collect the data to analyse and attach it to the step result.

        The keyword arguments are kept as the step result. The input is first
        reduced by ``_filter_data_from_serenity``; then, for every metric
        returned by ``_get_which_metrics_query``, the matching rows are read
        from the database and stored under the metric name.

        :param kwargs: input of this step; stored as ``self._result``.
        :return: ``self._result`` with ``DATA_FIELD`` set to a mapping of
            metric name to ``(database_output, fields)``.
        """
        self._result = kwargs

        # Data filtered from the Serenity input ...
        data_to_test = self._filter_data_from_serenity(kwargs)

        # ... is extended (or overwritten per metric) with database data.
        metrics_to_query = self._get_which_metrics_query()
        log.debug(metrics_to_query)

        # TODO: How big a set of data should we analyze, in the meaning of
        # time (where statement)?
        # The filter does not depend on the metric, so it is built once.
        # NOTE(review): self.source is interpolated unescaped into the query;
        # confirm it can never carry untrusted text.
        where_clause = ("time > now() - " + TIME_FROM_NOW +
                        " and source = '%s'" % self.source)

        for metric, fields in metrics_to_query.items():
            query_to_execute = compile(
                Q().tables('"' + metric + '"')
                .fields(fields["field"])
                .where(where_clause)
            )

            # Render the query once and reuse it for logging and execution.
            query_string = self._format_query_to_string(query_to_execute)
            log.debug(query_string)

            database_output = self._get_data_from_database(query_string)
            data_to_test[metric] = (database_output, fields)

        self._result[DATA_FIELD] = data_to_test
        return self._result
Example #2
0
 def run(self, **kwargs):
     """Insert the data handed over by the previous step.

     Reads the payload from ``kwargs[CfFinisher.KEY_FOR_DATA]`` and its name
     from ``kwargs[CfFinisher.KEY_NAME]`` and passes both to ``_insert_data``.

     :return: ``CfFinisher.STATUS_CODE_SUCCESSFUL`` once the data is inserted.
     :raises CfFinisherException: if ``CfFinisher.KEY_FOR_DATA`` is missing
         from ``kwargs``.
     """
     # TODO: 2 matrixes as result?
     log.debug("CF_FINISH")

     # Guard clause: without a payload there is nothing to insert.
     if CfFinisher.KEY_FOR_DATA not in kwargs:
         raise CfFinisherException("No data for insert retrieved from a previous step. Failing...")

     self._insert_data(
         kwargs[CfFinisher.KEY_FOR_DATA],
         kwargs[CfFinisher.KEY_NAME],
     )
     return CfFinisher.STATUS_CODE_SUCCESSFUL
    def _prepare_data(self, key_list, corr_matrix):
        result = {}
        for key in key_list:
            index = key_list.index(key)
            result[key] = {}

            for val in corr_matrix[index]:
                val_index = corr_matrix[index].index(val)
                result[key][key_list[val_index]] = val

        log.debug(result)
        return result
Example #4
0
    def run(self, **kwargs):
        """Run factor analysis over the measurements from a previous step.

        Every entry of ``kwargs[DATA_FIELD]`` is read as ``(series, fields)``:
        the points of ``series`` are passed together with ``fields`` through
        ``_post_effect_output``. Measurements that yield no values are logged
        and skipped. The remaining series are cut to the shortest length
        before the correlation matrix and the factor model are computed.

        :param kwargs: must contain ``DATA_FIELD``.
        :return: dict with ``CfFinisher.KEY_FOR_DATA`` (the output of
            ``_prepare_data``) and ``CfFinisher.KEY_NAME`` set to "factors".
        :raises KeyError: if ``DATA_FIELD`` is missing from ``kwargs``.
        :raises ValueError: if a measurement is more than 5 points longer
            than the shortest one.
        """
        input_matrix = []
        key_list = []
        data_to_test = kwargs[DATA_FIELD]

        min_length = 99999999999
        for key, val in data_to_test.items():
            points = list(val[0].get_points())

            values = self._post_effect_output(points, val[1])
            # Explicit length check: works for lists and for array-like values.
            if len(values) == 0:
                log.error("No data points for measurement: " + str(key))
                continue

            log.debug("Last value in series:" + str(values[-1]))

            log.info(str(key) + " has length " + str(len(values)))

            min_length = min(len(values), min_length)

            input_matrix.append(values)
            key_list.append(key)

        # Series may be up to 5 points longer than the shortest one; drop the
        # surplus tail so all rows have equal length. The rows are collected
        # into a new list because rebinding the loop variable alone would
        # leave input_matrix unchanged (and ragged).
        trimmed_matrix = []
        for values in input_matrix:
            differ = len(values) - min_length
            if differ > 5:
                raise ValueError("Measurements have different lengths!")
            if differ > 0:
                values = values[:min_length]
            trimmed_matrix.append(values)
        input_matrix = trimmed_matrix

        log.info("Counting Common factor... from " + str(len(key_list)) + " dimensions.")
        # TODO: Move to Common Factor!

        corrmat = np.nan_to_num(np.corrcoef(input_matrix))
        log.info(corrmat)
        eigenvalues, _ = np.linalg.eig(corrmat)
        # Keep only eigenvalues greater than 1, scaled by the number of
        # measurements; their count is the number of factors to fit.
        # A list (not a lazy filter object) is required because len() is
        # taken below.
        eigenvalues = [x / len(key_list) for x in eigenvalues if x > 1]

        # Transpose so that each row holds one observation across all
        # measurements before fitting.
        input_matrix = np.transpose(np.array(input_matrix))
        factor = FactorAnalysis(n_components=len(eigenvalues)).fit(input_matrix)
        log.info(factor.components_)

        return {
            CfFinisher.KEY_FOR_DATA: self._prepare_data(key_list, eigenvalues, factor.components_),
            CfFinisher.KEY_NAME: "factors",
        }
    def run(self, **kwargs):
        """Compute the correlation matrix of the measurements from a previous step.

        Every entry of ``kwargs[DATA_FIELD]`` is read as ``(series, fields)``:
        the points of ``series`` are passed together with ``fields`` through
        ``_post_effect_output``. Measurements that yield no values are logged
        and skipped. The remaining series are cut to the shortest length
        before ``np.corrcoef`` is applied.

        :param kwargs: must contain ``DATA_FIELD``.
        :return: dict with ``PcaFinisher.KEY_FOR_DATA`` (the output of
            ``_prepare_data`` for the correlation matrix) and
            ``PcaFinisher.KEY_NAME`` set to 'correlations'.
        :raises KeyError: if ``DATA_FIELD`` is missing from ``kwargs``.
        :raises ValueError: if a measurement is more than 5 points longer
            than the shortest one.
        """
        input_matrix = []
        key_list = []
        data_to_test = kwargs[DATA_FIELD]

        min_length = 99999999999
        for key, val in data_to_test.items():
            points = list(val[0].get_points())

            values = self._post_effect_output(points, val[1])
            # Explicit length check: works for lists and for array-like values.
            if len(values) == 0:
                log.error("No data points for measurement: " + str(key))
                continue

            log.debug("Last value in series:" + str(values[-1]))

            log.info(str(key) + " has length " + str(len(values)))

            min_length = min(len(values), min_length)

            input_matrix.append(values)
            key_list.append(key)

        # Series may be up to 5 points longer than the shortest one; drop the
        # surplus tail so all rows have equal length. The rows are collected
        # into a new list because rebinding the loop variable alone would
        # leave input_matrix unchanged (and ragged).
        trimmed_matrix = []
        for values in input_matrix:
            differ = len(values) - min_length
            if differ > 5:
                raise ValueError("Measurements have different lengths!")
            if differ > 0:
                log.debug(len(values))
                values = values[:min_length]
                log.debug(len(values))
            trimmed_matrix.append(values)
        input_matrix = trimmed_matrix

        log.info('Counting PCA... from ' + str(len(key_list)) + " dimensions.")

        corr_matrix = np.corrcoef(input_matrix)

        log.info(corr_matrix)
        return {PcaFinisher.KEY_FOR_DATA: self._prepare_data(key_list,
                                                     corr_matrix.tolist()),
                PcaFinisher.KEY_NAME: 'correlations'}