def run(self, **kwargs):
    """Collect the series to analyse and attach them to the step result.

    The received keyword arguments are kept as the result dictionary. The
    entries returned by ``_filter_data_from_serenity`` are extended with
    one database query per metric returned by ``_get_which_metrics_query``
    and the combined mapping is stored under ``DATA_FIELD``.

    :param kwargs: keyword arguments handed to this step.
    :return: the received keyword arguments, with ``DATA_FIELD`` set to the
        filtered input updated with one ``(database_output, fields)`` tuple
        per queried metric.
    """
    self._result = kwargs
    # Data filtered from the Serenity input...
    data_to_test = self._filter_data_from_serenity(kwargs)
    # ... will be updated by database data.
    metrics_to_query = self._get_which_metrics_query()
    log.debug(metrics_to_query)

    # TODO: How big a set of data should we analyze, in terms of time
    # (the where statement)?
    # The clause does not depend on the metric, so it is built once.
    # NOTE(review): self.source is interpolated unescaped; this assumes it
    # is trusted configuration rather than user input -- confirm.
    where_clause = ("time > now() - " + TIME_FROM_NOW +
                    " and source = '%s'" % self.source)

    for metric, fields in metrics_to_query.iteritems():
        # NOTE(review): `compile` is presumably the query builder's
        # compiler imported at module level, not the builtin -- confirm.
        query_to_execute = compile(
            Q().tables('"' + metric + '"')
            .fields(fields["field"])
            .where(where_clause)
        )
        # Render the query once and reuse the text for logging and execution.
        query_string = self._format_query_to_string(query_to_execute)
        log.debug(query_string)
        database_output = self._get_data_from_database(query_string)
        data_to_test[metric] = (database_output, fields)

    self._result[DATA_FIELD] = data_to_test
    return self._result
def run(self, **kwargs):
    """Insert the data produced by a previous step.

    :param kwargs: must contain ``CfFinisher.KEY_FOR_DATA`` (the data to
        insert) and ``CfFinisher.KEY_NAME`` (passed along as the second
        argument of ``_insert_data``).
    :return: ``CfFinisher.STATUS_CODE_SUCCESSFUL`` once ``_insert_data``
        has returned.
    :raises CfFinisherException: if ``CfFinisher.KEY_FOR_DATA`` is missing.
    :raises KeyError: if the data is present but ``CfFinisher.KEY_NAME``
        is not.
    """
    # TODO: return two matrices as the result?
    log.debug("CF_FINISH")

    # Guard clause: without data there is nothing to insert.
    if CfFinisher.KEY_FOR_DATA not in kwargs:
        raise CfFinisherException(
            "No data for insert retrieved from a previous step. Failing...")

    payload = kwargs[CfFinisher.KEY_FOR_DATA]
    payload_name = kwargs[CfFinisher.KEY_NAME]
    self._insert_data(payload, payload_name)
    return CfFinisher.STATUS_CODE_SUCCESSFUL
def _prepare_data(self, key_list, corr_matrix):
    """Turn a positional matrix into a nested dictionary keyed by name.

    Row ``i`` of ``corr_matrix`` is stored under ``key_list[i]``; inside
    that row, column ``j`` is stored under ``key_list[j]``.

    Cells are addressed by position. Looking positions up with
    ``list.index`` returns the first match only, so rows containing
    repeated values (for example ``[1.0, 0.5, 0.5]``) ended up with
    missing or misplaced entries.

    :param key_list: names of the rows/columns, in matrix order.
    :param corr_matrix: indexable sequence of iterable rows.
    :return: ``{row_key: {column_key: value}}``.
    :raises IndexError: if ``corr_matrix`` has fewer rows than
        ``key_list`` has keys, or a row has more values than there are
        keys.
    """
    result = {}
    for row_index, key in enumerate(key_list):
        row = corr_matrix[row_index]
        result[key] = {
            key_list[column_index]: value
            for column_index, value in enumerate(row)
        }
    log.debug(result)
    return result
def run(self, **kwargs):
    """Fit a factor analysis on the measurements found under ``DATA_FIELD``.

    Each entry of ``kwargs[DATA_FIELD]`` is a pair whose first item
    provides ``get_points()`` and whose second item is forwarded to
    ``_post_effect_output``. Measurements without values are logged and
    skipped. The remaining series are cut to the length of the shortest
    one before the correlation matrix is computed.

    The number of factors is the number of eigenvalues of the correlation
    matrix that are greater than 1.

    :param kwargs: must contain ``DATA_FIELD``.
    :return: dictionary with ``CfFinisher.KEY_FOR_DATA`` (the output of
        ``_prepare_data``) and ``CfFinisher.KEY_NAME`` (``"factors"``).
    :raises ValueError: if a series is more than five values longer than
        the shortest one.
    """
    max_surplus = 5  # longest tail that may be cut off silently

    key_list = []
    input_matrix = []
    for key, val in kwargs[DATA_FIELD].iteritems():
        points = list(val[0].get_points())
        values = self._post_effect_output(points, val[1])
        if len(values) == 0:
            log.error("No data points for measurement: " + str(key))
            continue
        log.debug("Last value in series:" + str(values[-1]))
        log.info(str(key) + " has length " + str(len(values)))
        input_matrix.append(values)
        key_list.append(key)

    min_length = min(len(series) for series in input_matrix) if input_matrix else 0

    # Write the shortened series back by index: rebinding the loop variable
    # alone would leave ``input_matrix`` untouched and the rows ragged.
    for index, series in enumerate(input_matrix):
        surplus = len(series) - min_length
        if surplus == 0:
            continue
        if surplus > max_surplus:
            raise ValueError("Measurements have different lengths!")
        input_matrix[index] = series[:min_length]

    log.info("Counting Common factor... from " + str(len(key_list)) + " dimensions.")
    # TODO: Move to Common Factor!
    corrmat = np.nan_to_num(np.corrcoef(input_matrix))
    log.info(corrmat)

    eigenvalues, _ = np.linalg.eig(corrmat)
    # Keep only eigenvalues above 1, scaled by the number of dimensions.
    eigenvalues = [x / len(key_list) for x in eigenvalues if x > 1]

    # One row per observation, one column per measurement.
    observations = np.transpose(np.array(input_matrix))
    # NOTE(review): FactorAnalysis is presumably sklearn's estimator -- confirm.
    factor = FactorAnalysis(n_components=len(eigenvalues)).fit(observations)
    log.info(factor.components_)

    return {
        CfFinisher.KEY_FOR_DATA: self._prepare_data(key_list, eigenvalues, factor.components_),
        CfFinisher.KEY_NAME: "factors",
    }
def run(self, **kwargs):
    """Compute the correlation matrix of the measurements under ``DATA_FIELD``.

    Each entry of ``kwargs[DATA_FIELD]`` is a pair whose first item
    provides ``get_points()`` and whose second item is forwarded to
    ``_post_effect_output``. Measurements without values are logged and
    skipped. The remaining series are cut to the length of the shortest
    one before ``np.corrcoef`` is applied.

    :param kwargs: must contain ``DATA_FIELD``.
    :return: dictionary with ``PcaFinisher.KEY_FOR_DATA`` (the output of
        ``_prepare_data`` for the correlation matrix) and
        ``PcaFinisher.KEY_NAME`` (``'correlations'``).
    :raises ValueError: if a series is more than five values longer than
        the shortest one.
    """
    max_surplus = 5  # longest tail that may be cut off silently

    key_list = []
    input_matrix = []
    for key, val in kwargs[DATA_FIELD].iteritems():
        points = list(val[0].get_points())
        values = self._post_effect_output(points, val[1])
        if len(values) == 0:
            log.error("No data points for measurement: " + str(key))
            continue
        log.debug("Last value in series:" + str(values[-1]))
        log.info(str(key) + " has length " + str(len(values)))
        input_matrix.append(values)
        key_list.append(key)

    min_length = min(len(series) for series in input_matrix) if input_matrix else 0

    # Write the shortened series back by index: rebinding the loop variable
    # alone would leave ``input_matrix`` untouched and the rows ragged.
    for index, series in enumerate(input_matrix):
        surplus = len(series) - min_length
        if surplus == 0:
            continue
        if surplus > max_surplus:
            raise ValueError("Measurements have different lengths!")
        log.debug(len(series))
        trimmed = series[:min_length]
        log.debug(len(trimmed))
        input_matrix[index] = trimmed

    log.info('Counting PCA... from ' + str(len(key_list)) + " dimensions.")
    # np.corrcoef returns a 0-d result for a single series; keep it 2-D so
    # that _prepare_data can always index rows and columns.
    corr_matrix = np.atleast_2d(np.corrcoef(input_matrix))
    log.info(corr_matrix)

    return {
        PcaFinisher.KEY_FOR_DATA: self._prepare_data(key_list, corr_matrix.tolist()),
        PcaFinisher.KEY_NAME: 'correlations',
    }