def train_model(self):
    """Compute and store the mean and stddev baselines for the metrics in scope.

    The training window ends ``self.offset_secs`` seconds before the current
    time and covers the preceding ``self.train_secs`` seconds. Two pulls are
    made via ``get_data`` (``group='average'`` and ``group='stddev'``); each
    result is averaged and stored as a one-column frame:

    * ``self.df_mean`` -- column named ``"mean"``
    * ``self.df_std``  -- column named ``"std"``

    These are kept for use in calculating the z-score at each timestep.
    """
    window_end = int(datetime.now().timestamp()) - self.offset_secs
    window_start = window_end - self.train_secs

    # Baseline of averaged values over the training window.
    averaged = get_data(
        self.host,
        self.charts_in_scope,
        window_start,
        window_end,
        points=10,
        group='average',
        col_sep='.',
    )
    self.df_mean = averaged.mean().to_frame().rename(columns={0: "mean"})

    # Baseline of stddev values over the same window.
    deviations = get_data(
        self.host,
        self.charts_in_scope,
        window_start,
        window_end,
        points=10,
        group='stddev',
        col_sep='.',
    )
    self.df_std = deviations.mean().to_frame().rename(columns={0: "std"})
def get_data(self):
    """Fetch data for ``self.hosts`` / ``self.charts`` and tidy it into ``self.df``.

    The module-level ``get_data`` helper is called for the
    ``self.after``..``self.before`` window (no credentials are passed). The
    result is then cleaned up:

    1. duplicated column labels are removed, keeping the first occurrence;
    2. columns that are entirely N/A are dropped;
    3. remaining gaps are forward filled and then backward filled.
    """
    # Inside the method body ``get_data`` resolves to the module-level helper,
    # not to this method.
    self.df = get_data(
        self.hosts,
        self.charts,
        after=self.after,
        before=self.before,
        user=None,
        pwd=None,
    )

    # get_data() may hand back the same column more than once.
    first_occurrence = ~self.df.columns.duplicated()
    self.df = (
        self.df.loc[:, first_occurrence]
        .dropna(axis=1, how='all')  # drop any empty columns
        .ffill()                    # then try to remove any N/A values
        .bfill()
    )
def train(self, models_to_train=None, train_data_after=0, train_data_before=0):
    """Fetch the training data and fit each of the requested models.

    When both ``train_data_after`` and ``train_data_before`` are positive they
    define the training window. Otherwise the window ends
    ``self.offset_n_secs`` seconds before now and covers the preceding
    ``self.train_n_secs`` seconds.

    Side effects: sets ``self.expected_cols`` to the fetched column names,
    calls ``self.try_fit``, logs a summary through ``self.info`` and records
    ``self.runs_counter`` in ``self.last_train_at``.

    :param models_to_train <list>: list of models to train on.
    :param train_data_after <int>: integer timestamp for start of train data.
    :param train_data_before <int>: integer timestamp for end of train data.
    """
    now = datetime.now().timestamp()

    # work out the training window
    explicit_window = train_data_after > 0 and train_data_before > 0
    if explicit_window:
        after, before = train_data_after, train_data_before
    else:
        before = int(now) - self.offset_n_secs
        after = before - self.train_n_secs

    # get training data
    fetch_kwargs = {
        'host_charts_dict': self.host_charts_dict,
        'host_prefix': True,
        'host_sep': '::',
        'after': after,
        'before': before,
        'sort_cols': True,
        'numeric_only': True,
        'protocol': self.protocol,
        'float_size': 'float32',
        'user': self.username,
        'pwd': self.password,
    }
    df_train = get_data(**fetch_kwargs).ffill()

    # remember the raw column layout before any custom dimensions are added
    self.expected_cols = list(df_train.columns)
    if self.custom_models:
        df_train = self.add_custom_models_dims(df_train)

    # train model
    self.try_fit(df_train, models_to_train=models_to_train)
    self.info(
        f'training complete in {round(time.time() - now, 2)} seconds (runs_counter={self.runs_counter}, model={self.model}, train_n_secs={self.train_n_secs}, models={len(self.fitted_at)}, n_fit_success={self.n_fit_success}, n_fit_fails={self.n_fit_fail}, after={after}, before={before}).'
    )
    self.last_train_at = self.runs_counter
def results():
    """Score metrics in a highlight window against a baseline window and return the result.

    Parameters are read from the current ``request`` via ``parse_params``.
    Data is pulled once from ``remote_host`` for the span
    ``baseline_after``..``highlight_before`` and split into a baseline array
    and a highlight array. ``run_model`` scores them and ``results_to_df``
    turns the scores into a frame using ``score_thold``.

    NOTE(review): this looks like a Flask view (it uses ``request``,
    ``app.logger``, ``render_template`` and ``jsonify``), but its route
    registration is not visible here -- confirm.

    Returns:
        * ``return_type == 'html'``: the rendered ``results.html`` template
          with one entry per chart found in the results.
        * ``return_type == 'json'``: ``jsonify`` of the results as a list of
          records.
        * anything else: ``None``.
    """
    time_start = time.time()

    # get params
    params = parse_params(request)
    app.logger.info(f'... params = {params}')
    highlight_before = params['highlight_before']
    highlight_after = params['highlight_after']
    baseline_before = params['baseline_before']
    baseline_after = params['baseline_after']
    return_type = params['return_type']
    remote_host = params['remote_host']
    local_host = params['local_host']
    model = params['model']
    score_thold = params['score_thold']

    # get charts to pull data for
    charts = get_chart_list(host=remote_host)

    # get data: one pull spanning the start of the baseline to the end of the highlight
    df = get_data(
        remote_host,
        charts,
        after=baseline_after,
        before=highlight_before,
        diff=True,
        ffill=True,
        numeric_only=True,
        nunique_thold=0.05,
    )
    colnames = list(df.columns)
    # NOTE(review): the window bounds are interpolated into a query expression;
    # presumably parse_params() has already coerced them to numbers -- confirm,
    # since DataFrame.query evaluates the string it is given.
    arr_baseline = df.query(f'{baseline_after} <= time_idx <= {baseline_before}').values
    arr_highlight = df.query(f'{highlight_after} <= time_idx <= {highlight_before}').values
    # distinct chart names actually present in the data (column prefix before '|')
    charts = list({col.split('|')[0] for col in colnames})
    app.logger.info(f'... len(charts) = {len(charts)}')
    app.logger.info(f'... len(colnames) = {len(colnames)}')
    app.logger.info(f'... arr_baseline.shape = {arr_baseline.shape}')
    app.logger.info(f'... arr_highlight.shape = {arr_highlight.shape}')
    time_got_data = time.time()
    app.logger.info(f'... time start to data = {time_got_data - time_start}')

    # get scores
    results_dict = run_model(model, colnames, arr_baseline, arr_highlight)
    time_got_scores = time.time()
    app.logger.info(f'... time data to scores = {round(time_got_scores - time_got_data, 2)}')

    # get results to df
    df_results = results_to_df(results_dict, score_thold)
    time_done = time.time()
    app.logger.info(f'... time total = {round(time_done - time_start, 2)}')

    # build response
    if return_type == 'html':
        # summarise how many result rows fall under each chart prefix (text
        # before the first '.'), most common first
        result_charts = df_results['chart'].values.tolist()
        prefix_counts = Counter(c.split('.')[0] for c in result_charts).most_common()
        counts = ' | '.join(f"{prefix}:{count}" for prefix, count in prefix_counts)
        summary_text = f"number of charts = {df_results['chart'].nunique()}, number of dimensions = {len(df_results)}, {counts}"

        # Same for every chart, so build it once. The '//' collapse applies only
        # to the host part, not to the "http://" prefix.
        data_host = "http://" + f"{remote_host.replace('127.0.0.1', local_host)}/".replace('//', '/')

        charts_to_render = []
        for chart in df_results['chart'].unique():
            df_results_chart = df_results[df_results['chart'] == chart]
            dimensions = ','.join(df_results_chart['dimension'].values.tolist())
            rank = df_results_chart['chart_rank'].unique().tolist()[0]
            score_avg = round(df_results_chart['score'].mean(), 2)
            score_min = round(df_results_chart['score'].min(), 2)
            score_max = round(df_results_chart['score'].max(), 2)
            charts_to_render.append(
                {
                    "id": chart,
                    "title": f"{rank} - {chart} - score_avg = {score_avg}, score_min = {score_min}, score_max = {score_max}",
                    "after": baseline_after,
                    "before": highlight_before,
                    "data_host": data_host,
                    "dimensions": dimensions,
                }
            )
        return render_template(
            'results.html',
            charts=charts_to_render,
            highlight_after=highlight_after * 1000,
            highlight_before=highlight_before * 1000,
            summary_text=summary_text,
        )
    elif return_type == 'json':
        return jsonify(df_results.to_dict(orient='records'))
    else:
        # NOTE(review): if this is registered as a Flask view, returning None
        # is not a valid response -- confirm whether parse_params() restricts
        # return_type to 'html'/'json'.
        return None