def find_all(self, filter=None, projection=None, *args, flatten=True, **kwargs):
    if not filter:
        filter = dict()

    logger.info(
        'db.getCollection("%s").find(\n%s\n,\n%s\n)',
        str(self.reports.name),
        strings.pad_lines(strings.to_json(filter)),
        strings.pad_lines(strings.to_json(projection)),
    )

    with Timer('db find: db-stuff', log=logger.debug):
        with Timer('db find: db-stuff: find', log=logger.debug):
            # TODO this may take a while depending on the size of the collection
            cursor = self.reports.find(filter, projection, *args, **kwargs)

        with Timer('db find: db-stuff: fetch and flatten', log=logger.debug):
            if flatten:
                items = [datautils.flatten(x, sep='.') for x in cursor]
            else:
                items = list(cursor)

    return items
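# Hedged usage sketch (not part of the original code): how find_all might be
# called, assuming `mongo` is a CIHPCMongo-like instance whose `reports`
# collection stores report documents. Field names below are illustrative only.
def _example_find_all(mongo):
    items = mongo.find_all(
        filter={'git.branch': 'master'},                        # plain MongoDB query document
        projection={'problem.test': 1, 'result.duration': 1},   # 1 marks fields to keep
    )
    # with flatten=True (the default) nested keys come back dotted
    for item in items:
        print(item.get('result.duration'))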
def aggregate(self, match=None, unwind=None, project=None, flatten=True, *args):
    """
    Shortcut for an aggregate method

    Parameters
    ----------
    match : dict
        $match aggregation object according to MongoDB specification
    unwind : dict or str
        $unwind aggregation object according to MongoDB specification
    project : dict
        $project aggregation object according to MongoDB specification
    """
    pipeline = list()
    if match:
        pipeline.append({'$match': match})
    if unwind:
        pipeline.append({'$unwind': unwind})
    if project:
        pipeline.append({'$project': project})

    if args:
        pipeline.extend(args)

    logger.debug(
        'db.getCollection("%s").aggregate(\n%s\n)',
        str(self.reports.name),
        strings.pad_lines(strings.to_json(pipeline)),
    )

    with Timer('db aggregate: db-stuff', log=logger.debug):
        with Timer('db aggregate: db-stuff: find', log=logger.debug):
            # TODO this may take a while depending on the size of the collection
            cursor = self.reports.aggregate(pipeline)

        with Timer('db aggregate: db-stuff: fetch and flatten', log=logger.debug):
            if flatten:
                items = [datautils.flatten(x, sep='.') for x in cursor]
            else:
                items = list(cursor)

    return items
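# Hedged usage sketch (not part of the original code): how the $match/$unwind/
# $project shortcuts of aggregate() compose into a pipeline. `mongo` stands for
# a CIHPCMongo-like instance and the field names are illustrative only.
def _example_aggregate(mongo):
    return mongo.aggregate(
        match={'git.branch': 'master'},   # becomes {'$match': {...}}
        unwind='$libs.timers',            # one output document per timer entry
        project={'libs.timers.duration': 1, 'git.commit': 1},
    )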
def _process_payload(self, payload):
    """
    Spawns a new process for the given payload

    Parameters
    ----------
    payload: cihpc.common.utils.git.webhooks.push_hook.PushWebhook

    Returns
    -------
    int
        return code of the process
    """
    logger.info('%s starting' % payload.after)
    with Timer(payload.after) as timer:
        try:
            returncode = self.webhook_trigger.process(payload)
            if returncode != 0:
                raise Exception('script ended with non-zero status')
        except Exception:
            # the trigger script may be missing or not executable
            returncode = -1
            logger.exception('Error while starting the process %s' % self.webhook_trigger)

    logger.info('%s took %s [%d]' % (payload.after, timer.pretty_duration, returncode))
    return returncode
def _process_stage_threads(self, stage: ProjectStage, threads: List[ProcessStage]):
    with Timer(stage.ord_name) as timer:
        pool = WorkerPool(cpu_count=stage.parallel.cpus, threads=threads)
        pool.update_cpu_values(extract_cpus_from_worker)

        if stage.parallel:
            logger.info(
                f'{len(threads)} job(s) will now be executed in parallel\n'
                f'allocated cores: {stage.parallel.cpus}')
        else:
            logger.info(
                f'{len(threads)} job(s) will now be executed in serial')

        default_status = pool.get_statuses(LogStatusFormat.ONELINE)
        progress_line = progress.Line(
            total=len(threads),
            desc='%s: %s' % (stage.ord_name, default_status),
            tty=False)

        def update_status_enter(worker: ProcessStage):
            progress_line.desc = '%02d-%s: %s ' % (
                stage.ord,
                worker.debug_name if worker else '?',
                pool.get_statuses(LogStatusFormat.ONELINE))
            progress_line.update(0)

        def update_status_exit(worker: ProcessStage):
            progress_line.desc = '%02d-%s: %s ' % (
                stage.ord,
                worker.debug_name if worker else '?',
                pool.get_statuses(LogStatusFormat.ONELINE))
            progress_line.update()

        pool.thread_event.on_exit.on(update_status_exit)
        pool.thread_event.on_enter.on(update_status_enter)

        # run in serial or parallel
        progress_line.start()
        pool.start()
        progress_line.close()

        if pool.terminate:
            logger.error('Caught pool terminate signal!')
            if not pool.exception or pool.exception.on_error is OnError.EXIT:
                logger.error('Exiting application with 1')
                exit(1)
            if pool.exception.on_error is OnError.BREAK:
                return False

        timers_total = sum(
            sum(x.collect_result.total) for x in threads if x.collect_result)
        logger.info(
            f'{len(threads)} processes finished, found {timers_total} documents')
        return True
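# Hedged sketch (not part of the original code): the enter/exit callback pattern
# behind pool.thread_event.on_enter / on_exit, reduced to plain Python. The
# _ExampleEvent class below is a stand-in, not the real cihpc EnterExitEvent.
class _ExampleEvent(object):
    def __init__(self):
        self._callbacks = []

    def on(self, callback):
        # register a callback that will be fired with the worker triggering the event
        self._callbacks.append(callback)

    def fire(self, worker):
        for callback in self._callbacks:
            callback(worker)


def _example_progress_updates():
    on_exit = _ExampleEvent()
    finished = []
    on_exit.on(lambda worker: finished.append(worker))
    on_exit.fire('worker-1')  # a pool would call this whenever a thread exits
    return finished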
def __init__(self, target=None):
    super(SimpleWorker, self).__init__()
    self.cpus = 1
    self.target = target
    self.semaphore = None     # type: ComplexSemaphore
    self.thread_event = None  # type: EnterExitEvent
    self.result = None        # type: ProcessStepResult
    self.lock_event = None    # type: threading.Event
    self._status = None
    self.status = WorkerStatus.CREATED  # random.choice(list(WorkerStatus))
    self.timer = Timer(self.name)
    self._pretty_name = None
    self.terminate = False
    self.exception = None
def run(self):
    self.commit_browser.load_commits()
    self.commit_browser.pick_commits()

    logger.info('starting commit processing')
    for commit in self.commit_browser.commits:
        logger.info('%s starting' % commit.short_format)
        with Timer(commit.short_format) as timer:
            args = self.args_constructor.construct_arguments(commit.hash)
            logger.info(' '.join(str(x) for x in args))

            process = subprocess.Popen(args, cwd=global_configuration.cwd)
            process.wait()

        logger.info('%s took %s [%d]' % (commit.short_format, timer.pretty_duration, process.returncode))
def frame_view(project, base64data=''):
    if base64data:
        options = json.loads(
            base64.decodebytes(base64data.encode()).decode()
        )
    else:
        options = dict()

    print(strings.to_json(options))

    config = ProjectConfig.get(project)
    _ids = [objectid.ObjectId(x) for y in options['_ids'] for x in y]

    field_set = config.fields.required_fields()
    filter_dict = options['filters']

    projection_list = set(filter_dict.keys()) | field_set
    projection_list.add(config.frame_view.unwind)
    db_find_fields = dict(zip(projection_list, itertools.repeat(1)))

    # add _ids to the selector
    db_find_filters = du.filter_keys(
        filter_dict,
        forbidden=("", None, "*")
    )
    db_find_filters['_id'] = {'$in': _ids}

    with Timer('db find & apply', log=logger.debug):
        mongo = CIHPCMongo.get(project)
        data_frame = pd.DataFrame(
            mongo.aggregate(
                match=db_find_filters,
                project=db_find_fields,
                unwind='$%s' % config.frame_view.unwind
            )
        )

        if data_frame.empty:
            return FrameView.error_empty_df(db_find_filters)

        config.fields.apply_to_df(data_frame)

    chart_options = du.dotdict(
        y=config.frame_view.fields.timers.duration.name,
        x=config.frame_view.fields.timers.name.name,
        n=config.frame_view.fields.timers.path.name,
        groupby={},
        colorby=config.frame_view.groupby,
    )

    if not config.frame_view.fields.timers.path:
        data_frame[config.frame_view.fields.timers.path.name] = config.frame_view.fields.timers.name.name

    print(chart_options)

    charts = list()
    for group_values, group_keys, group_names, group_data in FrameView.group_by(data_frame, chart_options.groupby):
        group_title = du.join_lists(group_names, group_values, '{} = <b>{}</b>', '<br />')

        series = list()
        colors_iter = iter(config.color_palette.copy() * 5)
        for color_values, color_keys, color_names, color_data in FrameView.group_by(group_data, chart_options.colorby):
            color_title = du.join_lists(color_names, color_values, '{} = {}', ', ')
            color = next(colors_iter)
            print(color_title)

            with Timer('agg ' + color_title, log=logger.info):
                # color_data = color_data.groupby(chart_options.x).agg({
                #     chart_options.y: 'mean',
                #     chart_options.n: 'first'
                # }).sort_values(by=chart_options.y, ascending=False).head(50)

                # collapse timers faster than 0.5 s into a single summary entry
                small_values = color_data[color_data[chart_options.y] < 0.5]
                color_data = color_data[color_data[chart_options.y] >= 0.5]

                small_values_grouped = small_values.groupby(chart_options.x).agg({
                    chart_options.y: 'mean',
                }).sum()

                color_data = color_data.append({
                    chart_options.y: small_values_grouped[chart_options.y],
                    chart_options.x: 'values < 0.5',
                    chart_options.n: 'sum of the means of the values lesser than 0.5 sec',
                }, ignore_index=True)

                # aggregate to the 25th and 75th percentile per timer name
                color_data_grouped = color_data.groupby(chart_options.x).agg({
                    chart_options.y: {
                        '25%': lambda x: np.percentile(x, 25),
                        '75%': lambda x: np.percentile(x, 75),
                    },
                    chart_options.n: 'first',
                }).reset_index()

                print(color_data_grouped)

                columnrange = pd.DataFrame()
                columnrange['y'] = list(color_data_grouped[chart_options.x])
                columnrange['n'] = list(color_data_grouped[chart_options.n]['first'])
                columnrange['low'] = list(color_data_grouped[chart_options.y]['25%'])
                columnrange['high'] = list(color_data_grouped[chart_options.y]['75%'])
                columnrange = columnrange.sort_values(by='high', ascending=False).reset_index(drop=True)

                a, b = list(columnrange['y']), list(columnrange['n'])
                columnrange.drop(columns=['n'], inplace=True)

                series.append(dict(
                    type='columnrange',
                    extra={
                        'path': dict(zip(a, b))
                    },
                    data=du.dropzero(du.fillna(columnrange.round(3))),
                    name='Q1-Q3 (%s)' % color_title,
                    color=color(0.7),
                ))

                color_data = color_data.reset_index()
                scatter = pd.DataFrame()
                scatter['y'] = list(color_data[chart_options.x])
                scatter['x'] = list(color_data[chart_options.y])
                scatter['n'] = list(color_data[chart_options.n])
                scatter = scatter.sort_values(by='x', ascending=False).reset_index(drop=True)

                a, b = list(scatter['y']), list(scatter['n'])
                scatter.drop(columns=['n'], inplace=True)

                series.append(dict(
                    type='scatter',
                    extra={
                        'path': dict(zip(a, b)),
                    },
                    data=du.dropzero(du.fillna(scatter.round(3))),
                    name='mean (%s)' % color_title,
                    color=color(0.7),
                ))

        charts.append(dict(
            title=group_title,
            xAxis=dict(title=dict(text=None)),
            yAxis=dict(title=dict(text=None)),
            series=series,
        ))

    return dict(
        status=200,
        data=charts
    )
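# Hedged sketch (not part of the original code): the quartile aggregation used
# for the 'columnrange' series above, reduced to a self-contained pandas
# example. Column names here are made up for illustration.
def _example_quartile_ranges():
    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        'timer': ['a', 'a', 'a', 'b', 'b', 'b'],
        'duration': [1.0, 1.2, 1.4, 2.8, 3.0, 3.2],
    })
    ranges = df.groupby('timer')['duration'].agg(
        low=lambda x: np.percentile(x, 25),
        high=lambda x: np.percentile(x, 75),
    ).reset_index()
    # each row now holds the Q1-Q3 range plotted as one 'columnrange' bar
    return ranges.sort_values(by='high', ascending=False)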
def sparkline_view(project, base64data=''):
    options, config, mongo = SparklineView.prepare(project, base64data)

    mode = ViewMode(options.get('mode', {}).get('mode', ViewMode.TIME_SERIES.value))
    squeeze = int(options.get('squeeze', {}).get('value', 1))
    interval = options.get('range', {})

    field_set = config.fields.required_fields()
    filter_dict = options['filters']

    if interval and 'from' in interval and 'to' in interval:
        filter_dict[config.fields.git.datetime.name] = {
            '$gte': datetime.datetime.fromtimestamp(int(interval['from'])),
            '$lte': datetime.datetime.fromtimestamp(int(interval['to'])),
        }

    projection_list = set(filter_dict.keys()) | field_set
    db_find_fields = dict(zip(projection_list, itertools.repeat(1)))

    db_find_filters = du.filter_keys(
        filter_dict,
        forbidden=("", None, "*")
    )

    with Timer('db find & apply', log=logger.debug):
        data_frame = pd.DataFrame(
            mongo.find_all(
                db_find_filters,
                db_find_fields,
            )
        )

        if data_frame.empty:
            return SparklineView.error_empty_df(db_find_filters)

        sort_field = config.fields.git.datetime.name
        data_frame = data_frame.sort_values(by=sort_field, ascending=False).reset_index(drop=True)
        config.fields.apply_to_df(data_frame)
        data_frame[':merged:'] = 'g(?)'

    if mode is ViewMode.SCALE_VIEW:
        # split charts based on commit when in scale-view mode
        # if config.fields.git.datetime:
        #     config.test_view.groupby['git.datetime'] = 'date'
        # else:
        config.test_view.groupby['git.commit'] = 'commit'
        config.test_view.groupby = du.filter_values(
            config.test_view.groupby,
            forbidden=(config.fields.problem.size.name, config.fields.problem.cpu.name)
        )
        chart_options = du.dotdict(
            y=config.fields.result.duration.name,
            x=config.fields.problem.cpu.name,
            c=config.fields.git.commit.name,
            groupby={k: v for k, v in config.test_view.groupby.items() if options['groupby'].get(k, False)},
            colorby={k: v for k, v in config.test_view.groupby.items() if not options['groupby'].get(k, False)},
        )
        data_frame[chart_options.x] = data_frame[chart_options.x].apply(str)

    elif mode is ViewMode.TIME_SERIES:
        if config.fields.problem.cpu:
            config.test_view.groupby[config.fields.problem.cpu.name] = 'cpu'

        if config.fields.problem.size:
            config.test_view.groupby[config.fields.problem.size.name] = 'size'

        if config.fields.problem.test:
            config.test_view.groupby[config.fields.problem.test.name] = 'test'

        if config.fields.problem.case:
            config.test_view.groupby[config.fields.problem.case.name] = 'case'

        # if self.config.fields.problem.test and self.options['groupby'].get(self.config.fields.problem.test.name, False):
        #     self.config.test_view.groupby[self.config.fields.problem.test.name] = 'test'
        #     self.options['groupby'][self.config.fields.problem.test.name] = True
        #
        # if self.config.fields.problem.case and self.options['groupby'].get(self.config.fields.problem.case.name, False):
        #     self.config.test_view.groupby[self.config.fields.problem.case.name] = 'size'
        #     self.options['groupby'][self.config.fields.problem.case.name] = True

        chart_options = du.dotdict(
            y=config.fields.result.duration.name,
            x=config.fields.git.datetime.name,
            c=config.fields.git.commit.name,
            groupby={k: v for k, v in config.test_view.groupby.items() if options['groupby'].get(k, False) is True},
            colorby={k: v for k, v in config.test_view.groupby.items() if options['groupby'].get(k, False) is False},
        )
        print(chart_options)
    else:
        raise Exception('Given mode is not supported %s' % mode)

    chart_group = ChartGroup(chart_options, options)

    if not chart_group:
        return SparklineView.show_error(
            status=300,
            message='No chart series selected',
            description='<p>All of the chart series are disabled, so no chart can be displayed. '
                        'Please enable at least one of the chart series.</p>'
                        '<a class="btn btn-warning" data-toggle="modal" data-target="#modal-options">'
                        'Click here to open configuration.</a>'
        )

    charts = list()
    for group_values, group_keys, group_names, group_data in SparklineView.group_by(data_frame, chart_options.groupby):
        group_title = du.join_lists(group_names, group_values, '<dt>{}</dt><dd>{}</dd>', '')
        group_title = '<dl>%s</dl>' % group_title

        series = list()
        extra = dict(size=list())
        colors_iter = iter(config.color_palette.copy() * 5)
        for color_values, color_keys, color_names, color_data in SparklineView.group_by(group_data, chart_options.colorby):
            color_title = du.join_lists(color_names, color_values, '<small>{}</small> <b>{}</b>', ', ')
            if color_title == ' = ':
                color_title = '*'

            color = next(colors_iter)

            if squeeze and squeeze > 1:
                # relabel consecutive x values into buckets of `squeeze` items
                merge_unique = sorted(list(set(color_data[chart_options.x])))
                merge_groups = np.repeat(np.arange(int(len(merge_unique) / squeeze + 1)), squeeze)
                merge_unique_len = len(merge_unique)
                merge_map = dict()
                for i in range(merge_unique_len):
                    s = merge_groups[i]
                    bb, ee = s * squeeze + 1, min((s + 1) * squeeze, merge_unique_len)
                    b, e = merge_unique[bb - 1], merge_unique[ee - 1]
                    cnt = ee - (bb - 1)
                    if b == e:
                        merge_map[merge_unique[i]] = 'group %s (1 item, %s)' % (chr(65 + s), b)
                    else:
                        if isinstance(b, datetime.datetime):
                            duration = dateutils.human_interval(b, e)
                            merge_map[merge_unique[i]] = 'group %s (%d items, period of %s)' % (
                                chr(65 + s), cnt, duration)
                        else:
                            merge_map[merge_unique[i]] = 'group %s (%d items, %s - %s)' % (chr(65 + s), cnt, b, e)

                # color_data[':merged:'] = color_data[chart_options.x].map(merge_map)
                # TODO carefully think this through
                color_data[chart_options.x] = color_data[chart_options.x].map(merge_map)
                # chart_options.x = ':merged:'

            with Timer('agg ' + color_title, log=logger.info):
                cd_group = color_data.groupby(chart_options.x, sort=True).aggregate({
                    chart_options.c: lambda x: list(set(x)),
                    chart_options.y: chart_group.y_metrics.items(),
                    '_id': lambda x: list(set(x)),
                })

                if chart_group.boxplot_chart:
                    series.append(chart_group.boxplot_chart.get_chart(
                        cd_group, color_title,
                        color=color(0.8)
                    ))

                if chart_group.std_chart:
                    series.append(chart_group.std_chart.get_chart(
                        cd_group, color_title,
                        color=color(0.3),
                        fillColor=color(0.1)
                    ))

                if chart_group.ci_chart:
                    series.append(chart_group.ci_chart.get_chart(
                        cd_group, color_title,
                        color=color(0.3),
                        fillColor=color(0.1)
                    ))

                if chart_group.errorbar_chart:
                    series.append(chart_group.errorbar_chart.get_chart(
                        cd_group, color_title,
                        color=color(0.3),
                        fillColor=color(0.1)
                    ))

                if chart_group.mean_chart:
                    series.append(chart_group.mean_chart.get_chart(
                        cd_group, color_title,
                        color=color(1.0),
                    ))

                if chart_group.median_chart:
                    series.append(chart_group.median_chart.get_chart(
                        cd_group, color_title,
                        color=color(1.0),
                    ))

                if series:
                    series[-1]['extra'] = {
                        '_id': cd_group['_id'],
                        'commits': cd_group[chart_options.c],
                    }
                extra['size'].append(len(cd_group))

        charts.append(dict(
            title=group_title,
            series=series,
            xAxis=dict(title=dict(text=chart_options.x)),
            yAxis=dict(title=dict(text=chart_options.y)),
            extra=extra,
        ))

    return dict(
        status=200,
        data=charts
    )
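# Hedged sketch (not part of the original code): how the 'squeeze' option above
# relabels consecutive x values into fixed-size buckets. The data is a toy
# example; the real code applies the same mapping to a DataFrame column.
def _example_squeeze(values, squeeze=3):
    import numpy as np

    merge_unique = sorted(set(values))
    merge_groups = np.repeat(np.arange(len(merge_unique) // squeeze + 1), squeeze)

    merge_map = {}
    for i, value in enumerate(merge_unique):
        s = merge_groups[i]
        lo, hi = s * squeeze, min((s + 1) * squeeze, len(merge_unique))
        first, last = merge_unique[lo], merge_unique[hi - 1]
        merge_map[value] = 'group %s (%d items, %s - %s)' % (chr(65 + int(s)), hi - lo, first, last)
    return merge_map  # e.g. {'v1': 'group A (3 items, v1 - v3)', ...}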