Example #1
    def find_all(self,
                 filter=None,
                 projection=None,
                 *args,
                 flatten=True,
                 **kwargs):
        if not filter:
            filter = dict()

        logger.info(
            'db.getCollection("%s").find(\n'
            '%s\n'
            ',\n%s\n)',
            self.reports.name,
            strings.pad_lines(strings.to_json(filter)),
            strings.pad_lines(strings.to_json(projection)),
        )

        with Timer('db find: db-stuff', log=logger.debug):

            with Timer('db find: db-stuff: find', log=logger.debug):
                # TODO this may take a while depending on the size of the collection
                cursor = self.reports.find(filter, projection, *args, **kwargs)

            with Timer('db find: db-stuff: fetch and flatten',
                       log=logger.debug):
                if flatten:
                    items = [
                        datautils.flatten(x, sep='.') for x in list(cursor)
                    ]
                else:
                    items = list(cursor)

        return items
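A possible call site for find_all, assuming mongo is an instance of the surrounding class, obtained e.g. via CIHPCMongo.get(project) as in Example #7; the filter and projection fields below are made up for illustration:

    mongo = CIHPCMongo.get('my-project')
    items = mongo.find_all(
        filter={'git.branch': 'master'},          # illustrative field
        projection={'git.commit': 1, 'result.duration': 1},
    )
    # with flatten=True (the default), every document comes back as a flat
    # dict keyed by dot-separated paths, e.g. {'result.duration': 12.3}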
Example #2
    def aggregate(self,
                  match=None,
                  unwind=None,
                  project=None,
                  flatten=True,
                  *args):
        """
        Shortcut for a aggregate method
        Parameters
        ----------
        match : dict
            $match aggregation object according to MongoDB specification
        unwind : dict or str
            $unwind aggregation object according to MongoDB specification
        project : dict
            $project aggregation object according to MongoDB specification
        """

        pipeline = list()

        if match:
            pipeline.append({'$match': match})

        if unwind:
            pipeline.append({'$unwind': unwind})

        if project:
            pipeline.append({'$project': project})

        if args:
            pipeline.extend(args)

        logger.debug(
            'db.getCollection("%s").aggregate(\n%s\n)',
            self.reports.name,
            strings.pad_lines(strings.to_json(pipeline)))

        with Timer('db aggregate: db-stuff', log=logger.debug):

            with Timer('db aggregate: db-stuff: aggregate', log=logger.debug):

                # TODO this may take a while depending on the size of the collection
                cursor = self.reports.aggregate(pipeline)

            with Timer('db aggregate: db-stuff: fetch and flatten',
                       log=logger.debug):
                if flatten:
                    items = [
                        datautils.flatten(x, sep='.') for x in list(cursor)
                    ]
                else:
                    items = list(cursor)

        return items
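A sketch of how the pipeline is assembled, again with illustrative stage contents; the keyword arguments map one-to-one onto $match, $unwind and $project stages:

    items = mongo.aggregate(
        match={'git.branch': 'master'},
        unwind='$timers',
        project={'timers.duration': 1},
    )
    # pipeline sent to MongoDB:
    # [{'$match': {'git.branch': 'master'}},
    #  {'$unwind': '$timers'},
    #  {'$project': {'timers.duration': 1}}]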
Example #3
    def _process_payload(self, payload):
        """
        Spawns a new process (via Popen) for the given payload

        Parameters
        ----------
        payload: cihpc.common.utils.git.webhooks.push_hook.PushWebhook

        Returns
        -------
        int
            return code of the process

        """

        logger.info('%s starting', payload.after)

        with Timer(payload.after) as timer:
            try:
                returncode = self.webhook_trigger.process(payload)
                if returncode != 0:
                    raise Exception('script ended with a non-zero status')

            except Exception:
                # the binary may be missing, or the script itself failed
                returncode = -1
                logger.exception('Error while starting the process %s', self.webhook_trigger)

        logger.info('%s took %s [%d]', payload.after, timer.pretty_duration, returncode)

        return returncode
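The Timer used throughout these examples is a context manager that measures the wall time of its block and exposes a human-readable duration. A minimal stand-in inferred from its usage here (the project's real class does more, e.g. it accepts the log= callable seen in the other examples):

    import time

    class Timer:
        # simplified stand-in inferred from usage; not the project's class
        def __init__(self, name, log=None):
            self.name = name
            self.log = log

        def __enter__(self):
            self._start = time.monotonic()
            return self

        def __exit__(self, *exc_info):
            self.duration = time.monotonic() - self._start
            if self.log:
                self.log('%s took %1.3f sec', self.name, self.duration)
            return False  # never suppress exceptions in the timed block

        @property
        def pretty_duration(self):
            return '%1.3f sec' % self.duration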
Example #4
    def _process_stage_threads(self, stage: ProjectStage,
                               threads: List[ProcessStage]):
        with Timer(stage.ord_name) as timer:
            pool = WorkerPool(cpu_count=stage.parallel.cpus, threads=threads)
            pool.update_cpu_values(extract_cpus_from_worker)

            if stage.parallel:
                logger.info(
                    f'{len(threads)} job(s) will now be executed in parallel\n'
                    f'allocated cores: {stage.parallel.cpus}')
            else:
                logger.info(
                    f'{len(threads)} job(s) will now be executed in serial')

            default_status = pool.get_statuses(LogStatusFormat.ONELINE)
            progress_line = progress.Line(total=len(threads),
                                          desc='%s: %s' %
                                          (stage.ord_name, default_status),
                                          tty=False)

            def update_status_enter(worker: ProcessStage):
                progress_line.desc = '%02d-%s: %s ' % (
                    stage.ord, worker.debug_name if worker else '?',
                    pool.get_statuses(LogStatusFormat.ONELINE))
                progress_line.update(0)

            def update_status_exit(worker: ProcessStage):
                progress_line.desc = '%02d-%s: %s ' % (
                    stage.ord, worker.debug_name if worker else '?',
                    pool.get_statuses(LogStatusFormat.ONELINE))
                progress_line.update()

            pool.thread_event.on_exit.on(update_status_exit)
            pool.thread_event.on_enter.on(update_status_enter)

            # run in serial or parallel
            progress_line.start()
            pool.start()
            progress_line.close()

            if pool.terminate:
                logger.error('Caught pool terminate signal!')
                if not pool.exception or pool.exception.on_error is OnError.EXIT:
                    logger.error('Exiting application with code 1')
                    exit(1)

                if pool.exception.on_error is OnError.BREAK:
                    return False

        timers_total = sum(
            [sum(x.collect_result.total) for x in threads if x.collect_result])
        logger.info(
            f'{len(threads)} processes finished, found {timers_total} documents'
        )
        return True
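The pool.thread_event.on_enter.on(...) / on_exit.on(...) calls above follow a plain observer pattern: callbacks registered with .on() fire when a worker starts or finishes. A minimal sketch of that pattern (these classes are stand-ins, not the project's implementations):

    class Event:
        # callbacks registered via .on() are fired in registration order
        def __init__(self):
            self._callbacks = []

        def on(self, callback):
            self._callbacks.append(callback)

        def fire(self, *args):
            for callback in self._callbacks:
                callback(*args)

    class EnterExitEvent:
        def __init__(self):
            self.on_enter = Event()
            self.on_exit = Event()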
Example #5
    def __init__(self, target=None):
        super(SimpleWorker, self).__init__()
        self.cpus = 1
        self.target = target
        self.semaphore = None  # type: ComplexSemaphore
        self.thread_event = None  # type: EnterExitEvent
        self.result = None  # type: ProcessStepResult
        self.lock_event = None  # type: threading.Event
        self._status = None
        self.status = WorkerStatus.CREATED
        self.timer = Timer(self.name)
        self._pretty_name = None
        self.terminate = False
        self.exception = None
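SimpleWorker evidently subclasses threading.Thread (it calls the base __init__ and uses self.name). A stripped-down sketch of the same pattern, keeping only the assumed essentials:

    import threading

    class MinimalWorker(threading.Thread):
        # sketch of the worker pattern: a thread that stores an optional
        # target callable and its result; everything else is assumed
        def __init__(self, target=None):
            super(MinimalWorker, self).__init__()
            self.target = target
            self.result = None

        def run(self):
            if self.target:
                self.result = self.target(self)

    worker = MinimalWorker(target=lambda w: w.name)
    worker.start()
    worker.join()
    print(worker.result)  # e.g. 'Thread-6'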
Example #6
    def run(self):
        self.commit_browser.load_commits()
        self.commit_browser.pick_commits()

        logger.info('starting commit processing')
        for commit in self.commit_browser.commits:
            logger.info('%s starting', commit.short_format)

            with Timer(commit.short_format) as timer:
                args = self.args_constructor.construct_arguments(commit.hash)
                logger.info(' '.join(str(x) for x in args))
                process = subprocess.Popen(args, cwd=global_configuration.cwd)
                process.wait()

            logger.info('%s took %s [%d]', commit.short_format, timer.pretty_duration, process.returncode)
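subprocess.Popen(args, ...) followed by wait() is equivalent to the one-shot subprocess.run, which blocks and returns a CompletedProcess; a minimal sketch with a made-up command:

    import subprocess

    completed = subprocess.run(['echo', 'hello'], cwd='.')  # illustrative command
    print(completed.returncode)  # 0 on success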
Example #7
def frame_view(project, base64data=''):
    if base64data:
        options = json.loads(
            base64.decodebytes(base64data.encode()).decode()
        )
    else:
        options = dict()

    logger.debug(strings.to_json(options))

    config = ProjectConfig.get(project)
    _ids = [objectid.ObjectId(x) for y in options['_ids'] for x in y]

    field_set = config.fields.required_fields()
    filter_dict = options['filters']

    projection_list = set(filter_dict.keys()) | field_set
    projection_list.add(config.frame_view.unwind)
    db_find_fields = dict(zip(projection_list, itertools.repeat(1)))

    # add _ids to selector
    db_find_filters = du.filter_keys(
        filter_dict,
        forbidden=("", None, "*")
    )
    db_find_filters['_id'] = {'$in': _ids}

    with Timer('db find & apply', log=logger.debug):
        mongo = CIHPCMongo.get(project)
        data_frame = pd.DataFrame(
            mongo.aggregate(
                match=db_find_filters,
                project=db_find_fields,
                unwind='$%s' % config.frame_view.unwind
            )
        )

        if data_frame.empty:
            return FrameView.error_empty_df(db_find_filters)

        config.fields.apply_to_df(data_frame)

    chart_options = du.dotdict(
        y=config.frame_view.fields.timers.duration.name,
        x=config.frame_view.fields.timers.name.name,
        n=config.frame_view.fields.timers.path.name,
        groupby={},
        colorby=config.frame_view.groupby,
    )
    # when no timer path field is configured, fall back to the timer name field
    if not config.frame_view.fields.timers.path:
        data_frame[config.frame_view.fields.timers.path.name] = \
            config.frame_view.fields.timers.name.name

    logger.debug(chart_options)

    charts = list()
    for group_values, group_keys, group_names, group_data in FrameView.group_by(data_frame, chart_options.groupby):
        group_title = du.join_lists(group_names, group_values, '{} = <b>{}</b>', '<br />')

        series = list()
        colors_iter = iter(config.color_palette.copy() * 5)
        for color_values, color_keys, color_names, color_data in FrameView.group_by(group_data, chart_options.colorby):
            color_title = du.join_lists(color_names, color_values, '{} = {}', ', ')
            color = next(colors_iter)

            logger.debug(color_title)

            with Timer('agg ' + color_title, log=logger.info):
                # color_data = color_data.groupby(chart_options.x).agg({
                #     chart_options.y: 'mean',
                #     chart_options.n: 'first'
                # }).sort_values(by=chart_options.y, ascending=False).head(50)

                small_values = color_data[color_data[chart_options.y] < 0.5]
                color_data = color_data[color_data[chart_options.y] >= 0.5]

                small_values_grouped = small_values.groupby(chart_options.x).agg({
                    chart_options.y: 'mean',
                }).sum()

                # DataFrame.append was removed from pandas; concat a one-row
                # frame instead
                color_data = pd.concat([color_data, pd.DataFrame([{
                    chart_options.y: small_values_grouped[chart_options.y],
                    chart_options.x: 'values &lt; 0.5',
                    chart_options.n: 'sum of the means of the values less than 0.5 sec',
                }])], ignore_index=True)

                # nested renaming dicts in .agg() were removed from pandas;
                # named aggregation produces the same quartile columns
                color_data_grouped = color_data.groupby(chart_options.x).agg(
                    q25=(chart_options.y, lambda x: np.percentile(x, 25)),
                    q75=(chart_options.y, lambda x: np.percentile(x, 75)),
                    n_first=(chart_options.n, 'first'),
                ).reset_index()

                logger.debug(color_data_grouped)

                columnrange = pd.DataFrame()
                columnrange['y'] = list(color_data_grouped[chart_options.x])
                columnrange['n'] = list(color_data_grouped['n_first'])
                columnrange['low'] = list(color_data_grouped['q25'])
                columnrange['high'] = list(color_data_grouped['q75'])
                columnrange = columnrange.sort_values(by='high', ascending=False).reset_index(drop=True)

                a, b = list(columnrange['y']), list(columnrange['n'])
                columnrange.drop(columns=['n'], inplace=True)

                series.append(dict(
                    type='columnrange',
                    extra={
                        'path': dict(zip(a, b))
                    },
                    data=du.dropzero(du.fillna(columnrange.round(3))),
                    name='Q1-Q3 (%s)' % color_title,
                    color=color(0.7)),
                )

                color_data = color_data.reset_index()
                scatter = pd.DataFrame()
                scatter['y'] = list(color_data[chart_options.x])
                scatter['x'] = list(color_data[chart_options.y])
                scatter['n'] = list(color_data[chart_options.n])
                scatter = scatter.sort_values(by='x', ascending=False).reset_index(drop=True)

                a, b = list(scatter['y']), list(scatter['n'])

                scatter.drop(columns=['n'], inplace=True)

                series.append(dict(
                    type='scatter',
                    extra={
                        'path': dict(zip(a, b)),
                    },
                    data=du.dropzero(du.fillna(scatter.round(3))),
                    name='mean (%s)' % color_title,
                    color=color(0.7)),
                )

        charts.append(dict(
            title=group_title,
            xAxis=dict(title=dict(text=None)),
            yAxis=dict(title=dict(text=None)),
            series=series,
        ))

    return dict(
        status=200,
        data=charts
    )
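The quartile aggregation in the middle of frame_view can be tried in isolation. A self-contained sketch with made-up data, using the named-aggregation form shown above:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        'name': ['a', 'a', 'a', 'b', 'b'],       # plays the role of chart_options.x
        'duration': [1.0, 2.0, 3.0, 4.0, 6.0],   # plays the role of chart_options.y
    })
    grouped = df.groupby('name').agg(
        q25=('duration', lambda x: np.percentile(x, 25)),
        q75=('duration', lambda x: np.percentile(x, 75)),
    ).reset_index()
    print(grouped)
    #   name  q25  q75
    # 0    a  1.5  2.5
    # 1    b  4.5  5.5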
Example #8
def sparkline_view(project, base64data=''):
    options, config, mongo = SparklineView.prepare(project, base64data)

    mode = ViewMode(options.get('mode', {}).get('mode', ViewMode.TIME_SERIES.value))
    squeeze = int(options.get('squeeze', {}).get('value', 1))
    interval = options.get('range', {})

    field_set = config.fields.required_fields()
    filter_dict = options['filters']

    if interval and 'from' in interval and 'to' in interval:
        filter_dict[config.fields.git.datetime.name] = {
            '$gte': datetime.datetime.fromtimestamp(int(interval['from'])),
            '$lte': datetime.datetime.fromtimestamp(int(interval['to'])),
        }

    projection_list = set(filter_dict.keys()) | field_set
    db_find_fields = dict(zip(projection_list, itertools.repeat(1)))

    db_find_filters = du.filter_keys(
        filter_dict,
        forbidden=("", None, "*")
    )

    with Timer('db find & apply', log=logger.debug):
        data_frame = pd.DataFrame(
            mongo.find_all(
                db_find_filters,
                db_find_fields,
            )
        )

        if data_frame.empty:
            return SparklineView.error_empty_df(db_find_filters)

        sort_field = config.fields.git.datetime.name
        data_frame = data_frame.sort_values(by=sort_field, ascending=False).reset_index(drop=True)

        config.fields.apply_to_df(data_frame)
        data_frame[':merged:'] = 'g(?)'

    if mode is ViewMode.SCALE_VIEW:
        # split charts based on commit when in scale-view mode
        # if config.fields.git.datetime:
        #     config.test_view.groupby['git.datetime'] = 'date'
        # else:
        config.test_view.groupby['git.commit'] = 'commit'

        config.test_view.groupby = du.filter_values(
            config.test_view.groupby,
            forbidden=(config.fields.problem.size.name, config.fields.problem.cpu.name)
        )
        chart_options = du.dotdict(
            y=config.fields.result.duration.name,
            x=config.fields.problem.cpu.name,
            c=config.fields.git.commit.name,
            groupby={k: v for k, v in config.test_view.groupby.items() if options['groupby'].get(k, False)},
            colorby={k: v for k, v in config.test_view.groupby.items() if
                     not options['groupby'].get(k, False)},
        )
        data_frame[chart_options.x] = data_frame[chart_options.x].apply(str)

    elif mode is ViewMode.TIME_SERIES:

        if config.fields.problem.cpu:
            config.test_view.groupby[config.fields.problem.cpu.name] = 'cpu'

        if config.fields.problem.size:
            config.test_view.groupby[config.fields.problem.size.name] = 'size'

        if config.fields.problem.test:
            config.test_view.groupby[config.fields.problem.test.name] = 'test'

        if config.fields.problem.case:
            config.test_view.groupby[config.fields.problem.case.name] = 'case'

        # if self.config.fields.problem.test and self.options['groupby'].get(self.config.fields.problem.test.name, False):
        #     self.config.test_view.groupby[self.config.fields.problem.test.name] = 'test'
        #     self.options['groupby'][self.config.fields.problem.test.name] = True
        #
        # if self.config.fields.problem.case and self.options['groupby'].get(self.config.fields.problem.case.name, False):
        #     self.config.test_view.groupby[self.config.fields.problem.case.name] = 'size'
        #     self.options['groupby'][self.config.fields.problem.case.name] = True

        chart_options = du.dotdict(
            y=config.fields.result.duration.name,
            x=config.fields.git.datetime.name,
            c=config.fields.git.commit.name,
            groupby={k: v for k, v in config.test_view.groupby.items() if
                     options['groupby'].get(k, False) is True},
            colorby={k: v for k, v in config.test_view.groupby.items() if
                     options['groupby'].get(k, False) is False},
        )
        logger.debug(chart_options)
    else:
        raise Exception('Given mode is not supported: %s' % mode)

    chart_group = ChartGroup(chart_options, options)
    if not chart_group:
        return SparklineView.show_error(
            status=300,
            message='No chart series selected',
            description='<p>All of the chart series are disabled, so no chart can be displayed. '
                        'Please enable at least one of the chart type series</p>'
                        '<a class="btn btn-warning" data-toggle="modal" data-target="#modal-options">Click here to open configuration.</a>'
        )

    charts = list()
    for group_values, group_keys, group_names, group_data in SparklineView.group_by(data_frame, chart_options.groupby):
        group_title = du.join_lists(group_names, group_values, '<dt>{}</dt><dd>{}</dd>', '')
        group_title = '<dl>%s</dl>' % group_title

        series = list()
        extra = dict(size=list())
        colors_iter = iter(config.color_palette.copy() * 5)
        for color_values, color_keys, color_names, color_data in SparklineView.group_by(group_data,
                                                                                        chart_options.colorby):
            color_title = du.join_lists(color_names, color_values, '<small>{}</small> <b>{}</b>', ', ')
            if color_title == ' = ':
                color_title = '*'
            color = next(colors_iter)

            if squeeze and squeeze > 1:
                merge_unique = sorted(list(set(color_data[chart_options.x])))
                merge_groups = np.repeat(np.arange(int(len(merge_unique) / squeeze + 1)), squeeze)
                merge_unique_len = len(merge_unique)
                merge_map = dict()
                for i in range(merge_unique_len):
                    s = merge_groups[i]
                    bb, ee = s * squeeze + 1, min((s + 1) * squeeze, merge_unique_len)
                    b, e = merge_unique[bb - 1], merge_unique[ee - 1]
                    cnt = ee - (bb - 1)
                    if b == e:
                        merge_map[merge_unique[i]] = 'group %s (1 item, %s)' % (chr(65 + s), b)
                    else:
                        if isinstance(b, datetime.datetime):
                            duration = dateutils.human_interval(b, e)
                            merge_map[merge_unique[i]] = 'group %s (%d items, period of %s)' % (
                                chr(65 + s), cnt, duration)
                        else:
                            merge_map[merge_unique[i]] = 'group %s (%d items, %s - %s)' % (chr(65 + s), cnt, b, e)

                # color_data[':merged:'] = color_data[chart_options.x].map(merge_map)
                # TODO carefully think this through
                color_data[chart_options.x] = color_data[chart_options.x].map(merge_map)
                # chart_options.x = ':merged:'

            with Timer('agg ' + color_title, log=logger.info):
                cd_group = color_data.groupby(chart_options.x, sort=True).aggregate({
                    chart_options.c: lambda x: list(set(x)),
                    chart_options.y: chart_group.y_metrics.items(),
                    '_id'          : lambda x: list(set(x)),
                })

            if chart_group.boxplot_chart:
                series.append(chart_group.boxplot_chart.get_chart(
                    cd_group, color_title,
                    color=color(0.8)
                ))

            if chart_group.std_chart:
                series.append(chart_group.std_chart.get_chart(
                    cd_group, color_title,
                    color=color(0.3),
                    fillColor=color(0.1)
                ))

            if chart_group.ci_chart:
                series.append(chart_group.ci_chart.get_chart(
                    cd_group, color_title,
                    color=color(0.3),
                    fillColor=color(0.1)
                ))

            if chart_group.errorbar_chart:
                series.append(chart_group.errorbar_chart.get_chart(
                    cd_group, color_title,
                    color=color(0.3),
                    fillColor=color(0.1)
                ))

            if chart_group.mean_chart:
                series.append(chart_group.mean_chart.get_chart(
                    cd_group, color_title,
                    color=color(1.0),
                ))

            if chart_group.median_chart:
                series.append(chart_group.median_chart.get_chart(
                    cd_group, color_title,
                    color=color(1.0),
                ))

            if series:
                series[-1]['extra'] = {
                    '_id'    : cd_group['_id'],
                    'commits': cd_group[chart_options.c],
                }

            extra['size'].append(len(cd_group))

        charts.append(dict(
            title=group_title,
            series=series,
            xAxis=dict(title=dict(text=chart_options.x)),
            yAxis=dict(title=dict(text=chart_options.y)),
            extra=extra,
        ))

    return dict(
        status=200,
        data=charts
    )
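The squeeze branch above packs sorted unique x-values into fixed-size buckets via np.repeat and maps each value to a group label. A self-contained sketch of just that bucketing (values and labels are illustrative):

    import numpy as np

    squeeze = 3
    merge_unique = sorted([10, 20, 30, 40, 50, 60, 70])
    merge_groups = np.repeat(np.arange(int(len(merge_unique) / squeeze + 1)), squeeze)

    merge_map = {}
    for i, value in enumerate(merge_unique):
        merge_map[value] = 'group %s' % chr(65 + merge_groups[i])  # A, B, C, ...

    print(merge_map)
    # {10: 'group A', 20: 'group A', 30: 'group A',
    #  40: 'group B', 50: 'group B', 60: 'group B', 70: 'group C'}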