Example #1
    def generate_default_report(self, output_csv=False):
        """Format a default summary report based on calculated statistics for
        an executed scenario.

        :returns: A report (string) suitable for printing, emailing, etc.
        """

        stats = self.stats
        template = Template(self.scenario_template())
        tmpl_vars = {
            'size_data': [],
            'stat_list': [
                ('TOTAL', stats['agg_stats'], stats['size_stats']),
                ('CREATE', stats['op_stats'][ssbench.CREATE_OBJECT],
                 stats['op_stats'][ssbench.CREATE_OBJECT]['size_stats']),
                ('READ', stats['op_stats'][ssbench.READ_OBJECT],
                 stats['op_stats'][ssbench.READ_OBJECT]['size_stats']),
                ('UPDATE', stats['op_stats'][ssbench.UPDATE_OBJECT],
                 stats['op_stats'][ssbench.UPDATE_OBJECT]['size_stats']),
                ('DELETE', stats['op_stats'][ssbench.DELETE_OBJECT],
                 stats['op_stats'][ssbench.DELETE_OBJECT]['size_stats']),
            ],
            'agg_stats': stats['agg_stats'],
            'nth_pctile': stats['nth_pctile'],
            'start_time': datetime.utcfromtimestamp(
                stats['time_series']['start_time']).strftime(
                    REPORT_TIME_FORMAT),
            'stop_time': datetime.utcfromtimestamp(
                stats['time_series']['stop']).strftime(REPORT_TIME_FORMAT),
            'duration': (stats['time_series']['stop'] -
                         stats['time_series']['start_time']),
            'jobs_per_worker_stats': stats['jobs_per_worker_stats'],
            'weighted_c': 0.0,
            'weighted_r': 0.0,
            'weighted_u': 0.0,
            'weighted_d': 0.0,
        }
        for size_data in self.scenario.sizes_by_name.values():
            if size_data['size_min'] == size_data['size_max']:
                size_range = '%-15s' % (self._format_bytes(
                    size_data['size_min']), )
            else:
                size_range = '%s - %s' % (
                    self._format_bytes(size_data['size_min']),
                    self._format_bytes(size_data['size_max']))
            initial_files = self.scenario._scenario_data['initial_files']
            initial_total = sum(initial_files.values())
            pct_total = (initial_files.get(size_data['name'], 0) /
                         float(initial_total) * 100.0)
            tmpl_vars['size_data'].append({
                'crud_pcts': '  '.join('%2.0f' % p
                                       for p in size_data['crud_pcts']),
                'size_range': size_range,
                'size_name': size_data['name'],
                'pct_total_ops': '%3.0f%%' % pct_total,
            })
            # Accumulate scenario-wide CRUD percentages, weighting each size
            # category by its share of the initial object population.
            tmpl_vars['weighted_c'] += \
                pct_total * size_data['crud_pcts'][0] / 100.0
            tmpl_vars['weighted_r'] += \
                pct_total * size_data['crud_pcts'][1] / 100.0
            tmpl_vars['weighted_u'] += \
                pct_total * size_data['crud_pcts'][2] / 100.0
            tmpl_vars['weighted_d'] += \
                pct_total * size_data['crud_pcts'][3] / 100.0
        if output_csv:
            csv_fields = [
                'scenario_name', 'ssbench_version', 'worker_count',
                'concurrency', 'start_time', 'stop_time', 'duration',
                'delete_after'
            ]
            csv_data = {
                'scenario_name': self.scenario.name,
                'ssbench_version': self.scenario.version,
                'worker_count': tmpl_vars['agg_stats']['worker_count'],
                'concurrency': self.scenario.user_count,
                'start_time': tmpl_vars['start_time'],
                'stop_time': tmpl_vars['stop_time'],
                'duration': tmpl_vars['duration'],
                'delete_after': str(self.scenario.delete_after),
            }
            # NB: this loop rebinds `stats`, shadowing the aggregate stats
            # dict bound at the top of the method.
            for label, stats, sstats in tmpl_vars['stat_list']:
                label_lc = label.lower()
                if stats.get('req_count', 0):
                    self._add_csv_kv(csv_fields, csv_data,
                                     '%s_count' % label_lc, stats['req_count'])
                    self._add_csv_kv(csv_fields, csv_data,
                                     '%s_errors' % label_lc, stats['errors'])
                    self._add_csv_kv(csv_fields, csv_data,
                                     '%s_retries' % label_lc, stats['retries'])
                    self._add_csv_kv(csv_fields, csv_data,
                                     '%s_retry_rate' % label_lc,
                                     '%5.2f' % stats['retry_rate'])
                    self._add_csv_kv(csv_fields, csv_data,
                                     '%s_avg_req_per_s' % label_lc,
                                     stats['avg_req_per_sec'])
                    self._add_stats_for(csv_fields, csv_data, label, 'all',
                                        stats, tmpl_vars['nth_pctile'])
                    for size_str, per_size_stats in sstats.iteritems():
                        if per_size_stats:
                            self._add_stats_for(csv_fields, csv_data, label,
                                                size_str, per_size_stats,
                                                tmpl_vars['nth_pctile'])
            csv_file = StringIO()
            csv_writer = DictWriter(csv_file,
                                    csv_fields,
                                    lineterminator='\n',
                                    quoting=csv.QUOTE_NONNUMERIC)
            csv_writer.writeheader()
            csv_writer.writerow(csv_data)
            return csv_file.getvalue()
        else:
            return template.render(scenario=self.scenario, **tmpl_vars)
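
For context, a minimal usage sketch follows, assuming a hypothetical `reporter` object whose `stats` and `scenario` attributes have already been populated; the output path is likewise a stand-in, not part of ssbench's documented API.

report = reporter.generate_default_report()  # plain-text summary table
print(report)

csv_report = reporter.generate_default_report(output_csv=True)
with open('run_results.csv', 'w') as f:  # hypothetical output path
    f.write(csv_report)  # single CSV row carrying the same figures
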
Example #2
import heapq
import math
import operator

import statlib.stats


def parse_to_logn_and_normalize(problem_set):
        columns, data = parse(problem_set)
        #Append the number of possible values to each column's metadata tuple
        for key in columns:
                info = columns[key]
                temp = list(info[:])
                temp.append(len(info[1]))
                columns[key] = tuple(temp)

        #Count how often each value occurs for each attribute, so that the
        #mean (or most common) value can be imputed for missing entries
        stats = {}
        for key in data:
                example = data[key]
                for i in range(0, len(example)-1):
                        if not stats.get(i):
                                stats[i] = {}
                        if example[i]:
                                if stats[i].get(example[i]):
                                        stats[i][example[i]] += 1
                                else:
                                        stats[i][example[i]] = 1

        #Calculate the value that should be filled in for any missing values.
        #Note: `example` here is whatever row the loop above ended on; it is
        #used only to recover the number of attributes.
        missing_values = {}
        for i in range(0, len(example)-1):
                possibles = stats[i]
                if "continuous" in columns[i+1][1]:
                        #Impute the mean, weighting each observed value by how
                        #often it occurred
                        weighted_sum = math.fsum(value * count for value, count
                                                 in possibles.iteritems())
                        total_count = sum(possibles.itervalues())
                        missing_values[i] = weighted_sum / total_count
                else:
                        #Impute the single most common value
                        most_common = heapq.nlargest(1, possibles.iteritems(),
                                                     key=operator.itemgetter(1))
                        missing_values[i] = most_common[0][0]

        #Fill in any missing values
        for key in data:
                example = data[key]
                for i in range(0, len(example)-1):
                        if not example[i]:
                                example[i] = missing_values[i]

        #Calculate normalization parameters (mean, standard deviation) for
        #each continuous attribute
        normalizers = {}
        for i in range(0, len(example)-1):
                if "continuous" in columns[i+1][1]:
                        temp = [example[i] for example in data.itervalues()]
                        normalizers[i] = (statlib.stats.lmean(temp), statlib.stats.lstdev(temp))

        #Create log2(N) binary encodings: each discrete value is mapped to a
        #vector of ceil(log2(num_values)) bits
        mappings = {}
        for i in range(0, len(example)-1):
                if "continuous" not in columns[i+1][1]:
                        mappings[i] = {}
                        values = columns[i+1][1]
                        num_values = columns[i+1][2]
                        num_inputs = int(math.ceil(math.log(num_values, 2)))
                        for j in range(0, len(values)):
                                mappings[i][values[j]] = [int(x) for x in tobin(j, num_inputs)]
        
        #Convert to log(N) encoding and normalize
        for key in data:
                example = data[key]
                new_example = []
                for i in range(0, len(example)-1):
                        if "continuous" in columns[i+1][1]:
                                val = example[i]
                                new_val = (val - normalizers[i][0]) / normalizers[i][1]
                                new_example.append(new_val)
                        else:
                                new_example.extend(mappings[i][example[i]])
                new_example.append(int(example[-1]))
                data[key] = new_example
        
        return columns, data
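
The snippet above calls a tobin() helper that is not shown. Here is a minimal sketch, under the assumption that it renders an integer as a fixed-width binary string (one character per eventual network input):

def tobin(value, width):
        #Hypothetical reconstruction of the missing helper: binary string for
        #`value`, zero-padded to `width` digits, e.g. tobin(3, 4) -> '0011'
        return format(value, 'b').zfill(int(width))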