Ejemplo n.º 1
0
    def statistics(self):
        """
        Return statistics about the S3UploadController object.

        This method can only run at the end of a non empty copy: both
        ``self.copy_end_time`` and ``self.upload_stats`` must be set.

        Every duration in the result is a ``timedelta`` converted through
        ``total_seconds``. The returned dictionary contains:

        * ``total_time``: time between ``copy_start_time`` and
          ``copy_end_time``
        * ``number_of_workers``: value of
          ``cloud_interface.worker_processes_count``
        * ``analysis_time`` and ``analysis_time_per_item``: always zero,
          as cloud uploads have no analysis
        * ``copy_time``: time between the earliest file start and the
          latest file end
        * ``copy_time_per_item``: time between start and end of each file
        * ``serialized_copy_time``: sum of the durations of every part
          of every file
        * ``serialized_copy_time_per_item``: sum of the durations of the
          parts of each file

        :rtype: dict
        :raises AssertionError: if the copy is not finished or no upload
            statistics have been collected
        """
        logging.info("S3UploadController.statistics()")

        # This method can only run at the end of a non empty copy
        assert self.copy_end_time
        assert self.upload_stats

        # Initialise the result calculating the total runtime
        stat = {
            'total_time': total_seconds(
                self.copy_end_time - self.copy_start_time),
            'number_of_workers': self.cloud_interface.worker_processes_count,
            # Cloud uploads have no analysis
            'analysis_time': 0,
            'analysis_time_per_item': {},
            'copy_time_per_item': {},
            'serialized_copy_time_per_item': {},
        }

        # Serialising the statistics of every file to JSON is expensive,
        # so check only once whether the debug message would be emitted
        debug_enabled = logging.getLogger().isEnabledFor(logging.DEBUG)

        # Calculate the time spent uploading
        upload_start = None
        upload_end = None
        serialized_time = datetime.timedelta(0)
        for name, data in self.upload_stats.items():
            if debug_enabled:
                logging.debug(
                    'Calculating statistics for file %s, data: %s',
                    name, json.dumps(data, indent=2, sort_keys=True,
                                     cls=BarmanEncoder))
            # Track the overall upload window across all the files
            if upload_start is None or upload_start > data['start_time']:
                upload_start = data['start_time']
            if upload_end is None or upload_end < data['end_time']:
                upload_end = data['end_time']
            # Cloud uploads have no analysis
            stat['analysis_time_per_item'][name] = 0
            stat['copy_time_per_item'][name] = total_seconds(
                data['end_time'] - data['start_time'])
            # Add up the duration of every part of this file, regardless
            # of any overlap between them
            file_time = sum(
                (part['end_time'] - part['start_time']
                 for part in data['parts'].values()),
                datetime.timedelta(0))
            stat['serialized_copy_time_per_item'][name] = total_seconds(
                file_time)
            serialized_time += file_time

        # Store the total time spent by copying
        stat['copy_time'] = total_seconds(upload_end - upload_start)
        stat['serialized_copy_time'] = total_seconds(serialized_time)

        return stat
Ejemplo n.º 2
0
    def statistics(self):
        """
        Return statistics about the copy object.

        This method can only run at the end of a non empty copy:
        ``self.copy_end_time``, ``self.item_list`` and ``self.jobs_done``
        must all be set.

        Every duration in the result is a ``timedelta`` converted through
        ``total_seconds``. The returned dictionary contains:

        * ``total_time``: time between ``copy_start_time`` and
          ``copy_end_time``
        * ``number_of_workers``: value of ``self.workers``
        * ``analysis_time``: time between the earliest analysis start and
          the latest analysis end, or a zero duration when no item
          required analysis
        * ``analysis_time_per_item``: analysis duration of each analysed
          item, keyed by item label
        * ``copy_time``: time between the earliest job start and the
          latest job end
        * ``copy_time_per_item``: time between the first job start and
          the last job end of each item
        * ``serialized_copy_time``: sum of the durations of all the jobs
        * ``serialized_copy_time_per_item``: sum of the durations of the
          jobs of each item

        :rtype: dict
        :raises AssertionError: if the copy is not finished, or the item
            list or the list of completed jobs is empty
        """
        # This method can only run at the end of a non empty copy
        assert self.copy_end_time
        assert self.item_list
        assert self.jobs_done

        # Initialise the result calculating the total runtime
        stat = {
            'total_time':
            total_seconds(self.copy_end_time - self.copy_start_time),
            'number_of_workers': self.workers,
            'analysis_time_per_item': {},
            'copy_time_per_item': {},
            'serialized_copy_time_per_item': {},
        }

        # Calculate the time spent during the analysis of the items
        analysis_start = None
        analysis_end = None
        for item in self.item_list:
            # Some items don't require analysis
            if not item.analysis_end_time:
                continue
            if (analysis_start is None or
                    analysis_start > item.analysis_start_time):
                analysis_start = item.analysis_start_time
            if (analysis_end is None or
                    analysis_end < item.analysis_end_time):
                analysis_end = item.analysis_end_time
            # The item label is the human readable name used in the output
            stat['analysis_time_per_item'][item.label] = total_seconds(
                item.analysis_end_time - item.analysis_start_time)
        if analysis_start is None or analysis_end is None:
            # No item required analysis: report a zero duration instead
            # of failing while subtracting None from None
            stat['analysis_time'] = total_seconds(datetime.timedelta(0))
        else:
            stat['analysis_time'] = total_seconds(
                analysis_end - analysis_start)

        # Calculate the time spent per job
        # WARNING: this code assumes that every item is copied separately,
        # so it's strictly tied to the `_job_generator` method code
        item_data = {}
        for job in self.jobs_done:
            # WARNING: the item contained in the job is not the same object
            # contained in self.item_list, as it has gone through two
            # pickling/unpickling cycle
            # Build a human readable name to refer to an item in the output
            ident = self.item_list[job.item_idx].label
            job_time = job.copy_end_time - job.copy_start_time
            data = item_data.get(ident)
            if data is None:
                # If this is the first time we see this item we just store
                # the values from the job
                item_data[ident] = {
                    'start': job.copy_start_time,
                    'end': job.copy_end_time,
                    'total_time': job_time,
                }
                continue
            # Otherwise widen the time window of the item and add up the
            # duration of the job
            if data['start'] > job.copy_start_time:
                data['start'] = job.copy_start_time
            if data['end'] < job.copy_end_time:
                data['end'] = job.copy_end_time
            data['total_time'] += job_time

        # Calculate the time spent copying
        copy_start = None
        copy_end = None
        serialized_time = datetime.timedelta(0)
        for ident, data in item_data.items():
            if copy_start is None or copy_start > data['start']:
                copy_start = data['start']
            if copy_end is None or copy_end < data['end']:
                copy_end = data['end']
            stat['copy_time_per_item'][ident] = total_seconds(
                data['end'] - data['start'])
            stat['serialized_copy_time_per_item'][ident] = total_seconds(
                data['total_time'])
            serialized_time += data['total_time']
        # Store the total time spent by copying
        stat['copy_time'] = total_seconds(copy_end - copy_start)
        stat['serialized_copy_time'] = total_seconds(serialized_time)

        return stat