Esempio n. 1
0
class ExperimentInfo:
    """Hold an experiment's settings and query task counts from its logs.

    The applications are obtained by handing ``path`` to the parser's
    ``parse_folder`` every time a task count is requested.
    """

    def __init__(self, threads, block_size, path=None):
        """Store the experiment settings.

        Args:
            threads: Stored unchanged in ``self.threads``.
            block_size: Divisor used by :meth:`get_n_blocks`.
            path: Folder passed to the parser's ``parse_folder``.
        """
        self.threads = threads
        self.block_size = block_size
        self._path = path
        self._parser = Parser()

    def get_n_blocks(self, input_size):
        """Return ``input_size / block_size`` rounded up with ``np.ceil``."""
        return np.ceil(input_size / self.block_size)

    def get_n_tasks(self, stage):
        """Return the number of non-failed tasks of ``stage``.

        Args:
            stage: Key/index used to look the stage up in ``app.stages``.

        Raises:
            AssertionError: If the parsed applications do not yield exactly
                one distinct value.
        """
        task_amounts = self._get_ns_tasks(stage)
        assert len(task_amounts) == 1, 'More than 1 value: {}'.format(
            task_amounts)
        return task_amounts[0]

    def _get_ns_tasks(self, stage):
        """Return the distinct per-application counts of non-failed tasks."""
        amounts = set()
        for app in self._get_apps():
            tasks = app.stages[stage].tasks
            # Count only the tasks that did not fail. The previous code
            # computed this value but then added len(tasks), so failed
            # attempts inflated the count and made apps disagree.
            amounts.add(sum(1 for t in tasks if not t.failed))
        return list(amounts)

    def _get_apps(self):
        """Parse and return the applications found under ``self._path``."""
        return self._parser.parse_folder(self._path)
Esempio n. 2
0
 def _build_all_tasks(self):
     """Return two data frames: first-task and non-first-task records.

     The apps parsed from ``self._folder`` are passed to
     ``self._get_task_records``, which supplies both record collections
     and the column names shared by the two frames.

     Returns:
         Tuple ``(first, nonfirst)`` of ``pandas.DataFrame`` objects.
     """
     apps = Parser().parse_folder(self._folder)
     first_records, nonfirst_records, columns = self._get_task_records(apps)
     first_df = pd.DataFrame.from_records(first_records, columns=columns)
     nonfirst_df = pd.DataFrame.from_records(nonfirst_records,
                                             columns=columns)
     return first_df, nonfirst_df
Esempio n. 3
0
 def _build(self):
     """Build a data frame with one entry per parsed application.

     Apps parsed from ``self._folder`` are paired positionally with the
     values of ``self._get_app_sizes()``. For each pair, the app duration,
     the size and ``app.slaves`` are collected and handed to ``_get_df``.

     Returns:
         Whatever ``_get_df(workers, sizes, durations)`` returns.
     """
     apps = Parser().parse_folder(self._folder)
     app_sizes = self._get_app_sizes()
     # Single pass over the pairs, in case ``apps`` can only be iterated once.
     records = [(self._get_app_duration(app), size, app.slaves)
                for app, size in zip(apps, app_sizes)]
     durations = [record[0] for record in records]
     sizes = [record[1] for record in records]
     workers = [record[2] for record in records]
     return _get_df(workers, sizes, durations)
Esempio n. 4
0
 def _build_first_nonfirst_tasks(self):
     """Build one data frame for first tasks and one for non-first tasks.

     Apps parsed from ``self._folder`` are paired positionally with the
     values of ``self._get_app_sizes()``. Every task duration returned by
     ``self._get_first_nonfirst_tasks_durations`` adds one entry, together
     with its app's size and ``app.slaves``, under the matching
     ``'first'`` or ``'nonfirst'`` key.

     Returns:
         Tuple ``(first, nonfirst)`` of the objects built by ``_get_df``.
     """
     groups = ('first', 'nonfirst')
     durations = {group: [] for group in groups}
     sizes = {group: [] for group in groups}
     workers = {group: [] for group in groups}

     apps = Parser().parse_folder(self._folder)
     for app, size in zip(apps, self._get_app_sizes()):
         first_durs, nonfirst_durs = \
             self._get_first_nonfirst_tasks_durations(app)
         per_group = (('first', first_durs), ('nonfirst', nonfirst_durs))
         for group, task_durs in per_group:
             durations[group].extend(task_durs)
             # Repeat size and worker count once per task so the three
             # lists of a group stay the same length.
             sizes[group].extend([size] * len(task_durs))
             workers[group].extend([app.slaves] * len(task_durs))

     first_df = _get_df(workers, sizes, durations, 'first')
     nonfirst_df = _get_df(workers, sizes, durations, 'nonfirst')
     return first_df, nonfirst_df