Example #1
0
 def as_vector(self):
     '''
     Flatten this record into an (X, Y) pair for a prediction model.

     X is the numeric header [inst_id, family, cpu, memory, scale_id,
     ts[0], ts[1]] followed by the encoded metrics; Y is the job
     completion time (self.jct).
     '''
     # Only records already encoded ('enc') can be vectorized.
     assert self.tag == 'enc', 'metrics un-encoded, unable to vectorize'
     conf = LumosConf()
     inst_id = conf.get_inst_id(self.inst_type)
     detail = conf.get_inst_detailed_conf(self.inst_type)
     header = [
         inst_id,
         detail['family'],
         detail['cpu'],
         detail['memory'],
         conf.get_scale_id(self.scale),
         self.ts[0],
         self.ts[1],
     ]
     features = np.concatenate((np.array(header), self.metrics), axis=0)
     return features, self.jct
Example #2
0
    def get_train_test_data(self,
                            train_scale='tiny',
                            test_wl='',
                            flag='single'):
        '''
        Build training and testing feature sets from profiled run data.

        param:
        @train_scale: the workload scale whose profiled metrics supply the
            source features (e.g. 'tiny' or 'small')
        @test_wl: the workload held out for testing, or a suite name
            ('HiBench'/'BigBench') when flag == 'multi'
        @flag: 'single' holds out exactly @test_wl; 'multi' holds out the
            whole suite @test_wl names
        return: (train_data, test_data) — nested defaultdicts whose leaves
            are {'X': [feature rows], 'Y': [rank or jct targets]};
            train_data is keyed [rnd][inst_type], test_data is keyed
            [wl][rnd][inst_type][scale]
        '''
        rankize_data = self.get_data_rankize()
        assert test_wl in self.__data['1'] or test_wl in (
            'HiBench', 'BigBench'), 'invalid test workload'
        assert flag in ('single',
                        'multi'), 'indicating single/multi testing workloads'

        def is_test_wl(wl):
            # True when workload `wl` belongs to the held-out test set.
            if flag == 'single':
                return wl == test_wl
            # NOTE(review): with flag='multi' and a concrete workload name
            # (neither suite), this falls through and returns None (falsy),
            # so nothing is held out — confirm that is intended.
            if test_wl == 'BigBench':
                return 'hive' in wl
            elif test_wl == 'HiBench':
                return 'hive' not in wl

        conf = LumosConf()
        truncate = conf.get('dataset', 'truncate')
        fft_stat_encoder = FFTStatEncoder(truncate=truncate)

        train_data = defaultdict(lambda: defaultdict(lambda: {
            'X': [],
            'Y': []
        }))
        test_data = defaultdict(lambda: defaultdict(lambda: \
            defaultdict(lambda: defaultdict(lambda: {
            'X': [],
            'Y': []
        }))))

        predict_scales = ['tiny', 'small', 'large', 'huge']
        if train_scale == 'small':
            predict_scales.remove('tiny')

        def encode_source(record):
            # Encode one profiled source record into
            # (inst_type, inst conf list, FFT/stat metrics vector).
            inst_type = record.inst_type
            src_conf = conf.get_inst_detailed_conf(inst_type, format='list')
            src_vec = fft_stat_encoder.encode(
                record.metrics,
                record.raw_metrics,
                sampling_interval=self.sampling_interval)
            return inst_type, src_conf, src_vec

        def build_sample(src_conf, src_vec, target_scale, record2):
            # Assemble one (X, Y) pair: source conf + target conf +
            # target scale id + source metrics; Y is rank when ordinal,
            # else jct.
            target_conf = conf.get_inst_detailed_conf(
                record2.inst_type, format='list')
            X = src_conf.copy()
            X.extend(target_conf)
            X.append(target_scale)
            X.extend(src_vec)
            Y = record2.rank if self.ordinal else record2.jct
            return X, Y

        # Single pass replaces the two formerly-duplicated train/test
        # loops; per-bucket append order is unchanged.
        for rnd, rnd_data in rankize_data.items():
            for wl, wl_data in rnd_data.items():
                is_test = is_test_wl(wl)
                for record1 in wl_data[train_scale]:
                    t_inst_type, src_conf, src_vec = encode_source(record1)
                    for scale in predict_scales:
                        target_scale = conf.get_scale_id(scale)
                        for record2 in wl_data[scale]:
                            X, Y = build_sample(
                                src_conf, src_vec, target_scale, record2)
                            if is_test:
                                bucket = test_data[wl][rnd][t_inst_type][scale]
                            else:
                                bucket = train_data[rnd][t_inst_type]
                            bucket['X'].append(X)
                            bucket['Y'].append(Y)

        return train_data, test_data