Example #1
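DeepTune-style device mapping: an LSTM encoder over kernel source tokens plus two auxiliary inputs (transfer, wgsize), trained and evaluated per platform with 10-fold stratified cross-validation; per-benchmark accuracy and speedup are then aggregated with pandas.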
def cli_main():
    SEED = 204
    BATCH_SIZE = 64
    MAX_SOURCE_POSITIONS = 1024
    EPOCH = 50

    from ncc.utils.set_seed import set_seed
    set_seed(SEED)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        device = os.environ.get('CUDA_VISIBLE_DEVICES', '0').split(',')[0]  # first listed device as default
        torch.cuda.set_device(f'cuda:{device}')
    criterion = DeepTuneLoss(task=None, sentence_avg=-1)
    if use_cuda:
        criterion = criterion.cuda()

    data = []
    for i, platform in enumerate(LANGUAGES):
        DATA_DIR = os.path.join(DATASET_DIR, f'mapping/{platform}/data-mmap')

        def get_attr(attr):
            oracle_file = os.path.join(DATA_DIR, f'train.{attr}')
            with open(oracle_file, 'rb') as reader:
                out = pickle.load(reader)
            return np.asarray(out)

        platform_name = mapping_metrics.platform2str(platform)
        benchmarks = get_attr('benchmark')
        runtime_cpus = get_attr('runtime_cpu')
        runtime_gpus = get_attr('runtime_gpu')

        #################### load dataset ####################
        src_dataset = load_mmap_dataset(os.path.join(DATA_DIR, 'train.src_tokens'))
        src_dataset = TruncateDataset(src_dataset, truncation_length=MAX_SOURCE_POSITIONS, truncate_prefix=0)
        tgt_dataset = load_mmap_dataset(os.path.join(DATA_DIR, 'train.oracle'))

        src_dict = Dictionary.load(os.path.join(DATA_DIR, 'src_tokens.dict.jsonl'))
        src_aux = OrderedDict()
        src_aux['transfer'] = get_attr('transfer')
        src_aux['wgsize'] = get_attr('wgsize')

        tgt_dict = Dictionary.load(os.path.join(DATA_DIR, 'oracle.dict.jsonl'))

        dataset = LanguagePairDataset(
            src=src_dataset, src_sizes=src_dataset.sizes, src_dict=src_dict, src_aux=src_aux,
            tgt=tgt_dataset, tgt_sizes=tgt_dataset.sizes, tgt_dict=tgt_dict, tgt_aux=None,
            left_pad_source=True, max_source_positions=MAX_SOURCE_POSITIONS,
        )
        #################### load dataset ####################

        # build toy dataset for 10-fold cross validation
        tgt_data = [tgt_dataset[idx].item() for idx in range(len(tgt_dataset))]
        src_data = [None] * len(tgt_data)

        # 10-fold cross-validation
        kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED)
        for j, (train_ids, test_ids) in enumerate(kf.split(src_data, tgt_data)):
            # deeptune model
            model = DeepTuneEncoder(dictionary=src_dict, embed_dim=64,
                                    rnn_cell='lstm', rnn_hidden_dim=64, rnn_dropout=0., rnn_num_layers=2,
                                    aux_dim=2, inner_dim=32, out_dim=2)
            if use_cuda:
                model = model.cuda()
            optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
            for epoch_i in range(EPOCH):
                if dataset.shuffle:
                    random.shuffle(train_ids)
                train_batch_sampler = data_utils.batch_by_size(
                    train_ids,
                    num_tokens_fn=lambda *args: -1,
                    max_sentences=BATCH_SIZE,
                )
                train_dataloader = DataLoader(dataset=dataset,
                                              batch_sampler=train_batch_sampler,
                                              collate_fn=collate, )
                with tqdm(total=len(train_dataloader)) as t:
                    for sample_i, sample in enumerate(train_dataloader, start=1):
                        t.set_description(f'Epoch {epoch_i + 1}/{EPOCH} Batch {sample_i}/{len(train_dataloader)}')
                        if use_cuda:
                            sample = move_to_cuda(sample)
                        loss, sample_size, logging_output = criterion(model, sample)
                        loss.div_(sample_size)
                        t.set_postfix(loss=loss.item())
                        t.update()

                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

            # test accuracy
            test_batch_sampler = data_utils.batch_by_size(
                test_ids,
                num_tokens_fn=lambda *args: -1,
                max_sentences=BATCH_SIZE,
            )
            test_dataloader = DataLoader(dataset=dataset,
                                         batch_sampler=test_batch_sampler,
                                         collate_fn=collate, )
            predictions, ground_truth = [], []
            for sample in test_dataloader:
                if use_cuda:
                    sample = move_to_cuda(sample)
                hybrid_out, _ = model(**sample['net_input'])
                predictions.append(hybrid_out.max(dim=-1)[1])
                ground_truth.append(sample['target'].view(-1))
            predictions = torch.cat(predictions)
            ground_truth = torch.cat(ground_truth)

            accuracy = (predictions == ground_truth).tolist()
            # runtimes of baseline mapping (CPU on AMD, GPU on NVIDIA)
            gt_runtimes = (runtime_cpus if platform == "amd" else runtime_gpus)[test_ids]
            pred_runtimes = [
                (runtime_cpus if pred == 0 else runtime_gpus)[idx]
                for idx, pred in zip(test_ids, predictions)
            ]
            speedup = gt_runtimes / pred_runtimes

            # record results
            for benchmark_, o_, p_, accuracy_, p_speedup_ in \
                zip(benchmarks[test_ids], ground_truth, predictions, accuracy, speedup):
                data.append({
                    "Model": model.__class__.__name__,
                    "Platform": platform_name,
                    'Benchmark': mapping_metrics.escape_benchmark_name(benchmark_),
                    'Benchmark Suite': mapping_metrics.escape_suite_name(benchmark_),
                    "Oracle Mapping": o_,
                    "Predicted Mapping": p_,
                    "Accuracy": accuracy_,
                    "Speedup": p_speedup_,
                })
            del model, optimizer
    performance = pd.DataFrame(
        data, index=range(1, len(data) + 1), columns=[
            "Model",
            "Platform",
            "Benchmark",
            "Benchmark Suite",
            "Oracle Mapping",
            "Predicted Mapping",
            "Accuracy",
            "Speedup"
        ])
    benchmark_out = performance.groupby(['Platform', 'Benchmark Suite'])[['Accuracy', 'Speedup']].mean()
    benchmark_out['Accuracy'] = round(benchmark_out['Accuracy'] * 100, 2)
    benchmark_out['Speedup'] = round(benchmark_out['Speedup'], 2)
    print(benchmark_out)
    out = performance.groupby(['Platform'])[['Accuracy', 'Speedup']].mean()
    out['Accuracy'] = round(out['Accuracy'] * 100, 2)
    out['Speedup'] = round(out['Speedup'], 2)
    print(out)
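
Example #1 imports set_seed from ncc.utils.set_seed without showing it. A minimal sketch of what such a helper typically does (an assumption, not the library's actual code):

import random

import numpy as np
import torch

def set_seed(seed: int):
    # Seed every RNG the training loop touches so runs are reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)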
Example #2
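Decision-tree baseline: four hand-crafted ratio features (F1-F4) derived from kernel attributes feed a decision tree, evaluated with the same 10-fold stratified protocol.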
def cli_main():
    data = []
    for i, platform in enumerate(LANGUAGES):

        def get_attr(attr):
            oracle_file = os.path.join(ATTRIBUTES_DIR, platform,
                                       f'train.{attr}')
            with open(oracle_file, 'r') as reader:
                out = [json_io.json_loads(line) for line in reader]
            return np.asarray(out)

        def get_src_tokens():
            transfer = get_attr("transfer")
            comp = get_attr("comp")
            mem = get_attr("mem")
            coalesced = get_attr("coalesced")
            localmem = get_attr("localmem")
            wgsize = get_attr("wgsize")
            return np.array([
                transfer / (comp + mem),  # F1
                coalesced / mem,  # F2
                localmem / mem * wgsize,  # F3
                comp / mem,  # F4
            ]).T

        platform_name = mapping_metrics.platform2str(platform)
        devices = get_attr('oracle')
        benchmarks = get_attr('benchmark')
        runtime_cpus = get_attr('runtime_cpu')
        runtime_gpus = get_attr('runtime_gpu')

        # optimal mappings
        src_tokens = get_src_tokens()
        ground_truth = np.array([1 if x == "GPU" else 0 for x in devices])

        # 10-fold cross-validation
        kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED)
        for j, (train_ids, test_ids) in enumerate(kf.split(src_tokens, ground_truth)):
            # decision tree model
            model = DecisionTree.build_model()
            # train
            model.fit(src_tokens=src_tokens[train_ids],
                      ground_truth=ground_truth[train_ids])
            # accuracy
            predictions = model(src_tokens=src_tokens[test_ids])
            gt = ground_truth[test_ids]
            correct = (predictions == gt)
            # runtimes of baseline mapping (CPU on AMD, GPU on NVIDIA)
            gt_runtimes = (runtime_cpus
                           if platform == "amd" else runtime_gpus)[test_ids]
            pred_runtimes = [(runtime_cpus if pred == 0 else runtime_gpus)[idx]
                             for idx, pred in zip(test_ids, predictions)]
            speedup = gt_runtimes / pred_runtimes

            # record results
            for benchmark_, o_, p_, correct_, p_speedup_ in \
                zip(benchmarks[test_ids], ground_truth[test_ids], predictions, correct, speedup):
                data.append({
                    "Model": model.__class__.__name__,
                    "Platform": platform_name,
                    "Benchmark": mapping_metrics.escape_benchmark_name(benchmark_),
                    "Benchmark Suite": mapping_metrics.escape_suite_name(benchmark_),
                    "Oracle Mapping": o_,
                    "Predicted Mapping": p_,
                    "Accuracy": correct_,
                    "Speedup": p_speedup_,
                })
            del model
    performance = pd.DataFrame(
        data, index=range(1, len(data) + 1), columns=[
            "Model", "Platform", "Benchmark", "Benchmark Suite",
            "Oracle Mapping", "Predicted Mapping", "Accuracy", "Speedup",
        ])
    benchmark_out = performance.groupby(['Platform', 'Benchmark Suite'])[['Accuracy', 'Speedup']].mean()
    benchmark_out['Accuracy'] = round(benchmark_out['Accuracy'] * 100, 2)
    benchmark_out['Speedup'] = round(benchmark_out['Speedup'], 2)
    print(benchmark_out)
    out = performance.groupby(['Platform'])[['Accuracy', 'Speedup']].mean()
    out['Accuracy'] = round(out['Accuracy'] * 100, 2)
    out['Speedup'] = round(out['Speedup'], 2)
    print(out)
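
The DecisionTree.build_model helper used above is external to this snippet. A plausible sketch, assuming it wraps scikit-learn's DecisionTreeClassifier behind the fit/__call__ interface the loop relies on (class internals and hyperparameters are illustrative assumptions):

from sklearn.tree import DecisionTreeClassifier

class DecisionTree:
    # Hypothetical wrapper; the real ncc class may differ.
    def __init__(self, model):
        self.model = model

    @classmethod
    def build_model(cls):
        return cls(DecisionTreeClassifier(max_depth=5, random_state=204))

    def fit(self, src_tokens, ground_truth):
        self.model.fit(src_tokens, ground_truth)

    def __call__(self, src_tokens):
        return self.model.predict(src_tokens)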
Example #3
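Static mapping baseline on the full dataset: a constant predictor built from the oracle labels, with no train/test split.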
def cli_main():
    data = []
    for i, platform in enumerate(['amd', 'nvidia']):

        def get_attr(attr):
            oracle_file = os.path.join(ATTRIBUTES_DIR, f'{platform}.{attr}')
            with open(oracle_file, 'r') as reader:
                out = [json_io.json_loads(line) for line in reader]
            return np.asarray(out)

        platform_name = mapping_metrics.platform2str(platform)
        devices = get_attr('oracle')
        benchmarks = get_attr('benchmark')
        runtime_cpus = get_attr('runtime_cpu')
        runtime_gpus = get_attr('runtime_gpu')

        # static mapping model
        model = StaticMapping.build_model(devices)

        # optimal mappings
        src_tokens = torch.from_numpy(np.zeros(len(devices)))
        ground_truth = torch.from_numpy(
            np.array([1 if x == 1 else 0 for x in devices]))

        predictions = model(src_tokens)
        accuracy = (predictions == ground_truth).tolist()
        # runtimes of baseline mapping (CPU on AMD, GPU on NVIDIA)
        gt_runtimes = (runtime_cpus if platform == "amd" else runtime_gpus)
        pred_runtimes = [(runtime_cpus if pred == 0 else runtime_gpus)[idx]
                         for idx, pred in enumerate(predictions)]
        speedup = gt_runtimes / pred_runtimes
        # record results
        for benchmark_, o_, p_, accuracy_, p_speedup_ in \
            zip(benchmarks, ground_truth, predictions, accuracy, speedup):
            data.append({
                "Model":
                model.__class__.__name__,
                "Platform":
                platform_name,
                'Benchmark':
                mapping_metrics.escape_benchmark_name(benchmark_),
                'Benchmark Suite':
                mapping_metrics.escape_suite_name(benchmark_),
                "Oracle Mapping":
                o_,
                "Predicted Mapping":
                p_,
                "Accuracy":
                accuracy_,
                "Speedup":
                p_speedup_,
            })

    performance = pd.DataFrame(data,
                               index=range(1,
                                           len(data) + 1),
                               columns=[
                                   "Model", "Platform", "Benchmark",
                                   "Benchmark Suite", "Oracle Mapping",
                                   "Predicted Mapping", "Accuracy", "Speedup"
                               ])
    benchmark_out = performance.groupby(['Platform', 'Benchmark Suite'])[['Accuracy', 'Speedup']].mean()
    benchmark_out['Accuracy'] = round(benchmark_out['Accuracy'] * 100, 2)
    benchmark_out['Speedup'] = round(benchmark_out['Speedup'], 2)
    print(benchmark_out)
    out = performance.groupby(['Platform'])[['Accuracy', 'Speedup']].mean()
    out['Accuracy'] = round(out['Accuracy'] * 100, 2)
    out['Speedup'] = round(out['Speedup'], 2)
    print(out)
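
StaticMapping.build_model(devices) is likewise not shown. One plausible reading is a majority-class predictor; the sketch below assumes string oracle labels ("CPU"/"GPU") as in Examples #2 and #4, and is hypothetical rather than the library's implementation:

import numpy as np
import torch

class StaticMapping:
    # Hypothetical majority-class baseline; ignores input features entirely.
    def __init__(self, majority_label: int):
        self.majority_label = majority_label

    @classmethod
    def build_model(cls, devices):
        # Predict 1 (GPU) if the oracle picks the GPU at least half the time.
        gpu_share = float(np.mean(np.asarray(devices) == "GPU"))
        return cls(1 if gpu_share >= 0.5 else 0)

    def __call__(self, src_tokens):
        return torch.full((len(src_tokens),), self.majority_label, dtype=torch.long)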
Example #4
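Static mapping baseline under the 10-fold stratified protocol, for direct comparison with the learned models.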
def cli_main():
    data = []
    for i, platform in enumerate(LANGUAGES):

        def get_attr(attr):
            oracle_file = os.path.join(ATTRIBUTES_DIR, platform,
                                       f'train.{attr}')
            with open(oracle_file, 'r') as reader:
                out = [json_io.json_loads(line) for line in reader]
            return np.asarray(out)

        platform_name = mapping_metrics.platform2str(platform)
        devices = get_attr('oracle')
        benchmarks = get_attr('benchmark')
        runtime_cpus = get_attr('runtime_cpu')
        runtime_gpus = get_attr('runtime_gpu')

        # static mapping model
        model = StaticMapping.build_model(devices)

        # optimal mappings
        features = np.zeros(len(devices))
        ground_truth = np.array([1 if x == "GPU" else 0 for x in devices])

        # 10-fold cross-validation
        kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED)
        for j, (train_ids, test_ids) in enumerate(kf.split(features, ground_truth)):
            # no training
            # accuracy
            src_tokens = torch.from_numpy(features[test_ids])
            predictions = model(src_tokens)

            gt = torch.from_numpy(ground_truth[test_ids])
            accuracy = (predictions == gt).tolist()
            # runtimes of baseline mapping (CPU on AMD, GPU on NVIDIA)
            gt_runtimes = (runtime_cpus
                           if platform == "amd" else runtime_gpus)[test_ids]
            pred_runtimes = [(runtime_cpus if pred == 0 else runtime_gpus)[idx]
                             for idx, pred in zip(test_ids, predictions)]
            speedup = gt_runtimes / pred_runtimes

            # record results
            for benchmark_, o_, p_, accuracy_, p_speedup_ in \
                zip(benchmarks[test_ids], ground_truth[test_ids], predictions, accuracy, speedup):
                data.append({
                    "Model": model.__class__.__name__,
                    "Platform": platform_name,
                    "Benchmark": mapping_metrics.escape_benchmark_name(benchmark_),
                    "Benchmark Suite": mapping_metrics.escape_suite_name(benchmark_),
                    "Oracle Mapping": o_,
                    "Predicted Mapping": p_,
                    "Accuracy": accuracy_,
                    "Speedup": p_speedup_,
                })
    performance = pd.DataFrame(
        data, index=range(1, len(data) + 1), columns=[
            "Model", "Platform", "Benchmark", "Benchmark Suite",
            "Oracle Mapping", "Predicted Mapping", "Accuracy", "Speedup",
        ])
    benchmark_out = performance.groupby(['Platform', 'Benchmark Suite'])[['Accuracy', 'Speedup']].mean()
    benchmark_out['Accuracy'] = round(benchmark_out['Accuracy'] * 100, 2)
    benchmark_out['Speedup'] = round(benchmark_out['Speedup'], 2)
    print(benchmark_out)
    out = performance.groupby(['Platform'])[['Accuracy', 'Speedup']].mean()
    out['Accuracy'] = round(out['Accuracy'] * 100, 2)
    out['Speedup'] = round(out['Speedup'], 2)
    print(out)
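
All of the cross-validated examples rely on StratifiedKFold keeping the CPU/GPU class ratio roughly constant in every fold. A self-contained illustration with made-up labels:

import numpy as np
from sklearn.model_selection import StratifiedKFold

labels = np.array([0] * 80 + [1] * 20)   # imbalanced oracle labels (80 CPU / 20 GPU)
features = np.zeros((len(labels), 1))    # placeholder features, as in Example #4

kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=204)
for fold, (train_ids, test_ids) in enumerate(kf.split(features, labels)):
    # Every test fold preserves the 80/20 ratio: 8 CPU and 2 GPU samples.
    print(fold, np.bincount(labels[test_ids]))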