Python stop_counters 예제들, pypapi.papi_high.stop_counters Python 예제들

예제 #1

0

파일 보기

파일: runPython.py 프로젝트: quepas/performance-estimation-array-operations

def restartPAPI(aspect):
    """Stop the PAPI high-level counters, then start the one named by ``aspect``.

    Args:
        aspect: Name of the counter to start. ``'PAPI_L1_DCM'`` and
            ``'PAPI_TOT_INS'`` are handled; for any other value no counter
            is started.
    """
    try:
        papi_high.stop_counters()
    except Exception:
        # Deliberate best effort: a failing stop must not prevent the
        # restart (presumably it fails when nothing is running yet --
        # TODO confirm). ``Exception`` instead of a bare ``except`` lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    if aspect == 'PAPI_L1_DCM':
        papi_high.start_counters([papi_events.PAPI_L1_DCM])
    elif aspect == 'PAPI_TOT_INS':
        papi_high.start_counters([papi_events.PAPI_TOT_INS])

예제 #2

0

파일 보기

파일: main.py 프로젝트: mariateresachaves/CPAR

def main():
    """Run one matrix multiplication under PAPI counters and print the results.

    Reads ``sys.argv``: ``argv[1]`` selects the algorithm (1 = basic,
    2 = line), ``argv[2]`` and ``argv[3]`` are the sizes of the square
    matrices A and B.

    Returns:
        1 when ``right_args`` rejects the arguments, 2 when
        ``valid_algorithm`` rejects the algorithm, otherwise ``None``.
    """
    if not right_args(len(sys.argv), sys.argv):
        return 1

    size_A = int(sys.argv[2])
    size_B = int(sys.argv[3])
    tot_FLOPS = 2 * MX * MX * size_A

    if not valid_algorithm(sys.argv[1]):
        return 2
    algorithm = int(sys.argv[1])

    matrix_A = Matrix(size_A, size_A)
    fill(matrix_A)

    matrix_B = Matrix(size_B, size_B)
    fill(matrix_B)

    # Test Matrix Values
    #print_matrix(matrix_A)
    #print_matrix(matrix_B)

    # Starts some counters
    papi_high.start_counters([
        papi_events.PAPI_L1_DCM, papi_events.PAPI_L1_ICM,
        papi_events.PAPI_L1_TCM, papi_events.PAPI_L2_TCM,
        papi_events.PAPI_L3_TCM, papi_events.PAPI_TOT_INS
    ])

    # try/finally guarantees the counters are stopped even when the
    # multiplication or the reporting raises.
    try:
        # A single parenthesised string prints identically under the Python 2
        # print statement and the Python 3 print function.
        if algorithm == 1:
            print('Basic Matrix Multiplication')
            start_time = time.time()
            result = basic_multiply(matrix_A, matrix_B)
            end_time = time.time()
            #print_matrix(result)
        elif algorithm == 2:
            print('Line Matrix Multiplication')
            start_time = time.time()
            result = line_multiply(matrix_A, matrix_B)
            end_time = time.time()
            #print_matrix(result)

        # Reads values from counters and reset them
        results = papi_high.read_counters()  # -> [int, int]

        # Print results
        print_times(results, tot_FLOPS, start_time, end_time)
    finally:
        # Stop counters
        papi_high.stop_counters()  # -> []

예제 #3

0

파일 보기

def profile(model, inputs, repeats=1000):
    """Report parameter count, PAPI_SP_OPS count and forward latency of ``model``.

    Args:
        model: Object providing ``forward(*inputs)`` and ``parameters()``.
        inputs: Positional arguments unpacked into ``model.forward``.
        repeats: Number of timed forward passes.

    Returns:
        Dict with the keys ``"params(M)"``, ``"flops(M)"``,
        ``"inf_time_mean(ms)"`` and ``"inf_time_std(ms)"``.
    """
    # Reference for counting flops: http://www.bnikolic.co.uk/blog/python/flops/2019/10/01/pytorch-count-flops.html
    from pypapi import papi_high
    from pypapi import events as papi_events
    from time import perf_counter

    # One forward pass with the PAPI_SP_OPS counter running.
    papi_high.start_counters([papi_events.PAPI_SP_OPS])
    model.forward(*inputs)
    counter_values = papi_high.stop_counters()
    mega_flops = counter_values[0] / 1000000.0

    # Wall-clock time of each of the ``repeats`` forward passes, in seconds.
    durations = []
    for _ in range(repeats):
        started = perf_counter()
        model.forward(*inputs)
        durations.append(perf_counter() - started)

    mega_params = sum(p.numel() for p in model.parameters()) / 1000000.0
    durations_ms = np.array(durations) * 1000

    report = {}
    report["params(M)"] = mega_params
    report["flops(M)"] = mega_flops
    report["inf_time_mean(ms)"] = np.mean(durations_ms)
    report["inf_time_std(ms)"] = np.std(durations_ms)
    return report

예제 #4

0

파일 보기

파일: do_numpy.py 프로젝트: shwetasalaria/benchmarker

 def run(self):
     """Time ``a @ b`` on random float32 matrices and record the results.

     Reads ``self.matrix_size`` (unpacked as ``M, N, K``) and
     ``self.params``. Writes ``self.params["time"]`` (elapsed time divided
     by the number of timed products), ``self.params["GFLOP/sec"]``
     (``self.params["GFLOP"]`` divided by that time) and, when the PAPI
     counter could be started, ``self.params["GFLOP_papi"]``.

     Raises:
         Exception: if ``self.params["nb_gpus"]`` is present and positive.
     """
     if "nb_gpus" in self.params:
         if self.params["nb_gpus"] > 0:
             raise Exception(
                 "Numpy framework does not work with GPU back-end")
     M, N, K = self.matrix_size
     dtype = np.float32
     a = np.random.random((M, N)).astype(dtype)
     b = np.random.random((N, K)).astype(dtype)
     c = np.random.random((M, K)).astype(dtype)
     nb_epoch = 2
     try:
         high.start_counters([
             events.PAPI_SP_OPS,
         ])
     except Exception:
         # PAPI is optional: when the counter cannot be started, only the
         # wall-clock measurement is recorded. ``Exception`` (not a bare
         # ``except``) keeps KeyboardInterrupt/SystemExit propagating.
         papi_available = False
     else:
         papi_available = True
     time_start = timer()
     for _ in range(nb_epoch):
         c = a @ b  # + c
     time_end = timer()
     if papi_available:
         # Raw counter value accumulated over the whole timed loop,
         # scaled by 1024**3.
         gflop_papi = high.stop_counters()[0] / (1024**3)
         self.params["GFLOP_papi"] = gflop_papi
     elapsed_time = (time_end - time_start) / nb_epoch
     self.params["time"] = elapsed_time
     self.params["GFLOP/sec"] = self.params["GFLOP"] / elapsed_time

예제 #5

0

파일 보기

파일: polybench.py 프로젝트: UDC-GAC/polybench-python

    def time_kernel(self, *args, **kwargs):
        """Run ``self.kernel`` under the instrumentation chosen by the POLYBENCH flags.

        ``POLYBENCH_TIME`` or ``POLYBENCH_GFLOPS`` selects a single timed run,
        ``POLYBENCH_PAPI`` selects one kernel run per PAPI counter, and
        otherwise the kernel runs once after ``__prepare_instruments``.
        All positional and keyword arguments are forwarded to the kernel
        (and to ``initialize_array`` between PAPI runs).
        """
        if self.POLYBENCH_TIME or self.POLYBENCH_GFLOPS:
            # Plain wall-clock measurement around a single kernel run.
            self.__timer_start()
            self.kernel(*args, **kwargs)
            self.__timer_stop()
        elif self.POLYBENCH_PAPI:
            # The API can monitor several counters at once, but that is not
            # accurate, so every counter gets its own kernel run.
            self.__papi_init()  # Initializes self.__papi_counters and self.__papi_available_counters
            self.__prepare_instruments()
            # self.__papi_counters: list of available counter ids
            # self.__papi_counters_result: collected counter return values
            for run_index, counter in enumerate(self.__papi_counters):
                if run_index > 0:
                    # Every run after the first starts from re-initialized data.
                    self.initialize_array(*args, **kwargs)
                papi_high.start_counters([counter])  # expects a list of counters
                self.kernel(*args, **kwargs)
                measured = papi_high.stop_counters()  # list of counter results
                self.__papi_counters_result.extend(measured)
        else:
            # Default kernel run
            self.__prepare_instruments()
            self.kernel(*args, **kwargs)

        # Something like stop_instruments()
        if self.POLYBENCH_LINUX_FIFO_SCHEDULER:
            self.__linux_standard_scheduler()

예제 #6

0

파일 보기

파일: core.py 프로젝트: tma15/fairseq

 def count_flops(self, dataset, repeat):
     """Count the average PAPI_DP_OPS value per forward pass using PYPAPI.

     Runs ``self.forward`` ``repeat`` times on every sample of ``dataset``
     with the counter running and returns the rounded counter value divided
     by ``repeat * len(dataset)``.

     Note (from the original author): it only works if the model is being
     run on cpu.
     """
     logger.info("Starting flop counter")
     high.start_counters([events.PAPI_DP_OPS])
     for sample_idx, sample in enumerate(dataset):
         for _ in range(repeat):
             self.forward(sample)
         # Progress message on every 100th sample, including the first.
         if sample_idx % 100 == 0:
             logger.info(f"Counted flops for {sample_idx}/{len(dataset)} samples")
     counter_values = high.stop_counters()
     forward_passes = repeat * len(dataset)
     return round(counter_values[0] / forward_passes)

예제 #7

0

파일 보기

        def spoped(*args, **kwargs):
            """Call ``func`` while counting PAPI_SP_OPS and return its result.

            The optional keyword ``log_spops`` is not forwarded to ``func``;
            when given, the counter value is stored in it under
            ``func.__name__``. When ``show`` is truthy the value is printed.
            If PAPI raises ``PapiNoEventError`` a warning is issued and
            ``func`` is executed without counting.
            """
            # Strip the bookkeeping keyword before forwarding; the default
            # makes the pop a no-op when the keyword is absent, replacing the
            # former bare ``except: pass``.
            kkwargs = kwargs.copy()
            kkwargs.pop("log_spops", None)

            # if the event is available, do spop calculation & execute func
            try:
                high.start_counters([events.PAPI_SP_OPS])
                result = func(*args, **kkwargs)
                spops = high.stop_counters()[0]
                if "log_spops" in kwargs:
                    kwargs["log_spops"][func.__name__] = spops
                if show:
                    print("{}\tSPOPS:\t\t{}".format(func.__name__, spops))
                return result
            except pypapi.exceptions.PapiNoEventError as e:
                warnings.warn(
                    "{} \nYour kernel might not "
                    "support this function. Function {} is executed without SPOP "
                    "counting.".format(e, func.__name__))
                return func(*args, **kkwargs)

예제 #8

0

파일 보기

        with open(saliency_path) as out:
            with open(output_path, 'w') as output_sal:
                saliency_flops = []
                for j, line in enumerate(out):
                    high.start_counters([
                        events.PAPI_FP_OPS,
                    ])

                    try:
                        instance_saliency = json.loads(line)
                    except:
                        line = next(out)
                        instance_saliency = json.loads(line)

                    for i, token in enumerate(instance_saliency['tokens']):
                        if token['token'] == '[PAD]':
                            continue
                        for _c in classes:
                            instance_saliency['tokens'][i][str(
                                _c)] = np.random.rand()

                    output_sal.write(json.dumps(instance_saliency) + '\n')

                    x = sum(high.stop_counters())
                    saliency_flops.append(x)

        print(np.mean(saliency_flops), np.std(saliency_flops))
        flops.append(np.mean(saliency_flops))

    print('FLOPs', f'{np.mean(flops):.2f} ($\pm${np.std(flops):.2f})')

예제 #9

0

파일 보기

if __name__ == '__main__':
    datadir = '../data'
    # x_train: (n_samples, width, height)
    (x_train, y_train), (x_test, y_test) = load_data(args.dataset, num_classes,
                                                     datadir)

    if args.count_flops:
        rcdt_ns_obj = RCDT_NS(num_classes, theta, rm_edge)
        for n_samples in [1, 10, 100]:
            high.start_counters([
                events.PAPI_DP_OPS,
            ])
            rcdt_test = x_train[:n_samples]
            rcdt_test = rcdt_ns_obj.fun_rcdt_batch(rcdt_test)
            x = high.stop_counters()[0]
            print('rcdt_test.shape {} GFLOPS {}'.format(
                rcdt_test.shape, x / 1e9))
            rcdt_gflops = (x / 1e9) / n_samples
        print('rcdt_gflops: {}'.format(rcdt_gflops))

    num_repeats = 10
    accs = []
    all_preds = []

    if args.count_flops:
        all_train_gflops, all_test_gflops = [], []
    for n_samples_perclass in [2**i for i in range(0, po_train_max + 1)]:
        for repeat in range(num_repeats):
            x_train_sub, y_train_sub = take_train_samples(
                x_train, y_train, n_samples_perclass, num_classes, repeat)

예제 #10

0

파일 보기

# https://github.com/Lyken17/pytorch-OpCounter
# https://github.com/sovrasov/flops-counter.pytorch/issues/16


def train_gflops(model, epochs=1, num_train_samples=1, input_size=28):
    """Estimate training GFLOPs from the single-sample inference estimate.

    Args:
        model: Model name accepted by ``test_gflops``.
        epochs: Number of training epochs.
        num_train_samples: Number of training samples per epoch.
        input_size: Side length of the square input passed to ``test_gflops``.

    Returns:
        ``epochs * num_train_samples * 2 * test_gflops(model, input_size)``.
    """
    # ``test_gflops`` takes exactly (model, input_size); the extra positional
    # argument passed here previously raised a TypeError.
    gflops = epochs * num_train_samples * 2 * test_gflops(model, input_size)
    return gflops


# Module-level measurement: run one forward pass of a double-precision
# MNISTNet on a random 1x3x28x28 input with the PAPI_DP_OPS counter running,
# then print the counter value divided by 1e9.
himodel = MNISTNet(3, 10, img_size=28).double()
high.start_counters([
    events.PAPI_DP_OPS,
])
himodel(torch.randn(1, 3, 28, 28).double())
print(high.stop_counters()[0] / 1e9)


def test_gflops(model, input_size):
    """Print and return ``2 * macs / 1e9`` for one forward pass of a named model.

    Args:
        model: One of ``'shallowcnn'``, ``'resnet18'`` or ``'vgg11'``.
        input_size: Side length of the random ``1 x 3 x size x size`` input.

    Returns:
        Twice the MAC count reported by ``profile``, divided by 1e9.
    """
    assert model in ['shallowcnn', 'resnet18', 'vgg11']

    # Build the network that matches the requested name.
    network = model
    if model == 'shallowcnn':
        network = MNISTNet(3, 10, img_size=input_size)
    elif model == 'resnet18':
        network = models.resnet18(num_classes=10)
    elif model == 'vgg11':
        network = models.vgg11_bn(num_classes=10)

    dummy_batch = torch.randn(1, 3, input_size, input_size)
    macs, _params = profile(network, inputs=(dummy_batch, ))
    gflops = 2 * macs / 1e9
    print(gflops)
    return gflops

예제 #11

0

파일 보기

파일: interpret_lime.py 프로젝트: copenlu/xai-benchmark

def generate_saliency(model_path, saliency_path):
    """Write LIME token saliencies for the test split and return counter values.

    Relies on the module-level names ``args``, ``tokenizer`` and ``device``.
    One JSON line of the form ``{"tokens": [...]}`` is written to
    ``saliency_path`` per instance.

    Args:
        model_path: Path of the checkpoint loaded with ``torch.load``; it is
            indexed with the keys ``'args'`` and ``'model'``.
        saliency_path: Path of the output file, opened for writing.

    Returns:
        List with the first ``PAPI_FP_OPS`` counter value of every instance;
        empty when ``args.no_time`` is set.
    """
    test = get_dataset(path=args.dataset_dir, mode=args.split,
                       dataset=args.dataset)
    checkpoint = torch.load(model_path,
                            map_location=lambda storage, loc: storage)
    model_args = argparse.Namespace(**checkpoint['args'])

    # Build the model matching args.model and wrap it for the LIME explainer;
    # each branch also sets its own batch size on model_args.
    if args.model == 'trans':
        model_args.batch_size = 7
        transformer_config = BertConfig.from_pretrained('bert-base-uncased',
                                                        num_labels=model_args.labels)
        model = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', config=transformer_config).to(device)
        model.load_state_dict(checkpoint['model'])
        modelw = BertModelWrapper(model, device, tokenizer, model_args)
    else:
        if args.model == 'lstm':
            model_args.batch_size = 200
            model = LSTM_MODEL(tokenizer, model_args,
                               n_labels=checkpoint['args']['labels'],
                               device=device).to(device)
        else:
            model_args.batch_size = 300
            model = CNN_MODEL(tokenizer, model_args,
                              n_labels=checkpoint['args']['labels']).to(device)

        model.load_state_dict(checkpoint['model'])
        modelw = ModelWrapper(model, device, tokenizer, model_args)

    modelw.eval()

    explainer = LimeTextExplainer()
    saliency_flops = []

    with open(saliency_path, 'w') as out:
        for instance in tqdm(test):
            # SALIENCY
            # The counter is started here and stopped exactly once per
            # instance, either in the except branch or after a successful
            # explanation.
            if not args.no_time:
                high.start_counters([events.PAPI_FP_OPS, ])

            saliencies = []
            # 'imdb' and 'tweet' instances are encoded from one text field,
            # all other datasets from two.
            if args.dataset in ['imdb', 'tweet']:
                token_ids = tokenizer.encode(instance[0])
            else:
                token_ids = tokenizer.encode(instance[0], instance[1])

            # Pad short inputs with the pad token up to six token ids.
            if len(token_ids) < 6:
                token_ids = token_ids + [tokenizer.pad_token_id] * (
                            6 - len(token_ids))
            try:
                # The explainer receives the token ids joined by spaces.
                exp = explainer.explain_instance(
                    " ".join([str(i) for i in token_ids]), modelw,
                    num_features=len(token_ids),
                    top_labels=args.labels)
            except Exception as e:
                # Explanation failed: report it, record the counter, write a
                # saliency of 0 for every token and class, then move on.
                print(e)
                if not args.no_time:
                    x = high.stop_counters()[0]
                    saliency_flops.append(x)

                for token_id in token_ids:
                    token_id = int(token_id)
                    token_saliency = {
                        'token': tokenizer.ids_to_tokens[token_id]
                    }
                    for cls_ in range(args.labels):
                        token_saliency[int(cls_)] = 0
                    saliencies.append(token_saliency)

                out.write(json.dumps({'tokens': saliencies}) + '\n')
                out.flush()

                continue

            if not args.no_time:
                x = high.stop_counters()[0]
                saliency_flops.append(x)

            # SERIALIZE
            # explanation[class][token id] -> weight returned by the explainer.
            explanation = {}
            for cls_ in range(args.labels):
                cls_expl = {}
                for (w, s) in exp.as_list(label=cls_):
                    cls_expl[int(w)] = s
                explanation[cls_] = cls_expl

            # Token ids without a weight for a class are written as None.
            for token_id in token_ids:
                token_id = int(token_id)
                token_saliency = {'token': tokenizer.ids_to_tokens[token_id]}
                for cls_ in range(args.labels):
                    token_saliency[int(cls_)] = explanation[cls_].get(token_id,
                                                                      None)
                saliencies.append(token_saliency)

            out.write(json.dumps({'tokens': saliencies}) + '\n')
            out.flush()

    return saliency_flops

예제 #12

0

파일 보기

def generate_saliency(model_path, saliency_path, saliency, aggregation):
    """Compute attribution-based token saliencies and write them as JSON lines.

    Relies on the module-level names ``args``, ``tokenizer`` and ``device``.
    Model predictions are additionally written to
    ``model_path + '.predictions'`` when that file does not exist yet.

    Args:
        model_path: Path of the checkpoint loaded with ``torch.load``; it is
            indexed with the keys ``'args'`` and ``'model'``.
        saliency_path: Output file; one ``{"tokens": [...]}`` JSON object is
            written per test instance.
        saliency: Attribution method: ``'deeplift'``, ``'guided'``, ``'sal'``,
            ``'inputx'`` or ``'occlusion'``.
        aggregation: Forwarded to ``summarize_attributions`` as ``type``.

    Returns:
        List with one entry per batch: the summed ``PAPI_FP_OPS`` counter
        values divided by the batch size. Empty when ``args.no_time`` is set.

    Raises:
        ValueError: if ``saliency`` is not one of the supported methods.
    """
    checkpoint = torch.load(model_path,
                            map_location=lambda storage, loc: storage)
    model_args = Namespace(**checkpoint['args'])
    if args.model == 'lstm':
        model = LSTM_MODEL(tokenizer,
                           model_args,
                           n_labels=checkpoint['args']['labels']).to(device)
        model.load_state_dict(checkpoint['model'])
    elif args.model == 'trans':
        transformer_config = BertConfig.from_pretrained(
            'bert-base-uncased', num_labels=model_args.labels)
        model_cp = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', config=transformer_config).to(device)
        # The checkpoint loaded above is reused; reading the same file a
        # second time was redundant.
        model_cp.load_state_dict(checkpoint['model'])
        model = BertModelWrapper(model_cp)
    else:
        model = CNN_MODEL(tokenizer,
                          model_args,
                          n_labels=checkpoint['args']['labels']).to(device)
        model.load_state_dict(checkpoint['model'])

    model.train()

    pad_to_max = False
    if saliency == 'deeplift':
        ablator = DeepLift(model)
    elif saliency == 'guided':
        ablator = GuidedBackprop(model)
    elif saliency == 'sal':
        ablator = Saliency(model)
    elif saliency == 'inputx':
        ablator = InputXGradient(model)
    elif saliency == 'occlusion':
        ablator = Occlusion(model)
    else:
        # Fail early instead of hitting a NameError on ``ablator`` once the
        # saliency loop starts.
        raise ValueError('Unsupported saliency method: {}'.format(saliency))

    coll_call = get_collate_fn(dataset=args.dataset, model=args.model)

    return_attention_masks = args.model == 'trans'

    collate_fn = partial(coll_call,
                         tokenizer=tokenizer,
                         device=device,
                         return_attention_masks=return_attention_masks,
                         pad_to_max_length=pad_to_max)
    test = get_dataset(path=args.dataset_dir,
                       mode=args.split,
                       dataset=args.dataset)
    # A batch size given on the command line overrides the checkpoint's.
    batch_size = (args.batch_size if args.batch_size is not None
                  else model_args.batch_size)
    test_dl = DataLoader(batch_size=batch_size,
                         dataset=test,
                         shuffle=False,
                         collate_fn=collate_fn)

    # PREDICTIONS
    predictions_path = model_path + '.predictions'
    if not os.path.exists(predictions_path):
        predictions = defaultdict(list)
        for batch in tqdm(test_dl, desc='Running test prediction... '):
            if args.model == 'trans':
                logits = model(batch[0],
                               attention_mask=batch[1],
                               labels=batch[2].long())
            else:
                logits = model(batch[0])
            logits = logits.detach().cpu().numpy().tolist()
            predicted = np.argmax(np.array(logits), axis=-1)
            predictions['class'] += predicted.tolist()
            predictions['logits'] += logits

        with open(predictions_path, 'w') as out:
            json.dump(predictions, out)

    # COMPUTE SALIENCY
    # Every method except occlusion attributes over embeddings, so the
    # embedding layer is swapped for an interpretable one for the duration
    # of the loop and restored afterwards.
    if saliency != 'occlusion':
        if args.model == 'trans':
            embedding_layer_name = 'model.bert.embeddings'
        else:
            embedding_layer_name = 'embedding'
        interpretable_embedding = configure_interpretable_embedding_layer(
            model, embedding_layer_name)

    class_attr_list = defaultdict(list)
    token_ids = []
    saliency_flops = []

    for batch in tqdm(test_dl, desc='Running Saliency Generation...'):
        # Extra forward arguments depend on the model type.
        if args.model == 'cnn':
            additional = None
        elif args.model == 'trans':
            additional = (batch[1], batch[2])
        else:
            additional = batch[-1]

        token_ids += batch[0].detach().cpu().numpy().tolist()
        if saliency != 'occlusion':
            input_embeddings = interpretable_embedding.indices_to_embeddings(
                batch[0])

        # Only the attribution computation below is counted.
        if not args.no_time:
            high.start_counters([
                events.PAPI_FP_OPS,
            ])
        for cls_ in range(checkpoint['args']['labels']):
            if saliency == 'occlusion':
                attributions = ablator.attribute(
                    batch[0],
                    sliding_window_shapes=(args.sw, ),
                    target=cls_,
                    additional_forward_args=additional)
            else:
                attributions = ablator.attribute(
                    input_embeddings,
                    target=cls_,
                    additional_forward_args=additional)

            attributions = summarize_attributions(
                attributions, type=aggregation, model=model,
                tokens=batch[0]).detach().cpu().numpy().tolist()
            class_attr_list[cls_] += [list(_l) for _l in attributions]

        if not args.no_time:
            saliency_flops.append(
                sum(high.stop_counters()) / batch[0].shape[0])

    if saliency != 'occlusion':
        remove_interpretable_embedding_layer(model, interpretable_embedding)

    # SERIALIZE
    print('Serializing...', flush=True)
    with open(saliency_path, 'w') as out:
        for instance_i, _ in enumerate(test):
            saliencies = []
            for token_i, token_id in enumerate(token_ids[instance_i]):
                token_sal = {'token': tokenizer.ids_to_tokens[token_id]}
                for cls_ in range(checkpoint['args']['labels']):
                    token_sal[int(
                        cls_)] = class_attr_list[cls_][instance_i][token_i]
                saliencies.append(token_sal)

            out.write(json.dumps({'tokens': saliencies}) + '\n')
            out.flush()

    return saliency_flops

예제 #13

0

파일 보기

    if args.model == 'resnet18':
        model = models.resnet18(num_classes=num_classes).double().to(device)
    torch.save(model.state_dict(), './model_init.pth')

    model.eval()
    with torch.no_grad():
        high.start_counters([
            events.PAPI_DP_OPS,
        ])
        x_test_batch = torch.rand(1,
                                  3,
                                  img_size,
                                  img_size,
                                  dtype=torch.float64)
        test_logit = model(x_test_batch)
        test_gflops = high.stop_counters()[0] / 1e9
        print('test gflops: {}'.format(test_gflops))

    all_train_gflops = []
    for n_samples_perclass in [2**i for i in range(0, po_train_max + 1)]:
        model.load_state_dict(torch.load('./model_init.pth'))
        x_val_size = 0 if n_samples_perclass < 16 else int(n_samples_perclass *
                                                           0.1) * num_classes
        x_train_sub_size = n_samples_perclass * num_classes - x_val_size

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=5e-4)

        high.start_counters([
            events.PAPI_DP_OPS,
        ])

예제 #14

0

파일 보기

파일: interpret_shap.py 프로젝트: copenlu/xai-benchmark

def generate_saliency(model_path, saliency_path):
    """Compute Shapley-value-sampling token saliencies and write them as JSON lines.

    Relies on the module-level names ``args``, ``tokenizer`` and ``device``.
    Model predictions are additionally written to
    ``model_path + '.predictions'`` when that file does not exist yet.

    Args:
        model_path: Path of the checkpoint loaded with ``torch.load``; it is
            indexed with the keys ``'args'`` and ``'model'``.
        saliency_path: Output file; one ``{"tokens": [...]}`` JSON object is
            written per test instance, with pad tokens left out.

    Returns:
        List with one entry per batch: the summed ``PAPI_FP_OPS`` counter
        values divided by the batch size. Empty when ``args.no_time`` is set.
    """
    checkpoint = torch.load(model_path,
                            map_location=lambda storage, loc: storage)
    model_args = Namespace(**checkpoint['args'])
    # A batch size given on the command line overrides the checkpoint's.
    if args.batch_size is not None:
        model_args.batch_size = args.batch_size

    if args.model == 'transformer':
        transformer_config = BertConfig.from_pretrained(
            'bert-base-uncased', num_labels=model_args.labels)
        modelb = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', config=transformer_config).to(device)
        modelb.load_state_dict(checkpoint['model'])
        model = BertModelWrapper(modelb)
    elif args.model == 'lstm':
        model = LSTM_MODEL(tokenizer,
                           model_args,
                           n_labels=checkpoint['args']['labels'],
                           device=device).to(device)
        model.load_state_dict(checkpoint['model'])
        model.train()
        model = ModelWrapper(model)
    else:
        # model_args.batch_size = 1000
        model = CNN_MODEL(tokenizer,
                          model_args,
                          n_labels=checkpoint['args']['labels']).to(device)
        model.load_state_dict(checkpoint['model'])
        model.train()
        model = ModelWrapper(model)

    ablator = ShapleyValueSampling(model)

    coll_call = get_collate_fn(dataset=args.dataset, model=args.model)

    collate_fn = partial(coll_call,
                         tokenizer=tokenizer,
                         device=device,
                         return_attention_masks=False,
                         pad_to_max_length=False)

    test = get_dataset(args.dataset_dir, mode=args.split)
    test_dl = DataLoader(batch_size=model_args.batch_size,
                         dataset=test,
                         shuffle=False,
                         collate_fn=collate_fn)

    # PREDICTIONS
    predictions_path = model_path + '.predictions'
    if not os.path.exists(predictions_path):
        predictions = defaultdict(list)
        for batch in tqdm(test_dl, desc='Running test prediction... '):
            logits = model(batch[0])
            logits = logits.detach().cpu().numpy().tolist()
            predicted = np.argmax(np.array(logits), axis=-1)
            predictions['class'] += predicted.tolist()
            predictions['logits'] += logits

        with open(predictions_path, 'w') as out:
            json.dump(predictions, out)

    # COMPUTE SALIENCY

    saliency_flops = []

    with open(saliency_path, 'w') as out_mean:
        for batch in tqdm(test_dl, desc='Running Saliency Generation...'):
            # Attributions are collected per batch and written immediately.
            class_attr_list = defaultdict(list)

            # NOTE(review): the model selection above tests for 'lstm', so
            # this 'rnn' check may never be true -- confirm which name is
            # intended before changing it.
            if args.model == 'rnn':
                additional = batch[-1]
            else:
                additional = None

            if not args.no_time:
                high.start_counters([events.PAPI_FP_OPS])
            token_ids = batch[0].detach().cpu().numpy().tolist()

            for cls_ in range(args.labels):
                attributions = ablator.attribute(
                    batch[0].float(),
                    target=cls_,
                    additional_forward_args=additional)
                attributions = attributions.detach().cpu().numpy().tolist()
                class_attr_list[cls_] += attributions

            if not args.no_time:
                x = sum(high.stop_counters())
                saliency_flops.append(x / batch[0].shape[0])

            for i in range(len(batch[0])):
                saliencies = []
                for token_i, token_id in enumerate(token_ids[i]):
                    # Padding positions are not serialized.
                    if token_id == tokenizer.pad_token_id:
                        continue
                    token_sal = {'token': tokenizer.ids_to_tokens[token_id]}
                    for cls_ in range(args.labels):
                        token_sal[int(
                            cls_)] = class_attr_list[cls_][i][token_i]
                    saliencies.append(token_sal)

                out_mean.write(json.dumps({'tokens': saliencies}) + '\n')
                out_mean.flush()

    return saliency_flops

예제 #15

0

파일 보기

파일: matrixprod.py 프로젝트: xRuiAlves/FEUP-CPAR

def papiStopCounters():
    """Stop the PAPI high-level counters and return the result of ``papi_high.stop_counters()``."""
    return papi_high.stop_counters()

예제 #16

0

파일 보기

파일: measurement_collectors.py 프로젝트: smartarch/qoscloud

 def finish(self):
     """Stop the PAPI high-level counters; the value returned by the call is discarded."""
     papi_high.stop_counters()