Example no. 1
    def time_kernel(self, *args, **kwargs):
        if self.POLYBENCH_TIME or self.POLYBENCH_GFLOPS:
            # Simple time measurement
            self.__timer_start()
            self.kernel(*args, **kwargs)
            self.__timer_stop()
        elif self.POLYBENCH_PAPI:
            # Measuring performance counters is a bit tricky. The API allows
            # monitoring multiple counters at once, but the results are then
            # less accurate, so each counter is measured independently,
            # re-running the kernel once per counter.
            i = 0
            self.__papi_init()  # Initializes self.__papi_counters and self.__papi_available_counters
            self.__prepare_instruments()
            # Information for the following loop:
            # * self.__papi_counters holds a list of available counter ids
            # * self.__papi_counters_result holds the actual counter return values
            for counter in self.__papi_counters:
                if i > 0:
                    self.initialize_array(*args, **kwargs)  # force initialization
                i += 1
                papi_high.start_counters([counter])  # requires a list of counters
                self.kernel(*args, **kwargs)
                self.__papi_counters_result.extend(papi_high.stop_counters())  # returns a list of counter results
        else:
            # Default kernel run
            self.__prepare_instruments()
            self.kernel(*args, **kwargs)

        # Something like stop_instruments()
        if self.POLYBENCH_LINUX_FIFO_SCHEDULER:
            self.__linux_standard_scheduler()
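The one-counter-at-a-time pattern above can be reduced to a minimal standalone sketch; the helper below is illustrative (the kernel callable and the counter list are assumptions, not part of the original class):

from pypapi import papi_high
from pypapi import events as papi_events

def measure_each(kernel, counters=(papi_events.PAPI_TOT_INS,
                                   papi_events.PAPI_L1_DCM)):
    """Run `kernel` once per counter so each event is sampled alone."""
    results = {}
    for counter in counters:
        papi_high.start_counters([counter])  # start_counters takes a list
        kernel()
        results[counter] = papi_high.stop_counters()[0]
    return results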
Example no. 2
def profile(model, inputs, repeats=1000):
    # Reference for counting flops: http://www.bnikolic.co.uk/blog/python/flops/2019/10/01/pytorch-count-flops.html
    import numpy as np  # used for the timing statistics below
    from pypapi import papi_high
    from pypapi import events as papi_events

    papi_high.start_counters([
        papi_events.PAPI_SP_OPS,  # single-precision floating-point operations
    ])
    model.forward(*inputs)
    flops = papi_high.stop_counters()[0] / 1000000.0  # raw count -> MFLOPs

    from time import perf_counter
    times = []
    for _ in range(repeats):
        t = perf_counter()
        model.forward(*inputs)
        times.append(perf_counter() - t)
    params = sum(p.numel() for p in model.parameters()) / 1000000.0
    times = np.array(times) * 1000
    return {
        "params(M)": params,
        "flops(M)": flops,
        "inf_time_mean(ms)": np.mean(times),
        "inf_time_std(ms)": np.std(times)
    }
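A minimal usage sketch, assuming a CPU-resident PyTorch module (the model and input shape here are illustrative):

import torch
import torch.nn as nn

model = nn.Linear(128, 64)           # any CPU-resident module works
inputs = (torch.randn(1, 128),)      # model.forward(*inputs) must be valid
stats = profile(model, inputs, repeats=100)
print(stats)  # {'params(M)': ..., 'flops(M)': ..., 'inf_time_mean(ms)': ..., ...}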
Example no. 3
 def run(self):
     if "nb_gpus" in self.params:
         if self.params["nb_gpus"] > 0:
             raise Exception(
                 "Numpy framework does not work with GPU back-end")
     M, N, K = self.matrix_size
     dtype = np.float32
     a = np.random.random((M, N)).astype(dtype)
     b = np.random.random((N, K)).astype(dtype)
     c = np.random.random((M, K)).astype(dtype)
     nb_epoch = 2
     papi_available = True
     try:
         high.start_counters([
             events.PAPI_SP_OPS,
         ])
     except Exception:  # PAPI may be unavailable on this platform
         papi_available = False
     time_start = timer()
     for _ in range(nb_epoch):
         c = a @ b  # + c
     time_end = timer()
     if papi_available:
         # Note: converts to "giga" with 1024**3 rather than 1e9
         gflop_papi = high.stop_counters()[0] / (1024**3)
         self.params["GFLOP_papi"] = gflop_papi
     elapsed_time = (time_end - time_start) / nb_epoch
     self.params["time"] = elapsed_time
     self.params["GFLOP/sec"] = self.params["GFLOP"] / elapsed_time
Example no. 4
def restartPAPI(aspect):
    try:
        papi_high.stop_counters()
    except Exception:  # counters may not have been started yet
        pass
    if aspect == 'PAPI_L1_DCM':
        papi_high.start_counters([papi_events.PAPI_L1_DCM])
    elif aspect == 'PAPI_TOT_INS':
        papi_high.start_counters([papi_events.PAPI_TOT_INS])
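The if/elif chain grows with every supported event name; a sketch of an equivalent lookup-based variant (behaviour assumed identical for valid names):

def restartPAPI(aspect):
    try:
        papi_high.stop_counters()
    except Exception:  # counters may not have been started yet
        pass
    # Resolve the event constant by its string name, e.g. 'PAPI_L1_DCM'.
    papi_high.start_counters([getattr(papi_events, aspect)])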
Example no. 5
def main():
    if not (right_args(len(sys.argv), sys.argv)):
        return 1

    size_A = int(sys.argv[2])
    size_B = int(sys.argv[3])
    tot_FLOPS = 2 * MX * MX * size_A

    if valid_algorithm(sys.argv[1]):
        algorithm = int(sys.argv[1])
    else:
        return 2

    matrix_A = Matrix(size_A, size_A)
    fill(matrix_A)

    matrix_B = Matrix(size_B, size_B)
    fill(matrix_B)

    # Test Matrix Values
    #print_matrix(matrix_A)
    #print_matrix(matrix_B)

    # Starts some counters
    papi_high.start_counters([
        papi_events.PAPI_L1_DCM, papi_events.PAPI_L1_ICM,
        papi_events.PAPI_L1_TCM, papi_events.PAPI_L2_TCM,
        papi_events.PAPI_L3_TCM, papi_events.PAPI_TOT_INS
    ])

    if algorithm == 1:
        print('Basic Matrix Multiplication')
        start_time = time.time()
        result = basic_multiply(matrix_A, matrix_B)
        end_time = time.time()
        #print_matrix(result)

    if algorithm == 2:
        print('Line Matrix Multiplication')
        start_time = time.time()
        result = line_multiply(matrix_A, matrix_B)
        end_time = time.time()
        #print_matrix(result)

    # Read current values and reset the counters (one value per started event)
    results = papi_high.read_counters()  # -> [int, int, int, int, int, int]

    # Print results
    print_times(results, tot_FLOPS, start_time, end_time)

    # Stop counters
    papi_high.stop_counters()  # -> []
Example no. 6
 def __init__(self, cpu_events: List[str] = None):
     # Check the environment for available hardware counters
     logging.info(f"CPU monitor supports {papi_high.num_counters()} counters in {papi_high.num_components()} "
                  f"components")
     if papi_high.num_counters() == 0:
         raise CPUEventsNotSupportedException("No CPU events to measure")
     # Events are defined at https://flozz.github.io/pypapi/events.html
     try:
         # Use the caller-supplied event names when given, else a default set
         self._event_names = cpu_events or ["PAPI_REF_CYC", "PAPI_TOT_INS", "PAPI_L3_TCA",
                                            "PAPI_L3_TCM", "PAPI_BR_INS", "PAPI_BR_MSP"]
         event_codes = [getattr(papi_events, event) for event in self._event_names]
         papi_high.start_counters(event_codes)
     except (PapiNoEventError, AttributeError) as e:
         raise CPUEventsNotSupportedException(e)
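A sketch of how such a monitor might be read once the measured region completes; the read_events method name is an illustrative addition, not part of the original class:

 def read_events(self):
     # read_counters() returns one value per started event and resets them
     values = papi_high.read_counters()
     return dict(zip(self._event_names, values))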
Example no. 7
 def count_flops(
     self,
     dataset,
     repeat,
 ):
     """Use PYPAPI library to count average flops for model inference.
     Note: It only works if the model is being run on cpu"""
     logger.info("Starting flop counter")
     high.start_counters([events.PAPI_DP_OPS])
     for i, sample in enumerate(dataset):
         for _r in range(repeat):
             self.forward(sample)
         if i % 100 == 0:
             logger.info(f"Counted flops for {i}/{len(dataset)} samples")
     flops = high.stop_counters()
     flops = round(flops[0] / (repeat * len(dataset)))
     return flops
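A usage sketch (the dataset and repeat count are illustrative; for a single-precision model, PAPI_SP_OPS would be the counter to start instead):

avg_flops = model.count_flops(test_samples, repeat=10)
logger.info(f"~{avg_flops} double-precision FLOPs per sample")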
Example no. 8
        def spoped(*args, **kwargs):
            kkwargs = kwargs.copy()
            # Drop the log_spops keyword, if present, before calling func
            kkwargs.pop("log_spops", None)

            # if the event is available, do spop calculation & execute func
            try:
                high.start_counters([events.PAPI_SP_OPS])
                result = func(*args, **kkwargs)
                spops = high.stop_counters()[0]
                if "log_spops" in kwargs:
                    kwargs["log_spops"][func.__name__] = spops
                if show:
                    print("{}\tSPOPS:\t\t{}".format(func.__name__, spops))
                return result
            except pypapi.exceptions.PapiNoEventError as e:
                warnings.warn(
                    "{} \nYour kernel might not "
                    "support this function. Function {} is executed without SPOP "
                    "counting.".format(e, func.__name__))
                return func(*args, **kkwargs)
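spoped is the inner wrapper of a decorator factory whose outer layers are not shown; a sketch of how it is presumably assembled and used (count_spops is a hypothetical name):

import functools

def count_spops(show=False):  # hypothetical factory name
    def decorator(func):
        @functools.wraps(func)
        def spoped(*args, **kwargs):
            ...  # body as in the example above
        return spoped
    return decorator

@count_spops(show=True)
def saxpy(a, x, y):
    return a * x + y

log = {}
saxpy(2.0, 1.5, 3.0, log_spops=log)  # per-function SP-op counts land in `log`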
Example no. 9
    flops = []
    for saliency_path, output_path, seed in zip(args.saliency_paths,
                                                args.output_paths, args.seeds):
        random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        np.random.seed(seed)

        with open(saliency_path) as out:
            with open(output_path, 'w') as output_sal:
                saliency_flops = []
                for j, line in enumerate(out):
                    high.start_counters([
                        events.PAPI_FP_OPS,
                    ])

                    try:
                        instance_saliency = json.loads(line)
                    except json.JSONDecodeError:  # skip a malformed line
                        line = next(out)
                        instance_saliency = json.loads(line)

                    for i, token in enumerate(instance_saliency['tokens']):
                        if token['token'] == '[PAD]':
                            continue
                        for _c in classes:
                            instance_saliency['tokens'][i][str(
                                _c)] = np.random.rand()
Example no. 10
def generate_saliency(model_path, saliency_path):
    test = get_dataset(path=args.dataset_dir, mode=args.split,
                       dataset=args.dataset)
    checkpoint = torch.load(model_path,
                            map_location=lambda storage, loc: storage)
    model_args = argparse.Namespace(**checkpoint['args'])

    if args.model == 'trans':
        model_args.batch_size = 7
        transformer_config = BertConfig.from_pretrained('bert-base-uncased',
                                                        num_labels=model_args.labels)
        model = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', config=transformer_config).to(device)
        model.load_state_dict(checkpoint['model'])
        modelw = BertModelWrapper(model, device, tokenizer, model_args)
    else:
        if args.model == 'lstm':
            model_args.batch_size = 200
            model = LSTM_MODEL(tokenizer, model_args,
                               n_labels=checkpoint['args']['labels'],
                               device=device).to(device)
        else:
            model_args.batch_size = 300
            model = CNN_MODEL(tokenizer, model_args,
                              n_labels=checkpoint['args']['labels']).to(device)

        model.load_state_dict(checkpoint['model'])
        modelw = ModelWrapper(model, device, tokenizer, model_args)

    modelw.eval()

    explainer = LimeTextExplainer()
    saliency_flops = []

    with open(saliency_path, 'w') as out:
        for instance in tqdm(test):
            # SALIENCY
            if not args.no_time:
                high.start_counters([events.PAPI_FP_OPS, ])

            saliencies = []
            if args.dataset in ['imdb', 'tweet']:
                token_ids = tokenizer.encode(instance[0])
            else:
                token_ids = tokenizer.encode(instance[0], instance[1])

            if len(token_ids) < 6:
                token_ids = token_ids + [tokenizer.pad_token_id] * (
                            6 - len(token_ids))
            try:
                exp = explainer.explain_instance(
                    " ".join([str(i) for i in token_ids]), modelw,
                    num_features=len(token_ids),
                    top_labels=args.labels)
            except Exception as e:
                print(e)
                if not args.no_time:
                    x = high.stop_counters()[0]
                    saliency_flops.append(x)

                for token_id in token_ids:
                    token_id = int(token_id)
                    token_saliency = {
                        'token': tokenizer.ids_to_tokens[token_id]
                    }
                    for cls_ in range(args.labels):
                        token_saliency[int(cls_)] = 0
                    saliencies.append(token_saliency)

                out.write(json.dumps({'tokens': saliencies}) + '\n')
                out.flush()

                continue

            if not args.no_time:
                x = high.stop_counters()[0]
                saliency_flops.append(x)

            # SERIALIZE
            explanation = {}
            for cls_ in range(args.labels):
                cls_expl = {}
                for (w, s) in exp.as_list(label=cls_):
                    cls_expl[int(w)] = s
                explanation[cls_] = cls_expl

            for token_id in token_ids:
                token_id = int(token_id)
                token_saliency = {'token': tokenizer.ids_to_tokens[token_id]}
                for cls_ in range(args.labels):
                    token_saliency[int(cls_)] = explanation[cls_].get(token_id,
                                                                      None)
                saliencies.append(token_saliency)

            out.write(json.dumps({'tokens': saliencies}) + '\n')
            out.flush()

    return saliency_flops
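This example and the two that follow all bracket a measured region with start_counters/stop_counters and must remember to stop the counters on every exit path (note the duplicated stop call in the except branch above). A sketch of a context manager that factors this out; flop_counter is an illustrative name, not part of the original code:

from contextlib import contextmanager
from pypapi import papi_high as high
from pypapi import events

@contextmanager
def flop_counter(results, event=events.PAPI_FP_OPS):
    high.start_counters([event])
    try:
        yield
    finally:
        # Always stop the counters, even if the measured region raises
        results.append(high.stop_counters()[0])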
Example no. 11
def generate_saliency(model_path, saliency_path, saliency, aggregation):
    checkpoint = torch.load(model_path,
                            map_location=lambda storage, loc: storage)
    model_args = Namespace(**checkpoint['args'])
    if args.model == 'lstm':
        model = LSTM_MODEL(tokenizer,
                           model_args,
                           n_labels=checkpoint['args']['labels']).to(device)
        model.load_state_dict(checkpoint['model'])
    elif args.model == 'trans':
        transformer_config = BertConfig.from_pretrained(
            'bert-base-uncased', num_labels=model_args.labels)
        model_cp = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', config=transformer_config).to(device)
        checkpoint = torch.load(model_path,
                                map_location=lambda storage, loc: storage)
        model_cp.load_state_dict(checkpoint['model'])
        model = BertModelWrapper(model_cp)
    else:
        model = CNN_MODEL(tokenizer,
                          model_args,
                          n_labels=checkpoint['args']['labels']).to(device)
        model.load_state_dict(checkpoint['model'])

    model.train()

    pad_to_max = False
    if saliency == 'deeplift':
        ablator = DeepLift(model)
    elif saliency == 'guided':
        ablator = GuidedBackprop(model)
    elif saliency == 'sal':
        ablator = Saliency(model)
    elif saliency == 'inputx':
        ablator = InputXGradient(model)
    elif saliency == 'occlusion':
        ablator = Occlusion(model)

    coll_call = get_collate_fn(dataset=args.dataset, model=args.model)

    return_attention_masks = args.model == 'trans'

    collate_fn = partial(coll_call,
                         tokenizer=tokenizer,
                         device=device,
                         return_attention_masks=return_attention_masks,
                         pad_to_max_length=pad_to_max)
    test = get_dataset(path=args.dataset_dir,
                       mode=args.split,
                       dataset=args.dataset)
    batch_size = args.batch_size if args.batch_size is not None else \
        model_args.batch_size
    test_dl = DataLoader(batch_size=batch_size,
                         dataset=test,
                         shuffle=False,
                         collate_fn=collate_fn)

    # PREDICTIONS
    predictions_path = model_path + '.predictions'
    if not os.path.exists(predictions_path):
        predictions = defaultdict(lambda: [])
        for batch in tqdm(test_dl, desc='Running test prediction... '):
            if args.model == 'trans':
                logits = model(batch[0],
                               attention_mask=batch[1],
                               labels=batch[2].long())
            else:
                logits = model(batch[0])
            logits = logits.detach().cpu().numpy().tolist()
            predicted = np.argmax(np.array(logits), axis=-1)
            predictions['class'] += predicted.tolist()
            predictions['logits'] += logits

        with open(predictions_path, 'w') as out:
            json.dump(predictions, out)

    # COMPUTE SALIENCY
    if saliency != 'occlusion':
        embedding_layer_name = ('model.bert.embeddings' if args.model == 'trans'
                                else 'embedding')
        interpretable_embedding = configure_interpretable_embedding_layer(
            model, embedding_layer_name)

    class_attr_list = defaultdict(lambda: [])
    token_ids = []
    saliency_flops = []

    for batch in tqdm(test_dl, desc='Running Saliency Generation...'):
        if args.model == 'cnn':
            additional = None
        elif args.model == 'trans':
            additional = (batch[1], batch[2])
        else:
            additional = batch[-1]

        token_ids += batch[0].detach().cpu().numpy().tolist()
        if saliency != 'occlusion':
            input_embeddings = interpretable_embedding.indices_to_embeddings(
                batch[0])

        if not args.no_time:
            high.start_counters([
                events.PAPI_FP_OPS,
            ])
        for cls_ in range(checkpoint['args']['labels']):
            if saliency == 'occlusion':
                attributions = ablator.attribute(
                    batch[0],
                    sliding_window_shapes=(args.sw, ),
                    target=cls_,
                    additional_forward_args=additional)
            else:
                attributions = ablator.attribute(
                    input_embeddings,
                    target=cls_,
                    additional_forward_args=additional)

            attributions = summarize_attributions(
                attributions, type=aggregation, model=model,
                tokens=batch[0]).detach().cpu().numpy().tolist()
            class_attr_list[cls_] += [[_li for _li in _l]
                                      for _l in attributions]

        if not args.no_time:
            saliency_flops.append(
                sum(high.stop_counters()) / batch[0].shape[0])

    if saliency != 'occlusion':
        remove_interpretable_embedding_layer(model, interpretable_embedding)

    # SERIALIZE
    print('Serializing...', flush=True)
    with open(saliency_path, 'w') as out:
        for instance_i, _ in enumerate(test):
            saliencies = []
            for token_i, token_id in enumerate(token_ids[instance_i]):
                token_sal = {'token': tokenizer.ids_to_tokens[token_id]}
                for cls_ in range(checkpoint['args']['labels']):
                    token_sal[int(
                        cls_)] = class_attr_list[cls_][instance_i][token_i]
                saliencies.append(token_sal)

            out.write(json.dumps({'tokens': saliencies}) + '\n')
            out.flush()

    return saliency_flops
Example no. 12
def generate_saliency(model_path, saliency_path):
    checkpoint = torch.load(model_path,
                            map_location=lambda storage, loc: storage)
    model_args = Namespace(**checkpoint['args'])
    model_args.batch_size = args.batch_size if args.batch_size is not None \
        else model_args.batch_size

    if args.model == 'transformer':
        transformer_config = BertConfig.from_pretrained(
            'bert-base-uncased', num_labels=model_args.labels)
        modelb = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', config=transformer_config).to(device)
        modelb.load_state_dict(checkpoint['model'])
        model = BertModelWrapper(modelb)
    elif args.model == 'lstm':
        model = LSTM_MODEL(tokenizer,
                           model_args,
                           n_labels=checkpoint['args']['labels'],
                           device=device).to(device)
        model.load_state_dict(checkpoint['model'])
        model.train()
        model = ModelWrapper(model)
    else:
        # model_args.batch_size = 1000
        model = CNN_MODEL(tokenizer,
                          model_args,
                          n_labels=checkpoint['args']['labels']).to(device)
        model.load_state_dict(checkpoint['model'])
        model.train()
        model = ModelWrapper(model)

    ablator = ShapleyValueSampling(model)

    coll_call = get_collate_fn(dataset=args.dataset, model=args.model)

    collate_fn = partial(coll_call,
                         tokenizer=tokenizer,
                         device=device,
                         return_attention_masks=False,
                         pad_to_max_length=False)

    test = get_dataset(args.dataset_dir, mode=args.split)
    test_dl = DataLoader(batch_size=model_args.batch_size,
                         dataset=test,
                         shuffle=False,
                         collate_fn=collate_fn)

    # PREDICTIONS
    predictions_path = model_path + '.predictions'
    if not os.path.exists(predictions_path):
        predictions = defaultdict(lambda: [])
        for batch in tqdm(test_dl, desc='Running test prediction... '):
            logits = model(batch[0])
            logits = logits.detach().cpu().numpy().tolist()
            predicted = np.argmax(np.array(logits), axis=-1)
            predictions['class'] += predicted.tolist()
            predictions['logits'] += logits

        with open(predictions_path, 'w') as out:
            json.dump(predictions, out)

    # COMPUTE SALIENCY

    saliency_flops = []

    with open(saliency_path, 'w') as out_mean:
        for batch in tqdm(test_dl, desc='Running Saliency Generation...'):
            class_attr_list = defaultdict(lambda: [])

            if args.model == 'rnn':
                additional = batch[-1]
            else:
                additional = None

            if not args.no_time:
                high.start_counters([events.PAPI_FP_OPS])
            token_ids = batch[0].detach().cpu().numpy().tolist()

            for cls_ in range(args.labels):
                attributions = ablator.attribute(
                    batch[0].float(),
                    target=cls_,
                    additional_forward_args=additional)
                attributions = attributions.detach().cpu().numpy().tolist()
                class_attr_list[cls_] += attributions

            if not args.no_time:
                x = sum(high.stop_counters())
                saliency_flops.append(x / batch[0].shape[0])

            for i in range(len(batch[0])):
                saliencies = []
                for token_i, token_id in enumerate(token_ids[i]):
                    if token_id == tokenizer.pad_token_id:
                        continue
                    token_sal = {'token': tokenizer.ids_to_tokens[token_id]}
                    for cls_ in range(args.labels):
                        token_sal[int(
                            cls_)] = class_attr_list[cls_][i][token_i]
                    saliencies.append(token_sal)

                out_mean.write(json.dumps({'tokens': saliencies}) + '\n')
                out_mean.flush()

    return saliency_flops
Example no. 13
def papiStartCounters():
    # Track L1 and L2 data-cache misses for the following measured region
    papi_high.start_counters([
        papi_events.PAPI_L1_DCM,
        papi_events.PAPI_L2_DCM,
    ])
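A matching teardown helper is the natural counterpart (a sketch; the papiStopCounters name is an assumption):

def papiStopCounters():
    # Values come back in the order the counters were started
    l1_dcm, l2_dcm = papi_high.stop_counters()
    return l1_dcm, l2_dcm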