Example #1
def pipeline(subgraph_max_nodes):
    dataset = get_dataset(data_args.dataset_dir, data_args.dataset_name)
    input_dim = dataset.num_node_features
    output_dim = dataset.num_classes
    data = dataset[0]
    node_indices = torch.where(data.test_mask * data.y != 0)[0]

    gnnNets = GnnNets_NC(input_dim, output_dim, model_args)
    checkpoint = torch.load(mcts_args.explain_model_path)
    gnnNets.update_state_dict(checkpoint['net'])
    gnnNets.to_device()
    gnnNets.eval()
    save_dir = os.path.join('./results', f"{mcts_args.dataset_name}"
                                         f"_{model_args.model_name}"
                                         f"_{reward_args.reward_method}")
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    plotutils = PlotUtils(dataset_name=data_args.dataset_name)
    fidelity_score_list = []
    sparsity_score_list = []
    for node_idx in tqdm(node_indices):
        # find the paths and build the graph
        result_path = os.path.join(save_dir, f"node_{node_idx}_score.pt")

        # get data and prediction
        logits, prob, _ = gnnNets(data.clone())
        _, prediction = torch.max(prob, -1)
        prediction = prediction[node_idx].item()

        # build the graph for visualization
        graph = to_networkx(data, to_undirected=True)
        node_labels = {k: int(v) for k, v in enumerate(data.y)}
        nx.set_node_attributes(graph, node_labels, 'label')

        # search using the gnn score
        mcts_state_map = MCTS(node_idx=node_idx, ori_graph=graph,
                              X=data.x, edge_index=data.edge_index,
                              num_hops=len(model_args.latent_dim),
                              n_rollout=mcts_args.rollout,
                              min_atoms=mcts_args.min_atoms,
                              c_puct=mcts_args.c_puct,
                              expand_atoms=mcts_args.expand_atoms)
        value_func = GnnNets_NC2value_func(gnnNets,
                                           node_idx=mcts_state_map.node_idx,
                                           target_class=prediction)
        score_func = reward_func(reward_args, value_func)
        mcts_state_map.set_score_func(score_func)

        # get searching result
        if os.path.isfile(result_path):
            gnn_results = torch.load(result_path)
        else:
            gnn_results = mcts_state_map.mcts(verbose=True)
            torch.save(gnn_results, result_path)
        tree_node_x = find_closest_node_result(gnn_results, subgraph_max_nodes)

        # calculate the metrics
        original_node_list = [i for i in tree_node_x.ori_graph.nodes]
        masked_node_list = [i for i in tree_node_x.ori_graph.nodes
                            if i not in tree_node_x.coalition or i == mcts_state_map.node_idx]
        original_score = gnn_score(original_node_list, tree_node_x.data,
                                   value_func=value_func, subgraph_building_method='zero_filling')
        masked_score = gnn_score(masked_node_list, tree_node_x.data,
                                 value_func=value_func, subgraph_building_method='zero_filling')
        sparsity_score = 1 - len(tree_node_x.coalition)/tree_node_x.ori_graph.number_of_nodes()

        fidelity_score_list.append(original_score - masked_score)
        sparsity_score_list.append(sparsity_score)

        # visualization
        subgraph_node_labels = nx.get_node_attributes(tree_node_x.ori_graph, name='label')
        subgraph_node_labels = torch.tensor([v for k, v in subgraph_node_labels.items()])
        plotutils.plot(tree_node_x.ori_graph, tree_node_x.coalition, y=subgraph_node_labels,
                       node_idx=mcts_state_map.node_idx,
                       figname=os.path.join(save_dir, f"node_{node_idx}.png"))

    fidelity_scores = torch.tensor(fidelity_score_list)
    sparsity_scores = torch.tensor(sparsity_score_list)
    return fidelity_scores, sparsity_scores
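
A minimal driver sketch for the node-classification pipeline above (not part of the original example): it assumes a hypothetical subgraph_max_nodes budget of 5 and simply averages the fidelity/sparsity tensors the function returns.

if __name__ == '__main__':
    # hypothetical subgraph-size budget; adjust for the dataset being explained
    fidelity_scores, sparsity_scores = pipeline(subgraph_max_nodes=5)
    print(f"fidelity: {fidelity_scores.mean().item():.4f} +/- {fidelity_scores.std().item():.4f}")
    print(f"sparsity: {sparsity_scores.mean().item():.4f} +/- {sparsity_scores.std().item():.4f}")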
Example #2
from keras.applications.resnet50 import ResNet50  # assumed import; ResNet50 is used below but was missing from this snippet
from keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from pandas import DataFrame as df
from joblib import Parallel, delayed
from util import paths_to_tensor

import numpy as np
import multiprocessing

if __name__ == '__main__':
    """
    Extracts features from the images using the pre-trained ResNet50 (parallel) - data aug
    """
    model = ResNet50(include_top=False, weights='imagenet')

    print("Carregando os dados...")
    train_files, train_targets = get_dataset("train", 1, True)
    valid_files, valid_targets = get_dataset("val", 1, True)

    data_augmentation = True

    files_names = [["train_aug.csv", "train_target_aug.csv"],
                   ["val_aug.csv", "val_target_aug.csv"]]
    block_size = 500
    n_examples = [len(train_files), len(valid_files)]

    for i_file in [0]:
        files = [train_files, valid_files][i_file]
        target = [train_targets, valid_targets][i_file]

        print("Incializando o pre data Augmentation...")
        datagen_train = ImageDataGenerator(
Example #3
File: subgraphx.py  Project: earlbabson/DIG
def pipeline(max_nodes):
    dataset = get_dataset(data_args.dataset_dir, data_args.dataset_name)
    plotutils = PlotUtils(dataset_name=data_args.dataset_name)
    input_dim = dataset.num_node_features
    output_dim = dataset.num_classes

    if data_args.dataset_name == 'mutag':
        data_indices = list(range(len(dataset)))
    else:
        loader = get_dataloader(dataset,
                                batch_size=train_args.batch_size,
                                random_split_flag=data_args.random_split,
                                data_split_ratio=data_args.data_split_ratio,
                                seed=data_args.seed)
        data_indices = loader['test'].dataset.indices

    gnnNets = GnnNets(input_dim, output_dim, model_args)
    checkpoint = torch.load(mcts_args.explain_model_path)
    gnnNets.update_state_dict(checkpoint['net'])
    gnnNets.to_device()
    gnnNets.eval()

    save_dir = os.path.join(
        './results', f"{mcts_args.dataset_name}_"
        f"{model_args.model_name}_"
        f"{reward_args.reward_method}")
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    fidelity_score_list = []
    sparsity_score_list = []
    for i in tqdm(data_indices):
        # get data and prediction
        data = dataset[i]
        _, probs, _ = gnnNets(Batch.from_data_list([data.clone()]))
        prediction = probs.squeeze().argmax(-1).item()
        original_score = probs.squeeze()[prediction]

        # get the reward func
        value_func = GnnNets_GC2value_func(gnnNets, target_class=prediction)
        payoff_func = reward_func(reward_args, value_func)

        # find the paths and build the graph
        result_path = os.path.join(save_dir, f"example_{i}.pt")

        # mcts for l_shapely
        mcts_state_map = MCTS(data.x,
                              data.edge_index,
                              score_func=payoff_func,
                              n_rollout=mcts_args.rollout,
                              min_atoms=mcts_args.min_atoms,
                              c_puct=mcts_args.c_puct,
                              expand_atoms=mcts_args.expand_atoms)

        if os.path.isfile(result_path):
            results = torch.load(result_path)
        else:
            results = mcts_state_map.mcts(verbose=True)
            torch.save(results, result_path)

        # l_shapely score
        graph_node_x = find_closest_node_result(results, max_nodes=max_nodes)
        masked_node_list = [
            node for node in list(range(graph_node_x.data.x.shape[0]))
            if node not in graph_node_x.coalition
        ]
        fidelity_score = original_score - gnn_score(
            masked_node_list,
            data,
            value_func,
            subgraph_building_method='zero_filling')
        sparsity_score = 1 - len(
            graph_node_x.coalition) / graph_node_x.ori_graph.number_of_nodes()
        fidelity_score_list.append(fidelity_score)
        sparsity_score_list.append(sparsity_score)

        # visualization
        if hasattr(dataset, 'supplement'):
            words = dataset.supplement['sentence_tokens'][str(i)]
            plotutils.plot(graph_node_x.ori_graph,
                           graph_node_x.coalition,
                           words=words,
                           figname=os.path.join(save_dir, f"example_{i}.png"))
        else:
            plotutils.plot(graph_node_x.ori_graph,
                           graph_node_x.coalition,
                           x=graph_node_x.data.x,
                           figname=os.path.join(save_dir, f"example_{i}.png"))

    fidelity_scores = torch.tensor(fidelity_score_list)
    sparsity_scores = torch.tensor(sparsity_score_list)
    return fidelity_scores, sparsity_scores
Example #4
        state = torch.load("%s/%d.pth" % (opt.checkpoint_dir, opt.epoch))
        model.load_state_dict(state.get('weight', False))
        opt = state.get('opt')
        opt.epoch = temp_opt.epoch

    if opt.model_name == 'unet_nested':
        criterion = BCEDiceLoss()
    elif opt.n_class > 1:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = nn.BCEWithLogitsLoss()

    optimizer = optim.AdamW(model.parameters())

    dataloader = DataLoader(
        load_dataset.get_dataset(opt),
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.n_cpu,
    )

    Tensor = torch.cuda.FloatTensor if cuda else torch.Tensor
    log_list = []
    for epoch in range(opt.epoch, opt.n_epochs):
        epoch_loss = 0
        num_batches = len(dataloader)

        for i, imgs in enumerate(dataloader):
            # Configure model input
            data = Variable(imgs["input"].type(Tensor))
            true_mask = Variable(imgs["gt"].type(Tensor))
Example #5
def pipeline_NC(top_k):
    dataset = get_dataset(data_args.dataset_dir, data_args.dataset_name)
    input_dim = dataset.num_node_features
    output_dim = dataset.num_classes
    data = dataset[0]
    node_indices = torch.where(data.test_mask * data.y != 0)[0].tolist()

    gnnNets = GnnNets_NC(input_dim, output_dim, model_args)
    checkpoint = torch.load(model_args.model_path)
    gnnNets.update_state_dict(checkpoint['net'])
    gnnNets.to_device()
    gnnNets.eval()

    save_dir = os.path.join(
        './results', f"{data_args.dataset_name}_"
        f"{model_args.model_name}_"
        f"pgexplainer")
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    pgexplainer = PGExplainer(gnnNets)

    if torch.cuda.is_available():
        torch.cuda.synchronize()
    tic = time.perf_counter()

    pgexplainer.get_explanation_network(dataset, is_graph_classification=False)

    if torch.cuda.is_available():
        torch.cuda.synchronize()
    toc = time.perf_counter()
    training_duration = toc - tic
    print(f"training time is {training_duration}s ")

    duration = 0.0
    data = dataset[0]
    fidelity_score_list = []
    sparsity_score_list = []
    plotutils = PlotUtils(dataset_name=data_args.dataset_name)
    for ori_node_idx in tqdm(node_indices):
        tic = time.perf_counter()
        if glob.glob(os.path.join(save_dir, f"node_{ori_node_idx}.pt")):
            file = glob.glob(os.path.join(save_dir,
                                          f"node_{ori_node_idx}.pt"))[0]
            edge_mask, x, edge_index, y, subset = torch.load(file)
            edge_mask = torch.from_numpy(edge_mask)
            node_idx = int(torch.where(subset == ori_node_idx)[0])
            pred_label = pgexplainer.get_node_prediction(
                node_idx, x, edge_index)
        else:
            x, edge_index, y, subset, kwargs = \
                pgexplainer.get_subgraph(node_idx=ori_node_idx, x=data.x, edge_index=data.edge_index, y=data.y)
            node_idx = int(torch.where(subset == ori_node_idx)[0])

            edge_mask = pgexplainer.explain_edge_mask(x, edge_index)
            pred_label = pgexplainer.get_node_prediction(
                node_idx, x, edge_index)
            save_path = os.path.join(save_dir, f"node_{ori_node_idx}.pt")
            edge_mask = edge_mask.cpu()
            cache_list = [edge_mask.numpy(), x, edge_index, y, subset]
            torch.save(cache_list, save_path)

        duration += time.perf_counter() - tic
        sub_data = Data(x=x, edge_index=edge_index, y=y)

        graph = to_networkx(sub_data)

        fidelity_score = top_k_fidelity(sub_data, edge_mask, top_k, gnnNets,
                                        pred_label)
        sparsity_score = top_k_sparsity(sub_data, edge_mask, top_k)

        fidelity_score_list.append(fidelity_score)
        sparsity_score_list.append(sparsity_score)

        # visualization
        plotutils.plot_soft_edge_mask(graph,
                                      edge_mask,
                                      top_k,
                                      y=sub_data.y,
                                      node_idx=node_idx,
                                      un_directed=True,
                                      figname=os.path.join(
                                          save_dir,
                                          f"example_{ori_node_idx}.png"))

    fidelity_scores = torch.tensor(fidelity_score_list)
    sparsity_scores = torch.tensor(sparsity_score_list)
    return fidelity_scores, sparsity_scores
Example #6
def pipeline_GC(top_k):
    dataset = get_dataset(data_args.dataset_dir, data_args.dataset_name)
    if data_args.dataset_name == 'mutag':
        data_indices = list(range(len(dataset)))
        pgexplainer_trainset = dataset
    else:
        loader = get_dataloader(dataset,
                                batch_size=train_args.batch_size,
                                random_split_flag=data_args.random_split,
                                data_split_ratio=data_args.data_split_ratio,
                                seed=data_args.seed)
        data_indices = loader['test'].dataset.indices
        pgexplainer_trainset = loader['train'].dataset

    input_dim = dataset.num_node_features
    output_dim = dataset.num_classes
    gnnNets = GnnNets(input_dim, output_dim, model_args)
    checkpoint = torch.load(model_args.model_path)
    gnnNets.update_state_dict(checkpoint['net'])
    gnnNets.to_device()
    gnnNets.eval()

    save_dir = os.path.join(
        './results', f"{data_args.dataset_name}_"
        f"{model_args.model_name}_"
        f"pgexplainer")
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    pgexplainer = PGExplainer(gnnNets)

    if torch.cuda.is_available():
        torch.cuda.synchronize()
    tic = time.perf_counter()

    pgexplainer.get_explanation_network(pgexplainer_trainset)

    if torch.cuda.is_available():
        torch.cuda.synchronize()
    toc = time.perf_counter()
    training_duration = toc - tic
    print(f"training time is {training_duration: .4}s ")

    explain_duration = 0.0
    plotutils = PlotUtils(dataset_name=data_args.dataset_name)
    fidelity_score_list = []
    sparsity_score_list = []
    for data_idx in tqdm(data_indices):
        data = dataset[data_idx]
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        tic = time.perf_counter()

        prob = pgexplainer.eval_probs(data.x, data.edge_index)
        pred_label = prob.argmax(-1).item()

        if glob.glob(os.path.join(save_dir, f"example_{data_idx}.pt")):
            file = glob.glob(os.path.join(save_dir,
                                          f"example_{data_idx}.pt"))[0]
            edge_mask = torch.from_numpy(torch.load(file))
        else:
            edge_mask = pgexplainer.explain_edge_mask(data.x, data.edge_index)
            save_path = os.path.join(save_dir, f"example_{data_idx}.pt")
            edge_mask = edge_mask.cpu()
            torch.save(edge_mask.detach().numpy(), save_path)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        toc = time.perf_counter()
        explain_duration += (toc - tic)

        graph = to_networkx(data)

        fidelity_score = top_k_fidelity(data, edge_mask, top_k, gnnNets,
                                        pred_label)
        sparsity_score = top_k_sparsity(data, edge_mask, top_k)

        fidelity_score_list.append(fidelity_score)
        sparsity_score_list.append(sparsity_score)

        # visualization
        if hasattr(dataset, 'supplement'):
            words = dataset.supplement['sentence_tokens'][str(data_idx)]
            plotutils.plot_soft_edge_mask(graph,
                                          edge_mask,
                                          top_k,
                                          x=data.x,
                                          words=words,
                                          un_directed=True,
                                          figname=os.path.join(
                                              save_dir,
                                              f"example_{data_idx}.png"))
        else:
            plotutils.plot_soft_edge_mask(graph,
                                          edge_mask,
                                          top_k,
                                          x=data.x,
                                          un_directed=True,
                                          figname=os.path.join(
                                              save_dir,
                                              f"example_{data_idx}.png"))

    fidelity_scores = torch.tensor(fidelity_score_list)
    sparsity_scores = torch.tensor(sparsity_score_list)
    return fidelity_scores, sparsity_scores
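
A hedged usage sketch (not from the original source): sweeping pipeline_GC over a few hypothetical top_k edge budgets to see how fidelity trades off against sparsity.

if __name__ == '__main__':
    for top_k in (3, 5, 10):  # hypothetical edge budgets for the explanation
        fidelity_scores, sparsity_scores = pipeline_GC(top_k)
        print(f"top_k={top_k}: fidelity={fidelity_scores.mean().item():.4f}, "
              f"sparsity={sparsity_scores.mean().item():.4f}")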
Example #7
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from PIL import ImageFile
from joblib import Parallel, delayed
from util import paths_to_tensor

import numpy as np
import multiprocessing

if __name__ == "__main__":
    print("Carregando os dados...")
    valid_files, valid_targets = get_dataset("val")

    # pre-process the data for Keras
    print("Convertendo para tensor...")
    valid_tensors = paths_to_tensor(valid_files).astype('float32') / 255.0

    model = Sequential()
    model.add(
        ResNet50(weights='imagenet',
                 include_top=False,
                 input_shape=(224, 224, 3)))
    model.add(Flatten(name='flatten'))
    model.add(Dense(83, activation='softmax'))

    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
Example #8
    checkpoints = [
        f for f in glob.glob(os.path.join(opt.checkpoint_dir, '*.pth'))
    ]
    for checkpoint in checkpoints:
        print("Starting Checkpoint", checkpoint)
        state = torch.load(checkpoint)
        saved_opt = state.get('opt')
        model = load_model.load_model(saved_opt)
        model.load_state_dict(state.get('weight', False))
        if cuda:
            model.cuda()

        saved_opt.test = True

        dataloader = DataLoader(
            load_dataset.get_dataset(saved_opt),
            batch_size=saved_opt.batch_size,
            shuffle=False,
            num_workers=saved_opt.n_cpu,
        )

        for i, imgs in enumerate(dataloader):
            row = []
            # Configure model input
            data = imgs["input"].type(Tensor)
            true_mask = imgs["gt"].type(Tensor)

            model.eval()
            with torch.no_grad():
                predicted_mask = model(data)
                if saved_opt.deep_supervision:
Example #9
def train_GC():
    # note the multi-task setting here
    print('start loading data====================')
    dataset = get_dataset(data_args)
    input_dim = dataset.num_node_features
    output_dim = int(dataset.num_classes)
    dataloader = get_dataloader(dataset, data_args, train_args)

    print('start training model==================')
    gnnNets = GnnNets(input_dim, output_dim, model_args)
    gnnNets.to_device()
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(gnnNets.parameters(),
                     lr=train_args.learning_rate,
                     weight_decay=train_args.weight_decay)

    avg_nodes = 0.0
    avg_edge_index = 0.0
    for i in range(len(dataset)):
        avg_nodes += dataset[i].x.shape[0]
        avg_edge_index += dataset[i].edge_index.shape[1]
    avg_nodes /= len(dataset)
    avg_edge_index /= len(dataset)
    print(
        f"graphs: {len(dataset)}, avg_nodes: {avg_nodes:.4f}, "
        f"avg_edge_index: {avg_edge_index / 2:.4f}"
    )

    best_acc = 0.0
    data_size = len(dataset)
    print(f'The total num of dataset is {data_size}')

    # save path for model
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')
    if not os.path.isdir(os.path.join('checkpoint', data_args.dataset_name)):
        os.mkdir(os.path.join('checkpoint', f"{data_args.dataset_name}"))
    ckpt_dir = f"./checkpoint/{data_args.dataset_name}/"

    early_stop_count = 0
    for epoch in range(train_args.max_epochs):
        acc = []
        loss_list = []
        gnnNets.train()
        for batch in dataloader['train']:
            logits, probs, _ = gnnNets(batch)
            loss = criterion(logits, batch.y)

            # optimization
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_value_(gnnNets.parameters(),
                                            clip_value=2.0)
            optimizer.step()

            ## record
            _, prediction = torch.max(logits, -1)
            loss_list.append(loss.item())
            acc.append(prediction.eq(batch.y).cpu().numpy())

        # report train msg
        print(f"Train Epoch:{epoch}  |Loss: {np.average(loss_list):.3f} | "
              f"Acc: {np.concatenate(acc, axis=0).mean():.3f}")

        # report eval msg
        eval_state = evaluate_GC(dataloader['eval'], gnnNets, criterion)
        print(
            f"Eval Epoch: {epoch} | Loss: {eval_state['loss']:.3f} | Acc: {eval_state['acc']:.3f}"
        )

        # only save the best model
        is_best = (eval_state['acc'] > best_acc)

        if eval_state['acc'] > best_acc:
            early_stop_count = 0
        else:
            early_stop_count += 1

        if early_stop_count > train_args.early_stopping:
            break

        if is_best:
            best_acc = eval_state['acc']
            early_stop_count = 0
        if is_best or epoch % train_args.save_epoch == 0:
            save_best(ckpt_dir, epoch, gnnNets, model_args.model_name,
                      eval_state['acc'], is_best)

    print(f"The best validation accuracy is {best_acc}.")
    # report test msg
    checkpoint = torch.load(
        os.path.join(ckpt_dir, f'{model_args.model_name}_best.pth'))
    gnnNets.update_state_dict(checkpoint['net'])
    test_state, _, _ = test_GC(dataloader['test'], gnnNets, criterion)
    print(
        f"Test: | Loss: {test_state['loss']:.3f} | Acc: {test_state['acc']:.3f}"
    )
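
A hedged end-to-end sketch (not in the original source): train the graph classifier with train_GC above, then explain it with the SubgraphX pipeline from Example #3. It assumes mcts_args.explain_model_path is configured to point at the {model_name}_best.pth checkpoint written by save_best, and max_nodes=5 is a hypothetical subgraph budget.

if __name__ == '__main__':
    train_GC()  # writes ./checkpoint/{dataset_name}/{model_name}_best.pth via save_best
    # assumes mcts_args.explain_model_path points at that best checkpoint
    fidelity_scores, sparsity_scores = pipeline(max_nodes=5)
    print(f"fidelity={fidelity_scores.mean().item():.4f}, "
          f"sparsity={sparsity_scores.mean().item():.4f}")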
Example #10
def train_NC():
    print('start loading data====================')
    dataset = get_dataset(data_args)
    input_dim = dataset.num_node_features
    output_dim = int(dataset.num_classes)

    # save path for model
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')
    if not os.path.isdir(
            os.path.join('checkpoint', f"{data_args.dataset_name}")):
        os.mkdir(os.path.join('checkpoint', f"{data_args.dataset_name}"))
    ckpt_dir = f"./checkpoint/{data_args.dataset_name}/"

    data = dataset[0]
    gnnNets_NC = GnnNets_NC(input_dim, output_dim, model_args)
    gnnNets_NC.to_device()
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(gnnNets_NC.parameters(),
                     lr=train_args.learning_rate,
                     weight_decay=train_args.weight_decay)

    best_val_loss = float('inf')
    best_acc = 0
    val_loss_history = []
    early_stop_count = 0
    for epoch in range(1, train_args.max_epochs + 1):
        gnnNets_NC.train()
        logits, prob, _ = gnnNets_NC(data)
        loss = criterion(logits[data.train_mask], data.y[data.train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        eval_info = evaluate_NC(data, gnnNets_NC, criterion)
        eval_info['epoch'] = epoch

        if eval_info['val_loss'] < best_val_loss:
            best_val_loss = eval_info['val_loss']
            val_acc = eval_info['val_acc']

        val_loss_history.append(eval_info['val_loss'])

        # only save the best model
        is_best = (eval_info['val_acc'] > best_acc)

        if eval_info['val_acc'] > best_acc:
            early_stop_count = 0
        else:
            early_stop_count += 1

        if early_stop_count > train_args.early_stopping:
            break

        if is_best:
            best_acc = eval_info['val_acc']
        if is_best or epoch % train_args.save_epoch == 0:
            save_best(ckpt_dir, epoch, gnnNets_NC, model_args.model_name,
                      eval_info['val_acc'], is_best)
            print(f'Epoch {epoch}, Train Loss: {eval_info["train_loss"]:.4f}, '
                  f'Train Accuracy: {eval_info["train_acc"]:.3f}, '
                  f'Val Loss: {eval_info["val_loss"]:.3f}, '
                  f'Val Accuracy: {eval_info["val_acc"]:.3f}')

    # report test msg
    checkpoint = torch.load(
        os.path.join(ckpt_dir, f'{model_args.model_name}_best.pth'))
    gnnNets_NC.update_state_dict(checkpoint['net'])
    eval_info = evaluate_NC(data, gnnNets_NC, criterion)
    print(
        f'Test Loss: {eval_info["test_loss"]:.4f}, Test Accuracy: {eval_info["test_acc"]:.3f}'
    )
Example #11
from keras.applications.resnet50 import ResNet50  # assumed import; ResNet50 is used below but was missing from this snippet
from pandas import DataFrame as df
from joblib import Parallel, delayed
from util import paths_to_tensor

import numpy as np
import multiprocessing

if __name__ == '__main__':
    """
    Extracts features from the images using the pre-trained ResNet50 (parallel)
    """

    model = ResNet50(include_top=False, weights='imagenet')

    print("Carregando os dados...")
    train_files, train_targets = get_dataset("train")
    valid_files, valid_targets = get_dataset("val")

    files_names = ["dataset_train3.csv", "dataset_val3.csv"]
    block_size = 500

    # 0 - train, 1 - validation
    for i_file in [0, 1]:

        files = [train_files, valid_files][i_file]
        count = int(np.ceil(len(files) / block_size))

        for i in range(count):
            print("{0} lotte: {1}/{2}".format(files_names[i_file], i, count))

            begin = i * block_size