def main():
    # ************************************ DATA ***************************************************
    padroniza_imagem = 300
    tamanho_da_entrada = (224, 224)
    arquivo = "./imagens/raposa.jpg"
    cor = (0, 255, 0)

    # Preprocessing and augmentation operations
    composicao_de_transformacao = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
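    # The mean/std values above are the standard ImageNet normalization
    # statistics expected by torchvision's ImageNet-pretrained ResNet weights.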

    # ************************************* NETWORK ************************************************
    modelo = ResNet(1000, True)
    modelo.eval()

    # Open the image
    imagem_original = np.asarray(Image.open(arquivo))
    imagem = imagem_original.copy()

    # Get the image dimensions and resize to the standard width
    (H, W) = imagem.shape[:2]
    r = padroniza_imagem / W
    dim_final = (padroniza_imagem, int(H * r))
    imagem = cv2.resize(imagem, dim_final, interpolation=cv2.INTER_AREA)

    # Size of the region of interest (H, W)
    ROI = (150, 150)

    # Lists of regions of interest (rois) and their coordinates (coods)
    rois = []
    coods = []

    # Run the image pyramid function
    for nova_imagem in util.image_pyramid(imagem, escala=1.2):
        # Scale factor between the original image and the newly generated one
        fator_escalar = W / float(nova_imagem.shape[1])

        # Run the sliding-window operation
        for (x, y, regiao) in util.sliding_window(nova_imagem,
                                                  size=ROI,
                                                  stride=8):
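            # stride=8 presumably advances the window 8 pixels per step; a
            # smaller stride gives denser coverage at higher computational cost.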

            # Stop condition
            key = cv2.waitKey(1) & 0xFF
            if (key == ord("q")):
                break

            if regiao.shape[0] != ROI[0] or regiao.shape[1] != ROI[1]:
                continue

            # Get the ROI coordinates relative to the original image
            x_r = int(x * fator_escalar)
            w_r = int(fator_escalar * ROI[1])

            y_r = int(y * fator_escalar)
            h_r = int(fator_escalar * ROI[0])

            # Get the ROI and apply the transformation required for the model input
            roi = cv2.resize(regiao, tamanho_da_entrada)
            roi = np.asarray(roi)
            rois.append(roi)

            # Get the coordinates (x1, y1, x2, y2)
            coods.append((x_r, y_r, x_r + w_r, y_r + h_r))

            # Work on a copy of the image
            copia = nova_imagem.copy()
            # Draw a rectangle at the current window position
            cv2.rectangle(copia, (x, y), (x + ROI[1], y + ROI[0]), cor, 2)
            # Show the result in the window
            cv2.imshow("Janela", copia[:, :, ::-1])

            # Small delay in the loop
            time.sleep(0.01)

    # Close all open windows
    cv2.destroyAllWindows()

    #rois = np.array(rois, dtype="float32") # transform to torch tensor
    dataset = DataSet(rois, coods, composicao_de_transformacao)
    size = len(dataset)
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               shuffle=True,
                                               batch_size=size)
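    # batch_size=size loads every ROI in a single batch, so the loop below
    # executes exactly once.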

    print("Cópias: ", size)
    with torch.no_grad():
        for _, (X, y) in enumerate(train_loader):
            # Classify all of the image crops
            resultado = modelo(X)

            # Get the top prediction for each crop
            confs, indices_dos_melhores_resultados = torch.max(resultado, 1)
            classe, _ = torch.mode(indices_dos_melhores_resultados.flatten(),
                                   -1)
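            # torch.mode picks the most frequent predicted class across all
            # crops; that majority class is treated as the detected object.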

            # Mask marking the crops predicted as the majority class
            mascara = [
                item == classe
                for item in indices_dos_melhores_resultados
            ]

            # Select the boxes belonging to the majority class
            boxes = []
            for i in range(size):
                if mascara[i]:
                    boxes.append(coods[i])

            # Apply non-max suppression to merge overlapping boxes
            boxes = util.non_max_suppression(np.asarray(boxes),
                                             overlapThresh=0.3)

            copia = imagem_original.copy()
            for (x1, y1, x2, y2) in boxes:
                cv2.rectangle(copia, (x1, y1), (x2, y2), cor, 2)

            cv2.imshow("Final", copia[:, :, ::-1])
            cv2.waitKey(0)

    cv2.destroyAllWindows()
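
# The helpers util.image_pyramid, util.sliding_window and util.non_max_suppression
# come from the project's util module and are not shown here. Below is a minimal
# sketch of the two generators, assuming `escala` is the downscale factor per
# pyramid level and `stride` is the step in pixels; this is a hypothetical
# illustration, not the project's actual implementation.
def image_pyramid(imagem, escala=1.2, tamanho_minimo=(150, 150)):
    # Yield the original image, then progressively smaller versions until the
    # image drops below the minimum size.
    yield imagem
    while True:
        new_w = int(imagem.shape[1] / escala)
        new_h = int(imagem.shape[0] / escala)
        imagem = cv2.resize(imagem, (new_w, new_h))
        if new_h < tamanho_minimo[0] or new_w < tamanho_minimo[1]:
            break
        yield imagem


def sliding_window(imagem, size, stride):
    # Slide a (H, W) window across the image, yielding the top-left corner and
    # the crop. Windows at the border may be smaller than `size`, which is why
    # the caller checks the crop's shape before using it.
    for y in range(0, imagem.shape[0], stride):
        for x in range(0, imagem.shape[1], stride):
            yield (x, y, imagem[y:y + size[0], x:x + size[1]])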
Example #2
def main():
    if not sys.warnoptions:
        warnings.simplefilter("ignore")

    # --- hyper parameters --- #
    BATCH_SIZE = 256
    LR = 1e-3
    WEIGHT_DECAY = 1e-4
    N_layer = 18
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # --- data process --- #
    # info
    src_path = './data/'
    target_path = './saved/ResNet18/'
    model_path = target_path + 'pkls/'
    pred_path = target_path + 'preds/'

    if not os.path.exists(model_path):
        os.makedirs(model_path)
    if not os.path.exists(pred_path):
        os.makedirs(pred_path)

    # evaluation: number of class labels & image size
    # outputs the testing id csv
    label2num_dict, num2label_dict = data_evaluation(src_path)

    # load
    train_data = dataLoader(src_path, 'train', label2num_dict)
    train_len = len(train_data)
    test_data = dataLoader(src_path, 'test')

    train_loader = Data.DataLoader(
        dataset=train_data,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=12,
    )
    test_loader = Data.DataLoader(
        dataset=test_data,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=12,
    )

    # --- model training --- #
    # fp: file handles for logging training results
    fp_train_acc = open(target_path + 'train_acc.txt', 'w')
    fp_time = open(target_path + 'time.txt', 'w')

    # train
    highest_acc, train_acc_seq = 0, []
    loss_funct = nn.CrossEntropyLoss()
    net = ResNet(N_layer).to(device)
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=LR,
                                 weight_decay=WEIGHT_DECAY)
    print(net)

    for epoch_i in count(1):
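        # NOTE: count(1) (presumably itertools.count) iterates indefinitely, so
        # training runs until interrupted; the log files opened above are only
        # closed if this loop is ever exited.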
        right_count = 0

        # print('\nTraining epoch {}...'.format(epoch_i))
        # for batch_x, batch_y in tqdm(train_loader):
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            # clear gradient
            optimizer.zero_grad()

            # forward & backward
            output = net(batch_x.float())
            highest_out = torch.max(output, 1)[1]
            right_count += sum(batch_y == highest_out).item()

            loss = loss_funct(output, batch_y)
            loss.backward()

            # update parameters
            optimizer.step()

        # calculate accuracy
        train_acc = right_count / train_len
        train_acc_seq.append(train_acc * 100)

        if train_acc > highest_acc:
            highest_acc = train_acc

        # save model
        torch.save(
            net.state_dict(),
            '{}{}_{}_{}.pkl'.format(model_path,
                                    target_path.split('/')[2],
                                    round(train_acc * 1000), epoch_i))

        # write data
        fp_train_acc.write(str(train_acc * 100) + '\n')
        fp_time.write(
            str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) + '\n')
        print('\n{} Epoch {}, Training accuracy: {}'.format(
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), epoch_i,
            train_acc))

        # test
        net.eval()
        test_df = pd.read_csv(src_path + 'testing_data/testing_labels.csv')
        with torch.no_grad():
            for i, (batch_x, _) in enumerate(test_loader):
                batch_x = batch_x.to(device)
                output = net(batch_x.float())
                highest_out = torch.max(output, 1)[1].cpu()
                labels = [
                    num2label_dict[out_j.item()] for out_j in highest_out
                ]
                test_df.iloc[i * BATCH_SIZE:(i + 1) * BATCH_SIZE,
                             test_df.columns.get_loc('label')] = labels
        test_df.to_csv('{}{}_{}_{}.csv'.format(pred_path,
                                               target_path.split('/')[2],
                                               round(train_acc * 1000),
                                               epoch_i),
                       index=False)
        net.train()

        lr_decay(optimizer)

    fp_train_acc.close()
    fp_time.close()
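
# lr_decay is a project helper that is not defined in this snippet. A minimal
# sketch, assuming it simply scales every parameter group's learning rate by a
# fixed factor once per epoch (hypothetical; the project's implementation may
# differ):
def lr_decay(optimizer, factor=0.95):
    # Multiply each parameter group's learning rate by `factor`.
    for param_group in optimizer.param_groups:
        param_group['lr'] *= factor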