def main():
    """Localize the dominant object in an image.

    Slides a fixed-size window over an image pyramid, classifies every
    window with a pretrained ResNet, keeps the windows predicted as the
    modal (most frequent) class, and draws the surviving boxes after
    non-max suppression on the original image.
    """
    # ------------------------------- DATA -------------------------------
    padroniza_imagem = 300            # target width after resizing
    tamanho_da_entrada = (224, 224)   # network input size
    arquivo = "./imagens/raposa.jpg"
    cor = (0, 255, 0)                 # box color (green)

    # Preprocessing pipeline: tensor conversion + ImageNet normalization.
    composicao_de_transformacao = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # ------------------------------- MODEL ------------------------------
    modelo = ResNet(1000, True)
    modelo.eval()

    # Open the image (kept in RGB; converted to BGR only for cv2.imshow).
    imagem_original = np.asarray(Image.open(arquivo))
    imagem = imagem_original.copy()

    # Resize to a fixed width, preserving the aspect ratio.
    (H, W) = imagem.shape[:2]
    r = padroniza_imagem / W
    dim_final = (padroniza_imagem, int(H * r))
    imagem = cv2.resize(imagem, dim_final, interpolation=cv2.INTER_AREA)

    # Region-of-interest size, (H, W).
    ROI = (150, 150)

    rois = []   # window crops, resized to the network input size
    coods = []  # matching (x1, y1, x2, y2) boxes in original-image coords

    # Image-pyramid loop: each level is a progressively smaller image.
    for nova_imagem in util.image_pyramid(imagem, escala=1.2):
        # Scale factor mapping this pyramid level back to the original image.
        fator_escalar = W / float(nova_imagem.shape[1])

        # Slide the fixed-size window over the current pyramid level.
        for (x, y, regiao) in util.sliding_window(nova_imagem, size=ROI, stride=8):
            # Early exit on 'q'. NOTE(review): this only breaks the inner
            # sliding-window loop; the pyramid loop continues.
            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break

            # Skip truncated windows at the image border.
            if regiao.shape[0] != ROI[0] or regiao.shape[1] != ROI[1]:
                continue

            # Window coordinates expressed in the original image.
            x_r = int(x * fator_escalar)
            w_r = int(fator_escalar * ROI[1])
            y_r = int(y * fator_escalar)
            h_r = int(fator_escalar * ROI[0])

            # Resize the crop to the network input size and store it.
            roi = cv2.resize(regiao, tamanho_da_entrada)
            rois.append(np.asarray(roi))
            coods.append((x_r, y_r, x_r + w_r, y_r + h_r))

            # Visualize the current window on a copy of the pyramid level.
            copia = nova_imagem.copy()
            cv2.rectangle(copia, (x, y), (x + ROI[1], y + ROI[0]), cor, 2)
            cv2.imshow("Janela", copia[:, :, ::-1])  # RGB -> BGR for OpenCV
            time.sleep(0.01)  # slow the loop down so the window is watchable

    cv2.destroyAllWindows()

    # One batch holds every crop, so a single forward pass classifies all.
    # shuffle=False is required: predictions are matched back to `coods`
    # by index, so batch order must equal insertion order (shuffle=True
    # here would pair masks with the wrong boxes).
    dataset = DataSet(rois, coods, composicao_de_transformacao)
    size = len(dataset)
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               shuffle=False,
                                               batch_size=size)
    print("Cópias: ", size)

    with torch.no_grad():
        for _, (X, y) in enumerate(train_loader):
            # Classify every crop at once.
            resultado = modelo(X)

            # Top-1 class per crop, then the modal class across all crops.
            confs, indices_dos_melhores_resultados = torch.max(resultado, 1)
            classe, _ = torch.mode(indices_dos_melhores_resultados.flatten(), -1)

            # Keep only the boxes whose prediction matches the modal class.
            mascara = [item == classe
                       for item in indices_dos_melhores_resultados]
            boxes = [coods[i] for i in range(size) if mascara[i]]

            # Collapse overlapping boxes.
            boxes = util.non_max_suppression(np.asarray(boxes),
                                             overlapThresh=0.3)

            # Draw the final boxes on the untouched original image.
            copia = imagem_original.copy()
            for (x1, y1, x2, y2) in boxes:
                cv2.rectangle(copia, (x1, y1), (x2, y2), cor, 2)

            cv2.imshow("Final", copia[:, :, ::-1])
            cv2.waitKey(0)

    cv2.destroyAllWindows()
def main():
    """Train a ResNet-18 classifier indefinitely.

    Each epoch: train over the full training set, track accuracy, and —
    whenever accuracy improves — checkpoint the model, log accuracy and
    wall-clock time, and dump test-set predictions to CSV. Learning rate
    is decayed every epoch. Runs until interrupted (infinite epoch loop).
    """
    if not sys.warnoptions:
        warnings.simplefilter("ignore")

    # --- hyper parameters --- #
    BATCH_SIZE = 256
    LR = 1e-3
    WEIGHT_DECAY = 1e-4
    N_layer = 18
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # --- data process --- #
    # Paths for source data, checkpoints, and prediction CSVs.
    src_path = './data/'
    target_path = './saved/ResNet18/'
    model_path = target_path + 'pkls/'
    pred_path = target_path + 'preds/'
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(model_path, exist_ok=True)
    os.makedirs(pred_path, exist_ok=True)

    # Build label <-> index maps; also emits the testing-id csv.
    label2num_dict, num2label_dict = data_evaluation(src_path)

    train_data = dataLoader(src_path, 'train', label2num_dict)
    train_len = len(train_data)
    test_data = dataLoader(src_path, 'test')
    train_loader = Data.DataLoader(
        dataset=train_data,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=12,
    )
    test_loader = Data.DataLoader(
        dataset=test_data,
        batch_size=BATCH_SIZE,
        shuffle=False,  # order must match the testing_labels.csv rows
        num_workers=12,
    )

    # --- model training --- #
    # Log files for per-improvement accuracy and timestamps.
    fp_train_acc = open(target_path + 'train_acc.txt', 'w')
    fp_time = open(target_path + 'time.txt', 'w')

    highest_acc, train_acc_seq = 0, []
    loss_funct = nn.CrossEntropyLoss()
    net = ResNet(N_layer).to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=LR,
                                 weight_decay=WEIGHT_DECAY)
    print(net)

    for epoch_i in count(1):  # infinite: stops only on interrupt/error
        right_count = 0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            # clear gradient
            optimizer.zero_grad()

            # forward & backward
            output = net(batch_x.float())
            highest_out = torch.max(output, 1)[1]
            right_count += (batch_y == highest_out).sum().item()
            loss = loss_funct(output, batch_y)
            loss.backward()

            # update parameters
            optimizer.step()

        # calculate accuracy over the whole epoch
        train_acc = right_count / train_len
        train_acc_seq.append(train_acc * 100)

        # NOTE(review): checkpoint/log/predict only when accuracy improves —
        # reconstructed from the original (whitespace-mangled) layout; confirm.
        if train_acc > highest_acc:
            highest_acc = train_acc

            # save model, tagged with accuracy (per-mille) and epoch
            torch.save(
                net.state_dict(),
                '{}{}_{}_{}.pkl'.format(model_path,
                                        target_path.split('/')[2],
                                        round(train_acc * 1000), epoch_i))

            # write data
            fp_train_acc.write(str(train_acc * 100) + '\n')
            fp_time.write(
                str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) +
                '\n')
            print('\n{} Epoch {}, Training accuracy: {}'.format(
                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                epoch_i, train_acc))

            # test: predict labels for every test batch and dump to CSV
            net.eval()
            test_df = pd.read_csv(src_path + 'testing_data/testing_labels.csv')
            with torch.no_grad():
                for i, (batch_x, _) in enumerate(test_loader):
                    batch_x = batch_x.to(device)
                    output = net(batch_x.float())
                    highest_out = torch.max(output, 1)[1].cpu()
                    labels = [
                        num2label_dict[out_j.item()] for out_j in highest_out
                    ]
                    # .loc write instead of chained df['label'].iloc[...] =,
                    # which is not guaranteed to write through to test_df.
                    rows = test_df.index[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
                    test_df.loc[rows, 'label'] = labels
            test_df.to_csv('{}{}_{}_{}.csv'.format(pred_path,
                                                   target_path.split('/')[2],
                                                   round(train_acc * 1000),
                                                   epoch_i),
                           index=False)
            net.train()

        lr_decay(optimizer)

    # Unreachable while the epoch loop is infinite; kept for symmetry.
    fp_train_acc.close()
    fp_time.close()