def test_hard_clf(fname, data, labels):
    """Run the hard classifier on `data`; flag ambiguous predictions as class 0."""
    clf = loaddata.load_pkl(fname)
    data = norm_data(data)
    data = torch.tensor(data).float()
    pred_y = clf.predict(data)

    # Maximum difference between the top two class scores for a prediction
    # to be considered too close to call.
    eps = 0.00005
    pred_labels = []
    for ele in pred_y:
        max_val, temp_label = ele.max(0)
        for i in range(len(ele)):
            # If any other class score is within eps of the max, mark as unsure.
            if temp_label != i and abs(max_val - ele[i]) <= eps:
                temp_label = torch.tensor(-2)
        pred_labels.append(temp_label.item() + 1)

    # If labels are provided, compute the accuracy of the model.
    pred_acc = -1.0
    if labels is not None:
        pred_acc = np.sum(np.array(pred_labels) == labels) / len(labels)
    return pred_labels, pred_acc
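# norm_data is used above but not defined in this file; a minimal sketch of
# what a normalizer might look like, assuming samples are flattened feature
# vectors scaled to zero mean / unit variance per feature. This is an
# illustrative assumption, not the project's actual norm_data.
def norm_data_sketch(data):
    data = np.asarray(data, dtype=np.float64)
    mean = data.mean(axis=0)
    std = data.std(axis=0)
    std[std == 0] = 1.0  # guard against constant features
    return (data - mean) / std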
def prepare_data(self, data=None, labels=None):
    if data is None:
        data = loaddata.load_pkl("train_data.pkl")
        labels = np.load("finalLabelsTrain.npy")
    data = np.array([np.array(data[i], dtype=bool) for i in range(len(data))])
    data_labelled = [[data[i], labels[i]] for i in range(len(data))]
    data_labelled_standardized = self.standardize_data(data_labelled)
    # np.random.shuffle( std_labelled_training_data )

    # 80/20 train/test split.
    split = int(len(data_labelled_standardized) * 0.80)
    self.training_data_loader = torch.utils.data.DataLoader(
        data_labelled_standardized[:split],
        batch_size=self.batch_size, shuffle=True, num_workers=0)
    self.test_data_loader = torch.utils.data.DataLoader(
        data_labelled_standardized[split:],
        batch_size=self.batch_size, shuffle=True, num_workers=0)
    return None
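# A minimal sketch of consuming the loaders built above (hypothetical helper;
# assumes the default collate turns each [image, label] pair into an
# (image_batch, label_batch) tuple, and that `network` has already called
# prepare_data()).
def _inspect_first_batch(network):
    for images, targets in network.training_data_loader:
        # One shuffled mini-batch of size batch_size.
        print(images.shape, targets.shape)
        break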
def test_easy_clf(fname, data, labels):
    clf = loaddata.load_pkl(fname)
    data = norm_data(data)
    pred_y = clf.predict(data)

    # If labels are provided, compute the accuracy of the model.
    pred_acc = -1.0
    if labels is not None:
        pred_acc = sum(pred_y == labels) / len(labels)
    return pred_y, pred_acc
def predict(): """ Load Test data. """ input_data = loaddata.load_pkl(sys.argv[2]) np.save('input_pickle_data.npy', input_data) test_data = np.load('input_pickle_data.npy', allow_pickle=True) """Network type""" if sys.argv[1] == 'AB': networkType = 'AB' else: networkType = 'All' def data_process(train_data): resized_data = [] for item in train_data: resized_data.append(skt.resize(np.float32(item), (100, 100))) for i in range(len(resized_data)): resized_data[i] = resized_data[i].flatten() resized_data = np.asarray(resized_data) return resized_data """ Obtain all images of 'a' and 'b'. Create a dataset with those instances.""" def data_split(resized_data, train_labels): set_X, set_Y = [], [] for i in (np.unique(train_labels)): items = list(np.where(train_labels == i)[0]) set_X.append(resized_data[items]) set_Y.append(train_labels[items]) if networkType == "AB": x = np.concatenate((np.asarray(set_X[0]), np.asarray(set_X[1]))) y = np.concatenate((np.asarray(set_Y[0]), np.asarray(set_Y[1]))) else: x = np.concatenate((np.asarray(set_X[0]), np.asarray(set_X[1]), np.asarray(set_X[2]), np.asarray(set_X[3]), np.asarray(set_X[4]), np.asarray(set_X[5]), np.asarray(set_X[6]), np.asarray(set_X[7]))) y = np.concatenate((np.asarray(set_Y[0]), np.asarray(set_Y[1]), np.asarray(set_Y[2]), np.asarray(set_Y[3]), np.asarray(set_Y[4]), np.asarray(set_Y[5]), np.asarray(set_Y[6]), np.asarray(set_Y[7]))) return x, y """Resize the test data""" test_resize_data = data_process(test_data) """ Load trained network""" networkFile = 'networkAB.pickle' if networkType == "AB" else "networkAll.pickle" with open(networkFile, 'rb') as handle: network = pickle.load(handle) load_lr_model = pickle.load(open(networkFile, 'rb')) """Forward the test data into network""" y_predicted = load_lr_model.predict(test_resize_data) """ Saving predicted values in an output file""" np.save(sys.argv[3], y_predicted) return y_predicted
def load_data_All_Classes(data, labels):
    data1 = load_pkl(data)
    data1 = np.asarray(data1)
    for i in range(len(data1)):
        data1[i] = np.asarray(data1[i])
    # NOTE: the labels argument is ignored; labels are always reloaded from disk.
    labels = np.load('finalLabelsTrain.npy')
    for i in range(len(data1)):
        data1[i] = skt.resize(data1[i], (50, 50), anti_aliasing=True)
    return data1, labels
def test(data=None):
    weights_file_path = "./pre_trained_cnn_weights.weights"
    network = CNN(lr=0.001, batch_size=8, epochs=15, number_of_classes=9)
    weights = torch.load(weights_file_path)
    network.load_state_dict(weights)
    if data is None:
        data = loaddata.load_pkl("train_data.pkl")
    data = [[np.array(data[i], dtype=bool)] for i in range(len(data))]
    data = network.standardize_data(data)
    data = torch.stack([data[i][0] for i in range(len(data))])
    return network.predict(data)
def load_data_AB(data, labels):
    """Load only the 'a' (label 1) and 'b' (label 2) samples, resized to 50x50."""
    data_X = []
    label_X = []
    data1 = load_pkl(data)
    data1 = np.asarray(data1)
    for i in range(len(data1)):
        data1[i] = np.asarray(data1[i])
    labels = np.load('finalLabelsTrain.npy')
    for i in range(len(data1)):
        if labels[i] == 1 or labels[i] == 2:
            data_X.append(data1[i])
            label_X.append(labels[i])
    data_X = np.array(data_X)
    label_X = np.array(label_X)
    for i in range(len(data_X)):
        data_X[i] = skt.resize(data_X[i], (50, 50), anti_aliasing=True)
    return data_X, label_X
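# A minimal sketch of holding out part of the a/b subset for evaluation
# (hypothetical helper; the pickle filename matches the one used elsewhere
# in this repo).
def _ab_split_sketch():
    from sklearn.model_selection import train_test_split
    data_X, label_X = load_data_AB('train_data(1).pkl', None)
    # Flatten each 50x50 image into a 2500-dim vector before splitting.
    x_flat = np.stack([img.reshape(-1) for img in data_X])
    return train_test_split(x_flat, label_X, test_size=0.2, random_state=0)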
def load_data():
    data = load_pkl('train_data(1).pkl')
    data1 = np.asarray(data)
    for i in range(len(data1)):
        data1[i] = np.asarray(data1[i])
    labels = np.load('finalLabelsTrain.npy')
    # Shift labels from 1..8 down to 0..7.
    for i in range(len(labels)):
        labels[i] = labels[i] - 1
    data_new = []  # was missing its initialization
    for i in range(len(data)):
        data[i] = skt.resize(data[i], (50, 50), anti_aliasing=True)
        data[i] = np.expand_dims(data[i], axis=0)
        # trans = transforms.ToPILImage()
        # trans1 = transforms.Grayscale(num_output_channels=1)  # unused
        data_new.append(data[i])
    data1 = np.array(data_new)
    return data1, labels
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import pickle
from skimage import transform as skt
from PIL import Image
from loaddata import load_pkl
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms  # needed for the Compose below
from sklearn.model_selection import train_test_split

# with open('/home/aritrab97/GNV_OCR/data2/train_data.pkl', 'rb') as f:
#     data = pickle.load(f)
data = load_pkl('train_data(1).pkl')
data1 = np.asarray(data)
for i in range(len(data1)):
    data1[i] = np.asarray(data1[i])

# pil_img = Image.fromarray(data[1000])
# pil_img.save('lena_square_save.png')
# print(data1[1000].shape)
# plt.imshow(data1[1000])
# plt.show()

train_path = "/home/aritrab97/GNV_OCR/data1/train/"
test_path = "/home/aritrab97/GNV_OCR/data1/test/"
MODEL_STORE_PATH = "/home/aritrab97/GNV_OCR/data2/"

# transformations = transforms.Compose([transforms.ToPILImage(),
#                                       transforms.Grayscale(num_output_channels=1),
#                                       transforms.ToTensor()])
transformations = transforms.Compose([transforms.ToPILImage(),
                                      transforms.Grayscale(num_output_channels=1)])
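# A minimal sketch of pushing one loaded image through the Compose pipeline
# above (hypothetical helper; assumes the images are 2-D binary arrays, which
# ToPILImage accepts as a single-channel image once cast to uint8).
def _show_one_transformed(index=0):
    sample = np.uint8(data1[index]) * 255  # scale a 0/1 mask to 0/255
    pil_gray = transformations(sample)     # single-channel PIL image
    pil_gray.show()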
import sys
import numpy as np
import matplotlib.pyplot as plt
from scipy import ndimage
import skimage
from PIL import Image
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

import loaddata

# Loading in the train data.
train_data = loaddata.load_pkl('train_data.pkl')
# Loading in the labels.
train_labels = np.load('finalLabelsTrain.npy')

# When attempting to classify only a and b, look only at the reduced set.
# load_pkl returns a list, so convert to an object array for boolean indexing.
train_data = np.asarray(train_data, dtype=object)
ab_train_data = train_data[np.logical_or(train_labels == 1, train_labels == 2)]
ab_train_labels = train_labels[np.logical_or(train_labels == 1, train_labels == 2)]

# It appears that in the original data, the letter switches after every count of 9.
# List of a,b data points that need to be rotated -- this is not used.
rot_list = []
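# A minimal sketch of holding out part of the a/b subset for validation,
# using the train_test_split already imported above (hypothetical split
# parameters).
ab_x_train, ab_x_test, ab_y_train, ab_y_test = train_test_split(
    ab_train_data, ab_train_labels, test_size=0.2, random_state=42)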
def view_data(file_path, view=False):
    '''
    Opens and (optionally) views the data in the .npy/.pkl data objects.
    '''
    label_dict = {1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'h', 6: 'i', 7: 'j', 8: 'k'}

    # Acquire all possible data and label files from the directories given.
    data_files = []
    for directory in file_path:
        dir_listing = os.listdir(directory)
        file_pair = []
        # Find the data files (.pkl or .npy).
        for f in dir_listing:
            match = re.match(r'(.*)data(.*)', f)
            if match is not None:
                print("Got {} from directory {}".format(f, directory))
                if f.endswith('.pkl') or f.endswith('.npy'):
                    file_pair.append(directory + '/' + f)
        # Find the label .npy files.
        for f in dir_listing:
            match = re.match(r'(.*)[lL]abel(.*).npy', f)
            if match is not None:
                print("Got {} from directory {}".format(f, directory))
                file_pair.append(directory + '/' + f)
        data_files.append(file_pair)

    if view:
        print(data_files)
    data_files = np.array(data_files)

    # No files found.
    if np.min(data_files.shape) == 0:
        print("No '.npy' files found in the given directories; "
              "quitting data visualization")
        return

    data = []
    labels = []
    for pair in data_files:
        d = pair[0]
        if d.endswith('.pkl'):
            data.extend(loaddata.load_pkl(d))
        else:
            data.extend(np.load(d, allow_pickle=True))
        if len(pair) > 1:
            labels.extend(np.load(pair[1], allow_pickle=True))

    # If we want to see the binarized data and their respective labels...
    if view:
        # data and labels are flat lists, so zip yields (image, label) pairs.
        data_pair = zip(data, labels)
        # Visualize all the data.
        for d, lbl in data_pair:
            print("Label:", label_dict[lbl])
            fig = plt.figure()
            plt.imshow(d, cmap="Greys")
            plt.pause(0.75)
            plt.close(fig)
    return data, labels
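# Hypothetical usage of view_data: './data' stands in for a directory that
# holds *data*.pkl (or .npy) files and matching *label*.npy files.
if __name__ == '__main__':
    all_data, all_labels = view_data(['./data'], view=True)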
# Driver for convenience: reads the data filename from sys.argv,
# where data_filename = sys.argv[1]. test() is defined above.
import sys

# Given that this is an optional, for-convenience driver, no guarantees are
# extended for invalid arguments.
if __name__ == "__main__":
    data = None
    if len(sys.argv) > 1:
        data = loaddata.load_pkl(sys.argv[1])
    test(data)
if __name__ == '__main__':
    args = make_parser()

    # Load the test data set.
    test_data = loaddata.load_pkl(args.data_file[0])

    # Extract features.
    print("Grabbing test data set features...")
    test_data = feat_extraction.pad_data(test_data)
    test_feat = np.array(feat_extraction.feature_ext(test_data, debug=False))
    test_sums = np.array(feat_extraction.extract_sums(test_data))
    test_feat = np.hstack((test_feat, test_sums))
    print("Done with feature extraction.")

    # Save extracted features into .npy files for future loading.
    print("Size of test data set:", test_feat.shape)

    test_labels = None
    # if args.labels:
    #     test_labels = np.load('test_data/test_feat/test_labels.npy')
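    # Hedged sketch of the step that would naturally follow: feed the
    # extracted features to the pickled classifier via test_hard_clf above.
    # 'hard_clf.pkl' is a placeholder filename, not taken from the project.
    # pred_labels, pred_acc = test_hard_clf('hard_clf.pkl', test_feat, test_labels)
    # print("Predicted labels:", pred_labels[:10], "accuracy:", pred_acc)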