def evaluate_model():
    """Fit the ANN on the captures dataset and print its test accuracy.

    Image descriptors are computed with ``getDescriptor`` for every file
    listed in ``loader.files`` (training split) and ``loader.files_test``
    (test split).  The network is fitted on the training descriptors, then
    each test sample is classified and the per-sample prediction plus the
    overall accuracy are printed.

    Reads the module-level ``model_fname`` and passes it to ``MyAnn`` as
    ``ann_fname``.  Returns ``None``; all results are reported via ``print``.
    """
    global model_fname

    loader = FileLoader(
        dir_to_walk=r'C:\Users\Ivo Ribeiro\Documents\open-cv\datasets\captures'
    )
    loader.load_files()

    x = []
    y = []
    x_test = []
    y_test = []

    # Training split: one descriptor and one one-hot target per image.
    for row in loader.files:
        for fname in row['imgs_per_class']:
            img = cv2.imread(fname)
            descr = getDescriptor(img)
            x.append(descr)
            y.append(hot_encode_vect(len(loader.files), row['index']))

    # Test split, encoded the same way.
    # NOTE(review): the one-hot width here comes from len(loader.files_test);
    # presumably both splits contain the same classes -- confirm.
    for row in loader.files_test:
        for fname in row['imgs_per_class']:
            img = cv2.imread(fname)
            descr = getDescriptor(img)
            x_test.append(descr)
            y_test.append(hot_encode_vect(len(loader.files_test), row['index']))

    x = np.array(x)
    y = np.array(y)
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    print('descriptors loaded.')
    print(loader.class_names)

    ann = MyAnn(
        input_layer_size=x.shape[1],
        hidden_nodes_size=[x.shape[1] // 4],
        output_layer_size=3,
        epochs=1000,
        ann_fname=model_fname,
    )
    ann.fit(x=x, y=y)
    print('finished train or load.')

    print('evaluating')
    n_test = x_test.shape[0]
    if n_test == 0:
        # Without this guard the accuracy computation below divides by zero.
        print('no test samples found; accuracy not computed.')
        return

    corrects = 0
    # Dedicated loop names so the training arrays x / y are not overwritten.
    for sample, target in zip(x_test, y_test):
        expected = int(np.argmax(target))
        p, stats = ann.predict(sample)
        p = int(p)
        stats = np.squeeze(stats)
        print('%s predicted as %s with %.2f'
              % (loader.class_names[expected], loader.class_names[p], stats[p]))
        print(stats)
        if p == expected:
            corrects += 1

    print('acc %.2f%s' % ((corrects / n_test) * 100, '%'))
def load_data(self):
    """Return ``(train_dl, val_dl)``, building the data loaders on first use.

    When ``self.val_dl`` is already set, the stored pair is returned without
    touching the file system.  Otherwise the images under
    ``self.imgs_foldername`` are loaded through ``FileLoader``; the
    validation loader is always built from ``loader.files_test``, while the
    training loader is built from ``loader.files`` only if ``self.training``
    is truthy (it is ``None`` otherwise).
    """
    if self.val_dl is None:
        print('loading files...')
        file_loader = FileLoader(dir_to_walk=self.imgs_foldername)
        file_loader.load_files()

        # Reset first so a non-training run reports no training loader.
        self.train_dl = None
        if self.training:
            train_x, train_y = self.convert_files_to_tensors(file_loader.files)
            self.train_ds = TensorDataset(train_x, train_y)
            self.train_dl = DataLoader(self.train_ds, batch_size=self.batch_size)

        val_x, val_y = self.convert_files_to_tensors(file_loader.files_test)
        self.val_ds = TensorDataset(val_x, val_y)
        self.val_dl = DataLoader(self.val_ds, batch_size=self.batch_size)
        print('files loaded.')

    return (self.train_dl, self.val_dl)
def chart_data():
    """Plot a 2-D t-SNE scatter of the image descriptors, one colour per class.

    Descriptors are computed with ``getDescriptor`` (``expected_shape`` of
    ``(128, 64)``) for every image in ``loader.files``.  Feature columns whose
    standard deviation is not above ``0.0003`` are dropped before the
    embedding.  The per-class descriptor array is also stored back on each
    row under the ``'features'`` key.  The figure is shown with
    ``plt.show()``; nothing is returned.
    """
    loader = FileLoader(r'C:\Users\Ivo Ribeiro\Documents\open-cv\datasets\captures')
    loader.load_files()

    seed = 11
    tsne = TSNE(n_components=2, random_state=seed)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    colors = ['b', 'g', 'r']  # 3 classes

    all_features = []
    all_labels = []
    # Explicit class-index -> class-name mapping.  Pairing two independently
    # sorted np.unique() results mislabels the legend whenever alphabetical
    # name order differs from index order.
    name_by_label = {}
    for row in loader.files:
        name_by_label[row['index']] = row['class_name']
        features = []
        for f_name in row['imgs_per_class']:
            img = cv2.imread(f_name)
            features.append(getDescriptor(img, expected_shape=(128, 64)))
        row['features'] = np.array(features)
        all_features.extend(features)
        all_labels.extend([row['index']] * len(features))

    all_features = np.array(all_features)
    all_labels = np.array(all_labels)

    # Keep only the feature columns that actually vary across the dataset.
    stds = np.std(all_features, axis=0)
    all_features = all_features[:, stds > 0.0003]
    print(all_features.shape)
    print(all_labels.shape)

    # NOTE(review): fit_transform is called once per class, so the classes do
    # not share a single embedding -- confirm this is intended.
    for label in np.unique(all_labels):
        label = int(label)
        features = all_features[all_labels == label, :]
        transformed = tsne.fit_transform(features)
        ax.scatter(
            x=transformed[:, 0],
            y=transformed[:, 1],
            c=colors[label],
            label=name_by_label[label],
        )

    plt.title('captures dataset')
    ax.legend(loc='best')
    plt.show()
def start():
    """Command-line entry point: analyze every input file and write results.

    Parses ``-f/--files`` (required), ``-a/--all`` and ``-s/--similarity``,
    loads the input files and the word lists through ``FileLoader``, loads
    the English Cube model, then for each file runs the ``Analyzer``, prints
    its report and writes a per-file CSV.  With ``--all`` the accumulated
    rows are also written to ``full_results_aztertest.csv`` in the output
    directory.

    Exits through ``ArgumentParser.error`` (status 2) when ``-f`` is missing
    or no input file was loaded.
    """
    # Imported here rather than at module level, as in the original code.
    from analyzer import Analyzer

    parser = ArgumentParser(description="python3 ./main.py -f \"laginak/*.doc.txt\" ")
    # Pop the default optional group and re-append it below so that the
    # "Required arguments" group is listed first in --help.
    optional = parser._action_groups.pop()
    required = parser.add_argument_group('Required arguments')
    # required=True: without it a missing -f passed None on to FileLoader.
    required.add_argument("-f", "--files", nargs='+', required=True,
                          help="Files to analyze (in .txt, .odt, .doc or .docx format)")
    optional.add_argument('-a', '--all', action='store_true',
                          help="Generate a CSV file with all the results")
    optional.add_argument('-s', '--similarity', action='store_true',
                          help="Calculate similarity (max. 5 files)")
    parser._action_groups.append(optional)
    opts = parser.parse_args()

    FileLoader.load_files(opts.files)
    FileLoader.load_irregular_verbs_list()
    FileLoader.load_dale_chall_list()
    FileLoader.load_connectives_list()
    FileLoader.load_oxford_word_list()

    if len(FileLoader.files) == 0:
        # Report a clear usage error instead of failing on FileLoader.files[0].
        parser.error("no input files found for: %s" % " ".join(opts.files))

    # Load the Cube model
    cube = Cube(verbose=True)
    cube.load("en", "latest")

    df_row = None
    # Files will be created in this folder
    path = Printer.create_directory(FileLoader.files[0])
    total = len(FileLoader.files)
    for file_num, input_file in enumerate(FileLoader.files, start=1):
        texto = Analyzer.process_text(input=input_file)
        # Analyze
        analyzer = Analyzer(texto, input_file, cube)
        indicators = analyzer.analyze(opts.similarity)
        df = analyzer.create_dataframe()
        prediction = analyzer.predict_dificulty(df)

        printer = Printer(input_file, indicators)
        printer.print_info(opts.similarity, prediction, file_num, total)
        if opts.all:
            # Accumulate this file's row into the combined results table.
            df_row = printer.write_in_full_csv(df_row, opts.similarity)
        printer.generate_csv(path, prediction, opts.similarity)

    if opts.all:
        df_row.to_csv(os.path.join(path, "full_results_aztertest.csv"),
                      encoding='utf-8', index=False)