def _generate_graph_embeddings(self):
    """
    Build the feature matrix for the current document and store it in
    ``self.X``.

    A new ``ObjectTree`` and a new ``Graph`` are created and kept on
    ``self.tree`` and ``self.graph``. The tree reads ``self.object_map`` and
    ``self.image``; what ``connect`` returns is passed, together with
    ``self.img_dims``, to ``Graph.make_graph_data``, which yields an
    adjacency matrix and a feature matrix.

    Nothing is returned; the result is written to ``self.X``.
    """
    self.tree = ObjectTree()
    self.graph = Graph()

    # Build the graph dictionary from the object map and the image
    self.tree.read(self.object_map, self.image)
    connections = self.tree.connect(plot=False, export_df=True)
    graph_dict, text_list, coords_arr = connections

    # Adjacency matrix and feature matrix of the document graph
    graph_data = self.graph.make_graph_data(graph_dict=graph_dict,
                                            text_list=text_list,
                                            coords_arr=coords_arr,
                                            img_dims=self.img_dims)
    adjacency, features = graph_data

    # Multiply the features by the adjacency matrix, then place the product
    # next to the original features (column-wise)
    propagated = np.matmul(adjacency, features)
    self.X = np.concatenate((features, propagated), axis=1)
def main():
    """
    Start the Qt application: create the UDP thread, show the graph window
    and run the event loop until it exits.

    The process exits with the event loop's return code via ``sys.exit``.
    """
    app = QtWidgets.QApplication(sys.argv)

    udp = UdpThread()
    udp.connect()
    udp.start()

    # Keep a reference to the window for as long as the event loop runs.
    # Calling ``Graph(...).show()`` on a temporary leaves no Python reference
    # to the widget, which allows it to be garbage-collected (and the window
    # destroyed) before or while ``exec_()`` is running.
    window = Graph(rssi_listener=udp)
    window.show()

    sys.exit(app.exec_())
def appStarted(mode):
    """
    Initialise the state of the music level-selection mode.

    Loads the background image, sets the layout units derived from the app
    size, creates the buttons, the Fourier graph and the colour schemes, and
    resets the timing counters.

    Args:
        mode: the mode object being initialised; all state is stored on it.
    """
    # Prefer the local image; fall back to the hosted copy if loading fails.
    # ``except Exception`` (rather than a bare ``except``) so that
    # KeyboardInterrupt / SystemExit are not swallowed by the fallback.
    try:
        mode.bg = mode.loadImage("muslevelbg.jpg")
    except Exception:
        mode.bg = mode.loadImage("https://i.ibb.co/8DwvThD/muslevelbg.jpg")

    mode.Progress = Progress()
    mode.gameState = "levels"
    mode.game = "music"

    # Layout: margins and grid units are a tenth of the width and a ninth of
    # the height of the app window
    (mode.marginX, mode.marginY) = (mode.app.width // 10,
                                    mode.app.height // 9)
    (mode.puzzleW, mode.puzzleH) = (mode.app.width - 2 * mode.marginX,
                                    mode.app.height - 2 * mode.marginY)
    mode.widthUnit = mode.app.width // 10
    mode.heightUnit = mode.app.height // 9

    mode.selected = [(-100, -100)]
    mode.complete = False
    mode.lastBeatTimer = 0
    mode.beatTimer = 0
    mode.dots = []

    # NOTE(review): createButtons() presumably creates mode.buttons, which is
    # appended to below - confirm.
    mode.createButtons()
    mode.levelButtons = []
    levelPageB = FakeButton(mode.widthUnit * 0.5, mode.heightUnit * 5.5,
                            mode.widthUnit * 0.8, mode.heightUnit * 0.6,
                            "Levels", mode.levelPageBCom, mode)
    mode.buttons.append(levelPageB)

    # Level-selection buttons are kept in their own list
    cusLevelB = FakeButton(mode.widthUnit * 0.5, mode.heightUnit * 1.5,
                           mode.widthUnit * 0.8, mode.heightUnit * 0.6,
                           " S ", mode.cusBCom, mode, " S ")
    level1B = FakeButton(mode.widthUnit * 1.5, mode.heightUnit * 1.5,
                         mode.widthUnit * 0.8, mode.heightUnit * 0.6,
                         " 1 ", mode.levelBCom, mode, " 1 ")
    mode.levelButtons.append(cusLevelB)
    mode.levelButtons.append(level1B)

    mode.fourierColors = "#FFC5C5"
    mode.fourierGraph = Graph((mode.app.width, 0), mode.app.height,
                              mode.fourierColors, 1.5,
                              0.684 * MusicGameMode.fourierInterval, False)

    mode.colorschemas = [
        ["#55efc4", "#81ecec", "#74b9ff", "#a29bfe", "#ffeaa7", "#fab1a0"],
        ["#fa983a", "#b71540", "#F8EFBA", "#1e3799", "#3c6382"],
        ["#00CCCD", "#1287A5", "#EA7773", "#2B2B52", "#F5BCBA"],
        ["#30336B", "#67E6DC", "#0A3D62", "#6A89CC"],
        ["#40407a", "#706fd3", "#f7f1e3", "#34ace0", "#33d9b2"]
    ]
    mode.colors = mode.colorschemas[0]

    mode.time = 0
    mode.last = 0
    mode.backgroundColor = "#FEFEFE"
    mode.font = "Calibri"
    mode.velx = 10
    mode.vely = 10
    mode.offset = 0
    # Current wall-clock time in whole milliseconds
    mode.lastBeat = int(round(time.time() * 1000))
# The JSON, image and CSV files of one receipt share the same zero-padded stem
file_stem = name + str(i).zfill(5)
json_file = file_stem + '.json'
image_file = file_stem + '.png'
receipt_csv_file = file_stem + '.csv'
print(json_file, image_file, receipt_csv_file)

# Load the receipt's CSV and its image (flag 0 is passed to cv2.imread)
receipt_csv = pd.read_csv(folderNameReceipt_csv + receipt_csv_file)
img = cv2.imread(folderName_image + image_file, 0)

# Build the object tree and connect it into a graph
tree = ObjectTree()
tree.read(receipt_csv, img)
graph_dict, text_list = tree.connect(plot=True, export_df=True)

# One node per entry of the text list
graph = Graph(max_nodes=len(text_list))
adj, features = graph.make_graph_data(graph_dict, text_list)

# Add the identity matrix (self-loops) to the sparse adjacency, then normalise
adj = sparse.csr_matrix(adj)
adj = normalize(adj + sp.eye(adj.shape[0]))

# Stack the features of every receipt row-wise
if first_count != 0:
    features_merged = np.concatenate((features_merged, features), axis=0)
else:
    features_merged = features

adj_array.append(adj.todense())
def setUp(self):
    """Assign a newly constructed ``Graph`` to ``self.graph``."""
    # NOTE(review): presumably the unittest fixture hook of a TestCase, so
    # every test starts from its own Graph instance - confirm against the
    # enclosing class.
    self.graph = Graph()
class GraphTest(TestCase):
    """
    Tests for ``Graph``: adding and removing nodes and weighted, directed
    edges, the errors raised for invalid operations, and ``from_dict``.

    The tests inspect the private ``_graph`` mapping, which has the form
    ``{node: {neighbour: weight}}``.
    """

    def setUp(self):
        self.graph = Graph()

    def _add_nodes(self, *names):
        """Add every node in ``names`` to the graph under test, in order."""
        for name in names:
            self.graph.add_node(name)

    def test_adding_nodes(self):
        expected = {}
        self.assertEqual(self.graph._graph, expected)
        for name in ('1', '2'):
            self.graph.add_node(name)
            expected[name] = {}
            self.assertEqual(self.graph._graph, expected)

    def test_add_duplicate_node(self):
        self._add_nodes('1')
        with self.assertRaises(DuplicateNodeError):
            self.graph.add_node('1')

    def test_remove_node(self):
        self._add_nodes('1')
        self.graph.remove_node('1')
        self.assertEqual(self.graph._graph, {})

    def test_remove_non_existing_node(self):
        with self.assertRaises(NodeNotFound):
            self.graph.remove_node('1')

    def test_add_edges(self):
        self._add_nodes('1', '2')

        self.graph.add_edge(from_node='1', to_node='2', weight=1)
        self.assertEqual(self.graph._graph, {'1': {'2': 1}, '2': {}})

        # The reverse edge is stored independently of the first one
        self.graph.add_edge(from_node='2', to_node='1', weight=5)
        self.assertEqual(self.graph._graph, {'1': {'2': 1}, '2': {'1': 5}})

    def test_add_edge_overwrite_existing(self):
        self._add_nodes('1', '2')
        for weight in (1, 5):
            self.graph.add_edge(from_node='1', to_node='2', weight=weight)
        self.assertEqual(self.graph._graph, {'1': {'2': 5}, '2': {}})

    def test_remove_node_with_edge(self):
        self._add_nodes('1', '2')
        self.graph.add_edge(from_node='1', to_node='2', weight=1)
        self.graph.remove_node('2')
        # The edge pointing at the removed node must be gone as well
        self.assertEqual(self.graph._graph, {'1': {}})

    def test_remove_edges(self):
        self._add_nodes('1', '2')
        self.graph.add_edge(from_node='1', to_node='2', weight=1)
        self.graph.remove_edge(from_node='1', to_node='2')
        self.assertEqual(self.graph._graph, {'1': {}, '2': {}})

    def test_remove_non_existing_edges(self):
        self._add_nodes('1', '2')
        with self.assertRaises(EdgeNotFound):
            self.graph.remove_edge(from_node='1', to_node='2')

    def test_remove_edges_without_node(self):
        with self.assertRaises(NodeNotFound):
            self.graph.remove_edge(from_node='1', to_node='2')

    def test_complex_graph(self):
        exp_graph = {
            'A': {'C': 2, 'D': 6},
            'B': {'D': 8, 'A': 3},
            'C': {'D': 7, 'E': 5},
            'D': {'E': -2},
            'E': {},
        }
        edges = [
            ('A', 'C', 2),
            ('A', 'D', 6),
            ('B', 'A', 3),
            ('B', 'D', 8),
            ('C', 'D', 7),
            ('C', 'E', 5),
            ('D', 'E', -2),
        ]

        self._add_nodes('A', 'B', 'C', 'D', 'E')
        for source, target, weight in edges:
            self.graph.add_edge(from_node=source, to_node=target,
                                weight=weight)

        self.assertEqual(self.graph._graph, exp_graph)

    def test_from_dict(self):
        data = {
            'A': {'C': 2, 'D': 6},
            'B': {'D': 8, 'A': 3},
            'C': {'D': 7, 'E': 5},
            'D': {'E': -2},
            'E': {},
        }
        self.graph.from_dict(data)
        self.assertEqual(self.graph._graph, data)
def init(data):  #Initialize all of data's variables
    """
    Set every attribute of ``data`` to its starting value.

    Also publishes ``data`` through the module-level global ``dati``, picks
    the first target (``pickNewTarget``), creates the graph objects stored in
    ``data.graphers`` and builds the buttons (``makeBtns``).

    Args:
        data: the state object; it must already provide ``width`` and
            ``height``, which are read to place the emitter.
    """
    # Make the state object reachable through the module-level name `dati`
    global dati
    dati = data

    # Beat-hit bookkeeping for the left and right player
    data.checkRadius = 400
    data.targetRadii = []
    data.leftScore = 0
    data.rightScore = 0
    data.leftDistance = 99999
    data.rightDistance = 99999
    data.lastBeatTimer = 0
    data.beatTimer = 0

    data.circWidth = 3
    data.circColor = "black"
    data.songPath = ""
    data.dots = []
    data.buttons = []

    # Containers for the audio analysis results
    data.instant_energies = []
    data.energy_averages = []
    data.maxEnergy = 0
    data.beats = []
    data.fourier = []

    # Colours: two colours for the Fourier graph, and the colour schemes the
    # current colours are taken from (the first scheme is the initial one)
    data.fourier_colors = ["#FFC5C5", "#FFDFDF"]
    data.colorschemas = [
        ["#BE1B38", "#01A98F", "#C9CF7E", "#E86644"],
        ["#354458", "#EB7260", "#29ABA4", "#E9E0D6", "#3A9AD9"],
        ["#FF0066", "#333366", "#3399CC", "#00CCCC", "#003399"],
        ["#a6dace", "#F7A04B", "#666699", "#EF8D24"],
        ["#05b3b2", "#2ca5dc", "#a5d3ea", "#f4cfbd", "#ff946c"]
    ]
    data.colors = data.colorschemas[0]

    data.targets = []
    data.time = 0
    data.last = 0
    data.radius = 10
    # Start position: a third of the width, half of the height
    data.posx = data.width // 3
    data.posy = data.height // 2
    # NOTE(review): pickNewTarget is called before the attributes below are
    # set, so it presumably relies only on what is initialised above - confirm
    # before reordering anything in this function.
    pickNewTarget(data)

    data.timescale = 1.0
    data.backgroundBase = "#FEFEFE"
    data.font = "Calibri"
    data.gamestate = 0
    data.gamemode = "song"
    data.offset = 0
    data.score = 0
    data.graphers = []
    # Current wall-clock time in whole milliseconds / in seconds
    data.lastBeat = int(round(time.time() * 1000))
    data.lastCallBack = time.time()
    data.backgroundColor = data.backgroundBase
    data.canvas = None
    data.playerPhased = False
    data.soundcloudActive = False
    # Second reset of data.beats (it was already set to [] above)
    data.beats = []
    data.velx = 10
    data.vely = 10
    data.p = None
    data.volSum = 1841616
    data.volCount = 1
    data.max = 343
    data.phaseJuice = 0
    data.maxPhaseJuice = 100
    data.boostJuice = 0
    data.maxBoostJuice = 100
    data.playerR = 370
    data.playerPos = 6.28
    data.playerVel = 0
    data.playerAcel = 0.001

    # Graph objects, in index order: 0 = BeatGrapher, 1-3 = red/green/blue
    # Graph, 4 = the Fourier Graph (drawn with the first Fourier colour)
    data.graphers += [BeatGrapher((1390, 10), 250, "#979797")]
    data.graphers += [Graph((1390, 10), 250, "red")]
    data.graphers += [Graph((1390, 10), 250, "green", 1)]
    data.graphers += [Graph((1390, 10), 250, "blue", 2)]
    data.graphers += [
        Graph((1412, 260), 300, data.fourier_colors[0], 1.5,
              0.684 * fourierInterval, False)
    ]  # Usual Fourier height = 625
    makeBtns(data)
def _nearestBeatDistance(data):
    """
    Return the smallest absolute difference between ``data.checkRadius`` and
    any radius in ``data.targetRadii``, capped at 9999999 (which is also the
    result when there are no target radii).
    """
    distances = [abs(data.checkRadius - radius) for radius in data.targetRadii]
    return min([9999999] + distances)


def keyPressed(event, data):
    """
    Handle a key press.

    Keys:
        space   toggle slow motion (timescale 0.2) and normal speed
        r       move the emitter back to the center
        1-5     switch ``data.gamestate``
        Escape  go back to the menu (gamestate 0) and reset the song state
        0       change the emitter's colors
        a / l   left / right player tries to hit a beat (ignored while
                ``data.gamemode`` is "sandbox")

    Args:
        event: key event; only ``event.keysym`` is read.
        data: the state object, modified in place.
    """
    if event.keysym == "space":
        # Slow down time (or speed it back up)
        if data.timescale == 1:
            # Remember the current colors so they can be restored later
            data.backup = data.colors
            data.timescale = 0.2
            for dot in data.dots:
                dot.fill = "#FF0051"
            data.colors = ["#FF0051"]
            data.backgroundColor = "#FFB9B9"
            # Change the fourier graph's colors
            data.graphers[4] = Graph((1412, 260), 300, data.fourier_colors[1],
                                     1.5, 0.684 * fourierInterval, False)
            if data.gamemode == "song":
                remakeStream(songTimeScale)
        else:
            # NOTE(review): `proont` is not defined in the visible code -
            # confirm it exists (it looks like a debug print helper).
            proont("-----")
            data.timescale = 1.0
            data.backgroundColor = data.backgroundBase
            data.colors = data.backup
            for dot in data.dots:
                dot.fill = random.choice(data.colors)
            # Change the fourier graph's colors
            data.graphers[4] = Graph((1412, 260), 300, data.fourier_colors[0],
                                     1.5, 0.684 * fourierInterval, False)
            if data.gamemode == "song":
                remakeStream(1)
    elif event.keysym == "r":
        # Move the emitter back to the center
        data.posx = data.width // 2
        data.posy = data.height // 2
        data.startx = data.posx
        data.starty = data.posy
    # Change some game modes
    elif event.keysym == "1":
        data.gamestate = 1
        data.timer = 0
    elif event.keysym == "2":
        data.gamestate = 2
        data.startx = data.posx
        data.starty = data.posy
    elif event.keysym == "3":
        data.gamestate = 3
        data.startx = data.posx
        data.starty = data.posy
    elif event.keysym == "4":
        data.gamestate = 4
    elif event.keysym == "5":
        data.gamestate = 5
    elif event.keysym == "Escape":
        # Go back to the menu screen and clean up
        data.gamestate = 0
        # Identity check against None; getattr so that pressing Escape before
        # any stream was ever created does not raise AttributeError
        if getattr(data, "stream", None) is not None:
            data.stream.stop_stream()
        data.p = None
        data.stream = None
        data.maxEnergy = 0
        data.timescale = 1
        data.gamemode = "song"
        data.fourier = []
        data.instant_energies = []
        data.energy_averages = []
        data.beats = []
        data.graphers[4].maxVal = 0
        data.playerR = 370
        data.playerPos = 6.28
        data.playerVel = 0
        data.leftScore = 0
        data.rightScore = 0
        data.targetRadii = []
    elif event.keysym == "0":
        # Change the emitter's colors
        changeColors(data)

    if data.gamemode != "sandbox":
        # If we're playing a game, handle the players' key presses
        if event.keysym == "a":
            data.leftDistance = _nearestBeatDistance(data)
            if data.leftDistance > 50:
                # If there's no beat even somewhat close, they lose points
                data.leftScore -= 1
            else:
                # Wait a bit for the other player to make their move before
                # we decide who gets the beat
                data.beatTimer = 2
        elif event.keysym == "l":
            data.rightDistance = _nearestBeatDistance(data)
            if data.rightDistance > 50:
                # If there's no beat even somewhat close, they lose points
                data.rightScore -= 1
            else:
                # Wait a bit for the other player to make their move before
                # we decide who gets it
                data.beatTimer = 2
def get_train_data(data_folder_path):
    """
    Generate the graph training set for every document found in the
    sub-folders of ``data_folder_path``.

    Images (``*.jpg`` / ``*.png``) and object maps (``*.csv``) are searched
    one directory level below ``data_folder_path`` and paired in sorted
    order of their extension-less paths, so an image and a CSV that share
    the same name in the same folder end up together.

    Documents for which the graph generation raises are skipped.

    Args:
        data_folder_path: str, folder whose sub-folders hold the documents

    Returns:
        X_train: the feature matrices of all processed documents,
            concatenated along axis 0
        y_train: the matching ``label`` column values, concatenated along
            axis 0

    Raises:
        ValueError: if no document could be processed (nothing found, or
            every document was skipped).
    """
    import os  # local import: not guaranteed to be imported at file level

    def _without_extension(path):
        return os.path.splitext(path)[0]

    # ============= get paths for images and object maps =======================
    # os.path.join instead of hard-coded backslashes, so the patterns also
    # work on non-Windows systems
    image_jpg_path_glob = os.path.join(data_folder_path, '*', '*.jpg')
    image_png_path_glob = os.path.join(data_folder_path, '*', '*.png')
    csv_path_glob = os.path.join(data_folder_path, '*', '*.csv')

    # glob() makes no promise about ordering, and the images come from two
    # separate globs; sort both lists by the same key so that zip() pairs
    # each image with its own CSV
    list_img_paths = sorted(glob(image_jpg_path_glob) +
                            glob(image_png_path_glob),
                            key=_without_extension)
    list_csv_paths = sorted(glob(csv_path_glob), key=_without_extension)
    # --------------------------------------------------------------------------

    # =============== initialize the ObjectTree and Graph Objects ==============
    tree = ObjectTree()
    graph = Graph()
    # --------------------------------------------------------------------------

    # === generate graph embeddings for each document ==========================
    # None until the first document has been processed successfully; this
    # (rather than `count == 0`) keeps working when the first document is
    # skipped
    X_train, y_train = None, None
    count, skip_count = 0, 0

    for image_path, csv_path in zip(list_img_paths, list_csv_paths):
        img = cv2.imread(image_path, 0)
        object_map = pd.read_csv(csv_path)

        # drop rows with nans
        object_map.dropna(inplace=True, how='any')

        try:
            # generate object tree
            tree.read(object_map, img)
            graph_dict, text_list, coords_arr = tree.connect(plot=True,
                                                             export_df=True)

            # make graph data
            # NOTE(review): the image itself is passed as the 4th argument;
            # confirm make_graph_data does not expect image dimensions here.
            A, X = graph.make_graph_data(graph_dict, text_list, coords_arr,
                                         img)

            # transform the feature matrix by multiplying with the
            # Adjacency Matrix
            X_transformed = np.matmul(A, X)

            # form new feature matrix
            X = np.concatenate((X, X_transformed), axis=1)

            # get the labels
            y = object_map.label.values

            if X_train is None:
                new_X_train, new_y_train = X, y
            else:
                new_X_train = np.concatenate((X_train, X), axis=0)
                new_y_train = np.concatenate((y_train, y), axis=0)
        except Exception as err:
            # best effort: a document that cannot be processed is skipped
            skip_count += 1
            print('Skipping Graph Generation for {}: {}'.format(csv_path,
                                                                 err))
        else:
            # commit features and labels together so they never get out of
            # step with each other
            X_train, y_train = new_X_train, new_y_train
            print('Finished processing image {} of {}...'.format(
                count + 1, len(list_img_paths)))

        count += 1

    print('Finished generating Graph Dataset for {} documents. Skipped {}.'
          .format(count, skip_count))

    if X_train is None:
        raise ValueError(
            'No training data could be generated from {!r}: {} documents '
            'found, {} skipped.'.format(data_folder_path, count, skip_count))

    return X_train, y_train
class Predictor():
    """
    Information-extraction pipeline for a document image: OCR, graph
    embedding generation, entity classification and rule/model based
    filtering of the predicted entities.

    Typical use: ``load_models()`` once, then ``infer(image)`` per document.
    """

    def __init__(self):
        self.model_bin = None
        self.model_mult = None
        self.object_map = None
        self.tree = None
        self.graph = None
        self.image = None
        self.X = None
        self.img_dims = None

        # Class index predicted by the entity classifier -> readable name
        self.label_mapping = {
            0: 'Other',
            1: 'Store Name',
            2: 'Address',
            3: 'Invoice Number Key',
            4: 'Invoice Number Value',
            5: 'Date-Time Key',
            6: 'Date-Time Value',
            7: 'Item Key',
            8: 'Item Value',
            9: 'Amount Key',
            10: 'Amount Value'
        }

        self.entity_classifier = None
        self.company_name_model = None
        self.company_name_cv = None
        self.invoice_model = None
        self.invoice_cv = None

    @staticmethod
    def _load_pickle(path):
        """Unpickle the object stored at ``path``, closing the file after."""
        with open(path, 'rb') as model_file:
            return pickle.load(model_file)

    def load_models(
        self,
        entity_classifier_mod=r'C:\Users\Think Analytics\Desktop\Side_Projects\Graph-Convolution-on-Structured-Documents-master\models\model_1_2.sav',
        company_name_mod=r'C:\Users\Think Analytics\Desktop\Side_Projects\Graph-Convolution-on-Structured-Documents-master\models\company_nb.sav',
        company_name_cv=r'C:\Users\Think Analytics\Desktop\Side_Projects\Graph-Convolution-on-Structured-Documents-master\models\company_nb_cv.sav',
        invoice_no_mod=r'C:\Users\Think Analytics\Desktop\Side_Projects\Graph-Convolution-on-Structured-Documents-master\models\invoice_nb.sav',
        invoice_no_cv=r'C:\Users\Think Analytics\Desktop\Side_Projects\Graph-Convolution-on-Structured-Documents-master\models\invoice_nb_cv.sav'
    ):
        '''
        Function to load the pickled models from disk.

        Args:
            entity_classifier_mod: str, filepath to the pickled entity
                                   classifier
            company_name_mod: str, filepath to the pickled company name model
            company_name_cv: str, filepath to the pickled transformer used
                             with the company name model
            invoice_no_mod: str, filepath to the pickled invoice number model
            invoice_no_cv: str, filepath to the pickled transformer used with
                           the invoice number model
        '''
        # SECURITY NOTE: unpickling executes arbitrary code contained in the
        # file. Only pass paths to model files from a trusted source.
        self.entity_classifier = self._load_pickle(entity_classifier_mod)
        self.company_name_model = self._load_pickle(company_name_mod)
        self.company_name_cv = self._load_pickle(company_name_cv)
        self.invoice_model = self._load_pickle(invoice_no_mod)
        self.invoice_cv = self._load_pickle(invoice_no_cv)

        print('Models loaded from disk.')

    def _ocr(self, image):
        '''
        Function to perform OCR and generate the Object Map.

        The object map and the image dimensions returned by
        `ocr_using_google_api` are stored in `self.object_map` and
        `self.img_dims`; the image is stored in `self.image`.

        Args:
            image: np.array
        '''
        # NOTE(review): the original docstring describes the object map as a
        # pd.DataFrame with columns xmin, ymin, xmax, ymax; this class also
        # reads an `Object` column from it - confirm the full schema.
        print('Performing OCR...\n')
        self.object_map, self.img_dims = ocr_using_google_api(image)
        self.image = image

    def _generate_graph_embeddings(self):
        '''
        This function first generates an object tree using the ObjectTree
        Class and then generates a graph using the Graph Class.

        The resulting feature array is stored in `self.X`; nothing is
        returned.
        '''
        self.tree = ObjectTree()
        self.graph = Graph()

        # Generate a graph dictionary
        self.tree.read(self.object_map, self.image)
        graph_dict, text_list, coords_arr = self.tree.connect(plot=False,
                                                              export_df=True)

        # Generate the Adjacency Matrix (A) and the Feature Matrix (X)
        A, X = self.graph.make_graph_data(graph_dict=graph_dict,
                                          text_list=text_list,
                                          coords_arr=coords_arr,
                                          img_dims=self.img_dims)

        # transform the feature matrix by multiplying with the Adjacency
        # Matrix
        X_transformed = np.matmul(A, X)

        # form new feature matrix
        self.X = np.concatenate((X, X_transformed), axis=1)

    @staticmethod
    def get_dates(string):
        '''
        Wrapper for datefinder.find_dates function

        Returns:
            the last match yielded by `find_dates` for the input string, or
            None when there is no match
        '''
        # Initialised up front so that an input without any date returns
        # None instead of raising UnboundLocalError
        date_time = None
        for match in find_dates(string):
            date_time = match

        return date_time

    @staticmethod
    def _first_dates(texts):
        """Return the first `find_dates` match of every text that has one."""
        first_matches = []
        for text in texts:
            # find_dates is evaluated once per text
            matches = list(find_dates(text))
            if matches:
                first_matches.append(matches[0])
        return first_matches

    def perform_elimination(self, label_wise_objects):
        '''
        Filter the objects predicted for the date-time (6), amount (10) and
        store name (1) labels.

        Args:
            label_wise_objects: dict, label index -> list of object texts;
                                modified in place. Labels missing from the
                                dict are treated as having no objects.

        Returns:
            the same dict; labels 6, 10 and 1 are always present afterwards.
        '''
        # Invoice Number Value (label 4) is not filtered; invoice_model and
        # invoice_cv are loaded but not used by this method.

        # =================== for Date Time ====================================
        # Use the objects labelled as date-time; when there are none, fall
        # back to searching every object of the document.
        date_time_candidates = label_wise_objects.get(6, [])
        if len(date_time_candidates) == 0:
            date_time_candidates = self.object_map.Object.values
        label_wise_objects[6] = self._first_dates(date_time_candidates)
        # ----------------------------------------------------------------------

        # ============= for amount value =======================================
        # Keep only values that are numeric once a single '.' is removed.
        # The filtered list is always stored (previously it was only stored
        # when the filtering itself had failed).
        label_wise_objects[10] = [
            possible_amount
            for possible_amount in label_wise_objects.get(10, [])
            if isinstance(possible_amount, str)
            and possible_amount.replace('.', '', 1).isnumeric()
        ]
        # ======================================================================

        # ============== for company name ======================================
        company_name_list = []
        for possible_company_name in label_wise_objects.get(1, []):
            company_cv_text = self.company_name_cv.transform(
                [possible_company_name])
            if self.company_name_model.predict(company_cv_text) == 1:
                company_name_list.append(possible_company_name)

        label_wise_objects[1] = company_name_list
        # ----------------------------------------------------------------------

        return label_wise_objects

    def infer(self, image):
        '''
        This function implements the entire pipeline for information
        extraction from the input document image.

        Prerequisites:
            All the ML Models must be loaded (see `load_models`)

        Args:
            Image, cv2

        Returns:
            dict, label name -> string representation of the list of objects
            retained for that label
        '''
        # perform OCR
        self._ocr(image)

        # Generate Graph Embeddings
        self._generate_graph_embeddings()

        preds = self.entity_classifier.predict(self.X)
        self.object_map['label'] = preds

        label_wise_objects = dict(
            self.object_map.groupby('label')['Object'].apply(list))

        retained_objects = self.perform_elimination(label_wise_objects)

        # 'Other' and the '... Key' labels are not part of the output;
        # labels that were not predicted are simply absent
        for dropped_label in (0, 3, 5, 7, 9):
            retained_objects.pop(dropped_label, None)

        output_dict = {}
        for key, value in retained_objects.items():
            output_dict[self.label_mapping[key]] = str(value)

        return output_dict