Ejemplo n.º 1
0
    def _generate_graph_embeddings(self):
        '''
        Build the embedding matrix for the document currently held by this
        object.

        An ObjectTree is read from ``self.object_map`` and ``self.image`` and
        connected into a graph dictionary; a Graph then converts that
        dictionary into an adjacency matrix and a feature matrix.

        Nothing is returned. The feature matrix, concatenated column-wise with
        its adjacency-multiplied counterpart, is stored in ``self.X``; the
        helper objects are kept in ``self.tree`` and ``self.graph``.
        '''
        tree = self.tree = ObjectTree()
        graph = self.graph = Graph()

        # Object map + image -> graph dictionary, texts and coordinates
        tree.read(self.object_map, self.image)
        connected = tree.connect(plot=False, export_df=True)
        graph_dict, text_list, coords_arr = connected

        # Adjacency matrix and per-node feature matrix
        adjacency, features = graph.make_graph_data(
            graph_dict=graph_dict,
            text_list=text_list,
            coords_arr=coords_arr,
            img_dims=self.img_dims,
        )

        # Multiply the features by the adjacency matrix, then put the result
        # next to the original features
        propagated = np.matmul(adjacency, features)
        self.X = np.concatenate((features, propagated), axis=1)
Ejemplo n.º 2
0
def main():
    """Start the Qt application: launch the UDP listener and show the graph.

    Creates the ``QApplication``, connects and starts a ``UdpThread``, shows a
    ``Graph`` fed by that thread, then runs the Qt event loop and exits the
    process with the loop's return code.
    """
    app = QtWidgets.QApplication(sys.argv)

    udp = UdpThread()
    udp.connect()
    udp.start()

    # Keep a named reference for as long as the event loop runs: an
    # unreferenced top-level Qt wrapper can be garbage-collected right after
    # show(), which destroys the window.
    # NOTE(review): assumes Graph is a Qt widget -- confirm.
    window = Graph(rssi_listener=udp)
    window.show()

    sys.exit(app.exec_())
    def appStarted(mode):
        '''
        Set up the initial state of the music game mode.

        Loads the background image, computes the layout units from the app's
        width and height, creates the level-selection buttons and the fourier
        graph, and resets the colour, timing and movement attributes on
        ``mode``.
        '''
        # Try the local copy of the background first; if it cannot be loaded,
        # fall back to the hosted copy. Only ordinary errors are caught so
        # that KeyboardInterrupt / SystemExit still propagate.
        try:
            mode.bg = mode.loadImage("muslevelbg.jpg")
        except Exception:
            mode.bg = mode.loadImage("https://i.ibb.co/8DwvThD/muslevelbg.jpg")
        mode.Progress = Progress()
        mode.gameState = "levels"
        mode.game = "music"

        # Layout: margins are a tenth / a ninth of the window, the puzzle
        # area is whatever is left between them.
        (mode.marginX, mode.marginY) = (mode.app.width // 10,
                                        mode.app.height // 9)
        (mode.puzzleW, mode.puzzleH) = (mode.app.width - 2 * mode.marginX,
                                        mode.app.height - 2 * mode.marginY)
        mode.widthUnit = mode.app.width // 10
        mode.heightUnit = mode.app.height // 9
        mode.selected = [(-100, -100)]
        mode.complete = False

        mode.lastBeatTimer = 0
        mode.beatTimer = 0

        mode.dots = []
        # createButtons() runs before the appends below, which add to
        # mode.buttons -- keep this order.
        mode.createButtons()
        mode.levelButtons = []
        levelPageB = FakeButton(mode.widthUnit * 0.5, mode.heightUnit * 5.5,
                                mode.widthUnit * 0.8, mode.heightUnit * 0.6,
                                "Levels", mode.levelPageBCom, mode)
        mode.buttons.append(levelPageB)
        cusLevelB = FakeButton(mode.widthUnit * 0.5, mode.heightUnit * 1.5,
                               mode.widthUnit * 0.8, mode.heightUnit * 0.6,
                               " S ", mode.cusBCom, mode, " S ")
        level1B = FakeButton(mode.widthUnit * 1.5, mode.heightUnit * 1.5,
                             mode.widthUnit * 0.8, mode.heightUnit * 0.6,
                             " 1 ", mode.levelBCom, mode, " 1 ")
        mode.levelButtons.append(cusLevelB)
        mode.levelButtons.append(level1B)

        mode.fourierColors = "#FFC5C5"
        mode.fourierGraph = Graph((mode.app.width, 0), mode.app.height,
                                  mode.fourierColors, 1.5,
                                  0.684 * MusicGameMode.fourierInterval, False)

        # Available colour schemes; the first one is active at start-up.
        mode.colorschemas = [
            ["#55efc4", "#81ecec", "#74b9ff", "#a29bfe", "#ffeaa7", "#fab1a0"],
            ["#fa983a", "#b71540", "#F8EFBA", "#1e3799", "#3c6382"],
            ["#00CCCD", "#1287A5", "#EA7773", "#2B2B52", "#F5BCBA"],
            ["#30336B", "#67E6DC", "#0A3D62", "#6A89CC"],
            ["#40407a", "#706fd3", "#f7f1e3", "#34ace0", "#33d9b2"]
        ]
        mode.colors = mode.colorschemas[0]
        mode.time = 0
        mode.last = 0

        mode.backgroundColor = "#FEFEFE"
        mode.font = "Calibri"

        mode.velx = 10
        mode.vely = 10
        mode.offset = 0
        # Current wall-clock time in whole milliseconds.
        mode.lastBeat = int(round(time.time() * 1000))
Ejemplo n.º 4
0
        # Build the three file names for this index: <name><5-digit i>.<ext>
        json_file = name + str(i).zfill(5) + '.json'
        image_file = name + str(i).zfill(5) + '.png'
        receipt_csv_file = name + str(i).zfill(5) + '.csv'
        print(json_file, image_file, receipt_csv_file)

        # Load the receipt's CSV and its image (flag 0 = read as grayscale)
        receipt_csv = pd.read_csv(folderNameReceipt_csv + receipt_csv_file)
        img = cv2.imread(folderName_image + image_file, 0)

        # json_path = '/home/arjun/Gcn_paper/gcn/gcn/OneDrive_2020-11-13/spaCy NER Annotator output/Image_0006.json'
        tree = ObjectTree()
        tree.read(receipt_csv, img)

        # NOTE(review): connect() is unpacked into two values here; confirm
        # this matches the ObjectTree version in use.
        graph_dict, text_list = tree.connect(plot=True, export_df=True)

        # One graph node per text entry
        graph = Graph(max_nodes=len(text_list))

        adj, features = graph.make_graph_data(graph_dict, text_list)
        adj = sparse.csr_matrix(adj)
        # presumably adds self-loops (identity) before normalising -- confirm
        # what `sp.eye` and `normalize` refer to
        adj = normalize(adj + sp.eye(adj.shape[0]))

        # adj = sparse_mx_to_torch_sparse_tensor(adj)
        # Stack this document's feature rows under those collected so far.
        # `first_count` is maintained outside this fragment.
        if first_count == 0:
            features_merged = features
        else:
            features_merged = np.concatenate((features_merged, features),
                                             axis=0)

        # features = sparse.csr_matrix(features)
        # features = torch.FloatTensor(np.array(features.todense()))
        # Keep the dense form of this document's adjacency matrix
        adj_array.append(adj.todense())
Ejemplo n.º 5
0
 def setUp(self):
     """Create a fresh ``Graph`` instance and store it on ``self.graph``."""
     self.graph = Graph()
Ejemplo n.º 6
0
class GraphTest(TestCase):
    """Tests for ``Graph``: node and weighted-edge bookkeeping in ``_graph``."""

    def setUp(self):
        """Give every test its own new graph."""
        self.graph = Graph()

    def _add_nodes(self, *names):
        """Add each of ``names`` to the graph, in order."""
        for name in names:
            self.graph.add_node(name)

    def _assert_graph(self, expected):
        """Assert that the graph's internal dict equals ``expected``."""
        self.assertEqual(self.graph._graph, expected)

    def test_adding_nodes(self):
        self._assert_graph({})
        self.graph.add_node('1')
        self._assert_graph({'1': {}})
        self.graph.add_node('2')
        self._assert_graph({'1': {}, '2': {}})

    def test_add_duplicate_node(self):
        self._add_nodes('1')
        with self.assertRaises(DuplicateNodeError):
            self.graph.add_node('1')

    def test_remove_node(self):
        self._add_nodes('1')
        self.graph.remove_node('1')
        self._assert_graph({})

    def test_remove_non_existing_node(self):
        with self.assertRaises(NodeNotFound):
            self.graph.remove_node('1')

    def test_add_edges(self):
        self._add_nodes('1', '2')

        self.graph.add_edge(from_node='1', to_node='2', weight=1)
        self._assert_graph({'1': {'2': 1}, '2': {}})

        self.graph.add_edge(from_node='2', to_node='1', weight=5)
        self._assert_graph({'1': {'2': 1}, '2': {'1': 5}})

    def test_add_edge_overwrite_existing(self):
        self._add_nodes('1', '2')
        for weight in (1, 5):
            self.graph.add_edge(from_node='1', to_node='2', weight=weight)
        self._assert_graph({'1': {'2': 5}, '2': {}})

    def test_remove_node_with_edge(self):
        self._add_nodes('1', '2')
        self.graph.add_edge(from_node='1', to_node='2', weight=1)
        self.graph.remove_node('2')
        self._assert_graph({'1': {}})

    def test_remove_edges(self):
        self._add_nodes('1', '2')
        self.graph.add_edge(from_node='1', to_node='2', weight=1)
        self.graph.remove_edge(from_node='1', to_node='2')
        self._assert_graph({'1': {}, '2': {}})

    def test_remove_non_existing_edges(self):
        self._add_nodes('1', '2')
        with self.assertRaises(EdgeNotFound):
            self.graph.remove_edge(from_node='1', to_node='2')

    def test_remove_edges_without_node(self):
        with self.assertRaises(NodeNotFound):
            self.graph.remove_edge(from_node='1', to_node='2')

    def test_complex_graph(self):
        expected = {
            'A': {'C': 2, 'D': 6},
            'B': {'D': 8, 'A': 3},
            'C': {'D': 7, 'E': 5},
            'D': {'E': -2},
            'E': {},
        }
        # (from_node, to_node, weight), in insertion order
        edges = [
            ('A', 'C', 2),
            ('A', 'D', 6),
            ('B', 'A', 3),
            ('B', 'D', 8),
            ('C', 'D', 7),
            ('C', 'E', 5),
            ('D', 'E', -2),
        ]

        self._add_nodes('A', 'B', 'C', 'D', 'E')
        for source, target, weight in edges:
            self.graph.add_edge(from_node=source, to_node=target,
                                weight=weight)
        self._assert_graph(expected)

    def test_from_dict(self):
        data = {
            'A': {'C': 2, 'D': 6},
            'B': {'D': 8, 'A': 3},
            'C': {'D': 7, 'E': 5},
            'D': {'E': -2},
            'E': {},
        }
        self.graph.from_dict(data)
        self._assert_graph(data)
Ejemplo n.º 7
0
def init(data):  #Initialize all of data's variables
    """Populate ``data`` with the game's starting state.

    Besides assigning attributes, this publishes ``data`` through the
    module-level global ``dati``, calls ``pickNewTarget(data)`` part-way
    through and ``makeBtns(data)`` at the end.

    Reads ``data.width`` and ``data.height``, which are not assigned here and
    therefore have to be set by the caller beforehand.
    """
    # Expose the state object as a module-level global
    global dati
    dati = data

    # Two-player beat game: target radius, scores, last measured distances
    data.checkRadius = 400
    data.targetRadii = []
    data.leftScore = 0
    data.rightScore = 0
    data.leftDistance = 99999
    data.rightDistance = 99999
    data.lastBeatTimer = 0
    data.beatTimer = 0
    data.circWidth = 3
    data.circColor = "black"

    # Audio-analysis buffers, all starting empty
    data.songPath = ""
    data.dots = []
    data.buttons = []
    data.instant_energies = []
    data.energy_averages = []
    data.maxEnergy = 0
    data.beats = []
    data.fourier = []
    data.fourier_colors = ["#FFC5C5", "#FFDFDF"]
    # Available colour schemes; the first one is active at start-up
    data.colorschemas = [
        ["#BE1B38", "#01A98F", "#C9CF7E", "#E86644"],
        ["#354458", "#EB7260", "#29ABA4", "#E9E0D6", "#3A9AD9"],
        ["#FF0066", "#333366", "#3399CC", "#00CCCC", "#003399"],
        ["#a6dace", "#F7A04B", "#666699", "#EF8D24"],
        ["#05b3b2", "#2ca5dc", "#a5d3ea", "#f4cfbd", "#ff946c"]
    ]
    data.colors = data.colorschemas[0]
    data.targets = []
    data.time = 0
    data.last = 0
    data.radius = 10
    # Start position: a third of the width, half of the height
    data.posx = data.width // 3
    data.posy = data.height // 2
    # Called after the attributes above are in place; what it reads from
    # `data` is not visible here, so keep this position.
    pickNewTarget(data)
    data.timescale = 1.0
    data.backgroundBase = "#FEFEFE"
    data.font = "Calibri"
    data.gamestate = 0
    data.gamemode = "song"
    data.offset = 0
    data.score = 0
    data.graphers = []
    # Current wall-clock time in whole milliseconds
    data.lastBeat = int(round(time.time() * 1000))
    data.lastCallBack = time.time()
    data.backgroundColor = data.backgroundBase
    data.canvas = None
    data.playerPhased = False
    data.soundcloudActive = False
    # NOTE(review): data.beats was already set to [] above; this second
    # assignment looks redundant unless pickNewTarget() touches it -- confirm.
    data.beats = []

    data.velx = 10
    data.vely = 10
    data.p = None
    data.volSum = 1841616
    data.volCount = 1
    data.max = 343

    data.phaseJuice = 0
    data.maxPhaseJuice = 100
    data.boostJuice = 0
    data.maxBoostJuice = 100

    data.playerR = 370
    data.playerPos = 6.28
    data.playerVel = 0
    data.playerAcel = 0.001

    # Graph widgets. Index 4 is the fourier graph; keyPressed() replaces and
    # resets data.graphers[4], so the order of these appends matters.
    data.graphers += [BeatGrapher((1390, 10), 250, "#979797")]
    data.graphers += [Graph((1390, 10), 250, "red")]
    data.graphers += [Graph((1390, 10), 250, "green", 1)]
    data.graphers += [Graph((1390, 10), 250, "blue", 2)]
    data.graphers += [
        Graph((1412, 260), 300, data.fourier_colors[0], 1.5,
              0.684 * fourierInterval, False)
    ]
    # Usual Fourier height = 625

    makeBtns(data)
Ejemplo n.º 8
0
def _closestBeatDistance(data, farAway=9999999):
    """Return the smallest gap between ``data.checkRadius`` and any target radius.

    The result is capped at ``farAway``, which is also what is returned when
    ``data.targetRadii`` is empty.
    """
    gaps = [abs(data.checkRadius - radius) for radius in data.targetRadii]
    return min([farAway] + gaps)


def keyPressed(event, data):
    """Handle one key press by updating the game state held in ``data``.

    Keys handled (``event.keysym``):
        space   toggle slow motion (timescale, colours, fourier graph)
        r       move the emitter back to the centre
        1-5     switch ``data.gamestate``
        Escape  return to the menu and reset the per-song state
        0       change the emitter's colours
        a / l   left / right player's beat attempt (ignored in sandbox mode)
    """
    key = event.keysym

    if key == "space":  # Slow down time (or speed it back up)
        if data.timescale == 1:
            data.backup = data.colors
            data.timescale = 0.2
            for dot in data.dots:
                dot.fill = "#FF0051"
            data.colors = ["#FF0051"]
            data.backgroundColor = "#FFB9B9"
            # Swap in a fourier graph with the alternate colour
            data.graphers[4] = Graph((1412, 260), 300, data.fourier_colors[1],
                                     1.5, 0.684 * fourierInterval, False)
            if data.gamemode == "song":
                remakeStream(songTimeScale)
        else:
            # NOTE(review): `proont` is not defined in this block; presumably
            # a project debug-print helper -- confirm.
            proont("-----")
            data.timescale = 1.0
            data.backgroundColor = data.backgroundBase
            data.colors = data.backup
            for dot in data.dots:
                dot.fill = random.choice(data.colors)
            # Restore the fourier graph's normal colour
            data.graphers[4] = Graph((1412, 260), 300, data.fourier_colors[0],
                                     1.5, 0.684 * fourierInterval, False)
            if data.gamemode == "song":
                remakeStream(1)

    elif key == "r":  # Move the emitter back to the center
        data.posx = data.width // 2
        data.posy = data.height // 2
        data.startx = data.posx
        data.starty = data.posy

    # Change some game modes
    elif key == "1":
        data.gamestate = 1
        data.timer = 0
    elif key == "2":
        data.gamestate = 2
        data.startx = data.posx
        data.starty = data.posy
    elif key == "3":
        data.gamestate = 3
        data.startx = data.posx
        data.starty = data.posy
    elif key == "4":
        data.gamestate = 4
    elif key == "5":
        data.gamestate = 5

    elif key == "Escape":  # Go back to the menu screen and clean up
        data.gamestate = 0
        # The stream attribute may not exist yet if no stream was ever
        # created, so do not assume it is there.
        stream = getattr(data, "stream", None)
        if stream is not None:
            stream.stop_stream()
        data.p = None
        data.stream = None
        data.maxEnergy = 0
        data.timescale = 1
        data.gamemode = "song"
        data.fourier = []
        data.instant_energies = []
        data.energy_averages = []
        data.beats = []
        data.graphers[4].maxVal = 0
        data.playerR = 370
        data.playerPos = 6.28
        data.playerVel = 0
        data.leftScore = 0
        data.rightScore = 0
        data.targetRadii = []

    elif key == "0":  # Change the emitter's colors
        changeColors(data)

    # If we're playing a game, handle the players' key presses
    if data.gamemode != "sandbox":
        if key == "a":
            data.leftDistance = _closestBeatDistance(data)
            if data.leftDistance > 50:
                # No beat even somewhat close: the player loses a point
                data.leftScore -= 1
            else:
                # Wait a bit for the other player to make their move before
                # deciding who gets the beat
                data.beatTimer = 2

        elif key == "l":
            data.rightDistance = _closestBeatDistance(data)
            if data.rightDistance > 50:
                # No beat even somewhat close: the player loses a point
                data.rightScore -= 1
            else:
                # Wait a bit for the other player to make their move before
                # deciding who gets the beat
                data.beatTimer = 2
Ejemplo n.º 9
0
def get_train_data(data_folder_path):
    '''
    Build the training feature matrix and label vector from a data folder.

    Every ``.jpg`` / ``.png`` image and every ``.csv`` object map found one
    directory level below ``data_folder_path`` is collected. Images and object
    maps are paired positionally after sorting both path lists. For each pair
    a graph is generated and the feature matrix is concatenated column-wise
    with its adjacency-multiplied counterpart. Documents whose processing
    raises are skipped.

    Args:
        data_folder_path: str, folder whose sub-folders hold the images and
                          the object-map CSV files (with a ``label`` column)

    Returns:
        (X_train, y_train): features and labels of all processed documents,
                            stacked along axis 0

    Raises:
        ValueError: if no document could be processed
    '''
    import os  # local import: the file's import block is not visible here

    # ============= get paths for images and object maps ===========================
    # os.path.join keeps the patterns portable. The lists are sorted because
    # glob returns paths in arbitrary order and the pairing below is
    # positional.
    # NOTE(review): assumes each image and its CSV sort to the same position
    # (e.g. share a file stem) -- confirm against the dataset layout.
    list_img_paths = sorted(
        glob(os.path.join(data_folder_path, '*', '*.jpg')) +
        glob(os.path.join(data_folder_path, '*', '*.png')))
    list_csv_paths = sorted(glob(os.path.join(data_folder_path, '*', '*.csv')))
    # ------------------------------------------------------------------------------

    # =============== initialize the ObjectTree and Graph Objects ==================
    tree = ObjectTree()
    graph = Graph()
    # ------------------------------------------------------------------------------

    # === generate graph embeddings for each document ==============================
    # None means "nothing accumulated yet". Tracking this separately from
    # `count` keeps the accumulation working when the first document is
    # skipped.
    X_train, y_train = None, None
    count, skip_count = 0, 0

    for image_path, csv_path in zip(list_img_paths, list_csv_paths):

        img = cv2.imread(image_path, 0)
        object_map = pd.read_csv(csv_path)

        # drop rows with nans
        object_map.dropna(inplace=True, how='any')

        try:
            # generate object tree
            tree.read(object_map, img)
            graph_dict, text_list, coords_arr = tree.connect(plot=True,
                                                             export_df=True)

            # make graph data
            # NOTE(review): the image itself is passed in the fourth position;
            # confirm that is what make_graph_data expects there.
            A, X = graph.make_graph_data(graph_dict, text_list, coords_arr,
                                         img)

            # transform the feature matrix by multiplying with the Adjacency Matrix
            X_transformed = np.matmul(A, X)

            # form new feature matrix
            X = np.concatenate((X, X_transformed), axis=1)

            # get the labels
            y = object_map.label.values

            if X_train is None:
                X_train, y_train = X, y
            else:
                # Build both results before assigning either, so a failure
                # cannot leave features and labels out of step.
                merged_X = np.concatenate((X_train, X), axis=0)
                merged_y = np.concatenate((y_train, y), axis=0)
                X_train, y_train = merged_X, merged_y

            print('Finished processing image {} of {}...'.format(
                count, len(list_img_paths)))

        except Exception as exc:
            skip_count += 1
            print('Skipping Graph Generation...')
            # Say which document was skipped and why, instead of hiding it
            print('  {}: {!r}'.format(image_path, exc))

        count += 1

    print('Finished generating Graph Dataset for {} documents. Skipped {}.'\
        .format(count, skip_count))

    if X_train is None:
        raise ValueError(
            'No training data could be generated from {!r} '
            '({} documents found, {} skipped).'.format(
                data_folder_path, count, skip_count))

    return X_train, y_train
Ejemplo n.º 10
0
class Predictor():
    '''
    Information-extraction pipeline for a document image: OCR, graph
    embeddings, entity classification and rule/model based post-filtering.

    Typical use: call ``load_models()`` once, then ``infer(image)`` for each
    document.
    '''

    # Labels removed from the final output by infer(): 'Other' and the
    # '... Key' labels of label_mapping.
    _DROPPED_LABELS = (0, 3, 5, 7, 9)

    def __init__(self):
        self.model_bin = None
        self.model_mult = None
        self.object_map = None
        self.tree = None
        self.graph = None
        self.image = None
        self.X = None
        self.img_dims = None
        # Classifier output label -> human readable field name
        self.label_mapping = {
            0: 'Other',
            1: 'Store Name',
            2: 'Address',
            3: 'Invoice Number Key',
            4: 'Invoice Number Value',
            5: 'Date-Time Key',
            6: 'Date-Time Value',
            7: 'Item Key',
            8: 'Item Value',
            9: 'Amount Key',
            10: 'Amount Value'
        }
        self.entity_classifier = None
        self.company_name_model = None
        self.company_name_cv = None
        self.invoice_model = None
        self.invoice_cv = None

    @staticmethod
    def _load_pickle(path):
        '''
        Unpickle and return the object stored at ``path``.

        The file is closed as soon as it has been read. pickle can execute
        arbitrary code while loading, so only point this at trusted files.
        '''
        with open(path, 'rb') as model_file:
            return pickle.load(model_file)

    def load_models(
        self,
        entity_classifier_mod=r'C:\Users\Think Analytics\Desktop\Side_Projects\Graph-Convolution-on-Structured-Documents-master\models\model_1_2.sav',
        company_name_mod=r'C:\Users\Think Analytics\Desktop\Side_Projects\Graph-Convolution-on-Structured-Documents-master\models\company_nb.sav',
        company_name_cv=r'C:\Users\Think Analytics\Desktop\Side_Projects\Graph-Convolution-on-Structured-Documents-master\models\company_nb_cv.sav',
        invoice_no_mod=r'C:\Users\Think Analytics\Desktop\Side_Projects\Graph-Convolution-on-Structured-Documents-master\models\invoice_nb.sav',
        invoice_no_cv=r'C:\Users\Think Analytics\Desktop\Side_Projects\Graph-Convolution-on-Structured-Documents-master\models\invoice_nb_cv.sav'
    ):
        '''
        Load the pickled models used by the pipeline.

        Args:
            entity_classifier_mod: str, filepath to the pickled entity
                                        classifier
            company_name_mod: str, filepath to the pickled company name model
            company_name_cv: str, filepath to the pickled transformer used
                                  with the company name model
            invoice_no_mod: str, filepath to the pickled invoice number model
            invoice_no_cv: str, filepath to the pickled transformer used with
                                the invoice number model
        '''
        self.entity_classifier = self._load_pickle(entity_classifier_mod)
        self.company_name_model = self._load_pickle(company_name_mod)
        self.company_name_cv = self._load_pickle(company_name_cv)
        self.invoice_model = self._load_pickle(invoice_no_mod)
        self.invoice_cv = self._load_pickle(invoice_no_cv)

        print('Models loaded from disk.')

    def _ocr(self, image):
        '''
        Run OCR on the image and keep the results on the instance.

        Args:
            image: np.array

        Nothing is returned: the object map and the image dimensions returned
        by ``ocr_using_google_api`` are stored in ``self.object_map`` and
        ``self.img_dims``, and the image itself in ``self.image``.
        '''
        print('Performing OCR...\n')
        self.object_map, self.img_dims = ocr_using_google_api(image)
        self.image = image

    def _generate_graph_embeddings(self):
        '''
        This function first generates an object tree using the ObjectTree Class
        and then generates a graph using the Graph Class.

        Nothing is returned: the feature matrix, concatenated column-wise with
        its adjacency-multiplied counterpart, is stored in ``self.X``.
        '''
        self.tree = ObjectTree()
        self.graph = Graph()

        # Generate a graph dictionary
        self.tree.read(self.object_map, self.image)
        graph_dict, text_list, coords_arr = self.tree.connect(plot=False,
                                                              export_df=True)

        # Generate the Adjacency Matrix (A) and the Feature Matrix (X)
        A, X = self.graph.make_graph_data(graph_dict=graph_dict,
                                          text_list=text_list,
                                          coords_arr=coords_arr,
                                          img_dims=self.img_dims)

        # transform the feature matrix by multiplying with the Adjacency Matrix
        X_transformed = np.matmul(A, X)

        # form new feature matrix
        self.X = np.concatenate((X, X_transformed), axis=1)

    @staticmethod
    def get_dates(string):
        '''
        Wrapper for datefinder.find_dates function

        Returns:
            the last match found in the input string, or None when the
            string contains no date
        '''
        date_time = None
        for match in find_dates(string):
            date_time = match

        return date_time

    @staticmethod
    def _first_dates(candidates):
        '''
        Return the first date found in each candidate that contains one.
        '''
        found = []
        for candidate in candidates:
            matches = list(find_dates(candidate))
            if matches:
                found.append(matches[0])
        return found

    def perform_elimination(self, label_wise_objects):
        '''
        Filter the predicted objects of selected labels in place.

        Args:
            label_wise_objects: dict mapping a label to the list of object
                                texts predicted for it

        Returns:
            the same dict, with labels 6 (dates), 10 (amounts) and
            1 (company names) replaced by their filtered lists. A label that
            was absent ends up with an empty list, except label 6, which
            falls back to scanning every OCR text.
        '''
        # Filtering of Invoice Number Value (label 4) with
        # self.invoice_model / self.invoice_cv is currently not applied.

        # =================== for Date Time ====================================
        # If nothing was labelled as a date, search every OCR text instead.
        labelled_dates = label_wise_objects.get(6)
        if labelled_dates:
            date_candidates = labelled_dates
        else:
            date_candidates = self.object_map.Object.values
        label_wise_objects[6] = self._first_dates(date_candidates)
        # ----------------------------------------------------------------------

        # ============= for amount value =======================================
        # Keep only digit strings with at most one decimal point.
        label_wise_objects[10] = [
            possible_amount
            for possible_amount in label_wise_objects.get(10, [])
            if isinstance(possible_amount, str)
            and possible_amount.replace('.', '', 1).isnumeric()
        ]
        # ======================================================================

        # ============== for company name ======================================
        company_name_list = []
        for possible_company_name in label_wise_objects.get(1, []):
            company_cv_text = self.company_name_cv.transform(
                [possible_company_name])
            if self.company_name_model.predict(company_cv_text) == 1:
                company_name_list.append(possible_company_name)

        label_wise_objects[1] = company_name_list
        # ----------------------------------------------------------------------

        return label_wise_objects

    def infer(self, image):
        '''
        This function implements the entire pipeline for information extraction
        from the input document image.

        Prerequisites: All the ML Models must be loaded

        Args:
            Image, cv2

        Returns:
            dict mapping each retained field name (see ``label_mapping``) to
            the string form of its list of extracted values
        '''
        # perform OCR
        self._ocr(image)

        # Generate Graph Embeddings
        self._generate_graph_embeddings()

        preds = self.entity_classifier.predict(self.X)
        self.object_map['label'] = preds

        label_wise_objects = dict(
            self.object_map.groupby('label')['Object'].apply(list))
        retained_objects = self.perform_elimination(label_wise_objects)

        # Drop the labels that are not part of the output; a label that was
        # never predicted is simply ignored.
        for dropped_label in self._DROPPED_LABELS:
            retained_objects.pop(dropped_label, None)

        output_dict = {}
        for key, value in retained_objects.items():
            output_dict[self.label_mapping[key]] = str(value)

        return output_dict