Beispiel #1
0
    def __init__(self, portNo):
        """Open a listening socket, wait for one client, then create the helper modules.

        The constructor blocks inside ``accept()`` until a client connects.
        The ``print`` calls trace every stage of the socket setup.

        Args:
            portNo: Port number that is bound together with the local host name.
        """
        self.host = socket.gethostname()
        print("host ", self.host)
        self.port = portNo
        print("port ", self.port)
        # presumably the receive buffer size in bytes; confirm against the code that reads from self.conn
        self.size = 4096
        self.sock = socket.socket()
        print("sock 1", self.sock)
        # SO_REUSEADDR is set before bind() so the option applies to this bind
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        print("sock 2", self.sock)
        self.sock.bind((self.host, self.port))
        print("sock 3", self.sock)
        # Backlog of 1: a single pending connection is queued
        self.sock.listen(1)
        print("sock 4", self.sock)
        # Blocks until a client connects
        self.conn, self.addr = self.sock.accept()
        print("sock 5", self.sock)
        print("Client connected")

        self.billReader = BillReader()
        self.currencyRecognizer = CurrencyRecognizer()
        # NOTE(review): the configure() call below is disabled and uses machine-specific
        # Windows paths; the recognizer is used without it in this version.
        # self.currencyRecognizer.configure("D:\\TP_PROGS\\Projects\\TeProjSahara\\progs\\Sahara-System-For-Aiding-Visually-Impaired-master\\Sahara-System-For-Aiding-Visually-Impaired-master\\Server_side\\DataFiles\\yolov3.weights",\
        #      "D:\\TP_PROGS\\Projects\\TeProjSahara\\progs\\Sahara-System-For-Aiding-Visually-Impaired-master\\Sahara-System-For-Aiding-Visually-Impaired-master\\Server_side\\DataFiles\\yolov3-tiny.cfg", \
        #          ('10','20','50','100','200','500','2000'))
        self.summarizer = Summarizer()
        self.textRecognizer = TextRecognizer()
        self.bot = Bot()
        self.objectDet = DetectObject()
        self.maskDet = DetectMask()
Beispiel #2
0
def main():
    """Run the text recognizer over generated samples and show each sample.

    The dataset repeats indefinitely, so this loop only ends when the process
    is interrupted. After every sample the recognized text is printed, the
    image is shown in an OpenCV window and the loop waits for a key press.
    """
    sample_source = SampleGenerator(10)
    text_recognizer = TextRecognizer()

    # Build the input pipeline one stage at a time.
    element_types = (tf.float32, tf.int64)
    element_shapes = (tf.TensorShape([32, None, 3]), tf.TensorShape([None]))
    testset = tf.data.Dataset.from_generator(sample_source.gen, element_types,
                                             element_shapes)
    testset = testset.repeat(-1)
    testset = testset.map(ocr_parse_function)
    testset = testset.batch(1)
    testset = testset.prefetch(tf.data.experimental.AUTOTUNE)

    for image, _label in testset:
        text = text_recognizer.recognize(image, preprocess=False)
        print(text)
        # presumably undoes a [-1, 1] normalisation applied upstream; TODO confirm
        displayable = (image[0, ...] / 2 + 0.5) * 255.
        cv2.imshow("image", displayable.numpy().astype('uint8'))
        cv2.waitKey()
    def __init__(self, portNo):
        """Listen on ``portNo``, block until one client connects, then build the modules.

        Args:
            portNo: Port number bound together with the local host name.
        """
        self.host = socket.gethostname()
        self.port = portNo
        self.size = 4096

        # Listening socket: enable address reuse, bind, queue one connection.
        self.sock = socket.socket()
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        endpoint = (self.host, self.port)
        self.sock.bind(endpoint)
        self.sock.listen(1)

        # accept() blocks until a client connects.
        connection, peer_address = self.sock.accept()
        self.conn = connection
        self.addr = peer_address
        print("Client connected")

        self.billReader = BillReader()

        # Currency recognizer: weights file, network config and the class labels.
        weights_path = "/Users/dhavalbagal/Desktop/BE-PROJECT/Sahara/DataFiles/yolov3.weights"
        config_path = "/Users/dhavalbagal/Desktop/BE-PROJECT/Sahara/DataFiles/yolov3-tiny.cfg"
        denominations = ('10', '20', '50', '100', '200', '500', '2000')
        self.currencyRecognizer = CurrencyRecognizer()
        self.currencyRecognizer.configure(weights_path, config_path,
                                          denominations)

        self.summarizer = Summarizer()
        self.textRecognizer = TextRecognizer()
        self.bot = Bot()
Beispiel #4
0
class TextOCR(object):
    """Detect text lines in an image and recognize the text of each line."""

    def __init__(self):
        """Create the detector and the recognizer used by :meth:`scan`."""
        self.detector = TextDetector()
        self.recognizer = TextRecognizer()

    def scan(self, img, padding=5):
        """Recognize every text line the detector finds in ``img``.

        Args:
            img: Image array indexed as ``img[row, column, channel]``.
            padding: Number of pixels added around each detected box before
                cropping. Defaults to 5, the value that used to be hard-coded.

        Returns:
            A list with one dict per detected line, holding the cropped
            ``'image'``, the recognized ``'text'`` and the detector's
            ``'position'`` entry (read here as x1, y1, x2, y2).
        """
        textlines, _bbox, _scores = self.detector.detect(img)
        height, width = img.shape[0], img.shape[1]

        results = []
        for textline in textlines:
            # Clamp the padded box to the image. A negative slice bound would
            # otherwise wrap around and select the wrong (often empty) region
            # for text close to the top or left border.
            left = max(0, min(int(textline[0]) - padding, width))
            top = max(0, min(int(textline[1]) - padding, height))
            right = max(0, min(int(textline[2]) + padding, width))
            bottom = max(0, min(int(textline[3]) + padding, height))

            timg = img[top:bottom, left:right, :]
            text, _ = self.recognizer.recognize(timg)
            results.append({'image': timg, 'text': text, 'position': textline})
        return results
Beispiel #5
0
class BillReader:
    """Extract product names and prices from the OCR text of a bill."""

    # A product name in a bill is always followed by a floating number (the tax
    # or the amount), so everything before the first floating number of a
    # record is taken as the product name (captured as group 1).
    _NAME_PATTERN = re.compile(r'^(.*?)[0-9]+[.][0-9]+')
    # Assumption: the last floating number of a record is the product's price.
    _PRICE_PATTERN = re.compile(r'[0-9]+[.][0-9]+')

    def __init__(self):
        """Start with no OCR text and an empty product table."""
        self.text = None
        self.size = 500
        self.textRecognizer = TextRecognizer()
        self.products = dict()

    def extractProductNames(self):
        """Rebuild ``self.products`` (name -> price string) from ``self.text``.

        Each line of the text is one record. Records without a floating number,
        or without any text before it, are skipped. The table is rebuilt from
        scratch on every call so that items of a previously read bill do not
        leak into the current one.
        """
        print("text: ", self.text)
        self.products = {}
        if not self.text:
            # OCR produced nothing (None or empty string): no products.
            return

        for record in self.text.splitlines():
            match = self._NAME_PATTERN.search(record)
            if match is None:
                # No floating number in this record, so no product on it.
                continue
            productName = match.group(1).strip()
            if not productName:
                # The record starts with the number; there is no name to report.
                continue
            print(productName)
            # The name pattern matched, so at least one price is present.
            self.products[productName] = self._PRICE_PATTERN.findall(record)[-1]

    def readBill(self, img):
        """Run OCR on ``img`` and return a spoken-style summary of the bill.

        Args:
            img: Image handed to ``self.textRecognizer.ocr``.

        Returns:
            One ``"<name> costs INR <price>. "`` sentence per product, or an
            apology message when no product could be read.
        """
        print("in read bill")
        self.im = img
        self.text = self.textRecognizer.ocr(self.im)
        self.extractProductNames()
        if not self.products:
            return "I am not able to read the bill. Please try again."
        return "".join(name + " costs INR " + str(price) + ". "
                       for name, price in self.products.items())
Beispiel #6
0
class Server():
    """Single-client TCP server that routes image + command requests to the modules.

    One request on the wire is ``<encoded image> b"mof" <command text> b"eof"``.
    Every reply is one line of text terminated by a newline.
    """

    # Markers of the wire format described above.
    _SEPARATOR = b"mof"
    _END_MARKER = b"eof"

    # When true, selectModule shows every received image in an OpenCV window
    # and blocks until a key is pressed. Set to False to run unattended.
    showReceivedImage = True

    def __init__(self, portNo):
        """Listen on ``portNo``, block until one client connects, then build the modules.

        Args:
            portNo: Port number bound together with the local host name.
        """
        self.host = socket.gethostname()
        print("host ", self.host)
        self.port = portNo
        print("port ", self.port)
        self.size = 4096  # maximum number of bytes requested per recv() call
        self.sock = socket.socket()
        print("sock 1", self.sock)
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        print("sock 2", self.sock)
        self.sock.bind((self.host, self.port))
        print("sock 3", self.sock)
        self.sock.listen(1)
        print("sock 4", self.sock)
        # Blocks until a client connects.
        self.conn, self.addr = self.sock.accept()
        print("sock 5", self.sock)
        print("Client connected")

        self.billReader = BillReader()
        # NOTE: currencyRecognizer.configure(weights, cfg, denominations) is not
        # called in this version; the original call was left commented out with
        # machine-specific paths.
        self.currencyRecognizer = CurrencyRecognizer()
        self.summarizer = Summarizer()
        self.textRecognizer = TextRecognizer()
        self.bot = Bot()
        self.objectDet = DetectObject()
        self.maskDet = DetectMask()

    def fetchData(self):
        """Receive one request and split it into the decoded image and the command.

        Returns:
            ``(image, command)`` where ``image`` is the result of
            ``cv2.imdecode`` and ``command`` is the raw command ``bytes``, or
            ``(None, "")`` when the request is malformed.

        Raises:
            ConnectionError: The client closed the connection before a
                complete request arrived.
        """
        buffer = bytearray()
        while True:
            dataChunk = self.conn.recv(self.size)
            if not dataChunk:
                # recv() returns b"" once the peer has closed the connection;
                # looping on would spin forever.
                raise ConnectionError("Client closed the connection")
            buffer += dataChunk
            # Test the accumulated data: the end marker may be split across
            # two recv() calls.
            if buffer.endswith(self._END_MARKER):
                break

        payload = bytes(buffer[:-len(self._END_MARKER)])
        try:
            # Split on the LAST separator: the binary image may contain the
            # separator bytes by chance, the short command normally does not.
            imgString, command = payload.rsplit(self._SEPARATOR, 1)
            if not imgString:
                # cv2.imdecode rejects an empty buffer.
                return None, ""
            nparr = np.frombuffer(imgString, np.uint8)
            image = cv2.imdecode(nparr, cv2.IMREAD_UNCHANGED)
        except ValueError:
            return None, ""
        return image, command

    def selectModule(self, image, command):
        """Determine the intent of ``command`` and run the matching module on ``image``.

        Relies on the module-level names ``flag``, ``totCash`` and ``totAmt``
        (defined outside this class) to keep the state of a running cash
        total between requests.

        Args:
            image: Decoded image as returned by :meth:`fetchData`.
            command: UTF-8 encoded command bytes.

        Returns:
            The reply text for the client.
        """
        global flag

        msg = ""
        intent, _ = self.bot.getIntent(command.decode("utf-8"))
        if self.showReceivedImage:
            # Blocks until a key is pressed in the image window.
            cv2.imshow('image', image)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        print(intent)

        if flag == 1:
            # A cash total is in progress: only NEXT and STOP are understood.
            if intent == "Next":
                msg, temp = self.currencyRecognizer.readCurr(image)
                if temp != 0:
                    totCash.append(temp)
                    msg = msg + " say NEXT to continue, STOP for final amount"
            elif intent == "Stop":
                finalAmt = totAmt()
                msg = "Total amount of cash in your hand is equal to " + str(
                    finalAmt)
                flag = 0
            else:
                msg = "I didnt catch what you said, Please repeat"
        else:
            if intent == "CurrencyRecognition":
                msg, _ = self.currencyRecognizer.readCurr(image)
            elif intent == "BillReading":
                msg = self.billReader.readBill(image)
            elif intent == "TextSummarization":
                text = self.textRecognizer.ocr(image)
                msg = self.summarizer.generateSummary(text)
            elif intent == "BasicTextReading":
                msg = self.textRecognizer.ocr(image)
            elif intent == "Object":
                msg = self.objectDet.objDetect(image)
            elif intent == "Mask":
                msg = self.maskDet.masKDetect(image)
            elif intent == "TotalCash":
                msg, temp = self.currencyRecognizer.readCurr(image)
                totCash.append(temp)
                if temp != 0:
                    msg = msg + " To continue totalling say the word NEXT. To get the final total say STOP"
                    flag = 1
            else:
                msg = "I didnt catch what you said. Please repeat"

        print(msg)
        return msg

    def sendReply(self, msg):
        """Send ``msg`` to the client as one newline-terminated line.

        Line breaks inside ``msg`` are replaced by ``"."`` so that the newline
        appended at the end is the only one in the reply.
        """
        msg = ".".join(msg.splitlines())
        msg += '\n'
        # sendall() keeps sending until the whole reply is out; send() may
        # transmit only part of it.
        self.conn.sendall(msg.encode())

    def keepListening(self):
        """Serve requests until the client disconnects."""
        print("on")
        while True:
            try:
                image, command = self.fetchData()
                print("image", command)
                # image is None when the bytes could not be decoded.
                if image is not None and len(command) != 0:
                    msgFromModule = self.selectModule(image, command)
                    self.sendReply(msgFromModule)
                else:
                    self.sendReply(
                        "Something went wrong. Please click the picture again.")
            except ConnectionError:
                # Raised by fetchData on disconnect, or by the socket when the
                # reply cannot be delivered any more.
                print("Client disconnected")
                break

    def restartServer(self):
        """Drop the current client and block until the next one connects.

        The listening socket stays bound and listening, so only ``accept()``
        is repeated; binding it a second time would raise ``OSError``.
        """
        self.conn.close()
        self.conn, self.addr = self.sock.accept()

    def closeConnection(self):
        """Close the connection to the current client."""
        self.conn.close()

    def __del__(self):
        """Close the client connection and the listening socket, if they exist."""
        # __init__ may have failed before either attribute was assigned.
        for attribute in ("conn", "sock"):
            endpoint = getattr(self, attribute, None)
            if endpoint is not None:
                endpoint.close()
Beispiel #7
0
  def __init__(self):
    """Create the text detector and the text recognizer this object delegates to."""
    self.detector = TextDetector()
    self.recognizer = TextRecognizer()
Beispiel #8
0
def train_ocr():
    """Train the recognizer's CRNN model with CTC loss on generated samples.

    The function resumes from the latest checkpoint in ``checkpoints`` when
    one exists, writes TensorBoard summaries to the same directory every 100
    optimizer iterations, saves a checkpoint every 2000 iterations, and stops
    once the mean loss reported at a logging step drops below 0.01. The final
    model is saved to ``model/crnn.h5``.

    ``batch_size``, ``ocr_parse_function``, ``exists``, ``mkdir`` and ``join``
    are names defined outside this function.
    """

    generator = SampleGenerator(10)
    recognizer = TextRecognizer()
    optimizer = tf.keras.optimizers.Adam(1e-5)
    # load dataset: the generator yields (float32 image of shape [32, None, 3],
    # int64 label sequence); the stream repeats indefinitely
    trainset = tf.data.Dataset.from_generator(
        generator.gen, (tf.float32, tf.int64),
        (tf.TensorShape([32, None, 3]), tf.TensorShape([
            None,
        ]))).repeat(-1).map(ocr_parse_function).batch(batch_size).prefetch(
            tf.data.experimental.AUTOTUNE)
    # restore from existing checkpoint
    if False == exists('checkpoints'): mkdir('checkpoints')
    checkpoint = tf.train.Checkpoint(model=recognizer.crnn,
                                     optimizer=optimizer)
    checkpoint.restore(tf.train.latest_checkpoint('checkpoints'))
    # create log
    log = tf.summary.create_file_writer('checkpoints')
    # train model
    avg_loss = tf.keras.metrics.Mean(name='loss', dtype=tf.float32)
    for image, labels in trainset:
        # Skip batches whose input already contains NaN values.
        if True == tf.math.reduce_any(tf.math.is_nan(image)):
            print("nan was detected in image! skip current iteration")
            continue
        with tf.GradientTape() as tape:
            # original author's note: image.shape = (batch, seq_length, 32)
            # NOTE(review): depends on ocr_parse_function, which is not visible here - confirm
            pred = recognizer.crnn(image)
            # original author's note: logits.shape = (batch, seq_length / 4, class_num + 1)
            # NOTE(review): not verifiable from this function - confirm against the crnn model
            if True == tf.math.reduce_any(tf.math.is_nan(pred)):
                print('nan was detected in pred! skip current iteration')
                # Leaves the tape without computing a loss or applying gradients.
                continue
            # Every sample in the batch is given the full prediction length and
            # the full (batched) label length.
            loss = tf.keras.backend.ctc_batch_cost(
                y_true=labels,
                y_pred=pred,
                input_length=tf.tile([[pred.shape[1]]], (batch_size, 1)),
                label_length=tf.tile([[labels.shape[1]]], (batch_size, 1)))
        avg_loss.update_state(loss)
        # write log (runs before this step's gradients are applied)
        if tf.equal(optimizer.iterations % 100, 0):
            with log.as_default():
                tf.summary.scalar('loss',
                                  avg_loss.result(),
                                  step=optimizer.iterations)
                # Recognize the first sample of the batch for the text summary.
                text, decoded = recognizer.recognize(image[0:1, ...], False)
                # (disabled) word-error metric:
                #err = tf.reduce_mean(tf.edit_distance(tf.cast(decoded, tf.int32), to_sparse(tf.cast(labels, dtype = tf.int32))));
                tf.summary.image('image',
                                 tf.cast((image[0:1, ...] / 2 + 0.5) * 255.,
                                         dtype=tf.uint8),
                                 step=optimizer.iterations)
                tf.summary.text('text', text, step=optimizer.iterations)
                #tf.summary.scalar('word error', err, step = optimizer.iterations);
            print('Step #%d Loss: %.6f' %
                  (optimizer.iterations, avg_loss.result()))
            # Early stop: the mean loss since the last reset is below the threshold.
            if avg_loss.result() < 0.01: break
            avg_loss.reset_states()
        grads = tape.gradient(loss, recognizer.crnn.trainable_variables)
        optimizer.apply_gradients(
            zip(grads, recognizer.crnn.trainable_variables))
        # save model
        if tf.equal(optimizer.iterations % 2000, 0):
            checkpoint.save(join('checkpoints', 'ckpt'))
    # save the network structure with weights
    if False == exists('model'): mkdir('model')
    recognizer.crnn.save(join('model', 'crnn.h5'))
Beispiel #9
0
 def __init__(self):
     """Start with no OCR text, a size of 500, a new TextRecognizer and no products."""
     self.text, self.size = None, 500
     self.textRecognizer = TextRecognizer()
     self.products = {}
class Server():
    """Single-client TCP server that routes image + command requests to the modules.

    One request on the wire is ``<encoded image> b"mof" <command text> b"eof"``.
    Every reply is one line of text terminated by a newline.
    """

    # Markers of the wire format described above.
    _SEPARATOR = b"mof"
    _END_MARKER = b"eof"

    # Defaults for the currency recognizer (the values that used to be hard-coded).
    _DEFAULT_WEIGHTS = "/Users/dhavalbagal/Desktop/BE-PROJECT/Sahara/DataFiles/yolov3.weights"
    _DEFAULT_CONFIG = "/Users/dhavalbagal/Desktop/BE-PROJECT/Sahara/DataFiles/yolov3-tiny.cfg"
    _DENOMINATIONS = ('10', '20', '50', '100', '200', '500', '2000')

    def __init__(self, portNo, weightsPath=None, configPath=None):
        """Listen on ``portNo``, block until one client connects, then build the modules.

        Args:
            portNo: Port number bound together with the local host name.
            weightsPath: Weights file passed to the currency recognizer;
                ``None`` selects the built-in default path.
            configPath: Config file passed to the currency recognizer;
                ``None`` selects the built-in default path.
        """
        self.host = socket.gethostname()
        self.port = portNo
        self.size = 4096  # maximum number of bytes requested per recv() call
        self.sock = socket.socket()
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.sock.bind((self.host, self.port))
        self.sock.listen(1)
        # Blocks until a client connects.
        self.conn, self.addr = self.sock.accept()
        print("Client connected")

        self.billReader = BillReader()
        self.currencyRecognizer = CurrencyRecognizer()
        self.currencyRecognizer.configure(
            self._DEFAULT_WEIGHTS if weightsPath is None else weightsPath,
            self._DEFAULT_CONFIG if configPath is None else configPath,
            self._DENOMINATIONS)
        self.summarizer = Summarizer()
        self.textRecognizer = TextRecognizer()
        self.bot = Bot()

    def fetchData(self):
        """Receive one request and split it into the decoded image and the command.

        Returns:
            ``(image, command)`` where ``image`` is the result of
            ``cv2.imdecode`` and ``command`` is the raw command ``bytes``, or
            ``(None, "")`` when the request is malformed.

        Raises:
            ConnectionError: The client closed the connection before a
                complete request arrived.
        """
        buffer = bytearray()
        while True:
            dataChunk = self.conn.recv(self.size)
            if not dataChunk:
                # recv() returns b"" once the peer has closed the connection;
                # looping on would spin forever.
                raise ConnectionError("Client closed the connection")
            buffer += dataChunk
            # Test the accumulated data: the end marker may be split across
            # two recv() calls.
            if buffer.endswith(self._END_MARKER):
                break

        payload = bytes(buffer[:-len(self._END_MARKER)])
        try:
            # Split on the LAST separator: the binary image may contain the
            # separator bytes by chance, the short command normally does not.
            imgString, command = payload.rsplit(self._SEPARATOR, 1)
            if not imgString:
                # cv2.imdecode rejects an empty buffer.
                return None, ""
            nparr = np.frombuffer(imgString, np.uint8)
            image = cv2.imdecode(nparr, cv2.IMREAD_UNCHANGED)
        except ValueError:
            return None, ""
        return image, command

    def selectModule(self, image, command):
        """Determine the intent of ``command`` and run the matching module on ``image``.

        Args:
            image: Decoded image as returned by :meth:`fetchData`.
            command: UTF-8 encoded command bytes.

        Returns:
            The reply text of the selected module, or ``""`` when the intent
            matches none of the modules.
        """
        msg = ""
        intent, _ = self.bot.getIntent(command.decode("utf-8"))
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if intent == "CurrencyRecognition":
            msg = self.currencyRecognizer.giveTotal(image)
        elif intent == "BillReading":
            msg = self.billReader.readBill(image)
        elif intent == "TextSummarization":
            text = self.textRecognizer.ocr(image)
            msg = self.summarizer.generateSummary(text)
        elif intent == "BasicTextReading":
            msg = self.textRecognizer.ocr(image)

        return msg

    def sendReply(self, msg):
        """Send ``msg`` to the client as one newline-terminated line.

        Line breaks inside ``msg`` are replaced by ``"."`` so that the newline
        appended at the end is the only one in the reply.
        """
        msg = ".".join(msg.splitlines())
        msg += '\n'
        # sendall() keeps sending until the whole reply is out; send() may
        # transmit only part of it.
        self.conn.sendall(msg.encode())

    def keepListening(self):
        """Serve requests until the client disconnects."""
        while True:
            try:
                image, command = self.fetchData()
                # image is None when the bytes could not be decoded.
                if image is not None and len(command) != 0:
                    msgFromModule = self.selectModule(image, command)
                    self.sendReply(msgFromModule)
                else:
                    self.sendReply(
                        "Something went wrong. Please click the picture again.")
            except ConnectionError:
                # Raised by fetchData on disconnect, or by the socket when the
                # reply cannot be delivered any more.
                print("Client disconnected")
                break

    def restartServer(self):
        """Drop the current client and block until the next one connects.

        The listening socket stays bound and listening, so only ``accept()``
        is repeated; binding it a second time would raise ``OSError``.
        """
        self.conn.close()
        self.conn, self.addr = self.sock.accept()

    def closeConnection(self):
        """Close the connection to the current client."""
        self.conn.close()

    def __del__(self):
        """Close the client connection and the listening socket, if they exist."""
        # __init__ may have failed before either attribute was assigned.
        for attribute in ("conn", "sock"):
            endpoint = getattr(self, attribute, None)
            if endpoint is not None:
                endpoint.close()