Ejemplo n.º 1
0
    def test_naivebayes_compare(self):
        """Check that NaiveBayes predicts every sample name as itself.

        The classifier's human labels are taken from the
        ``translate -> country`` section of ``ocr.json``. Each sample name is
        passed through ``predict_all`` and the first prediction that differs
        from its input raises an ``Exception`` describing the mismatch.
        """
        resource_dir = '../resource/'
        classifier = NaiveBayes()
        settings = Serializer.load_json(os.path.join(resource_dir, 'ocr.json'))
        classifier.human_labels = settings['translate']['country']
        expected_names = [
            'ネツァワル王国',
            'カセドリア連合王国',
            'ゲブランド帝国',
            'ホルデイン王国',
            'エルソード王国',
        ]

        # Show the label table in the test output for easier debugging.
        print(settings['translate']['country'])
        predictions = classifier.predict_all(expected_names)
        for index, predicted in enumerate(predictions):
            expected = expected_names[index]
            if expected != predicted:
                message = 'compare x:{0},predict:{1}'.format(
                    expected, predicted)
                raise Exception(message)
Ejemplo n.º 2
0
class Ranking(object):
    """Run OCR on an image and post-process the recognized names.

    Holds an ``OCREngine`` and a ``NaiveBayes`` classifier whose human labels
    come from the ``translate -> country`` section of the OCR settings.
    """

    def __init__(self, config):
        """
            @param {object} config currently unused; kept for caller
                   compatibility
        """
        self.ocr = OCREngine()
        self.naivebayes = NaiveBayes()
        self.naivebayes.human_labels = self.ocr.settings['translate'][
            'country']

    def create_TemporyFile(self, buffer, verbose=False):
        """
            Write the buffer contents to a new temporary file.

            The file is created with ``delete=False``, so the caller is
            responsible for removing it.

            @param {io.BytesIO} buffer data to write
                   {bool} verbose log the created file name when True
            @return {string} path of the created temporary file
        """
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            temp.write(buffer.getvalue())
            temp_file_name = temp.name
        if verbose:
            logger.info(temp_file_name)
        return temp_file_name

    def getResult(self, src, save_image=False):
        """
            Recognize the image, append the recognized names to the corpus
            file and return the OCR document.

            @param {string} src
                   {bool} save_image output debug image
            @return {OCRDocument} document, or None when the image could not
                    be prepared
        """
        pro = DataProcessor(src, ImageType.RAW, save_image=save_image)
        if pro.prepare() is None:
            logger.error('image error:{0}'.format(src))
            return None
        buffer = pro.tobinary(pro.batch())
        temp_file_name = self.create_TemporyFile(buffer, True)

        # Always remove the temporary file, even when recognition fails,
        # so a raising OCR call does not leak files on disk.
        try:
            document = self.ocr.recognize(temp_file_name)
        finally:
            os.remove(temp_file_name)

        # Explicit directives instead of the platform-dependent %F / %T;
        # the [:-3] slice trims microseconds down to milliseconds.
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
        output = '#' + timestamp + '\n'
        output += '\n'.join(document.names()) + '\n'
        with Serializer.open_stream('../temp/corpus.txt', mode='a') as file:
            file.write(output)

        # ocr corpus data -> NaiveBayes classifier
        # TODO: the ranking name swap is not wired up yet, so the predicted
        # names are computed but not applied to the document.
        self.naivebayes.predict_all(document.names())

        document.dump()
        return document