def analyzeDocumentWithTemplate(self, cr, uid, documentId, templateId, context):
    """Rebuild the properties of one document using an explicit template.

    The properties currently stored for ``documentId`` are always removed
    first. When ``templateId`` is set, the document image is decoded to a
    temporary file, run through the recognizer with that template, and one
    ``nan.document.property`` record is created per extracted box.
    ``executeAttachs`` and ``executeActions`` are then run for the document.

    Args:
        cr: database cursor, passed through to every model call.
        uid: id of the user performing the operation.
        documentId: id of the document to analyze.
        templateId: id of the template to apply; a falsy value only clears
            the existing properties.
        context: context dictionary forwarded to the model calls.

    Returns:
        None. Returns early (after the old properties were removed and
        without running attachments/actions) when the document cannot be
        read.
    """
    propertyModel = self.pool.get('nan.document.property')

    # Whether templateId is valid or not, remove previous properties.
    ids = propertyModel.search(
        cr, uid, [('document_id', '=', documentId)], context=context)
    propertyModel.unlink(cr, uid, ids, context)

    if templateId:
        template = self.pool.get('nan.template').getTemplateFromId(
            cr, uid, templateId, context)

        documents = self.read(cr, uid, [documentId], context=context)
        if not documents:
            return
        document = documents[0]

        # The recognizer works on a file path, so the decoded image is
        # spilled to a temporary file that we are responsible for deleting.
        fd, image = tempfile.mkstemp()
        try:
            fp = os.fdopen(fd, 'wb+')
            try:
                # b64decode is available on both Python 2 and 3, unlike
                # decodestring which was removed in Python 3.9.
                fp.write(base64.b64decode(document['datas']))
            finally:
                fp.close()

            recognizer = Recognizer()
            recognizer.recognize(QImage(image))
            doc = recognizer.extractWithTemplate(image, template)
        finally:
            # Best-effort cleanup: a failure to delete the scratch file
            # must not mask the real result or error.
            try:
                os.remove(image)
            except OSError:
                pass

        for box in doc.boxes:
            # The original referenced an undefined name here, raising
            # NameError as soon as a box was extracted.
            propertyModel.create(cr, uid, {
                'name': box.templateBox.name,
                'value': box.text,
                'document_id': document['id'],
                'template_box_id': box.templateBox.id,
            }, context)

    self.executeAttachs(cr, uid, [documentId], context)
    self.executeActions(cr, uid, [documentId], True, context)
def scanDocumentWithTemplate(self, cr, uid, documentId, templateId):
    """Rescan one document with an explicit template and commit the result.

    The properties currently stored for ``documentId`` are always removed
    first. When ``templateId`` is set, the OCR system is initialized, the
    document image is decoded to a temporary file, run through the
    recognizer with that template, and one ``nan.document.property`` record
    is created per extracted box. ``executeAttachs`` and ``executeActions``
    are then run for the document and the cursor is committed.

    Args:
        cr: database cursor; ``cr.commit()`` is called at the end.
        uid: id of the user performing the operation.
        documentId: id of the document to scan.
        templateId: id of the template to apply; a falsy value only clears
            the existing properties.

    Returns:
        None. Returns early, without committing, when the document cannot
        be read.
    """
    obj = self.pool.get('nan.document.property')

    # Whether templateId is valid or not, remove previous properties.
    ids = obj.search(cr, uid, [('document', '=', documentId)])
    obj.unlink(cr, uid, ids)

    if templateId:
        # Initialize Ocr System (Gamera)
        initOcrSystem()

        template = self.pool.get('nan.template').getTemplateFromId(
            cr, uid, templateId)

        documents = self.read(cr, uid, [documentId])
        if not documents:
            return
        document = documents[0]

        # The recognizer works on a file path, so the decoded image is
        # spilled to a temporary file that we are responsible for deleting.
        fd, image = tempfile.mkstemp()
        try:
            fp = os.fdopen(fd, 'wb+')
            try:
                # b64decode is available on both Python 2 and 3, unlike
                # decodestring which was removed in Python 3.9.
                fp.write(base64.b64decode(document['datas']))
            finally:
                # Close even when decoding/writing fails so the descriptor
                # is not leaked.
                fp.close()

            recognizer = Recognizer()
            recognizer.recognize(QImage(image))
            doc = recognizer.extractWithTemplate(image, template)
        finally:
            # Best-effort cleanup: a failure to delete the scratch file
            # must not mask the real result or error.
            try:
                os.remove(image)
            except OSError:
                pass

        for box in doc.boxes:
            obj.create(cr, uid, {
                'name': box.templateBox.name,
                'value': box.text,
                'document': document['id'],
                'template_box': box.templateBox.id,
            })

    self.executeAttachs(cr, uid, [documentId])
    self.executeActions(cr, uid, [documentId], True)
    cr.commit()
def scan_document(self, cr, uid, imageIds, notify=False):
    """Scan documents and attach the best matching template to each one.

    For every document in ``imageIds`` whose state is ``pending`` or
    ``scanning``, the stored image is decoded to a temporary file, run
    through the recognizer and compared against all templates. The document
    is then written with the matched template (or ``False``) and the state
    ``scanned``, and one ``nan.document.property`` record is created per
    extracted box. ``executeAttachs`` and ``executeActions`` are finally
    run for all ``imageIds`` and the cursor is committed.

    Args:
        cr: database cursor; ``cr.commit()`` is called at the end.
        uid: id of the user performing the operation.
        imageIds: ids of the documents to scan.
        notify: when true, a ``res.request`` addressed to ``uid`` is
            created for every scanned document.
    """
    print("Scan_documentcalled")

    # Load templates into 'templates' list
    templates = self.pool.get('nan.template').getAllTemplates(cr, uid)

    # Initialize Ocr System (Gamera)
    initOcrSystem()
    recognizer = Recognizer()

    # Iterate over all images and try to find the most similar template
    for document in self.browse(cr, uid, imageIds):
        if document.state not in ('pending', 'scanning'):
            continue
        if not document.datas:
            # Nothing to decode; skip instead of crashing the whole batch
            # (same guard analyze_document applies).
            continue

        # The image is spilled to a temporary file so QImage can load it;
        # we are responsible for deleting that file afterwards.
        fd, image = tempfile.mkstemp()
        try:
            fp = os.fdopen(fd, 'wb+')
            try:
                # b64decode is available on both Python 2 and 3, unlike
                # decodestring which was removed in Python 3.9.
                fp.write(base64.b64decode(document.datas))
            finally:
                fp.close()

            recognizer.recognize(QImage(image))
            result = recognizer.findMatchingTemplateByOffset(templates)
        finally:
            # Best-effort cleanup: a failure to delete the scratch file
            # must not mask the real result or error.
            try:
                os.remove(image)
            except OSError:
                pass

        template = result['template']
        doc = result['document']

        # Single-argument print(...) behaves the same on Python 2 and 3.
        if not template:
            print("No template found for document %s." % document.name)
            template_id = False
        else:
            print("The best template found for document %s is %s." % (
                document.name, template.name))
            template_id = template.id

        self.write(cr, uid, [document.id], {
            'template': template_id,
            'state': 'scanned',
        })

        if doc:
            obj = self.pool.get('nan.document.property')
            for box in doc.boxes:
                obj.create(cr, uid, {
                    'name': box.templateBox.name,
                    'value': box.text,
                    'document': document.id,
                    'template_box': box.templateBox.id,
                })

        if notify:
            self.pool.get('res.request').create(cr, uid, {
                'act_from': uid,
                'act_to': uid,
                'name': 'Finished scanning document',
                'body': 'The auto_attach system has finished scanning the document you requested. A reference to the document can be found in field Document Ref 1.',
                'ref_doc1': 'nan.document,%d' % document.id,
            })

    self.executeAttachs(cr, uid, imageIds)
    self.executeActions(cr, uid, imageIds, True)
    cr.commit()
def analyze_document(self, cr, uid, imageIds, context=None):
    """Analyze documents and attach the best matching template to each one.

    For every document in ``imageIds`` whose state is ``pending`` or
    ``analyzing`` and that has image data, the image is decoded to a
    temporary file and run through the recognizer. Templates that declare
    an ``analysisFunction`` are tried first, in order, by calling the
    method of that name on ``self``; the first one returning a truthy
    document wins. Otherwise the remaining templates are matched with
    ``findMatchingTemplateByOffset``. The document is then written with the
    resulting template (or ``False``) and the state ``analyzed``, and one
    ``nan.document.property`` record is created per extracted box.
    ``executeAttachs`` and ``executeActions`` are finally run for all
    ``imageIds``.

    Args:
        cr: database cursor, passed through to every model call.
        uid: id of the user performing the operation.
        imageIds: ids of the documents to analyze.
        context: optional context dictionary forwarded to the model calls.
    """
    # Load templates into 'templates' list
    templates = self.pool.get('nan.template').getAllTemplates(
        cr, uid, context)
    templatesWithAnalysis = [x for x in templates if x.analysisFunction]
    templatesWithoutAnalysis = [
        x for x in templates if not x.analysisFunction]

    # Search what recognizers are used so we do not execute unnecessary
    # processes.
    recognizers = set()
    for template in templates:
        for box in template.boxes:
            recognizers.add(box.recognizer)
    recognizers = list(recognizers)

    recognizer = Recognizer()
    propertyModel = self.pool.get('nan.document.property')

    # Iterate over all images and try to find the most similar template
    for document in self.browse(cr, uid, imageIds, context):
        if document.state not in ('pending', 'analyzing'):
            continue
        if not document.datas:
            continue

        # The image is spilled to a temporary file so QImage can load it;
        # we are responsible for deleting that file afterwards.
        fd, image = tempfile.mkstemp()
        try:
            fp = os.fdopen(fd, 'wb+')
            try:
                # b64decode is available on both Python 2 and 3, unlike
                # decodestring which was removed in Python 3.9.
                fp.write(base64.b64decode(document.datas))
            finally:
                fp.close()

            recognizer.recognize(QImage(image), recognizers)

            template = False
            doc = False
            for template in templatesWithAnalysis:
                function = template.analysisFunction.replace(' ', '')
                if function.endswith('()'):
                    function = function[:-2]
                # SECURITY: the method name comes from template
                # configuration. It used to be spliced into a string and
                # passed to eval(), which would execute arbitrary
                # expressions stored in that field. A plain attribute
                # lookup only allows calling an existing method on self.
                analyze = getattr(self, function)
                doc = analyze(
                    cr, uid, document, template, recognizer, context)
                if doc:
                    break

            if not doc:
                result = recognizer.findMatchingTemplateByOffset(
                    templatesWithoutAnalysis)
                template = result['template']
                doc = result['document']
        finally:
            # Best-effort cleanup: a failure to delete the scratch file
            # must not mask the real result or error.
            try:
                os.remove(image)
            except OSError:
                pass

        if not template:
            print("No template found for document %s." % document.name)
            template_id = False
        else:
            print("The best template found for document %s is %s."
                  % (document.name, template.name))
            template_id = template.id

        self.write(cr, uid, [document.id], {
            'template_id': template_id,
            'state': 'analyzed',
        }, context=context)

        if doc:
            for box in doc.boxes:
                propertyModel.create(cr, uid, {
                    'name': box.name,
                    'value': box.text,
                    'document_id': document.id,
                    'template_box_id': box.templateBox and box.templateBox.id or False,
                }, context)

        # NOTE(review): this tests the state held by the browse record
        # after the write above set it to 'analyzed'; presumably the record
        # still reports the value read before the write - confirm.
        if document.state == 'analyzing':
            self.pool.get('res.request').create(cr, uid, {
                'act_from': uid,
                'act_to': uid,
                'name': 'Finished analyzing document',
                'body': 'The auto_attach system has finished analyzing the document you requested. A reference to the document can be found in field Document Ref 1.',
                'ref_doc1': 'nan.document,%d' % document.id,
            }, context)

    self.executeAttachs(cr, uid, imageIds, context)
    self.executeActions(cr, uid, imageIds, True, context)