def upload_file():
    """Handle a PDF upload and return the extracted data as a string.

    Reads the ``file`` part of the current ``request``, saves it under
    ``app.config['UPLOAD_FOLDER']`` and passes the saved path through
    ``OCR.Convert`` -> ``Extract.Info`` -> ``Align.restructure``.

    Returns:
        ``str(data)`` of the restructured result on success; otherwise a
        JSON response with status code 400 and a ``message`` key when the
        file part is missing, no file was selected, or ``allowed_file``
        rejects the filename.
    """
    # Check that the POST request has the file part.
    if 'file' not in request.files:
        return _upload_error('No file part in the request')

    upload = request.files['file']
    if upload.filename == '':
        return _upload_error('No file selected for uploading')

    if not (upload and allowed_file(upload.filename)):
        return _upload_error('Allowed file type is pdf')

    # Sanitise the client-supplied name before joining it to a server path.
    filename = secure_filename(upload.filename)
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    upload.save(file_path)

    dpi = 300
    # Only the second value returned by OCR.Convert is used here.
    # NOTE(review): the third argument is the literal '1'; the script code in
    # this file passes str(k) in that position - confirm its meaning in OCR.
    _, fname = OCR.Convert(file_path, dpi, str(1))
    data = Extract.Info(fname)
    data = Align.restructure(data)
    return str(data)


def _upload_error(message):
    """Build a JSON ``{'message': message}`` response with status code 400."""
    resp = jsonify({'message': message})
    resp.status_code = 400
    return resp
# Collect the paths of all files matching data/*.pdf.
path_pdf1 = 'data/*.pdf'
files_pdf1 = glob.glob(path_pdf1)
# Bare expression: has no effect when run as a script (presumably left over
# from displaying the list in a notebook cell - see the In[n] markers).
files_pdf1


# ### IMPORT OCR MODULE

# In[3]:

import OCR
#PDFTOPPMPATH = r"./poppler/bin/pdftoppm.exe"


# In[4]:

# Run OCR.Convert on the match at index k (k = 1 is the second match, so
# this raises IndexError when fewer than two PDFs were found).
# NOTE(review): glob.glob does not promise sorted results, so index 1 may not
# be the same file on every machine - confirm whether that matters here.
k = 1
# Passed straight to OCR.Convert; presumably the rendering resolution - confirm.
dpi = 300
documentText, fname = OCR.Convert(files_pdf1[k], dpi, str(k))


# In[5]:

# Print the first element of the OCR output (presumably the first page's
# text - confirm against OCR.Convert).
print(documentText[0])


# # ----XX----

# ### IMPORT INFORMATION EXTRACTION (NLP) MODULE

# In[6]: