コード例 #1
0
def upload_file():
    """Handle a file upload and run the document-extraction pipeline on it.

    Reads the ``file`` part of the current request, validates it, stores it
    under ``app.config['UPLOAD_FOLDER']`` and then passes the stored path
    through ``OCR.Convert`` -> ``Extract.Info`` -> ``Align.restructure``.

    Returns:
        On success, ``str(data)`` where ``data`` is the result of
        ``Align.restructure``. On a validation failure, a JSON response of
        the form ``{'message': ...}`` with HTTP status 400. The failure
        cases are: no ``file`` part in the request, an empty filename, or
        a file that ``allowed_file`` rejects.
    """

    def _bad_request(message):
        # Every validation failure has the same shape: JSON message + 400.
        resp = jsonify({'message': message})
        resp.status_code = 400
        return resp

    # check if the post request has the file part
    if 'file' not in request.files:
        return _bad_request('No file part in the request')

    file = request.files['file']
    if file.filename == '':
        return _bad_request('No file selected for uploading')

    if not (file and allowed_file(file.filename)):
        return _bad_request('Allowed file type is pdf')

    # Sanitize the client-supplied name before using it as a path component.
    filename = secure_filename(file.filename)
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(file_path)

    dpi = 300
    # Only the second value returned by OCR.Convert is used below; the first
    # is discarded. NOTE(review): the third argument is a fixed "1" here --
    # presumably an output label/index; confirm against OCR.Convert.
    _document_text, fname = OCR.Convert(file_path, dpi, str(1))
    data = Extract.Info(fname)
    data = Align.restructure(data)
    return str(data)
コード例 #2
0
# Collect the paths of all PDFs matching data/*.pdf (relative to the current
# working directory). `glob` must already be imported; the import is not
# visible in this part of the file.
# NOTE(review): glob.glob does not guarantee any ordering of its results, so
# indexing into this list below may pick a different file on another machine.
path_pdf1 = 'data/*.pdf'
files_pdf1 = glob.glob(path_pdf1)
# Bare expression: presumably left over from a notebook cell, where it
# displays the list; it has no effect when run as a plain script.
files_pdf1

# ### IMPORT OCR MODULE

# In[3]:

# OCR is not a standard-library module; presumably a project-local module
# that provides Convert() -- confirm.
import OCR
#PDFTOPPMPATH = r"./poppler/bin/pdftoppm.exe"

# In[4]:

# Run OCR on the file at index k of the glob result.
# NOTE(review): with k = 1 this raises IndexError when fewer than two PDFs
# match. `dpi` is presumably the rendering resolution and str(k) an output
# label -- confirm against OCR.Convert.
k = 1
dpi = 300
documentText, fname = OCR.Convert(files_pdf1[k], dpi, str(k))

# In[5]:

# Print the first element of the OCR result (presumably the first page's
# text -- confirm).
print(documentText[0])

# # ----XX----

#
#
#
#
# ### IMPORT INFORMATION EXTRACTION (NLP) MODULE

# In[6]: