Exemple #1
0
def extract_file(filename):
    """Parse an uploaded document and pass its content to ``nlp``.

    The document is read from the ``Upload`` folder under ``templates``
    via ``parser.from_file``; the ``'content'`` entry of the result is
    printed and then forwarded to ``nlp`` together with the company name
    and the original file name.

    Args:
        filename: Name of the file inside the upload folder.

    Returns:
        None. The result of ``nlp`` is not returned.
    """
    # NOTE(review): the user is looked up by a fixed username rather than
    # taken from a request -- confirm this is intentional.
    account = User.objects.get(username="siddhesh")
    company = account.company_name.company_name

    parsed = parser.from_file("templates\\Upload\\" + filename)
    content = parsed['content']
    print(content)
    nlp(content, company, filename)
Exemple #2
0
def extract_image_file(filename):
    """Run OCR on an uploaded image and pass the recognised text to ``nlp``.

    The image is loaded from the ``Upload`` folder under ``templates``,
    converted to grayscale, binarised (adaptive Gaussian threshold followed
    by an Otsu threshold), written to a temporary PNG named after the
    current process id, and read back through ``pytesseract``. The
    recognised text is printed and forwarded to ``nlp`` together with the
    company name and the original file name.

    Args:
        filename: Name of the image file inside the upload folder.

    Returns:
        None. If OpenCV cannot read the image, the function returns
        without calling ``nlp`` (previously this path crashed with
        ``UnboundLocalError`` because ``gray`` was never assigned).
    """
    # NOTE(review): the user is looked up by a fixed username rather than
    # taken from a request -- confirm this is intentional.
    account = User.objects.get(username="siddhesh")
    comp = account.company_name.company_name

    finalpath = "templates\\Upload\\" + filename
    image = cv2.imread(finalpath)
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None;
        # mirror extract_image_zip and skip processing in that case.
        return None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                 cv2.THRESH_BINARY, 23, 10)
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    temp_png = "{}.png".format(os.getpid())
    try:
        cv2.imwrite(temp_png, gray)
        # Close the image handle before the file is deleted below.
        with Image.open(temp_png) as ocr_input:
            text = pytesseract.image_to_string(ocr_input, lang="eng")
    finally:
        # Always clean up the scratch file, even when OCR fails.
        if os.path.exists(temp_png):
            os.remove(temp_png)

    print(text)
    nlp(text, comp, filename)
Exemple #3
0
def extract_zip(name, request):
    """Parse a file from the ``Media`` folder and run ``nlp`` on its content.

    Args:
        name: File name inside the ``Media`` folder under ``templates``.
        request: Request object; ``request.user.company_name.company_name``
            supplies the company passed to ``nlp``.

    Returns:
        Whatever ``nlp`` returns for the parsed content.
    """
    company = request.user.company_name.company_name

    print(name)
    media_path = "templates\\Media\\" + name
    print(media_path)

    parsed = parser.from_file(media_path)
    content = parsed['content']
    print(content)
    return nlp(content, company, name)
Exemple #4
0
def extract(name, request):
    """Parse the uploaded ``document`` file and run ``nlp`` on its content.

    Args:
        name: Ignored on entry; it is overwritten with the name of the
            uploaded file before being used.
        request: Request object providing ``user`` (for the company name)
            and ``FILES['document']`` (for the uploaded file's name).

    Returns:
        Whatever ``nlp`` returns for the parsed content.
    """
    company = request.user.company_name.company_name

    # The caller-supplied ``name`` is replaced by the uploaded file's name.
    name = str(request.FILES['document'].name)

    media_path = "templates\\Media\\" + name
    print(media_path)

    parsed = parser.from_file(media_path)
    content = parsed['content']
    print(content)

    return nlp(content, company, name)
Exemple #5
0
def extract_image_zip(name, request):
    """Run OCR on an image from the ``Media`` folder and pass the text to ``nlp``.

    The image is converted to grayscale, binarised (adaptive Gaussian
    threshold followed by an Otsu threshold), written to a temporary PNG
    named after the current process id, and read back through
    ``pytesseract``. The recognised text is printed and forwarded to
    ``nlp`` together with the company name and ``name``.

    Args:
        name: Image file name inside the ``Media`` folder under
            ``templates``.
        request: Request object; ``request.user.company_name.company_name``
            supplies the company passed to ``nlp``.

    Returns:
        Whatever ``nlp`` returns, or ``None`` when OpenCV cannot read the
        image.
    """
    comp = request.user.company_name.company_name

    finalpath = "templates\\Media\\" + name
    image = cv2.imread(finalpath)
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        return None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                 cv2.THRESH_BINARY, 23, 10)
    gray = cv2.threshold(gray, 0, 255,
                         cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    temp_png = "{}.png".format(os.getpid())
    try:
        cv2.imwrite(temp_png, gray)
        # Close the image handle before the file is deleted below.
        with Image.open(temp_png) as ocr_input:
            text = pytesseract.image_to_string(ocr_input, lang="eng")
    finally:
        # Remove the scratch file even when writing or OCR raises.
        if os.path.exists(temp_png):
            os.remove(temp_png)

    print(text)
    return nlp(text, comp, name)