def read_docx_text(filepath, supplierID):
    """Extract text from a .docx file and collect detection flags for it.

    The text is obtained with ``docx2txt.process`` and then checked by
    ``utility.email_phone_url`` (on a flattened form of the text) and by
    ``detect_supplier_info`` (on the raw text). ``image_detect`` is run on
    the file itself.

    Args:
        filepath: Path of the document, passed to ``image_detect`` and
            ``docx2txt.process``.
        supplierID: Forwarded unchanged to ``detect_supplier_info``.

    Returns:
        A ``(flags, details)`` tuple. ``flags`` is a three-item list:
        element 0 of the ``image_detect`` result, element 0 of the
        ``utility.email_phone_url`` result, and ``[1]`` or ``[0]`` depending
        on whether a supplier was detected. ``details`` is element 1 of the
        ``utility.email_phone_url`` result (presumably a dict -- it is
        assigned to by key), extended in place with ``'image'`` and/or
        ``'suppliers'`` entries when those detections are truthy.
    """
    image_result = image_detect(filepath)
    raw_text = docx2txt.process(filepath)

    # The contact detector receives the str() of a list: the text split on
    # commas, with newlines and tabs inside each piece turned into spaces.
    pieces = [
        piece.replace('\n', ' ').replace('\t', ' ')
        for piece in raw_text.rstrip('\n').split(',')
    ]
    contact_result = utility.email_phone_url(str(pieces))
    contact_flags = contact_result[0]
    details = contact_result[1]

    supplier_result = detect_supplier_info(raw_text, supplierID)
    supplier_found = supplier_result[0]

    # The image flag is always reported; its value is only recorded in the
    # details when the first entry of the flag is truthy.
    image_flags = image_result[0]
    if image_flags[0]:
        details['image'] = image_result[1]

    supplier_flag = [0]
    if supplier_found:
        details['suppliers'] = supplier_result[1]
        supplier_flag = [1]

    return [image_flags, contact_flags, supplier_flag], details
def read_rtf_text_catdoc(filepath):
    """Extract text from a file with ``catdoc -w`` and collect detections.

    Args:
        filepath: Path of the document, passed to ``image_detect`` and to
            the ``catdoc`` command line.

    Returns:
        A three-item list holding, in order, the unmodified results of
        ``image_detect``, ``utility.email_phone_url`` and
        ``detect_supplier_info``.
    """
    import subprocess

    image = image_detect(filepath)

    # Run catdoc from an argument list, without a shell, so that quotes,
    # "$" or backticks in the path cannot alter the command. Text mode
    # mirrors the decoding os.popen() performed, and run() closes the pipe.
    try:
        completed = subprocess.run(
            ['catdoc', '-w', filepath],
            stdout=subprocess.PIPE,
            universal_newlines=True,
        )
        data_text = completed.stdout
    except OSError:
        # When the command could not be started, the previous shell-based
        # call produced empty output instead of raising; keep that contract.
        data_text = ''

    # The contact detector receives the str() of a list: the text split on
    # commas, with newlines inside each piece turned into spaces.
    pieces = [
        piece.replace('\n', ' ')
        for piece in data_text.rstrip('\n').split(',')
    ]
    email_url_phone = utility.email_phone_url(str(pieces))

    # NOTE(review): the other readers in this file pass a supplierID as the
    # second argument to detect_supplier_info -- confirm that the
    # one-argument call is valid.
    supplier = detect_supplier_info(data_text)

    return [image, email_url_phone, supplier]
def read_pdf_text(filepath, supplierID):
    """Extract text from a PDF file and collect detection flags for it.

    The text is taken from the ``"content"`` entry of
    ``parser.from_file(filepath)`` and then checked by
    ``utility.email_phone_url`` (on a flattened form of the text) and by
    ``detect_supplier_info`` (on the raw text). ``image_detect`` is run on
    the file itself.

    Args:
        filepath: Path of the document, passed to ``image_detect`` and
            ``parser.from_file``.
        supplierID: Forwarded unchanged to ``detect_supplier_info``.

    Returns:
        A ``(flagList, flagDetails)`` tuple. ``flagList`` is a three-item
        list: element 0 of the ``image_detect`` result, element 0 of the
        ``utility.email_phone_url`` result, and ``[1]`` or ``[0]`` depending
        on whether a supplier was detected. ``flagDetails`` is element 1 of
        the ``utility.email_phone_url`` result (presumably a dict -- it is
        assigned to by key), extended in place with ``'image'`` and/or
        ``'suppliers'`` entries when those detections are truthy.
    """
    imageInfo = image_detect(filepath)

    parsed = parser.from_file(filepath)
    # NOTE(review): `parser` is presumably Tika's parser, which reports
    # content as None when no text could be extracted (e.g. image-only
    # PDFs) -- confirm. Falling back to '' avoids a TypeError on None.
    data_text = parsed["content"] or ''

    # The contact detector receives the str() of a list: the text split on
    # commas, with newlines and tabs inside each piece turned into spaces.
    pieces = [
        piece.replace('\n', ' ').replace('\t', ' ')
        for piece in data_text.rstrip('\n').split(',')
    ]
    email_url_phone = utility.email_phone_url(str(pieces))
    email_url_phoneFlags = email_url_phone[0]
    flagDetails = email_url_phone[1]

    supplierInfo = detect_supplier_info(data_text, supplierID)
    supplierBoolean = supplierInfo[0]

    # The image flag is always reported; its value is only recorded in the
    # details when the first entry of the flag is truthy.
    imageBoolean = imageInfo[0]
    if imageBoolean[0]:
        flagDetails['image'] = imageInfo[1]

    suppliers = [0]
    if supplierBoolean:
        flagDetails['suppliers'] = supplierInfo[1]
        suppliers = [1]

    flagList = [imageBoolean, email_url_phoneFlags, suppliers]
    return flagList, flagDetails
def read_odt_text(filepath, supplierID):
    """Extract text from an .odt file and collect detection flags for it.

    The text is the standard output of the ``odt2txt`` command, decoded as
    ASCII with undecodable bytes dropped. It is then checked by
    ``utility.email_phone_url`` (on a flattened form of the text) and by
    ``detect_supplier_info`` (on the raw text). ``image_detect`` is run on
    the file itself.

    Args:
        filepath: Path of the document, passed to ``image_detect`` and to
            the ``odt2txt`` command line.
        supplierID: Forwarded unchanged to ``detect_supplier_info``.

    Returns:
        A ``(flags, details)`` tuple. ``flags`` is a three-item list:
        element 0 of the ``image_detect`` result, element 0 of the
        ``utility.email_phone_url`` result, and ``[1]`` or ``[0]`` depending
        on whether a supplier was detected. ``details`` is element 1 of the
        ``utility.email_phone_url`` result (presumably a dict -- it is
        assigned to by key), extended in place with ``'image'`` and/or
        ``'suppliers'`` entries when those detections are truthy.
    """
    image_result = image_detect(filepath)

    converter = Popen(['odt2txt', filepath], stdout=PIPE)
    raw_bytes, _ = converter.communicate()
    # Non-ASCII bytes are silently discarded by the 'ignore' error handler.
    raw_text = raw_bytes.decode('ascii', 'ignore')

    # The contact detector receives the str() of a list: the text split on
    # commas, with newlines and tabs inside each piece turned into spaces.
    pieces = [
        piece.replace('\n', ' ').replace('\t', ' ')
        for piece in raw_text.rstrip('\n').split(',')
    ]
    contact_result = utility.email_phone_url(str(pieces))
    contact_flags = contact_result[0]
    details = contact_result[1]

    supplier_result = detect_supplier_info(raw_text, supplierID)
    supplier_found = supplier_result[0]

    # The image flag is always reported; its value is only recorded in the
    # details when the first entry of the flag is truthy.
    image_flags = image_result[0]
    if image_flags[0]:
        details['image'] = image_result[1]

    supplier_flag = [0]
    if supplier_found:
        details['suppliers'] = supplier_result[1]
        supplier_flag = [1]

    return [image_flags, contact_flags, supplier_flag], details