Exemple #1
0
def export_ditie_img_to_txt(import_path, export_path='.'):
    """
    Convert every metro ("ditie") PNG image found under a folder to a txt file.

    Each image is passed to ``export_txtfile`` together with an output path
    of the form ``<export_path>/<name>ditie.txt``, where ``<name>`` is
    element 1 of ``file_operate.split_path(img)``.  A failure on one image
    is reported and logged, and processing continues with the next image.

    :param import_path: folder that is scanned for ``png`` files
    :param export_path: folder the txt files are written to
    :return: True if at least one image was converted, False otherwise
             (including when no png file was found)
    """
    img_files = file_operate.get_files_by_extension(import_path,
                                                    extension_name='png')
    if not img_files:
        print("文件夹没有文img文件")
        logging.warning('文件夹没有img文件')
        return False

    rst = False
    for img in img_files:
        try:
            txtname = os.path.join(
                export_path, f'{file_operate.split_path(img)[1]}ditie.txt')
            export_txtfile(img, txtname)
            rst = True
        except Exception as e:
            # Best effort: one bad image must not stop the remaining ones.
            print(e)
            # Append instead of truncating so that every failure of the run
            # stays in the log, and let the with-block close the handle
            # (the handle used to be left open after each failure).
            with open('log.txt', 'a', encoding='utf-8') as log_file:
                traceback.print_exc(file=log_file)
    return rst
Exemple #2
0
def export_pdf_to_txt(import_path, export_path='.'):
    """
    Convert every pdf file found under a folder to txt.

    Each pdf is passed to ``pdf2text`` together with ``export_path``.  A
    failure on one file is reported and logged, and processing continues
    with the next file.

    :param import_path: folder that is scanned for ``pdf`` files
    :param export_path: folder the txt files are written to
    :return: True if at least one pdf was converted, False otherwise
             (including when no pdf file was found)
    """
    pdf_files = file_operate.get_files_by_extension(import_path,
                                                    extension_name='pdf')
    if not pdf_files:
        print("文件夹没有文pdf件")
        logging.warning('文件夹没有pdf文件')
        return False

    rst = False
    for pdf in pdf_files:
        try:
            pdf2text(pdf, export_path)
            rst = True
        except Exception as e:
            # Best effort: one bad pdf must not stop the remaining ones.
            print(e)
            # Append instead of truncating so that every failure of the run
            # stays in the log, and let the with-block close the handle
            # (the handle used to be left open after each failure).
            with open('log.txt', 'a', encoding='utf-8') as log_file:
                traceback.print_exc(file=log_file)
    return rst
Exemple #3
0
except:
    print("ditie_starttime或ditie_endtime或maildownload_endtime或maildownload_starttime格式错误")

# Clear whatever was downloaded previously.
# NOTE(review): os.remove raises on a directory entry — this presumably
# relies on the download folder containing only plain files; confirm.
for file in os.listdir(maildownloadfolder):
    os.remove(os.path.join(maildownloadfolder,file))
# (disabled) create the working folders when they do not exist yet:
# for folder in exportfolder,importfolder,maildownloadfolder:
#     '' if os.path.exists(folder) else os.mkdir(folder)
# # Download the Didi attachments and the metro attachments from the mailbox into the folder
mailprocess.get_baoxiao_info(maildownload_starttime,maildownload_endtime,maildownloadfolder)

# Document information extracted from the metro screenshots
dite_data=[]
filelist=[]
# Only look for txt output when at least one image was converted.
if  ImageReconize.export_ditie_img_to_txt(importfolder, exportfolder):
    filelist=file_operate.get_files_by_extension(exportfolder,'txt')
    for filepath in filelist:
        # Skip txt files whose name part does not contain "ditie".
        i=file_operate.split_path(filepath)[1].find("ditie")
        if i<0:
            continue
        tmp=dataprocess.ditie_data_process(filepath,reason,ditie_starttime,ditie_endtime)
        # An empty result means nothing usable was extracted from this file.
        if not len(tmp):
            print(f'文件路径:{filepath}   无法获取报销信息')
            continue
        # The first non-empty result becomes the accumulator as-is ...
        if not len(dite_data):
            dite_data=tmp
            continue
        # ... and every later result is stacked underneath it.
        dite_data=np.vstack((dite_data,tmp))
# pdf information extraction ---------------------------------------------------
didi_data=[]
Exemple #4
0
        if srt<0:
            break
        end=txt.find('\n',srt)
        if end<0:
            print(txt[srt:])
            break
        data=txt[srt:end].split(" ")
        if len(data)!=9:
            break
        data=[data[1],data[5],data[6],"出租车",reason,"","","","",data[8]]
        inf.append(data)
        srt=txt.find("快车",end)
    return inf
# ------------------------ user-supplied information --------------------------
REASON="检查样机进度"
filelist=file_operate.get_files_by_extension(r'C:\Users\123456\PycharmProjects\报销自动化\pdf识别','txt')
inf = get_excel_dididatas(filelist,REASON)
print(inf)
# NOTE(review): inf[0] raises IndexError when no record was extracted.
print(len(inf[0]))
# --------------------------- Excel operations ---------------------------
# First and last worksheet row of the range that is written below.
SRT_ROW=11
END_ROW=11
excel=win32.gencache.EnsureDispatch('Excel.Application')
wb=excel.Workbooks.Open(r'C:\Users\123456\PycharmProjects\报销自动化\表格处理\交通费报销表昆山3.xlsx')
excel.Visible=True
ws=wb.Worksheets(1)
for data in inf:
    # NOTE(review): SRT_ROW and END_ROW are not changed in the visible part
    # of this loop, so every record targets the same range B11:K11 —
    # confirm whether the rows are advanced in code beyond this excerpt.
    cell_range=f'B{SRT_ROW}:K{END_ROW}'
    print(cell_range)
    print(data)
    ws.Range(cell_range).Value=tuple(data)
Exemple #5
0
import sys
sys.path.append(r'C:\Users\123456\PycharmProjects\usermodules')
from file_operate import file_operate
from MyException import MyException
from MyException.MyException import FILE_ERROR_INFOR
import fitz
import os
import traceback
# Print the fitz module docstring and version at import time.
print(fitz.__doc__)
print(fitz.version)
# pdf files to recognise; only the extension is passed, so the folder that
# is searched is whatever get_files_by_extension uses by default.
pdf_file = file_operate.get_files_by_extension(extension_name='pdf')


def pdf2text(file_path, output_path='.'):
    """

    :param filename: 文件路径
    :return: filename.txt
    """

    if not file_operate.exist_file(file_path):
        raise MyException.FileErrorException(
            FILE_ERROR_INFOR['FILE_NOT_FOUNT'])
    if not file_operate.check_extension(file_path):
        raise MyException.FileErrorException(
            FILE_ERROR_INFOR['FILE_EXTESION_ERROR'])
    pdf_file = fitz.open(file_path)

    page1 = pdf_file[0]
    text = page1.getText('text')