Python GFlist Beispiele, thtml.utilth.GFlist Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: txt2html.py Projekt: davischan3168/packages

def txt2htmlGF(path=None,
               m1=re.compile(r'^第\w{1,3}[编|篇]'),
               m2=re.compile(r'^第\w{1,3}章'),
               m3=re.compile(r'^第\w{1,3}节'),
               ind=True,
               regrex1=None,
               search=None,
               Startw=None):
    """Collect source files and hand them to ``txt2htmlv1``.

    Args:
        path: A list of file paths (used as given), a single file path,
            a directory path, or None to use the current working directory.
        m1, m2, m3: Compiled heading patterns forwarded unchanged to
            ``txt2htmlv1`` (defaults match lines starting with
            "第…编/篇", "第…章" and "第…节").
        ind: Forwarded to ``txt2htmlv1`` as ``index``.
        regrex1, search, Startw: Forwarded to ``GFlist`` as ``regrex1``,
            ``research`` and ``startw`` when a directory is scanned.

    Returns:
        None. Per the original author's notes, ``txt2htmlv1`` merges the
        files into a single HTML output -- confirm against that helper.
    """
    # Resolve the default first: os.path.isfile(None) raises TypeError, so
    # the None case has to be handled before any os.path test is attempted.
    if path is None:
        path = os.getcwd()

    files = []
    if isinstance(path, list):
        files.extend(path)
    elif os.path.isfile(path):
        files.append(path)
    elif os.path.isdir(path):
        ss = GFlist(path, regrex1=regrex1, research=search, startw=Startw)
        # Each GFlist entry is indexable; element 1 is used as the file path.
        files = [entry[1] for entry in ss]

    txt2htmlv1(files, m1=m1, m2=m2, m3=m3, index=ind)
    return

Beispiel #2

0

Datei anzeigen

Datei: abstract.py Projekt: davischan3168/newpackage

def absfile(path,func=abssplit,regrex1=None,\
            Research=None,Startw=None,p1=re.compile('裁判要点'),\
            p2=re.compile('相关法条'),rc=re.compile(r'裁判要点\W*(.*?\s*.*?)\W*相关法条')):
    """Run an extraction function over files and save results in ``temp_dir``.

    Args:
        path: A list of files and/or directories, a single file, a single
            directory, or None for the current working directory.
        func: Extraction callable, dispatched on ``func.__name__``:
            ``'abstract'`` is called as ``func(f, rc=rc)`` and
            ``'abssplit'`` as ``func(f, p1=p1, p2=p2)``. Any other name
            produces no output files.
        regrex1, Research, Startw: Forwarded to ``GFlist`` as ``regrex1``,
            ``research`` and ``startw`` when a directory is scanned.
        p1, p2, rc: Compiled patterns forwarded to ``func``.

    Returns:
        List of paths written under ``temp_dir`` (one per successfully
        written input, named after the input's basename).
    """

    def _scan(directory):
        # Element 1 of each GFlist entry is used as the file path.
        listing = GFlist(directory,
                         regrex1=regrex1,
                         research=Research,
                         startw=Startw)
        return [entry[1] for entry in listing]

    files = []
    if path is None:
        # Scan the working directory instead of handing None to GFlist.
        files.extend(_scan(os.getcwd()))
    elif isinstance(path, list):
        for item in path:
            if os.path.isfile(item):
                files.append(item)
            elif os.path.isdir(item):
                # Scan this directory only, not the whole input list.
                files.extend(_scan(item))
    elif os.path.isfile(path):
        files.append(path)
    elif os.path.isdir(path):
        # Plain directory strings are handled here as well.
        files.extend(_scan(path))

    tdir = 'temp_dir'
    if not os.path.exists(tdir):
        os.mkdir(tdir)

    # Choose the keyword arguments once instead of duplicating the loop.
    fname = func.__name__
    if fname == 'abstract':
        kwargs = {'rc': rc}
    elif fname == 'abssplit':
        kwargs = {'p1': p1, 'p2': p2}
    else:
        return []

    Tfile = []
    for f in files:
        nf = os.path.join(tdir, os.path.basename(f))
        text = func(f, **kwargs)
        try:
            with open(nf, 'w', encoding='utf8') as gf:
                gf.write(text)
        except Exception:
            # Best effort: skip inputs whose result cannot be written
            # (e.g. func returned a non-string), but do not swallow
            # KeyboardInterrupt/SystemExit as a bare except would.
            continue
        Tfile.append(nf)
    return Tfile

Beispiel #3

0

Datei anzeigen

def absAPPhtml(path,
               outdir='',
               regrex1=re.compile(r'检例第(\d*)号'),
               rc=re.compile(r'(.*?案\s*（检例第\d*号）)'),
               p1=re.compile('【要旨】'),
               p2=re.compile(r'\【\w*】'),
               yz=True):
    """Build ``outputabsSPP.html`` from files produced by ``absSPP``.

    ``absSPP`` is run with ``tdir=outdir``; the directory is then listed
    with ``GFlist``, rendered through ``_hh`` and ``make_Mulu_content``,
    written to ``outputabsSPP.html`` in the working directory, and finally
    ``outdir`` is deleted.

    Args:
        path: Forwarded to ``absSPP`` as ``path``.
        outdir: Working directory for ``absSPP``; an empty string selects
            ``'itempdit'``. The directory is removed before returning.
        regrex1: Forwarded to ``GFlist`` when listing ``outdir``.
        rc, p1, p2, yz: Forwarded unchanged to ``absSPP``.

    Returns:
        None.
    """
    if outdir == '':
        outdir = 'itempdit'
    absSPP(path=path, tdir=outdir, rc=rc, p1=p1, p2=p2, yz=yz)
    ss = GFlist(outdir, regrex1=regrex1)
    Tfile = [entry[1] for entry in ss]
    htmlcode = _hh(outdir)
    tb, ctt = make_Mulu_content(Tfile)
    htmlName = 'outputabsSPP.html'

    def _dump(encoding):
        # The context manager closes the handle on every path; the original
        # leaked the first handle whenever it fell back to gbk.
        with open(htmlName, 'w', encoding=encoding) as html:
            for part in (htmlcode, tb, ctt, '</body></html>'):
                html.write(part)

    try:
        _dump('utf8')
    except UnicodeEncodeError:
        # Same fallback as before: retry with gbk, truncating the partial
        # utf8 output because the file is reopened in 'w' mode.
        _dump('gbk')

    shutil.rmtree(outdir)
    return

Beispiel #4

0

Datei anzeigen

def TopyhtmlGF(pf, regrex1=None, search=None, index=True, Startw=None):
    """Write ``<name>_content.html`` containing one link per collected file.

    Args:
        pf: A directory path, a list of file paths (used as given), or
            None for the current working directory.
        regrex1, search, Startw: Forwarded to ``GFlist`` as ``regrex1``,
            ``research`` and ``startw`` when a directory is scanned.
        index: Accepted but not used by this function.

    Returns:
        None. The page is written to the current working directory.
    """
    # A list or None has no .replace(); use a fixed page name for those
    # inputs instead of failing before the branches below can run.
    if pf is None or isinstance(pf, list):
        pfname = 'selectdirs'
    else:
        pfname = pf.replace('/', '')

    htmlf = pfname + '_content.html'
    p = getcsspath()
    ll = title + '\n' + title1 + ft % p + title2 + '\n'
    if os.path.exists(htmlf):
        os.remove(htmlf)
    # Write the page header once (the original wrote the identical header a
    # second time after collecting the files).
    with open(htmlf, 'w', encoding='utf8') as f:
        f.write(ll)
        f.write('<div id="content"> \n')
        f.write('<h1 class="title">%s</h1>\n<ul class="org-ul">\n' % pfname)

    files = []
    if isinstance(pf, list):
        files.extend(pf)
    else:
        scan_cwd = pf is None
        if scan_cwd:
            # Scan the working directory rather than passing None to GFlist.
            pf = os.getcwd()
        if scan_cwd or os.path.isdir(pf):
            ss = GFlist(pf, regrex1=regrex1, research=search, startw=Startw)
            # Element 1 of each GFlist entry is used as the file path.
            files = [entry[1] for entry in ss]

    for ff in files:
        name = os.path.splitext(os.path.basename(ff))[0]
        fpath = pathname2url(ff)
        line = '<li><code>[&#xa0;]</code> <a href=%s>%s</a>\n</li>' % (fpath,
                                                                       name)
        try:
            write_file(htmlf, line)
        except Exception as e:
            # Keep going: one failed entry should not abort the whole page.
            print(e)

    write_file(htmlf, r"</ul>" + '\n')
    write_file(htmlf, '</div>\n</body>\n</html>')
    return

Beispiel #5

0

Datei anzeigen

def C2html_AllinOneGF(txtpath=None,regrex1=None,Research=None,index=True,Startw=None,py=False):
    """Gather files and pass them to ``C2html`` in a single call.

    Per the original author's note, the text files under ``txtpath`` end up
    in one ``output.html``; that behaviour lives in ``C2html``.

    txtpath: a list of files (used as given), a directory, or None for the
        current working directory. Anything else yields an empty file list.
    regrex1 / Research / Startw: forwarded to ``GFlist`` as ``regrex1``,
        ``research`` and ``startw``.
    index / py: forwarded to ``C2html``.
    """
    if isinstance(txtpath, list):
        collected = list(txtpath)
    else:
        use_cwd = txtpath is None
        if use_cwd:
            txtpath = os.getcwd()
        if use_cwd or os.path.isdir(txtpath):
            listing = GFlist(txtpath, regrex1=regrex1, research=Research, startw=Startw)
            # Second element of each entry is the path.
            collected = [row[1] for row in listing]
        else:
            collected = []

    C2html(collected, index=index, py=py)
    return

Beispiel #6

0

Datei anzeigen

Datei: abstract.py Projekt: davischan3168/newpackage

def absAPPhtml(path,outdir='',regrex1=re.compile('检例第(\d*)号'),\
                 rc=re.compile('(.*?案\s*（检例第\d*号）)'),\
                 p1=re.compile('【要\s*旨】'),\
                 p2=re.compile('【\w*】'),\
                 yz=True,func=C2html):
    """
    Run ``absSPP`` into a scratch directory, render the resulting files
    with ``func``, then delete the scratch directory.

    Per the original author's note this targets the guiding cases of the
    Supreme People's Procuratorate (classifying each case or extracting
    its gist).

    func is dispatched on ``func.__name__``: ``'C2html'`` is called with
    the file list only, ``'txt2htmlall'`` additionally gets
    ``mformat='AIO'``; any other callable is not invoked.
    """
    workdir = outdir if outdir != '' else 'itempdit'
    absSPP(path=path, tdir=workdir, rc=rc, p1=p1, p2=p2, yz=yz)

    # Second element of each GFlist entry is the path.
    extracted = [entry[1] for entry in GFlist(workdir, regrex1=regrex1)]

    renderer = func.__name__
    if renderer == 'C2html':
        func(extracted)
    elif renderer == "txt2htmlall":
        func(extracted, mformat='AIO')

    shutil.rmtree(workdir)
    return

Beispiel #7

0

Datei anzeigen

def MainSpp(path,outdir='itempdit',regrex1=re.compile(r'检例第(\d*)号'),\
            rc=re.compile(r'(.*?案\s*（检例第\d*号）)'),\
            p1=re.compile('【要旨】'),p2=re.compile(r'\【\w*】'),\
            yz=True,OutFile='MainSpp',mtype='pad',\
            pyin=False,Total='max',item1_bool=False,item2_bool=False,\
            item0_bool=False):
    """Run ``absSPP`` into ``outdir`` and typeset the results with xelatex.

    Args:
        path: Forwarded to ``absSPP`` as ``path``.
        outdir: Scratch directory for ``absSPP`` (empty string selects
            ``'itempdit'``); removed before returning.
        regrex1: Forwarded to ``GFlist`` when listing ``outdir``.
        rc, p1, p2, yz: Forwarded unchanged to ``absSPP``.
        OutFile: Base name of the generated ``.tex`` file(s).
        mtype: Key into the module-level ``latexs`` mapping (preamble).
        pyin, item0_bool, item1_bool, item2_bool: Forwarded to
            ``Singal_input`` for every listed file.
        Total: ``'max'`` puts every entry in one document; an int puts
            that many entries into each of ``OutFile_01``, ``OutFile_02``,
            ... ; anything else prints a message and builds nothing.

    Returns:
        None. Calls ``sys.exit()`` when ``GFlist`` yields no entries.
    """

    def _typeset(tex_name, entries):
        # Write one driver .tex file, run xelatex twice, then let _removef
        # clean up. The context manager guarantees the file is closed
        # before xelatex reads it.
        with open(tex_name,'w',encoding='utf8') as fl:
            fl.write(latexs[mtype]+'\n\n')
            for entry in entries:
                fl.write(r'\input{%s}'%entry[1])
                fl.write(r'\newpage')
            fl.write(end)
        os.system('xelatex -no-pdf -interaction=nonstopmode %s' %tex_name)
        os.system('xelatex -interaction=nonstopmode %s' %tex_name)
        _removef(tex_name)

    if outdir=='':
        outdir='itempdit'
    absSPP(path=path,tdir=outdir,rc=rc,p1=p1,p2=p2,yz=yz)

    # Map each entry's first element (sort key) to what Singal_input returns
    # for its second element (the path).
    txt_files={}
    for i in GFlist(outdir,regrex1=regrex1):
        txt_files[i[0]]=Singal_input(i[1],pyin,item1_bool=item1_bool,item2_bool=item2_bool,item0_bool=item0_bool)

    if not txt_files:
        print('No files 适合条件')
        sys.exit()
    txt_files1=sorted(txt_files.items(),key=lambda item:item[0])

    if Total=='max':
        _typeset(OutFile+'.tex',txt_files1)
    elif isinstance(Total,int):
        # Build each chunk exactly once. The original wrapped this in an
        # extra loop over every entry, regenerating all chunks N times.
        chunks=[txt_files1[i:i+Total] for i in range(0,len(txt_files1),Total)]
        for fn,chunk in enumerate(chunks,start=1):
            _typeset(OutFile+'_%s.tex'%str(fn).zfill(2),chunk)
    else:
        print('Total is max out int, please input the right parameter.')

    shutil.rmtree(outdir)
    return

Beispiel #8

0

Datei anzeigen

def MainsAbs(txtpath,func=abssplit,OutFile='Mainabs',mtype='pad',\
             pyin=False,Total='max',regrex1=None,Research=None,\
             Startw=None,rc=re.compile(r'\裁判要点\W*(.*?)\W*相关法条'),\
             p1=re.compile('裁判要点'),p2=re.compile('相关法条'),\
             item1_bool=False,item2_bool=False,\
             item0_bool=False):
    """Extract text from each input file and typeset the results with xelatex.

    Args:
        txtpath: A list of files (used as given), a directory, or None for
            the current working directory.
        func: Extraction callable, dispatched on ``func.__name__``:
            ``'abstract'`` is called as ``func(f, rc=rc)`` and
            ``'abssplit'`` as ``func(f, p1=p1, p2=p2)``. Other callables
            are never invoked.
        OutFile: Base name of the generated ``.tex`` file(s).
        mtype: Key into the module-level ``latexs`` mapping (preamble).
        pyin, item0_bool, item1_bool, item2_bool: Forwarded to
            ``Singal_input``.
        Total: ``'max'`` for a single document, or an int giving the
            number of entries per document; anything else prints a
            message and builds nothing.
        regrex1, Research, Startw: Forwarded to ``GFlist`` as ``regrex1``,
            ``research`` and ``startw`` (only ``regrex1`` is used when the
            scratch directory is listed).
        rc, p1, p2: Compiled patterns forwarded to ``func``.

    Returns:
        None. Calls ``sys.exit()`` when no extracted file is listed. The
        scratch directory ``temp_dir`` is removed before returning.
    """

    def _typeset(tex_name, entries):
        # Write one driver .tex file, run xelatex twice, then let _removef
        # clean up. The file is closed before xelatex reads it.
        with open(tex_name,'w',encoding='utf8') as fl:
            fl.write(latexs[mtype]+'\n\n')
            for entry in entries:
                fl.write(r'\input{%s}'%entry[1])
                fl.write(r'\newpage')
            fl.write(end)
        os.system('xelatex -no-pdf -interaction=nonstopmode %s' %tex_name)
        os.system('xelatex -interaction=nonstopmode %s' %tex_name)
        _removef(tex_name)

    files=[]
    if isinstance(txtpath,list):
        files.extend(txtpath)
    else:
        scan_cwd=txtpath is None
        if scan_cwd:
            txtpath=os.getcwd()
        if scan_cwd or os.path.isdir(txtpath):
            ss=GFlist(txtpath,regrex1=regrex1,research=Research,startw=Startw)
            files=[i[1] for i in ss]

    tdir='temp_dir'
    if not os.path.exists(tdir):
        os.mkdir(tdir)

    # Pick the keyword arguments once; None means "unknown extractor".
    fname=func.__name__
    if fname=='abstract':
        extract_kwargs={'rc':rc}
    elif fname=='abssplit':
        extract_kwargs={'p1':p1,'p2':p2}
    else:
        extract_kwargs=None

    for f in files:
        print(f)
        if extract_kwargs is None:
            continue
        nf=os.path.join(tdir,os.path.basename(f))
        try:
            text=func(f,**extract_kwargs)
            with open(nf,'w',encoding='utf8') as gf:
                gf.write(text)
        except Exception:
            # Best effort per file; unlike a bare except this lets
            # KeyboardInterrupt/SystemExit propagate.
            print('没有相应的内容for %s'%fname)

    txt_files={}
    for i in GFlist(tdir,regrex1=regrex1):
        txt_files[i[0]]=Singal_input(i[1],pyin,item1_bool=item1_bool,item2_bool=item2_bool,item0_bool=item0_bool)

    if not txt_files:
        print('No files 适合条件')
        sys.exit()
    txt_files1=sorted(txt_files.items(),key=lambda item:item[0])

    if Total=='max':
        _typeset(OutFile+'.tex',txt_files1)
    elif isinstance(Total,int):
        # Build each chunk exactly once. The original wrapped this in an
        # extra loop over every entry, regenerating all chunks N times.
        chunks=[txt_files1[i:i+Total] for i in range(0,len(txt_files1),Total)]
        for fn,chunk in enumerate(chunks,start=1):
            _typeset(OutFile+'_%s.tex'%str(fn).zfill(2),chunk)
    else:
        print('Total is max out int, please input the right parameter.')

    shutil.rmtree(tdir)
    return

Beispiel #9

0

Datei anzeigen

def GenerateBookGF(path,regrex1=None,\
               search=None,startw=None,\
               exclude=None,\
               func=C2html,\
                   item1_bool=False,\
                   item2_bool=False,\
                   item0_bool=False,\
               htmlfile='htmlfile/htmlbook_output',\
               pdffile='htmlbook_Main',mtype='article',\
               num=None,pyin=False,File_num='max',\
               m1=re.compile(r'^第\w{1,3}[编|篇]'),\
               m2=re.compile(r'^第\w{1,3}章'),\
               m3=re.compile(r'^第\w{1,3}节'),\
               m4=re.compile(r'^\w{1,3}、'),\
                   index=True,res=True,\
                   Spp=False,\
                   Spplit=False,\
                   rc=re.compile('(.*?案\s*（检例第\d*号）)'),\
                   p1=re.compile('【要\s*旨】'),p2=re.compile('【\w*】'),yz=True):

    """
    Select, de-duplicate, filter and sort files, then pass them to ``func``.

    path: a file, a directory, or a list mixing both.
    regrex1: compiled pattern applied to each file's basename; the first
        non-empty match is converted with int() and used as the sort key.
        When None, a cleaned-up basename string is the sort key instead.
    search: str or list of str; keep only files whose basename contains
        one of these keywords.
    startw: compiled pattern; keep only files whose basename it matches
        from the start (``startw.match``).
    exclude: str or list of str; drop files whose basename contains one
        of these keywords.
    num: forwarded to ``func`` when it is ``PdfFile`` (the original note
        says it plays the same role as regrex1 for the LaTeX output).
    m1, m2, m3: heading patterns forwarded to ``func`` when it is
        ``C2html`` or ``txt2htmlv1`` (original note: level 1-3 headings).
    m4: described by the original note as the level 4 heading pattern;
        not referenced in this function body.
    res: passed as ``reverse`` to the final sort.
    Spplit: when true, ``shutil.rmtree(path)`` is called at the end.
    Spp, rc, p1, p2: accepted but not referenced in this function body.
    """
    # Shortcut: these two entry points do their own file handling, so
    # delegate immediately and skip the selection pipeline below.
    if func.__name__ in ['MainSpp']:
        func(path,yz=yz,mtype=mtype)
        return
    if func.__name__ in ['MainsAbs']:
        func(path,pyin=pyin,Startw=startw,mtype=mtype,regrex1=regrex1)
        return

    # Pattern stripped from basenames to build string sort keys when no
    # regrex1 is given.
    # NOTE(review): ':-》' inside the character class is parsed as a range,
    # not three literal characters -- confirm that is intended.
    cc=re.compile('([，、:-》.《—_;；〈〉<>【】（）()])*\s*-')
    
    # Normalise `search` to a list of keywords.
    rs=[]
    if isinstance(search ,list):
        rs.extend(search)
    elif isinstance(search ,str):
        rs.append(search)

    # Normalise `exclude` to a list of keywords.
    excl=[]
    if isinstance(exclude ,list):
        excl.extend(exclude)
    elif isinstance(exclude,str):
        excl.append(exclude)    

    # Split the input into explicit files and directories to scan.
    file_list = []
    path_list = []        
    if isinstance(path,list):
        for f in path:
            if isfile(f):
                file_list.append(f)
            elif isdir(f):
                path_list.append(f)
            
    elif isfile(path):
        file_list.append(path)
    elif isdir(path):
        path_list.append(path)
    elif path is None:
        # NOTE(review): `txtpath` is never used and path_list stays empty
        # here. If `isfile` is os.path.isfile, the isfile(path) test above
        # raises TypeError for None first, so this branch looks
        # unreachable -- confirm.
        txtpath = os.getcwd()

    else:
        print('Please in list of dir/file,or dir,file')
        sys.exit()
                   
    # GFlist receives the whole list of directories; element 1 of each
    # returned entry is used as the file path.
    File_tmp = GFlist(path_list)

    for ff in File_tmp:
        file_list.append(ff[1])

    # De-duplicate on the Chinese-characters(+digits) part of the basename,
    # keeping the first file seen for each such name. Basenames with no
    # Chinese characters all reduce to '' and therefore collapse to one.
    only_one = set()
    fls = []
    word = re.compile(r'[\u4e00-\u9fa5]+\d*')
    for ff in file_list:
        aa = os.path.basename(ff)
        nwd = ''.join(word.findall(aa))
        if nwd not in only_one:
            only_one.add(nwd)
            fls.append(ff)

    if len(fls) > 0:
        file_list = fls    

    # Collect files whose basename contains any excluded keyword.
    temff = set()    
    if exclude is not None:
        for ff in file_list:
            for ex in excl:
                if ex in os.path.basename(ff):
                    temff.add(ff)

    File_tmp = [f for f in file_list if f not in temff]

    # Build {sort key: path}. Files sharing a key overwrite one another.
    Final_list = {}
    for f in File_tmp:
        ff=basename(f)
        if regrex1 is not None:
            # Only .txt/.doc/.docx files are considered in this mode.
            if splitext(ff)[1].lower() in ['.txt','.doc','.docx']:
                i1 = [i for i in regrex1.findall(ff) if len(i) > 0]
                # Second attempt on the name after ut.ChNumToArab
                # (presumably Chinese numerals -> Arabic digits; verify).
                i2 = [i for i in regrex1.findall(ut.ChNumToArab(ff)) if len(i) > 0]
                if len(i1) > 0:
                    num1 = int(i1[0])
                    Final_list[num1] = f
                elif len(i2) > 0:
                    num1= int(i2[0])
                    Final_list[num1] = f
        else:
            # String key: basename with the `cc` pattern and '&nbsp' removed.
            num1 = cc.sub('', ff).replace('&nbsp', '')
            Final_list[num1] = f
        
    # Keep only entries whose basename contains one of the search keywords;
    # exit if nothing matches.
    if search is not None:
        Tem={}
        for k,v in Final_list.items():
            for rsch in rs:
                if rsch in basename(v):
                    Tem[k]=v
        if len(Tem)>0:
            Final_list=Tem
        else:
            print('没有关于 "%s" 的文件'%search)
            sys.exit()

    # Keep only entries whose basename matches `startw` at the start;
    # exit if nothing matches.
    if startw is not None:
        dff={}
        for k,v in Final_list.items():
            if startw.match(basename(v)) is not None:
                #print('start word ...',v)
                dff[k]=v
        if len(dff)>0:
            Final_list=dff
        else:
            print('没有符合的文件')
            sys.exit()            
            
    if len(Final_list)>0:
        # Sort by key; `res` (default True) reverses the order.
        Final=sorted(Final_list.items(),key=lambda item:item[0],reverse=res)
        Final_files=[i[1] for i in Final]
        
        #if res:
        #    Final_files
        # Dispatch on the callable's name to choose the argument set.
        if func.__name__ in ['C2html','txt2htmlv1']:
            func(Final_files,output=htmlfile,m1=m1,m2=m2,m3=m3,index=index)
            pass
        elif func.__name__ in ['PdfFile']:
            func(Final_files,OutFile=pdffile,mtype=mtype,\
                 num=num,pyin=pyin,Total=File_num,\
                 item0_bool=item0_bool,\
                 item1_bool=item1_bool,item2_bool=item2_bool)
            #os.remove(pdffile+'.pdf','htmlfile/'+pdffile+'.pdf')
            pass
        else:
            print('Please input right function:','C2html','C2htmlBase','txt2htmlv1','txt2html_inonefile','PdfFile')
    # NOTE(review): this deletes the caller-supplied `path` tree itself.
    if Spplit:
        shutil.rmtree(path)
    return#Final_files

Beispiel #10

0

Datei anzeigen

def TopyhtmlGF(pf, regrex1=None, search=None, index=True, Startw=None):
    """Write ``<name>_content.html`` linking to files, grouped by directory.

    pf may be a path string (file or directory), a list of such paths, or
    None for the current working directory. Directories are expanded with
    ``GFlist`` (``regrex1``, ``search`` and ``Startw`` are forwarded as
    ``regrex1``, ``research`` and ``startw``). Every directory gets one
    entry the first time a file from it is seen, followed by one entry per
    file. ``index`` is accepted but unused.

    Raises Exception when pf has an unsupported type or names a path that
    does not exist.
    """
    if isinstance(pf, str):
        label = pf.replace('/', '_')
    elif isinstance(pf, list) or pf is None:
        label = 'selectdirs'
    else:
        raise Exception('请输入文件目录')

    out_html = label + '_content.html'
    css = getcsspath()
    header = title + '\n' + title1 + ft % css + title2 + '\n'
    if os.path.exists(out_html):
        os.remove(out_html)

    def _scan(directory):
        # Second element of each GFlist entry is the path.
        listing = GFlist(directory,
                         regrex1=regrex1,
                         research=search,
                         startw=Startw)
        return [row[1] for row in listing]

    candidates = []
    if pf is None:
        pf = os.getcwd()
        candidates.extend(_scan(pf))
    elif isinstance(pf, list):
        for item in pf:
            if os.path.isdir(item):
                # The directory itself is recorded too; it is skipped later
                # by the isfile() filter when the page is written.
                candidates.append(item)
                print(item)
                candidates.extend(_scan(item))
            elif os.path.isfile(item):
                candidates.append(item)
    else:
        # Only str can reach this point (other types raised above).
        if not os.path.exists(pf):
            raise Exception('文件不存在,请输入正确的文件或目录')
        if os.path.isdir(pf):
            candidates.extend(_scan(pf))
        elif os.path.isfile(pf):
            candidates.append(pf)

    with open(out_html, 'w', encoding='utf8') as page:
        page.write(header
                   + '<div id="content"> \n'
                   + ('<h1 class="title">%s</h1>\n<ul class="org-ul">\n' % label))

    seen_dirs = set()
    for item in candidates:
        if not os.path.isfile(item):
            continue
        parent = os.path.dirname(item)
        if parent not in seen_dirs:
            seen_dirs.add(parent)
            parent_name = os.path.split(parent)[1]
            parent_url = pathname2url(os.path.abspath(parent))
            write_file(
                out_html,
                '\n<li>- d &ensp;<a href=%s>%s</a> \n</li>\n' % (parent_url,
                                                                 parent_name))
        stem = os.path.splitext(os.path.basename(item))[0]
        link = pathname2url(os.path.abspath(item))
        entry = '<ul>  - <code>[&#xa0;]</code> <a href=%s>%s</a>\n</ul>' % (
            link, stem)
        try:
            write_file(out_html, entry)
        except Exception as err:
            print(err)

    write_file(out_html, r"</ul>" + '\n')
    write_file(out_html, '</div>\n</body>\n</html>')
    return

Beispiel #11

0

Datei anzeigen

def absTFilehtml(txtpath,
                 func=abssplit,
                 rc=re.compile(r'裁判要点\W*(.*?\s*.*?)\W*相关法条'),
                 p1=re.compile('裁判要点'),
                 p2=re.compile('相关法条'),
                 regrex1=None,
                 Research=None,
                 index=True,
                 Startw=None,
                 m1=re.compile(r'^第\w{1,3}[编|篇]'),
                 m2=re.compile(r'^第\w{1,3}章'),
                 m3=re.compile(r'^第\w{1,3}节')):
    """Extract text from each input file and write ``outputabs.html``.

    Args:
        txtpath: A list of files (used as given), a directory, or None for
            the current working directory. Also passed to ``_hh``.
        func: Extraction callable, dispatched on ``func.__name__``:
            ``'abstract'`` is called as ``func(f, rc=rc)`` and
            ``'abssplit'`` as ``func(f, p1=p1, p2=p2)``. Other callables
            are never invoked.
        rc, p1, p2: Compiled patterns forwarded to ``func``.
        regrex1, Research, Startw: Forwarded to ``GFlist`` as ``regrex1``,
            ``research`` and ``startw`` when a directory is scanned.
        index, m1, m2, m3: Forwarded to ``make_Mulu_content``.

    Returns:
        None. The scratch directory ``temp_dir`` is removed at the end.
    """
    files = []
    if isinstance(txtpath, list):
        files.extend(txtpath)
    else:
        scan_cwd = txtpath is None
        if scan_cwd:
            txtpath = os.getcwd()
        if scan_cwd or os.path.isdir(txtpath):
            ss = GFlist(txtpath, regrex1=regrex1, research=Research, startw=Startw)
            # Element 1 of each GFlist entry is used as the file path.
            files = [entry[1] for entry in ss]

    tdir = 'temp_dir'
    if not os.path.exists(tdir):
        os.mkdir(tdir)

    htmlcode = _hh(txtpath)

    # Pick the keyword arguments once; None means "unknown extractor".
    fname = func.__name__
    if fname == 'abstract':
        kwargs = {'rc': rc}
    elif fname == 'abssplit':
        kwargs = {'p1': p1, 'p2': p2}
    else:
        kwargs = None

    Tfile = []
    if kwargs is not None:
        for f in files:
            nf = os.path.join(tdir, os.path.basename(f))
            text = func(f, **kwargs)
            try:
                with open(nf, 'w', encoding='utf8') as gf:
                    gf.write(text)
            except Exception:
                # Best effort: skip inputs whose result cannot be written,
                # without swallowing KeyboardInterrupt/SystemExit.
                continue
            Tfile.append(nf)

    tb, ctt = make_Mulu_content(Tfile, m1=m1, m2=m2, m3=m3, index=index)
    htmlName = 'outputabs.html'

    def _dump(encoding):
        # The context manager closes the handle on every path; the original
        # leaked the first handle whenever it fell back to gbk.
        with open(htmlName, 'w', encoding=encoding) as html:
            for part in (htmlcode, tb, ctt, '</body></html>'):
                html.write(part)

    try:
        _dump('utf8')
    except UnicodeEncodeError:
        # Same fallback as before: retry with gbk, truncating the partial
        # utf8 output because the file is reopened in 'w' mode.
        _dump('gbk')

    shutil.rmtree(tdir)

    return

Beispiel #12

0

Datei anzeigen

Datei: abstract.py Projekt: davischan3168/newpackage

def absTFilehtml(txtpath,func=abssplit,\
                 rc=re.compile('裁判要点\W*(.*?\s*.*?)\W*相关法条'),\
                 p1=re.compile('裁判要点'),p2=re.compile('相关法条'),\
                 regrex1=None,Research=None,index=True,Startw=None,\
                 m1=re.compile(r'^第\w{1,3}[编|篇]'),\
                 m2=re.compile(r'^第\w{1,3}章'),\
                 m3=re.compile(r'^第\w{1,3}节'),\
                 thtmlfunc=C2html):
    """
    Extract text from every input file into ``temp_dir`` and render the
    extracted files with ``thtmlfunc``.

    Per the original author's note this targets the Supreme People's
    Court guiding cases (extracting the judgment gist).

    txtpath: list of files (used as given), a directory, or None for the
        current working directory.
    func: dispatched on ``func.__name__`` -- ``'abstract'`` receives
        ``rc``; ``'abssplit'`` receives ``p1`` and ``p2``.
    thtmlfunc: dispatched on ``thtmlfunc.__name__`` -- ``'C2html'`` gets
        the file list only; ``'txt2htmlall'`` also gets ``mformat='AIO'``.
    The scratch directory is left in place.
    """
    if isinstance(txtpath, list):
        sources = list(txtpath)
    else:
        from_cwd = txtpath is None
        if from_cwd:
            txtpath = os.getcwd()
        if from_cwd or os.path.isdir(txtpath):
            listing = GFlist(txtpath, regrex1=regrex1, research=Research, startw=Startw)
            # Second element of each entry is the path.
            sources = [row[1] for row in listing]
        else:
            sources = []

    scratch = 'temp_dir'
    if not os.path.exists(scratch):
        os.mkdir(scratch)

    htmlcode = _hh(txtpath)

    extractor = func.__name__
    if extractor == 'abstract':
        call_kwargs = {'rc': rc}
    elif extractor == 'abssplit':
        call_kwargs = {'p1': p1, 'p2': p2}
    else:
        call_kwargs = None

    written = []
    if call_kwargs is not None:
        for src in sources:
            target = os.path.join(scratch, os.path.basename(src))
            text = func(src, **call_kwargs)
            try:
                with open(target, 'w', encoding='utf8') as out:
                    out.write(text)
                written.append(target)
            except:
                pass

    # The listing result is not used; the call itself is kept as-is.
    GFlist(scratch, regrex1=regrex1, research=Research, startw=Startw)

    renderer = thtmlfunc.__name__
    if renderer == 'C2html':
        thtmlfunc(written)
    elif renderer == "txt2htmlall":
        thtmlfunc(written, mformat='AIO')

    return