def __init__(self):
    """Attach an empty structure tree and empty component libraries to ``self``.

    Sets the structure, the figure/table/equation/line/paragraph libraries
    and the tag, which starts out as ``'default'``.
    """
    # Structure tree first, then one library per component kind.
    self.struct = DocumentStruct()
    self.figurelib = FigureLib()
    self.tablelib = TableLib()
    self.eqlib = EqLib()
    self.LineLib = LineLib()
    self.paralib = ParagraphLib()

    self.tag = 'default'
class document():
    """A LaTeX document held as a structure tree plus component libraries.

    The instance owns one library each for figures, tables, equations, raw
    source lines and paragraphs, together with a ``DocumentStruct`` tree.
    Methods import ``.tex`` sources, classify and extract their content,
    export the libraries to a project folder and build a ``.docx`` file.

    Attributes set outside ``__init__``:
        workdir: directory of the last file passed to ``import_tex``.
        exportdst, exportfolder: set by ``ExportProject``; read by the
            ``docx_add_*_from_lib`` and ``*2Png`` methods.
        docx: dict of docx build state, set by ``docx_new``.
    """

    def __init__(self):
        """Create empty libraries and an empty structure tree."""
        self.struct = DocumentStruct()
        self.figurelib = FigureLib()
        self.tablelib = TableLib()
        self.eqlib = EqLib()
        self.LineLib = LineLib()
        self.paralib = ParagraphLib()
        self.tag = 'default'

    def GetTreeDict(self):
        """Return one dict filled by the figure, table, equation and paragraph libraries."""
        treedict = {}
        treedict = self.figurelib.GetLibDict('figure', treedict)
        treedict = self.tablelib.GetLibDict('table', treedict)
        treedict = self.eqlib.GetLibDict('equation', treedict)
        treedict = self.paralib.GetLibDict('paragraph', treedict)
        return treedict

    def import_tex(self, filename):
        """Read a tex file into ``self.LineLib``, following ``\\input`` commands.

        Args:
            filename: path of the tex file; ``.tex`` is appended when the
                file name itself has no extension.

        Each line is stripped of surrounding whitespace; lines of at most one
        character are dropped.  A line containing ``\\input`` is replaced by
        the lines of the referenced file (resolved against the directory of
        ``filename``), skipping included lines shorter than five characters.
        Kept lines are numbered consecutively from 1 (``tag`` and ``seq``).

        Raises:
            IOError/OSError: if a file cannot be opened.
            ValueError: if an ``\\input`` line has no ``{...}`` argument.
        """
        self.workdir = os.path.dirname(filename)
        # Look at the extension of the file name only: a dot elsewhere in the
        # path (e.g. "./sections/intro") must not suppress the default suffix.
        if not os.path.splitext(filename)[1]:
            filename = filename + '.tex'

        with open(filename, 'r') as f_full:
            lines = f_full.readlines()

        nline = 1
        for rawline in lines:
            # strip() also removes the trailing newline (and any '\r').
            tempstr = rawline.strip()
            if len(tempstr) <= 1:
                continue

            if '\\input' not in tempstr:
                self.LineLib.Add(line(tag=nline, seq=nline, content=tempstr))
                nline += 1
                continue

            # Pull in the lines of the file named by the \input command.
            filepath = tempstr[tempstr.index('{') + 1:tempstr.index('}')]
            tempdoc = document()
            tempdoc.import_tex(os.path.join(self.workdir, filepath))
            for item in tempdoc.LineLib.itemlib.values():
                if len(item.content) < 5:
                    continue
                self.LineLib.Add(line(tag=nline, seq=nline, content=item.content))
                nline += 1

    def labeling(self):
        """Set ``type`` on every stored line from its leading characters.

        Lines are visited by integer key 1, 2, 3, ... until a key is missing.
        Resulting types: ``'comments'``, ``'input'``, ``'include'``,
        ``'label:<cmd>:<arg>'`` or ``'label:<cmd>'`` for other commands,
        ``'undefined'`` when a command contains ``{`` but the pattern does
        not match, and ``'text'`` otherwise.
        """
        nline = 0
        while True:
            nline += 1
            try:
                entry = self.LineLib.itemlib[nline]
            except (KeyError, IndexError):
                # First missing key marks the end of the numbered lines.
                break

            content = entry.content
            if content.startswith('%'):
                entry.type = 'comments'
            elif content.startswith('\\input'):
                entry.type = 'input'
            elif content.startswith('\\include'):
                entry.type = 'include'
            elif content.startswith('\\'):
                if '{' in content:
                    keys = re.findall(r"\\(.*){(.*)}", content)
                    if keys:
                        # keys[0] is a (command, argument) tuple.
                        entry.type = 'label:%s:%s' % keys[0]
                    else:
                        entry.type = 'undefined'
                else:
                    keys = re.findall(r"\\(.*)", content)
                    entry.type = 'label:%s' % keys[0]
            else:
                entry.type = 'text'

    def AddParagraph(self, lines):
        """Placeholder; does nothing."""
        pass

    def AddFigureFromTex(self, lines):
        """Add a figure to the figure library from tex lines and return it."""
        return self.figurelib.AddByTex(lines)

    def AddEquationFromTex(self, lines):
        """Add an equation to the equation library from tex lines and return it."""
        return self.eqlib.AddByTex(lines)

    def AddTableFromTex(self, lines):
        """Add a table to the table library from tex lines and return it."""
        return self.tablelib.AddByTex(lines)

    def AddParagraphFromTex(self, tag, lines):
        """Add a paragraph with the given tag to the paragraph library and return it."""
        return self.paralib.AddByTex(tag, lines)

    def AddItemsByTex(self, current_paragraph, current_paragraph_node):
        """Attach the labels found in the paragraph text to the paragraph.

        ``current_paragraph_node`` is accepted but not used.
        """
        itemlist = extract_labels(current_paragraph.wholetext)
        if itemlist is not None:
            current_paragraph.AddByDict(itemlist)

    def extract(self):
        """Classify the stored lines and fill the structure tree and libraries.

        Runs ``labeling`` first, then walks the lines with a small state
        machine: sectioning commands add nodes to ``self.struct``;
        table/figure/equation environments are collected line by line and
        handed to the matching library when their ``\\end{...}`` is seen;
        remaining text lines become paragraphs.  Paragraphs met before the
        first sectioning command are stored in the paragraph library but not
        attached to the structure tree.

        Returns:
            1, always.
        """
        self.labeling()
        line_content = 'text'
        # Start empty so an unmatched \end{...} cannot hit an unbound local.
        temp_table = []
        temp_figure = []
        temp_equation = []
        current_section = None
        current_subsection = None
        current = None

        for entry in self.LineLib.itemlib.values():
            content = entry.content
            if content[0] == '%':
                # comments
                continue

            # Decide the state for this line.
            if '\\section' in content:
                text = content[content.index('{') + 1:content.index('}')]
                current_section = self.struct.AddSection(self.struct.root, 'section', text)
                current = current_section
                line_content = 'section'
            elif '\\subsection' in content:
                text = content[content.index('{') + 1:content.index('}')]
                current_subsection = self.struct.AddSection(current_section, 'subsection', text)
                current = current_subsection
                line_content = 'section'
            elif '\\subsubsection' in content:
                text = content[content.index('{') + 1:content.index('}')]
                current = self.struct.AddSection(current_subsection, 'subsubsection', text)
                line_content = 'section'
            elif '\\begin{table}' in content:
                line_content = 'table'
                temp_table = []
            elif '\\end{table}' in content:
                line_content = 'tabletomain'
            elif '\\begin{figure}' in content:
                line_content = 'figure'
                temp_figure = []
            elif '\\end{figure}' in content:
                line_content = 'figuretomain'
            elif '\\begin{equation}' in content:
                line_content = 'equation'
                temp_equation = []
            elif '\\end{equation}' in content:
                line_content = 'equationtomain'
            elif line_content == 'section':
                # The line after a heading goes back to plain text.
                line_content = 'text'

            entry.type = line_content

            # Apply the action for the current state.
            if line_content == 'table':
                temp_table.append(entry)
            elif line_content == 'tabletomain':
                temp_table.append(entry)
                self.AddTableFromTex(temp_table)
                line_content = 'text'
            elif line_content == 'figure':
                temp_figure.append(entry)
            elif line_content == 'figuretomain':
                temp_figure.append(entry)
                self.AddFigureFromTex(temp_figure)
                line_content = 'text'
            elif line_content == 'equation':
                temp_equation.append(entry)
            elif line_content == 'equationtomain':
                temp_equation.append(entry)
                self.AddEquationFromTex(temp_equation)
                line_content = 'text'
            elif line_content == 'text' and content[0] != '\\':
                tag = 'PARA' + randomword(10)
                current_paragraph = self.AddParagraphFromTex(tag, content)
                if current is not None:
                    # Text before the first heading (e.g. an abstract) is not
                    # attached to the tree for now.
                    current_paragraph_node = self.struct.AddParagraph(current, current_paragraph)
                    # NOTE(review): confirm label extraction is meant to run
                    # only for paragraphs attached to the tree.
                    self.AddItemsByTex(current_paragraph, current_paragraph_node)
        return 1

    def GetPara(self, label):
        """Return the paragraph stored under ``label``, or None if there is none."""
        itemlib = self.paralib.itemlib
        if label in itemlib:
            return itemlib[label]
        return None

    def export_struc(self, filename):
        """Export the xml document structure to ``filename``."""
        self.struct.exportfile(filename)

    def docx_export(self, dirname, filename):
        """Build a docx document from the tree and save it as dirname/filename.

        Sections and paragraphs come first, followed by the figures, tables
        and equations recorded while walking the tree.
        """
        self.docx_new()
        self.docx_add_by_tree()
        self.docx_add_figure_by_list()
        self.docx_add_table_by_list()
        self.docx_add_equation_by_list()
        self.docx_save(os.path.join(dirname, filename))

    def docx_new(self):
        """Start a new docx document and reset the build state in ``self.docx``."""
        # Make a new document tree - this is the main part of a Word document
        mydocument = docx.newdocument()
        # This xpath location is where most interesting content lives
        body = mydocument.xpath('/w:document/w:body', namespaces=docx.nsprefixes)[0]
        relationships = docx.relationshiplist()
        self.docx = {
            'document': mydocument,
            'body': body,
            'relationships': relationships,
            'figurelist': [],
            'tablelist': [],
            'equationlist': [],
            'referencelist': [],
        }

    def docx_add_by_tree(self):
        """Append headings and paragraphs to the docx body in tree order.

        For every paragraph node the keys listed in its ``itemdict`` under
        'Figure', 'Table', 'Equation' and 'Reference' are recorded in
        ``self.docx`` for the later ``docx_add_*_by_list`` calls.
        """
        heading_level = {'section': 1, 'subsection': 2, 'subsubsection': 3}
        for node in self.struct.root.xpath("//*"):
            if node.tag in heading_level:
                self.docx_add_section(node.text, heading_level[node.tag])
            elif node.tag == 'paragraph':
                para = self.GetPara(node.text)
                self.docx_add_paragraph('[' + para.tag + ']' + para.wholetext)
                # record the items referenced by this paragraph
                self.docx['figurelist'].extend(para.itemdict['Figure'])
                self.docx['tablelist'].extend(para.itemdict['Table'])
                self.docx['equationlist'].extend(para.itemdict['Equation'])
                self.docx['referencelist'].extend(para.itemdict['Reference'])

    def docx_add_section(self, sectiontext, level):
        """Append a heading of the given level to the docx body."""
        self.docx['body'].append(docx.heading(sectiontext, level))

    def docx_add_paragraph(self, paragraphtext):
        """Append a paragraph to the docx body."""
        self.docx['body'].append(docx.paragraph(paragraphtext))

    def docx_add_figure(self, imagefilename, description):
        """Append a page break, the picture and its description to the docx body."""
        self.docx['relationships'], picpara = docx.picture(
            self.docx['relationships'], imagefilename, description)
        self.docx['body'].append(docx.pagebreak(type='page', orient='portrait'))
        self.docx['body'].append(picpara)
        self.docx['body'].append(docx.paragraph(description))

    def docx_add_figure_from_lib(self, key):
        """Add the figure stored under ``key`` if it has an 'eps' entry.

        Reads ``self.exportdst`` and ``self.exportfolder``, which
        ``ExportProject`` sets.
        """
        figure = self.figurelib.itemlib[key]
        if 'eps' in figure.itemlib:
            singlefigure = figure.itemlib['eps']
            imgfile = os.path.join(
                self.exportdst, self.exportfolder['figure'], singlefigure.path)
            self.docx_add_figure(imgfile, figure.caption)

    def docx_add_eq_from_lib(self, key):
        """Add the equation stored under ``key``.

        A 'png' entry is added as a picture; otherwise a 'tex' entry is added
        as two paragraphs (caption, then the latex source).  Reads
        ``self.exportdst`` and ``self.exportfolder``.
        """
        equation = self.eqlib.itemlib[key]
        if 'png' in equation.itemlib:
            singleeq = equation.itemlib['png']
            imgfile = os.path.join(
                self.exportdst, self.exportfolder['equation'], singleeq.path)
            self.docx_add_figure(imgfile, equation.caption)
        elif 'tex' in equation.itemlib:
            self.docx_add_paragraph(equation.caption)
            self.docx_add_paragraph(equation.itemlib['tex'].latex)

    def docx_add_table_from_lib(self, key):
        """Add the table stored under ``key``.

        A 'png' entry is added as a picture with the table caption; otherwise
        a 'tex' entry is added as a paragraph holding the latex source.
        Reads ``self.exportdst`` and ``self.exportfolder``.
        """
        table = self.tablelib.itemlib[key]
        if 'png' in table.itemlib:
            singletable = table.itemlib['png']
            imgfile = os.path.join(
                self.exportdst, self.exportfolder['table'], singletable.path)
            self.docx_add_figure(imgfile, table.caption)
        elif 'tex' in table.itemlib:
            self.docx_add_paragraph(table.itemlib['tex'].latex)

    def docx_add_figure_by_list(self):
        """Add every figure recorded in ``self.docx['figurelist']``."""
        for key in self.docx['figurelist']:
            self.docx_add_figure_from_lib(key)

    def docx_add_table_by_list(self):
        """Add every table recorded in ``self.docx['tablelist']``."""
        for key in self.docx['tablelist']:
            self.docx_add_table_from_lib(key)

    def docx_add_equation_by_list(self):
        """Add every equation recorded in ``self.docx['equationlist']``."""
        for key in self.docx['equationlist']:
            self.docx_add_eq_from_lib(key)

    def docx_cofigure(self):
        """Placeholder; does nothing."""
        pass

    def docx_save(self, filename, title='title', subject='subject', creator='unknown', keywords=None):
        """Write the docx document built so far to ``filename``.

        Args:
            filename: output path.
            title, subject, creator: core document properties.
            keywords: list of keyword strings; defaults to an empty list.
        """
        if keywords is None:
            # A fresh list per call instead of a shared mutable default.
            keywords = []
        coreprops = docx.coreproperties(
            title=title, subject=subject, creator=creator, keywords=keywords)
        appprops = docx.appproperties()
        contenttypes = docx.contenttypes()
        websettings = docx.websettings()
        wordrelationships = docx.wordrelationships(self.docx['relationships'])
        # Save our document
        docx.savedocx(self.docx['document'], coreprops, appprops, contenttypes,
                      websettings, wordrelationships, filename)

    def EqTex2Png(self):
        """Render every equation's latex to png and register the png on the item.

        Reads ``self.exportdst`` and ``self.exportfolder``.
        """
        for item in self.eqlib.itemlib.values():
            latextext = '\n'.join(item.itemlib['tex'].latex)
            latextext = '\\[' + latextext + '.\\]'
            math2png(
                [latextext],
                os.path.join(self.exportdst, self.exportfolder['equation'], item.tag),
                prefix=item.tag)
            item.AddPng(os.path.join(item.tag, item.tag + '.png'))

    def TableTex2Png(self):
        """Render every table's latex to png and register the png on the item.

        Reads ``self.exportdst`` and ``self.exportfolder``.
        """
        for item in self.tablelib.itemlib.values():
            latextext = item.itemlib['tex'].latex
            tablename = os.path.join(
                self.exportdst, self.exportfolder['table'], item.tag)
            table2png([latextext], tablename, prefix=item.tag)
            # add png file format to the library
            item.AddPng(os.path.join(item.tag, item.tag + '.png'))

    def FigureEps2Png(self):
        """Convert each exported figure's eps file to png, skipping failures.

        Reads ``self.exportdst`` and ``self.exportfolder``.
        """
        for item in self.figurelib.itemlib.values():
            figdir = os.path.join(
                self.exportdst, self.exportfolder['figure'], item.tag)
            epsfile = os.path.join(figdir, item.tag + '.eps')
            pngfile = os.path.join(figdir, item.tag + '.png')
            try:
                im = Image.open(epsfile)
                im.save(pngfile)
            except Exception:
                # Best effort: a figure that cannot be converted is skipped,
                # but KeyboardInterrupt/SystemExit are no longer swallowed.
                continue

    def FigureImportFolder(self, folder):
        """Print the path of every jpg/jpeg/png/eps file found under ``folder``."""
        image_suffixes = ('.jpg', '.jpeg', '.png', '.eps')
        for root, subdirs, files in os.walk(folder):
            for name in files:
                if os.path.splitext(name)[1].lower() in image_suffixes:
                    print(os.path.join(root, name))

    def ExportProject(self, desinationfolder):
        """Save the project in the destination folder.

        Exports the paragraph, figure, table and equation libraries to their
        sub-folders, writes the structure to ``<tag>.xml`` and pickles the
        four libraries to ``<tag>.lib``.  Also sets ``self.exportdst`` and
        ``self.exportfolder``.
        """
        self.exportdst = desinationfolder
        self.exportfolder = {
            'equation': 'equation',
            'figure': 'img',
            'table': 'table',
            'paragraph': 'paragraph',
        }
        # export all components
        self.ExportParaLib(os.path.join(desinationfolder, self.exportfolder['paragraph']))
        self.ExportFigureLib(os.path.join(desinationfolder, self.exportfolder['figure']))
        self.ExportTableLib(os.path.join(desinationfolder, self.exportfolder['table']))
        self.ExportEqLib(os.path.join(desinationfolder, self.exportfolder['equation']))

        self.export_struc(os.path.join(self.exportdst, self.tag + '.xml'))

        lib = {
            'equation': self.eqlib,
            'figure': self.figurelib,
            'table': self.tablelib,
            'paragraph': self.paralib,
        }
        # pickle needs a binary file object on Python 3; 'wb' is also valid
        # on Python 2.
        with open(os.path.join(self.exportdst, self.tag + '.lib'), 'wb') as fp:
            pickle.dump(lib, fp)

    def ExportParaLib(self, parafolder):
        """Write each paragraph's text to ``<parafolder>/<tag>.tex``."""
        ensure_dir(parafolder)
        for item in self.paralib.itemlib.values():
            with open(os.path.join(parafolder, item.tag + '.tex'), 'w') as fp:
                fp.write(item.wholetext)

    def ExportFigureLib(self, figurefolder):
        """Copy every figure file into ``<figurefolder>/<key>/``.

        Each file is renamed ``<key>.<format>`` and the entry's ``path`` is
        updated to the new location relative to ``figurefolder``.
        """
        ensure_dir(figurefolder)
        for key, item in self.figurelib.itemlib.items():
            dstroot = os.path.join(figurefolder, key)
            ensure_dir(dstroot)
            for singlefigurekey, singlefigure in item.itemlib.items():
                scrfile = os.path.join(self.workdir, singlefigure.path)
                filename = key + '.' + singlefigurekey
                copyfile(scrfile, filename, dstroot)
                singlefigure.path = os.path.join(key, filename)

    def ExportTableLib(self, tablefolder):
        """Export every table into ``<tablefolder>/<key>/``.

        File-backed entries are copied as ``<key>.<format>``; other entries
        are written as ``<key>.tex`` via ``TexWriteTableFile``.  The entry's
        ``path`` is then updated relative to ``tablefolder``.
        """
        ensure_dir(tablefolder)
        for key, item in self.tablelib.itemlib.items():
            dstroot = os.path.join(tablefolder, key)
            ensure_dir(dstroot)
            for singletablekey, singletable in item.itemlib.items():
                # NOTE(review): this tests `filepath` while the copy below
                # reads `path` (ExportEqLib tests `path`); confirm which
                # attribute is intended.
                if singletable.filepath != None:
                    # direct copy table
                    scrfile = os.path.join(
                        self.workdir, singletablekey, singletable.path)
                    filename = key + '.' + singletablekey
                    copyfile(scrfile, filename, dstroot)
                else:
                    # write table file
                    filename = key + '.' + 'tex'
                    self.TexWriteTableFile(
                        key, os.path.join(tablefolder, key, filename))
                # NOTE(review): confirm the path update is meant to apply to
                # both branches, as it does in ExportFigureLib.
                singletable.path = os.path.join(key, filename)

    def TexWriteTableFile(self, tablelabel, filename):
        """Write the table stored under ``tablelabel`` to a latex file.

        The file holds a ``table`` environment with the caption, the label
        (the table's tag), ``\\centering`` and the stored latex string.
        """
        tableobj = self.tablelib.itemlib[tablelabel]
        header = '\\begin{table}\n\\caption{%s}\n\\label{%s}\\centering\n' % (
            tableobj.caption, tableobj.tag)
        with open(filename, 'w') as fd:
            fd.write(header)
            fd.write(tableobj.itemlib['tex'].latex)
            fd.write('\n')
            fd.write('\\end{table}\n')

    def ExportEqLib(self, eqfolder):
        """Export every equation into ``<eqfolder>/<key>/``.

        File-backed entries are copied as ``<key>.<format>``; other entries
        are written as ``<key>.tex`` via ``TexWriteEqFile``.
        """
        ensure_dir(eqfolder)
        for key, item in self.eqlib.itemlib.items():
            dstroot = os.path.join(eqfolder, key)
            ensure_dir(dstroot)
            for singleeqkey, singleeq in item.itemlib.items():
                if singleeq.path != None:
                    # direct copy equation file
                    scrfile = os.path.join(self.workdir, singleeqkey, singleeq.path)
                    filename = key + '.' + singleeqkey
                    copyfile(scrfile, filename, dstroot)
                else:
                    # write equation file
                    filename = key + '.' + 'tex'
                    self.TexWriteEqFile(key, os.path.join(eqfolder, key, filename))

    def TexWriteEqFile(self, eqlabel, filename):
        """Write the equation stored under ``eqlabel`` to a latex file.

        The file holds an ``equation`` environment labelled with the
        equation's tag, followed by the stored latex lines.
        """
        eqobj = self.eqlib.itemlib[eqlabel]
        with open(filename, 'w') as fd:
            fd.write('\\begin{equation}\\label{%s}\n' % (eqobj.tag))
            # NOTE(review): one newline per latex line, matching the
            # '\n'.join used in EqTex2Png; confirm against existing output.
            for eq_line in eqobj.itemlib['tex'].latex:
                fd.write(eq_line)
                fd.write('\n')
            fd.write('\\end{equation} \n')