def qqmsg_extract(dbname, outputname, inputname):
    """Extract group-chat messages from a text export and write them out.

    The destination is chosen from the extension of ``outputname``:

    * ``.txt``  - a UTF-8 text file that starts with the title line,
    * ``.docx`` - a Word document that starts with a centred title,
    * anything else - standard output.

    Args:
        dbname: Passed to ``qqmsg_db.Qqmsg_db`` together with the module-level
            ``errname_filename``.
        outputname: Output path; only its extension selects the output kind.
        inputname: Path of the exported chat log, decoded as UTF-8 with
            undecodable bytes ignored.

    Raises:
        OSError: If the input or output file cannot be opened.
    """
    import sys

    db = qqmsg_db.Qqmsg_db(dbname, errname_filename)
    outitle = "群记录"
    file_ext = file_extension(outputname)

    # Read the input up front so its handle is released even when opening or
    # writing the output fails later on.
    with open(inputname, 'r', encoding='utf8', errors='ignore') as fin:
        lines = fin.readlines()

    # Output kinds understood by parse_msg: 0 - stdout, 1 - txt, 2 - docx.
    if file_ext == ".txt":
        # The input is decoded as UTF-8, so write UTF-8 as well; the locale
        # default may be unable to encode characters found in chat messages.
        with open(outputname, 'w', encoding='utf8') as fout:
            fout.write(outitle + "\n\n")
            parse_msg(db, fout, lines, 1)
    elif file_ext == ".docx":
        fout = Document()
        p = fout.add_paragraph("")
        # CENTER / LEFT / RIGHT select the paragraph alignment.
        p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
        docx_add_style_run(p, outitle, 18, True, "宋体")
        fout.add_paragraph("")
        try:
            parse_msg(db, fout, lines, 2)
        finally:
            # Keep whatever was produced, even if parsing stopped early.
            fout.save(outputname)
    else:
        print("stdout msg!")
        # Previously no output object was bound on this path, so the call
        # below failed with UnboundLocalError.
        # NOTE(review): parse_msg is not visible here; sys.stdout is passed so
        # that both print-based and write-based handling of kind 0 work.
        parse_msg(db, sys.stdout, lines, 0)
def check_doc(input_file):
    """Print every e-mail address and phone number found in a Word document.

    Each paragraph of the document is scanned; matches are printed one per
    line, e-mail addresses first and then phone numbers of the form
    ``(123)456-7890``.

    Args:
        input_file: Path (or file-like object) handed to ``Document``.

    Returns:
        None. If the document cannot be opened a message is printed instead.
    """
    mail_pattern = re.compile(r'[\w\.-]+@[\w\.-]+')
    phone_pattern = re.compile(r'(\(\d{3}\)\d{3}-\d{4})')

    # Only opening the document is guarded: the message below is about a
    # missing or unreadable file, not about errors while scanning the text.
    try:
        doc = Document(input_file)
    except Exception:
        print("No file exists in the provided path!!")
        # The old code went on to call doc.close() here, but no document was
        # bound (and python-docx documents have no close() method anyway).
        return

    for paragraph in doc.paragraphs:
        word_text = paragraph.text
        # extract emails
        for mail in mail_pattern.findall(word_text):
            print(mail)
        # extract mobile numbers
        for mob in phone_pattern.findall(word_text):
            print(mob)
def main():
    """Replace ``old_text`` with ``new_text`` in every supported file.

    Files come from ``listFiles(fileDir)`` and are handled by extension
    (case-sensitive):

    * ``.txt``  - rewritten in place,
    * ``.docx`` - opened with ``Document`` and passed to ``replace_text``,
    * ``.doc``  - opened with ``RemoteWord`` and edited via ``replace_doc``.

    Files with any other extension are skipped.
    """
    for fileObj in listFiles(fileDir):
        extension = os.path.splitext(fileObj)[1]

        if extension == '.txt':
            # "with" guarantees the handle is closed even if a write fails.
            with open(fileObj, 'r+') as f:
                # Build the new content before truncating, so a failure while
                # replacing cannot leave the file empty.
                replaced = [
                    line.replace(old_text, new_text) for line in f.readlines()
                ]
                f.seek(0)
                f.truncate()
                f.writelines(replaced)
        elif extension == '.docx':
            print("准备修改" + fileObj)
            doc = Document(fileObj)
            replace_text(old_text, new_text, doc, fileObj)
            print(fileObj + "修改成功!")
        elif extension == '.doc':
            print("准备修改" + fileObj)
            doc = RemoteWord(fileObj)  # wrapper object for the .doc file
            try:
                doc.replace_doc(old_text, new_text)  # replace the text
            finally:
                # Always close the wrapper, also when the replacement fails.
                doc.close()
            print(fileObj + "修改成功")
# Fragment: the enclosing definition is not visible here. `doc`, `start_tip`,
# `page_num`, `pool_num`, `get_header` and `get_pics_for_one_pages` all come
# from the surrounding code.

# Record the start banner in the log document.
doc.add_paragraph(start_tip)
header = get_header("referer")
try:
    base_url = 'http://www.mzitu.com/page/{}/'
    start = "################第 {} 页开始################"
    end = "################第 {} 页结束################"
    #for i in range(page_num):
    #pool_num = 10
    # Pages are numbered from 1, hence the `i + 1` below.
    for i in range(page_num):
        print(start.format(i + 1))
        doc.add_paragraph(start.format(i + 1))
        url = base_url.format(i + 1)
        get_pics_for_one_pages(url, header, pool_num)
        # The end-of-page marker is only printed, not added to the document.
        print(end.format(i + 1))
# NOTE(review): the bare except reports *any* failure in the loop (including
# KeyboardInterrupt) as a network error - confirm that this is intended.
except:
    print("网络连接错误...")
    doc.add_paragraph("网络连接错误...")
# The completion banner is emitted whether or not the loop above failed.
print("")
print("##################全部下载完成!##################")
doc.add_paragraph("")
doc.add_paragraph("##################全部下载完成!##################")
try:
    doc.save('mzitulog.docx')
    # NOTE(review): if `doc` is a python-docx Document it has no close(); the
    # resulting AttributeError would be hidden by the bare except below -
    # confirm the type of `doc`.
    doc.close()
except:
    pass
class docEditor:
    """Voice-prompted, keyboard-driven editor for a single Word document.

    Prompts are spoken through ``speak.say`` and answers are read with
    ``raw_input``. Constructing an instance asks for a file name, opens (or
    creates) the document and immediately enters the interactive menu, which
    ends the process through ``exit`` when the user is done.

    Attributes:
        filename: Path the document is saved to.
        doc: The open document object.
        para: The "current" paragraph that formatted text is appended to.
    """

    def __init__(self):
        """Ask for a file name, open or create the document, run the menu."""
        speak.say("Word Document. Give the name of the file: ")
        filename = raw_input()
        if '.docx' not in filename:
            filename = filename + '.docx'
        self.filename = filename
        try:
            self.doc = Document(filename)
        except Exception:
            try:
                # The file could not be opened: create a new one under "word".
                self.filename = os.path.join("word", self.filename)
                self.doc = Document()
                self.saveAsDoc()
            except Exception:
                speak.say(
                    'The document cannot be created due to some error.Try to use any other document name!'
                )
                self.exit()
        self.para = self.doc.add_paragraph('')
        self.menu()

    def __del__(self):
        """Close the document if the underlying object supports closing."""
        # The instance may be half-initialised (no ``doc``), and python-docx
        # documents provide no close(); calling it unconditionally raised
        # AttributeError during garbage collection.
        close = getattr(getattr(self, 'doc', None), 'close', None)
        if callable(close):
            close()

    def exit(self):
        """Announce the end of the task and terminate the process."""
        speak.say('Exiting Word doc handling Task')
        os._exit(1)

    def getFullText(self):
        """Return the text of all paragraphs joined with newlines."""
        speak.say('Content of file')
        return '\n'.join([paras.text for paras in self.doc.paragraphs])

    def saveAsDoc(self):
        """Save the document to ``self.filename``."""
        self.doc.save(self.filename)

    def enterText(self):
        """Read lines until an empty line is entered; return them joined."""
        speak.say("Enter the text: ")
        lines = []
        while True:
            line = raw_input()
            if not line:
                break
            lines.append(line)
        return '\n'.join(lines)

    def addNewPara(self):
        """Append a new paragraph with user-entered text and save."""
        text = self.enterText()
        self.para = self.doc.add_paragraph(text)
        self.saveAsDoc()

    def formattedtext(self, formats):
        """Append formatted text to the current paragraph and save.

        Args:
            formats: Whitespace-separated format names; ``BOLD``/``B``,
                ``ITALIC``/``I`` and ``UNDERLINE``/``U`` are recognised
                (case-insensitive). Unknown names are ignored.
        """
        types = formats.upper().split()
        speak.say(
            "Do u want to enter formatted text on a new line? Yes or No : ")
        lineBreak = raw_input().upper()
        run = self.para.add_run()
        text = self.enterText()
        run.add_text(text)
        if lineBreak in ('Y', 'YES'):
            run.add_break()
        # Each name must be tested for membership on its own: the previous
        # form, `'BOLD' or 'B' in types`, is always true because the
        # non-empty string 'BOLD' is truthy.
        if 'BOLD' in types or 'B' in types:
            run.bold = True
        if 'ITALIC' in types or 'I' in types:
            run.italic = True
        if 'UNDERLINE' in types or 'U' in types:
            run.underline = True
        self.saveAsDoc()

    def makeTable(self):
        """Ask for the table size, fill it cell by cell and save."""
        speak.say("Enter the following" + '\n' + 'Rows: ')
        row_count = int(raw_input())
        speak.say("Columns: ")
        col_count = int(raw_input())
        table = self.doc.add_table(row_count, col_count)
        table.style = 'Table Grid'
        speak.say("Enter the values in the table row by row: ")
        for row in table.rows:
            for cell in row.cells:
                cell.text = raw_input()
            print('\n')
        self.saveAsDoc()

    def changeDocFont(self, fontName, Size):
        """Set font name and size (in points) of the 'Normal' style.

        The document is not saved by this method.
        """
        font = self.doc.styles['Normal'].font
        font.name = fontName
        font.size = Pt(Size)

    def addHeading(self):
        """Ask for a heading level and text, add the heading and save."""
        speak.say("Enter the heading level: ")
        level = int(raw_input())
        text = self.enterText()
        self.doc.add_heading(text, level=level)
        self.saveAsDoc()

    def addPicture(self):
        """Insert a picture from the "image" directory and save."""
        speak.say(
            "Enter the name of the Picture (with extension i.e. .png or .jpeg): "
        )
        pic_name = os.path.join("image", raw_input())
        speak.say("Enter the picture width in inches: ")
        pic_width = float(raw_input())
        self.doc.add_picture(pic_name, width=Inches(pic_width))
        self.saveAsDoc()

    def search_first_para(self):
        """Return the first paragraph matching a user-entered pattern.

        The input is compiled as a case-insensitive regular expression.

        Returns:
            The first matching paragraph, or None when nothing matches.
        """
        speak.say("Enter the word to search in paragraphs: ")
        word = re.compile(raw_input(), re.I)
        for paras in self.doc.paragraphs:
            if word.search(paras.text) is not None:
                return paras
        return None

    def del_para(self, paragraph):
        """Remove ``paragraph`` from the document and save.

        Args:
            paragraph: Paragraph object to delete.

        Returns:
            True when the paragraph was removed; False when its XML element
            has no children (nothing is removed in that case).
        """
        p = paragraph._element
        if not len(p):
            return False
        p.getparent().remove(p)
        # Invalidate the paragraph proxy itself; the old code set these
        # attributes on the XML element, leaving the proxy pointing at the
        # detached element.
        paragraph._p = paragraph._element = None
        # Keep a valid current paragraph even when the last one was removed
        # (indexing [-1] on an empty list raised IndexError).
        remaining = self.doc.paragraphs
        if remaining:
            self.para = remaining[-1]
        else:
            self.para = self.doc.add_paragraph('')
        self.saveAsDoc()
        return True

    def menu(self):
        """Run the interactive menu until the user declines to continue."""
        keep_going = True
        while keep_going:
            speak.say('Do you want to listen to menu? (Y or N)')
            choice = raw_input().upper()
            if choice in ('Y', 'YES'):
                speak.say(
                    '1. Read the doc' + '\n' + '2. Add a Heading' + '\n' +
                    '3. Add new Para' + '\n' + '4. Add formatted text' +
                    '\n' + '5. Change Font style of Doc' + '\n'
                    '6. Add Table' + '\n'
                    '7. Add Picture ' + '\n'
                    '8. Delete a paragraph containing a particular word ' +
                    '\n'
                    '9. Delete the last paragraph ')
            speak.say("Enter your choice number: ")
            ch = None
            try:
                ch = int(raw_input())
            except ValueError:
                speak.say('Oops! you have not entered a number.')

            if ch is None:
                pass
            elif ch == 1:
                try:
                    speak.say(self.getFullText())  # read the text aloud
                except Exception:
                    speak.say("Error in reading the document!")
            elif ch == 2:
                try:
                    self.addHeading()
                    speak.say("Heading added!")
                except Exception:
                    speak.say("Error in adding the heading!")
            elif ch == 3:
                try:
                    self.addNewPara()
                    speak.say("Paragraph added!")
                except Exception:
                    speak.say("Error in adding new Paragraph!")
            elif ch == 4:
                # Repeat until the user declines to add more formatted text.
                while True:
                    try:
                        speak.say(
                            'Enter your choice: bold(B), italic(I) or underline(U) (seperated by spaces): '
                        )
                        new_format = raw_input().upper()
                        self.formattedtext(new_format)
                        speak.say(
                            "Do You want to add more formatted text? y or n: "
                        )
                        if raw_input().upper() in ('N', 'NO'):
                            break
                    except Exception:
                        speak.say("Error in adding formatted text!")
            elif ch == 5:
                # Guarded and saved like every other menu operation; bad
                # input (e.g. a missing size) used to crash the whole menu.
                try:
                    speak.say(
                        "Enter the font name and font size (space seperated):")
                    name, size = raw_input().split(' ')
                    self.changeDocFont(name, int(size))
                    self.saveAsDoc()
                    speak.say("Font changed!")
                except Exception:
                    speak.say("Error in changing the font!")
            elif ch == 6:
                try:
                    self.makeTable()
                    speak.say("Table added!")
                except Exception:
                    speak.say("Error in adding table!")
            elif ch == 7:
                try:
                    self.addPicture()
                    speak.say("Picture added!")
                except Exception:
                    speak.say("Error in adding picture!")
            elif ch == 8:
                try:
                    para = self.search_first_para()
                    if para is None:
                        speak.say(
                            "Paragraph with searched word doesn't exist")
                    elif self.del_para(para):
                        speak.say("Paragraph deleted!")
                    else:
                        speak.say("Unable to delete the paragraph")
                except Exception:
                    speak.say("Error in deleting paragraph!")
            elif ch == 9:
                try:
                    if self.del_para(self.para):
                        speak.say("Last Paragraph deleted!")
                    else:
                        speak.say("No last Paragraph exist")
                except Exception:
                    speak.say("Error in deleting paragraph!")
            else:
                speak.say('Invalid choice')

            speak.say("Do You want to perform another operation? y or n: ")
            another_operation = raw_input().upper()
            # The answer is upper-cased, so compare against 'NO' (the old
            # comparison with 'No' could never match).
            if another_operation in ('N', 'NO'):
                keep_going = False
        self.exit()
class Writer(object):
    '''
    Custom implementation of a console/text/docx Writer.

    ``mode`` selects the backend: ``'text'`` writes to a ``.txt`` file,
    ``'console'`` writes to ``sys.stdout`` and ``'docx'`` builds a Word
    document that is saved on ``close``. For the file-based modes the output
    is only opened when its directory exists; otherwise no ``file`` attribute
    is set and ``close`` becomes a no-op.
    '''

    # paragraph currently being filled (docx mode only)
    paragraph = None

    def __init__(self, mode, out):
        '''
        Opens the output for the given mode.

        Args:
            mode: one of 'text', 'console' or 'docx'
            out: output path for the file-based modes; not read in
                console mode
        '''
        super(Writer, self).__init__()

        self.mode = mode
        if self.mode == 'text':
            path = self._get_path(out)
            if os.path.exists(os.path.dirname(path)):
                self.file = open(path, 'w')
        elif self.mode == 'console':
            self.file = sys.stdout
        elif self.mode == 'docx':
            path = self._get_path(out)
            if os.path.exists(os.path.dirname(path)):
                self.path = path
                self.file = Document()
                self._add_styles()

    # ------------------
    #        MAIN
    # ------------------

    def start_sequence(self, sequence):
        '''Starts the lines for a new protein; sequence[0] is its title'''
        if self.mode in ['text', 'console']:
            print('-------------------------', file=self.file)
            print(sequence[0], file=self.file)
            print(file=self.file)
        elif self.mode == 'docx':
            self.file.add_heading('-------------------------\n', 1)
            self.paragraph = self.file.add_paragraph(sequence[0] + '\n')
            self.paragraph.style = self.file.styles['Normal']

    def write_sequence_line(self, sequence, index, indent=15):
        '''
        Writes a sequence line with indentation to file.

        The line is prefixed with the number of characters in
        sequence[1:index], right-aligned within ``indent`` columns.
        '''
        # offset of this chunk within the full sequence
        offset = str(len(''.join(sequence[1:index]))) + ': '
        line = ' ' * (indent - len(offset)) + offset + sequence[index]

        if self.mode in ['text', 'console']:
            print(line, file=self.file)
        elif self.mode == 'docx':
            self.paragraph.add_run(line + '\n')

    def write_blank_line(self):
        '''Write blank line to file'''
        if self.mode in ['text', 'console']:
            print(file=self.file)
        elif self.mode == 'docx':
            self.paragraph.add_run('\n')

    def write_condition(self, header, coverage, cuts, sequence, index):
        '''Writes the condition with coverage to file'''
        # in docx mode process_condition adds the runs itself, so only the
        # line terminator is left to write here
        output = self.process_condition(header, coverage, cuts,
                                        sequence, index)
        if self.mode in ['text', 'console']:
            print(output, file=self.file)
        elif self.mode == 'docx':
            self.paragraph.add_run('\n')

    def close_sequence(self):
        '''Closes the lines for a protein'''
        if self.mode in ['text', 'console']:
            print('-------------------------', file=self.file)
            print(file=self.file)
        elif self.mode == 'docx':
            self.file.add_heading('-------------------------\n', 1)

    # ------------------
    #       UTILS
    # ------------------

    @staticmethod
    def get_header(condition, total=12):
        '''
        Builds a fixed-width header from the given condition.

        The condition is truncated to ``total`` characters, followed by
        ' : ' and then padded with spaces so that short names line up.
        '''
        length = min([len(condition), total])
        return ''.join([condition[:total], ' : ', ' ' * (total - length)])

    def close(self):
        '''Closes the text file or saves the docx; no-op if never opened'''
        if self.mode == 'text' and hasattr(self, "file"):
            self.file.close()
        elif self.mode == 'docx' and hasattr(self, "file"):
            self.file.save(self.path)

    def process_condition(self, header, coverage, cuts, sequence, index):
        '''
        Processes the header to give the conditions coverage of the
        sequence.

        For every position of sequence[index], 'o' marks a covered cut
        site, '+' a covered position and ' ' an uncovered one. In the text
        modes the marks are appended to the returned header; in docx mode
        they are added as styled runs and the header is returned unchanged.
        '''
        # grab parameter lengths to determine range
        length = len(sequence[index])
        offset = len(''.join(sequence[1:index]))
        text_mode = self.mode in ['text', 'console']
        docx_mode = self.mode == 'docx'

        if docx_mode:
            self.paragraph.add_run(header)

        for key in range(offset, offset + length):
            value = coverage[key]
            cut = cuts[key]
            if text_mode:
                if value and cut:
                    header += 'o'
                elif value:
                    header += '+'
                else:
                    header += ' '
            # docx settings
            elif docx_mode and value and cut:
                self.paragraph.add_run('o', style='Red')
            elif docx_mode and value:
                self.paragraph.add_run('+', style='Black')
            else:
                self.paragraph.add_run(' ')

        return header

    def _add_styles(self):
        '''Sets the docx styles'''
        # normal style
        font = self.file.styles['Normal'].font
        font.name = 'Courier New'
        font.size = Pt(8)

        # character styles used for the coverage marks
        for name, rgb in (('Red', RGBColor(0xFF, 0x0, 0x0)),
                          ('Black', RGBColor(0x0, 0x0, 0x0))):
            style = self.file.styles.add_style(name,
                                               WD_STYLE_TYPE.CHARACTER)
            font = style.font
            font.color.rgb = rgb
            font.name = 'Courier New'
            font.size = Pt(8)

    def _get_path(self, out):
        '''
        Returns the path for the outfile.

        Paths starting with '/' are used as given, paths starting with '~'
        are expanded to the user's home directory, and everything else is
        joined onto the module-level PATH. The extension matching the mode
        ('.txt' or '.docx') is appended when missing.

        Raises:
            IndexError: if ``out`` is empty
        '''
        first = out[0]
        if first == '~':
            # '~' used to be passed through unexpanded, so the directory
            # check in __init__ failed and nothing was ever written
            path = os.path.expanduser(out)
        elif first == '/':
            path = out
        else:
            path = os.path.join(PATH, out)

        extension = os.path.splitext(path)[1]
        if self.mode == 'text' and extension != '.txt':
            path = '.'.join([path, 'txt'])
        if self.mode == 'docx' and extension != '.docx':
            path = '.'.join([path, 'docx'])
        return path