예제 #1
0
def qqmsg_extract(dbname, outputname, inputname):
    """Extract group-chat messages from an exported text file.

    The extension of ``outputname`` selects the output type handed to
    ``parse_msg``: ``.txt`` writes a plain-text file, ``.docx`` builds a Word
    document, and anything else selects stdout mode.

    Args:
        dbname: Passed to ``qqmsg_db.Qqmsg_db`` together with the module-level
            ``errname_filename``.
        outputname: Output path; only its extension is inspected in stdout
            mode.
        inputname: Path of the input file, read as UTF-8 with undecodable
            bytes ignored.
    """
    db = qqmsg_db.Qqmsg_db(dbname, errname_filename)

    outype = 0  # 0-stdout, 1-txt, 2-docx
    outitle = "群记录"
    # Bind the sink up front: in stdout mode nothing else assigns it, and the
    # parse_msg call below would otherwise fail with UnboundLocalError.
    # NOTE(review): parse_msg is presumed not to touch the sink when
    # outype == 0 -- confirm against its implementation.
    fout = None

    # The context manager guarantees the input is closed even when creating
    # the output below fails.
    with open(inputname, 'r', encoding='utf8', errors='ignore') as fin:
        file_ext = file_extension(outputname)
        if file_ext == ".txt":
            fout = open(outputname, 'w')
            fout.write(outitle + "\n\n")
            outype = 1
        elif file_ext == ".docx":
            fout = Document()
            p = fout.add_paragraph("")
            # Centre the title paragraph (LEFT / RIGHT are the alternatives).
            p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
            docx_add_style_run(p, outitle, 18, True, "宋体")
            fout.add_paragraph("")
            outype = 2
        else:
            print("stdout msg!")

        try:
            parse_msg(db, fout, fin.readlines(), outype)
        finally:
            # Flush whatever was produced, even if parsing failed part-way.
            if fout is not None:
                if file_ext == ".txt":
                    fout.close()
                elif file_ext == ".docx":
                    fout.save(outputname)
예제 #2
0
def check_doc(input_file):
    """Print e-mail addresses and phone numbers found in a Word document.

    Every paragraph of the document is scanned; each e-mail-like token and
    each phone number of the form ``(ddd)ddd-dddd`` is printed on its own
    line.

    Args:
        input_file: Path (or file-like object) handed to ``Document``.

    Returns:
        None. If the document cannot be opened, a message is printed and the
        function returns without raising.
    """
    # Only the open can plausibly fail; keeping the try this small avoids
    # reporting unrelated errors as a missing file. The handler must not touch
    # ``doc``: it is unbound when the open fails.
    try:
        doc = Document(input_file)
    except Exception:
        print("No file exists in the provided path!!")
        return

    for paragraph in doc.paragraphs:
        word_text = paragraph.text
        # extract emails
        for mails in re.findall(r'[\w\.-]+@[\w\.-]+', word_text):
            print(mails)
        # extract mobile numbers
        for mob in re.findall(r'(\(\d{3}\)\d{3}-\d{4})', word_text):
            print(mob)
예제 #3
0
def main():
    """Replace ``old_text`` with ``new_text`` in every supported file.

    Iterates over ``listFiles(fileDir)`` and dispatches on the file
    extension: ``.txt`` files are rewritten in place, ``.docx`` files go
    through ``replace_text`` and ``.doc`` files through ``RemoteWord``.
    Files with any other extension are left untouched.
    """
    for fileObj in listFiles(fileDir):
        ext = os.path.splitext(fileObj)[1]
        if ext == '.txt':
            # ``with`` closes the handle even if reading or writing fails.
            with open(fileObj, 'r+') as f:
                # Build the new content before truncating so a failure while
                # replacing cannot leave the file empty.
                new_lines = [
                    line.replace(old_text, new_text) for line in f.readlines()
                ]
                f.seek(0)
                f.truncate()
                f.writelines(new_lines)
        elif ext == '.docx':
            print("准备修改" + fileObj)
            doc = Document(fileObj)
            replace_text(old_text, new_text, doc, fileObj)
            print(fileObj + "修改成功!")
        elif ext == '.doc':
            print("准备修改" + fileObj)
            doc = RemoteWord(fileObj)  # initialise a doc object
            try:
                doc.replace_doc(old_text, new_text)  # replace the doc's text
            finally:
                # Always release the RemoteWord object, even on failure.
                doc.close()
            print(fileObj + "修改成功")
예제 #4
0
    doc.add_paragraph(start_tip)
    header = get_header("referer")
    try:

        base_url = 'http://www.mzitu.com/page/{}/'
        start = "################第 {} 页开始################"
        end = "################第 {} 页结束################"
        #for i in range(page_num):
        #pool_num = 10
        for i in range(page_num):
            print(start.format(i + 1))
            doc.add_paragraph(start.format(i + 1))
            url = base_url.format(i + 1)
            get_pics_for_one_pages(url, header, pool_num)
            print(end.format(i + 1))

    except:
        print("网络连接错误...")
        doc.add_paragraph("网络连接错误...")

    print("")
    print("##################全部下载完成!##################")
    doc.add_paragraph("")
    doc.add_paragraph("##################全部下载完成!##################")
    try:

        doc.save('mzitulog.docx')
        doc.close()
    except:
        pass
예제 #5
0
class docEditor:
    """Interactive, speech-prompted editor for a single .docx document.

    Prompts are spoken through the module-level ``speak`` object and answers
    are read from standard input with ``raw_input``. Constructing an instance
    opens (or creates) the document and immediately enters the menu loop.
    """

    # Spoken when the user asks to hear the menu.
    _MENU_TEXT = '\n'.join([
        '1. Read the doc',
        '2. Add a Heading',
        '3. Add new Para',
        '4. Add formatted text',
        '5. Change Font style of Doc',
        '6. Add Table',
        '7. Add Picture ',
        '8. Delete a paragraph containing a particular word ',
        '9. Delete the last paragraph ',
    ])

    def __init__(self):
        """Ask for a file name, open or create the document, run the menu."""
        speak.say("Word Document. Give the name of the file: ")
        filename = raw_input()
        # Test the suffix, not a substring: a name that merely contains
        # ".docx" somewhere in the middle still needs the extension.
        if not filename.endswith('.docx'):
            filename = filename + '.docx'
        self.filename = filename
        try:
            self.doc = Document(filename)
        except Exception:
            try:
                # If new file
                self.filename = os.path.join("word", self.filename)
                self.doc = Document()
                self.saveAsDoc()
            except Exception:
                speak.say(
                    'The document cannot be created due to some error.Try to use any other document name!'
                )
                self.exit()

        # Running paragraph that formattedtext() appends runs to.
        self.para = self.doc.add_paragraph('')

        self.menu()

    def __del__(self):
        """Release the document if its type offers ``close()``.

        ``doc`` may be missing when ``__init__`` failed early, and
        python-docx documents are not expected to expose ``close()``, so both
        cases are tolerated instead of raising inside the finaliser.
        """
        close = getattr(getattr(self, 'doc', None), 'close', None)
        if callable(close):
            close()

    def exit(self):
        """Announce the exit and terminate the process immediately."""
        speak.say('Exiting Word doc handling Task')
        os._exit(1)

    def getFullText(self):
        """Return the text of all paragraphs joined by newlines."""
        speak.say('Content of file')
        return '\n'.join(paras.text for paras in self.doc.paragraphs)

    def saveAsDoc(self):
        """Save the document to ``self.filename``."""
        self.doc.save(self.filename)

    def enterText(self):
        """Read lines from stdin until an empty line; return them joined."""
        speak.say("Enter the text: ")
        # iter() with a sentinel keeps calling raw_input until it returns ''.
        return '\n'.join(iter(raw_input, ''))

    def addNewPara(self):
        """Append a new paragraph with user-entered text and save."""
        text = self.enterText()
        self.para = self.doc.add_paragraph(text)
        self.saveAsDoc()

    @staticmethod
    def _has_format(types, *aliases):
        """Return True if any of ``aliases`` occurs in the ``types`` list."""
        return any(alias in types for alias in aliases)

    def formattedtext(self, formats):
        """Append a formatted run to the running paragraph and save.

        Args:
            formats: Whitespace-separated format names; ``BOLD``/``B``,
                ``ITALIC``/``I`` and ``UNDERLINE``/``U`` are recognised
                (case-insensitively).
        """
        types = formats.upper().split()
        speak.say(
            "Do u want to enter formatted text on a new line? Yes or No : ")

        lineBreak = raw_input().upper()
        run = self.para.add_run()
        text = self.enterText()
        run.add_text(text)
        if lineBreak in ('Y', 'YES'):
            run.add_break()
        # Each alias must be tested for membership individually; an
        # expression such as ``'BOLD' or 'B' in types`` is always true
        # because the non-empty string literal is truthy on its own.
        if self._has_format(types, 'BOLD', 'B'):
            run.bold = True
        if self._has_format(types, 'ITALIC', 'I'):
            run.italic = True
        if self._has_format(types, 'UNDERLINE', 'U'):
            run.underline = True

        self.saveAsDoc()

    def makeTable(self):
        """Create a table of user-given size, fill it row by row and save."""
        speak.say("Enter the following" + '\n' + 'Rows: ')

        row_count = int(raw_input())
        speak.say("Columns: ")
        col_count = int(raw_input())
        table = self.doc.add_table(row_count, col_count)
        table.style = 'Table Grid'
        speak.say("Enter the values in the table row by row: ")
        for row in table.rows:
            for cell in row.cells:
                cell.text = raw_input()
            print('\n')
        self.saveAsDoc()

    def changeDocFont(self, fontName, Size):
        """Set the font name and point size of the 'Normal' style.

        The document is not saved by this method.
        """
        font = self.doc.styles['Normal'].font
        font.name = fontName
        font.size = Pt(Size)

    def addHeading(self):
        """Ask for a heading level and text, add the heading and save."""
        speak.say("Enter the heading level: ")
        level = int(raw_input())
        text = self.enterText()
        self.doc.add_heading(text, level=level)
        self.saveAsDoc()

    def addPicture(self):
        """Add a picture from the ``image`` directory and save."""
        speak.say(
            "Enter the name of the Picture (with extension i.e. .png or .jpeg): "
        )
        pic_name = os.path.join("image", raw_input())
        speak.say("Enter the picture width in inches: ")
        pic_width = float(raw_input())
        self.doc.add_picture(pic_name, width=Inches(pic_width))
        self.saveAsDoc()

    def search_first_para(self):
        """Return the first paragraph matching a user-entered pattern.

        The input is compiled as a case-insensitive regular expression.
        Returns None when no paragraph matches.
        """
        speak.say("Enter the word to search in paragraphs: ")
        pattern = re.compile(raw_input(), re.I)
        for paras in self.doc.paragraphs:
            if pattern.search(paras.text) is not None:
                return paras
        return None

    def del_para(self, paragraph):
        """Remove ``paragraph`` from the document and save.

        Returns:
            True when the paragraph was removed; False when its element has
            no children or is no longer attached to a parent.
        """
        p = paragraph._element
        parent = p.getparent() if p is not None else None
        # len(p) is the element's child count; as before, a paragraph
        # without children is reported as "nothing to delete".
        if parent is None or not len(p):
            return False
        parent.remove(p)
        # Invalidate the proxy object (not the XML element) so later use of
        # the stale paragraph fails loudly.
        paragraph._p = paragraph._element = None
        remaining = self.doc.paragraphs
        # Keep a valid running paragraph even if the document is now empty.
        self.para = remaining[-1] if remaining else self.doc.add_paragraph('')
        self.saveAsDoc()
        return True

    def _attempt(self, action, success_msg, failure_msg):
        """Run ``action``; speak ``success_msg`` or, on error, ``failure_msg``."""
        try:
            action()
        except Exception:
            speak.say(failure_msg)
            return False
        if success_msg is not None:
            speak.say(success_msg)
        return True

    def _formatted_text_loop(self):
        """Menu option 4: add formatted runs until the user declines."""
        while True:
            try:
                speak.say(
                    'Enter your choice: bold(B), italic(I) or underline(U) (seperated by spaces): '
                )
                new_format = raw_input().upper()
                self.formattedtext(new_format)
                speak.say(
                    "Do You want to add more formatted text? y or n: "
                )
                Continue = raw_input().upper()
            except Exception:
                # Stop instead of retrying: a persistent failure (for
                # example an unwritable file) would otherwise loop forever.
                speak.say("Error in adding formatted text!")
                return
            if Continue in ('N', 'NO'):
                return

    def _change_font_from_input(self):
        """Menu option 5: read "<font name> <size>" and apply it."""
        speak.say(
            "Enter the font name and font size (space seperated):")
        # Split on the last space so font names containing spaces work.
        name, size = raw_input().rsplit(' ', 1)
        self.changeDocFont(name, int(size))
        # Persist the change like every other menu operation does.
        self.saveAsDoc()

    def _delete_matching_para(self):
        """Menu option 8: delete the first paragraph matching a search."""
        para = self.search_first_para()
        if para is None:
            speak.say(
                "Paragraph with searched word doesn't exist")
        elif self.del_para(para):
            speak.say("Paragraph deleted!")
        else:
            speak.say("Unable to delete the paragraph")

    def _delete_last_para(self):
        """Menu option 9: delete the running (last) paragraph."""
        if self.del_para(self.para):
            speak.say("Last Paragraph deleted!")
        else:
            speak.say("No last Paragraph exist")

    def _run_choice(self, ch):
        """Execute the menu entry numbered ``ch``."""
        if ch == 1:
            self._attempt(lambda: speak.say(self.getFullText()), None,
                          "Error in reading the document!")
        elif ch == 2:
            self._attempt(self.addHeading, "Heading added!",
                          "Error in adding the heading!")
        elif ch == 3:
            self._attempt(self.addNewPara, "Paragraph added!",
                          "Error in adding new Paragraph!")
        elif ch == 4:
            self._formatted_text_loop()
        elif ch == 5:
            self._attempt(self._change_font_from_input, None,
                          "Error in changing font!")
        elif ch == 6:
            self._attempt(self.makeTable, "Table added!",
                          "Error in adding table!")
        elif ch == 7:
            self._attempt(self.addPicture, "Picture added!",
                          "Error in adding picture!")
        elif ch == 8:
            self._attempt(self._delete_matching_para, None,
                          "Error in deleting paragraph!")
        elif ch == 9:
            self._attempt(self._delete_last_para, None,
                          "Error in deleting paragraph!")
        else:
            speak.say('Invalid choice')

    def menu(self):
        """Run the interactive menu loop until the user chooses to stop."""
        while True:
            speak.say('Do you want to listen to menu? (Y or N)')
            choice = raw_input().upper()
            if choice in ('Y', 'YES'):
                speak.say(self._MENU_TEXT)
            speak.say("Enter your choice number: ")
            try:
                ch = int(raw_input())
            except ValueError:
                ch = None
                speak.say('Oops! you have not entered a number.')

            if ch is not None:
                self._run_choice(ch)

            speak.say("Do You want to perform another operation? y or n: ")
            # The answer is upper-cased, so compare against 'NO', not 'No'.
            another_operation = raw_input().upper()
            if another_operation in ('N', 'NO'):
                self.exit()
                return
class Writer(object):
    '''
    Custom implementation of a console/text/docx Writer.

    ``mode`` is one of ``'text'``, ``'console'`` or ``'docx'``. In text and
    docx mode the output is only opened when the target directory exists;
    otherwise ``self.file`` is left unset and ``close()`` does nothing.
    Instances can be used as context managers, which calls ``close()`` on
    exit.
    '''

    # Paragraph currently being written in docx mode; set by start_sequence.
    paragraph = None

    def __init__(self, mode, out):
        '''
        Opens the output selected by `mode`.

        `out` is the output path (ignored in console mode); it is resolved
        through `_get_path`.
        '''
        super(Writer, self).__init__()

        self.mode = mode
        if self.mode == 'text':
            path = self._get_path(out)
            if os.path.exists(os.path.dirname(path)):
                self.file = open(path, 'w')
        elif self.mode == 'console':
            self.file = sys.stdout
        elif self.mode == 'docx':
            path = self._get_path(out)
            if os.path.exists(os.path.dirname(path)):
                self.path = path
                self.file = Document()
                self._add_styles()

    def __enter__(self):
        '''Returns the writer itself for use in a `with` statement'''

        return self

    def __exit__(self, exc_type, exc_value, traceback):
        '''Closes the writer on leaving a `with` block'''

        self.close()
        return False

    # ------------------
    #        MAIN
    # ------------------

    def start_sequence(self, sequence):
        '''Starts the lines for a new protein (`sequence[0]` is its name)'''

        if self.mode in ['text', 'console']:
            print('-------------------------', file=self.file)
            print(sequence[0], file=self.file)
            print(file=self.file)
        elif self.mode == 'docx':
            self.file.add_heading('-------------------------\n', 1)
            self.paragraph = self.file.add_paragraph(sequence[0] + '\n')
            self.paragraph.style = self.file.styles['Normal']

    def write_sequence_line(self, sequence, index, indent=15):
        '''
        Writes a sequence line with indentation to file.

        The line is prefixed by the summed length of the preceding chunks
        (`sequence[1:index]`), right-aligned within `indent` columns.
        '''

        # grab offset to add to file
        offset = str(len(''.join(sequence[1:index]))) + ': '
        # a negative pad count yields an empty string
        padding = ' '*(indent-len(offset))
        line = ''.join([padding, offset, sequence[index]])
        if self.mode in ['text', 'console']:
            print(line, file=self.file)
        elif self.mode == 'docx':
            self.paragraph.add_run(line + '\n')

    def write_blank_line(self):
        '''Write blank line to file'''

        if self.mode in ['text', 'console']:
            print(file=self.file)
        elif self.mode == 'docx':
            self.paragraph.add_run('\n')

    def write_condition(self, header, coverage, cuts, sequence, index):
        '''Writes the condition with coverage to file'''

        output = self.process_condition(header, coverage, cuts,
                                        sequence, index)
        if self.mode in ['text', 'console']:
            print(output, file=self.file)
        elif self.mode == 'docx':
            # process_condition already added the runs; just end the line
            self.paragraph.add_run('\n')

    def close_sequence(self):
        '''Closes the lines for a protein'''

        if self.mode in ['text', 'console']:
            print('-------------------------', file=self.file)
            print(file=self.file)
        elif self.mode == 'docx':
            self.file.add_heading('-------------------------\n', 1)

    # ------------------
    #       UTILS
    # ------------------

    @staticmethod
    def get_header(condition, total=12):
        '''
        Grabs a fixed-width header from the given condition.

        The condition is truncated to `total` characters, followed by
        ' : ' and padded with spaces so short names align with long ones.
        '''

        length = min([len(condition), total])
        return ''.join([condition[:total], ' : ', ' '*(total-length)])

    def close(self):
        '''
        Closes the writeable object.

        Text files are closed, docx documents are saved to `self.path`;
        the console is left alone. Nothing happens if no output was opened.
        '''

        if self.mode == 'text' and hasattr(self, "file"):
            self.file.close()
        elif self.mode == 'docx' and hasattr(self, "file"):
            self.file.save(self.path)

    def process_condition(self, header, coverage, cuts, sequence, index):
        '''
        Processes the header to give the conditions coverage of the
        sequence.

        In text/console mode the header with one appended symbol per
        position is returned. In docx mode the header and symbols are added
        as runs to the current paragraph and `header` is returned unchanged.
        '''

        # grab parameter lengths to determine range
        length = len(sequence[index])
        offset = len(''.join(sequence[1:index]))
        textual = self.mode in ['text', 'console']
        is_docx = self.mode == 'docx'
        if is_docx:
            self.paragraph.add_run(header)
        symbols = []
        for key in range(offset, offset+length):
            value = coverage[key]
            cut = cuts[key]
            # 'o' if cutsite, '+' if covered but not cut, ' ' if blank
            if value:
                symbol = 'o' if cut else '+'
            else:
                symbol = ' '
            if textual:
                symbols.append(symbol)
            elif is_docx and value:
                # docx settings: cut sites in red, plain coverage in black
                self.paragraph.add_run(symbol,
                                       style='Red' if cut else 'Black')
            else:
                self.paragraph.add_run(' ')
        return header + ''.join(symbols)

    def _add_styles(self):
        '''Sets the docx styles (Normal, Red and Black, all Courier New 8pt)'''

        # create normal style
        font = self.file.styles['Normal'].font
        font.name = 'Courier New'
        font.size = Pt(8)
        # create red and black character styles
        for name, rgb in (('Red', RGBColor(0xFF, 0x0, 0x0)),
                          ('Black', RGBColor(0x0, 0x0, 0x0))):
            style = self.file.styles.add_style(name, WD_STYLE_TYPE.CHARACTER)
            font = style.font
            font.color.rgb = rgb
            font.name = 'Courier New'
            font.size = Pt(8)

    def _get_path(self, out):
        '''
        Returns the path for the outfile.

        Paths not starting with '/' or '~' are joined onto the module-level
        PATH; a leading '~' is expanded to the user's home directory. The
        '.txt' / '.docx' extension is appended when missing for that mode.
        '''

        relative = out[0] not in ['/', '~']
        if relative:
            path = os.path.join(PATH, out)
        else:
            # expand '~' so the directory check in __init__ can succeed;
            # absolute paths pass through unchanged
            path = os.path.expanduser(out)
        if self.mode == 'text' and os.path.splitext(path)[1] != '.txt':
            path = '.'.join([path, 'txt'])
        if self.mode == 'docx' and os.path.splitext(path)[1] != '.docx':
            path = '.'.join([path, 'docx'])
        return path