def add_video(document, story):
    """Append a video hyperlink paragraph and the video's picture to the document.

    ``analyze_video(story)`` supplies the items; when it returns something
    falsy the document is left untouched.  Items 0 and 1 are handed to
    ``docx_ext.add_hyperlink`` (presumably link target and link text --
    confirm against that helper) and item 3 is handed to
    ``document.add_picture``.
    """
    video_info = analyze_video(story)
    if video_info:
        paragraph = document.add_paragraph()

        # An empty grey 10pt run is created ahead of the hyperlink.
        lead_font = paragraph.add_run().font
        lead_font.size = Pt(10)
        lead_font.color.rgb = RGBColor(0x7A, 0x7A, 0x7A)

        docx_ext.add_hyperlink(
            paragraph, video_info[0], video_info[1], '4169E1', False)
        # The picture goes into the document itself, after the paragraph.
        document.add_picture(video_info[3])
def add_location(document, story):
    """Append a paragraph with a Google Maps hyperlink for the story's location.

    ``analyze_loc(story)`` supplies the items; an empty result leaves the
    document untouched.  Items 0 and 1 are joined with a comma to form the
    ``q=`` query of the map URL, and item 2 is used as the link name.
    """
    loc = analyze_loc(story)
    if len(loc) < 1:
        return

    link_name = loc[2]
    query = loc[0] + ',' + loc[1]
    google_map_url = 'https://maps.google.com/maps?q=' + query

    paragraph = document.add_paragraph()
    # Grey 10pt label that precedes the hyperlink.
    label_font = paragraph.add_run(u'位置:').font
    label_font.size = Pt(10)
    label_font.color.rgb = RGBColor(0x7A, 0x7A, 0x7A)

    docx_ext.add_hyperlink(
        paragraph, google_map_url, link_name, '4169E1', False)
def add_video(document, story):
    """Append a video hyperlink paragraph and a 3-inch-wide cover picture.

    ``analyze_video(story)`` supplies the items; when it returns something
    falsy the document is left untouched.  Items 0 and 1 are handed to
    ``docx_ext.add_hyperlink`` (presumably link target and link text --
    confirm against that helper) and item 3 is handed to
    ``document.add_picture``.

    Inserting the cover is best effort: a failure is reported on stdout and
    the rest of the document is kept.
    """
    video_items = analyze_video(story)
    if not video_items:
        return

    para = document.add_paragraph()
    # An empty grey 10pt run is created ahead of the hyperlink.
    font = para.add_run().font
    font.size = Pt(10)
    font.color.rgb = RGBColor(0x7A, 0x7A, 0x7A)
    docx_ext.add_hyperlink(
        para, video_items[0], video_items[1], '4169E1', False)

    cover = video_items[3]
    try:
        document.add_picture(cover, width=Inches(3))
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still stop the program.
        print('视频封面插入出错:' + cover)
def add_quotation(document, quotation):
    """Append a quoted (re-posted) message as an 'IntenseQuote' paragraph.

    Nothing happens when ``quotation`` is falsy.  Otherwise
    ``analyze_quotation(quotation)`` supplies the items:

    * exactly one item: it is written as a single grey 12pt run and the
      function returns;
    * otherwise: item 0 is written as a coloured heading run, item 1 is
      rendered through ``add_content`` into the same paragraph, the
      pictures from ``analyze_pic_html`` and the video from
      ``analyze_video_html`` follow, and item 2 is written last as a small
      grey run.

    Picture and video-cover insertion are best effort: a failure is
    reported on stdout and processing continues.
    """
    if not quotation:
        return

    quotation_items = analyze_quotation(quotation)
    para = document.add_paragraph(style='IntenseQuote')

    if len(quotation_items) == 1:
        run = para.add_run(quotation_items[0])
        font = run.font
        font.bold = False
        font.size = Pt(12)
        font.color.rgb = RGBColor(0xA9, 0xA9, 0xA9)
        return

    run = para.add_run(quotation_items[0] + u':')
    font = run.font
    font.bold = False
    font.size = Pt(12)
    font.color.rgb = RGBColor(0x48, 0xD1, 0xCC)
    add_content(document, quotation_items[1] + '\n', para, 12)

    # add pictures
    for filename in analyze_pic_html(quotation):
        try:
            para.add_run().add_picture(filename, width=Inches(3))
            para.add_run('\n')
        except Exception:
            # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
            # The parenthesised single-argument print works on Python 2 and 3.
            print('转帖插入图片出错:' + filename)

    # add video
    video_items = analyze_video_html(quotation)
    if video_items:
        font_video = para.add_run().font
        font_video.size = Pt(8)
        font_video.color.rgb = RGBColor(0x7A, 0x7A, 0x7A)
        docx_ext.add_hyperlink(
            para, video_items[0], video_items[1], '4169E1', False)
        para.add_run('\n')
        cover = video_items[3]
        try:
            # The cover gets its own run, created here after the hyperlink.
            # Reusing the loop's picture run would fail when there were no
            # pictures and would otherwise put the cover before the link.
            para.add_run().add_picture(cover, width=Inches(3))
        except Exception:
            print('视频封面插入出错:' + cover)

    # add time
    run_time = para.add_run(quotation_items[2])
    font_time = run_time.font
    font_time.bold = False
    font_time.size = Pt(8)
    font_time.color.rgb = RGBColor(0x69, 0x69, 0x69)
def add_content(document, content, para=None, font_size=16):
    """Render a post's HTML content into a paragraph as styled runs.

    The work is done in two passes:

    1. Recognised HTML fragments (topic links, emoji / face / other
       ``<img>`` tags, ``<em rel=...>`` friend mentions, ``<a href=...>``
       links) are replaced in ``content`` by a placeholder of the form
       ``SPLIT_STRING + <kind marker> + payload + SPLIT_STRING``.  Images
       are fetched with ``download_pic`` and the returned value is used as
       the payload.
    2. ``content`` is split on ``SPLIT_STRING`` and every part is appended
       to the paragraph according to its leading kind marker; parts
       without a marker are written as plain text after decoding the HTML
       entities ``&gt;``, ``&lt;``, ``&quot;`` and ``&amp;``.

    Args:
        document: object providing ``add_paragraph()``; only used when
            ``para`` is not supplied.
        content: the HTML text to render.
        para: paragraph to append to; a new one is added to ``document``
            when this is falsy.
        font_size: point size for plain text and topics; friend mentions
            use ``font_size - 2``.

    Raises:
        IndexError: if a fragment that was selected for replacement does
            not contain the expected ``#`` / ``crs=`` / ``title=`` /
            ``href=`` part.
    """
    # ---- Pass 1: replace recognised fragments by tagged placeholders ----
    if 'k.t.qq.com' in content:
        pattern = re.compile(r'(<a href="http://k.t.qq.com.*?</a>)', re.S)
        for topic in re.findall(pattern, content):
            topic_word = topic.split('#')[1]
            content = content.replace(
                topic,
                SPLIT_STRING + TOPIC_STRING + '#' + topic_word + '#' +
                SPLIT_STRING)

    if 'www/mb/images/emoji' in content:
        pattern_emoji = re.compile(r'(<img.*?>)', re.S)
        pattern_emoji_img = re.compile(r"crs='(.*?)'", re.S)
        for emoji in re.findall(pattern_emoji, content):
            emoji_url = re.findall(pattern_emoji_img, emoji)[0]
            filename = download_pic(emoji_url, PNG_EXTENSION)
            content = content.replace(
                emoji,
                SPLIT_STRING + EMOJI_STRING + filename + SPLIT_STRING)

    if 'em rel="@' in content:
        pattern_friend = re.compile(r'(<em rel=.*?</em>)', re.S)
        pattern_friend_name = re.compile(r'<em.*?title="(.*?)"', re.S)
        for friend in re.findall(pattern_friend, content):
            friend_name = re.findall(pattern_friend_name, friend)[0]
            content = content.replace(
                friend,
                SPLIT_STRING + FRIEND_STRING + friend_name + SPLIT_STRING)

    if 'http://url.cn' in content:
        pattern_url = re.compile(r'(<a href=.*?</a>)', re.S)
        pattern_url_str = re.compile(r'<a href="(.*?)"', re.S)
        for url in re.findall(pattern_url, content):
            url_str = re.findall(pattern_url_str, url)[0]
            content = content.replace(
                url, SPLIT_STRING + URL_STRING + url_str + SPLIT_STRING)

    if 'www/mb/images/face' in content:
        pattern_qqemo = re.compile(r'(<img.*?>)', re.S)
        pattern_qqemo_img = re.compile(r"crs='(.*?)'", re.S)
        for qqemo in re.findall(pattern_qqemo, content):
            qqemo_url = re.findall(pattern_qqemo_img, qqemo)[0]
            filename = download_pic(qqemo_url, GIF_EXTENSION)
            content = content.replace(
                qqemo,
                SPLIT_STRING + QQEMO_STRING + filename + SPLIT_STRING)

    if '<img class=' in content:
        pattern_other_emo = re.compile(r'(<img.*?>)', re.S)
        pattern_other_emo_img = re.compile(
            r'<img.*?crs=(.*?) title=', re.S)
        for other_emo in re.findall(pattern_other_emo, content):
            other_emo_url = re.findall(pattern_other_emo_img, other_emo)[0]
            # delete start and end mark ' "
            other_emo_url = other_emo_url[1:-1]
            # The last four characters of the URL are passed as the
            # extension argument.
            filename = download_pic(other_emo_url, other_emo_url[-4:])
            content = content.replace(
                other_emo,
                SPLIT_STRING + OTHEREMO_STRING + filename + SPLIT_STRING)

    # ---- Pass 2: emit one run (or hyperlink) per part ----
    content_parts = content.split(SPLIT_STRING)
    if not para:
        para = document.add_paragraph()

    for content_part in content_parts:
        # delete first <div> mark
        if content_part.startswith('<div>'):
            content_part = content_part[5:]

        if content_part.startswith(TOPIC_STRING):
            run = para.add_run(content_part.replace(TOPIC_STRING, ''))
            font = run.font
            font.italic = True
            font.bold = False
            font.size = Pt(font_size)
            font.color.rgb = RGBColor(0x00, 0x00, 0xCD)
        elif content_part.startswith(EMOJI_STRING):
            run = para.add_run()
            run.add_picture(content_part.replace(EMOJI_STRING, ''))
        elif content_part.startswith(FRIEND_STRING):
            run = para.add_run(content_part.replace(FRIEND_STRING, ''))
            font = run.font
            font.italic = True
            font.bold = False
            font.size = Pt(font_size - 2)
            font.color.rgb = RGBColor(0xFF, 0x45, 0x00)
        elif content_part.startswith(URL_STRING):
            link = content_part.replace(URL_STRING, '')
            # The URL doubles as the visible link text.
            docx_ext.add_hyperlink(para, link, link, '1E90FF', True)
        elif content_part.startswith(QQEMO_STRING):
            run = para.add_run()
            run.add_picture(content_part.replace(QQEMO_STRING, ''))
        elif content_part.startswith(OTHEREMO_STRING):
            run = para.add_run()
            run.add_picture(content_part.replace(OTHEREMO_STRING, ''))
        else:
            # Decode the HTML entities left in plain text.  '&amp;' is
            # handled last so that text such as '&amp;lt;' decodes to the
            # literal '&lt;' instead of being unescaped twice.
            content_part = content_part.replace('&gt;', '>')
            content_part = content_part.replace('&quot;', '"')
            content_part = content_part.replace('&lt;', '<')
            content_part = content_part.replace('&amp;', '&')
            run = para.add_run(content_part)
            font = run.font
            font.bold = False
            font.size = Pt(font_size)
            font.color.rgb = RGBColor(0x08, 0x08, 0x08)
def add_content(document, content):
    """Render a post's HTML content into a new paragraph as styled runs.

    Recognised HTML fragments (topic links, emoji ``<img>`` tags,
    ``<em rel=...>`` friend mentions and ``<a href=...>`` links) are first
    replaced in ``content`` by a placeholder of the form
    ``SPLIT_STRING + <kind marker> + payload + SPLIT_STRING``.  The text is
    then split on ``SPLIT_STRING`` and each part is appended to a freshly
    added paragraph according to its leading kind marker; parts without a
    marker are written as plain 16pt text.

    Args:
        document: object providing ``add_paragraph()``.
        content: the HTML text to render.

    Raises:
        IndexError: if a fragment that was selected for replacement does
            not contain the expected ``#`` / ``crs=`` / ``title=`` /
            ``href=`` part.
    """
    if 'k.t.qq.com' in content:
        pattern = re.compile(r'(<a href="http://k.t.qq.com.*?</a>)', re.S)
        for topic in re.findall(pattern, content):
            topic_word = topic.split('#')[1]
            content = content.replace(
                topic,
                SPLIT_STRING + TOPIC_STRING + '#' + topic_word + '#' +
                SPLIT_STRING)

    if 'www/mb/images/emoji' in content:
        pattern_emoji = re.compile(r'(<img.*?>)', re.S)
        pattern_emoji_img = re.compile(r"crs='(.*?)'", re.S)
        for emoji in re.findall(pattern_emoji, content):
            emoji_url = re.findall(pattern_emoji_img, emoji)[0]
            filename = download_pic(emoji_url, PNG_EXTENSION)
            content = content.replace(
                emoji,
                SPLIT_STRING + EMOJI_STRING + filename + SPLIT_STRING)

    if 'em rel="@' in content:
        pattern_friend = re.compile(r'(<em rel=.*?</em>)', re.S)
        pattern_friend_name = re.compile(r'<em.*?title="(.*?)"', re.S)
        for friend in re.findall(pattern_friend, content):
            friend_name = re.findall(pattern_friend_name, friend)[0]
            content = content.replace(
                friend,
                SPLIT_STRING + FRIEND_STRING + friend_name + SPLIT_STRING)

    # delete first <div> mark -- only when it is really there, so content
    # that begins with text or with a placeholder is not truncated
    if content.startswith('<div>'):
        content = content[5:]

    if 'http://url.cn' in content:
        pattern_url = re.compile(r'(<a href=.*?</a>)', re.S)
        pattern_url_str = re.compile(r'<a href="(.*?)"', re.S)
        for url in re.findall(pattern_url, content):
            url_str = re.findall(pattern_url_str, url)[0]
            content = content.replace(
                url, SPLIT_STRING + URL_STRING + url_str + SPLIT_STRING)

    content_parts = content.split(SPLIT_STRING)
    para = document.add_paragraph()

    for content_part in content_parts:
        if content_part.startswith(TOPIC_STRING):
            run = para.add_run(content_part.replace(TOPIC_STRING, ''))
            font = run.font
            font.italic = True
            font.size = Pt(16)
            font.color.rgb = RGBColor(0x00, 0x00, 0xCD)
        elif content_part.startswith(EMOJI_STRING):
            run = para.add_run()
            run.add_picture(content_part.replace(EMOJI_STRING, ''))
        elif content_part.startswith(FRIEND_STRING):
            run = para.add_run(content_part.replace(FRIEND_STRING, ''))
            font = run.font
            font.italic = True
            font.size = Pt(16)
            font.color.rgb = RGBColor(0xFF, 0x45, 0x00)
        elif content_part.startswith(URL_STRING):
            link = content_part.replace(URL_STRING, '')
            # The URL doubles as the visible link text.
            docx_ext.add_hyperlink(para, link, link, '1E90FF', True)
        else:
            run = para.add_run(content_part)
            font = run.font
            font.size = Pt(16)
            font.color.rgb = RGBColor(0x08, 0x08, 0x08)