def filter_html(path):
    """Extract the labelled record lines of the file at *path* into ``simple.txt``.

    The file is read through ``file_utils.read2mem`` and split on newlines.
    Every line that contains at least one of the field labels below (name,
    gender, ID number, student number, college, major, length of schooling,
    graduation time, campus, dormitory building, room number) is cleaned up
    and appended to ``simple.txt`` followed by a newline.
    """
    field_labels = (
        '姓名:', '性别:', '身份证号',
        '学号:', '所在学院:', '所在专业:',
        '学制:', '毕业时间:', '校区:', '宿舍楼:', '房间号:',
    )
    for row in file_utils.read2mem(path).split('\n'):
        if not any(label in row for label in field_labels):
            continue
        # Drop spaces and <br/> tags, then trim whatever whitespace is left.
        cleaned = row.replace(' ', '').replace('<br/>', '').strip()
        file_utils.append2file('simple.txt', cleaned + '\n')
def filter_html(path):
    """Write the lines of *path* that carry a known field label to ``simple.txt``.

    A line is kept when it contains any of the labels in ``wanted`` (name,
    gender, ID number, student number, college, major, length of schooling,
    graduation time, campus, dormitory building, room number).  Kept lines
    have spaces and ``<br/>`` tags removed, are stripped, and are appended to
    ``simple.txt`` one per line, in their original order.
    """
    wanted = [
        '姓名:', '性别:', '身份证号', '学号:', '所在学院:', '所在专业:',
        '学制:', '毕业时间:', '校区:', '宿舍楼:', '房间号:',
    ]
    text = file_utils.read2mem(path)
    matching = [
        entry for entry in text.split('\n')
        if any(tag in entry for tag in wanted)
    ]
    for entry in matching:
        without_spaces = entry.replace(' ', '')
        without_breaks = without_spaces.replace('<br/>', '')
        file_utils.append2file('simple.txt', without_breaks.strip() + '\n')
def build_summary(root_path):
    """Rebuild the summary file for *root_path* with the root prefix stripped.

    Steps:
      1. Delete any existing summary file.
      2. Let ``build_summary_helper`` write the raw summary into a freshly
         opened file handle.
      3. Read the result back, remove every occurrence of ``root_path + '/'``
         from it, and rewrite the summary file with the shortened text.

    Args:
        root_path: Directory path handed to ``summary_path`` and
            ``build_summary_helper``; it is also the prefix removed from the
            generated content.
    """
    # Resolve the location once instead of recomputing it for every step.
    target = summary_path(root_path)

    if os.path.exists(target):
        os.remove(target)

    # The context manager guarantees the handle is closed even when the
    # helper raises part-way through writing.
    # NOTE(review): the literal 0 is presumably the starting depth/indent
    # level expected by build_summary_helper -- confirm against the helper.
    with open(target, 'a+') as summary:
        build_summary_helper(root_path, 0, summary)

    content = read2mem(target)
    content = content.replace(root_path + '/', '')

    # Replace the raw file with the cleaned-up content.
    os.remove(target)
    append2file(target, content)
def line2table(path):
    """Turn the ``label:value`` lines of *path* into tab-separated rows in ``table.txt``.

    For every line of the file, the text after the last ``':'`` is appended to
    ``table.txt``.  Each value is followed by a tab, except every 12th value,
    which is followed by a newline so that each output row holds 12 values.
    """
    rows = file_utils.read2mem(path).split('\n')
    for position, row in enumerate(rows, start=1):
        value = row.split(':')[-1]
        file_utils.append2file('table.txt', value)
        # Twelve values make up one output row.
        separator = '\n' if position % 12 == 0 else '\t'
        file_utils.append2file('table.txt', separator)
def get_links():
    """Regenerate ``star.md`` from the exported browser bookmarks file.

    The hard-coded bookmarks HTML export is read and fed through
    ``LinkParser``.  ``star.md`` is then rewritten from scratch: an
    introduction, followed by one entry per item of the parser's ``links``.
    For each item, element 0 is used as the display text and element 1 as the
    link target; an item whose element 1 equals ``'h3'`` is written as a
    ``##`` section heading instead of a bullet link.  Every entry is also
    echoed to stdout.
    """
    html_code = file_utils.read2mem('/home/cwh/Mission/bookmarks_16_3_23.html')
    parser = LinkParser()
    parser.feed(html_code)
    parser.close()

    # Start from an empty file; there is nothing to delete if it is absent.
    try:
        os.remove('star.md')
    except FileNotFoundError:
        pass

    # The introduction must end with a blank line: without it the first
    # generated heading/bullet is glued onto the last sentence of the intro
    # and is not rendered as a separate Markdown block.
    file_utils.append2file(
        'star.md',
        '#我的收藏\n>他山之石,可以攻玉\n\n'
        '开发过程中收藏在Chrome书签栏里的技术文章,与自己的文章分开\n\n'
        '主要涉及python,android,ubuntu等内容,我自己常常在这里面找回忘了的知识。'
        '\n\n')

    for word in parser.links:
        if word[1] == 'h3':
            # Section heading entry.
            file_utils.append2file('star.md', '##' + word[0] + '\n\n')
            print(word[0] + '\n')
            continue
        # Regular bookmark: Markdown bullet with an inline link.
        bullet = '- [' + word[0] + '](' + word[1] + ')'
        file_utils.append2file('star.md', bullet + '\n\n')
        print(bullet + '\n')
def get_links():
    """Regenerate ``star.md`` from the exported browser bookmarks file.

    The hard-coded bookmarks HTML export is read and fed through
    ``MyHTMLParser``.  ``star.md`` is then rewritten from scratch: an
    introduction, followed by one entry per item of the parser's ``links``.
    For each item, element 0 is used as the display text and element 1 as the
    link target; an item whose element 1 equals ``'h3'`` is written as a
    ``##`` section heading instead of a bullet link.  Every entry is also
    echoed to stdout.
    """
    html_code = file_utils.read2mem('/home/cwh/Mission/bookmarks_16_3_23.html')
    parser = MyHTMLParser()
    parser.feed(html_code)
    parser.close()

    # Start from an empty file; there is nothing to delete if it is absent.
    try:
        os.remove('star.md')
    except FileNotFoundError:
        pass

    # The introduction must end with a blank line: without it the first
    # generated heading/bullet is glued onto the last line of the intro and
    # is not rendered as a separate Markdown block.
    file_utils.append2file(
        'star.md',
        '#我的收藏\n>他山之石,可以攻玉\n\n'
        '开发过程中收藏在Chrome书签栏里的技术文章,独立出来\n\n'
        '转换方式:'
        '\n\n')

    for word in parser.links:
        if word[1] == 'h3':
            # Section heading entry.
            file_utils.append2file('star.md', '##' + word[0] + '\n\n')
            print(word[0] + '\n')
            continue
        # Regular bookmark: Markdown bullet with an inline link.
        bullet = '- [' + word[0] + '](' + word[1] + ')'
        file_utils.append2file('star.md', bullet + '\n\n')
        print(bullet + '\n')
def get_links():
    """Regenerate ``star.md`` from the exported browser bookmarks file.

    The hard-coded bookmarks HTML export is read and fed through
    ``LinkParser``.  ``star.md`` is then rewritten from scratch: an
    introduction, followed by one entry per item of the parser's ``links``.
    For each item, element 0 is used as the display text and element 1 as the
    link target; an item whose element 1 equals ``'h3'`` is written as a
    ``##`` section heading instead of a bullet link.  Every entry is also
    echoed to stdout.
    """
    html_code = file_utils.read2mem('/home/cwh/Mission/bookmarks_16_3_23.html')
    parser = LinkParser()
    parser.feed(html_code)
    parser.close()

    # Start from an empty file; there is nothing to delete if it is absent.
    try:
        os.remove('star.md')
    except FileNotFoundError:
        pass

    # The introduction must end with a blank line: without it the first
    # generated heading/bullet is glued onto the last sentence of the intro
    # and is not rendered as a separate Markdown block.
    file_utils.append2file(
        'star.md',
        '#我的收藏\n>他山之石,可以攻玉\n\n'
        '开发过程中收藏在Chrome书签栏里的技术文章,与自己的文章分开\n\n'
        '主要涉及python,android,ubuntu等内容,我自己常常在这里面找回忘了的知识。'
        '\n\n'
    )

    for word in parser.links:
        if word[1] == 'h3':
            # Section heading entry.
            file_utils.append2file('star.md', '##' + word[0] + '\n\n')
            print(word[0] + '\n')
            continue
        # Regular bookmark: Markdown bullet with an inline link.
        bullet = '- [' + word[0] + '](' + word[1] + ')'
        file_utils.append2file('star.md', bullet + '\n\n')
        print(bullet + '\n')
def test_get_tds():
    """Run ``get_tds`` on the contents of the ``stu_dom.html`` fixture.

    Makes no assertions; it only calls ``get_tds`` with the file's text.
    """
    get_tds(file_utils.read2mem('stu_dom.html'))