def filter_html(path):
    """Extract the labelled info lines of a file into ``simple.txt``.

    The file at *path* is loaded with ``file_utils.read2mem`` and split on
    newlines. Every line containing at least one of the field labels below is
    cleaned (spaces and ``<br/>`` removed, then stripped) and appended,
    newline-terminated, to ``simple.txt`` via ``file_utils.append2file``.
    """
    # Field labels that mark a line worth keeping (substring match).
    labels = (
        '姓名:', '性别:', '身份证号', '学号:', '所在学院:', '所在专业:',
        '学制:', '毕业时间:', '校区:', '宿舍楼:', '房间号:',
    )
    for raw in file_utils.read2mem(path).split('\n'):
        if not any(label in raw for label in labels):
            continue
        cleaned = raw.replace(' ', '').replace('<br/>', '').strip()
        file_utils.append2file('simple.txt', cleaned + '\n')
def get_tds(src):
    """Parse *src* with ``TdParser`` and report the name / room-number cells.

    For every entry in the parser's ``tds`` whose second element contains
    '姓名' or ' 房间号:', a newline is appended to ``tds.txt`` and the element
    is printed followed by an extra newline.
    """
    parser = TdParser()
    parser.feed(src)
    parser.close()
    for cell in parser.tds:
        text = cell[1]
        # NOTE(review): the print is treated as part of the matching branch
        # (same append + print pairing used elsewhere in this file) -- confirm.
        if not ('姓名' in text or ' 房间号:' in text):
            continue
        file_utils.append2file('tds.txt', '\n')
        print(text + '\n')
def filter_html(path):
    """Append the lines of *path* that carry a known field label to ``simple.txt``.

    Content is read through ``file_utils.read2mem`` and processed line by
    line. A line is kept when it contains any of the labels listed in
    ``wanted``; before being written it has spaces and ``<br/>`` removed, is
    stripped, and gets a trailing newline.
    """
    wanted = [
        '姓名:',
        '性别:',
        '身份证号',
        '学号:',
        '所在学院:',
        '所在专业:',
        '学制:',
        '毕业时间:',
        '校区:',
        '宿舍楼:',
        '房间号:',
    ]

    def has_label(row):
        # True when at least one label occurs somewhere in the row.
        for key in wanted:
            if key in row:
                return True
        return False

    text = file_utils.read2mem(path)
    for row in text.split('\n'):
        if has_label(row):
            without_spaces = row.replace(' ', '')
            without_breaks = without_spaces.replace('<br/>', '')
            file_utils.append2file('simple.txt', without_breaks.strip() + '\n')
def build_summary(root_path):
    """Rebuild the summary file for *root_path* with the root prefix removed.

    Steps:
      1. Delete any existing summary file (location given by
         ``summary_path(root_path)``).
      2. Open a fresh one in append mode and let
         ``build_summary_helper(root_path, 0, summary)`` write into it.
      3. Read the result back, drop every occurrence of ``root_path + '/'``,
         and rewrite the file with the shortened content.

    Args:
        root_path: Directory passed to ``summary_path`` and to the helper; its
            ``root_path + '/'`` prefix is removed from the generated text.
    """
    # Resolve the target once instead of recomputing it for every operation.
    target = summary_path(root_path)

    if os.path.exists(target):
        os.remove(target)

    # ``with`` closes the handle even when the helper raises, so a failed run
    # no longer leaks an open file.
    with open(target, 'a+') as summary:
        build_summary_helper(root_path, 0, summary)

    content = read2mem(target).replace(root_path + '/', '')

    # Remove and re-create so the file holds only the rewritten content.
    os.remove(target)
    append2file(target, content)
def query_info(url_template='', start=1055, stop=1500):
    """Fetch one page per number in ``range(start, stop)`` and store the hits.

    For each number ``i`` the URL ``url_template % i`` is requested. If the
    response text contains '没有入住安排信息' a short notice is printed;
    otherwise the text is appended to ``data.txt``. When the request fails
    with ``HTTPError`` the error's ``msg`` is used as the text for that number
    only.

    Args:
        url_template: ``%``-style template with one integer placeholder. The
            default is the empty placeholder found in the original code, which
            makes ``url_template % i`` raise ``TypeError``; pass a real
            template to use the function.
        start: First number to query (inclusive).
        stop: End of the range (exclusive).
    """
    # Numbers noted by the original author (meaning not documented):
    # 21033429
    # 20132000
    for i in range(start, stop):
        url = url_template % i
        # The outcome is decided per request. Previously an error flag set by
        # one failure was never cleared, so every later successful response
        # was ignored and the stale error message was reused.
        try:
            resp = urlopen(url)
        except HTTPError as e:
            resp_msg = e.msg
        else:
            # Close the response as soon as its body has been read.
            with resp:
                resp_msg = resp.read().decode()

        if '没有入住安排信息' in resp_msg:
            print('no msg for num: %d' % i)
        else:
            file_utils.append2file('data.txt', resp_msg)
def line2table(path):
    """Lay out the values of a ``label:value`` file as rows in ``table.txt``.

    Each line of *path* (read via ``file_utils.read2mem``) contributes the
    text after its last ':' as one cell. Cells are separated by tabs, and
    every 12th cell is followed by a newline instead, ending the row.
    """
    rows = file_utils.read2mem(path).split('\n')
    for position, row in enumerate(rows, 1):
        # Keep only what follows the final ':' (the whole line if none).
        file_utils.append2file('table.txt', row.split(':')[-1])
        separator = '\n' if position % 12 == 0 else '\t'
        file_utils.append2file('table.txt', separator)
def get_links():
    """Turn the exported bookmarks HTML into the Markdown list ``star.md``.

    The bookmarks file is parsed with ``LinkParser``. ``star.md`` is removed
    if present, recreated with a fixed header, and then receives one line per
    parsed entry; each entry is also echoed to stdout. An entry whose second
    element equals ``'h3'`` becomes a ``##`` heading built from its first
    element; any other entry becomes a ``- [first](second)`` bullet.
    """
    out_file = 'star.md'
    header = (
        '#我的收藏\n>他山之石,可以攻玉\n\n'
        '开发过程中收藏在Chrome书签栏里的技术文章,与自己的文章分开\n\n'
        '主要涉及python,android,ubuntu等内容,我自己常常在这里面找回忘了的知识。'
    )

    parser = LinkParser()
    parser.feed(file_utils.read2mem('/home/cwh/Mission/bookmarks_16_3_23.html'))
    parser.close()

    # Start from scratch; a missing file simply means there is nothing to drop.
    try:
        os.remove(out_file)
    except FileNotFoundError:
        pass
    file_utils.append2file(out_file, header)

    for entry in parser.links:
        if entry[1] == 'h3':
            file_utils.append2file(out_file, '##' + entry[0] + '\n\n')
            print(entry[0] + '\n')
            continue
        bullet = '- [' + entry[0] + '](' + entry[1] + ')'
        file_utils.append2file(out_file, bullet + '\n\n')
        print(bullet + '\n')
def get_links():
    """Write the parsed bookmarks to ``star.md`` as Markdown and echo them.

    Uses ``MyHTMLParser`` to parse the exported bookmarks file, deletes any
    previous ``star.md``, writes the fixed header, and then emits every item
    of the parser's ``links``: items whose second element is ``'h3'`` as
    ``##`` headings, all others as ``- [first](second)`` bullets. Each
    emitted item is also printed.
    """
    source = file_utils.read2mem('/home/cwh/Mission/bookmarks_16_3_23.html')
    parser = MyHTMLParser()
    parser.feed(source)
    parser.close()

    # Discard the previous output; it is fine if there was none.
    try:
        os.remove('star.md')
    except FileNotFoundError:
        pass

    intro = '#我的收藏\n>他山之石,可以攻玉\n\n开发过程中收藏在Chrome书签栏里的技术文章,独立出来\n\n转换方式:'
    file_utils.append2file('star.md', intro)

    for item in parser.links:
        is_heading = item[1] == 'h3'
        if is_heading:
            file_line = '##' + item[0] + '\n\n'
            shown = item[0] + '\n'
        else:
            file_line = '- [' + item[0] + '](' + item[1] + ')\n\n'
            shown = '- [' + item[0] + '](' + item[1] + ')\n'
        file_utils.append2file('star.md', file_line)
        print(shown)
def get_links():
    """Regenerate ``star.md`` from the exported bookmarks HTML file.

    ``LinkParser`` parses the bookmarks file; the old ``star.md`` (if any) is
    deleted and a fixed header is written. Each element of the parser's
    ``links`` is then appended and printed: a ``##`` heading when its second
    element is ``'h3'``, otherwise a ``- [first](second)`` bullet.
    """
    target = 'star.md'

    def emit(file_text, console_text):
        # Append to the Markdown file first, then mirror the entry on stdout.
        file_utils.append2file(target, file_text)
        print(console_text)

    bookmarks = file_utils.read2mem('/home/cwh/Mission/bookmarks_16_3_23.html')
    link_parser = LinkParser()
    link_parser.feed(bookmarks)
    link_parser.close()

    # Remove stale output so the appends below start from an empty file.
    try:
        os.remove(target)
    except FileNotFoundError:
        pass

    file_utils.append2file(
        target,
        '#我的收藏\n>他山之石,可以攻玉\n\n开发过程中收藏在Chrome书签栏里的技术文章,与自己的文章分开\n\n主要涉及python,android,ubuntu等内容,我自己常常在这里面找回忘了的知识。'
    )

    for link in link_parser.links:
        if link[1] != 'h3':
            markdown = '- [' + link[0] + '](' + link[1] + ')'
            emit(markdown + '\n\n', markdown + '\n')
        else:
            emit('##' + link[0] + '\n\n', link[0] + '\n')