        .replace('<a href', '<span href') \
        .replace('</a>', '</span>')


def get_name(link):
    # the page name is the last path segment of the link, minus its extension
    fname = link.split('/')[-1]
    return fname.split('.')[0]


# walk the items of the parsed feed and mirror each article as a markdown page
for child in root[0]:
    if child.tag != 'item':
        continue
    link = child.find('link').text
    title = child.find('title').text.encode('utf-8')
    name = get_name(link) + '.md'
    file_path = '../pages/' + channel + '/' + name
    #if True:
    if not os.path.exists(file_path):
        # only fetch and write pages that have not been mirrored yet
        print file_path
        content = get_content(title, link)
        macros.write_page(channel, name, file_path, title, link, content)
    index_page += '#### [' + title + '](' + file_path + ') \n'

# rebuild the per-channel index page
index_file = open('../indexes/' + channel + '.md', 'w')
index_file.write(index_page)
index_file.close()
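# `macros.write_page` is defined outside this excerpt. Purely as an
# illustration (an assumption, not the real implementation), it could render
# a page roughly like this, given the arguments used at the call site above:
def write_page_sketch(channel, name, file_path, title, link, content):
    # a minimal markdown page: title, a link back to the original article,
    # then the captured content (channel and name are accepted only to match
    # the call signature above)
    page = '## ' + title + '\n\n'
    page += '[original](' + link + ')\n\n'
    page += content
    page_file = open(file_path, 'w')
    page_file.write(page)
    page_file.close()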
.replace('src="//img3.', 'src="https://img3.') \ .replace('src="//img4.', 'src="https://img4.') \ .replace('<a href', '<span href') \ .replace('</a>', '</span>') def get_name(link): fname = link.split('/')[-1] return fname.split('.')[0] # sync single page if len(sys.argv) > 3 : print sys.argv[5] content = get_content(sys.argv[4], sys.argv[5]) macros.write_page(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5], content) sys.exit(1) tree = ET.parse(xml_file) root = tree.getroot() for child in root[0]: if child.tag != 'item': continue link = child.find('link').text title = child.find('title').text.encode('utf-8') name = get_name(link) + '.md' file_path = '../pages/' + channel + '/' + name
    aid = fname.split('.')[0]
    return aid


# fetch the channel's list page and collect the article entries
index_text = requests.get(channel_url).text.encode('utf-8')
index_html = BeautifulSoup(index_text, 'html.parser')
articles = index_html.find('div', attrs={'class': 'list_wrapper'}) \
                     .find_all('div', attrs={'class': 'one_post'})

# mirror each listed article as a markdown page and rebuild the channel index
for article in articles:
    a_links = article.find_all('a')
    if len(a_links) < 2:
        continue
    link = a_links[1]
    a_url = link.get('href').encode('utf-8')
    a_title = link.text.encode('utf-8').strip()
    name = get_name(a_url) + '.md'
    file_path = '../pages/' + channel + '/' + name
    #content = get_content(a_url)
    if not os.path.exists(file_path):
    #if True:
        print file_path
        content = get_content(a_url)
        macros.write_page(channel, name, file_path, a_title, a_url, content)
    index_page += '#### [' + a_title + '](' + file_path + ') \n'

index_file = open('../indexes/' + channel + '.md', 'w')
index_file.write(index_page)
index_file.close()
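# `get_content` is defined elsewhere in this script. As an illustration only,
# a minimal version might fetch the article and apply the same clean-up
# replacements used in the feed-based scripts above; the content selector
# ('post_content') is an assumption, not the real class name.
def get_content_sketch(a_url):
    page_text = requests.get(a_url).text.encode('utf-8')
    page_html = BeautifulSoup(page_text, 'html.parser')
    # keep only the article body, then force https image URLs and neutralize links
    body = str(page_html.find('div', attrs={'class': 'post_content'}))
    return body.replace('src="//img3.', 'src="https://img3.') \
               .replace('src="//img4.', 'src="https://img4.') \
               .replace('<a href', '<span href') \
               .replace('</a>', '</span>')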