class ConversionController(object):
    """Drive the conversion of one Wikidot source file into Markdown files.

    The complete input is converted and written to ``<input name>.mktxt``
    in the output directory. When individual files are requested, the input
    is additionally split with the converter's ``split_text`` and every part
    except the first one is written to ``<n>.mktxt``.
    """

    def __init__(self, options):
        """Store the settings carried by *options* and create the converter.

        Reads ``filename``, ``output_dir``, ``blog`` and ``individual`` from
        *options*.
        """
        self.__input_wiki_file = options.filename
        self.__output_directory = options.output_dir
        self.__fill_blog = options.blog
        self.__create_individual_files = options.individual
        self.__converter = WikidotToMarkdown()

    def __prepare_output_dir(self):
        """Create the output directory.

        An already existing directory is reported and accepted. Any other
        failure is reported and ends the program with exit status 1.
        """
        # Local import: ``os.errno`` was never a public API and no longer
        # exists on Python 3.7+, so the constant is taken from ``errno``.
        import errno

        try:
            os.makedirs(self.__output_directory)
        except OSError as ex:
            print("Could not create output folder " + self.__output_directory + ".")
            if ex.errno == errno.EEXIST:
                print("It already exists.")
            else:
                print("Error %i: %s" % (ex.errno, str(ex)))
                sys.exit(1)

    def convert(self):
        """Convert the input file and write the results to the output directory."""
        self.__prepare_output_dir()
        # ``with`` guarantees the input handle is closed, even if reading fails.
        with codecs.open(self.__input_wiki_file, encoding='utf-8') as wiki_file:
            text = wiki_file.read()
        base_filename = os.path.splitext(os.path.basename(self.__input_wiki_file))[0]

        # write the complete file to the output directory:
        complete_text = self.__converter.convert(text)
        self.write_unicode_file(
            os.path.join(self.__output_directory, base_filename + '.mktxt'),
            complete_text)
        # NOTE: the HTML export (markdown + style.css) is disabled in this version.

        # now handle the text split into little chunks:
        if not self.__create_individual_files:
            return
        parts = self.__converter.split_text(text)
        if len(parts) < 2:
            # we need at least 2 entries (the first part is trashed and one
            # part with content!)
            return

        for i, raw_part in enumerate(parts, 1):
            text_part = self.__converter.convert(raw_part)
            if i == 1:
                print("\nAttention! We skip the first output part (when splitting the text into parts):\n\n%s" % text_part)
                continue
            self.write_unicode_file(
                os.path.join(self.__output_directory, "%i.mktxt" % i),
                text_part)
            if self.__fill_blog:
                # NOTE(review): ``wprb``, ``start`` and ``gradient`` are not
                # defined anywhere in this class. Presumably they are meant to
                # be module-level names; verify, otherwise this branch raises
                # NameError as soon as blog filling is enabled.
                lines = text_part.split("\n")
                # the first line is the heading and becomes the post title
                title = lines[0].replace("# ", "")
                content = '\n'.join(lines[1:])
                date = dt.datetime(start[0], start[1], start[2], 17, 11, 11) \
                    + dt.timedelta(int((i - 2) * gradient))
                wprb.post_new(title, content, [], '', 'private', date)
                time.sleep(SLEEP_TIME)

    def write_unicode_file(self, path_to_file, content):
        """Write *content* to *path_to_file* encoded as UTF-8.

        I/O and encoding failures are reported on stdout and otherwise
        ignored, so one unwritable file does not abort the whole run.
        """
        try:
            with codecs.open(path_to_file, encoding='utf-8', mode='w') as out_file:
                out_file.write(content)
        except (EnvironmentError, UnicodeError):
            print("Error on writing to file %s." % path_to_file)
def __init__(self, options):
    """Copy the conversion settings from *options* and create the converter.

    Reads ``filename``, ``output_dir``, ``blog`` and ``individual`` from
    *options*.
    """
    # where to read from and where to write to
    self.__input_wiki_file, self.__output_directory = (
        options.filename, options.output_dir)
    # the two optional processing modes
    self.__fill_blog, self.__create_individual_files = (
        options.blog, options.individual)
    # the object doing the actual Wikidot -> Markdown translation
    self.__converter = WikidotToMarkdown()
class ConversionController(object):
    """Drive the conversion of one Wikidot source file into Markdown files.

    The complete input is converted and written to ``<input name>.mktxt``
    in the output directory. When individual files are requested, the input
    is additionally split with the converter's ``split_text`` and every part
    except the first one is written to ``<n>.mktxt``.
    """

    def __init__(self, options):
        """Store the settings carried by *options* and create the converter.

        Reads ``filename``, ``output_dir``, ``blog`` and ``individual`` from
        *options*.
        """
        self.__input_wiki_file = options.filename
        self.__output_directory = options.output_dir
        self.__fill_blog = options.blog
        self.__create_individual_files = options.individual
        self.__converter = WikidotToMarkdown()

    def __prepare_output_dir(self):
        """Create the output directory.

        An already existing directory is reported and accepted. Any other
        failure is reported and ends the program with exit status 1.
        """
        # Local import: ``os.errno`` was never a public API and no longer
        # exists on Python 3.7+, so the constant is taken from ``errno``.
        import errno

        try:
            os.makedirs(self.__output_directory)
        except OSError as ex:
            print("Could not create output folder " + self.__output_directory + ".")
            if ex.errno == errno.EEXIST:
                print("It already exists.")
            else:
                print("Error %i: %s" % (ex.errno, str(ex)))
                sys.exit(1)

    def convert(self):
        """Convert the input file and write the results to the output directory."""
        self.__prepare_output_dir()
        # ``with`` guarantees the input handle is closed, even if reading fails.
        with codecs.open(self.__input_wiki_file, encoding='utf-8') as wiki_file:
            text = wiki_file.read()
        base_filename = os.path.splitext(os.path.basename(self.__input_wiki_file))[0]

        # write the complete file to the output directory:
        complete_text = self.__converter.convert(text)
        self.write_unicode_file(
            os.path.join(self.__output_directory, base_filename + '.mktxt'),
            complete_text)
        # NOTE: the HTML export (markdown + style.css) is disabled in this version.

        # now handle the text split into little chunks:
        if not self.__create_individual_files:
            return
        parts = self.__converter.split_text(text)
        if len(parts) < 2:
            # we need at least 2 entries (the first part is trashed and one
            # part with content!)
            return

        for i, raw_part in enumerate(parts, 1):
            text_part = self.__converter.convert(raw_part)
            if i == 1:
                print("\nAttention! We skip the first output part (when splitting the text into parts):\n\n%s" % text_part)
                continue
            self.write_unicode_file(
                os.path.join(self.__output_directory, "%i.mktxt" % i),
                text_part)
            if self.__fill_blog:
                # NOTE(review): ``wprb``, ``start`` and ``gradient`` are not
                # defined anywhere in this class. Presumably they are meant to
                # be module-level names; verify, otherwise this branch raises
                # NameError as soon as blog filling is enabled.
                lines = text_part.split("\n")
                # the first line is the heading and becomes the post title
                title = lines[0].replace("# ", "")
                content = '\n'.join(lines[1:])
                date = dt.datetime(start[0], start[1], start[2], 17, 11, 11) \
                    + dt.timedelta(int((i - 2) * gradient))
                wprb.post_new(title, content, [], '', 'private', date)
                time.sleep(SLEEP_TIME)

    def write_unicode_file(self, path_to_file, content):
        """Write *content* to *path_to_file* encoded as UTF-8.

        I/O and encoding failures are reported on stdout and otherwise
        ignored, so one unwritable file does not abort the whole run.
        """
        try:
            with codecs.open(path_to_file, encoding='utf-8', mode='w') as out_file:
                out_file.write(content)
        except (EnvironmentError, UnicodeError):
            print("Error on writing to file %s." % path_to_file)
class ConversionController(object):
    """Drive the conversion of one Wikidot source file into Markdown/HTML.

    The complete input is written as ``complete.mktxt`` and, rendered with
    ``markdown`` and the style sheet ``style.css``, as ``complete.html``.
    When individual files are requested, the input is additionally split
    with the converter's ``split_text``; every part except the first one is
    written to ``<n>.mktxt`` and, if blog filling is enabled, posted through
    ``WordPressPostingRobot``.
    """

    def __init__(self, options):
        """Store the settings carried by *options* and create the converter.

        Reads ``filename``, ``output_dir``, ``blog`` and ``individual`` from
        *options*.
        """
        self.__input_wiki_file = options.filename
        self.__output_directory = options.output_dir
        self.__fill_blog = options.blog
        self.__create_individual_files = options.individual
        self.__converter = WikidotToMarkdown()

    def __prepare_output_dir(self):
        """Create the output directory.

        An already existing directory is reported and accepted. Any other
        failure is reported and ends the program with exit status 1.
        """
        # Local import: ``os.errno`` was never a public API and no longer
        # exists on Python 3.7+, so the constant is taken from ``errno``.
        import errno

        try:
            os.makedirs(self.__output_directory)
        except OSError as ex:
            print("Could not create output folder " + self.__output_directory + ".")
            if ex.errno == errno.EEXIST:
                print("It already exists.")
            else:
                print("Error %i: %s" % (ex.errno, str(ex)))
                sys.exit(1)

    @staticmethod
    def _ask_for_day(prompt_template):
        """Ask on stdin for a ``YYYY-MM-DD`` day; empty input selects today.

        *prompt_template* must contain one ``%s``, which is filled with
        today's date as the offered default. Returns the chosen day as a list
        ``[year, month, day]`` of ints.
        """
        today = dt.datetime.now().strftime("%Y-%m-%d")
        answer = raw_input(prompt_template % today)
        if answer == "":
            answer = today
        return [int(value) for value in answer.split("-")]

    def convert(self):
        """Convert the input file and write (and optionally post) the results."""
        self.__prepare_output_dir()
        # ``with`` guarantees the input handle is closed, even if reading fails.
        with codecs.open(self.__input_wiki_file, encoding='utf-8') as wiki_file:
            text = wiki_file.read()

        # write the complete files to the output directory:
        complete_text = self.__converter.convert(text)
        self.write_unicode_file(
            os.path.join(self.__output_directory, 'complete.mktxt'),
            complete_text)
        # the style sheet is looked up relative to the current working directory
        with open('style.css') as css_file:
            style_sheet = css_file.read()
        html_text = '<html><head><title>%s</title><style type="text/css">%s</style></head><body><div class="wikistyle">' % ('Converted Markdown', style_sheet)
        html_text += markdown.markdown(complete_text)
        html_text += "</div></body></html>"
        self.write_unicode_file(
            os.path.join(self.__output_directory, 'complete.html'),
            html_text)

        # now handle the text split into little chunks:
        if not self.__create_individual_files:
            return
        parts = self.__converter.split_text(text)
        if len(parts) < 2:
            # we need at least 2 entries (the first part is trashed and one
            # part with content!)
            return

        if self.__fill_blog:
            wprb = WordPressPostingRobot(SITE, USER)
            start = self._ask_for_day('Please enter the start date for the posts: [%s] ')
            end = self._ask_for_day('Please enter the end date for the posts: [%s] ')
            days_difference = (dt.datetime(end[0], end[1], end[2])
                               - dt.datetime(start[0], start[1], start[2])).days
            # days between two consecutive posts, so that the posted parts
            # (all but the first) are spread evenly from start to end date
            gradient = .0 if len(parts) == 2 else float(days_difference) / (len(parts) - 2)

        for i, raw_part in enumerate(parts, 1):
            text_part = self.__converter.convert(raw_part)
            if i == 1:
                print("\nAttention! We skip the first output part (when splitting the text into parts):\n\n%s" % text_part)
                continue
            self.write_unicode_file(
                os.path.join(self.__output_directory, "%i.mktxt" % i),
                text_part)
            if self.__fill_blog:
                lines = text_part.split("\n")
                # the first line is the heading and becomes the post title
                title = lines[0].replace("# ", "")
                content = '\n'.join(lines[1:])
                date = dt.datetime(start[0], start[1], start[2], 17, 11, 11) \
                    + dt.timedelta(int((i - 2) * gradient))
                wprb.post_new(title, content, [], '', 'private', date)
                # pause between two posts
                time.sleep(SLEEP_TIME)

    def write_unicode_file(self, path_to_file, content):
        """Write *content* to *path_to_file* encoded as UTF-8.

        I/O and encoding failures are reported on stdout and otherwise
        ignored, so one unwritable file does not abort the whole run.
        """
        try:
            with codecs.open(path_to_file, encoding='utf-8', mode='w') as out_file:
                out_file.write(content)
        except (EnvironmentError, UnicodeError):
            print("Error on writing to file %s." % path_to_file)
def __init__(self, options):
    """Copy the conversion settings from *options* and create the converter.

    Reads ``rss_filename``, ``input_source_dir`` and ``output_dir`` from
    *options*.
    """
    # the two inputs: the index file and the directory holding the sources
    self.__input_rss_file, self.__input_source_directory = (
        options.rss_filename, options.input_source_dir)
    # where the results are written to
    self.__output_directory = options.output_dir
    # the object doing the actual Wikidot -> Markdown translation
    self.__converter = WikidotToMarkdown()
class ConversionController(object):
    """Convert the Wikidot articles listed in an index file into Markdown posts.

    The index ("RSS") file is scanned for HTML table rows, each naming an
    article link, its title, its publishing date and its tags. For every row
    the article source ``<input source dir>/<article>.txt`` is converted and
    written, preceded by a ``---`` delimited header (title, layout, tags),
    to ``<output dir>/<YYYY-MM-DD>-<article>.markdown``.
    """

    # One complete table row of the index.
    _INDEX_ROW = re.compile(r'<tr>[\s]+?<td><a href="[\s\S]+?</tr>')
    # The four fields of a row: link target, title, date and tags.
    _INDEX_FIELDS = re.compile(
        r'<tr>[\s]+?<td><a href="([\s\S]+?)">([\s\S]+?)</a></td>[\s]+?<td><span[\s\S]+?>([\s\S]+?)</span></td>[\s]+?<td>([\s\S]*?)</td>')
    # Header and body of a generated post: title, tag lines, article text.
    _POST_TEMPLATE = u"---\ntitle: '{0}'\nlayout: post\ntags:\n{1}---\n\n{2}"

    def __init__(self, options):
        """Store the settings carried by *options* and create the converter.

        Reads ``rss_filename``, ``input_source_dir`` and ``output_dir`` from
        *options*.
        """
        self.__input_rss_file = options.rss_filename
        self.__input_source_directory = options.input_source_dir
        self.__output_directory = options.output_dir
        self.__converter = WikidotToMarkdown()

    def __prepare_output_dir(self):
        """Create the output directory.

        An already existing directory is reported and accepted. Any other
        failure is reported and ends the program with exit status 1.
        """
        # Local import: ``os.errno`` was never a public API and no longer
        # exists on Python 3.7+, so the constant is taken from ``errno``.
        import errno

        try:
            os.makedirs(self.__output_directory)
        except OSError as ex:
            print("Could not create output folder " + self.__output_directory + ".")
            if ex.errno == errno.EEXIST:
                print("It already exists.")
            else:
                print("Error %i: %s" % (ex.errno, str(ex)))
                sys.exit(1)

    def parse_rss(self, text):
        """Return the list of table rows (as strings) found in *text*.

        The list is empty when *text* contains no row.
        """
        return self._INDEX_ROW.findall(text)

    def parse_index_item(self, text):
        """Return the ``(link, title, date, tags)`` tuples found in *text*.

        For a single row as returned by ``parse_rss`` the list has one entry;
        it is empty when the row does not have the expected layout.
        """
        return self._INDEX_FIELDS.findall(text)

    def gen_format_tags(self, src_tags):
        """Return the whitespace-separated *src_tags* as list lines.

        Every tag becomes one `` - <tag>`` line. Splitting on any run of
        whitespace keeps stray blanks or newlines in the tag cell from
        producing empty list entries. No tags give an empty string.
        """
        return u"".join(u" - " + tag + u"\n" for tag in src_tags.split())

    def gen_markdown_context(self, article_url):
        """Return the converted text of the article *article_url*.

        The source is read from ``<input source dir>/<article_url>.txt``.
        Returns None, after reporting the file, when reading or converting
        fails.
        """
        wiki_filename = self.__input_source_directory + "/" + article_url + ".txt"
        try:
            with codecs.open(wiki_filename, encoding='utf-8') as wiki_file:
                text = wiki_file.read()
            return self.__converter.convert(text)
        except Exception:
            # Best effort per article: a broken source must not abort the
            # whole batch, but Ctrl-C and sys.exit() still get through.
            print("Failed to gen_markdown_context :  %s" % wiki_filename)
            return None

    def gen_article_context(self, article_url, title, tags):
        """Return the complete post (header plus converted text) or None.

        None is returned when the article text could not be produced.
        """
        format_tags = self.gen_format_tags(tags)
        format_context = self.gen_markdown_context(article_url)
        if format_context is None:
            return None
        return self._POST_TEMPLATE.format(title, format_tags, format_context)

    def deal_one_file(self, item_info):
        """Convert and write the article described by *item_info*.

        *item_info* is a ``(link, title, date, tags)`` tuple as produced by
        ``parse_index_item``; the date must have the form
        ``'%d %b %Y %H:%M'``, otherwise ``strptime`` raises ValueError.
        Always returns None; a failed article is reported on stdout.
        """
        publish_date = dt.datetime.strptime(item_info[2], '%d %b %Y %H:%M')
        # drop the first character of the link (presumably its leading "/")
        article_url = item_info[0][1:]
        filename = os.path.join(
            self.__output_directory,
            publish_date.strftime('%Y-%m-%d-') + article_url + ".markdown")
        context = self.gen_article_context(article_url, item_info[1], item_info[3])
        if context is None:
            print("Failed to deal :  %s" % article_url)
            return None
        with codecs.open(filename, 'w', encoding='utf-8') as md_file:
            md_file.write(context)
        return None

    def get_rss_context(self, filename):
        """Return the UTF-8 decoded content of *filename*, or None on failure."""
        try:
            with codecs.open(filename, encoding='utf-8') as rss_file:
                return rss_file.read()
        except (EnvironmentError, UnicodeError):
            return None

    def convert(self):
        """Convert every article listed in the index file.

        Always returns None; progress and problems are reported on stdout.
        """
        self.__prepare_output_dir()
        text = self.get_rss_context(self.__input_rss_file)
        if text is None:
            print("Failed to open RSS file:  %s" % self.__input_rss_file)
            return None

        # read index info from rss
        index_info = self.parse_rss(text)
        if not index_info:
            # findall() returns an empty list, never None, when nothing matches
            print("there is no index in rss.")
            return None

        # for each index info, deal file one by one
        for item in index_info:
            item_info = self.parse_index_item(item)
            if not item_info:
                # a row without the expected four fields cannot be converted
                print("Failed to parse index item, skipped.")
                continue
            self.deal_one_file(item_info[0])
        print("====== Success =====")
        return None
class ConversionController(object):
    """Drive the conversion of one Wikidot source file into Markdown/HTML.

    The complete input is written as ``complete.mktxt`` and, rendered with
    ``markdown`` and the style sheet ``style.css``, as ``complete.html``.
    When individual files are requested, the input is additionally split
    with the converter's ``split_text``; every part except the first one is
    written to ``<n>.mktxt`` and, if blog filling is enabled, posted through
    ``WordPressPostingRobot``.
    """

    def __init__(self, options):
        """Store the settings carried by *options* and create the converter.

        Reads ``filename``, ``output_dir``, ``blog`` and ``individual`` from
        *options*.
        """
        self.__input_wiki_file = options.filename
        self.__output_directory = options.output_dir
        self.__fill_blog = options.blog
        self.__create_individual_files = options.individual
        self.__converter = WikidotToMarkdown()

    def __prepare_output_dir(self):
        """Create the output directory.

        An already existing directory is reported and accepted. Any other
        failure is reported and ends the program with exit status 1.
        """
        # Local import: ``os.errno`` was never a public API and no longer
        # exists on Python 3.7+, so the constant is taken from ``errno``.
        import errno

        try:
            os.makedirs(self.__output_directory)
        except OSError as ex:
            print("Could not create output folder " + self.__output_directory + ".")
            if ex.errno == errno.EEXIST:
                print("It already exists.")
            else:
                print("Error %i: %s" % (ex.errno, str(ex)))
                sys.exit(1)

    @staticmethod
    def _ask_for_day(prompt_template):
        """Ask on stdin for a ``YYYY-MM-DD`` day; empty input selects today.

        *prompt_template* must contain one ``%s``, which is filled with
        today's date as the offered default. Returns the chosen day as a list
        ``[year, month, day]`` of ints.
        """
        today = dt.datetime.now().strftime("%Y-%m-%d")
        answer = raw_input(prompt_template % today)
        if answer == "":
            answer = today
        return [int(value) for value in answer.split("-")]

    def convert(self):
        """Convert the input file and write (and optionally post) the results."""
        self.__prepare_output_dir()
        # ``with`` guarantees the input handle is closed, even if reading fails.
        with codecs.open(self.__input_wiki_file, encoding='utf-8') as wiki_file:
            text = wiki_file.read()

        # write the complete files to the output directory:
        complete_text = self.__converter.convert(text)
        self.write_unicode_file(
            os.path.join(self.__output_directory, 'complete.mktxt'),
            complete_text)
        # the style sheet is looked up relative to the current working directory
        with open('style.css') as css_file:
            style_sheet = css_file.read()
        html_text = '<html><head><title>%s</title><style type="text/css">%s</style></head><body><div class="wikistyle">' % ('Converted Markdown', style_sheet)
        html_text += markdown.markdown(complete_text)
        html_text += "</div></body></html>"
        self.write_unicode_file(
            os.path.join(self.__output_directory, 'complete.html'),
            html_text)

        # now handle the text split into little chunks:
        if not self.__create_individual_files:
            return
        parts = self.__converter.split_text(text)
        if len(parts) < 2:
            # we need at least 2 entries (the first part is trashed and one
            # part with content!)
            return

        if self.__fill_blog:
            wprb = WordPressPostingRobot(SITE, USER)
            start = self._ask_for_day('Please enter the start date for the posts: [%s] ')
            end = self._ask_for_day('Please enter the end date for the posts: [%s] ')
            days_difference = (dt.datetime(end[0], end[1], end[2])
                               - dt.datetime(start[0], start[1], start[2])).days
            # days between two consecutive posts, so that the posted parts
            # (all but the first) are spread evenly from start to end date
            gradient = .0 if len(parts) == 2 else float(days_difference) / (len(parts) - 2)

        for i, raw_part in enumerate(parts, 1):
            text_part = self.__converter.convert(raw_part)
            if i == 1:
                print("\nAttention! We skip the first output part (when splitting the text into parts):\n\n%s" % text_part)
                continue
            self.write_unicode_file(
                os.path.join(self.__output_directory, "%i.mktxt" % i),
                text_part)
            if self.__fill_blog:
                lines = text_part.split("\n")
                # the first line is the heading and becomes the post title
                title = lines[0].replace("# ", "")
                content = '\n'.join(lines[1:])
                date = dt.datetime(start[0], start[1], start[2], 17, 11, 11) \
                    + dt.timedelta(int((i - 2) * gradient))
                wprb.post_new(title, content, [], '', 'private', date)
                # pause between two posts
                time.sleep(SLEEP_TIME)

    def write_unicode_file(self, path_to_file, content):
        """Write *content* to *path_to_file* encoded as UTF-8.

        I/O and encoding failures are reported on stdout and otherwise
        ignored, so one unwritable file does not abort the whole run.
        """
        try:
            with codecs.open(path_to_file, encoding='utf-8', mode='w') as out_file:
                out_file.write(content)
        except (EnvironmentError, UnicodeError):
            print("Error on writing to file %s." % path_to_file)