Ejemplo n.º 1
0
class ConversionController(object):
    """Drive the conversion of one Wikidot source file into Markdown files.

    The whole converted text is written to ``<output_dir>/<input name>.mktxt``.
    When individual files are requested, every part returned by the
    converter's ``split_text`` (except the first) is additionally written to
    a numbered ``.mktxt`` file.
    """

    def __init__(self, options):
        """Copy the settings from *options* and create the converter.

        Reads ``options.filename``, ``options.output_dir``, ``options.blog``
        and ``options.individual``.
        """
        self.__input_wiki_file = options.filename
        self.__output_directory = options.output_dir
        self.__fill_blog = options.blog
        self.__create_individual_files = options.individual
        self.__converter = WikidotToMarkdown()

    def __prepare_output_dir(self):
        """Create the output directory.

        An already existing directory is only reported; any other failure
        is printed and terminates the program with exit status 1.
        """
        # Function-scope import: EEXIST lives in the errno module; the
        # os.errno alias is undocumented and no longer exists in Python 3.7+.
        import errno
        try:
            os.makedirs(self.__output_directory)
        except OSError as ex:
            print("Could not create output folder "+self.__output_directory+".")
            if ex.errno == errno.EEXIST:
                print("It already exists.")
            else:
                print("Error %i: %s" % (ex.errno, str(ex)))
                sys.exit(1)

    def convert(self):
        """Convert the input file and write the results to the output directory."""
        self.__prepare_output_dir()
        # The context manager closes the input file even when reading fails.
        with codecs.open(self.__input_wiki_file, encoding='utf-8') as f:
            text = f.read()
        base_filename = os.path.splitext(os.path.basename(self.__input_wiki_file))[0]

        # write the complete files to the output directory:
        complete_text = self.__converter.convert(text)
        self.write_unicode_file("%s/%s" % (self.__output_directory, base_filename+'.mktxt'),complete_text)
        #html_text = '<html><head><title>%s</title><style type="text/css">%s</style></head><body><div class="wikistyle">' % ('Converted Markdown',file('style.css').read())
        #html_text += markdown.markdown(complete_text)
        #html_text += "</div></body></html>"
        #self.write_unicode_file("%s/%s" % (self.__output_directory, base_filename+'.html'),html_text)

        # now handle the texts split into little chunks:
        if not self.__create_individual_files:
            return
        parts = self.__converter.split_text(text)
        if len(parts) < 2:
            return  # we need at least 2 entries (the first part is trashed and one part with content!)
        # Numbering starts at 1, so the first written file is "2.mktxt".
        for i, text_part in enumerate(parts, 1):
            text_part = self.__converter.convert(text_part)
            if i == 1:
                print("\nAttention! We skip the first output part (when splitting the text into parts):\n\n%s" % text_part)
                continue
            self.write_unicode_file("%s/%i%s" % (self.__output_directory, i, '.mktxt'),text_part)
            if self.__fill_blog:
                lines = text_part.split("\n")
                # The first line of a part is used as the post title.
                title = lines[0].replace("# ","")
                content = '\n'.join(lines[1:])
                # NOTE(review): wprb, start and gradient are not defined in
                # this method; unless they exist at module level this branch
                # raises NameError -- confirm before enabling options.blog.
                date = dt.datetime(start[0],start[1],start[2], 17, 11, 11) + dt.timedelta(int((i-2)*gradient))
                wprb.post_new(title, content,[],'','private',date)
                time.sleep(SLEEP_TIME)

    def write_unicode_file(self, path_to_file, content):
        """Write *content* to *path_to_file* encoded as UTF-8.

        Best effort: a failure is reported on stdout and not re-raised.
        """
        try:
            # The with-block flushes and closes the handle in every case.
            with codecs.open(path_to_file,encoding='utf-8', mode='w') as out_file:
                out_file.write(content)
        except Exception:
            print("Error on writing to file %s." % path_to_file)
Ejemplo n.º 2
0
 def __init__(self, options):
     """Store the conversion settings from *options* and create the converter.

     Reads ``options.filename``, ``options.output_dir``, ``options.blog``
     and ``options.individual``.
     """
     # Where to read the wiki source from and where to write the results.
     self.__input_wiki_file, self.__output_directory = (
         options.filename, options.output_dir)
     # Switches for blog posting and for writing one file per part.
     self.__fill_blog, self.__create_individual_files = (
         options.blog, options.individual)
     self.__converter = WikidotToMarkdown()
Ejemplo n.º 3
0
class ConversionController(object):
    """Convert a single Wikidot source file into Markdown output files.

    The complete conversion result goes to
    ``<output_dir>/<input name>.mktxt``. If individual files are requested,
    each part returned by the converter's ``split_text`` (apart from the
    first one) is also written to its own numbered ``.mktxt`` file.
    """

    def __init__(self, options):
        """Take over the settings in *options* and instantiate the converter.

        Uses ``options.filename``, ``options.output_dir``, ``options.blog``
        and ``options.individual``.
        """
        self.__input_wiki_file = options.filename
        self.__output_directory = options.output_dir
        self.__fill_blog = options.blog
        self.__create_individual_files = options.individual
        self.__converter = WikidotToMarkdown()

    def __prepare_output_dir(self):
        """Create the output directory.

        If the directory already exists this is only reported. Every other
        error is printed and ends the program with exit status 1.
        """
        # Imported here so the block does not depend on a file-level import;
        # os.errno is an undocumented alias that Python 3.7 removed.
        import errno
        try:
            os.makedirs(self.__output_directory)
        except OSError as ex:
            print("Could not create output folder " + self.__output_directory +
                  ".")
            if ex.errno == errno.EEXIST:
                print("It already exists.")
            else:
                print("Error %i: %s" % (ex.errno, str(ex)))
                sys.exit(1)

    def convert(self):
        """Run the conversion and write all result files."""
        self.__prepare_output_dir()
        # Close the input handle deterministically, also on read errors.
        with codecs.open(self.__input_wiki_file, encoding='utf-8') as f:
            text = f.read()
        base_filename = os.path.splitext(
            os.path.basename(self.__input_wiki_file))[0]

        # write the complete files to the output directory:
        complete_text = self.__converter.convert(text)
        self.write_unicode_file(
            "%s/%s" % (self.__output_directory, base_filename + '.mktxt'),
            complete_text)
        #html_text = '<html><head><title>%s</title><style type="text/css">%s</style></head><body><div class="wikistyle">' % ('Converted Markdown',file('style.css').read())
        #html_text += markdown.markdown(complete_text)
        #html_text += "</div></body></html>"
        #self.write_unicode_file("%s/%s" % (self.__output_directory, base_filename+'.html'),html_text)

        # now handle the texts split into little chunks:
        if not self.__create_individual_files:
            return
        parts = self.__converter.split_text(text)
        if len(parts) < 2:
            return  # we need at least 2 entries (the first part is trashed and one part with content!)
        # Counting from 1 keeps the file names of the original numbering
        # (the first written part is "2.mktxt").
        for i, text_part in enumerate(parts, 1):
            text_part = self.__converter.convert(text_part)
            if i == 1:
                print(
                    "\nAttention! We skip the first output part (when splitting the text into parts):\n\n%s"
                    % text_part)
                continue
            self.write_unicode_file(
                "%s/%i%s" % (self.__output_directory, i, '.mktxt'),
                text_part)
            if self.__fill_blog:
                lines = text_part.split("\n")
                # First line becomes the title, the rest the post body.
                title = lines[0].replace("# ", "")
                content = '\n'.join(lines[1:])
                # NOTE(review): wprb, start and gradient are never assigned
                # in this method; unless they are module-level names this
                # branch fails with NameError -- confirm before using
                # options.blog.
                date = dt.datetime(start[0], start[1], start[2], 17, 11,
                                   11) + dt.timedelta(
                                       int((i - 2) * gradient))
                wprb.post_new(title, content, [], '', 'private', date)
                time.sleep(SLEEP_TIME)

    def write_unicode_file(self, path_to_file, content):
        """Write *content* as UTF-8 to *path_to_file*.

        Errors are reported on stdout and swallowed (best effort).
        """
        try:
            # Using a context manager guarantees flush and close.
            with codecs.open(
                    path_to_file, encoding='utf-8', mode='w') as out_file:
                out_file.write(content)
        except Exception:
            print("Error on writing to file %s." % path_to_file)
Ejemplo n.º 4
0
 def __init__(self, options):
     """Remember the settings found in *options* and build the converter.

     The attributes read are ``filename``, ``output_dir``, ``blog`` and
     ``individual``.
     """
     # Source file and target directory.
     self.__input_wiki_file, self.__output_directory = (
         options.filename, options.output_dir)
     # Flags: post to the blog / write one file per part.
     self.__fill_blog, self.__create_individual_files = (
         options.blog, options.individual)
     self.__converter = WikidotToMarkdown()
Ejemplo n.º 5
0
class ConversionController(object):
    """Convert a Wikidot source file to Markdown, HTML and optional blog posts.

    ``convert`` writes ``complete.mktxt`` and ``complete.html`` to the output
    directory. When individual files are requested, every part returned by
    the converter's ``split_text`` (except the first) is written to a
    numbered ``.mktxt`` file and, if blog filling is enabled, posted through
    ``WordPressPostingRobot``.
    """

    def __init__(self, options):
        """Copy the settings from *options* and create the converter.

        Reads ``options.filename``, ``options.output_dir``, ``options.blog``
        and ``options.individual``.
        """
        self.__input_wiki_file = options.filename
        self.__output_directory = options.output_dir
        self.__fill_blog = options.blog
        self.__create_individual_files = options.individual
        self.__converter = WikidotToMarkdown()

    def __prepare_output_dir(self):
        """Create the output directory.

        An already existing directory is only reported; any other failure
        is printed and terminates the program with exit status 1.
        """
        # Function-scope import: EEXIST lives in the errno module; the
        # os.errno alias is undocumented and no longer exists in Python 3.7+.
        import errno
        try:
            os.makedirs(self.__output_directory)
        except OSError as ex:
            print("Could not create output folder "+self.__output_directory+".")
            if ex.errno == errno.EEXIST:
                print("It already exists.")
            else:
                print("Error %i: %s" % (ex.errno, str(ex)))
                sys.exit(1)

    def __ask_for_date(self, prompt):
        """Ask for a ``YYYY-MM-DD`` date and return ``[year, month, day]``.

        *prompt* must contain one ``%s`` placeholder; it is filled with
        today's date, which is also used when the answer is empty.
        """
        today = dt.datetime.now().strftime("%Y-%m-%d")
        answer = raw_input(prompt % today)
        return [int(value) for value in (answer or today).split("-")]

    def convert(self):
        """Convert the input file and write (and optionally post) the results."""
        self.__prepare_output_dir()
        # The context manager closes the input file even when reading fails.
        with codecs.open(self.__input_wiki_file, encoding='utf-8') as f:
            text = f.read()

        # write the complete files to the output directory:
        complete_text = self.__converter.convert(text)
        self.write_unicode_file("%s/%s" % (self.__output_directory, 'complete.mktxt'),complete_text)
        # The stylesheet is looked up relative to the current working
        # directory and embedded into the generated page.
        with open('style.css') as css_file:
            css = css_file.read()
        html_text = '<html><head><title>%s</title><style type="text/css">%s</style></head><body><div class="wikistyle">' % ('Converted Markdown',css)
        html_text += markdown.markdown(complete_text)
        html_text += "</div></body></html>"
        self.write_unicode_file("%s/%s" % (self.__output_directory, 'complete.html'),html_text)

        # now handle the texts split into little chunks:
        if not self.__create_individual_files:
            return
        parts = self.__converter.split_text(text)
        if len(parts) < 2:
            return  # we need at least 2 entries (the first part is trashed and one part with content!)
        if self.__fill_blog:
            wprb = WordPressPostingRobot(SITE,USER)
            start = self.__ask_for_date('Please enter the start date for the posts: [%s] ')
            end = self.__ask_for_date('Please enter the end date for the posts: [%s] ')
            days_difference = (dt.datetime(end[0],end[1],end[2])-dt.datetime(start[0],start[1],start[2])).days
            # Day offset between consecutive posts: the first post gets the
            # start date, the last one the end date.
            gradient = .0 if len(parts) == 2 else float(days_difference)/(len(parts)-2)
        # Numbering starts at 1, so the first written file is "2.mktxt".
        for i, text_part in enumerate(parts, 1):
            text_part = self.__converter.convert(text_part)
            if i == 1:
                print("\nAttention! We skip the first output part (when splitting the text into parts):\n\n%s" % text_part)
                continue
            self.write_unicode_file("%s/%i%s" % (self.__output_directory, i, '.mktxt'),text_part)
            if self.__fill_blog:
                lines = text_part.split("\n")
                # The first line of a part is used as the post title.
                title = lines[0].replace("# ","")
                content = '\n'.join(lines[1:])
                date = dt.datetime(start[0],start[1],start[2], 17, 11, 11) + dt.timedelta(int((i-2)*gradient))
                wprb.post_new(title, content,[],'','private',date)
                # Pause between posts.
                time.sleep(SLEEP_TIME)

    def write_unicode_file(self, path_to_file, content):
        """Write *content* to *path_to_file* encoded as UTF-8.

        Best effort: a failure is reported on stdout and not re-raised.
        """
        try:
            # The with-block flushes and closes the handle in every case.
            with codecs.open(path_to_file,encoding='utf-8', mode='w') as out_file:
                out_file.write(content)
        except Exception:
            print("Error on writing to file %s." % path_to_file)
 def __init__(self, options):
     """Store the paths given in *options* and create the converter.

     Reads ``options.rss_filename``, ``options.input_source_dir`` and
     ``options.output_dir``.
     """
     # Index file and the directory holding the page sources.
     self.__input_rss_file, self.__input_source_directory = (
         options.rss_filename, options.input_source_dir)
     self.__output_directory = options.output_dir
     self.__converter = WikidotToMarkdown()
class ConversionController(object):
    """Convert the Wikidot pages listed in an index file into Markdown posts.

    The index file (``options.rss_filename``) is scanned for HTML table rows
    that carry URL, title, publication date and tags of a page. For each row
    the page source ``<input_source_dir>/<url>.txt`` is converted and written,
    with a front-matter header, to
    ``<output_dir>/<YYYY-MM-DD>-<url>.markdown``.
    """

    def __init__(self, options):
        """Copy the paths from *options* and create the converter.

        Reads ``options.rss_filename``, ``options.input_source_dir`` and
        ``options.output_dir``.
        """
        self.__input_rss_file = options.rss_filename
        self.__input_source_directory = options.input_source_dir
        self.__output_directory = options.output_dir
        self.__converter = WikidotToMarkdown()

    def __prepare_output_dir(self):
        """Create the output directory.

        An already existing directory is only reported; any other failure
        is printed and terminates the program with exit status 1.
        """
        # Function-scope import: EEXIST lives in the errno module; the
        # os.errno alias is undocumented and no longer exists in Python 3.7+.
        import errno
        try:
            os.makedirs(self.__output_directory)
        except OSError as ex:
            print("Could not create output folder "+self.__output_directory+".")
            if ex.errno == errno.EEXIST:
                print("It already exists.")
            else:
                print("Error %i: %s" % (ex.errno, str(ex)))
                sys.exit(1)

    def parse_rss(self, text):
        """Return every ``<tr>...</tr>`` row of *text* that starts with a link cell."""
        pattern_code = r'<tr>[\s]+?<td><a href="[\s\S]+?</tr>'
        return re.findall(pattern_code, text)

    def parse_index_item(self, text):
        """Extract ``(url, title, date, tags)`` tuples from one index row.

        Returns a list of tuples; the list is empty when *text* does not
        have the expected cell layout.
        """
        pattern_code = r'<tr>[\s]+?<td><a href="([\s\S]+?)">([\s\S]+?)</a></td>[\s]+?<td><span[\s\S]+?>([\s\S]+?)</span></td>[\s]+?<td>([\s\S]*?)</td>'
        return re.findall(pattern_code, text)

    def gen_format_tags(self, src_tags):
        """Format whitespace-separated tags as indented list lines.

        Returns an empty string when there are no tags.
        """
        if not src_tags:
            return u""
        # split() without argument ignores repeated whitespace, so no empty
        # "    - " entries are produced.
        return u"".join(u"    - " + tag + u"\n" for tag in src_tags.split())

    def gen_markdown_context(self, article_url):
        """Read the source of *article_url* and return it converted.

        Returns None (after printing a message) when the source file cannot
        be read or the conversion fails.
        """
        wiki_filename = self.__input_source_directory + "/" + article_url + ".txt"
        try:
            # The with-block closes the file even when reading fails.
            with codecs.open(wiki_filename, encoding='utf-8') as f:
                text = f.read()
            return self.__converter.convert(text)
        except Exception:
            print("Failed to gen_markdown_context :  %s" % wiki_filename)
            return None

    def gen_article_context(self, article_url, title, tags):
        """Build the complete post text: front matter plus converted body.

        Returns None when the article body could not be generated.
        """
        context_format = u"---\ntitle: '{0}'\nlayout: post\ntags:\n{1}---\n\n{2}"

        format_tags = self.gen_format_tags(tags)
        format_context = self.gen_markdown_context(article_url)
        if format_context is None:
            return None

        # Inside a single-quoted YAML scalar a quote is escaped by doubling.
        safe_title = title.replace(u"'", u"''")
        return context_format.format(safe_title, format_tags, format_context)

    def deal_one_file(self, item_info):
        """Convert and write the article described by *item_info*.

        *item_info* is a ``(url, title, date, tags)`` tuple as returned by
        ``parse_index_item``; the date must match ``'%d %b %Y %H:%M'``.
        Always returns None.
        """
        publish_date = dt.datetime.strptime(item_info[2], '%d %b %Y %H:%M')
        # Drop the first character of the URL (presumably a leading slash).
        article_url = item_info[0][1:]
        filename = (self.__output_directory
                    + publish_date.strftime('/%Y-%m-%d-')
                    + article_url
                    + ".markdown")

        context = self.gen_article_context(article_url, item_info[1], item_info[3])
        if context is None:
            print("Failed to deal :  %s" % article_url)
            return None

        with codecs.open(filename, 'w', encoding='utf-8') as md_file:
            md_file.write(context)
        return None

    def get_rss_context(self, filename):
        """Return the UTF-8 decoded content of *filename*, or None on failure."""
        try:
            with codecs.open(filename, encoding='utf-8') as f:
                return f.read()
        except (IOError, OSError, UnicodeError):
            return None

    def convert(self):
        """Convert every article listed in the index file. Returns None."""
        self.__prepare_output_dir()
        text = self.get_rss_context(self.__input_rss_file)
        if text is None:
            print("Failed to open RSS file:  %s" % self.__input_rss_file)
            return None

        # read index info from rss
        index_info = self.parse_rss(text)
        # findall returns an empty list (never None) when nothing matches.
        if not index_info:
            print("there is no index in rss.")
            return None

        # for each index info, deal file one by one
        for item in index_info:
            item_info = self.parse_index_item(item)
            if not item_info:
                # A row without the expected cells cannot be processed.
                print("Failed to parse index item, skipped.")
                continue
            self.deal_one_file(item_info[0])

        print("====== Success =====")
Ejemplo n.º 8
0
class ConversionController(object):
    """Turn one Wikidot source file into Markdown, HTML and optional blog posts.

    ``convert`` stores ``complete.mktxt`` and ``complete.html`` in the output
    directory. If individual files are requested, each part returned by the
    converter's ``split_text`` (apart from the first) is written to a
    numbered ``.mktxt`` file and, with blog filling enabled, posted through
    ``WordPressPostingRobot``.
    """

    def __init__(self, options):
        """Take over the settings in *options* and instantiate the converter.

        Uses ``options.filename``, ``options.output_dir``, ``options.blog``
        and ``options.individual``.
        """
        self.__input_wiki_file = options.filename
        self.__output_directory = options.output_dir
        self.__fill_blog = options.blog
        self.__create_individual_files = options.individual
        self.__converter = WikidotToMarkdown()

    def __prepare_output_dir(self):
        """Create the output directory.

        If the directory already exists this is only reported. Every other
        error is printed and ends the program with exit status 1.
        """
        # Imported here so the block does not depend on a file-level import;
        # os.errno is an undocumented alias that Python 3.7 removed.
        import errno
        try:
            os.makedirs(self.__output_directory)
        except OSError as ex:
            print("Could not create output folder " + self.__output_directory +
                  ".")
            if ex.errno == errno.EEXIST:
                print("It already exists.")
            else:
                print("Error %i: %s" % (ex.errno, str(ex)))
                sys.exit(1)

    def __ask_for_date(self, prompt):
        """Prompt for a ``YYYY-MM-DD`` date and return ``[year, month, day]``.

        The single ``%s`` placeholder in *prompt* is filled with today's
        date; an empty answer selects today.
        """
        today = dt.datetime.now().strftime("%Y-%m-%d")
        answer = raw_input(prompt % today)
        return [int(value) for value in (answer or today).split("-")]

    def convert(self):
        """Run the conversion, write all result files and post if requested."""
        self.__prepare_output_dir()
        # Close the input handle deterministically, also on read errors.
        with codecs.open(self.__input_wiki_file, encoding='utf-8') as f:
            text = f.read()

        # write the complete files to the output directory:
        complete_text = self.__converter.convert(text)
        self.write_unicode_file(
            "%s/%s" % (self.__output_directory, 'complete.mktxt'),
            complete_text)
        # style.css is resolved against the current working directory and
        # inlined into the generated page.
        with open('style.css') as css_file:
            css = css_file.read()
        html_text = '<html><head><title>%s</title><style type="text/css">%s</style></head><body><div class="wikistyle">' % (
            'Converted Markdown', css)
        html_text += markdown.markdown(complete_text)
        html_text += "</div></body></html>"
        self.write_unicode_file(
            "%s/%s" % (self.__output_directory, 'complete.html'), html_text)

        # now handle the texts split into little chunks:
        if not self.__create_individual_files:
            return
        parts = self.__converter.split_text(text)
        if len(parts) < 2:
            return  # we need at least 2 entries (the first part is trashed and one part with content!)
        if self.__fill_blog:
            wprb = WordPressPostingRobot(SITE, USER)
            start = self.__ask_for_date(
                'Please enter the start date for the posts: [%s] ')
            end = self.__ask_for_date(
                'Please enter the end date for the posts: [%s] ')
            days_difference = (
                dt.datetime(end[0], end[1], end[2]) -
                dt.datetime(start[0], start[1], start[2])).days
            # Spacing in days between two posts so that the first post
            # lands on the start date and the last one on the end date.
            gradient = .0 if len(
                parts) == 2 else float(days_difference) / (len(parts) - 2)
        # Counting from 1 keeps the original file numbering
        # (the first written part is "2.mktxt").
        for i, text_part in enumerate(parts, 1):
            text_part = self.__converter.convert(text_part)
            if i == 1:
                print(
                    "\nAttention! We skip the first output part (when splitting the text into parts):\n\n%s"
                    % text_part)
                continue
            self.write_unicode_file(
                "%s/%i%s" % (self.__output_directory, i, '.mktxt'),
                text_part)
            if self.__fill_blog:
                lines = text_part.split("\n")
                # First line becomes the title, the rest the post body.
                title = lines[0].replace("# ", "")
                content = '\n'.join(lines[1:])
                date = dt.datetime(start[0], start[1], start[2], 17, 11,
                                   11) + dt.timedelta(
                                       int((i - 2) * gradient))
                wprb.post_new(title, content, [], '', 'private', date)
                # Pause between posts.
                time.sleep(SLEEP_TIME)

    def write_unicode_file(self, path_to_file, content):
        """Write *content* as UTF-8 to *path_to_file*.

        Errors are reported on stdout and swallowed (best effort).
        """
        try:
            # Using a context manager guarantees flush and close.
            with codecs.open(
                    path_to_file, encoding='utf-8', mode='w') as out_file:
                out_file.write(content)
        except Exception:
            print("Error on writing to file %s." % path_to_file)