Beispiel #1
0
    def assert_html2Creole(self, raw_markup, raw_html, debug=False, **kwargs):
        """
        Compare the generated markup from the given >raw_html< html code, with
        the given >raw_markup< reference string.

        Both strings are passed through self._prepare_text() first. The html
        is then converted with html2creole(); >debug< and all additional
        >kwargs< are handed over to it. With >debug< set, the intermediate
        strings are dumped with self._debug_text().
        """
        # Guard: the reference markup and the html input must not be identical.
        self.assertNotEqual(raw_markup, raw_html)

        # prepare whitespace on test strings
        markup = self._prepare_text(raw_markup)
        assert isinstance(markup, unicode)
        if debug:
            # Label fixed: it used to name "assert_Creole2html()".
            self._debug_text("assert_html2Creole() markup", markup)

        html = self._prepare_text(raw_html)
        assert isinstance(html, unicode)

        # convert html code into creole markup
        out_string = html2creole(html, debug, **kwargs)
        if debug:
            self._debug_text("assert_html2Creole() html2creole", out_string)

        # compare
        self.assertEqual(out_string, markup)
    def assert_html2creole(self, raw_creole, raw_html,
                strip_lines=False, debug=False,
                parser_kwargs=None, emitter_kwargs=None):
        """
        Compare the generated markup from the given >raw_html< html code, with
        the given >raw_creole< reference string.

        >parser_kwargs< and >emitter_kwargs< are handed to html2creole() as
        third and fourth positional argument; both default to a fresh empty
        dict. >strip_lines< is accepted but not used by this method.
        With >debug< set, the intermediate strings are dumped with
        self._debug_text().
        """
        # Create the default dicts per call instead of sharing one mutable
        # default object between all calls.
        if parser_kwargs is None:
            parser_kwargs = {}
        if emitter_kwargs is None:
            emitter_kwargs = {}

        # Guard: the reference markup and the html input must not be identical.
        self.assertNotEqual(raw_creole, raw_html)

        # prepare whitespace on test strings
        markup = self._prepare_text(raw_creole)
        assert isinstance(markup, unicode)
        if debug:
            # Label fixed: it used to name "assert_creole2html()".
            self._debug_text("assert_html2creole() markup", markup)

        html = self._prepare_text(raw_html)
        assert isinstance(html, unicode)

        # convert html code into creole markup
        out_string = html2creole(html, debug, parser_kwargs, emitter_kwargs)
        if debug:
            self._debug_text("assert_html2creole() html2creole", out_string)

        # compare
        try:
            self.assertEqual(out_string, markup)
        except Exception:
            # Mark which conversion direction failed, then let the original
            # error propagate unchanged.
            print(" *** Error in html2creole:")
            raise
Beispiel #3
0
    def assert_html2creole2(self, creole, html, debug=False, unknown_emit=None):
        """
        Convert >html< with html2creole() and check that the result is equal
        to the expected >creole< markup.

        >debug< and >unknown_emit< are passed on to html2creole(). With
        >debug< set, the converted text is dumped with self._debug_text().
        """
        generated = html2creole(html, debug, unknown_emit=unknown_emit)

        if debug:
            self._debug_text("assert_html2creole() html2creole", generated)

        self.assertEqual(generated, creole, msg="html2creole")
Beispiel #4
0
    def run(self):
        """
        Import the actors listed in 'sina_actor_ids.txt' into self.mongo_wiki.

        The file holds comma separated actor ids. For every id the base info
        (getActorBaseInfo) and the content (getActorContent) are fetched, the
        html content is converted to creole markup, a slug that is not yet
        used is derived from the title and the document is inserted. Ids for
        which no base info is returned are skipped.
        """
        tz = pytz.timezone("Asia/Taipei")

        # The id file was generated once from the actor list pages
        # (getListUrls() -> getActorIds(), joined with ','); here it is only
        # read. The context manager makes sure the file really gets closed:
        # the previous code referenced f.close without calling it.
        with open('sina_actor_ids.txt', 'r') as f:
            actor_ids = f.read().split(',')

        for actor_id in actor_ids:
            actor_data = self.getActorBaseInfo(actor_id)
            if not actor_data:
                print('跳过组合')
                continue

            # pytz zones have to be attached with localize(); using
            # replace(tzinfo=tz) would apply the zone's LMT offset instead of
            # the current one.
            now = tz.localize(datetime.now())
            actor_data['model'] = 'actor'
            actor_data['created_at'] = now
            actor_data['updated_at'] = now
            actor_data['title'] = actor_data['title'].replace('-', '·')
            print(actor_data)

            actor_content = self.getActorContent(actor_id)
            print(actor_content)

            # Drop the '[[' / ']]' markers of the automatically generated
            # wiki links.
            content = html2creole(actor_content)
            content = content.replace('[[', '').replace(']]', '')
            actor_data['content'] = content
            actor_data['html_cache'] = creole2html(content)

            # Slug proposal: encode the title as gb18030 (dropping what cannot
            # be encoded), replace every character outside the allowed class
            # with "-", convert back to utf-8 and strip the outer dashes.
            title_gb = actor_data['title'].decode("utf-8").encode(
                "gb18030", "ignore")
            dashed = re.sub("[^\x61-\xff\d\w]", "-", title_gb)
            proposal = dashed.decode("gb18030", "ignore").encode(
                "utf-8").strip("-")
            slug = proposal

            # Collect the stored slugs that start with the proposal and append
            # a counter ("-2", "-3", ...) until the slug is unused.
            # NOTE(review): the slug is inserted into the pattern unescaped.
            regex = re.compile("^%s" % slug)
            similar_slugs = set(
                result['slug']
                for result in self.mongo_wiki.find({"slug": regex})
            )
            i = 1
            while slug in similar_slugs:
                i = i + 1
                slug = proposal + "-" + str(i)

            actor_data['slug'] = slug

            self.mongo_wiki.insert(actor_data)
    def assert_html2creole2(self, creole, html, debug=False, unknown_emit=None):
        """
        Convert >html< with html2creole() and check that the result is equal
        to the expected >creole< markup.

        >debug< and >unknown_emit< are passed on to html2creole(). With
        >debug< set, the converted text is dumped with self._debug_text().
        """
        generated = html2creole(html, debug, unknown_emit=unknown_emit)

        if debug:
            self._debug_text("assert_html2creole() html2creole", generated)

        self.assertEqual(generated, creole, msg="html2creole")
Beispiel #6
0
    def run(self):
        """
        Import the actors listed in 'sina_actor_ids.txt' into self.mongo_wiki.

        The file holds comma separated actor ids. For every id the base info
        (getActorBaseInfo) and the content (getActorContent) are fetched, the
        html content is converted to creole markup, a slug that is not yet
        used is derived from the title and the document is inserted. Ids for
        which no base info is returned are skipped.
        """
        tz = pytz.timezone("Asia/Taipei")

        # The id file was generated once from the actor list pages
        # (getListUrls() -> getActorIds(), joined with ','); here it is only
        # read. The context manager makes sure the file really gets closed:
        # the previous code referenced f.close without calling it.
        with open('sina_actor_ids.txt', 'r') as f:
            actor_ids = f.read().split(',')

        for actor_id in actor_ids:
            actor_data = self.getActorBaseInfo(actor_id)
            if not actor_data:
                print('跳过组合')
                continue

            # pytz zones have to be attached with localize(); using
            # replace(tzinfo=tz) would apply the zone's LMT offset instead of
            # the current one.
            now = tz.localize(datetime.now())
            actor_data['model'] = 'actor'
            actor_data['created_at'] = now
            actor_data['updated_at'] = now
            actor_data['title'] = actor_data['title'].replace('-', '·')
            print(actor_data)

            actor_content = self.getActorContent(actor_id)
            print(actor_content)

            # Drop the '[[' / ']]' markers of the automatically generated
            # wiki links.
            content = html2creole(actor_content)
            content = content.replace('[[', '').replace(']]', '')
            actor_data['content'] = content
            actor_data['html_cache'] = creole2html(content)

            # Slug proposal: encode the title as gb18030 (dropping what cannot
            # be encoded), replace every character outside the allowed class
            # with "-", convert back to utf-8 and strip the outer dashes.
            title_gb = actor_data['title'].decode("utf-8").encode(
                "gb18030", "ignore")
            dashed = re.sub("[^\x61-\xff\d\w]", "-", title_gb)
            proposal = dashed.decode("gb18030", "ignore").encode(
                "utf-8").strip("-")
            slug = proposal

            # Collect the stored slugs that start with the proposal and append
            # a counter ("-2", "-3", ...) until the slug is unused.
            # NOTE(review): the slug is inserted into the pattern unescaped.
            regex = re.compile("^%s" % slug)
            similar_slugs = set(
                result['slug']
                for result in self.mongo_wiki.find({"slug": regex})
            )
            i = 1
            while slug in similar_slugs:
                i = i + 1
                slug = proposal + "-" + str(i)

            actor_data['slug'] = slug

            self.mongo_wiki.insert(actor_data)
Beispiel #7
0
def convert_markup(raw_content, source_markup_no, dest_markup_no, request):
    """
    Convert >raw_content< from the source markup into the destination markup.

    HTML is used as the intermediate format. If both markup numbers are equal,
    or both are HTML variants, the content is returned unchanged. Apart from
    HTML only creole is supported as destination; any other destination raises
    NotImplementedError.
    """
    page_msg = FileLikeMessages(request, messages.INFO)

    html_markups = (MARKUP_HTML, MARKUP_HTML_EDITOR)
    source_is_html = source_markup_no in html_markups
    dest_is_html = dest_markup_no in html_markups

    # Nothing to do ;)
    if source_markup_no == dest_markup_no or (source_is_html and dest_is_html):
        return raw_content

    if not dest_is_html:
        if dest_markup_no != MARKUP_CREOLE:
            raise NotImplementedError(
                "Converting into %r not supported." % dest_markup_no
            )

    # Step 1: get html. Non-html sources get their Django tags cut out before
    # the conversion; the tags are put back at the end.
    needs_reassembly = not source_is_html
    if needs_reassembly:
        assembler = DjangoTagAssembler()
        tagless_content, cut_data = assembler.cut_out(raw_content)
        html_content = convert(tagless_content, source_markup_no, page_msg)
    else:
        html_content = raw_content

    # Step 2: html -> destination markup (only creole is left at this point).
    if dest_is_html:
        converted = html_content
    else:
        from creole import html2creole
        converted = html2creole(html_content)

    # Step 3: reassemble the cut out Django tags into the text.
    if needs_reassembly:
        return assembler.reassembly(converted, cut_data)
    return converted
Beispiel #8
0
def convert_markup(raw_content, source_markup_no, dest_markup_no, request):
    """
    Convert >raw_content< from the source markup into the destination markup.

    HTML is used as the intermediate format. If both markup numbers are equal,
    or both are HTML variants, the content is returned unchanged. Apart from
    HTML only creole is supported as destination; any other destination raises
    NotImplementedError.
    """
    page_msg = FileLikeMessages(request, messages.INFO)

    html_markups = (MARKUP_HTML, MARKUP_HTML_EDITOR)
    source_is_html = source_markup_no in html_markups
    dest_is_html = dest_markup_no in html_markups

    # Nothing to do ;)
    if source_markup_no == dest_markup_no or (source_is_html and dest_is_html):
        return raw_content

    if not dest_is_html:
        if dest_markup_no != MARKUP_CREOLE:
            raise NotImplementedError(
                "Converting into %r not supported." % dest_markup_no
            )

    # Step 1: get html. Non-html sources get their Django tags cut out before
    # the conversion; the tags are put back at the end.
    needs_reassembly = not source_is_html
    if needs_reassembly:
        assembler = DjangoTagAssembler()
        tagless_content, cut_data = assembler.cut_out(raw_content)
        html_content = convert(tagless_content, source_markup_no, page_msg)
    else:
        html_content = raw_content

    # Step 2: html -> destination markup (only creole is left at this point).
    if dest_is_html:
        converted = html_content
    else:
        from creole import html2creole
        converted = html2creole(html_content)

    # Step 3: reassemble the cut out Django tags into the text.
    if needs_reassembly:
        return assembler.reassembly(converted, cut_data)
    return converted
Beispiel #9
0
== simple demo
You can convert from:

* from //creole// to **html**
* from **html** back to //creole//

=== e.g. a table:
|=headline 1 |= headline 2 |
| 1.1. cell  | 1.2. cell   |
| 2.1. cell  | 2.2. cell   |
----

More info on our [[http://code.google.com/p/python-creole/|Homepage]]."""

# 1. Show the creole source text.
print("*" * 79)
print(" Source creole markup text:")
print("-" * 79)
print(source)

# 2. creole -> html
print("*" * 79)
print(" Convert it into html:")
print("-" * 79)
html = creole2html(source)
print(html)

# 3. html -> creole (round trip)
print("*" * 79)
print(" Convert the html code back into creole:")
print("-" * 79)
creole = html2creole(html)
print(creole)
Beispiel #10
0
* from //creole// to **html**
* from **html** back to //creole//

=== e.g. a table:
|=headline 1 |= headline 2 |
| 1.1. cell  | 1.2. cell   |
| 2.1. cell  | 2.2. cell   |
----

More info on our [[http://code.google.com/p/python-creole/|Homepage]]."""


# 1. Show the creole source text.
print("*" * 79)
print(" Source creole markup text:")
print("-" * 79)
print(source)


# 2. creole -> html
print("*" * 79)
print(" Convert it into html:")
print("-" * 79)
html = creole2html(source)
print(html)


# 3. html -> creole (round trip)
print("*" * 79)
print(" Convert the html code back into creole:")
print("-" * 79)
creole = html2creole(html)
print(creole)