Example No. 1
def compile_html(self, source, dest, is_two_file=True):
    if creole is None:
        req_missing(['creole'], 'build this site (compile CreoleWiki)')
    makedirs(os.path.dirname(dest))
    with codecs.open(dest, "w+", "utf8") as out_file:
        with codecs.open(source, "r", "utf8") as in_file:
            data = in_file.read()
            # Parse the Creole markup into a document tree.
            document = Parser(data).parse()
        # Serialize the tree to HTML and write it to the destination.
        output = HtmlEmitter(document).emit()
        out_file.write(output)
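
This snippet, like the examples that follow, drives the same two-step pipeline: Parser builds a document tree from the Creole markup, and HtmlEmitter serializes that tree to HTML. A minimal standalone sketch of the pattern, assuming the standalone creole.py / creole2html.py modules are importable (the exact import path varies between projects, as Example No. 5 below shows):

from creole import Parser
from creole2html import HtmlEmitter

def creole_to_html(text):
    # Step 1: parse the Creole markup into a DOM-like document tree.
    document = Parser(text).parse()
    # Step 2: walk the tree and emit an HTML fragment.
    return HtmlEmitter(document).emit()

print(creole_to_html(u'**bold** and //italic//'))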
Example No. 2
def compile(self, source, dest, is_two_file=True, post=None, lang=None):
    """Compile the source file into HTML and save as dest."""
    if creole is None:
        req_missing(['creole'], 'build this site (compile CreoleWiki)')
    makedirs(os.path.dirname(dest))
    with codecs.open(dest, "w+", "utf8") as out_file:
        with codecs.open(source, "r", "utf8") as in_file:
            data = in_file.read()
            document = Parser(data).parse()
        output = HtmlEmitter(document).emit()
        out_file.write(output)
Example No. 3
def compile_html(self, source, dest):
    if creole is None:
        raise Exception('To build this site, you need to install the '
                        '"creole" package.')
    try:
        os.makedirs(os.path.dirname(dest))
    except OSError:
        pass  # The destination directory may already exist.
    with codecs.open(dest, "w+", "utf8") as out_file:
        with codecs.open(source, "r", "utf8") as in_file:
            data = in_file.read()
            document = Parser(data).parse()
        output = HtmlEmitter(document).emit()
        out_file.write(output)
Example No. 4
    def preformatted_emit(self, node):
        return u"<pre>%s</pre>" % self.html_escape(node.content)

    def default_emit(self, node):
        """Fallback function for emitting unknown nodes."""

        raise TypeError

    def emit_children(self, node):
        """Emit all the children of a node."""

        return u''.join([self.emit_node(child) for child in node.children])

    def emit_node(self, node):
        """Emit a single node."""

        emit = getattr(self, '%s_emit' % node.kind, self.default_emit)
        return emit(node)

    def emit(self):
        """Emit the document represented by self.root DOM tree."""

        return self.emit_node(self.root)


if __name__ == "__main__":
    import sys
    document = Parser(unicode(sys.stdin.read(), 'utf-8', 'ignore')).parse()
    sys.stdout.write(HtmlEmitter(document).emit().encode('utf-8', 'ignore'))
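
The emit_node method above dispatches on node.kind: getattr looks up a method named '<kind>_emit', falling back to default_emit for unknown node kinds. This makes the emitter extensible by subclassing; a hypothetical sketch (the class name and CSS class are illustrative, not part of the original module):

class MyHtmlEmitter(HtmlEmitter):
    # emit_node resolves 'preformatted' nodes to this method via the
    # '%s_emit' % node.kind lookup, so no registration is needed.
    def preformatted_emit(self, node):
        return u'<pre class="code">%s</pre>' % self.html_escape(node.content)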
Example No. 5
def render(self, text, **kwargs):
    from django_markup.bundles.WikiCreole.creole import Parser
    from django_markup.bundles.WikiCreole.creole2html import HtmlEmitter
    return HtmlEmitter(Parser(text).parse()).emit()
Example No. 6
def render(src):
    doc = Parser(src).parse()
    return DjikiHtmlEmitter(doc).emit().encode('utf-8', 'ignore')
Example No. 7
def parse(text, emitter=PinaxBlogHtmlEmitter):
    return emitter(Parser(text).parse()).emit()
Example No. 8
def parse(text, emitter=BiblionHtmlEmitter):
    return emitter(Parser(text).parse()).emit()
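
Examples No. 7 and No. 8 differ only in the default emitter class: the emitter is injected as a keyword argument, so any class that accepts the parsed document and exposes emit() can be swapped in. A hypothetical call that substitutes the plain HtmlEmitter:

html = parse(u'== Heading ==', emitter=HtmlEmitter)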
Example No. 9
def pluck_airport_meta_data(in_file, out_file, start_on_line=1):
    """Extract airport metadata from a JSON-lines dump of Wikipedia
    pages and append one JSON object per airport to out_file."""
    with codecs.open(in_file, 'r+b', 'utf8') as f:
        # Each input line is a JSON-encoded [title, markup] pair.
        wikipedia_pages = [
            json.loads(line) for line in f.read().strip().split('\n')
        ]

    with codecs.open(out_file, 'a+b', 'utf8') as f:
        for index, (title, markdown) in enumerate(wikipedia_pages, start=1):
            if index < start_on_line:
                continue

            # Report progress every 100 pages.
            if index and not index % 100:
                print '%d of %d' % (index, len(wikipedia_pages))

            document = Parser(markdown).parse()

            html = WikiLinkHtmlEmitter(document).emit()

            try:
                soup = BeautifulSoup(html, "html5lib")
            except RuntimeError as exc:
                if 'maximum recursion depth exceeded' in exc.message:
                    soup = None
                else:
                    raise exc

            if soup is None:
                continue

            try:
                airport = get_airport_meta_data(soup)
            except RuntimeError as exc:
                if 'maximum recursion depth exceeded' in exc.message:
                    airport = {}
                else:
                    raise exc

            if not airport:
                continue

            # If too many metadata fields are missing, try the alternative
            # metadata plucker; it seems to work better with South American
            # airports.
            if len([1 for val in airport.values() if val is None]) > 6:
                try:
                    _html = markdown_to_html_pandocs(markdown)
                except sh.ErrorReturnCode_2:
                    _html = ''

                try:
                    _soup = BeautifulSoup(_html, "html5lib")
                    airport = get_airport_meta_data2(_soup)
                except RuntimeError as exc:
                    if 'maximum recursion depth exceeded' in exc.message:
                        airport = {}
                    else:
                        raise exc

            if 'name' not in airport or not airport['name']:
                continue

            lat_long = get_lat_long(airport)

            if not lat_long:
                continue

            url_key = '/wiki/' + slugify(airport['name'])
            _airport = {
                "airport_name": airport['name'],
                "iata": airport['IATA'],
                "latitude": float(lat_long.lat),
                "longitude": float(lat_long.lon),
                'url': url_key,
            }

            try:
                passenger_numbers = pluck_passenger_numbers(soup)
            except RuntimeError as exc:
                if 'maximum recursion depth exceeded' in exc.message:
                    passenger_numbers = {}
                else:
                    raise exc

            # Try to fetch the real HTML from Wikipedia to see if it parses
            # any better than the HTML generated from the markdown in this
            # script.
            if not passenger_numbers:
                try:
                    html = get_wikipedia_page(url_key)
                except (AssertionError, requests.exceptions.ConnectionError):
                    pass  # Some pages link to 404s, just move on...
                else:
                    try:
                        soup = BeautifulSoup(html, "html5lib")
                        passenger_numbers = pluck_passenger_numbers(soup)
                    except RuntimeError as exc:
                        if 'maximum recursion depth exceeded' in exc.message:
                            passenger_numbers = {}
                        else:
                            raise exc

            if passenger_numbers:
                _airport['passengers'] = passenger_numbers

            f.write(json.dumps(_airport, sort_keys=True))
            f.write('\n')
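
The 'maximum recursion depth exceeded' guard is repeated four times in this function. A hypothetical helper, not part of the original script, could factor the pattern out:

def guard_recursion(func, *args, **kwargs):
    """Call func, returning None if the recursion limit is exceeded."""
    try:
        return func(*args, **kwargs)
    except RuntimeError as exc:
        if 'maximum recursion depth exceeded' in str(exc):
            return None
        raise

# Usage, e.g.: soup = guard_recursion(BeautifulSoup, html, 'html5lib')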