def compile_html(self, source, dest, is_two_file=True):
    if creole is None:
        req_missing(['creole'], 'build this site (compile CreoleWiki)')
    makedirs(os.path.dirname(dest))
    with codecs.open(dest, "w+", "utf8") as out_file:
        with codecs.open(source, "r", "utf8") as in_file:
            data = in_file.read()
        document = Parser(data).parse()
        output = HtmlEmitter(document).emit()
        out_file.write(output)
def compile(self, source, dest, is_two_file=True, post=None, lang=None):
    """Compile the source file into HTML and save as dest."""
    if creole is None:
        req_missing(['creole'], 'build this site (compile CreoleWiki)')
    makedirs(os.path.dirname(dest))
    with codecs.open(dest, "w+", "utf8") as out_file:
        with codecs.open(source, "r", "utf8") as in_file:
            data = in_file.read()
        document = Parser(data).parse()
        output = HtmlEmitter(document).emit()
        out_file.write(output)
def compile_html(self, source, dest):
    if creole is None:
        raise Exception('To build this site, you need to install the '
                        '"creole" package.')
    try:
        os.makedirs(os.path.dirname(dest))
    except OSError:
        # the destination directory may already exist
        pass
    with codecs.open(dest, "w+", "utf8") as out_file:
        with codecs.open(source, "r", "utf8") as in_file:
            data = in_file.read()
        document = Parser(data).parse()
        output = HtmlEmitter(document).emit()
        out_file.write(output)
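# For comparison, a sketch of the same compile step on Python 3, where
# os.makedirs(..., exist_ok=True) replaces the try/except above and the
# built-in open() handles encodings without codecs. compile_html_py3 is a
# hypothetical name; the same creole Parser/HtmlEmitter are assumed in scope:
def compile_html_py3(source, dest):
    os.makedirs(os.path.dirname(dest), exist_ok=True)
    with open(source, "r", encoding="utf-8") as in_file:
        data = in_file.read()
    html = HtmlEmitter(Parser(data).parse()).emit()
    with open(dest, "w", encoding="utf-8") as out_file:
        out_file.write(html)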
def preformatted_emit(self, node):
    return u"<pre>%s</pre>" % self.html_escape(node.content)

def default_emit(self, node):
    """Fallback function for emitting unknown nodes."""
    raise TypeError

def emit_children(self, node):
    """Emit all the children of a node."""
    return u''.join([self.emit_node(child) for child in node.children])

def emit_node(self, node):
    """Emit a single node."""
    emit = getattr(self, '%s_emit' % node.kind, self.default_emit)
    return emit(node)

def emit(self):
    """Emit the document represented by self.root DOM tree."""
    return self.emit_node(self.root)


if __name__ == "__main__":
    import sys
    document = Parser(unicode(sys.stdin.read(), 'utf-8', 'ignore')).parse()
    sys.stdout.write(HtmlEmitter(document).emit().encode('utf-8', 'ignore'))
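# emit_node above dispatches on node.kind to a matching "<kind>_emit" method,
# so the output for any node type can be customized by subclassing. A minimal
# sketch (LoudPreEmitter is a hypothetical name; it overrides the
# preformatted_emit shown above and reuses its html_escape helper):
class LoudPreEmitter(HtmlEmitter):
    def preformatted_emit(self, node):
        # wrap code blocks in a styled container instead of a bare <pre>
        return u'<div class="code"><pre>%s</pre></div>' % \
            self.html_escape(node.content)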
def render(self, text, **kwargs):
    from django_markup.bundles.WikiCreole.creole import Parser
    from django_markup.bundles.WikiCreole.creole2html import HtmlEmitter
    return HtmlEmitter(Parser(text).parse()).emit()
def render(src):
    doc = Parser(src).parse()
    return DjikiHtmlEmitter(doc).emit().encode('utf-8', 'ignore')
def parse(text, emitter=PinaxBlogHtmlEmitter):
    return emitter(Parser(text).parse()).emit()
def parse(text, emitter=BiblionHtmlEmitter):
    return emitter(Parser(text).parse()).emit()
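# Both parse() helpers take the emitter class as a parameter, so swapping the
# HTML rendering policy costs one keyword argument. An illustrative call (the
# WikiCreole markup here is just sample input):
html = parse(u"= Title =\n\n//emphasis// and **bold** text",
             emitter=BiblionHtmlEmitter)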
def pluck_airport_meta_data(in_file, out_file, start_on_line=1):
    with codecs.open(in_file, 'r+b', 'utf8') as f:
        wikipedia_pages = [
            json.loads(line)
            for line in f.read().strip().split('\n')
        ]

    with codecs.open(out_file, 'a+b', 'utf8') as f:
        for index, (title, markdown) in enumerate(wikipedia_pages, start=1):
            if index < start_on_line:
                continue

            if index and not index % 100:
                print '%d of %d' % (index, len(wikipedia_pages))

            document = Parser(markdown).parse()
            html = WikiLinkHtmlEmitter(document).emit()

            try:
                soup = BeautifulSoup(html, "html5lib")
            except RuntimeError as exc:
                if 'maximum recursion depth exceeded' in exc.message:
                    soup = None
                else:
                    raise exc

            if soup is None:
                continue

            try:
                airport = get_airport_meta_data(soup)
            except RuntimeError as exc:
                if 'maximum recursion depth exceeded' in exc.message:
                    airport = {}
                else:
                    raise exc

            if not airport:
                continue

            # If too much meta data wasn't collected then try the alternative
            # meta data plucker. Seems to work better with South American
            # airports.
            if len([1 for val in airport.values() if val is None]) > 6:
                try:
                    _html = markdown_to_html_pandocs(markdown)
                except (sh.ErrorReturnCode_2):
                    _html = ''

                try:
                    _soup = BeautifulSoup(_html, "html5lib")
                    airport = get_airport_meta_data2(_soup)
                except RuntimeError as exc:
                    if 'maximum recursion depth exceeded' in exc.message:
                        airport = {}
                    else:
                        raise exc

            if 'name' not in airport or not airport['name']:
                continue

            lat_long = get_lat_long(airport)

            if not lat_long:
                continue

            url_key = '/wiki/' + slugify(airport['name'])

            _airport = {
                "airport_name": airport['name'],
                "iata": airport['IATA'],
                "latitude": float(lat_long.lat),
                "longitude": float(lat_long.lon),
                'url': url_key,
            }

            try:
                passenger_numbers = pluck_passenger_numbers(soup)
            except RuntimeError as exc:
                if 'maximum recursion depth exceeded' in exc.message:
                    passenger_numbers = {}
                else:
                    raise exc

            # Try and get the real HTML from Wikipedia to see if it parses any
            # better than the markdown generated in this script
            if not passenger_numbers:
                try:
                    html = get_wikipedia_page(url_key)
                except (AssertionError, requests.exceptions.ConnectionError):
                    pass  # Some pages link to 404s, just move on...
                else:
                    try:
                        soup = BeautifulSoup(html, "html5lib")
                        passenger_numbers = pluck_passenger_numbers(soup)
                    except RuntimeError as exc:
                        if 'maximum recursion depth exceeded' in exc.message:
                            passenger_numbers = {}
                        else:
                            raise exc

            if passenger_numbers:
                _airport['passengers'] = passenger_numbers

            f.write(json.dumps(_airport, sort_keys=True))
            f.write('\n')
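# pluck_airport_meta_data depends on a WikiLinkHtmlEmitter whose definition is
# not shown. A hypothetical sketch of such an emitter, assuming link nodes
# carry their target in node.content and that internal targets should become
# /wiki/<slug> URLs like url_key above; link_emit and the node attributes are
# assumptions about the creole DOM, not the script's actual code:
class WikiLinkHtmlEmitter(HtmlEmitter):
    def link_emit(self, node):
        target = node.content
        if '://' not in target:
            # treat non-URL targets as internal wiki pages
            return u'<a href="/wiki/%s">%s</a>' % (
                slugify(target), self.html_escape(target))
        return u'<a href="%s">%s</a>' % (
            self.html_escape(target), self.html_escape(target))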