def load_content(path):
    """Extract the text of a PDF or PostScript file via an external converter.

    The converter is chosen from the (case-insensitive) file extension:
    ``.pdf`` runs ``/usr/bin/pdftotext <path> -`` and ``.ps`` runs
    ``/usr/bin/pstotext <path>``.  Any other extension is reported through
    ``transcripts.printf_error`` and ignored.

    :param path: path of the file to convert.
    :returns: whatever ``execute_load_content`` returns for the chosen
        command, or ``None`` when the extension is not recognised.
    :raises IOError: if ``path`` cannot be opened for reading.
    """
    transcripts.printf_information('Parsing file [%s]...', path)

    # The previous code opened the file and kept the handle in an unused
    # local, leaking a descriptor on every call.  The open is kept only as a
    # readability probe (a missing or unreadable path still raises here, as
    # before) and the handle is released immediately.
    open(path, 'rt').close()

    lowered = path.lower()
    if lowered.endswith('.pdf'):
        # The trailing '-' is passed to pdftotext as its output argument.
        return execute_load_content(['/usr/bin/pdftotext', path, '-'])
    if lowered.endswith('.ps'):
        return execute_load_content(['/usr/bin/pstotext', path])

    transcripts.printf_error('File extension is unknown; ignoring.')
    return None
def execute_load_content(arguments):
    """Run a command and return everything it writes to standard output.

    The child is started with its stdin and stdout connected to pipes and
    its stderr left untouched (``stderr=None``).  Output is read in
    universal-newlines mode.

    :param arguments: the command line as an argument list, handed to
        ``subprocess.Popen`` unchanged.
    :returns: the captured output, or ``None`` (after logging through
        ``transcripts.printf_error``) when the command exits with a non-zero
        status or produces no output at all.
    """
    converter = subprocess.Popen(
        arguments,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=None,
        close_fds=True,
        bufsize=(32 * 1024),
        universal_newlines=True)

    # Nothing is fed to the child: close its stdin before reading.
    converter.stdin.close()
    output = converter.stdout.read()
    converter.stdout.close()
    exit_code = converter.wait()

    if exit_code != 0:
        transcripts.printf_error ('Subprocess returned an error code, thus the file seems broken; ignoring.')
        return None

    if not output:
        transcripts.printf_error ('Subprocess returned an empty content, thus the file seems broken; ignoring.')
        return None

    return output
def dump(self, title, content):
    """Write one (title, content) record to ``self.stream``.

    Record layout, every integer packed with ``struct`` format ``">L"``
    (big-endian unsigned 32-bit)::

        total_size | title_size | title bytes | content_size | content bytes

    where ``total_size = 4 + title_size + 4 + content_size``, i.e. it counts
    everything that follows the ``total_size`` field itself.

    Arguments that are not byte strings are encoded as UTF-8 first.  If
    either one is empty after encoding, nothing is written and the problem
    is reported through ``transcripts.printf_error``.  ``self.stepper.step()``
    is called once per invocation in both cases.

    :param title: record title (byte string or unicode text).
    :param content: record body (byte string or unicode text).
    """
    # `bytes` is the very same type as `types.StringType` on Python 2.6+,
    # so this test is unchanged there while also being valid on Python 3.
    if not isinstance(title, bytes):
        title = title.encode("utf-8")
    # Fix: this check used to test `title` a second time.  Since `title` is
    # always a byte string by now, `content` was never encoded and its size
    # was counted in characters rather than bytes.
    if not isinstance(content, bytes):
        content = content.encode("utf-8")

    title_size = len(title)
    content_size = len(content)

    if title_size == 0 or content_size == 0:
        transcripts.printf_error('Title or content is empty for [%s]; ignoring.', title)
    else:
        total_size = 4 + title_size + 4 + content_size
        self.stream.write(struct.pack(">L", total_size))
        self.stream.write(struct.pack(">L", title_size))
        self.stream.write(title)
        self.stream.write(struct.pack(">L", content_size))
        self.stream.write(content)

    self.stepper.step()
def dump(map, dumper):
    """Hand one parsed article to ``dumper`` as a titled text record.

    ``map`` is indexed by field name and each present field is treated as a
    sequence: the first element of ``"id"``, ``"topic"`` and ``"title"`` is
    used, and all elements of ``"content"`` are joined with newlines.

    Fallbacks, each reported through ``transcripts``:

    * missing ``"topic"``   -> warning, topic becomes ``"{unknown}"``;
    * missing ``"title"``   -> error, title becomes ``"(<id>)"``;
    * missing ``"content"`` -> error, the article is skipped entirely.

    The record is emitted as ``dumper.dump("[<topic>] <title>", content)``.

    :param map: the article fields; ``"id"`` must be present.
    :param dumper: object exposing ``dump(title, content)``.
    """
    article_id = map["id"][0]

    if "topic" not in map:
        transcripts.printf_warning("Missing topic(s) from article with id [%s]; assuming unknown.", article_id)
        topic = "{unknown}"
    else:
        topic = map["topic"][0]

    if "title" not in map:
        transcripts.printf_error("Missing title from article with id [%s]; assuming id.", article_id)
        title = "(%s)" % (article_id)
    else:
        title = map["title"][0]

    if "content" not in map:
        transcripts.printf_error("Missing content from article with id [%s]; ignoring.", article_id)
        return

    body = "\n".join(map["content"])
    heading = "[%s] %s" % (topic, title)
    dumper.dump(heading, body)
def __call__(self, message = '', context = '?', severity = 0, traceback = False):
    """Report a message through ``transcripts.printf_error``.

    The emitted line has the form ``Cherrypy: <context> [<severity>]: <message>``.

    :param message: text to report.
    :param context: label printed before the severity.
    :param severity: number formatted with ``%d``.
    :param traceback: accepted but not used by this method.
    """
    template = 'Cherrypy: %s [%d]: %s'
    fields = (context, severity, message)
    transcripts.printf_error(template, *fields)