def test_parser_class(self):
    """
    Check that each file name maps to the expected parser class.

    File names with an extension that has no parser (and the empty
    file name) are expected to yield None.
    """
    expectations = [
        ('file.rst', RstParser),
        ('file.md', MarkdownParser),
        ('file.markdown', MarkdownParser),
        ('file.html', Parser),
        ('file.htm', Parser),
        ('file.xml', Parser),
        ('file.xhtml', None),
        ('file.png', None),
        ('', None),
    ]

    for filename, expected_cls in expectations:
        self.assertEqual(get_parser_for_filename(filename), expected_cls)
def _parse(self, input_data):
    """
    Parse the pages of every input directory and collect the results.

    ``input_data`` maps a directory to a ``(pages, static_files)`` pair.
    Each page path is parsed with the parser class selected for it; pages
    that fail to parse, are drafts, or carry a date later than today are
    skipped.  All remaining pages are appended to ``self.pages``.
    """
    for input_dir, (page_paths, static_files) in input_data.items():
        # Static files at the top level of the project dir belong to the
        # site itself rather than to any page.
        if input_dir == self.settings['project_dir']:
            self.static_files = static_files
            static_files = []

        for path in page_paths:
            # Start from the static files, then layer the default headers.
            page = {'static_files': static_files}
            page.update(self._get_default_headers(path))

            # Pick the parser from the page's 'path' header and feed it
            # the file contents.
            parser_cls = parser.get_parser_for_filename(page['path'])
            with open(path, 'r', encoding='utf8') as source:
                parser_inst = parser_cls(self.settings, source.read())

            try:
                parsed = parser_inst.parse()
            except parser.ParserException as error:
                logging.error(error)
                logging.error('skipping article "%s"', path)
                continue

            # parsed[0] holds the header values, parsed[1] the content.
            page.update(content=parsed[1], **parsed[0])
            output_ext = parser_cls.output_ext
            if output_ext:
                page['output_ext'] = output_ext

            # Drafts are never published.
            if page['status'] == 'draft':
                logging.debug('skipping %s (draft)', path)
                continue

            # Neither are pages dated in the future.
            if page['date'] > datetime.today():
                logging.debug('skipping %s (future-dated)', path)
                continue

            page['url'] = get_url(page)
            self.pages.append(page)

            # One progress dot per page that was kept.
            sys.stdout.write('.')

    sys.stdout.write('\n')
def test_parser_class(self):
    """
    Check that each file name maps to the expected parser class.

    File names with an extension that has no parser (and the empty
    file name) are expected to yield None.
    """
    expectations = [
        ('file.rst', RstParser),
        ('file.md', MarkdownParser),
        ('file.markdown', MarkdownParser),
        ('file.html', Parser),
        ('file.htm', Parser),
        ('file.xml', Parser),
        ('file.xhtml', None),
        ('file.png', None),
        ('', None),
    ]

    for filename, expected_cls in expectations:
        self.assertEqual(get_parser_for_filename(filename), expected_cls)
def _read_files(self):
    """
    Walk the project directory and split its files into pages and static
    files.

    A file counts as a page when ``parser.get_parser_for_filename``
    returns a truthy value for its path; every other file is static.

    Static files are grouped like this:

    * a directory that contains pages keeps its own static files;
    * a directory with static files only hands them to every ancestor
      directory (below the project dir) that is already recorded in the
      result;
    * when no such ancestor exists, or the directory is the project dir
      itself, they are recorded under the project dir.

    Returns:
        An ``OrderedDict`` mapping a directory path to a
        ``(pages, static)`` tuple of lists of file paths.
    """
    project_dir = self.settings['project_dir']
    data = OrderedDict()

    for root, _dirs, files in walk_ignore(project_dir):
        pages = []   # paths for which a parser exists
        static = []  # everything else

        for name in files:
            path = join(root, name)
            if parser.get_parser_for_filename(path):
                pages.append(path)
            else:
                static.append(path)

        if pages:
            data[root] = (pages, static)
            continue
        if not static:
            continue

        # The directory has static files but no pages: look for ancestor
        # directories that are already recorded and attach the files there.
        has_parent = False
        if root != project_dir:
            parent_dir = dirname(root)
            while parent_dir != project_dir:
                if parent_dir in data:
                    data[parent_dir][1].extend(static)
                    has_parent = True

                next_parent = dirname(parent_dir)
                # dirname() settles on a fixed point ('' or the filesystem
                # root).  If we get there without ever matching
                # project_dir (e.g. it was given with a trailing
                # separator), stop instead of looping forever.
                if next_parent == parent_dir:
                    break
                parent_dir = next_parent

        # No ancestor took the files, or they live in the project dir
        # itself: record them under the project dir.
        if not has_parent:
            data.setdefault(project_dir, ([], []))[1].extend(static)

    return data