def test_h2_parsing(self):
    """Sections after the first must begin with an ``<h2>`` heading."""
    fjson = os.path.join(
        base_dir,
        'files/api.fjson',
    )
    data = process_file(fjson)
    self.assertEqual(
        data['sections'][1]['id'],
        'a-basic-api-client-using-slumber',
    )
    # Only capture h2's after the first section
    for section in data['sections'][1:]:
        self.assertEqual(section['content'][:5], '\n<h2>')
def test_h2_parsing(self):
    """Verify every section past the first opens with an ``<h2>`` tag."""
    source_path = os.path.join(base_dir, 'files/api.fjson')
    parsed = process_file(source_path)
    expected_id = 'a-basic-api-client-using-slumber'
    self.assertEqual(parsed['sections'][1]['id'], expected_id)
    # Only capture h2's after the first section
    for entry in parsed['sections'][1:]:
        self.assertEqual(entry['content'][:5], '\n<h2>')
def get_processed_json(self):
    """
    Get the parsed JSON for search indexing.

    Returns an empty result structure on failure so callers never
    have to handle a processing exception themselves.
    """
    file_path = self.json_file_path
    try:
        return process_file(file_path)
    except Exception:
        # Pass the argument lazily instead of eager %-formatting, so the
        # logging layer only builds the string if the record is emitted.
        # This also matches the style of the other variants of this method.
        log.warning(
            'Unhandled exception during search processing file: %s',
            file_path,
        )
    # Fallback: an empty-but-complete structure with the same keys a
    # successful parse would produce.
    return {
        'headers': [],
        'content': '',
        'path': file_path,
        'title': '',
        'sections': [],
    }
def get_processed_json(self):
    """
    Get the parsed JSON for search indexing.

    Check for two paths for each index file
    This is because HTMLDir can generate a file from two different places:

    * foo.rst
    * foo/index.rst

    Both lead to `foo/index.html`
    https://github.com/rtfd/readthedocs.org/issues/5368
    """
    # Kept so the fallback return (and the except handler) always have a
    # value, even when no storage backend is configured.
    file_path = None
    if settings.RTD_BUILD_MEDIA_STORAGE:
        storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
        # Candidate .fjson paths derived from this HTML file's path.
        fjson_paths = []
        basename = os.path.splitext(self.path)[0]
        fjson_paths.append(basename + '.fjson')
        if basename.endswith('/index'):
            # HTMLDir output: `foo/index.html` may come from `foo.rst`,
            # whose JSON lands at `foo.fjson` rather than `foo/index.fjson`.
            new_basename = re.sub(r'\/index$', '', basename)
            fjson_paths.append(new_basename + '.fjson')
        storage_path = self.project.get_storage_path(
            type_='json', version_slug=self.version.slug, include_file=False)
        try:
            # Return the first candidate that actually exists in storage.
            for fjson_path in fjson_paths:
                file_path = storage.join(storage_path, fjson_path)
                if storage.exists(file_path):
                    return process_file(file_path)
        except Exception:
            # Swallow and log: indexing failures must not break the caller.
            log.warning(
                'Unhandled exception during search processing file: %s',
                file_path,
            )
    else:
        log.warning(
            'Skipping HTMLFile processing because of no storage backend')
    # Empty result structure; `file_path` is None or the last path tried.
    return {
        'path': file_path,
        'title': '',
        'sections': [],
        'domain_data': {},
    }
def test_h2_parsing(self):
    """Sanity-check the processed output for the API documentation page."""
    fjson_path = os.path.join(base_dir, 'files/api.fjson')
    data = process_file(fjson_path)
    self.assertEqual(data['path'], 'api')
    second_section = data['sections'][1]
    self.assertEqual(
        second_section['id'],
        'a-basic-api-client-using-slumber',
    )
    self.assertTrue(
        second_section['content'].startswith('You can use Slumber'),
    )
    self.assertEqual(data['title'], 'Read the Docs Public API')
    self.assertTrue(
        len(data['sections']) > 0,
        'There are many sections for the processed file',
    )
    # There should be no new line character present
    for section in data['sections']:
        self.assertFalse('\n' in section['content'])
def get_processed_json(self):
    """
    Get the parsed JSON for search indexing.

    Check for two paths for each index file
    This is because HTMLDir can generate a file from two different places:

    * foo.rst
    * foo/index.rst

    Both lead to `foo/index.html`
    https://github.com/rtfd/readthedocs.org/issues/5368
    """
    # Initialize up front: `file_path` is referenced in the except handler
    # and the fallback return below, but was previously only bound inside
    # the loop body. This also matches the storage-backed variant of this
    # method, which does the same.
    file_path = None
    # Candidate .fjson paths derived from this HTML file's path.
    paths = []
    basename = os.path.splitext(self.path)[0]
    paths.append(basename + '.fjson')
    if basename.endswith('/index'):
        # HTMLDir output: `foo/index.html` may come from `foo.rst`,
        # whose JSON lands at `foo.fjson` rather than `foo/index.fjson`.
        new_basename = re.sub(r'\/index$', '', basename)
        paths.append(new_basename + '.fjson')
    full_json_path = self.project.get_production_media_path(
        type_='json', version_slug=self.version.slug, include_file=False
    )
    try:
        # Return the first candidate that exists on disk.
        for path in paths:
            file_path = os.path.join(full_json_path, path)
            if os.path.exists(file_path):
                return process_file(file_path)
    except Exception:
        # Swallow and log: indexing failures must not break the caller.
        log.warning(
            'Unhandled exception during search processing file: %s',
            file_path,
        )
    # Empty result structure; `file_path` is None or the last path tried.
    return {
        'headers': [],
        'content': '',
        'path': file_path,
        'title': '',
        'sections': [],
    }