def parse(self):
    files = self.contents_of('pages')
    super().parse(files)
    insights = []
    for i, file in enumerate(files):
        self.log.BAR.update(i)
        file_contents = self.read_file(
            os.path.join(self.destination_dir, 'pages', file))
        if not file_contents:
            continue
        _ = {
            'image': self._get_image_from_first_line(file_contents),
            'insight': list(
                split_into_sections(file_contents,
                                    level_granularity=1).keys())[0],
            'introduction': '',
            'os_specific': {},
            'sections': {}
        }
        _['image'] = self._fix_image(_['image'], additional_parents=['pages'])
        sections = split_into_sections(file_contents, level_granularity=2)
        order = 0
        for section, content in sections.items():
            if section == _['insight']:
                _['introduction'] = PARSER.fix_html(content)
            else:
                order += 1
                _['sections'][section] = {
                    'order': order,
                    'content': PARSER.fix_html(content)
                }
                has_os_specific_instruction = '### ' in content
                if has_os_specific_instruction:
                    for operating_system, os_content in split_into_sections(
                            content, level_granularity=3).items():
                        if operating_system == 'MacOS':
                            operating_system = 'macOS'
                        _['os_specific'][operating_system] = {
                            'content': PARSER.fix_html(os_content),
                            'related_section': section
                        }
        insights.append(_)
    self.log.BAR.finish()
    return insights

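# `parse` leans on a `split_into_sections` helper defined elsewhere in the
# codebase. Below is a minimal sketch of the behavior the calls above assume
# (split markdown on headings up to a given level, mapping heading text to the
# body below it); the real helper also supports `keep_levels` and
# `clear_empty_lines`, which are not modeled here.
import re


def split_into_sections_sketch(markdown: str, level_granularity: int = 1) -> dict:
    """Map each heading of at most `level_granularity` '#'s to the text below it."""
    sections, current = {}, None
    heading = re.compile(r'^(#{1,%d})\s+(.*)$' % level_granularity)
    for line in markdown.splitlines():
        match = heading.match(line)
        if match:
            current = match.group(2).strip()
            sections[current] = ''
        elif current is not None:
            sections[current] += line + '\n'
    return {k: v.strip() for k, v in sections.items()}


# Example:
# split_into_sections_sketch('# Title\nIntro\n## Part\nBody', level_granularity=2)
# returns {'Title': 'Intro', 'Part': 'Body'}
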
def handle(self, *args, **options):
    log = Logger(path=__file__,
                 force_verbose=options.get('verbose'),
                 force_silent=options.get('silent'))
    log.log('Building blurbs... Please be patient as this can take some time.')
    for cat in list(settings.AUTO_USERS.keys()):
        for u in settings.AUTO_USERS[cat]:
            if u.get('blurb'):
                text = u.get(
                    'blurb', {'text': None, 'workshop': None}).get('text')
                workshop = u.get(
                    'blurb', {'text': None, 'workshop': None}).get('workshop')
                if text and workshop:
                    SAVE_DIR = f'{settings.BUILD_DIR}_workshops/{workshop}'
                    if find_dir(workshop):
                        with open(f'{SAVE_DIR}/{DATA_FILE}', 'w+') as file:
                            file.write(yaml.dump({
                                'workshop': workshop,
                                'user': u.get('username'),
                                'text': PARSER.fix_html(text)
                            }))
                        log.log(f'Saved blurb datafile: {SAVE_DIR}/{DATA_FILE}.')
                    else:
                        log.error(
                            f'No directory available for `{workshop}` ({SAVE_DIR}). Did you run `python manage.py build --repo {workshop}` before running this script?',
                            kill=True)
    if (log._save(data='buildblurbs', name='warnings.md', warnings=True)
            or log._save(data='buildblurbs', name='logs.md', warnings=False, logs=True)
            or log._save(data='buildblurbs', name='info.md', warnings=False, logs=False, info=True)):
        log.log(
            f'Log files with any warnings and logging information are now available in: `{log.LOG_DIR}`',
            force=True)

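# For reference, the blurb datafile written above is plain YAML with three
# keys. A sketch of what `yaml.dump` produces for it; the workshop slug,
# username, and text below are invented example values.
import yaml

print(yaml.dump({
    'workshop': 'python',
    'user': 'admin',
    'text': '<p>An introduction to programming in Python.</p>',
}))
# text: <p>An introduction to programming in Python.</p>
# user: admin
# workshop: python
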
def _fix_contributor(string):
    def get_correct_role(string):
        if 'author' in string.lower() or 'contributor' in string.lower():
            return 'Au'
        if 'review' in string.lower():
            return 'Re'
        if 'editor' in string.lower():
            return 'Ed'
        raise RuntimeError(
            f'Could not get correct role from `{string}`. Roles can be `author`, `contributor`, `reviewer`, or `editor`. Please correct the markdown file.'
        )

    def split_names(full_name: str) -> tuple:
        """Uses the `nameparser` library to interpret names."""
        name = HumanName(full_name)
        first_name = name.first
        if name.middle:
            first_name += ' ' + name.middle
        last_name = name.last
        return (first_name, last_name)

    soup = BeautifulSoup(PARSER.convert(string), 'lxml')
    link = soup.find('a')
    href = link['href'] if link else None
    current = 'current' in string.lower()
    past = 'past' in string.lower()
    full_name, first_name, last_name = None, None, None
    try:
        full_name = soup.text.split(':')[1].strip()
    except IndexError:
        pass
    if full_name:
        first_name, last_name = split_names(full_name)
    return {
        'full_name': full_name,
        'first_name': first_name,
        'last_name': last_name,
        'role': get_correct_role(string),
        'current': current,
        'past': past,
        'link': href
    }

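# `split_names` relies on the third-party `nameparser` package
# (https://pypi.org/project/nameparser/). A quick demonstration of how
# `HumanName` splits a name; the example name is arbitrary.
from nameparser import HumanName  # pip install nameparser

name = HumanName('Grace Brewster Hopper')
print(name.first, name.middle, name.last)
# Grace Brewster Hopper
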
def _fix_praxis(self):
    fixing = self.sections['theory-to-practice']
    fixing['intro'] = PARSER.fix_html(fixing['intro'])
    # Make lists correct
    for _list in [
            'discussion_questions', 'next_steps', 'tutorials',
            'further_readings', 'further_projects'
    ]:
        if _list in fixing:
            fixing[_list] = [
                self._fix_list_element(x) for x in as_list(fixing[_list])
            ]
    return fixing

def handle(self, *args, **options):
    log = Logger(path=__file__,
                 force_verbose=options.get('verbose'),
                 force_silent=options.get('silent'))
    input = Input(path=__file__)
    data = AUTO_SNIPPETS
    for identifier, snippetdata in data.items():
        snippet, created = Snippet.objects.get_or_create(
            identifier=identifier)
        if not created and not options.get('force'):
            choice = input.ask(
                f'Snippet `{identifier}` already exists. Update with new definition? [y/N]'
            )
            if choice.lower() != 'y':
                continue
        # .update() on the queryset writes directly to the database,
        # bypassing Snippet.save() and any model signals.
        Snippet.objects.filter(identifier=identifier).update(
            snippet=PARSER.convert(snippetdata))
    log.log('Added/updated snippets: ' + ', '.join(data))
    if (log._save(data='ingestsnippets', name='warnings.md', warnings=True)
            or log._save(data='ingestsnippets', name='logs.md', warnings=False, logs=True)
            or log._save(data='ingestsnippets', name='info.md', warnings=False, logs=False, info=True)):
        log.log(
            f'Log files with any warnings and logging information are now available in: `{log.LOG_DIR}`',
            force=True)

def handle(self, *args, **options):
    log = Logger(path=__file__,
                 force_verbose=options.get('verbose'),
                 force_silent=options.get('silent'))
    input = Input(path=__file__)
    workshops = get_all_existing_workshops()
    if options.get('name'):
        workshops = get_all_existing_workshops(options.get('name'))
    for name, path in workshops:
        DATAFILE = f'{path}/blurb.yml'
        try:
            data = get_yaml(DATAFILE, log=log, catch_error=True)
        except Exception:
            log.warning(f'Found no blurb for workshop `{name}`. Skipping and moving ahead...')
            continue
        if not data.get('user'):
            log.error(
                f'Username was not defined for the blurb for workshop `{name}`. Check the datafile {DATAFILE} to verify the username attributed to the blurb.')
        if not data.get('workshop'):
            log.warning(
                f'Blurb had no workshop assigned, but will proceed with the blurb\'s parent folder ({name}) as the assumed workshop. To fix this warning, you can try running `python manage.py buildblurbs` before running `ingestblurbs`.')
            data['workshop'] = name
        if not data.get('text'):
            log.error(
                f'Blurb has no text assigned, and thus could not be ingested. Check the datafile {DATAFILE} to verify the text attributed to the blurb.')
            continue
        try:
            user = User.objects.get(username=data.get('user'))
        except User.DoesNotExist:
            log.error(
                f'The user attributed to the blurb ({data.get("user")}) was not found in the database. Did you try running `python manage.py ingestusers` before running `ingestblurbs`?')
            continue
        try:
            workshop = Workshop.objects.get(slug=data.get('workshop'))
        except Workshop.DoesNotExist:
            log.error(
                f'The blurb\'s attached workshop ({data.get("workshop")}) was not found in the database. Did you try running `python manage.py ingestworkshop --name {data.get("workshop")}` before running `ingestblurbs`?')
            continue
        blurb, created = Blurb.objects.get_or_create(
            user=user,
            workshop=workshop,
            defaults={'text': PARSER.fix_html(data.get('text'))})
        if not created and not options.get('force'):
            choice = input.ask(
                f'Blurb for workshop `{workshop}` already exists. Update with new content? [y/N]')
            if choice.lower() != 'y':
                continue
        # Apply the same HTML fix as on creation.
        blurb.text = PARSER.fix_html(data.get('text'))
        blurb.save()
    log.log('Added/updated blurbs for workshops: ' + ', '.join([x[0] for x in workshops]))
    if (log._save(data='ingestblurbs', name='warnings.md', warnings=True)
            or log._save(data='ingestblurbs', name='logs.md', warnings=False, logs=True)
            or log._save(data='ingestblurbs', name='info.md', warnings=False, logs=False, info=True)):
        log.log(
            f'Log files with any warnings and logging information are now available in: `{log.LOG_DIR}`',
            force=True)

def _fix_lessons(self):
    def reset_eval_dict():
        return {
            'question': '',
            'answers': {
                'correct': [],
                'incorrect': []
            }
        }

    def mini_parse_eval(markdown: str):
        '''Set up standards'''
        dict_collector = list()
        d = reset_eval_dict()
        in_q = False
        in_code = False
        for current_line_number, line in enumerate(markdown.splitlines()):
            is_empty = line.strip() == ''
            is_answer = line.startswith('- ')
            try:
                if markdown.splitlines()[current_line_number + 1].startswith('```'):
                    # The next line opens or closes a code fence, so this line
                    # is not a question boundary; toggle the in_code state.
                    is_empty = False
                    in_code = not in_code
            except IndexError:
                pass
            if not is_answer and not is_empty:
                in_q = True
                d['question'] += line + '\n'
            elif in_q and is_answer:
                if line.strip().endswith('*'):
                    answer = line.strip()[2:-1].strip()
                    answer = PARSER.fix_html(answer)
                    d['answers']['correct'].append(answer)
                else:
                    answer = line.strip()[2:].strip()
                    answer = PARSER.fix_html(answer)
                    d['answers']['incorrect'].append(answer)
            elif is_empty and in_q and not in_code:
                d['question'] = d['question'].strip()
                dict_collector.append(d)
                in_q = False
                d = reset_eval_dict()
            elif is_answer:
                # Stray answer belonging to the latest question, so attach it.
                try:
                    if line.strip().endswith('*'):
                        answer = line.strip()[2:-1].strip()
                        answer = PARSER.fix_html(answer)
                        dict_collector[-1]['answers']['correct'].append(answer)
                    else:
                        answer = line.strip()[2:].strip()
                        answer = PARSER.fix_html(answer)
                        dict_collector[-1]['answers']['incorrect'].append(answer)
                except IndexError:
                    self.log.warning(
                        f'Found and skipping a stray answer that cannot be attached to a question: {line.strip()}'
                    )
        # Add the final element
        d['question'] = PARSER.fix_html(d['question'])
        dict_collector.append(d)
        # Clean up dict_collector: drop entries with no question and no answers.
        # (Filtering into a new list avoids deleting from the list while
        # iterating over it, which would skip elements.)
        dict_collector = [
            item for item in dict_collector
            if item.get('question') or item['answers']['correct'] or item['answers']['incorrect']
        ]
        return dict_collector

    _ = []
    lessons = self._get_raw()['lessons']
    lesson_sections = split_into_sections(lessons,
                                          level_granularity=1,
                                          clear_empty_lines=False)
    for order, lesson_data in enumerate(lesson_sections.items(), start=1):
        __ = {
            'raw_content': '',
            'order': order,
            'header': '',
            'has_lesson_sections': {},
            'content': '',
            'lesson_images': [],
            'challenge': {'header': '', 'content': ''},
            'solution': {'header': '', 'content': ''},
            'keywords': {'header': '', 'content': []},
            'evaluation': {'header': '', 'content': ''}
        }
        __['header'], __['raw_content'] = lesson_data
        __['has_lesson_sections'] = WorkshopCache._check_for_lesson_sections(
            __['raw_content'])
        if not __['raw_content'].startswith('#'):
            __['content'] += list(
                split_into_sections('# ' + __['header'] + '\n' + __['raw_content'],
                                    level_granularity=2).values())[0] + '\n'
        for subheader, content in split_into_sections(
                __['raw_content'],
                level_granularity=2,
                keep_levels=True,
                clear_empty_lines=False).items():
            lowered = subheader.lower()
            prefix = subheader.split(':')[0].lower()
            is_evaluation = lowered in ('## evaluation', '## evaluations') or prefix == '## evaluation'
            is_challenge = lowered in ('## challenge', '## challenges') or prefix == '## challenge'
            is_solution = lowered in ('## solution', '## solutions') or prefix == '## solution'
            is_keywords = lowered in ('## keyword', '## keywords')
            if not any([is_evaluation, is_challenge, is_solution, is_keywords]):
                __['content'] += subheader + '\n'
                __['content'] += content + '\n'
            if is_challenge:
                __['challenge'] = {
                    'header': subheader.split('#')[-1].strip(),
                    'content': PARSER.fix_html(content)
                }
            if is_solution:
                __['solution'] = {
                    'header': subheader.split('#')[-1].strip(),
                    'content': PARSER.fix_html(content)
                }
            if is_keywords:
                __['keywords'] = {
                    'header': subheader.split('#')[-1].strip(),
                    'content': [self._fix_list_element(x) for x in as_list(content)],
                }
                __['keywords']['content'] = [
                    x.get('linked_text') for x in __['keywords']['content']
                ]
            if is_evaluation:
                __['evaluation'] = {
                    'header': subheader.split('#')[-1].strip(),
                    'content': mini_parse_eval(content)
                }
        # Remove raw content
        __.pop('raw_content')
        __['header'] = PARSER.fix_html(__['header'])
        __['content'] = PARSER.fix_html(__['content'])
        __['content'], __['lesson_images'] = self._get_images_from_html(
            __['content'])
        # Make sure we capture images from the solution as well
        __['solution']['content'], add_to_lesson_images = self._get_images_from_html(
            __['solution'].get('content', ''))
        if add_to_lesson_images:
            before = len(__['lesson_images'])
            __['lesson_images'].extend(add_to_lesson_images)
            after = len(__['lesson_images'])
            if after - before:
                self.log.info(
                    'Found additional images in solution, and added them to the built lesson files.'
                )
        # Final clean-up
        for check_up in ['solution', 'challenge', 'evaluation', 'keywords']:
            if not __[check_up].get('content') and not __[check_up].get('header'):
                __[check_up] = None
        _.append(__)
    return _

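# A hand-traced sketch of the evaluation format `mini_parse_eval` consumes:
# a question paragraph followed by `- ` answer lines, where a trailing `*`
# marks a correct answer. The sample question and answers are invented.
sample = """What does HTML stand for?
- HyperText Markup Language*
- High-Tension Machine Learning"""

# Expected shape of mini_parse_eval(sample); the question and each answer
# also pass through PARSER.fix_html before being collected:
expected = [{
    'question': 'What does HTML stand for?',
    'answers': {
        'correct': ['HyperText Markup Language'],
        'incorrect': ['High-Tension Machine Learning'],
    },
}]
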
def _fix_frontmatter(self):
    fixing = self.sections['frontmatter']
    # Fix estimated_time
    fixing['estimated_time'] = self._fix_estimated_time(
        fixing['estimated_time'])
    fixing['abstract'] = PARSER.fix_html(fixing['abstract'])
    # Make lists correct
    for _list in [
            'readings', 'projects', 'learning_objectives',
            'ethical_considerations', 'cheat_sheets', 'datasets',
            'prerequisites'
    ]:
        if _list in fixing:
            fixing[_list] = [
                self._fix_list_element(x) for x in as_list(fixing[_list])
            ]
        else:
            fixing[_list] = []
    # Fix contributors
    fixing['contributors'] = [
        self._fix_contributor(x) for x in as_list(fixing['contributors'])
    ]
    # Fix prerequisites
    _ = []
    for prerequisite_data in fixing['prerequisites']:
        text = None
        url = prerequisite_data.get('url')
        url_text = prerequisite_data.get('linked_text')
        html = prerequisite_data.get('annotation')
        install_link = 'shortcuts/install/' in url
        insight_link = '/shortcuts/insight/' in url
        workshop_link = '/shortcuts/workshop/' in url
        # TODO #429: Somehow determine what is a cheatsheet and ingest that here...
        text = self.process_prereq_text(html, log=self.log)
        if install_link and not text:
            self.log.warning(
                f'No clarifying text was found when processing the prerequired installation (`{url_text}`) for workshop `{self.name}`. Note that the clarifying text will be replaced by the "why" text from the installation instructions. You may want to change this in the frontmatter\'s requirements for the workshop {self.name} and re-run `buildworkshop --name {self.repository}`.'
            )
        if insight_link and not text:
            self.log.warning(
                f'No clarifying text was found when processing the prerequired insight (`{url_text}`) for workshop `{self.name}`. Note that the clarifying text will be replaced by the default text presenting the insight. You may want to change this in the frontmatter\'s requirements for the workshop {self.name} and re-run `buildworkshop --name {self.repository}`.'
            )
        if workshop_link and not text:
            self.log.warning(
                f'No clarifying text was found when processing the prerequired workshop (`{url_text}`) for workshop `{self.name}`. Note that the clarifying text will not be replaced by any default text and can thus be confusing to the user. You may want to change this in the frontmatter\'s requirements for the workshop {self.name} and re-run `buildworkshop --name {self.repository}`.'
            )
        if install_link:
            _.append({
                'type': 'install',
                'potential_name': self._extract_from_p(url_text),
                'text': text,
                'potential_slug_fragment': os.path.basename(url).replace('.md', ''),
                'required': '(required)' in html.lower(),
                'recommended': '(recommended)' in html.lower()
            })
        if insight_link:
            _.append({
                'type': 'insight',
                'potential_name': self._extract_from_p(url_text),
                'text': text,
                'potential_slug_fragment': os.path.basename(url).replace('.md', ''),
                'required': '(required)' in html.lower(),
                'recommended': '(recommended)' in html.lower()
            })
        if workshop_link:
            _.append({
                'type': 'workshop',
                'potential_name': self._extract_from_p(url_text),
                'text': text,
                'required': '(required)' in html.lower(),
                'recommended': '(recommended)' in html.lower()
            })
        if not install_link and not insight_link and not workshop_link:
            _.append({
                'type': 'external_link',
                'url_text': self._extract_from_p(url_text),
                'text': text,
                'url': url,
                'required': '(required)' in html.lower(),
                'recommended': '(recommended)' in html.lower()
            })
    fixing['prerequisites'] = _
    return fixing

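# As a rough illustration of the prerequisite transformation above, an
# install-shortcut entry shaped like the first literal below would come out
# roughly as the second. The URL, texts, and the exact outputs of
# process_prereq_text and _extract_from_p are invented for the example.
prerequisite_data = {
    'url': 'https://example.org/shortcuts/install/python.md',
    'linked_text': 'Python (required)',
    'annotation': '<p>Python (required) for all lessons.</p>',
}

fixed = {
    'type': 'install',
    'potential_name': 'Python (required)',  # from _extract_from_p(url_text)
    'text': None,                           # filled by process_prereq_text(html)
    'potential_slug_fragment': 'python',    # basename of url, minus .md
    'required': True,                       # '(required)' found in annotation
    'recommended': False,
}
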
def handle(self, *args, **options):
    log = Logger(path=__file__,
                 force_verbose=options.get('verbose'),
                 force_silent=options.get('silent'))
    log.log('Building user files... Please be patient as this can take some time.')
    users = list()
    if not pathlib.Path(SAVE_DIR).exists():
        pathlib.Path(SAVE_DIR).mkdir(parents=True)
    if not pathlib.Path(SAVE_DIR_IMG).exists():
        pathlib.Path(SAVE_DIR_IMG).mkdir(parents=True)
    all_categories = list(settings.AUTO_USERS.keys())
    for cat in all_categories:
        all_users = settings.AUTO_USERS[cat]
        log.BAR(all_users, max_value=len(all_users))
        for i, u in enumerate(all_users):
            log.BAR.update(i)
            is_staff = cat == 'STAFF'
            is_super = cat == 'SUPER'
            if is_super:
                is_staff = True
            user = {
                'username': u.get('username'),
                'password': u.get('password', ''),
                'first_name': u.get('first_name', ''),
                'last_name': u.get('last_name', ''),
                'email': u.get('email', ''),
                'profile': {
                    'image': '',
                    'bio': '',
                    'pronouns': u.get('pronouns'),
                    'links': []
                },
                'superuser': is_super,
                'staff': is_staff,
                'groups': u.get('groups', [])
            }
            if u.get('bio'):
                user['profile']['bio'] = PARSER.fix_html(u.get('bio'))
            if u.get('img'):
                if options.get('nocrop'):
                    filename = u['img'].split('/')[-1]
                    user['profile']['image'] = f'{SAVE_DIR_IMG}/{filename}'
                    copyfile(u['img'], user['profile']['image'])
                else:
                    filename = u['img'].split('/')[-1].split('.')[0]
                    user['profile']['image'] = f'{SAVE_DIR_IMG}/{filename}.jpg'
                    crop_and_save(u['img'], user['profile']['image'], MAX_SIZE)
            else:
                log.warning(f'User `{u.get("username")}` does not have an image assigned to them and will be assigned the default picture. Add a filepath to an existing image in your datafile (`{SAVE_DIR}/{DATA_FILE}`) or follow the steps in the documentation to add user images if you want to make sure this user has a profile picture. Then, rerun `python manage.py buildusers` or `python manage.py build`.')
            for link in u.get('links', []):
                user['profile']['links'].append({
                    'label': link.get('text'),
                    'url': link.get('url'),
                    'cat': link.get('cat')
                })
            users.append(user)
        log.BAR.finish()
    # Save all data
    with open(f'{SAVE_DIR}/{DATA_FILE}', 'w+') as file:
        file.write(
            yaml.dump({'users': users, 'default': settings.AUTO_USER_DEFAULT}))
    log.log(f'Saved user datafile: {SAVE_DIR}/{DATA_FILE}.')
    if (log._save(data='buildusers', name='warnings.md', warnings=True)
            or log._save(data='buildusers', name='logs.md', warnings=False, logs=True)
            or log._save(data='buildusers', name='info.md', warnings=False, logs=False, info=True)):
        log.log(
            f'Log files with any warnings and logging information are now available in: `{log.LOG_DIR}`',
            force=True)