def parse(self):
    files = self.contents_of('pages')
    super().parse(files)
    insights = []
    for i, file in enumerate(files):
        self.log.BAR.update(i)
        file_contents = self.read_file(
            os.path.join(self.destination_dir, 'pages', file))
        if not file_contents:
            continue
        _ = {
            'image': self._get_image_from_first_line(file_contents),
            'insight': list(
                split_into_sections(file_contents,
                                    level_granularity=1).keys())[0],
            'introduction': '',
            'os_specific': {},
            'sections': {}
        }
        _['image'] = self._fix_image(_['image'], additional_parents=['pages'])
        sections = split_into_sections(file_contents, level_granularity=2)
        order = 0
        for section, content in sections.items():
            if section == _['insight']:
                _['introduction'] = PARSER.fix_html(content)
            else:
                order += 1
                _['sections'][section] = {
                    'order': order,
                    'content': PARSER.fix_html(content)
                }
                has_os_specific_instruction = '### ' in content
                if has_os_specific_instruction:
                    for operating_system, os_content in split_into_sections(
                            content, level_granularity=3).items():
                        if operating_system == 'MacOS':
                            operating_system = 'macOS'
                        _['os_specific'][operating_system] = {
                            'content': PARSER.fix_html(os_content),
                            'related_section': section
                        }
        insights.append(_)
    self.log.BAR.finish()
    return insights

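# `parse` leans on a `split_into_sections` helper defined elsewhere in the
# codebase. Below is a minimal sketch of the behavior the calls above assume
# (split markdown on headings up to a given level, mapping heading text to the
# body below it); the real helper also supports `keep_levels` and
# `clear_empty_lines`, which are not modeled here.
import re


def split_into_sections_sketch(markdown: str, level_granularity: int = 1) -> dict:
    """Map each heading of at most `level_granularity` '#'s to the text below it."""
    sections, current = {}, None
    heading = re.compile(r'^(#{1,%d})\s+(.*)$' % level_granularity)
    for line in markdown.splitlines():
        match = heading.match(line)
        if match:
            current = match.group(2).strip()
            sections[current] = ''
        elif current is not None:
            sections[current] += line + '\n'
    return {k: v.strip() for k, v in sections.items()}


# Example:
# split_into_sections_sketch('# Title\nIntro\n## Part\nBody', level_granularity=2)
# returns {'Title': 'Intro', 'Part': 'Body'}
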
def handle(self, *args, **options):
    log = Logger(path=__file__,
                 force_verbose=options.get('verbose'),
                 force_silent=options.get('silent'))
    log.log('Building blurbs... Please be patient as this can take some time.')
    for cat in list(settings.AUTO_USERS.keys()):
        for u in settings.AUTO_USERS[cat]:
            if u.get('blurb'):
                text = u.get(
                    'blurb', {'text': None, 'workshop': None}).get('text')
                workshop = u.get(
                    'blurb', {'text': None, 'workshop': None}).get('workshop')
                if text and workshop:
                    SAVE_DIR = f'{settings.BUILD_DIR}_workshops/{workshop}'
                    if find_dir(workshop):
                        with open(f'{SAVE_DIR}/{DATA_FILE}', 'w+') as file:
                            file.write(yaml.dump({
                                'workshop': workshop,
                                'user': u.get('username'),
                                'text': PARSER.fix_html(text)
                            }))
                        log.log(f'Saved blurb datafile: {SAVE_DIR}/{DATA_FILE}.')
                    else:
                        log.error(
                            f'No directory available for `{workshop}` ({SAVE_DIR}). Did you run `python manage.py build --repo {workshop}` before running this script?',
                            kill=True)
    if (log._save(data='buildblurbs', name='warnings.md', warnings=True)
            or log._save(data='buildblurbs', name='logs.md', warnings=False, logs=True)
            or log._save(data='buildblurbs', name='info.md', warnings=False, logs=False, info=True)):
        log.log(
            f'Log files with any warnings and logging information are now available in: `{log.LOG_DIR}`',
            force=True)

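# For reference, the blurb datafile written above is plain YAML with three
# keys. A sketch of what `yaml.dump` produces for it; the workshop slug,
# username, and text below are invented example values.
import yaml

print(yaml.dump({
    'workshop': 'python',
    'user': 'admin',
    'text': '<p>An introduction to programming in Python.</p>',
}))
# text: <p>An introduction to programming in Python.</p>
# user: admin
# workshop: python
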
def _fix_contributor(string):
    def get_correct_role(string):
        if 'author' in string.lower() or 'contributor' in string.lower():
            return 'Au'
        if 'review' in string.lower():
            return 'Re'
        if 'editor' in string.lower():
            return 'Ed'
        raise RuntimeError(
            f'Could not get correct role from `{string}`. Roles can be `author`, `contributor`, `reviewer`, or `editor`. Please correct the markdown file.'
        )

    def split_names(full_name: str) -> tuple:
        """Uses the `nameparser` library to interpret names."""
        name = HumanName(full_name)
        first_name = name.first
        if name.middle:
            first_name += ' ' + name.middle
        last_name = name.last
        return (first_name, last_name)

    soup = BeautifulSoup(PARSER.convert(string), 'lxml')
    link = soup.find('a')
    href = link['href'] if link else None
    current = 'current' in string.lower()
    past = 'past' in string.lower()
    full_name, first_name, last_name = None, None, None
    try:
        full_name = soup.text.split(':')[1].strip()
    except IndexError:
        pass
    if full_name:
        first_name, last_name = split_names(full_name)
    return {
        'full_name': full_name,
        'first_name': first_name,
        'last_name': last_name,
        'role': get_correct_role(string),
        'current': current,
        'past': past,
        'link': href
    }

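# `split_names` relies on the third-party `nameparser` package
# (https://pypi.org/project/nameparser/). A quick demonstration of how
# `HumanName` splits a name; the example name is arbitrary.
from nameparser import HumanName  # pip install nameparser

name = HumanName('Grace Brewster Hopper')
print(name.first, name.middle, name.last)
# Grace Brewster Hopper
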
def _fix_praxis(self):
    fixing = self.sections['theory-to-practice']
    fixing['intro'] = PARSER.fix_html(fixing['intro'])
    # Make lists correct
    for _list in [
            'discussion_questions', 'next_steps', 'tutorials',
            'further_readings', 'further_projects'
    ]:
        if _list in fixing:
            fixing[_list] = [
                self._fix_list_element(x) for x in as_list(fixing[_list])
            ]
    return fixing

def handle(self, *args, **options):
    log = Logger(path=__file__,
                 force_verbose=options.get('verbose'),
                 force_silent=options.get('silent'))
    input = Input(path=__file__)
    data = AUTO_SNIPPETS
    for identifier, snippetdata in data.items():
        snippet, created = Snippet.objects.get_or_create(
            identifier=identifier)
        if not created and not options.get('force'):
            choice = input.ask(
                f'Snippet `{identifier}` already exists. Update with new definition? [y/N]'
            )
            if choice.lower() != 'y':
                continue
        # .update() on the queryset writes directly to the database,
        # bypassing Snippet.save() and any model signals.
        Snippet.objects.filter(identifier=identifier).update(
            snippet=PARSER.convert(snippetdata))
    log.log('Added/updated snippets: ' + ', '.join(data))
    if (log._save(data='ingestsnippets', name='warnings.md', warnings=True)
            or log._save(data='ingestsnippets', name='logs.md', warnings=False, logs=True)
            or log._save(data='ingestsnippets', name='info.md', warnings=False, logs=False, info=True)):
        log.log(
            f'Log files with any warnings and logging information are now available in: `{log.LOG_DIR}`',
            force=True)

def handle(self, *args, **options):
    log = Logger(path=__file__,
                 force_verbose=options.get('verbose'),
                 force_silent=options.get('silent'))
    input = Input(path=__file__)
    workshops = get_all_existing_workshops()
    if options.get('name'):
        workshops = get_all_existing_workshops(options.get('name'))
    for name, path in workshops:
        DATAFILE = f'{path}/blurb.yml'
        try:
            data = get_yaml(DATAFILE, log=log, catch_error=True)
        except Exception:
            log.warning(f'Found no blurb for workshop `{name}`. Skipping and moving ahead...')
            continue
        if not data.get('user'):
            log.error(
                f'Username was not defined for the blurb for workshop `{name}`. Check the datafile {DATAFILE} to verify the username attributed to the blurb.')
        if not data.get('workshop'):
            log.warning(
                f'Blurb had no workshop assigned, but will proceed with the blurb\'s parent folder ({name}) as the assumed workshop. To fix this warning, you can try running `python manage.py buildblurbs` before running `ingestblurbs`.')
            data['workshop'] = name
        if not data.get('text'):
            log.error(
                f'Blurb has no text assigned, and thus could not be ingested. Check the datafile {DATAFILE} to verify the text attributed to the blurb.')
            continue
        try:
            user = User.objects.get(username=data.get('user'))
        except User.DoesNotExist:
            log.error(
                f'The user attributed to the blurb ({data.get("user")}) was not found in the database. Did you try running `python manage.py ingestusers` before running `ingestblurbs`?')
            continue
        try:
            workshop = Workshop.objects.get(slug=data.get('workshop'))
        except Workshop.DoesNotExist:
            log.error(
                f'The blurb\'s attached workshop ({data.get("workshop")}) was not found in the database. Did you try running `python manage.py ingestworkshop --name {data.get("workshop")}` before running `ingestblurbs`?')
            continue
        blurb, created = Blurb.objects.get_or_create(
            user=user,
            workshop=workshop,
            defaults={'text': PARSER.fix_html(data.get('text'))})
        if not created and not options.get('force'):
            choice = input.ask(
                f'Blurb for workshop `{workshop}` already exists. Update with new content? [y/N]')
            if choice.lower() != 'y':
                continue
        # Apply the same HTML fix as on creation.
        blurb.text = PARSER.fix_html(data.get('text'))
        blurb.save()
    log.log('Added/updated blurbs for workshops: ' + ', '.join([x[0] for x in workshops]))
    if (log._save(data='ingestblurbs', name='warnings.md', warnings=True)
            or log._save(data='ingestblurbs', name='logs.md', warnings=False, logs=True)
            or log._save(data='ingestblurbs', name='info.md', warnings=False, logs=False, info=True)):
        log.log(
            f'Log files with any warnings and logging information are now available in: `{log.LOG_DIR}`',
            force=True)

def _fix_lessons(self):
    def reset_eval_dict():
        return {
            'question': '',
            'answers': {
                'correct': [],
                'incorrect': []
            }
        }

    def mini_parse_eval(markdown: str):
        '''Set up standards'''
        dict_collector = list()
        d = reset_eval_dict()
        in_q = False
        in_code = False
        for current_line_number, line in enumerate(markdown.splitlines()):
            is_empty = line.strip() == ''
            is_answer = line.startswith('- ')
            try:
                if markdown.splitlines()[current_line_number + 1].startswith('```'):
                    # The next line opens or closes a code fence, so this line
                    # is not a question boundary; toggle the in_code state.
                    is_empty = False
                    in_code = not in_code
            except IndexError:
                pass
            if not is_answer and not is_empty:
                in_q = True
                d['question'] += line + '\n'
            elif in_q and is_answer:
                if line.strip().endswith('*'):
                    answer = line.strip()[2:-1].strip()
                    answer = PARSER.fix_html(answer)
                    d['answers']['correct'].append(answer)
                else:
                    answer = line.strip()[2:].strip()
                    answer = PARSER.fix_html(answer)
                    d['answers']['incorrect'].append(answer)
            elif is_empty and in_q and not in_code:
                d['question'] = d['question'].strip()
                dict_collector.append(d)
                in_q = False
                d = reset_eval_dict()
            elif is_answer:
                # Stray answer belonging to the latest question, so attach it.
                try:
                    if line.strip().endswith('*'):
                        answer = line.strip()[2:-1].strip()
                        answer = PARSER.fix_html(answer)
                        dict_collector[-1]['answers']['correct'].append(answer)
                    else:
                        answer = line.strip()[2:].strip()
                        answer = PARSER.fix_html(answer)
                        dict_collector[-1]['answers']['incorrect'].append(answer)
                except IndexError:
                    self.log.warning(
                        f'Found and skipping a stray answer that cannot be attached to a question: {line.strip()}'
                    )
        # Add the final element
        d['question'] = PARSER.fix_html(d['question'])
        dict_collector.append(d)
        # Clean up dict_collector: drop entries with no question and no answers.
        # (Filtering into a new list avoids deleting from the list while
        # iterating over it, which would skip elements.)
        dict_collector = [
            item for item in dict_collector
            if item.get('question') or item['answers']['correct'] or item['answers']['incorrect']
        ]
        return dict_collector

    _ = []
    lessons = self._get_raw()['lessons']
    lesson_sections = split_into_sections(lessons,
                                          level_granularity=1,
                                          clear_empty_lines=False)
    for order, lesson_data in enumerate(lesson_sections.items(), start=1):
        __ = {
            'raw_content': '',
            'order': order,
            'header': '',
            'has_lesson_sections': {},
            'content': '',
            'lesson_images': [],
            'challenge': {'header': '', 'content': ''},
            'solution': {'header': '', 'content': ''},
            'keywords': {'header': '', 'content': []},
            'evaluation': {'header': '', 'content': ''}
        }
        __['header'], __['raw_content'] = lesson_data
        __['has_lesson_sections'] = WorkshopCache._check_for_lesson_sections(
            __['raw_content'])
        if not __['raw_content'].startswith('#'):
            __['content'] += list(
                split_into_sections('# ' + __['header'] + '\n' + __['raw_content'],
                                    level_granularity=2).values())[0] + '\n'
        for subheader, content in split_into_sections(
                __['raw_content'],
                level_granularity=2,
                keep_levels=True,
                clear_empty_lines=False).items():
            lowered = subheader.lower()
            prefix = subheader.split(':')[0].lower()
            is_evaluation = lowered in ('## evaluation', '## evaluations') or prefix == '## evaluation'
            is_challenge = lowered in ('## challenge', '## challenges') or prefix == '## challenge'
            is_solution = lowered in ('## solution', '## solutions') or prefix == '## solution'
            is_keywords = lowered in ('## keyword', '## keywords')
            if not any([is_evaluation, is_challenge, is_solution, is_keywords]):
                __['content'] += subheader + '\n'
                __['content'] += content + '\n'
            if is_challenge:
                __['challenge'] = {
                    'header': subheader.split('#')[-1].strip(),
                    'content': PARSER.fix_html(content)
                }
            if is_solution:
                __['solution'] = {
                    'header': subheader.split('#')[-1].strip(),
                    'content': PARSER.fix_html(content)
                }
            if is_keywords:
                __['keywords'] = {
                    'header': subheader.split('#')[-1].strip(),
                    'content': [self._fix_list_element(x) for x in as_list(content)],
                }
                __['keywords']['content'] = [
                    x.get('linked_text') for x in __['keywords']['content']
                ]
            if is_evaluation:
                __['evaluation'] = {
                    'header': subheader.split('#')[-1].strip(),
                    'content': mini_parse_eval(content)
                }
        # Remove raw content
        __.pop('raw_content')
        __['header'] = PARSER.fix_html(__['header'])
        __['content'] = PARSER.fix_html(__['content'])
        __['content'], __['lesson_images'] = self._get_images_from_html(
            __['content'])
        # Make sure we capture images from the solution as well
        __['solution']['content'], add_to_lesson_images = self._get_images_from_html(
            __['solution'].get('content', ''))
        if add_to_lesson_images:
            before = len(__['lesson_images'])
            __['lesson_images'].extend(add_to_lesson_images)
            after = len(__['lesson_images'])
            if after - before:
                self.log.info(
                    'Found additional images in solution, and added them to the built lesson files.'
                )
        # Final clean-up
        for check_up in ['solution', 'challenge', 'evaluation', 'keywords']:
            if not __[check_up].get('content') and not __[check_up].get('header'):
                __[check_up] = None
        _.append(__)
    return _

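# A hand-traced sketch of the evaluation format `mini_parse_eval` consumes:
# a question paragraph followed by `- ` answer lines, where a trailing `*`
# marks a correct answer. The sample question and answers are invented.
sample = """What does HTML stand for?
- HyperText Markup Language*
- High-Tension Machine Learning"""

# Expected shape of mini_parse_eval(sample); the question and each answer
# also pass through PARSER.fix_html before being collected:
expected = [{
    'question': 'What does HTML stand for?',
    'answers': {
        'correct': ['HyperText Markup Language'],
        'incorrect': ['High-Tension Machine Learning'],
    },
}]
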
def _fix_frontmatter(self):
    fixing = self.sections['frontmatter']
    # Fix estimated_time
    fixing['estimated_time'] = self._fix_estimated_time(
        fixing['estimated_time'])
    fixing['abstract'] = PARSER.fix_html(fixing['abstract'])
    # Make lists correct
    for _list in [
            'readings', 'projects', 'learning_objectives',
            'ethical_considerations', 'cheat_sheets', 'datasets',
            'prerequisites'
    ]:
        if _list in fixing:
            fixing[_list] = [
                self._fix_list_element(x) for x in as_list(fixing[_list])
            ]
        else:
            fixing[_list] = []
    # Fix contributors
    fixing['contributors'] = [
        self._fix_contributor(x) for x in as_list(fixing['contributors'])
    ]
    # Fix prerequisites
    _ = []
    for prerequisite_data in fixing['prerequisites']:
        text = None
        url = prerequisite_data.get('url')
        url_text = prerequisite_data.get('linked_text')
        html = prerequisite_data.get('annotation')
        install_link = 'shortcuts/install/' in url
        insight_link = '/shortcuts/insight/' in url
        workshop_link = '/shortcuts/workshop/' in url
        # TODO #429: Somehow determine what is a cheatsheet and ingest that here...
        text = self.process_prereq_text(html, log=self.log)
        if install_link and not text:
            self.log.warning(
                f'No clarifying text was found when processing the prerequired installation (`{url_text}`) for workshop `{self.name}`. Note that the clarifying text will be replaced by the "why" text from the installation instructions. You may want to change this in the frontmatter\'s requirements for the workshop {self.name} and re-run `buildworkshop --name {self.repository}`.'
            )
        if insight_link and not text:
            self.log.warning(
                f'No clarifying text was found when processing the prerequired insight (`{url_text}`) for workshop `{self.name}`. Note that the clarifying text will be replaced by the default text presenting the insight. You may want to change this in the frontmatter\'s requirements for the workshop {self.name} and re-run `buildworkshop --name {self.repository}`.'
            )
        if workshop_link and not text:
            self.log.warning(
                f'No clarifying text was found when processing the prerequired workshop (`{url_text}`) for workshop `{self.name}`. Note that the clarifying text will not be replaced by any default text and can thus be confusing to the user. You may want to change this in the frontmatter\'s requirements for the workshop {self.name} and re-run `buildworkshop --name {self.repository}`.'
            )
        if install_link:
            _.append({
                'type': 'install',
                'potential_name': self._extract_from_p(url_text),
                'text': text,
                'potential_slug_fragment': os.path.basename(url).replace('.md', ''),
                'required': '(required)' in html.lower(),
                'recommended': '(recommended)' in html.lower()
            })
        if insight_link:
            _.append({
                'type': 'insight',
                'potential_name': self._extract_from_p(url_text),
                'text': text,
                'potential_slug_fragment': os.path.basename(url).replace('.md', ''),
                'required': '(required)' in html.lower(),
                'recommended': '(recommended)' in html.lower()
            })
        if workshop_link:
            _.append({
                'type': 'workshop',
                'potential_name': self._extract_from_p(url_text),
                'text': text,
                'required': '(required)' in html.lower(),
                'recommended': '(recommended)' in html.lower()
            })
        if not install_link and not insight_link and not workshop_link:
            _.append({
                'type': 'external_link',
                'url_text': self._extract_from_p(url_text),
                'text': text,
                'url': url,
                'required': '(required)' in html.lower(),
                'recommended': '(recommended)' in html.lower()
            })
    fixing['prerequisites'] = _
    return fixing

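# As a rough illustration of the prerequisite transformation above, an
# install-shortcut entry shaped like the first literal below would come out
# roughly as the second. The URL, texts, and the exact outputs of
# process_prereq_text and _extract_from_p are invented for the example.
prerequisite_data = {
    'url': 'https://example.org/shortcuts/install/python.md',
    'linked_text': 'Python (required)',
    'annotation': '<p>Python (required) for all lessons.</p>',
}

fixed = {
    'type': 'install',
    'potential_name': 'Python (required)',  # from _extract_from_p(url_text)
    'text': None,                           # filled by process_prereq_text(html)
    'potential_slug_fragment': 'python',    # basename of url, minus .md
    'required': True,                       # '(required)' found in annotation
    'recommended': False,
}
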
def handle(self, *args, **options):
    log = Logger(path=__file__,
                 force_verbose=options.get('verbose'),
                 force_silent=options.get('silent'))
    log.log('Building user files... Please be patient as this can take some time.')
    users = list()
    if not pathlib.Path(SAVE_DIR).exists():
        pathlib.Path(SAVE_DIR).mkdir(parents=True)
    if not pathlib.Path(SAVE_DIR_IMG).exists():
        pathlib.Path(SAVE_DIR_IMG).mkdir(parents=True)
    all_categories = list(settings.AUTO_USERS.keys())
    for cat in all_categories:
        all_users = settings.AUTO_USERS[cat]
        log.BAR(all_users, max_value=len(all_users))
        for i, u in enumerate(all_users):
            log.BAR.update(i)
            is_staff = cat == 'STAFF'
            is_super = cat == 'SUPER'
            if is_super:
                is_staff = True
            user = {
                'username': u.get('username'),
                'password': u.get('password', ''),
                'first_name': u.get('first_name', ''),
                'last_name': u.get('last_name', ''),
                'email': u.get('email', ''),
                'profile': {
                    'image': '',
                    'bio': '',
                    'pronouns': u.get('pronouns'),
                    'links': []
                },
                'superuser': is_super,
                'staff': is_staff,
                'groups': u.get('groups', [])
            }
            if u.get('bio'):
                user['profile']['bio'] = PARSER.fix_html(u.get('bio'))
            if u.get('img'):
                if options.get('nocrop'):
                    filename = u['img'].split('/')[-1]
                    user['profile']['image'] = f'{SAVE_DIR_IMG}/{filename}'
                    copyfile(u['img'], user['profile']['image'])
                else:
                    filename = u['img'].split('/')[-1].split('.')[0]
                    user['profile']['image'] = f'{SAVE_DIR_IMG}/{filename}.jpg'
                    crop_and_save(u['img'], user['profile']['image'], MAX_SIZE)
            else:
                log.warning(f'User `{u.get("username")}` does not have an image assigned to them and will be assigned the default picture. Add a filepath to an existing image in your datafile (`{SAVE_DIR}/{DATA_FILE}`) or follow the steps in the documentation to add user images if you want to make sure this user has a profile picture. Then, rerun `python manage.py buildusers` or `python manage.py build`.')
            for link in u.get('links', []):
                user['profile']['links'].append({
                    'label': link.get('text'),
                    'url': link.get('url'),
                    'cat': link.get('cat')
                })
            users.append(user)
        log.BAR.finish()
    # Save all data
    with open(f'{SAVE_DIR}/{DATA_FILE}', 'w+') as file:
        file.write(
            yaml.dump({'users': users, 'default': settings.AUTO_USER_DEFAULT}))
    log.log(f'Saved user datafile: {SAVE_DIR}/{DATA_FILE}.')
    if (log._save(data='buildusers', name='warnings.md', warnings=True)
            or log._save(data='buildusers', name='logs.md', warnings=False, logs=True)
            or log._save(data='buildusers', name='info.md', warnings=False, logs=False, info=True)):
        log.log(
            f'Log files with any warnings and logging information are now available in: `{log.LOG_DIR}`',
            force=True)