def handle(self, *args, **options):
        """Serialize the objects of every configured model to its YAML data file.

        The save directory is emptied first (or created if missing) so that no
        previously written fragile data survives. For each entry in ``data``
        the model's objects are serialized with Django's ``yaml`` serializer
        and written to the entry's ``data_file``; models without any objects
        only produce a warning.

        Options read: ``verbose`` and ``silent`` (forwarded to the logger).
        """
        log = Logger(path=__file__,
                     force_verbose=options.get('verbose'),
                     force_silent=options.get('silent'))
        log.log(
            'Building files with fragile data... Please be patient as this can take some time.'
        )

        save_dir = pathlib.Path(SAVE_DIR)
        if save_dir.exists():
            # Make sure it is empty as we don't want to save any old fragile data information
            for stale in save_dir.glob('*'):
                stale.unlink()
        else:
            save_dir.mkdir(parents=True)

        for cat in data:
            spec = data[cat]
            model = spec['model']
            dataset = model.objects.all()

            # Nothing to serialize for this model: warn and move on.
            if not dataset.count():
                log.warning(model._meta.object_name +
                            ' has no objects. No file will be written.')
                continue

            serialized = serializers.serialize(
                'yaml',
                dataset,
                fields=spec['fields'],
                use_natural_primary_keys=spec['use_natural_primary_keys'],
                use_natural_foreign_keys=spec['use_natural_foreign_keys'])

            with open(spec['data_file'], 'w+') as out:
                out.write(serialized)

            log.log(
                f'Saved {model._meta.object_name} fragile data in: {spec["data_file"]}'
            )
Beispiel #2
0
def _is_expired(path,
                age_checker=TEST_AGES['ROOT'],
                force_download=FORCE_DOWNLOAD) -> bool:
    """Report whether the file at ``path`` is missing, forced stale, or too old.

    Args:
        path: ``str`` or path object pointing at the cached file; strings are
            converted to ``pathlib.Path``.
        age_checker: maximum allowed age; compared against the difference of
            two ``datetime`` objects, so it is expected to be a ``timedelta``.
        force_download: when truthy, the path is always reported as expired.

    Returns:
        ``True`` if ``force_download`` is truthy, the path does not exist, or
        its last modification is older than ``age_checker``; ``False``
        otherwise.
    """
    if isinstance(path, str):
        path = pathlib.Path(path)
    log = Logger(name='cache-age-check')

    if force_download or not path.exists():
        return True

    # Use the modification time: ``st_ctime`` is the metadata-change time on
    # Unix and the creation time on Windows, neither of which tracks when the
    # cached content was last written.
    file_mod_time = datetime.datetime.fromtimestamp(path.stat().st_mtime)
    age = datetime.datetime.today() - file_mod_time

    if age > age_checker:
        log.warning(
            f'Cache has expired for {path} - older than {age_checker}...')
        return True

    if CACHE_VERBOSE:
        log.log(f'Cache is OK for {path} - not older than {age_checker}....',
                force=True)
    return False
Beispiel #3
0
    def handle(self, *args, **options):
        """Ingest workshop data files into the database.

        For every ``(slug, path)`` pair returned by
        ``get_all_existing_workshops`` (restricted by the ``name`` option when
        given), the file ``{path}/{slug}.yml`` is loaded and used to create or
        update, in order: the workshop and its image, the frontmatter (with
        ethical considerations, learning objectives, resources and
        contributors), the praxis section, and the lessons (with images,
        challenge/solution, evaluation and glossary keywords).

        Options read: ``verbose``, ``silent``, ``name``, ``no_reminder``.
        """
        log = Logger(path=__file__,
                     force_verbose=options.get('verbose'),
                     force_silent=options.get('silent'))
        # NOTE(review): this object is never used in this command.
        input = Input(path=__file__)

        workshops = get_all_existing_workshops()

        if options.get('name'):
            workshops = get_all_existing_workshops(options.get('name'))

        def _get_valid_name(filename):
            return filename.replace(
                '@', '')  # TODO: should exist a built-in for django here?

        def _get_media_path(valid_filename):
            return settings.MEDIA_ROOT + '/' + Workshop.image.field.upload_to + valid_filename

        def _get_media_url(valid_filename):
            return Workshop.image.field.upload_to + valid_filename

        def _image_exists(valid_filename):
            return os.path.exists(_get_media_path(valid_filename))

        def _get_default_image():
            return Workshop.image.field.default

        def _link_resource(category, point, add_field):
            """Create/update the Resource described by ``point`` and attach it to ``add_field``."""
            try:
                obj, _created = Resource.objects.update_or_create(
                    category=category,
                    title=point.get('linked_text'),
                    url=point.get('url'),
                    annotation=point.get('annotation'))
            except IntegrityError:
                # A resource with the same category/title/url already exists
                # with a different annotation: update it in place.
                obj = Resource.objects.get(
                    category=category,
                    title=point.get('linked_text'),
                    url=point.get('url'),
                )
                old_annotation = obj.annotation
                obj.annotation = point.get('annotation')
                obj.save()  # persist the new annotation
                log.info(
                    f'Another resource with the same URL, title, and category already existed so updated with a new annotation: **{point.get("linked_text")} (old)**\n{old_annotation}\n-------\n**{obj.title} (new)**\n{obj.annotation}'
                )
            if obj not in add_field.all():
                add_field.add(obj)

        for slug, path in workshops:
            DATAFILE = f'{path}/{slug}.yml'

            d = get_yaml(DATAFILE, log=log)

            # Separate out data
            imagedata = d.get('image')
            sections = d.get('sections')
            frontmatterdata = sections.get('frontmatter')
            praxisdata = sections.get('theory-to-practice')
            lessondata = sections.get('lessons')

            full_name = d.get('name')

            # 1. ENTER WORKSHOP
            workshop_defaults = {
                'parent_backend': d.get('parent_backend'),
                'parent_branch': d.get('parent_branch'),
                'parent_repo': d.get('parent_repo'),
            }
            if imagedata:
                # Only touch the alt text when image data is present; a
                # workshop without image data falls through to the default
                # image handling below instead of raising here.
                workshop_defaults['image_alt'] = imagedata['alt']

            workshop, _created = Workshop.objects.update_or_create(
                name=full_name,
                slug=dhri_slugify(full_name),
                defaults=workshop_defaults)

            if imagedata:
                source_file = imagedata['url']
                valid_filename = _get_valid_name(
                    slug + '-' + os.path.basename(imagedata['url']))
                try:
                    # Copy the image when it is missing from the media folder
                    # or its content differs from the source file. The
                    # comparison is inside the ``try`` because it also raises
                    # FileNotFoundError when the source file is missing.
                    if not _image_exists(valid_filename) or not filecmp.cmp(
                            source_file,
                            _get_media_path(valid_filename),
                            shallow=False):
                        with open(source_file, 'rb') as f:
                            workshop.image = File(f, name=valid_filename)
                            workshop.save()
                except FileNotFoundError:
                    log.error(
                        f'File `{source_file}` could not be found. Did you run `python manage.py buildworkshop` before you ran this command?'
                    )
                workshop.image.name = _get_media_url(valid_filename)
                workshop.save()
            else:
                log.warning(
                    f'Workshop {workshop.name} does not have an image assigned to it. Add filepaths to an existing file in your datafile ({DATAFILE}) if you want to update the specific workshop. Default workshop image (`{os.path.basename(_get_default_image())}`) will be assigned.'
                )
                workshop.image.name = Workshop.image.field.default
                workshop.save()

                if not _image_exists(
                        _get_valid_name(os.path.basename(
                            _get_default_image()))):
                    log.warning(
                        f'Default workshop image does not exist. You will want to add it manually to the correct folder: {_get_media_path("")}'
                    )

            # Saving the slug in a format that matches the GitHub repositories (special method `save_slug`)
            workshop.slug = slug
            workshop.save_slug()

            # 2. ENTER FRONTMATTER
            frontmatter, _created = Frontmatter.objects.update_or_create(
                workshop=workshop,
                defaults={
                    'abstract': frontmatterdata.get('abstract'),
                    'estimated_time': frontmatterdata.get('estimated_time')
                })

            for point in frontmatterdata.get('ethical_considerations') or []:
                EthicalConsideration.objects.update_or_create(
                    frontmatter=frontmatter, label=point.get('annotation'))

            for point in frontmatterdata.get('learning_objectives') or []:
                LearningObjective.objects.update_or_create(
                    frontmatter=frontmatter, label=point.get('annotation'))

            # datafile key -> (resource category, name of the relation on frontmatter)
            frontmatter_resources = {
                'projects': (Resource.PROJECT, 'projects'),
                'readings': (Resource.READING, 'readings'),
                'cheat_sheets': (Resource.CHEATSHEET, 'cheat_sheets'),
                'datasets': (Resource.DATASET, 'datasets'),
            }
            for cat, (category, field_name) in frontmatter_resources.items():
                points = frontmatterdata.get(cat)
                if not points:
                    continue
                add_field = getattr(frontmatter, field_name)
                for point in points:
                    _link_resource(category, point, add_field)

            for point in frontmatterdata.get('contributors') or []:
                profile = None
                try:
                    profile = Profile.objects.get(
                        user__first_name=point.get('first_name'),
                        user__last_name=point.get('last_name'))
                except (Profile.DoesNotExist, Profile.MultipleObjectsReturned):
                    # No unambiguous match on first/last name: fall back to
                    # comparing the full name against every profile. The last
                    # matching profile wins.
                    for p in Profile.objects.all():
                        if f'{p.user.first_name} {p.user.last_name}' == point.get(
                                'full_name'):
                            profile = p
                            log.info(
                                f'In-depth search revealed a profile matching the full name for `{workshop.name}` contributor `{point.get("first_name")} {point.get("last_name")}`. It may or may not be the correct person, so make sure you verify it manually.'
                            )

                    if not profile:
                        log.info(
                            f'Could not find user profile on the curriculum website for contributor `{point.get("full_name")}` (searching by first name `{point.get("first_name")}` and last name `{point.get("last_name")}`).'
                        )

                contributor, _created = Contributor.objects.update_or_create(
                    first_name=point.get('first_name'),
                    last_name=point.get('last_name'),
                    defaults={
                        'url': point.get('link'),
                        'profile': profile
                    })

                Collaboration.objects.update_or_create(
                    frontmatter=frontmatter,
                    contributor=contributor,
                    defaults={
                        'current': point.get('current'),
                        'role': point.get('role')
                    })

            # 3. ENTER PRAXIS
            praxis, _created = Praxis.objects.update_or_create(
                workshop=workshop,
                defaults={
                    'intro': praxisdata.get('intro'),
                })

            ordered_models = {
                'discussion_questions': DiscussionQuestion,
                'next_steps': NextStep,
            }
            for cat, model in ordered_models.items():
                points = praxisdata.get(cat)
                if not points:
                    continue
                # TODO: Should we pull out order manually here? Not necessary, right?
                for order, point in enumerate(points, start=1):
                    model.objects.update_or_create(
                        praxis=praxis,
                        label=point.get('annotation'),
                        defaults={'order': order})

            # datafile key -> (resource category, name of the relation on praxis)
            praxis_resources = {
                'further_readings': (Resource.READING, 'further_readings'),
                'further_projects': (Resource.PROJECT, 'further_projects'),
                'tutorials': (Resource.TUTORIAL, 'tutorials'),
            }
            for cat, (category, field_name) in praxis_resources.items():
                points = praxisdata.get(cat)
                if not points:
                    continue
                add_field = getattr(praxis, field_name)
                for point in points:
                    _link_resource(category, point, add_field)

            # 4. ENTER LESSONS

            for lessoninfo in lessondata:
                lesson, _created = Lesson.objects.update_or_create(
                    workshop=workshop,
                    title=lessoninfo.get('header'),
                    defaults={
                        'order': lessoninfo.get('order'),
                        'text': lessoninfo.get('content'),
                    })

                for image in lessoninfo.get('lesson_images') or []:
                    LessonImage.objects.update_or_create(url=image.get('path'),
                                                         lesson=lesson,
                                                         alt=image.get('alt'))

                challenge_data = lessoninfo.get('challenge')
                solution_data = lessoninfo.get('solution')

                if solution_data and not challenge_data:
                    log.error(
                        f'Lesson `{lesson.title}` (in workshop {workshop}) has a solution but no challenge. Correct the files on GitHub and rerun the buildworkshop command and then re-attempt the ingestworkshop command. Alternatively, you can change the datafile content manually.'
                    )

                if challenge_data:
                    challenge, _created = Challenge.objects.update_or_create(
                        lesson=lesson,
                        title=challenge_data.get('header'),
                        defaults={'text': challenge_data.get('content')})

                    if solution_data:
                        Solution.objects.update_or_create(
                            challenge=challenge,
                            title=solution_data.get('header'),
                            defaults={'text': solution_data.get('content')})

                if lessoninfo.get('evaluation'):
                    evaluation, _created = Evaluation.objects.get_or_create(
                        lesson=lesson)
                    for point in lessoninfo['evaluation'].get('content') or []:
                        question, _created = Question.objects.update_or_create(
                            evaluation=evaluation, label=point.get('question'))
                        # Answers are grouped under keys; only the key
                        # 'correct' marks its answers as correct.
                        for group, answers in point.get('answers').items():
                            is_correct = group == 'correct'
                            for answertext in answers:
                                Answer.objects.update_or_create(
                                    question=question,
                                    label=answertext,
                                    defaults={'is_correct': is_correct})

                if lessoninfo.get('keywords'):
                    # lessoninfo['keywords'].get('header') # TODO: not doing anything with keyword header yet
                    for keyword in lessoninfo['keywords'].get('content') or []:
                        terms = Term.objects.filter(term__iexact=keyword)
                        matches = terms.count()
                        if matches == 1:
                            lesson.terms.add(terms[0])
                        elif matches == 0:
                            log.warning(
                                f'Keyword `{keyword}` (used in lesson `{lesson.title}`, workshop `{workshop}` cannot be found in the existing glossary. Are you sure it is in the glossary and synchronized with the database? Make sure the data file for glossary is available ({GLOSSARY_FILE}) and that the term is defined in the file. Then run python manage.py ingestglossary.'
                            )
                        else:
                            log.error(
                                f'Multiple definitions of `{keyword}` exists in the database. Try resetting the glossary and rerun python manage.py ingestglossary before you run the ingestworkshop command again.'
                            )

        log.log('Added/updated workshops: ' +
                ', '.join([x[0] for x in workshops]))
        if not options.get('no_reminder'):
            log.log(
                'Do not forget to run `ingestprerequisites` after running the `ingestworkshop` command (without the --name flag).',
                color='yellow')

        # Run all three saves before testing the results: chaining them with
        # ``or`` would skip the later ones as soon as one returned a truthy value.
        saved = [
            log._save(data='ingestworkshop', name='warnings.md',
                      warnings=True),
            log._save(data='ingestworkshop',
                      name='logs.md',
                      warnings=False,
                      logs=True),
            log._save(data='ingestworkshop',
                      name='info.md',
                      warnings=False,
                      logs=False,
                      info=True),
        ]
        if any(saved):
            log.log(
                f'Log files with any warnings and logging information is now available in: `{log.LOG_DIR}`',
                force=True)
    def handle(self, *args, **options):
        """Ingest each workshop's ``blurb.yml`` into the database.

        For every ``(name, path)`` pair returned by
        ``get_all_existing_workshops`` (restricted by the ``name`` option when
        given), ``{path}/blurb.yml`` is loaded and a ``Blurb`` is created for
        the referenced user and workshop. An existing blurb is only
        overwritten when the ``force`` option is set or the operator answers
        ``y`` at the prompt. A workshop whose blurb data is incomplete or
        refers to an unknown user/workshop is reported and skipped.

        Options read: ``verbose``, ``silent``, ``name``, ``force``.
        """
        log = Logger(
            path=__file__,
            force_verbose=options.get('verbose'),
            force_silent=options.get('silent')
        )
        prompt = Input(path=__file__)

        workshops = get_all_existing_workshops()

        if options.get('name'):
            workshops = get_all_existing_workshops(options.get('name'))

        for name, path in workshops:
            DATAFILE = f'{path}/blurb.yml'

            try:
                data = get_yaml(DATAFILE, log=log, catch_error=True)
            except Exception:
                log.warning(f'Found no blurb for workshop `{name}`. Skipping and moving ahead...')
                continue

            # Each ``continue`` after ``log.error`` below keeps the loop from
            # carrying on with ``user``/``workshop`` unbound or left over from
            # a previous iteration, should ``log.error`` return.
            if not data.get('user'):
                log.error(
                    f'Username was not defined for the blurb for workshop {name} was not found. Check the datafile {DATAFILE} to verify the username attributed to the blurb.')
                continue

            if not data.get('workshop'):
                log.warning(
                    f'Blurb had no workshop assigned, but will proceed with the blurb\'s parent folder ({name}) as assumed workshop. To fix this warning, you can try running python manage.py buildblurbs before running ingestblurbs.')
                data['workshop'] = name

            if not data.get('text'):
                log.error(
                    f'Blurb has no text assigned, and thus could not be ingested. Check the datafile {DATAFILE} to verify the workshop attributed to the blurb.')
                continue

            try:
                user = User.objects.get(username=data.get('user'))
            except (User.DoesNotExist, User.MultipleObjectsReturned):
                log.error(
                    f'The user attributed to the blurb ({data.get("user")}) was not found in the database. Did you try running python manage.py ingestusers before running ingestblurbs?')
                continue

            try:
                workshop = Workshop.objects.get(slug=data.get('workshop'))
            except (Workshop.DoesNotExist, Workshop.MultipleObjectsReturned):
                log.error(
                    f'The blurb\'s attached workshop ({data.get("workshop")}) was not found in the database. Did you try running python manage.py ingestworkshop --name {data.get("workshop")} before running ingestblurbs?')
                continue

            # Parse once and store the same parsed text on both the create and
            # the update path.
            text = PARSER.fix_html(data.get('text'))

            blurb, created = Blurb.objects.get_or_create(
                user=user, workshop=workshop, defaults={'text': text})

            if created:
                continue

            if not options.get('force'):
                choice = prompt.ask(
                    f'Blurb for workshop `{workshop}` already exists. Update with new content? [y/N]')
                if choice.lower() != 'y':
                    continue

            blurb.text = text
            blurb.save()

        log.log('Added/updated blurbs for workshops: ' + ', '.join([x[0] for x in workshops]))

        # Run all three saves before testing the results: chaining them with
        # ``or`` would skip the later ones as soon as one returned a truthy value.
        saved = [
            log._save(data='ingestblurbs', name='warnings.md', warnings=True),
            log._save(data='ingestblurbs', name='logs.md', warnings=False, logs=True),
            log._save(data='ingestblurbs', name='info.md', warnings=False, logs=False, info=True),
        ]
        if any(saved):
            log.log(f'Log files with any warnings and logging information is now available in: `{log.LOG_DIR}`', force=True)
Beispiel #5
0
class GitHubParser():
    """Convert markdown text to HTML and post-process the result.

    Conversion itself is delegated to ``GitHubParserCache``; this class adds
    link rewriting for DHRI-Curriculum GitHub URLs, paragraph stripping, and
    quote conversion.
    """

    # (path elements that must both be present, label used in the log
    # message, shortcut kind used in the rewritten URL)
    _SHORTCUTS = (
        (('glossary', 'terms'), 'glossary term', 'term'),
        (('insights', 'pages'), 'insight', 'insight'),
        (('install', 'guides'), 'installation', 'install'),
    )

    def __init__(self, string: str = None, log=None):
        """Store ``log``, or create a ``github-parser`` Logger when none is given.

        ``string`` is accepted for backward compatibility but is not used.
        """
        self.log = Logger(name='github-parser') if log is None else log

    def convert(self, string):
        """Return the stripped ``markdown`` entry of the cache data for ``string``."""
        cache = GitHubParserCache(string=string)
        return cache.data.get('markdown', '').strip()

    def strip_from_p(self, html):
        """Return the inner HTML of the first ``<p>`` in ``html``, or ``html`` itself if there is none."""
        soup = BeautifulSoup(html, 'lxml')
        if soup.p:
            return ''.join(str(x) for x in soup.p.children)
        return html

    def _fix_link(self, tag):
        """Rewrite ``tag['href']`` in place when it points into DHRI-Curriculum on GitHub.

        Absolute links whose path contains ``DHRI-Curriculum`` are redirected
        to the matching shortcut URL on curriculum.dhinstitutes.org when they
        reference a glossary term, insight, installation guide, or a workshop
        listed in ``AUTO_REPOS``. Every other link, including a tag without an
        ``href``, is left untouched.

        Returns:
            The same ``tag`` object.
        """
        def find_workshop(elements):
            # '{GH_CURRICULUM}' is a literal sentinel (not an f-string) meaning
            # "link to the organisation itself".
            if elements[-1] == 'DHRI-Curriculum':
                return '{GH_CURRICULUM}'
            known = [x[0] for x in AUTO_REPOS]
            for element in elements:
                if element in known:
                    return element
            return ''

        href = tag.get('href')
        if not href:
            # Anchors without a target have nothing to rewrite.
            return tag

        elements = href.split('/')

        # Only absolute links into the DHRI-Curriculum organisation are rewritten.
        if 'http:' not in elements and 'https:' not in elements:
            return tag
        if 'DHRI-Curriculum' not in elements:
            return tag

        for (first, second), label, kind in self._SHORTCUTS:
            if first in elements and second in elements:
                target = elements[-1].replace('.md', '')
                self.log.info(
                    f'Found link to an **{label}** and adding shortcut link to: curriculum.dhinstitutes.org/shortcuts/{kind}/{target}'
                )
                tag['href'] = f'https://curriculum.dhinstitutes.org/shortcuts/{kind}/{target}'
                return tag

        if 'raw.githubusercontent.com' in elements:
            raw_link = '/'.join(elements)
            self.log.info(
                f'Found link to **raw file** and will not change link: {raw_link}'
            )
            return tag

        workshop = find_workshop(elements)
        if workshop == '{GH_CURRICULUM}':
            gh_link = '/'.join(elements)
            self.log.info(
                f'Link found to **the DHRI Curriculum on GitHub**, linking to it: {gh_link}'
            )
        elif workshop == '':
            gh_link = '/'.join(elements)
            self.log.warning(
                f'Found link to workshop, which is not currently being loaded into the website, will therefore redirect to **workshop on GitHub**: {gh_link}'
            )
        else:
            self.log.info(
                f'Found link to **workshop** which (will) exist(s) on website, so changing to that: curriculum.dhinstitutes.org/workshops/{workshop}'
            )
            tag['href'] = f'https://curriculum.dhinstitutes.org/shortcuts/workshop/{workshop}'
        return tag

    def fix_html(self, text):
        """Convert ``text`` to HTML, curl its quotes and rewrite its links.

        Returns:
            ``''`` for empty input or when the converted document has no body.
            For single-line input that yields exactly one body child and
            contains a ``<p>``, the inner HTML of that paragraph. Otherwise
            the concatenated HTML of the body's children.
        """
        if not text:
            return ''

        multiline = '\n' in text

        # Make text into HTML...
        text = self.convert(text)
        text = smartypants.smartypants(text)  # curly quote it

        soup = BeautifulSoup(text, 'lxml')

        for tag in soup.descendants:
            if getattr(tag, 'name', None) == 'a':
                # TODO: Drop links that have no text
                self._fix_link(tag)

        body = soup.body
        if body is None:
            # Nothing was parsed (e.g. the conversion produced an empty string).
            return ''

        if not multiline and len(list(body.children)) == 1 and body.p:
            # We only have one paragraph, so return the _text only_ from the p
            return ''.join(str(x) for x in body.p.children)

        return ''.join(str(x) for x in body.children)

    def quote_converter(self, string, reverse=False):
        """Replace dumb single and double quotes in ``string`` with smart quotes.

        Accounts for the possibility of HTML tags within the string. With
        ``reverse=True`` the conversion goes the other way (smart to dumb).

        Returns:
            ``None`` for ``None`` input, otherwise the converted string.

        Raises:
            TypeError: if ``string`` is neither ``None`` nor a ``str``.
        """
        if string is None:
            return None

        if not isinstance(string, str):
            raise TypeError(
                f'quote_converter expected a string, got {type(string).__name__}: {string!r}'
            )

        if string == '':
            return string

        if reverse:
            string = string.replace('“', '"').replace('”', '"')
            string = string.replace('‘', "'").replace("’", "'")
            return string

        # Find dumb double quotes coming directly after letters or punctuation,
        # and replace them with right double quotes.
        string = re.sub(r'([a-zA-Z0-9.,?!;:\'\"])"', r'\1”', string)
        # Find any remaining dumb double quotes and replace them with
        # left double quotes.
        string = string.replace('"', '“')

        # Follow the same process with dumb/smart single quotes
        string = re.sub(r"([a-zA-Z0-9.,?!;:\"\'])'", r'\1’', string)
        string = string.replace("'", '‘')

        return string
    def handle(self, *args, **options):
        """Build the user data file from ``settings.AUTO_USERS``.

        Every user in every category of ``settings.AUTO_USERS`` is turned into
        a dictionary (credentials, names, profile, staff/superuser flags,
        groups). Profile images are copied as-is when the ``nocrop`` option is
        set, or cropped and saved as ``.jpg`` otherwise. The collected users,
        together with ``settings.AUTO_USER_DEFAULT``, are dumped as YAML to
        ``{SAVE_DIR}/{DATA_FILE}``.

        Options read: ``verbose``, ``silent``, ``nocrop``.
        """
        log = Logger(path=__file__,
                     force_verbose=options.get('verbose'),
                     force_silent=options.get('silent')
                     )

        log.log('Building user files... Please be patient as this can take some time.')

        users = []

        if not pathlib.Path(SAVE_DIR).exists():
            pathlib.Path(SAVE_DIR).mkdir(parents=True)

        if not pathlib.Path(SAVE_DIR_IMG).exists():
            pathlib.Path(SAVE_DIR_IMG).mkdir(parents=True)

        all_categories = list(settings.AUTO_USERS.keys())

        for cat in all_categories:

            all_users = settings.AUTO_USERS[cat]

            log.BAR(all_users, max_value=len(all_users))

            # Superusers are always staff as well.
            is_super = cat == 'SUPER'
            is_staff = is_super or cat == 'STAFF'

            for i, u in enumerate(all_users):
                log.BAR.update(i)

                user = {
                    'username': u.get('username'),
                    'password': u.get('password', ''),
                    'first_name': u.get('first_name', ''),
                    'last_name': u.get('last_name', ''),
                    'email': u.get('email', ''),
                    'profile': {
                        'image': '',
                        'bio': '',
                        'pronouns': u.get('pronouns'),
                        'links': []
                    },
                    'superuser': is_super,
                    'staff': is_staff,
                    'groups': u.get('groups', [])
                }

                if u.get('bio'):
                    user['profile']['bio'] = PARSER.fix_html(u.get('bio'))

                if u.get('img'):
                    filename = u['img'].split('/')[-1]
                    if options.get('nocrop'):
                        user['profile']['image'] = f'{SAVE_DIR_IMG}/{filename}'
                        copyfile(u['img'], user['profile']['image'])
                    else:
                        # Strip only the final extension so that names that
                        # themselves contain dots (e.g. `jane.doe.png`) keep
                        # their full stem and do not collide.
                        stem = filename.rsplit('.', 1)[0]
                        user['profile']['image'] = f'{SAVE_DIR_IMG}/{stem}.jpg'
                        crop_and_save(u['img'], user['profile']['image'], MAX_SIZE)
                else:
                    log.warning(f'User `{u.get("username")}` does not have an image assigned to them and will be assigned the default picture. Add filepaths to an existing file in your datafile (`{SAVE_DIR}/{DATA_FILE}`) or follow the steps in the documentation to add user images if you want to make sure the specific user has a profile picture. Then, rerun `python manage.py buildusers` or `python manage.py build`')

                user['profile']['links'].extend(
                    {
                        'label': link.get('text'),
                        'url': link.get('url'),
                        'cat': link.get('cat')
                    }
                    for link in u.get('links', [])
                )

                users.append(user)

            log.BAR.finish()

        # Save all data
        with open(f'{SAVE_DIR}/{DATA_FILE}', 'w') as file:
            file.write(
                yaml.dump({'users': users, 'default': settings.AUTO_USER_DEFAULT}))

        log.log(f'Saved user datafile: {SAVE_DIR}/{DATA_FILE}.')

        # Run all three saves before testing the results: chaining them with
        # ``or`` would skip the later ones as soon as one returned a truthy value.
        saved = [
            log._save(data='buildusers', name='warnings.md', warnings=True),
            log._save(data='buildusers', name='logs.md', warnings=False, logs=True),
            log._save(data='buildusers', name='info.md', warnings=False, logs=False, info=True),
        ]
        if any(saved):
            log.log(f'Log files with any warnings and logging information is now available in: `{log.LOG_DIR}`', force=True)
Beispiel #7
0
    def handle(self, *args, **options):
        """Ingest installation instructions from the YAML datafile into the database.

        For every entry in the datafile and every operating system listed under
        its ``instructions`` key, this creates or updates the matching
        ``Software``, ``Instruction``, ``Step`` and ``Screenshot`` rows. Image
        files are attached to the rows; when a byte-identical file is already
        present at the media path, only the database path is (re)assigned
        instead of saving the file again.

        Args:
            *args: Unused positional arguments.
            **options: Command options. ``verbose`` and ``silent`` are
                forwarded to the logger.
        """
        log = Logger(path=__file__,
                     force_verbose=options.get('verbose'),
                     force_silent=options.get('silent'))
        # NOTE(review): this object is never used below and the name shadows the
        # builtin `input`; kept in case constructing Input has side effects.
        input = Input(path=__file__)

        test_for_required_files(REQUIRED_PATHS=REQUIRED_PATHS, log=log)
        data = get_yaml(FULL_PATH, log=log)

        for installdata in data:
            # A missing/empty `instructions` key means "nothing to ingest" for
            # this entry rather than a crash.
            instructions_by_os = installdata.get('instructions') or {}

            for operating_system, steps in instructions_by_os.items():
                software, _ = Software.objects.get_or_create(
                    operating_system=operating_system,
                    software=installdata.get('software'))
                instruction, _ = Instruction.objects.update_or_create(
                    software=software,
                    defaults={
                        'what': installdata.get('what'),
                        'why': installdata.get('why')
                    })

                original_file = installdata.get('image')
                if original_file:
                    media_path = get_instruction_image_path(original_file)
                    # Record existence BEFORE saving: once the file has been
                    # written, a comparison can no longer reliably tell a
                    # replaced image from a newly copied one.
                    already_exists = instruction_image_exists(original_file)

                    if already_exists and filecmp.cmp(
                            original_file, media_path, shallow=False):
                        log.log(
                            f'Instruction image already exists. Ensuring path is in database: `{media_path}`'
                        )
                        instruction.image.name = get_instruction_image_path(
                            original_file, True)
                        instruction.save()
                    else:
                        with open(original_file, 'rb') as f:
                            instruction.image = File(
                                f, name=os.path.basename(f.name))
                            instruction.save()
                        if already_exists:
                            log.info(
                                f'Instruction image has been updated so being copied to media path: `{media_path}`'
                            )
                        else:
                            log.info(
                                f'Instruction image has been copied to media path: `{media_path}`'
                            )
                else:
                    log.warning(
                        f'An image for `{software}` does not exist. A default image will be saved instead. If you want a particular image for the installation instructions, follow the documentation.'
                    )
                    instruction.image.name = get_default_instruction_image()
                    instruction.save()

                for stepdata in steps or []:
                    step, _ = Step.objects.update_or_create(
                        instruction=instruction,
                        order=stepdata.get('step'),
                        defaults={
                            'header': stepdata.get('header'),
                            'text': stepdata.get('html')
                        })

                    # Steps without screenshots are valid; treat a missing key
                    # as an empty list.
                    screenshots = stepdata.get('screenshots') or []
                    for order, d in enumerate(screenshots, start=1):
                        path = d['path']
                        alt_text = d['alt']
                        media_path = get_screenshot_media_path(path)

                        # Same reasoning as for the instruction image: decide
                        # "new" vs "updated" from the state before saving.
                        already_exists = os.path.exists(media_path)
                        unchanged = already_exists and filecmp.cmp(
                            path, media_path, shallow=False)

                        s, _ = Screenshot.objects.get_or_create(
                            step=step, alt_text=alt_text, order=order)

                        if unchanged:
                            s.image = get_screenshot_media_path(
                                path, relative_to_upload_field=True)
                            s.save()
                            log.log(
                                f'Screenshot already exists: `{media_path}`'
                            )
                        else:
                            with open(path, 'rb') as f:
                                s.image = File(f,
                                               name=os.path.basename(f.name))
                                s.save()
                            if already_exists:
                                log.log(
                                    f'Screenshot was updated so re-saved: `{media_path}`'
                                )
                            else:
                                log.log(
                                    f'New screenshot saved: `{media_path}`'
                                )

        log.log('Added/updated installation instructions: ' +
                ', '.join(f'{x["software"]}' for x in data))

        # Run every save eagerly: chaining the calls with `or` would skip the
        # remaining log files as soon as one call returned a truthy value.
        saved = [
            log._save(data='ingestinstalls', name='warnings.md',
                      warnings=True),
            log._save(data='ingestinstalls', name='logs.md',
                      warnings=False, logs=True),
            log._save(data='ingestinstalls', name='info.md',
                      warnings=False, logs=False, info=True),
        ]
        if any(saved):
            log.log(
                f'Log files with any warnings and logging information is now available in: `{log.LOG_DIR}`',
                force=True)
    def handle(self, *args, **options):
        """Ingest insights from the YAML datafile into the database.

        For every entry in the datafile this creates or updates the ``Insight``
        row, attaches its image (or a default image when none is configured),
        creates or updates its ``Section`` rows and finally the
        ``OperatingSystemSpecificSection`` rows that refer to those sections.

        Args:
            *args: Unused positional arguments.
            **options: Command options. ``verbose`` and ``silent`` are
                forwarded to the logger.
        """
        # Imported locally so the basename lookup below does not depend on a
        # module-level `os` import (the original reached for `self.os`).
        import os

        log = Logger(path=__file__,
                     force_verbose=options.get('verbose'),
                     force_silent=options.get('silent'))
        # NOTE(review): this object is never used below and the name shadows the
        # builtin `input`; kept in case constructing Input has side effects.
        input = Input(path=__file__)

        test_for_required_files(REQUIRED_PATHS=REQUIRED_PATHS, log=log)
        data = get_yaml(FULL_PATH, log=log)

        for insightdata in data:
            # The `image` mapping is optional: the branch further down falls
            # back to a default image, so a missing key must not crash here.
            image_data = insightdata.get('image') or {}

            # TODO: Insights and Software are also connected in a database table (insight_insight_software) but this relationship is not developed yet.
            insight, _ = Insight.objects.update_or_create(
                title=insightdata.get('insight'),
                defaults={
                    'text': insightdata.get('introduction'),
                    'image_alt': image_data.get('alt')
                })

            original_file = image_data.get('url')
            if original_file:
                media_path = get_insight_image_path(original_file)
                # Record existence BEFORE saving: once the file has been
                # written, a comparison can no longer reliably tell a replaced
                # image from a newly copied one.
                already_exists = insight_image_exists(original_file)

                if already_exists and filecmp.cmp(
                        original_file, media_path, shallow=False):
                    log.log(
                        f'Insight image already exists. Connecting existing paths to database: `{media_path}`'
                    )
                    insight.image.name = get_insight_image_path(
                        original_file, True)
                    insight.save()
                else:
                    with open(original_file, 'rb') as f:
                        # Was `self.os.path.basename`, which raises
                        # AttributeError unless the command class happens to
                        # define an `os` attribute.
                        insight.image = File(f,
                                             name=os.path.basename(f.name))
                        insight.save()

                    if already_exists:
                        log.info(
                            f'Insight image has been updated and thus was copied to the media path: `{media_path}`'
                        )
                    else:
                        log.info(
                            f'Insight image was not found and is copied to media path: `{media_path}`'
                        )
            else:
                log.warning(
                    f'An image for `{insight}` does not exist. A default image will be saved instead. If you want a particular image for the insight, follow the documentation.'
                )
                insight.image.name = get_default_insight_image()
                insight.save()

            # `sections` maps a section title to its data (order/content).
            sections = insightdata.get('sections') or {}
            for title, sectiondata in sections.items():
                Section.objects.update_or_create(
                    insight=insight,
                    title=title,
                    defaults={
                        'order': sectiondata.get('order'),
                        'text': sectiondata.get('content')
                    })

            # `os_specific` is optional; skip it quietly when absent.
            os_specific = insightdata.get('os_specific') or {}
            for operating_system, osdata in os_specific.items():
                # Scope the lookup to this insight so that identically titled
                # sections of other insights cannot be matched (or trigger
                # MultipleObjectsReturned).
                related_section = Section.objects.get(
                    insight=insight,
                    title=osdata.get('related_section'))

                OperatingSystemSpecificSection.objects.update_or_create(
                    section=related_section,
                    operating_system=operating_system,
                    defaults={'text': osdata.get('content')})

        log.log('Added/updated insights: ' +
                ', '.join([x.get("insight") for x in data]))

        # Run every save eagerly: chaining the calls with `or` would skip the
        # remaining log files as soon as one call returned a truthy value.
        saved = [
            log._save(data='ingestinsights', name='warnings.md',
                      warnings=True),
            log._save(data='ingestinsights', name='logs.md',
                      warnings=False, logs=True),
            log._save(data='ingestinsights', name='info.md',
                      warnings=False, logs=False, info=True),
        ]
        if any(saved):
            log.log(
                f'Log files with any warnings and logging information is now available in: `{log.LOG_DIR}`',
                force=True)