Example #1
0
def _get_derived_website_content_data(request_data: dict,
                                      site_config: SiteConfig,
                                      website_pk: str) -> dict:
    """
    Derives values that should be added to the request data if a WebsiteContent object is being created

    Args:
        request_data: The incoming data for the new content object. The keys read here are
            "text_id", "type", "dirpath", "metadata", and the key that holds the slug source.
        site_config: The site config used to look up the config item for the content type.
            A missing (None) config is tolerated and simply yields no config item.
        website_pk: Identifier of the website, passed through to get_valid_new_filename.

    Returns:
        A dict holding only the derived values that apply ("text_id", "is_page_content",
        "dirpath", "filename"). request_data itself is not modified.
    """
    added_data = {}
    if "text_id" not in request_data:
        added_data["text_id"] = uuid_string()
    content_type = request_data.get("type")
    # Only consult the config when there is both a config and a type to look up
    config_item = None
    if site_config is not None and content_type is not None:
        config_item = site_config.find_item_by_name(name=content_type)
    is_page_content = False
    if site_config and config_item is not None:
        is_page_content = site_config.is_page_content(config_item)
        added_data["is_page_content"] = is_page_content
    dirpath = request_data.get("dirpath")
    if dirpath is None and config_item is not None and is_page_content:
        dirpath = config_item.file_target
        added_data["dirpath"] = dirpath
    # The config item may name the field that the filename is derived from; fall back to the title
    slug_key = config_item.item.get(
        "slug") if config_item is not None else None
    if not slug_key:
        slug_key = "title"
    # "metadata" may be present but explicitly None, so it is normalized before the lookup
    slug = (added_data.get(slug_key) or request_data.get(slug_key)
            or (request_data.get("metadata") or {}).get(slug_key))
    if slug is not None:
        added_data["filename"] = get_valid_new_filename(
            website_pk=website_pk,
            dirpath=dirpath,
            filename_base=slugify(get_valid_base_filename(slug, content_type)),
        )
    return added_data
Example #2
0
def test_data_file_deserialize(serializer_cls, file_content):
    """
    Deserializing data file contents with JsonFileSerializer or YamlFileSerializer should produce
    a content object with the expected field values
    """
    website = WebsiteFactory.create()
    site_config = SiteConfig(website.starter.config)
    # Use the first config item that targets a single data file
    file_items = (item for item in site_config.iter_items()
                  if "file" in item.item)
    file_config_item = next(file_items)
    serializer = serializer_cls(site_config)
    website_content = serializer.deserialize(
        website=website,
        filepath=file_config_item.item["file"],
        file_contents=file_content,
    )
    expected_metadata = {
        "tags": ["Design"],
        "description": "**This** is the description",
    }
    assert website_content.title == "Content Title"
    # Both the type and the text id are expected to match the config item's name
    assert website_content.type == file_config_item.item["name"]
    assert website_content.text_id == file_config_item.item["name"]
    assert website_content.is_page_content is False
    assert website_content.metadata == expected_metadata
    def handle(self, *args, **options):
        """
        Add missing base metadata keys to WebsiteContent objects of one type.

        Reads the "filter", "starter", "source" and "type" options, selects the matching
        content, and merges the base metadata generated from the starter's config underneath
        each object's existing metadata, so values already stored always win. Objects are only
        saved when the merge changes their metadata.
        """
        # A filter that was not supplied (None) is treated the same as an empty one
        filter_str = (options["filter"] or "").lower()
        starter_str = options["starter"]
        source_str = options["source"]
        type_str = options["type"]

        content_qset = WebsiteContent.objects.filter(
            website__starter__slug=starter_str, type=type_str)
        if filter_str:
            content_qset = content_qset.filter(
                Q(website__name__startswith=filter_str)
                | Q(website__short_id__startswith=filter_str))
        if source_str:
            content_qset = content_qset.filter(website__source=source_str)

        self.stdout.write(
            f"Update {type_str} metadata for websites based on starter {starter_str}, source={source_str}"
        )

        starter_config = WebsiteStarter.objects.get(slug=starter_str).config
        base_metadata = SiteConfig(starter_config).generate_item_metadata(
            type_str, cls=WebsiteContent)
        with transaction.atomic():
            for content in content_qset.iterator():
                # Metadata may be unset (None); treat that as having no keys yet
                merged_metadata = {**base_metadata, **(content.metadata or {})}
                # Skip the write when the merge would not change anything
                if merged_metadata != content.metadata:
                    content.metadata = merged_metadata
                    content.save()

        self.stdout.write(
            f"Done Updating {type_str} metadata for websites based on starter {starter_str}, source {source_str}"
        )
def test_generate_item_metadata(parsed_site_config, cls, resource_type,
                                file_type, with_kwargs):
    """generate_item_metadata should build the expected metadata dict for the 'resource' item"""
    if with_kwargs:
        kwargs = {"resourcetype": resource_type, "file_type": file_type}
        exp_resource_type = resource_type or ""
        exp_file_type = file_type or ""
    else:
        kwargs = {}
        exp_resource_type = ""
        exp_file_type = ""
    expected_data = {
        "description": "",
        "resourcetype": exp_resource_type,
        "file_type": exp_file_type,
        "learning_resource_types": [],
        "license": "",
        "image_metadata": {
            "image-alt": "",
            "caption": "",
            "credit": ""
        },
        "video_metadata": {
            "youtube_id": "",
            "video_speakers": "",
            "video_tags": ""
        },
        "video_files": {
            "video_thumbnail_file": "",
            "video_captions_file": "",
            "video_transcript_file": "",
        },
    }
    # Without a class, the 'title' and 'file' keys are expected in the output as well
    if not cls:
        expected_data.update({"title": "", "file": ""})
    site_config = SiteConfig(parsed_site_config)
    generated = site_config.generate_item_metadata("resource", cls, **kwargs)
    assert generated == expected_data
Example #5
0
 def apply_rule(data):
     """
     Check that config items using the 'menu' widget have no fields with other widgets.

     Args:
         data: The raw site config data, wrapped in a SiteConfig for inspection.

     Returns:
         A list with a single error message naming every offending config item, or an
         empty list if no item mixes 'menu' fields with other fields.
     """
     faulty_path_tuples = {}
     site_config = SiteConfig(data)
     for config_item in site_config.iter_items():
         # NOTE(review): relies on partition_to_lists returning (non-matching, matching) - confirm
         non_menu_fields, menu_fields = partition_to_lists(
             config_item.fields,
             # .get() so that a field without an explicit widget counts as a non-menu field
             predicate=lambda field: field.get("widget") == CONTENT_MENU_FIELD,
         )
         if not menu_fields or not non_menu_fields:
             continue
         faulty_path_tuples[config_item.name] = (
             config_item.path,
             # A field without an explicit widget is reported as 'string'
             ", ".join(
                 field.get("widget", "string") for field in non_menu_fields
             ),
         )
     if not faulty_path_tuples:
         return []
     faulty_lines = "\n".join(
         f"{' ' * 8}'{name}' ({path}) – widgets: {widgets}"
         for name, (path, widgets) in faulty_path_tuples.items()
     )
     return [
         "Config with 'menu' fields must not have any fields with other widget types.\n{}".format(
             faulty_lines
         )
     ]
def test_find_config_item_by_filepath(basic_site_config):
    """SiteConfig.find_item_by_filepath should return the matching config item, or None if no item has that filepath"""
    site_config = SiteConfig(basic_site_config)
    # The item at index 3 is the one expected to target data/metadata.json
    expected_item = list(site_config.iter_items())[3]
    found_item = site_config.find_item_by_filepath("data/metadata.json")
    assert found_item == expected_item
    missing_item = site_config.find_item_by_filepath("bad/path")
    assert missing_item is None
def test_find_config_item_name_singleton(basic_site_config):
    """SiteConfig.find_item_by_name should return the file config item with the given name, or None if no name matches"""
    site_config = SiteConfig(basic_site_config)
    # Pick the first config item that reports itself as a file item
    file_items = filter(lambda item: item.is_file_item(),
                        site_config.iter_items())
    config_item = next(file_items)
    assert config_item is not None
    found_item = site_config.find_item_by_name(config_item.name)
    assert found_item == config_item
    assert site_config.find_item_by_name("other-name-123") is None
Example #8
0
    def get_content_context(self, instance):  # pylint:disable=too-many-branches
        """
        Serialize the content objects referenced by the instance's metadata

        Collects the text ids found in the metadata values of "relation" and "menu" widget
        fields, grouped by website name, then looks up and serializes the matching
        WebsiteContent objects.

        Args:
            instance: The content object whose metadata and website are inspected.

        Returns:
            The serialized data of all referenced content objects, or None if the serializer
            context does not have a truthy "content_context" value.
        """
        if not self.context or not self.context.get("content_context"):
            return None

        lookup = defaultdict(list)  # website name -> list of text_id
        metadata = instance.metadata or {}
        site_config = SiteConfig(instance.website.starter.config)
        for field in site_config.iter_fields():  # pylint:disable=too-many-nested-blocks
            widget = field.field.get("widget")
            if widget not in ("relation", "menu"):
                continue
            try:
                # Nested fields store their value under the parent field's name
                if field.parent_field is None:
                    value = metadata.get(field.field["name"])
                else:
                    value = metadata.get(field.parent_field["name"],
                                         {}).get(field.field["name"])

                if widget == "relation":
                    content = value["content"]
                    website_name = value["website"]
                    if isinstance(content, str):
                        content = [content]

                    if (isinstance(content, list) and len(content) > 0
                            and isinstance(content[0], list)):
                        # this is the data from a 'global' relation widget,
                        # which is a list of [content_uuid, website_name]
                        # tuples
                        for [content_uuid, website_name] in content:
                            lookup[website_name].append(content_uuid)
                    else:
                        lookup[website_name].extend(content)

                elif widget == "menu":
                    website_name = instance.website.name
                    # Menu items with the external prefix do not refer to content objects
                    lookup[website_name].extend([
                        item["identifier"] for item in value
                        if not item["identifier"].startswith(
                            constants.EXTERNAL_IDENTIFIER_PREFIX)
                    ])

            except (AttributeError, KeyError, TypeError, ValueError):
                # Either missing or malformed relation field value. ValueError covers
                # a 'global' relation entry that is not a two-item pair.
                continue

        contents = []
        for website_name, text_ids in lookup.items():
            contents.extend(
                WebsiteContent.objects.filter(website__name=website_name,
                                              text_id__in=text_ids))
        # The nested serializer gets content_context=False so it does not resolve references again
        return WebsiteContentDetailSerializer(contents,
                                              many=True,
                                              context={
                                                  "content_context": False
                                              }).data
def test_find_file_field(basic_site_config, content_type, field_name):
    """SiteConfig.find_file_field should return the config item's file field, or None if there is none"""
    site_config = SiteConfig(basic_site_config)
    # Locate the config item for the content type (stays None if nothing matches)
    config_item = None
    for item in site_config.iter_items():
        if item.name == content_type:
            config_item = item
            break
    file_field = site_config.find_file_field(config_item)
    if not field_name:
        assert file_field is None
    else:
        assert file_field["name"] == "image"
Example #10
0
def test_is_page_content(basic_site_config, content_dir, folder_file_target,
                         exp_result):
    """
    SiteConfig.is_page_content should return True if the folder target of the repeatable config item starts with the
    content directory in the site config (or a default value)
    """
    site_config = SiteConfig(basic_site_config)
    site_config.raw_data[WEBSITE_CONFIG_CONTENT_DIR_KEY] = content_dir
    # Take the first folder item and point it at the folder under test
    folder_items = filter(lambda item: item.is_folder_item(),
                          site_config.iter_items())
    config_item = next(folder_items)
    config_item.item["folder"] = folder_file_target
    result = site_config.is_page_content(config_item)
    assert result is exp_result
Example #11
0
def create_gdrive_resource_content(drive_file: DriveFile):
    """
    Create a WebsiteContent resource from a Google Drive file

    If the drive file has no resource yet, a new WebsiteContent object of the resource type is
    created for it. Otherwise the existing resource's file is pointed at the drive file's S3 key.
    On success the drive file is linked to the resource and marked COMPLETE. On any error the
    exception is logged, a sync error message is stored, and the drive file is marked FAILED.

    Args:
        drive_file: The drive file to create or update a resource for.
    """
    try:
        resource_type = get_resource_type(drive_file.s3_key)
        resource = drive_file.resource
        if not resource:
            # One SiteConfig serves both the config item lookup and the metadata generation
            site_config = SiteConfig(drive_file.website.starter.config)
            config_item = site_config.find_item_by_name(
                name=CONTENT_TYPE_RESOURCE)
            dirpath = config_item.file_target if config_item else None
            basename, _ = os.path.splitext(drive_file.name)

            filename = get_valid_new_filename(
                website_pk=drive_file.website.pk,
                dirpath=dirpath,
                filename_base=slugify(
                    get_valid_base_filename(basename, CONTENT_TYPE_RESOURCE)),
            )
            # Every configured resource type field gets the same detected resource type
            resource_type_fields = {
                field: resource_type
                for field in settings.RESOURCE_TYPE_FIELDS
            }
            resource = WebsiteContent.objects.create(
                website=drive_file.website,
                title=drive_file.name,
                file=drive_file.s3_key,
                type=CONTENT_TYPE_RESOURCE,
                is_page_content=True,
                dirpath=dirpath,
                filename=filename,
                metadata={
                    **site_config.generate_item_metadata(
                        CONTENT_TYPE_RESOURCE,
                        cls=WebsiteContent,
                        file_type=drive_file.mime_type,
                        **resource_type_fields,
                    )
                },
            )
        else:
            resource.file = drive_file.s3_key
            resource.save()
        drive_file.resource = resource
        drive_file.update_status(DriveFileStatus.COMPLETE)
    # Exception rather than a bare except, so KeyboardInterrupt/SystemExit still propagate
    except Exception:  # pylint:disable=broad-except
        log.exception("Error creating resource for drive file %s",
                      drive_file.file_id)
        drive_file.sync_error = (
            f"Could not create a resource from google drive file {drive_file.name}"
        )
        drive_file.update_status(DriveFileStatus.FAILED)
Example #12
0
def test_get_destination_url(is_page_content, dirpath, filename, expected):
    """get_destination_url should build the expected url for a content object"""
    content = WebsiteContentFactory.create(
        is_page_content=is_page_content,
        dirpath=dirpath,
        filename=filename,
    )
    site_config = SiteConfig(content.website.starter.config)
    destination_url = get_destination_url(content, site_config)
    assert destination_url == expected
Example #13
0
def test_get_destination_filepath_errors(mocker, has_missing_name,
                                         is_bad_config_item):
    """
    get_destination_filepath should log an error and return None when the content type is missing from the site
    config, or when the matching config item has no properly configured destination.
    """
    patched_log = mocker.patch("content_sync.utils.log")
    # From basic-site-config.yml
    config_item_name = "blog"
    if is_bad_config_item:
        # Force the lookup to return an item that has no usable destination
        bad_config_item = ConfigItem(item={
            "name": config_item_name,
            "poorly": "configured"
        })
        mocker.patch.object(
            SiteConfig,
            "find_item_by_name",
            return_value=bad_config_item,
        )
    starter = WebsiteStarterFactory.build()
    if has_missing_name:
        content_type = "non-existent-config-name"
    else:
        content_type = config_item_name
    content = WebsiteContentFactory.build(
        is_page_content=False,
        type=content_type,
    )
    site_config = SiteConfig(starter.config)
    return_value = get_destination_filepath(content=content,
                                            site_config=site_config)
    patched_log.error.assert_called_once()
    assert return_value is None
Example #14
0
def _transform_hugo_menu_data(website_content: WebsiteContent,
                              site_config: SiteConfig) -> dict:
    """
    Adds 'url' property to internal links in menu data.

    Returns the dict of all values that will be serialized to the target file, including the transformed
    "menu" fields. The metadata stored on website_content is left untouched: menu items are copied
    before the 'url' value is added.
    """
    config_item = site_config.find_item_by_name(website_content.type)
    menu_fields = {
        field["name"]
        for field in config_item.fields
        if field.get("widget") == CONTENT_MENU_FIELD
    }
    transformed_menu_fields = {}
    for field_name, field_data in website_content.metadata.items():
        if field_name not in menu_fields:
            continue
        uuid_content_map = _get_uuid_content_map(field_data)
        result_menu_items = []
        for menu_item in field_data:
            # Work on a copy so the 'url' key is not written back into the content's metadata
            updated_menu_item = menu_item.copy()
            # Add/update the 'url' value if this is an internal link
            if menu_item["identifier"] in uuid_content_map:
                menu_item_content = uuid_content_map[menu_item["identifier"]]
                updated_menu_item["url"] = get_destination_url(
                    menu_item_content, site_config)
            result_menu_items.append(updated_menu_item)
        transformed_menu_fields[field_name] = result_menu_items
    return {**website_content.metadata, **transformed_menu_fields}
Example #15
0
 def for_content(
         site_config: SiteConfig,
         website_content: WebsiteContent) -> BaseContentFileSerializer:
     """
     Given a WebsiteContent object and site config, returns a serializer object of the correct type for
     serializing the WebsiteContent object into file contents.

     Page content always gets the Hugo markdown serializer. Other content is serialized according to
     the file extension of its config item's file target.

     Raises:
         ValueError: If the content is not page content and its type has no config item, the config
             item has no file target, or the file target's extension is not supported.
     """
     if website_content.is_page_content:
         return HugoMarkdownFileSerializer(site_config=site_config)
     config_item = site_config.find_item_by_name(website_content.type)
     # A content type missing from the config is reported the same way as one without a file target
     destination_filepath = (
         config_item.file_target if config_item is not None else None
     )
     if not destination_filepath:
         raise ValueError(
             f"WebsiteContent object is not page content, but has no 'file' destination in config ({website_content.text_id})."
         )
     file_ext = get_file_extension(destination_filepath)
     if file_ext == "json":
         cls = JsonFileSerializer
     elif file_ext in {"yml", "yaml"}:
         # HACK: Hugo-specific logic for properly transforming data if the "menu" widget is used
         if _has_menu_fields(config_item):
             cls = HugoMenuYamlFileSerializer
         else:
             cls = YamlFileSerializer
     else:
         raise ValueError(
             f"Website content cannot be serialized to a file ({website_content.text_id})."
         )
     return cls(site_config=site_config)
Example #16
0
def _untransform_hugo_menu_data(data: dict, filepath: str,
                                site_config: SiteConfig) -> dict:
    """
    Strips the 'url' property from internal links in serialized menu data.

    Returns a new dict with every value from the given data, where the values of "menu" fields are
    replaced by cleaned copies. The given data and its menu items are not modified.
    """
    config_item = site_config.find_item_by_filepath(filepath)
    menu_field_names = set()
    for field in config_item.fields:
        if field.get("widget") == CONTENT_MENU_FIELD:
            menu_field_names.add(field["name"])
    untransformed = dict(data)
    for field_name, menu_items in data.items():
        if field_name in menu_field_names:
            cleaned_items = []
            for menu_item in menu_items:
                item_copy = menu_item.copy()
                # Only items whose identifier is a valid uuid have their url removed
                if (is_valid_uuid(item_copy["identifier"])
                        and "url" in item_copy):
                    del item_copy["url"]
                cleaned_items.append(item_copy)
            untransformed[field_name] = cleaned_items
    return untransformed
Example #17
0
def test_factory_for_file_invalid():
    """ContentFileSerializerFactory.for_file should raise a ValueError for a file type it does not support"""
    starter = WebsiteStarterFactory.build()
    site_config = SiteConfig(starter.config)
    unsupported_filepath = "/path/to/myfile.tar.gz"
    with pytest.raises(ValueError):
        assert ContentFileSerializerFactory.for_file(
            site_config=site_config, filepath=unsupported_filepath
        )
Example #18
0
def test_hugo_file_serialize(markdown, exp_sections):
    """HugoMarkdownFileSerializer.serialize should produce front matter and, if present, the markdown body"""
    metadata = {"metadata1": "dummy value 1", "metadata2": "dummy value 2"}
    content = WebsiteContentFactory.create(
        text_id="abcdefg",
        title="Content Title",
        type="sometype",
        markdown=markdown,
        metadata=metadata,
    )
    serializer = HugoMarkdownFileSerializer(
        SiteConfig(content.website.starter.config)
    )
    file_content = serializer.serialize(website_content=content)
    section_delimiter = re.compile(r"^---\n", re.MULTILINE)
    # Splitting yields a blank first item because the file starts with the delimiter; drop
    # empty parts.
    md_file_sections = list(filter(None, section_delimiter.split(file_content)))
    assert len(md_file_sections) == exp_sections
    front_matter = md_file_sections[0]
    front_matter_lines = sorted(
        line for line in front_matter.split("\n") if line
    )
    expected_lines = [
        f"title: {content.title}",
        f"content_type: {content.type}",
        f"uid: {content.text_id}",
    ]
    expected_lines.extend(f"{key}: {value}" for key, value in metadata.items())
    assert front_matter_lines == sorted(expected_lines)
    if exp_sections > 1:
        assert md_file_sections[1] == markdown
Example #19
0
def test_content_dir(basic_site_config, content_dir_value, exp_result):
    """SiteConfig.content_dir should return the configured content dir, or a default when the key is absent"""
    updated_site_config = basic_site_config.copy()
    if content_dir_value is not None:
        updated_site_config[WEBSITE_CONFIG_CONTENT_DIR_KEY] = content_dir_value
    else:
        # A None parameter means "key not present in the config at all"
        del updated_site_config[WEBSITE_CONFIG_CONTENT_DIR_KEY]
    content_dir = SiteConfig(updated_site_config).content_dir
    assert content_dir == exp_result
Example #20
0
def test_website_publish_serializer_base_url(settings, is_root_site):
    """WebsitePublishSerializer should serialize an empty base_url for the root site and a path otherwise"""
    site = WebsiteFactory.create()
    site_config = SiteConfig(site.starter.config)
    if is_root_site:
        settings.ROOT_WEBSITE_NAME = site.name
    else:
        settings.ROOT_WEBSITE_NAME = "some_other_root_name"
    serializer = WebsitePublishSerializer(site)
    base_url = serializer.data["base_url"]
    if is_root_site:
        expected_base_url = ""
    else:
        expected_base_url = f"{site_config.root_url_path}/{site.name}".strip("/")
    assert base_url == expected_base_url
Example #21
0
 def upload_file_to(self, filename):
     """
     Build the upload path for a file: the site's root url path, the website name, and the
     filename prefixed with this object's text id (dashes removed), joined with slashes.
     """
     root_url_path = SiteConfig(self.website.starter.config).root_url_path
     website_name = self.website.name
     prefixed_filename = f"{self.text_id.replace('-', '')}_{filename}"
     candidates = (root_url_path, website_name, prefixed_filename)
     # Empty parts are left out so the path has no doubled or leading slash
     return "/".join(part for part in candidates if part != "")
Example #22
0
def test_factory_for_file(filepath, exp_serializer_cls):
    """ContentFileSerializerFactory.for_file should return a serializer of the expected class for the filepath"""
    starter = WebsiteStarterFactory.build()
    site_config = SiteConfig(starter.config)
    serializer = ContentFileSerializerFactory.for_file(
        site_config=site_config, filepath=filepath
    )
    assert isinstance(serializer, exp_serializer_cls)
Example #23
0
def mock_api_wrapper(settings, mocker, db_data):
    """Build a GithubApiWrapper for the test website, with the Github class patched out"""
    settings.GIT_TOKEN = "faketoken"
    settings.GIT_ORGANIZATION = "fake_org"
    settings.CONTENT_SYNC_RETRIES = 3

    # Patch before the wrapper is constructed so it uses the mock
    mocker.patch("content_sync.apis.github.Github", autospec=True)
    website = db_data.website
    site_config = SiteConfig(website.starter.config)
    return GithubApiWrapper(website=website, site_config=site_config)
Example #24
0
def test_factory_for_content_hugo_markdown():
    """
    ContentFileSerializerFactory.for_content should hand back a HugoMarkdownFileSerializer when the
    content object is page content.
    """
    content = WebsiteContentFactory.build(is_page_content=True)
    site_config = SiteConfig(content.website.starter.config)
    serializer = ContentFileSerializerFactory.for_content(site_config, content)
    assert isinstance(serializer, HugoMarkdownFileSerializer)
Example #25
0
    def upload_file_to(self, filename):
        """
        Build the upload path for a file: the site's root url path, the website name, and the
        filename prefixed with the second-to-last segment of the source key, joined with slashes.
        """
        site_config = SiteConfig(self.website.starter.config)
        key_segments = self.source_key.split("/")
        source_folder = key_segments[-2]

        candidates = (
            site_config.root_url_path,
            self.website.name,
            f"{source_folder}_{filename}",
        )
        # Empty parts are left out so the path has no doubled or leading slash
        return "/".join(part for part in candidates if part != "")
Example #26
0
def test_site_config_iter_items(basic_site_config):
    """SiteConfig.iter_items should yield a ConfigItem for each individual item in the config"""
    site_config = SiteConfig(basic_site_config)
    config_items = list(site_config.iter_items())
    assert len(config_items) == 5
    collections = basic_site_config["collections"]
    # The first three items are the top-level collections, in order
    for index in range(3):
        expected_item = ConfigItem(item=collections[index],
                                   parent_item=None,
                                   path=f"collections.{index}")
        assert config_items[index] == expected_item
    # The fourth item is the first file nested inside the third collection
    parent_collection = collections[2]
    expected_nested_item = ConfigItem(
        item=parent_collection["files"][0],
        parent_item=parent_collection,
        path="collections.2.files.0",
    )
    assert config_items[3] == expected_nested_item
Example #27
0
 def apply_rule(data):
     """
     Check that every 'folder' config item is page content according to the site config.

     Returns a list with a single error message naming each offending item and its path, or an
     empty list if there are none.
     """
     site_config = SiteConfig(data)
     faulty_paths = {
         config_item.name: config_item.path
         for config_item in site_config.iter_items()
         if config_item.is_folder_item()
         and not site_config.is_page_content(config_item)
     }
     if not faulty_paths:
         return []
     content_dir = site_config.content_dir
     faulty_lines = "\n".join(
         f"{' ' * 8}'{name}' ({path})" for name, path in faulty_paths.items()
     )
     return [
         "Found 'folder' item(s) that do not point to the content directory ({}).\n{}".format(
             content_dir, faulty_lines
         )
     ]
Example #28
0
 def __init__(self, website: Website, site_config: Optional[SiteConfig]):
     """
     Set up the Github API backend for one website.

     Uses the given site config if it is truthy, otherwise builds one from the website's starter
     config. Creates the Github client and fetches the configured organization.
     """
     self.website = website
     if site_config:
         self.site_config = site_config
     else:
         self.site_config = SiteConfig(self.website.starter.config)
     self.repo = None
     token = get_token()
     # base_url is only passed when a custom API url is configured
     github_kwargs = {}
     if settings.GIT_API_URL is not None:
         github_kwargs["base_url"] = settings.GIT_API_URL
     self.git = Github(login_or_token=token, **github_kwargs)
     self.org = self.git.get_organization(settings.GIT_ORGANIZATION)
Example #29
0
 def apply_rule(data):
     """
     Check that every 'title' field is required and uses the 'string' widget.

     Returns a list with a single error message naming each offending item and its path, or an
     empty list if there are none. Config items without a 'title' field are ignored.
     """
     faulty_paths = {}
     for config_item in SiteConfig(data).iter_items():
         title_fields = [
             field for field in config_item.fields if field["name"] == "title"
         ]
         title_field = first_or_none(title_fields)
         if title_field is None:
             continue
         # Only a literal False (or a missing key) counts as "not required"
         is_required = title_field.get("required", False) is not False
         # A missing widget key is treated as the 'string' widget
         uses_string_widget = title_field.get("widget", "string") == "string"
         if not (is_required and uses_string_widget):
             faulty_paths[config_item.name] = config_item.path
     if not faulty_paths:
         return []
     faulty_lines = "\n".join(
         f"{' ' * 8}'{name}' ({path})" for name, path in faulty_paths.items()
     )
     return [
         "'title' fields must use the 'string' widget, and must be set to be required.\n{}".format(
             faulty_lines
         )
     ]
Example #30
0
def test_get_destination_url_errors(mocker):
    """
    get_destination_url should log an error and return None when it is called with a WebsiteContent
    object that does not have is_page_content set to true
    """
    patched_log = mocker.patch("content_sync.utils.log")
    starter = WebsiteStarterFactory.build()
    # "blog" comes from basic-site-config.yml
    content = WebsiteContentFactory.build(is_page_content=False, type="blog")
    site_config = SiteConfig(starter.config)
    return_value = get_destination_url(content=content,
                                       site_config=site_config)
    patched_log.error.assert_called_once()
    assert return_value is None