Example 1
def update_ocw2hugo_course(
    bucket_name, prefix, path, content_update_field, create_new_content=False
):
    """
    Update OCW course content for an existing course website

    Args:
        bucket_name (str): An s3 bucket name
        prefix (str): S3 prefix before start of course_id path
        path (str): The course URL path
        content_update_field (str): Website content field that should be overwritten
        create_new_content (bool): Create new content if it doesn't exist
    """
    s3 = get_s3_resource()
    bucket = s3.Bucket(bucket_name)
    name = path.replace("/data/course_legacy.json", "", 1)

    website = Website.objects.filter(name=name).first()

    if website:
        update_ocw2hugo_content(
            bucket,
            prefix,
            website,
            content_update_field,
            create_new_content=create_new_content,
        )
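
A minimal invocation sketch for the function above, assuming it is importable from a module; the module path, bucket name, course key, and content field below are illustrative assumptions, not values taken from the source.

# Hypothetical call; module path, bucket name, key, and field are assumptions.
from ocw_import.api import update_ocw2hugo_course  # assumed module path

update_ocw2hugo_course(
    "ocw-content-bucket",                      # assumed S3 bucket name
    "",                                        # no prefix before the course_id path
    "8-01-fall-2016/data/course_legacy.json",  # assumed course JSON key
    "markdown",                                # assumed content field to overwrite
    create_new_content=True,
)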
Example 2
def setup_s3_tmpdir(settings, tmpdir, courses=None):
    """
    Set up the fake s3 data
    """
    # Fake the settings
    settings.AWS_ACCESS_KEY_ID = "abc"
    settings.AWS_SECRET_ACCESS_KEY = "abc"
    # Create our fake bucket
    conn = get_s3_resource()
    conn.create_bucket(Bucket=MOCK_BUCKET_NAME)
    # Copy test data to tmpdir
    rmtree(tmpdir)
    if courses is not None:
        for course in courses:
            copytree(f"./test_ocw2hugo/{course}/", f"{tmpdir}/{course}/")
    else:
        copytree("./test_ocw2hugo/", f"{tmpdir}/")

    # Add data to the fake bucket
    test_bucket = conn.Bucket(name=MOCK_BUCKET_NAME)
    test_bucket.objects.all().delete()
    for file in get_ocw2hugo_files(tmpdir):
        file_key = file.replace(f"{tmpdir}/", "")
        with open(file, "r") as f:
            test_bucket.put_object(Key=file_key, Body=f.read())
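
A hedged sketch of how this helper might be exercised from a pytest test, assuming an S3 mock (for example moto) is active so that get_s3_resource() talks to a fake backend; the test name and the mocked_s3 fixture are assumptions.

# Hypothetical pytest usage; assumes an S3 mock (e.g. moto) is active so that
# get_s3_resource() returns a fake backend. The mocked_s3 fixture is assumed.
def test_fake_bucket_has_course_files(settings, tmpdir, mocked_s3):
    setup_s3_tmpdir(settings, tmpdir, courses=["8-01-fall-2016"])
    bucket = get_s3_resource().Bucket(MOCK_BUCKET_NAME)
    keys = [obj.key for obj in bucket.objects.all()]
    assert any(key.endswith("course_legacy.json") for key in keys)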
Example 3
def test_get_resource_type(settings, filename, mimetype, expected_type):
    """get_resource_type should return the expected value for an S3 object"""
    settings.AWS_ACCESS_KEY_ID = "abc"
    settings.AWS_SECRET_ACCESS_KEY = "abc"
    settings.AWS_STORAGE_BUCKET_NAME = "test-bucket"
    conn = get_s3_resource()
    conn.create_bucket(Bucket=settings.AWS_STORAGE_BUCKET_NAME)
    test_bucket = conn.Bucket(name=settings.AWS_STORAGE_BUCKET_NAME)
    test_bucket.objects.all().delete()
    test_bucket.put_object(Key=filename, Body=b"", ContentType=mimetype)
    assert get_resource_type(filename) == expected_type
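
Because the test receives filename, mimetype, and expected_type as arguments, it is presumably driven by pytest parametrization; a sketch of what that decorator might look like follows (the concrete filenames, MIME types, and expected type labels are assumptions).

# Hypothetical parametrization; the concrete values and expected type labels
# are assumptions for illustration, not taken from the source.
import pytest

@pytest.mark.parametrize(
    "filename, mimetype, expected_type",
    [
        ("lecture_notes.pdf", "application/pdf", "Document"),
        ("intro_video.mp4", "video/mp4", "Video"),
        ("site_photo.jpg", "image/jpeg", "Image"),
    ],
)
def test_get_resource_type(settings, filename, mimetype, expected_type):
    ...  # body as shown above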
Example 4
def import_ocw2hugo_course(bucket_name, prefix, path, starter_id=None):
    """
    Extract OCW course content for a course

    Args:
        bucket_name (str): An s3 bucket name
        prefix (str): S3 prefix before start of course_id path
        path (str): The course URL path
        starter_id (int or None): The id of the WebsiteStarter to associate with the created Website
    """
    s3 = get_s3_resource()
    bucket = s3.Bucket(bucket_name)
    course_data = json.loads(get_s3_object_and_read(bucket.Object(path)).decode())
    name = path.replace("/data/course_legacy.json", "", 1)
    menu_data = yaml.load(
        get_s3_object_and_read(
            bucket.Object(f"{prefix}{name}/config/_default/menus.yaml")
        ),
        Loader=yaml.FullLoader,
    )
    if name in NON_ID_COURSE_NAMES:
        return
    try:
        first_published_to_production = parse_date(course_data.get("publishdate", None))
    except (ValueError, TypeError):
        first_published_to_production = None
        course_data["publishdate"] = None
    try:
        website, _ = Website.objects.update_or_create(
            name=name,
            defaults={
                "title": course_data.get("course_title", f"Course Site ({name})"),
                "first_published_to_production": first_published_to_production,
                "metadata": course_data,
                "short_id": get_short_id(name, course_data),
                "starter_id": starter_id,
                "source": WEBSITE_SOURCE_OCW_IMPORT,
            },
        )
        import_ocw2hugo_sitemetadata(course_data, website)
        import_ocw2hugo_menu(menu_data, website)
        import_ocw2hugo_content(bucket, prefix, website)
        if is_gdrive_enabled() and website.gdrive_folder is None:
            create_gdrive_folders(website.short_id)
    except:  # pylint:disable=bare-except
        log.exception("Error saving website %s", path)
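
A hypothetical call to the importer, looking up a starter to pass as starter_id; the model location, slug value, bucket name, and course key are assumptions for illustration.

# Hypothetical invocation; model location, slug, bucket name, and key are assumptions.
from websites.models import WebsiteStarter  # assumed model location

starter = WebsiteStarter.objects.filter(slug="ocw-course").first()  # assumed slug
import_ocw2hugo_course(
    "ocw-content-bucket",                      # assumed S3 bucket name
    "",                                        # no prefix before the course_id path
    "8-01-fall-2016/data/course_legacy.json",  # assumed course JSON key
    starter_id=starter.id if starter else None,
)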
Example 5
def setup_s3(settings):
    """
    Set up the fake s3 data
    """
    # Fake the settings
    settings.AWS_ACCESS_KEY_ID = "abc"
    settings.AWS_SECRET_ACCESS_KEY = "abc"
    # Create our fake bucket
    conn = get_s3_resource()
    conn.create_bucket(Bucket=MOCK_BUCKET_NAME)

    # Add data to the fake bucket
    test_bucket = conn.Bucket(name=MOCK_BUCKET_NAME)
    test_bucket.objects.all().delete()
    for file in get_ocw2hugo_files("./test_ocw2hugo"):
        file_key = file.replace("./test_ocw2hugo/", "")
        with open(file, "r") as f:
            test_bucket.put_object(Key=file_key, Body=f.read())
Example 6
def fetch_ocw2hugo_course_paths(bucket_name, prefix="", filter_list=None):
    """
    Generator that yields the path to every course JSON document in the S3 bucket matching
    a prefix and filter (or all of them if no prefix or filter is provided)

    Args:
        bucket_name (str): S3 bucket name
        prefix (str): (Optional) S3 prefix before start of course_id path
        filter_list (list of str): (Optional) If specified, only yield paths for courses whose id is in this list

    Yields:
        str: The path to a course JSON document in S3
    """
    s3 = get_s3_resource()
    bucket = s3.Bucket(bucket_name)
    paginator = bucket.meta.client.get_paginator("list_objects")
    for resp in paginator.paginate(Bucket=bucket.name, Prefix=f"{prefix}"):
        for obj in resp["Contents"]:
            key = obj["Key"]
            if key.endswith("course_legacy.json") and (
                not filter_list or key.split("/")[-3] in filter_list
            ):
                yield key
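
The generator above pairs naturally with import_ocw2hugo_course from Example 4; a minimal driver loop under that assumption (bucket name, prefix, and filter values are illustrative, not from the source).

# Hypothetical driver loop combining Examples 4 and 6; bucket name, prefix,
# and filter values are assumptions for illustration.
bucket_name = "ocw-content-bucket"
prefix = ""
for path in fetch_ocw2hugo_course_paths(
    bucket_name, prefix=prefix, filter_list=["8-01-fall-2016"]
):
    import_ocw2hugo_course(bucket_name, prefix, path, starter_id=None)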