def update_ocw2hugo_course(
    bucket_name, prefix, path, content_update_field, create_new_content=False
):
    """
    Extract OCW course content for a course

    Args:
        bucket_name (str): An s3 bucket name
        prefix (str): S3 prefix before start of course_id path
        path (str): The course URL path
        content_update_field (str): Website content field that should be overwritten
        create_new_content (bool): Create new content if it doesn't exist
    """
    course_bucket = get_s3_resource().Bucket(bucket_name)
    # The course name is the path with the trailing data-file suffix stripped
    website_name = path.replace("/data/course_legacy.json", "", 1)
    site = Website.objects.filter(name=website_name).first()
    if site is None:
        # No matching Website has been imported yet; nothing to update
        return
    update_ocw2hugo_content(
        course_bucket,
        prefix,
        site,
        content_update_field,
        create_new_content=create_new_content,
    )
def setup_s3_tmpdir(settings, tmpdir, courses=None):
    """
    Set up the fake s3 data
    """
    # Point boto3 at dummy credentials so no real AWS account is touched
    settings.AWS_ACCESS_KEY_ID = "abc"
    settings.AWS_SECRET_ACCESS_KEY = "abc"

    conn = get_s3_resource()
    conn.create_bucket(Bucket=MOCK_BUCKET_NAME)

    # Replace tmpdir contents with a fresh copy of the fixture course data
    rmtree(tmpdir)
    if courses is None:
        copytree("./test_ocw2hugo/", f"{tmpdir}/")
    else:
        for course_name in courses:
            copytree(f"./test_ocw2hugo/{course_name}/", f"{tmpdir}/{course_name}/")

    # Mirror every fixture file from tmpdir into the (emptied) fake bucket
    fake_bucket = conn.Bucket(name=MOCK_BUCKET_NAME)
    fake_bucket.objects.all().delete()
    for local_path in get_ocw2hugo_files(tmpdir):
        object_key = local_path.replace(f"{tmpdir}/", "")
        with open(local_path, "r") as src:
            fake_bucket.put_object(Key=object_key, Body=src.read())
def test_get_resource_type(settings, filename, mimetype, expected_type):
    """get_resource_type should return the expected value for an S3 object"""
    # Fix: removed the incorrect `-> str` return annotation — pytest test
    # functions return None, and pytest warns/errors on non-None test returns.
    # Dummy credentials so boto3 can build a client without real AWS access
    settings.AWS_ACCESS_KEY_ID = "abc"
    settings.AWS_SECRET_ACCESS_KEY = "abc"
    settings.AWS_STORAGE_BUCKET_NAME = "test-bucket"
    conn = get_s3_resource()
    conn.create_bucket(Bucket=settings.AWS_STORAGE_BUCKET_NAME)
    test_bucket = conn.Bucket(name=settings.AWS_STORAGE_BUCKET_NAME)
    # Start from an empty bucket so only the object under test is present
    test_bucket.objects.all().delete()
    test_bucket.put_object(Key=filename, Body=b"", ContentType=mimetype)
    assert get_resource_type(filename) == expected_type
def import_ocw2hugo_course(bucket_name, prefix, path, starter_id=None):
    """
    Extract OCW course content for a course

    Args:
        bucket_name (str): An s3 bucket name
        prefix (str): S3 prefix before start of course_id path
        path (str): The course URL path
        starter_id (int or None): The id of the WebsiteStarter to associate
            with the created Website
    """
    name = path.replace("/data/course_legacy.json", "", 1)
    # Fix: skip known non-course site names BEFORE fetching anything from S3
    # (previously two S3 reads were made and then discarded for these names)
    if name in NON_ID_COURSE_NAMES:
        return
    s3 = get_s3_resource()
    bucket = s3.Bucket(bucket_name)
    course_data = json.loads(get_s3_object_and_read(bucket.Object(path)).decode())
    menu_data = yaml.load(
        get_s3_object_and_read(
            bucket.Object(f"{prefix}{name}/config/_default/menus.yaml")
        ),
        Loader=yaml.FullLoader,
    )
    try:
        first_published_to_production = parse_date(course_data.get("publishdate", None))
    except (ValueError, TypeError):
        first_published_to_production = None
    # The raw publishdate has been captured above; clear it from the stored metadata
    course_data["publishdate"] = None
    try:
        website, _ = Website.objects.update_or_create(
            name=name,
            defaults={
                "title": course_data.get("course_title", f"Course Site ({name})"),
                "first_published_to_production": first_published_to_production,
                "metadata": course_data,
                "short_id": get_short_id(name, course_data),
                "starter_id": starter_id,
                "source": WEBSITE_SOURCE_OCW_IMPORT,
            },
        )
        import_ocw2hugo_sitemetadata(course_data, website)
        import_ocw2hugo_menu(menu_data, website)
        import_ocw2hugo_content(bucket, prefix, website)
        if is_gdrive_enabled() and website.gdrive_folder is None:
            create_gdrive_folders(website.short_id)
    # Fix: narrowed bare `except:` to `except Exception` so SystemExit and
    # KeyboardInterrupt still propagate; one bad course must not abort the run
    except Exception:  # pylint:disable=broad-except
        log.exception("Error saving website %s", path)
def setup_s3(settings):
    """
    Set up the fake s3 data
    """
    # Point boto3 at dummy credentials so no real AWS account is touched
    settings.AWS_ACCESS_KEY_ID = "abc"
    settings.AWS_SECRET_ACCESS_KEY = "abc"

    conn = get_s3_resource()
    conn.create_bucket(Bucket=MOCK_BUCKET_NAME)

    # Upload every fixture file into the (emptied) fake bucket, keyed by its
    # path relative to the fixture directory
    fake_bucket = conn.Bucket(name=MOCK_BUCKET_NAME)
    fake_bucket.objects.all().delete()
    for local_path in get_ocw2hugo_files("./test_ocw2hugo"):
        object_key = local_path.replace("./test_ocw2hugo/", "")
        with open(local_path, "r") as src:
            fake_bucket.put_object(Key=object_key, Body=src.read())
def fetch_ocw2hugo_course_paths(bucket_name, prefix="", filter_list=None):
    """
    Generator that yields the path to every course JSON document in the S3
    bucket matching a prefix and filter (or all of them if no prefix or
    filter is provided)

    Args:
        bucket_name (str): S3 bucket name
        prefix (str): (Optional) S3 prefix before start of course_id path
        filter_list (list of str or None): (Optional) If specified, only yield
            course paths whose course id segment is in this list

    Yields:
        str: The path to a course JSON document in S3
    """
    s3 = get_s3_resource()
    bucket = s3.Bucket(bucket_name)
    paginator = bucket.meta.client.get_paginator("list_objects")
    for resp in paginator.paginate(Bucket=bucket.name, Prefix=prefix):
        # Fix: "Contents" is omitted from the response when a page is empty
        # (empty bucket / no keys under the prefix); the old resp["Contents"]
        # raised KeyError in that case
        for obj in resp.get("Contents", []):
            key = obj["Key"]
            if key.endswith("course_legacy.json") and (
                not filter_list or key.split("/")[-3] in filter_list
            ):
                yield key