# Regex to extract a course_id from a course URL.
# from openedx/core/constants.py:
# Three segments joined by '/' or '+'; the last segment ends at the next '/' or '?'.
COURSE_KEY_PATTERN = r'(?P<course_key_string>[^/+]+(/|\+)[^/+]+(/|\+)[^/?]+)'
# Same pattern, with the named group exposed as `course_id` instead of `course_key_string`.
COURSE_ID_PATTERN = COURSE_KEY_PATTERN.replace('course_key_string', 'course_id')
# from openedx/core/lib/request_utils.py:
# Group 1 is the shortest prefix ending in '/courses/'.  The negative lookahead rejects input
# where '/courses/' is immediately followed by 'v<digits>/<segment>'.
COURSE_REGEX = re.compile(r'^(.*?/courses/)(?!v[0-9]+/[^/]+){}'.format(COURSE_ID_PATTERN))

# Regex to extract a course_id from a block URL/block ID.
# Same three-segment shape as above; the last segment additionally ends at '#'.
BLOCK_ID_PATTERN = r'(?P<block_id>[^/+]+(/|\+)[^/+]+(/|\+)[^/?#]+)'
# Group 1 is the shortest prefix ending in '/xblock/'.
BLOCK_REGEX = re.compile(r'^(.*?/xblock/){}'.format(BLOCK_ID_PATTERN))

# Make sure that Opaque Keys' Stevedore extensions are loaded, this can sometimes fail to happen in EMR
# and it is vitally important that they be there, or jobs will succeed but produce incorrect data. See
# https://openedx.atlassian.net/wiki/spaces/DE/pages/1934263829/RCA+Insights+data+issues+2020-10-14+-+2020-10-15
# This will raise an error early on in import if the plugins don't exist.
CourseKey.get_namespace_plugin('course-v1')


def normalize_course_id(course_id):
    """Make a best effort to rescue malformed course_ids.

    Args:
        course_id: The raw course id.  Truthy values must provide ``strip()``
            (presumably ``str`` -- confirm against callers).

    Returns:
        ``course_id`` with leading and trailing whitespace removed.  Falsy
        input (for example ``None`` or ``''``) is returned unchanged.
    """
    # Falsy values are passed through untouched so that None stays None.
    if not course_id:
        return course_id
    return course_id.strip()


# NOTE(review): the definition below is cut off in the reviewed chunk (its `if` has no body here).
# It is kept verbatim and must be restored together with the remainder of its body.
# def is_valid_course_id(course_id): """ Determines if a course_id from an event log is possibly legitimate. """ if course_id and course_id[-1] == '\n':