def test_get_course_key_from_nonascii_url(self):
    # First half: the course key parsed out of a URL that embeds
    # VALID_NONASCII_LEGACY_COURSE_ID must render back to that same id.
    url = u"https://courses.edx.org/courses/{course_id}/stuff".format(
        course_id=VALID_NONASCII_LEGACY_COURSE_ID)
    course_key = opaque_key_util.get_course_key_from_url(url)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(unicode(course_key), VALID_NONASCII_LEGACY_COURSE_ID)

    # Second half: a URL that embeds INVALID_NONASCII_LEGACY_COURSE_ID must
    # not produce a course key at all.
    url = u"https://courses.edx.org/courses/{course_id}/stuff".format(
        course_id=INVALID_NONASCII_LEGACY_COURSE_ID)
    course_key = opaque_key_util.get_course_key_from_url(url)
    self.assertIsNone(course_key)
def _parse_server_event(self, event): # Always check context first for server events. org_id = event.get('context', {}).get('org_id') if org_id: return org_id # Try to infer the institution from the event data evt_type = event['event_type'] if '/courses/' in evt_type: course_key = opaque_key_util.get_course_key_from_url(evt_type) if course_key and '/' not in unicode(course_key): return course_key.org else: # It doesn't matter if we found a good deprecated key. # We need to provide backwards-compatibility. return get_slash_value(evt_type, 2) elif '/' in evt_type: return None else: # Specific server logging. One-off parser for each type. # Survey of logs showed 4 event types: # reset_problem, save_problem_check, # save_problem_check_fail, save_problem_fail. All # four of these have a problem_id, which for legacy events # we could extract from. For newer events, we assume this # won't be needed, because context will be present. try: return get_slash_value(event['event']['problem_id'], 2) except Exception: # pylint: disable=broad-except return None return None
def get_course_id(event, from_url=False):
    """
    Gets course_id from event's data.

    Events without a context yield None.  A non-empty `course_id` in the
    context is returned only if `opaque_key_util.is_valid_course_id` accepts
    it; otherwise an error is logged and None is returned.  When the context
    has no course_id and `from_url` is true, the URL carried by the event is
    parsed for a course key instead.
    """
    context = event.get('context')
    if context is None:
        # Assume it's old, and not worth logging...
        return None

    # Get the course_id from the data, and validate.
    candidate = context.get('course_id', '')
    if candidate:
        if not opaque_key_util.is_valid_course_id(candidate):
            log.error("encountered event with bogus course_id: %s", event)
            return None
        return candidate

    if not from_url:
        return None

    # Try to get the course_id from the URLs in `event_type` (for implicit
    # server events) and `page` (for browser events).
    source = event.get('event_source')
    if source == 'server':
        url_field = 'event_type'
    elif source == 'browser':
        url_field = 'page'
    else:
        url_field = None
    url = '' if url_field is None else event.get(url_field, '')

    key = opaque_key_util.get_course_key_from_url(url)
    return unicode(key) if key else None
def get_course_id(self, event):
    """
    Gets course_id from event.

    A non-empty `course_id` in the event context is returned as-is.
    Otherwise the URL carried by the event (`event_type` for server events,
    `page` for browser events) is parsed for a course key.

    Returns None when neither source yields a course id.
    """
    # TODO: This is an arbitrary way to get the course_id. A more complete
    # routine should deal with all the corner cases as in the `get_org_id`
    # function below. The subset of event that will return a course_id is
    # considered a compromise between the events that are useful and
    # increasing the complexity of the code.

    # Try to get the course from the context.  `or {}` also covers events
    # whose 'context' key is present but null: `.get('context', {})` would
    # hand back None there and the chained lookup would raise.
    course_id = (event.get('context') or {}).get('course_id')
    if course_id:
        return course_id

    # Try to get the course_id from the URLs in `event_type` (for implicit
    # server events) and `page` (for browser events).
    source = event.get('event_source')
    if source == 'server':
        url = event.get('event_type', '')
    elif source == 'browser':
        url = event.get('page', '')
    else:
        url = ''

    course_key = opaque_key_util.get_course_key_from_url(url)
    if course_key:
        return unicode(course_key)

    return None
def _parse_browser_event(self, event): # TODO: Note that for browser events we are not using the org_id from the context. page = event['page'] if 'courses' in page: # This is different than the original algorithm in that it assumes # the page contains a valid coursename. The original code # merely looked for what followed "http[s]://<host>/courses/" # (and also hoped there were no extra slashes or different content). course_key = opaque_key_util.get_course_key_from_url(page) if course_key and '/' not in unicode(course_key): return course_key.org else: # It doesn't matter if we found a good deprecated key. # We need to provide backwards-compatibility. return get_slash_value(page, 4) return None
def test_get_course_key_from_url(self, course_id):
    # The course key parsed out of a course URL must render back to the
    # course id that was embedded in it.
    url = u"https://courses.edx.org/courses/{course_id}/stuff".format(
        course_id=course_id)
    course_key = opaque_key_util.get_course_key_from_url(url)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(unicode(course_key), course_id)
def test_get_course_key_from_invalid_url(self, course_id):
    # No course key is expected for the supplied course id.
    url_template = u"https://courses.edx.org/courses/{course_id}/stuff"
    parsed_key = opaque_key_util.get_course_key_from_url(
        url_template.format(course_id=course_id))
    self.assertIsNone(parsed_key)
def test_get_course_key_from_invalid_url(self):
    # A URL embedding INVALID_LEGACY_COURSE_ID must not produce a course key.
    url_template = "https://courses.edx.org/courses/{course_id}/stuff"
    parsed_key = opaque_key_util.get_course_key_from_url(
        url_template.format(course_id=INVALID_LEGACY_COURSE_ID))
    self.assertIsNone(parsed_key)
def test_get_course_key_from_legacy_url(self):
    # The course key parsed out of a URL embedding VALID_LEGACY_COURSE_ID
    # must render back to that same id.
    url = "https://courses.edx.org/courses/{course_id}/stuff".format(
        course_id=VALID_LEGACY_COURSE_ID)
    course_key = opaque_key_util.get_course_key_from_url(url)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(unicode(course_key), VALID_LEGACY_COURSE_ID)
def test_get_course_key_from_url(self, block_id):
    # The course key recovered from an xblock URL built around the supplied
    # block id must render as VALID_COURSE_ID.
    url = u"https://courses.edx.org/xblock/{block_id}?stuff=things".format(
        block_id=block_id)
    course_key = opaque_key_util.get_course_key_from_url(url)
    # assertEqual replaces the deprecated assertEquals alias; the stray
    # debugging print of the URL has been removed.
    self.assertEqual(unicode(course_key), VALID_COURSE_ID)
def get_org_id(self, item):
    """
    Attempt to determine the organization that is associated with this particular event.

    This method may return incorrect results, so a white list of
    valid organization names is used to filter out the noise.

    None is returned if no org information is found in the item.
    Any exception raised while inspecting the item is logged and also
    results in None.
    """
    def get_slash_value(input_value, index):
        """Return index value after splitting input on slashes."""
        segments = input_value.split('/')
        try:
            return segments[index]
        except IndexError:
            return None

    try:
        # Different behavior based on type of event source.
        source = item['event_source']

        if source == 'server':
            # Always check context first for server events.
            context_org = item.get('context', {}).get('org_id')
            if context_org:
                return context_org

            # Try to infer the institution from the event data.
            event_type = item['event_type']
            if '/courses/' in event_type:
                key = opaque_key_util.get_course_key_from_url(event_type)
                if not key or '/' in unicode(key):
                    # It doesn't matter if we found a good deprecated key.
                    # We need to provide backwards-compatibility.
                    return get_slash_value(event_type, 2)
                return key.org

            if '/' in event_type:
                return None

            # Specific server logging.  One-off parser for each type.
            # Survey of logs showed 4 event types:
            # reset_problem, save_problem_check,
            # save_problem_check_fail, save_problem_fail.  All
            # four of these have a problem_id, which for legacy events
            # we could extract from.  For newer events, we assume this
            # won't be needed, because context will be present.
            try:
                problem_id = item['event']['problem_id']
                return get_slash_value(problem_id, 2)
            except Exception:  # pylint: disable=broad-except
                return None

        if source == 'browser':
            # Note that the context of browser events is ignored.
            page_url = item['page']
            if 'courses' not in page_url:
                return None

            # This is different than the original algorithm in that it assumes
            # the page contains a valid coursename.  The original code
            # merely looked for what followed "http[s]://<host>/courses/"
            # (and also hoped there were no extra slashes or different content).
            key = opaque_key_util.get_course_key_from_url(page_url)
            if not key or '/' in unicode(key):
                # It doesn't matter if we found a good deprecated key.
                # We need to provide backwards-compatibility.
                return get_slash_value(page_url, 4)
            return key.org

        # TODO: Handle other event source values (e.g. task or mobile).
        return None

    except Exception:  # pylint: disable=broad-except
        log.exception('Unable to determine institution for event: %s', unicode(item).encode('utf8'))
        return None
def test_get_course_key_from_invalid_block_url(self, block_id):
    # An xblock URL built around the supplied block id must not produce a
    # course key.  The stray debugging print of the URL has been removed.
    url = u"https://courses.edx.org/xblock/{block_id}?stuff=things".format(
        block_id=block_id)
    course_key = opaque_key_util.get_course_key_from_url(url)
    self.assertIsNone(course_key)
def test_get_course_key_from_url(self, course_id):
    # The course key parsed out of a course URL must render back to the
    # course id that was embedded in it.
    url = u"https://courses.edx.org/courses/{course_id}/stuff".format(course_id=course_id)
    course_key = opaque_key_util.get_course_key_from_url(url)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(unicode(course_key), course_id)
def test_get_course_key_from_invalid_url(self, course_id):
    # The URL parser is expected to return None for the supplied course id.
    bad_url = u"https://courses.edx.org/courses/{course_id}/stuff".format(course_id=course_id)
    result = opaque_key_util.get_course_key_from_url(bad_url)
    self.assertIsNone(result)
def get_org_id(self, item):
    """
    Attempt to determine the organization that is associated with this particular event.

    This method may return incorrect results, so a white list of
    valid organization names is used to filter out the noise.

    None is returned if no org information is found in the item, and also
    when inspecting the item raises (the failure is logged).
    """
    def get_slash_value(input_value, index):
        """Return index value after splitting input on slashes."""
        pieces = input_value.split('/')
        try:
            return pieces[index]
        except IndexError:
            return None

    def org_from_url(url, fallback_index):
        """Return the org of the course key found in url, else a slash-split fallback."""
        course_key = opaque_key_util.get_course_key_from_url(url)
        if course_key and '/' not in unicode(course_key):
            return course_key.org
        # It doesn't matter if we found a good deprecated key.
        # We need to provide backwards-compatibility.
        return get_slash_value(url, fallback_index)

    try:
        # Different behavior based on type of event source.
        event_source = item['event_source']

        if event_source == 'server':
            # Always check context first for server events.
            org_id = item.get('context', {}).get('org_id')
            if org_id:
                return org_id

            # Try to infer the institution from the event data.
            evt_type = item['event_type']
            if '/courses/' in evt_type:
                return org_from_url(evt_type, 2)
            if '/' in evt_type:
                return None

            # Specific server logging.  One-off parser for each type.
            # Survey of logs showed 4 event types:
            # reset_problem, save_problem_check,
            # save_problem_check_fail, save_problem_fail.  All
            # four of these have a problem_id, which for legacy events
            # we could extract from.  For newer events, we assume this
            # won't be needed, because context will be present.
            try:
                return get_slash_value(item['event']['problem_id'], 2)
            except Exception:  # pylint: disable=broad-except
                return None

        elif event_source == 'browser':
            # Note that the context of browser events is ignored.
            page = item['page']
            if 'courses' in page:
                # This is different than the original algorithm in that it assumes
                # the page contains a valid coursename.  The original code
                # merely looked for what followed "http[s]://<host>/courses/"
                # (and also hoped there were no extra slashes or different content).
                return org_from_url(page, 4)

        # TODO: Handle other event source values (e.g. task or mobile).
        return None

    except Exception:  # pylint: disable=broad-except
        log.exception('Unable to determine institution for event: %s', unicode(item).encode('utf8'))
        return None