def extract_units(url, headers, file_formats):
    """
    Download the page at `url` and return the units extracted from it.

    The page is fetched with `get_page_contents(url, headers)` and then
    handed to the extractor that `get_page_extractor(url)` selects.
    `file_formats` is forwarded unchanged to the extractor's
    `extract_units_from_html`, together with the module-level `BASE_URL`.

    Returns whatever `extract_units_from_html` produces (presumably the
    page's resources such as video and subtitle URLs -- confirm against
    the extractor implementation).
    """
    html = get_page_contents(url, headers)
    return get_page_extractor(url).extract_units_from_html(
        html, BASE_URL, file_formats
    )
def get_available_sections(url, headers):
    """
    Extract the sections and subsections from a given url.

    The page is fetched with `get_page_contents(url, headers)` and parsed
    by the extractor that `get_page_extractor(url)` selects, using the
    module-level `BASE_URL`.

    Returns whatever the extractor's `extract_sections_from_html` produces.
    """
    # Pass values as logging arguments instead of concatenating: the message
    # is only formatted when DEBUG is enabled, and a non-str `url` no longer
    # raises TypeError just for the sake of a log line.
    logging.debug("Extracting sections for :%s", url)
    page = get_page_contents(url, headers)
    page_extractor = get_page_extractor(url)
    sections = page_extractor.extract_sections_from_html(page, BASE_URL)
    logging.debug("Extracted sections: %s", sections)
    return sections
def get_courses_info(url, headers):
    """
    Fetch the dashboard page and return the courses extracted from it.

    The page at `url` is retrieved with `get_page_contents(url, headers)`
    and parsed by the extractor that `get_page_extractor(url)` selects,
    using the module-level `BASE_URL`. The extracted data is logged at
    DEBUG level before being returned.
    """
    logging.info('Extracting course information from dashboard.')
    dashboard_html = get_page_contents(url, headers)
    extractor = get_page_extractor(url)
    found_courses = extractor.extract_courses_from_html(dashboard_html, BASE_URL)
    logging.debug('Data extracted: %s', found_courses)
    return found_courses