# -*- coding: utf-8 -*- """ Parsing and extraction functions """ import re import json from datetime import timedelta, datetime from six.moves import html_parser from bs4 import BeautifulSoup as BeautifulSoup_ from .common import Course, Section, SubSection, Unit, Video # Force use of bs4 with html.parser BeautifulSoup = lambda page: BeautifulSoup_(page, 'html.parser') def edx_json2srt(o): """ Transform the dict 'o' into the srt subtitles format """ if o == {}: return '' base_time = datetime(1, 1, 1) output = [] for i, (s, e, t) in enumerate(zip(o['start'], o['end'], o['text'])): if t == '': continue
def beautiful_soup(page):
    """
    Parse 'page' with bs4, always selecting the 'html.parser' parser,
    and return the resulting soup object
    """
    # bs4 is imported on first use, so it is only required by callers
    # that actually reach this helper.
    import bs4

    return bs4.BeautifulSoup(page, 'html.parser')
import StringIO
import subprocess
import sys
import tempfile
import time
import urllib
import urllib2

# Try the `BeautifulSoup` package first; when it cannot be imported,
# fall back to `bs4` and wrap it so callers still use `BeautifulSoup(page)`.
try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    from bs4 import BeautifulSoup as BeautifulSoup_
    # Use html5lib for parsing if available
    try:
        import html5lib
        BeautifulSoup = lambda page: BeautifulSoup_(page, 'html5lib')
    except ImportError:
        BeautifulSoup = BeautifulSoup_

# Module-level state, empty until assigned elsewhere.
csrftoken = ''
session = ''
AUTH_URL = 'https://www.coursera.org/maestro/api/user/login'


class ClassNotFound(Exception):
    """
    Class to be thrown if a course is not found in Coursera's site.

    Derives from Exception rather than BaseException: BaseException is
    meant for interpreter-level exits such as KeyboardInterrupt and
    SystemExit, and deriving from it made this ordinary application error
    bypass `except Exception` handlers. `except ClassNotFound` keeps
    working unchanged.
    """
def BeautifulSoup(page):
    """
    Build a soup object from 'page', always using the 'html.parser' parser
    """
    parser_name = 'html.parser'
    return BeautifulSoup_(page, parser_name)