class Chami(object):
    """Scrape course, folder and file listings from an e-learning site into ``self.db``.

    All HTTP traffic goes through one shared ``requests.Session`` so that the
    login performed by ``connect`` is reused by the later requests.

    NOTE(review): TLS verification is inconsistent across methods --
    ``connect`` defaults to ``ssl_verify=False`` and the two course listings
    pass ``verify=False``, while the document pages and ``disconnect`` use the
    session default. Confirm which behaviour is intended before unifying it.
    """

    # Site used when the caller does not supply ``site_url``.
    DEFAULT_SITE_URL = 'https://elearning.esi.heb.be'

    def __init__(self, username=None, password=None, site_url=None):
        """Store the credentials and create the database handle and HTTP session.

        ``site_url`` overrides the default site when given; a trailing slash
        is dropped because every URL below is built as ``<site_url>/<path>``.
        """
        self.username = username
        self.password = password
        self.site_url = (site_url or self.DEFAULT_SITE_URL).rstrip('/')
        self.db = Db()
        self.session = requests.Session()

    def connect(self, username=None, password=None, ssl_verify=False):
        """Post the login form and report whether the login was accepted.

        Credentials left as ``None`` fall back to the ones given to the
        constructor. Returns ``True`` unless the URL of the response contains
        the marker ``user_password_incorrect``.
        """
        if username is None:
            username = self.username
        if password is None:
            password = self.password
        payload = {'login': username, 'password': password}
        response = self.session.post(self.index_url, data=payload, verify=ssl_verify)
        return 'user_password_incorrect' not in response.url

    def disconnect(self):
        """Request the site's logout URL with the current session."""
        self.session.get('{}/index.php?logout=logout'.format(self.site_url))

    @property
    def index_url(self):
        """URL of the site's index page (also the login form target)."""
        return '%s/%s' % (self.site_url, 'index.php')

    @property
    def courses_url(self):
        """URL of the catalogue page listing the courses of every category."""
        return '%s/%s' % (self.site_url, 'main/auth/courses.php?action=display_courses&category_code=ALL&hidden_links=0')

    @property
    def my_courses_url(self):
        """URL of the user portal page."""
        return '%s/%s' % (self.site_url, 'user_portal.php')

    def get_courses(self):
        """Insert every accessible course of the catalogue page into the database.

        A course box is kept only when it has both a description block and an
        access button. Returns ``self.db.select_courses()``.
        """
        html = self.session.get(self.courses_url, verify=False).content
        page_body = bs(html).find('body', {'class': 'section-mycourses '})
        # The expected <body> is absent when the page layout differs (for
        # instance when not logged in); there is then nothing to insert.
        boxes = page_body.findAll('div', {'class': 'span4'}) if page_body is not None else []
        for box in boxes:
            access_link = box.find('a', {'class': 'btn btn-primary'})
            # Keep the box only if it describes a course and links to it.
            if box.find('div', {'class': 'categories-course-description'}) and access_link:
                name = box.find('h3').text
                url = access_link['href']
                print(name, url)
                self.db.insert_course(Course(name, url))
        return self.db.select_courses()

    def get_my_courses(self):
        """Record the courses shown on the user portal and return them.

        Returns ``self.db.select_my_courses()``.
        """
        def extract_course_id(url):
            # Fifth '/'-separated piece of the link; presumably the course
            # code in 'scheme://host/courses/<code>/...' -- TODO confirm.
            return url.split('/')[4]

        html = self.session.get(self.my_courses_url, verify=False).content
        main_content = bs(html).find('section', {'id': 'main_content'})
        # Same guard as in get_courses: a missing section means no courses.
        boxes = main_content.findAll('div', {'class': 'well course-box'}) if main_content is not None else []
        for box in boxes:
            link = box.find('a')
            if link is None:
                # A box without any link carries no course id.
                continue
            self.db.insert_my_course(extract_course_id(link['href']))
        return self.db.select_my_courses()

    def download(self):
        """Print the URL of the third entry of ``self.courses`` (unfinished stub).

        NOTE(review): nothing in this class assigns ``self.courses``; unless
        it is set from outside, this raises AttributeError. Left unchanged
        because the intended source of the course list is not visible here.
        """
        course = self.courses[2]
        print("course url " + course.url)

    def update_db(self, courses):
        """Re-crawl the folders of each given course id and stamp its update date.

        TODO: the crawl is sequential; an earlier draft sketched a 5-thread
        ``workerpool.WorkerPool`` here to parallelise the downloads.
        """
        for course in courses:
            print('update_db_course', course, time.strftime('%Y-%m-%d %H:%M:%S'))
            self.course_folders(course_id=course)
            # Stamped after the crawl, so the stored date is the crawl's end.
            self.db.update_course(course, date=time.strftime('%Y-%m-%d %H:%M:%S'))

    def courses_to_be_updated(self):
        """Return the ids of stored courses whose document page shows newer entries.

        For every row of ``self.db.select_courses()`` (element 0 is used as
        the course id, element 2 as the stored update date) the course's
        document table is fetched; the course is selected as soon as one row
        carries a date string greater than the stored one.
        """
        courses_to_update = []
        for course in self.db.select_courses():
            print(course)
            course_id = course[0]
            date_updated_course = course[2]
            course_url = '%s/main/document/document.php?cidReq=%s' % (self.site_url, course_id)
            soup = bs(self.session.get(course_url).content)
            table = soup.find('table', {'class': 'data_table'})
            if table is None:
                continue
            # The first row of the table is skipped, as in the other parsers.
            for row in table.findAll('tr')[1:]:
                stamp = row.find('small')
                if stamp is None:
                    # No date on this row, so there is nothing to compare.
                    continue
                # A course with no stored date always needs an update; testing
                # for None first also avoids comparing str with None, which
                # raises TypeError on Python 3.
                if date_updated_course is None or stamp.text > date_updated_course:
                    courses_to_update.append(course_id)
                    break
        print('{} courses need to be updated'.format(len(courses_to_update)))
        return courses_to_update

    def course_folders(self, url=None, course_id=None):
        """Store every folder of a course, then the files found in each folder.

        The course is ``course_id``, or ``extract_id(url)`` when ``course_id``
        is falsy. Folders are read from the ``<option>`` elements of the
        course's document page; entries whose text contains ``DELETED`` are
        skipped.
        """
        if not course_id:
            course_id = extract_id(url)
        doc_url = '%s/main/document/document.php?cidReq=%s' % (self.site_url, course_id)
        print('doc_url', doc_url)
        soup = bs(self.session.get(doc_url).content)
        # Location of the most recently seen folder at each nesting level; a
        # dict keeps the lookup free of any fixed maximum depth.
        parents = {}
        for option in soup.findAll('option'):
            name = option.text
            location = option['value']
            if 'DELETED' in name:
                continue
            folder_level = convert_folder_level(name)
            # A nested folder hangs under the last folder one level up.
            parent = parents.get(folder_level - 1) if folder_level > 0 else None
            parents[folder_level] = location
            self.db.insert_folder(Folder(clean_folder_name(name), location, course_id, parent))
            self.folder_files(location, course_id)

    def folder_files(self, folder_location, course_id):
        """Store every downloadable file listed on one folder's document page.

        A row counts as downloadable when its link contains both ``courses``
        and ``document``.
        """
        folder_url = '%s/main/document/document.php?id=%s&cidReq=%s' % (self.site_url, folder_location, course_id)
        soup = bs(self.session.get(folder_url).content)
        table = soup.find('table', {'class': 'data_table'})
        if table is None:
            return
        # The first row of the table is not checked.
        for row in table.findAll('tr')[1:]:
            link = row.find('a')
            if link is None:
                # Rows without a link cannot point at a file.
                continue
            href = link.get('href') or ''
            # Is the link downloadable?
            if 'courses' in href and 'document' in href:
                name = link['title']
                date = row.find('small').text
                self.db.insert_file(File(folder_location, course_id, name, date, href))