def get_vanpeople_posts(page_numbers):
    """
    Get vanpeople's posts from the given forum pages.

    NOTE(review): this function is re-defined below with a different
    signature; as the file stands, the later definition shadows this one.

    :param page_numbers: List[int] -- forum page numbers to scrape
    :return: List[Dict] -- one dict per post, each carrying a 'link' key
             plus whatever fields parse_post extracts
             (docstring previously said Dict, which was wrong)
    """
    posts = []
    for page_number in page_numbers:
        # collect every post link listed on this forum page
        post_links = parse_list(request(main_page(page_number)))
        for post_link in post_links:
            post = {'link': post_link}
            # merge the parsed post fields into the record
            post.update(parse_post(request(post_link)))
            posts.append(post)
    return posts
def get_vanpeople_posts(oldestPostDays, maxPostCount): """ get vanpeople's posts :param page_numbers: List[int] :return: Dict """ # init posts list posts = [] today = datetime.datetime.today() postDate = today forumPageNumber = 0 postCount = 0 while (postDate - today).days < oldestPostDays and postCount < maxPostCount: requestUrl = url_builder.rentForumPage(forumPageNumber) postLinks = parse_list(request(requestUrl)) # import pdb; pdb.set_trace() for post_link in postLinks: # assign post link post = {'link': post_link} # assign other fields to post postInfo, postDate = parse_post(request(post_link)) post.update(postInfo) # push post into posts list posts.append(post) # print progress to screen print 'Processed posts: {0}'.format(len(posts)) sys.stdout.write("\033[F") sys.stdout.write("\033[K") # increment forumPageNumber = forumPageNumber + 60 postCount = postCount + len(postLinks) print postDate, today print postCount, maxPostCount return posts
def resolve_url(url, play=False):
    """
    Resolve a CopiaPop file page to its direct download URL.

    :param url: file page URL
    :param play: when True, start playback of the resolved stream instead of
                 returning the URL
    :return: the direct download URL, or None (when play=True or the site
             did not return an http download link)
    """
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip,deflate',
        'Connection': 'keep-alive',
        'X-Requested-With': 'XMLHttpRequest'
    }
    result = requester.request(url, headers=urllib.urlencode(headers))
    result = result.decode('iso-8859-1').encode('utf-8')
    # scrape the display name plus the hidden form fields needed for the
    # download POST (file id and anti-forgery token)
    name = requester.parseDOM(result, 'meta', ret='content',
                              attrs={'property': 'og:title'})[0]
    fileid = requester.parseDOM(result, 'input', ret='value',
                                attrs={'name': 'fileId'})[0]
    token = requester.parseDOM(result, 'input', ret='value',
                               attrs={'name': '__RequestVerificationToken'})[0]
    formurl = CopiaPopURL + requester.parseDOM(
        result, 'form', ret='action', attrs={'class': 'download_form'})[0]
    # NOTE: the original rebuilt an identical headers dict here; reuse it
    post = {'fileId': fileid, '__RequestVerificationToken': token}
    result = json.loads(
        requester.request(formurl, post=urllib.urlencode(post),
                          headers=urllib.urlencode(headers)))
    if result['DownloadUrl'].startswith('http'):
        if play:  # idiom fix: truthiness test instead of "== True"
            play_url(result['DownloadUrl'], name, original_url=url,
                     original_filename=name)
        else:
            return result['DownloadUrl']
def open_folder(url):
    """
    Show the contents (sub-folders first, then files) of a folder page.

    :param url: folder page URL; deep links get the '/list,1,1' list-page
                suffix appended before fetching
    """
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip,deflate',
        'Connection': 'keep-alive',
        'X-Requested-With': 'XMLHttpRequest'
    }
    if len(url.split('/')) > 4:
        # deep folder links need the explicit list-page suffix
        url = url + '/list,1,1'
    result = requester.request(url, headers=urllib.urlencode(headers))
    if checkvalid(result):
        # renamed local: the original shadowed the builtin 'list'
        entries = list_folders(url, result=result)
        entries.extend(list_items(url, result=result))
        show_items(entries)
def list_folders(url, query=None, result=None):
    """
    Scrape the folder entries from a collections page.

    :param url: page URL; only fetched when result is not supplied
    :param query: unused; kept for signature compatibility with list_items
    :param result: pre-fetched page HTML, to avoid a second request
    :return: folder dicts sorted by name (ignoring a leading article),
             or [] on any scrape failure (best-effort)
    """
    try:
        folders = []  # renamed: original shadowed the builtin 'list'
        headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip,deflate',
            'Connection': 'keep-alive',
            'X-Requested-With': 'XMLHttpRequest'
        }
        if result is None:
            result = requester.request(url, headers=urllib.urlencode(headers))
        result = result.decode('iso-8859-1').encode('utf-8')
        result = requester.parseDOM(
            result, 'div',
            attrs={'class': 'collections_list responsive_width'})[0]
        items = requester.parseDOM(result, 'li')
        for indiv in items:
            if re.search('name', indiv):
                name = requester.replaceHTMLCodes(
                    requester.parseDOM(
                        indiv, 'a', attrs={'class': 'name'})[0].encode('utf-8'))
                # first number in the info line is the folder's item count
                length = re.compile('(\d+)').findall(
                    requester.parseDOM(
                        indiv, 'p', attrs={'class': 'info'})[0].encode('utf-8'))[0]
                pageurl = SiteURL + requester.parseDOM(
                    indiv, 'a', attrs={'class': 'name'}, ret='href')[0]
                # strip the thumbnail suffix to get the full-size image
                thumb = requester.parseDOM(
                    indiv, 'img', ret='src')[0].replace('/thumbnail', '')
                folders.append({
                    'type': 'folder',
                    'name': name,
                    'length': length,
                    'thumb': thumb,
                    'pageurl': pageurl
                })
        # sort by name while ignoring a leading English article
        folders = sorted(
            folders,
            key=lambda k: re.sub('(^the |^a )', '', k['name'].lower()))
        return folders
    except Exception:
        # best-effort scraping: any parse/network failure yields an empty
        # listing (narrowed from the original bare except)
        return []
def open_folder_recents(url):
    """
    Show a "recents" folder page, including pagination controls.

    NOTE(review): relies on a module-level 'page' variable for the page
    number -- confirm it is set before this function is called.

    :param url: folder base URL; deep links get the paged list suffix
    """
    # removed unused local 'formating' from the original
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip,deflate',
        'Connection': 'keep-alive',
        'X-Requested-With': 'XMLHttpRequest'
    }
    if len(url.split('/')) > 4:
        final_url = url + '/list,1,%s?ref=pager' % page
    else:
        final_url = url
    result = requester.request(final_url, headers=urllib.urlencode(headers))
    if checkvalid(result):
        # renamed local: the original shadowed the builtin 'list'
        entries = list_folders_recents(final_url, result=result)
        entries.extend(list_items(final_url, result=result))
        show_items(entries)
        page_check(result=result, baseurl=url)
    # close the directory listing regardless of fetch success
    endDirectory()
def list_items(url, query=None, result=None, content_type=None):
    """
    Scrape the file entries from a listing or search-results page.

    :param url: page URL; only fetched when result is not supplied
    :param query: search phrase; when given, a search POST is issued
    :param result: pre-fetched page HTML, to avoid a second request
    :param content_type: value for the search form's 'Type' field
    :return: list of content dicts; [] when the page fetch failed
    """
    entries = []  # renamed: original shadowed the builtin 'list'
    try:
        if result is None:
            headers = {
                'Accept': '*/*',
                'Accept-Encoding': 'gzip,deflate',
                'Connection': 'keep-alive',
                'X-Requested-With': 'XMLHttpRequest'
            }
            if query:
                # the search endpoint expects a form POST describing the query
                post = {
                    'Mode': 'List',
                    'Type': content_type,
                    'Phrase': query,
                    'SizeFrom': '0',
                    'SizeTo': '0',
                    'Extension': '',
                    'ref': 'pager',
                    'pageNumber': '1'
                }
                result = requester.request(url, post=urllib.urlencode(post),
                                           headers=urllib.urlencode(headers))
            else:
                result = requester.request(url,
                                           headers=urllib.urlencode(headers))
    except Exception:
        pass
    # BUGFIX: when the fetch above failed, 'result' stayed None and the
    # decode below raised AttributeError; return an empty listing instead
    if result is None:
        return entries
    result = result.decode('iso-8859-1').encode('utf-8')
    items = requester.parseDOM(result, 'div', attrs={'class': 'list_row'})
    try:
        thumb = requester.parseDOM(result, 'meta', ret='content',
                                   attrs={'property': 'og:image'})[0]
    except Exception:
        thumb = None  # page has no og:image; entries get no thumbnail
    for indiv in items:
        name = requester.replaceHTMLCodes(
            requester.parseDOM(
                requester.parseDOM(indiv, 'div', attrs={'class': 'name'}),
                'a')[0].encode('utf-8'))
        size = requester.parseDOM(
            requester.parseDOM(indiv, 'div', attrs={'class': 'size'}),
            'p')[0].encode('utf-8')
        pageurl = CopiaPopURL + requester.parseDOM(
            requester.parseDOM(indiv, 'div', attrs={'class': 'name'}),
            'a', ret='href')[0]
        # the hidden fileId input lives in a div nested under the date cell
        temp = requester.parseDOM(
            requester.parseDOM(indiv, 'div', attrs={'class': 'date'})[0],
            'div')[0]
        fileid = requester.parseDOM(temp, 'input', ret='value',
                                    attrs={'name': 'fileId'})[0]
        entries.append({
            'type': 'content',
            'name': name,
            'size': size,
            'fileid': fileid,
            'thumb': thumb,
            'pageurl': pageurl
        })
    return entries
def login():
    """
    Log in to the configured file-hosting site and persist the session cookie.

    Builds the login POST from the enabled account settings (copiapop,
    diskokosmiko or kumpulbagi; when several are enabled the LAST one wins,
    matching the original precedence), submits the form, and stores the
    returned session cookie in the 'request_cookie' setting.  On failure it
    shows an error dialog, opens the addon settings and exits.
    """
    import requests
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'pt-PT,pt;q=0.8,en-US;q=0.6,en;q=0.4',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': user_agent
    }
    cookie = requester.request(SiteURL, headers=urllib.urlencode(headers),
                               output='cookie')
    # the login endpoint wants a pseudo-random millisecond-style timestamp
    timestamp = str(int(time.time())) + str(randint(0, 9)) + str(randint(
        0, 9)) + str(randint(0, 9))
    url = SiteURL + '/action/Account/Login?returnUrl=%2F&TimeStamp=' + timestamp
    headers = {
        'Cookie': cookie,
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'Accept': '*/*',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': user_agent,
        'Referer': SiteURL,
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'pt-PT,pt;q=0.8,en-US;q=0.6,en;q=0.4'
    }
    result = requester.request(url, headers=headers)
    resdec = result.decode('ascii', 'ignore')
    headers = {
        'Cookie': cookie,
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'pt-PT,pt;q=0.8,en-US;q=0.6,en;q=0.4',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Origin': SiteURL,
        'Pragma': 'no-cache',
        'Referer': SiteURL,
        'User-Agent': user_agent,
        'X-Requested-With': 'XMLHttpRequest'
    }
    # the login form (with anti-forgery token) arrives wrapped in JSON
    newJSON = json.loads(resdec)['Content']
    token = requester.parseDOM(newJSON, 'input', ret='value',
                               attrs={'name': '__RequestVerificationToken'})[0]
    post = None
    if setting('copiapop-enable') == 'true':
        post = {
            'UserName': setting('copiapop-username'),
            'Password': setting('copiapop-password'),
            '__RequestVerificationToken': token
        }
    if setting('diskokosmiko-enable') == 'true':
        post = {
            'UserName': setting('diskokosmiko-username'),
            'Password': setting('diskokosmiko-password'),
            '__RequestVerificationToken': token
        }
    if setting('kumpulbagi-enable') == 'true':
        post = {
            'UserName': setting('kumpulbagi-username'),
            'Password': setting('kumpulbagi-password'),
            '__RequestVerificationToken': token
        }
    # BUGFIX: with no account enabled, 'post' was never assigned and the
    # requests.post call below raised NameError; fail via the settings dialog
    if post is None:
        dialog.ok(
            'CopiaDB',
            'Verifique se os dados de conta introduzidos estão correctos.')
        execute('Addon.OpenSettings(%s)' % (addon_id))
        sys.exit(0)
    formurl = SiteURL + '/action/Account/Login?returnUrl=%2F'
    raw = requests.post(formurl, data=post, headers=headers)
    success = raw.json()['Type']
    if success == 'Redirect':
        # keep only the cookie's name=value part, dropping attributes
        setSetting('request_cookie', raw.headers['Set-Cookie'].split(';')[0])
    else:
        dialog.ok(
            'CopiaDB',
            'Verifique se os dados de conta introduzidos estão correctos.')
        execute('Addon.OpenSettings(%s)' % (addon_id))
        sys.exit(0)
from config import URLS, FREQUENCY, SMTP_RECIPIENTS, ENROLLMENT_PAGE from sender import notify counter = 0 while True: # clear email sending list every 10 iterations if counter % 10 == 0: emails = [] # start searching print "Start " + str(counter) + " search " + str(datetime.datetime.now()) # fetch information from each url for url in URLS: response = request(url) courses = parse(response.content) # notify user when courses are available for course in courses: if course not in emails: message = "Course " + course + " is available to enroll at " + \ str(datetime.datetime.now()) + "\n\n" + ENROLLMENT_PAGE notify(SMTP_RECIPIENTS, "FIC Course Available", message) emails.append(course) # finish search print "Search Done" counter += 1 # wait for a period