def runOne_v2(loadFn, parseFn, url):
    """Fetch *url* with loadFn, parse it with parseFn, and wrap the result.

    Returns a resolved Promise carrying the list of all links parsed from
    the page, or a rejected Promise carrying the exception when loading or
    parsing fails.
    """
    def submit():
        # loadFn returns a (status, html) pair; only the body is used here.
        _, html = loadFn(url)
        links = parseFn(url, html)
        return links

    try:
        return Promise.resolve(submit())
    except Exception as ex:
        # BUG FIX: the rejected promise was previously built but never
        # returned, so callers received None instead of a rejection.
        return Promise.reject(ex)
def get_all_page_program_detail(resolve, reject):
    # Promise executor: download page `counter` of the posting list, then
    # resolve with the detail info of every posting linked from that page.
    # NOTE(review): `counter`, `program_list_page_token`, `for_my_program_url`
    # and `first_page_content` are closed-over names from an enclosing
    # promise-factory function that is not visible in this chunk — confirm.
    print("Getting Program Lists " + str(counter))
    if counter != 1:
        # Pages after the first must be requested explicitly with the paging
        # token; the form fields mirror the site's loadPostingTable() call.
        selected_program_list_data = urllib.parse.urlencode(
            {
                'action': program_list_page_token,
                'orderBy': '',
                'oldOrderBy': '',
                'sortDirection': 'Forward',
                'keyword': '',
                'searchBy': 'jobViewCountCurrentTerm',
                'searchType': '',
                'initialSearchAction': 'displayViewedJobs',
                'postings': 'infoForPostings',
                'page': str(counter),
                'currentPage': str(counter - 1),
                'rand': '1'}).encode()
        try:
            # Stagger concurrent page requests (sleep grows with page number).
            time.sleep(counter)
            program_list_content = urllib.request.urlopen(for_my_program_url, selected_program_list_data).read()
            # print(re.findall(r'\=\"\?action=.+?\"\>[\\rnt]+(.+?)\<\/a\>', str(program_list_content)))
        except Exception:
            # Best-effort: a failed page download is logged, not re-raised.
            error_log("Error Found In Acquiring Page " + str(counter))
    else:
        # Page 1 was already fetched at login time.
        program_list_content = first_page_content
    # Each posting row links through a `?action=...` href; harvest them all.
    project_detail_link = re.findall(r'\=\"(\?action=.+?)\"\>', str(program_list_content))
    print(str(len(project_detail_link)) + " for Page: " + str(counter) + " " + str(
        re.findall(r'\=\"\?action=.+?\"\>[\\rnt]+(.+?)\<\/a\>', str(program_list_content))))
    # Throttle: wait while more than 50 threads are alive before fanning out.
    while threading.active_count() > 50:
        time.sleep(2)
        # error_log("Page " + str(counter) + " Waiting")
    attempt_success = False

    def page_complete_call_back(res):
        # Fires once every posting on this page has been scraped.
        print("Finished Getting All From Page: " + str(counter))
        resolve(res)

    # Retry promise fan-out creation until it succeeds.
    while attempt_success is False:
        try:
            promises_list = [get_program_detail_content_promise(counter, i, project_detail_link[i])
                             for i in range(len(project_detail_link))]
            Promise.all(promises_list).then(page_complete_call_back)
            attempt_success = True
        except Exception as e:
            error_log(str(e) + " Page " + str(counter) + " Failed")
            attempt_success = False
            time.sleep(2)
def request(self, payload, timeout):
    """Send *payload* through the broker and return a Promise for the reply.

    A write failure rejects immediately; otherwise the returned Promise
    resolves with whatever the matching response channel reads (or rejects
    if the read itself raises, e.g. on timeout).
    """
    try:
        outgoing = package.create_package(payload)
        self.broker.get_output_channel().write(outgoing)
    except Exception as err:
        # Could not even send the request — fail the promise up front.
        return Promise.reject(err)

    # Responses are matched to requests by package ID.
    reply_channel = self.broker.get_response_channel(outgoing.ID)

    def wait_for_reply(resolve, reject):
        try:
            resolve(reply_channel.read(timeout))
        except Exception as err:
            reject(err)

    return Promise(wait_for_reply)
def run(self):
    """Run self.task() inside a Promise, then invoke on_complete (if set)."""

    def report_failure(cause):
        # Rejection handler: surface the failure cause on stdout.
        print(cause)

    def start(resolve, reject):
        # `or True` keeps the resolution truthy even when task() returns None.
        resolve(self.task() or True)

    def finished(success):
        if self.on_complete:
            return self.on_complete() or True
        return True

    Promise(start).then(finished, report_failure)
def get_all_page_program_detail(resolve, reject):
    # Promise executor: download page `counter` of the posting list and
    # resolve with the detail info of every posting linked from that page.
    # NOTE(review): `counter`, `program_list_page_token`, `for_my_program_url`
    # and `first_page_content` come from the enclosing (not shown) scope.
    print("Getting Program Lists " + str(counter))
    if counter != 1:
        # Pages after the first must be requested via the paging token.
        selected_program_list_data = urllib.parse.urlencode({
            'action': program_list_page_token,
            'orderBy': '',
            'oldOrderBy': '',
            'sortDirection': 'Forward',
            'keyword': '',
            'searchBy': 'jobViewCountCurrentTerm',
            'searchType': '',
            'initialSearchAction': 'displayViewedJobs',
            'postings': 'infoForPostings',
            'page': str(counter),
            'currentPage': str(counter - 1),
            'rand': '1',
        }).encode()
        # go to the list want to select
        selected_program_list_page = urllib.request.urlopen(
            for_my_program_url, selected_program_list_data)
        program_list_content = selected_program_list_page.read()
    else:
        # Page 1 was already fetched at login time.
        program_list_content = first_page_content
    # Each posting row links through a `?action=...` href; harvest them all.
    project_detail_link = re.findall(r'\=\"(\?action=.+?)\"\>', str(program_list_content))
    promises_list = [
        get_program_detail_content_promise(counter, i, project_detail_link[i])
        for i in range(len(project_detail_link))
    ]
    # Resolve this page's promise once every posting detail has been fetched.
    Promise.all(promises_list).then(lambda res: resolve(res))
def run(self):
    """Run do_task on a background thread via a Promise; on success, start
    call_on_complete on another thread."""

    def report_failure(cause):
        # Rejection handler: surface the failure cause on stdout.
        print(cause)

    def start(resolve, reject):
        def worker():
            # `or True` keeps the resolution truthy even if do_task
            # returns None.
            resolve(self.do_task(self.call_on_data) or True)
        Thread(target=worker).start()

    def after(success):
        # Thread.start() returns None, so `or True` yields True for the chain.
        return Thread(target=self.call_on_complete).start() or True

    Promise(start).then(after, report_failure)
def async_read(self, to):
    """Return a Promise that resolves with self.read(to), rejecting on error.

    `to` is forwarded to the underlying blocking read (presumably a
    timeout — TODO confirm against the read() implementation).
    """
    def deliver(resolve, reject):
        try:
            resolve(self.read(to))
        except Exception as err:
            reject(err)

    return Promise(deliver)
def runOne_v1(loadFn, parseFn, url):
    """Return a Promise that loads *url* and resolves with the parsed links.

    Any exception raised while loading or parsing rejects the Promise.
    """
    def executor(resolve, reject):
        def fetch_links():
            # loadFn returns a (status, html) pair; only the body is used.
            _, html = loadFn(url)
            return parseFn(url, html)

        try:
            resolve(fetch_links())  # asynchronous !!!
        except Exception as err:
            reject(err)

    return Promise(executor)
def get_program_detail_content_promise(page_counter, counter, link):
    # Returns a Promise that downloads one posting's detail page (following
    # the intermediate hidden form) and resolves with the concatenated text
    # of its 75%-width description cells.
    def get_program_detail(resolve, reject):
        # print(str(page_counter * 100 - 100 + counter + 1) + " Getting Program Lists")
        program_form_page = urllib.request.urlopen(for_my_program_url + link)
        program_form_content = program_form_page.read()
        # it get another form and we need to submit it
        input_form_data = {}
        input_soup = BeautifulSoup(program_form_content, "html.parser")
        for form_input in input_soup.findAll('input'):
            # Collect every hidden form field so the follow-up POST replays
            # the form exactly.
            input_tokens = re.search(
                '\<input name=\"(.*?)\" type=\"hidden\" value\=\"(.*?)\"\/\>',
                str(form_input))
            input_form_data[input_tokens.group(1)] = input_tokens.group(2)
        # `page_counter * 100 - 100 + counter + 1` — global posting ordinal;
        # appears to assume 100 postings per page — TODO confirm.
        print(
            str(page_counter * 100 - 100 + counter + 1) +
            " Acquiring Program Information")
        complete_form_data = urllib.parse.urlencode(input_form_data).encode()
        program_detail_page = urllib.request.urlopen(for_my_program_url, complete_form_data)
        program_detail_content = program_detail_page.read()
        print(
            str(page_counter * 100 - 100 + counter + 1) +
            " Gathering Program Information")
        project_category = BeautifulSoup(program_detail_content, "html.parser")
        info = ""
        for project_info in project_category.findAll('td', attrs={"width": "75%"}):
            # Strip the surrounding <td> and keep only the cell body.
            info_tokens = re.search('\<td width=\"75%\"\>([\w\W]+?)\<\/td\>',
                                    str(project_info))
            if info_tokens is not None:
                # add_word(info_tokens.group(1).split(" "))
                info += info_tokens.group(1).lstrip().rstrip()
        resolve(info)
    return Promise(get_program_detail)
def get_job_promise(job):
    """Build a Promise that looks up and saves the stock price for *job*.

    Throttles while too many worker threads are alive, then retries Promise
    construction until it succeeds; the executor itself is best-effort
    (failures are logged via error_log, never re-raised).
    """
    def fetch_price(resolve, reject):
        try:
            print("Attemping To Get " + job.organization)
            job.organization_price = get_stoke_price(job.organization)
            print(str(job.fake_id) + " - " + job.organization + " Got")
            job.save()
        except Exception as e:
            # Best-effort: record the failure and move on.
            error_log(e)

    # Back off while the process is saturated with worker threads.
    while threading.active_count() > 200:
        time.sleep(2)

    # Keep retrying until the Promise object is successfully created.
    while True:
        try:
            pending = Promise(fetch_price)
            return pending
        except Exception:
            time.sleep(2)
def get_program_detail_content_promise(page_counter, counter, link):
    # Returns a Promise that downloads one posting's detail page (following
    # the intermediate hidden form) and resolves with a dict of its fields,
    # keyed by the row captions plus a synthetic "ID" entry.
    def get_program_detail(resolve, reject):
        program_form_page = urllib.request.urlopen(for_my_program_url + link)
        program_form_content = program_form_page.read()
        # it get another form and we need to submit it
        input_form_data = {}
        input_soup = BeautifulSoup(program_form_content, "html.parser")
        for form_input in input_soup.findAll('input'):
            # Collect every hidden form field so the follow-up POST replays
            # the form exactly.
            input_tokens = re.search('\<input name=\"(.*?)\" type=\"hidden\" value\=\"(.*?)\"\/\>', str(form_input))
            input_form_data[input_tokens.group(1)] = input_tokens.group(2)
        # `page_counter * 100 - 100 + counter + 1` — global posting ordinal;
        # appears to assume 100 postings per page — TODO confirm.
        print(str(page_counter * 100 - 100 + counter + 1) + " Acquiring Program Information")
        complete_form_data = urllib.parse.urlencode(input_form_data).encode()
        program_detail_page = urllib.request.urlopen(for_my_program_url, complete_form_data)
        program_detail_content = program_detail_page.read()
        info = {}
        info["ID"] = str(re.search(r"Job ID[\w\W]+?(\d+)", str(program_detail_content)).group(1))
        # print(str(program_detail_content))
        # get rid of td in table
        program_detail_content = re.sub(r'\<td class\=\"\"\>(.+?)\<\/td\>[\w\W]+?\<\/tr\>',
                                        r'\g<1>', str(program_detail_content))
        # Pair each 25%-width caption cell with its 75%-width value cell.
        res = re.findall(
            r'\<tr\>[\w\W]+?\<td style\=\"width\: 25\%\;\"\>(?:[\\rtn]|\<strong\>)+([\w\W]+?)\\[\w\W]+?\<td width=\"75%\"\>(?:[\\rtn]|<strong>)+([\w\W]+?)\<\/tr\>',
            str(program_detail_content))
        for i in res:
            info[clean_up_word(i[0])] = clean_up_word(i[1])
        # NOTE(review): assumes every posting has a 'Job Title:' row — a
        # missing row would raise KeyError here.
        print(str(page_counter * 100 - 100 + counter + 1) +
              " Gathered Program Information For " + info['Job Title:'])
        resolve(info)
    return Promise(get_program_detail)
def on_mic_button_clicked(self, button):
    """UI handler: forward a mic-button click to the app window via a Promise.

    The Promise is fire-and-forget; its result is not retained.
    """
    def fire(resolve, reject):
        resolve(self.app_window.on_mic_pressed())

    Promise(fire)
g = gdataDict[a['properties']['LOTPLAN']] except KeyError: print('NO MATCH:', a['properties']['ADDRESS']) continue except KeyboardInterrupt: print(KeyboardInterrupt) break geojsonAddress = a geojsonGeometry = g print(f'({i}): ', a['properties']['ADDRESS']) promiseAll = Promise.all([ Promise(lambda resolve, reject: createGeojsonGeometry( resolve, reject, geojsonGeometry)), Promise(lambda resolve, reject: createGeojsonProperties( resolve, reject, geojsonAddress)), ]).then(lambda results: Promise(lambda resolve, reject: createGeojson( resolve, reject, results, geojsonAddress))).then( lambda res: print(res.text)) time.sleep(0.1) if false: ####### len(gdat['features']) # 575226 len(address['features']) # 602593 #### only 2 missing matches for a in address['features'][:50]: try:
def log_in():
    # Log in to WaterlooWorks via UW CAS, locate the "Viewed" section token,
    # fetch the first posting-list page, then fan out promises that scrape
    # every page and finally build a word dictionary from the results.
    # The action/ target from the form
    log_in_url = 'https://cas.uwaterloo.ca/cas/login?service=https://waterlooworks.uwaterloo.ca/waterloo.htm'
    # NOTE(review): the next statement was destroyed by a secret-scrubber
    # (`******` replaced the credential-handling code, which presumably
    # included the password prompt, the CAS login POST, and the timing /
    # progress lines). It is NOT valid Python as written — the original
    # code must be restored before this function can run.
    username = input("UserName: "******"Password: "******"Log in Into Waterloo Website")
    # go in to the page of "For My Program"
    for_my_program_page = urllib.request.urlopen(for_my_program_url)
    for_my_program_page_content = for_my_program_page.read()
    token = ""
    token_soup = BeautifulSoup(for_my_program_page_content, "html.parser")
    for link in token_soup.findAll('a'):
        # if link.string == "For My Program ":
        # Match the "Viewed" section link by its exact (whitespace-laden)
        # anchor text; alternative sections are left commented out below.
        if link.string == "\r\n\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tViewed\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t":
            # if link.string == "Application Deadlines in the next 10 Days\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t":
            # if link.string == "Application Deadlines Today\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t":
            token = re.search("action':'(.+?)'", str(link)).group(1)
    print("Getting Program Lists")
    # try post to "For My Program"
    program_data = urllib.parse.urlencode({
        'action': token,
        'rand': '1'
    }).encode()
    # get the default first page
    first_page = urllib.request.urlopen(for_my_program_url, program_data)
    first_page_content = first_page.read()
    # Extract the paging token from the site's loadPostingTable() JS call.
    program_list_page_token = re.search(
        r"loadPostingTable\(orderBy, oldOrderBy, sortDirection, page.+?action.+?'(.+?)\\'",
        str(first_page_content), re.DOTALL).group(1)
    # Largest number following "null" in the page markup is the page count.
    page_count = max(
        map(int, re.findall(r'null\W+?(\d+)\W+?', str(first_page_content))))
    # NOTE(review): range(1, page_count) skips the last page — the sibling
    # log_in() variant in this file uses page_count + 1; confirm intended.
    all_pages_programs_promise_list = [
        get_all_page_program_detail_content_promise(program_list_page_token, i, first_page_content)
        for i in range(1, page_count)
    ]

    def final_call_back(data):
        # Runs once every page promise has resolved; `data` is a list of
        # per-page word lists.
        print("Organizing data...")
        for words in data:
            add_word(words)
        create_dictionary()
        print("Done! ")
        # NOTE(review): `start_time` is referenced but its assignment was
        # lost in the scrubbed statement above — restore it there.
        print("---- %s seconds ----" % (time.time() - start_time))

    Promise.all(all_pages_programs_promise_list).then(
        lambda res: final_call_back(res))
def log_in():
    """Log in to WaterlooWorks through UW CAS, walk to the target section's
    posting list, and fan out promises that scrape every page of postings
    into the database.

    Side effects: network I/O, progress printing; calls exit() when the
    section token or the paging token cannot be found.
    """
    # The action/ target from the form
    log_in_url = 'https://cas.uwaterloo.ca/cas/login?service=https://waterlooworks.uwaterloo.ca/waterloo.htm'
    # request password
    # NOTE(review): the original credential lines were mangled by a
    # secret-scrubber ("******" residue). Reconstructed as placeholders —
    # supply real credentials (or restore the input()/getpass prompts)
    # before running.
    # username = input("UserName: ")
    # password = getpass.getpass("Password: ")
    username = "l78zhu"
    password = "******"  # redacted placeholder, not a real password
    data = urllib.parse.urlencode(
        {'username': username,
         'password': password,
         '_eventId': 'submit',
         'submit': 'LOGIN',
         'lt': 'e1s1'}).encode()
    start_time = time.time()
    print("Log in Into Waterloo Website")
    # First GET establishes the CAS session; the POST submits the login form.
    urllib.request.urlopen(log_in_url)
    urllib.request.urlopen(log_in_url, data)
    # go in to the page of "For My Program"
    for_my_program_page = urllib.request.urlopen(for_my_program_url)
    for_my_program_page_content = for_my_program_page.read()
    token = ""
    token_soup = BeautifulSoup(for_my_program_page_content, "html.parser")
    for link in token_soup.findAll('a'):
        # Guard: anchors without text have link.string == None, which would
        # crash .strip(); skip them.
        if link.string and link.string.strip() == target_section:
            token = re.search("action':'(.+?)'", str(link)).group(1)
    # BUG FIX: original used `token is ""` — an identity comparison with a
    # string literal, which is implementation-dependent; use equality.
    if token == "":
        error_log("Error: " + target_section + " Token Unfound")
        exit()
    print("Getting Target Section Program List")
    # try post to target section
    program_data = urllib.parse.urlencode(
        {'action': token, 'rand': '1'}).encode()
    # get the default first page
    first_page = urllib.request.urlopen(for_my_program_url, program_data)
    first_page_content = first_page.read()
    try:
        # Extract the paging token from the loadPostingTable() JS call.
        program_list_page_token = re.search(
            r"loadPostingTable\(orderBy\, oldOrderBy\, sortDirection\, page[\w\W]+?action[\w\W]+?\'([\w\W]+?)\\\'",
            str(first_page_content)).group(1)
    except Exception:
        error_log("Program Page Switch Token Unfound")
        exit()
    # get the number of pages
    page_count = max(map(int, re.findall(r'null\W+?(\d+)\W+?', str(first_page_content))))
    print("Attempting to Get All Pages")
    all_pages_programs_promise_list = [
        get_all_page_program_detail_content_promise(program_list_page_token, i, first_page_content)
        for i in range(1, page_count + 1)
    ]

    def final_call_back(data):
        # Runs once every page promise has resolved; `data` is a list of
        # per-page lists of posting-info dicts.
        print("Organizing data...")
        print("Done! ")
        for each_page in data:
            for each_program in each_page:
                organize_program_info_to_database(each_program)
        print("---- %s seconds ----" % (time.time() - start_time))

    Promise.all(all_pages_programs_promise_list).then(lambda res: final_call_back(res))
from async_promises import Promise #from promise import Promise from time import sleep def thing(resolve, reject): sleep(10) return resolve("CHEESE!") promices = [] for i in range(100): promise = Promise(thing) promices.append(promise) Promise.all(promices).then(print)