def runOne_v2(loadFn, parseFn, url):
    """Load *url*, parse it, and return a settled Promise.

    Resolves with the list of links that ``parseFn`` extracts from the
    page; rejects with the exception if loading or parsing fails.

    :param loadFn: callable ``url -> (status, html)``
    :param parseFn: callable ``(url, html) -> list of links``
    :param url: page to fetch
    :return: a resolved or rejected Promise
    """
    def submit():
        _, html = loadFn(url)      # loadFn yields a (status, html) pair
        links = parseFn(url, html)
        return links

    try:
        return Promise.resolve(submit())
    except Exception as ex:
        # BUG FIX: the rejected promise was created but never returned,
        # so callers received None on failure instead of a rejection.
        return Promise.reject(ex)
Beispiel #2
0
    def get_all_page_program_detail(resolve, reject):
        """Promise executor: download listing page ``counter`` and resolve
        with the per-posting detail results once all of them finish.

        Relies on closure variables from the enclosing scope: ``counter``
        (1-based page number), ``program_list_page_token``,
        ``for_my_program_url``, ``first_page_content`` and
        ``get_program_detail_content_promise``.
        """
        print("Getting Program Lists " + str(counter))
        if counter != 1:
            selected_program_list_data = urllib.parse.urlencode(
                {
                    'action': program_list_page_token,
                    'orderBy': '',
                    'oldOrderBy': '',
                    'sortDirection': 'Forward',
                    'keyword': '',
                    'searchBy': 'jobViewCountCurrentTerm',
                    'searchType': '',
                    'initialSearchAction': 'displayViewedJobs',
                    'postings': 'infoForPostings',
                    'page': str(counter),
                    'currentPage': str(counter - 1),
                    'rand': '1'}).encode()

            try:
                # Stagger requests so every page is not fetched at once.
                time.sleep(counter)
                program_list_content = urllib.request.urlopen(for_my_program_url, selected_program_list_data).read()
            except Exception as ex:
                error_log("Error Found In Acquiring Page " + str(counter))
                # BUG FIX: execution previously fell through with
                # program_list_content undefined, raising NameError below
                # instead of settling the promise. Reject and bail out.
                reject(ex)
                return
        else:
            # Page 1 was already fetched by the caller.
            program_list_content = first_page_content

        project_detail_link = re.findall(r'\=\"(\?action=.+?)\"\>', str(program_list_content))
        print(str(len(project_detail_link)) + " for Page: " + str(counter) + " " + str(
            re.findall(r'\=\"\?action=.+?\"\>[\\rnt]+(.+?)\<\/a\>', str(program_list_content))))

        # Throttle: wait while too many worker threads are still alive.
        while threading.active_count() > 50:
            time.sleep(2)

        attempt_success = False

        def page_complete_call_back(res):
            # Fires once every per-posting promise on this page resolved.
            print("Finished Getting All From Page: " + str(counter))
            resolve(res)

        # Retry building the per-posting promise fan-out until it succeeds.
        while attempt_success is False:
            try:
                promises_list = [get_program_detail_content_promise(counter, i, project_detail_link[i]) for i in
                                 range(len(project_detail_link))]
                Promise.all(promises_list).then(page_complete_call_back)
                attempt_success = True
            except Exception as e:
                error_log(str(e) + " Page " + str(counter) + " Failed")
                attempt_success = False
                time.sleep(2)
Beispiel #3
0
    def request(self, payload, timeout):
        """Send *payload* through the broker and return a Promise for the reply.

        Rejects immediately if packaging or writing fails; otherwise the
        promise resolves (or rejects) once the matching response channel
        yields within *timeout*.
        """
        try:
            packet = package.create_package(payload)
            output = self.broker.get_output_channel()
            output.write(packet)
        except Exception as err:
            return Promise.reject(err)

        reply_channel = self.broker.get_response_channel(packet.ID)

        def wait_for_reply(resolve, reject):
            # Block on the response channel until data arrives or it times out.
            try:
                resolve(reply_channel.read(timeout))
            except Exception as err:
                reject(err)

        return Promise(wait_for_reply)
Beispiel #4
0
    def run(self):
        """Execute ``self.task`` through a Promise, then invoke the optional
        ``self.on_complete`` callback; failures are printed, not raised."""
        def on_rejected(cause):
            # Errors are only reported to stdout.
            print(cause)

        def start(resolve, reject):
            resolve(self.task() or True)

        def on_fulfilled(success):
            if self.on_complete:
                return self.on_complete() or True
            return True

        Promise(start).then(on_fulfilled, on_rejected)
Beispiel #5
0
    def get_all_page_program_detail(resolve, reject):
        """Promise executor: fetch listing page ``counter`` and resolve with
        the per-posting detail results.

        Uses closure variables from the enclosing (unseen) scope:
        ``counter`` (1-based page number), ``program_list_page_token``,
        ``for_my_program_url``, ``first_page_content`` and
        ``get_program_detail_content_promise``.
        """
        print("Getting Program Lists " + str(counter))
        if counter != 1:
            # POST body that asks the site for the requested listing page.
            selected_program_list_data = urllib.parse.urlencode({
                'action':
                program_list_page_token,
                'orderBy':
                '',
                'oldOrderBy':
                '',
                'sortDirection':
                'Forward',
                'keyword':
                '',
                'searchBy':
                'jobViewCountCurrentTerm',
                'searchType':
                '',
                'initialSearchAction':
                'displayViewedJobs',
                'postings':
                'infoForPostings',
                'page':
                str(counter),
                'currentPage':
                str(counter - 1),
                'rand':
                '1',
            }).encode()
            # Fetch the selected listing page.
            selected_program_list_page = urllib.request.urlopen(
                for_my_program_url, selected_program_list_data)
            program_list_content = selected_program_list_page.read()
        else:
            # Page 1 was already downloaded by the caller.
            program_list_content = first_page_content

        # Extract the "?action=..." detail links for every posting on the page.
        project_detail_link = re.findall(r'\=\"(\?action=.+?)\"\>',
                                         str(program_list_content))
        # One promise per posting; resolve once all of them complete.
        promises_list = [
            get_program_detail_content_promise(counter, i,
                                               project_detail_link[i])
            for i in range(len(project_detail_link))
        ]

        Promise.all(promises_list).then(lambda res: resolve(res))
Beispiel #6
0
    def run(self):
        """Run ``self.do_task`` on a worker thread; when it fulfils, launch
        ``self.call_on_complete`` on its own thread. Failures are printed."""
        def on_rejected(cause):
            print(cause)

        def start(resolve, reject):
            # The task runs off-thread; resolve fires from that thread.
            worker = Thread(
                target=lambda: resolve(self.do_task(self.call_on_data) or True))
            worker.start()

        def on_fulfilled(success):
            Thread(target=self.call_on_complete).start()
            return True

        Promise(start).then(on_fulfilled, on_rejected)
Beispiel #7
0
    def async_read(self, to):
        """Wrap a blocking ``self.read(to)`` in a Promise.

        Resolves with the packet that was read; rejects with whatever
        exception the blocking read raises.
        """

        def deferred_read(resolve, reject):
            try:
                resolve(self.read(to))
            except Exception as failure:
                reject(failure)

        return Promise(deferred_read)
def runOne_v1(loadFn, parseFn, url):
    """Return a Promise for the list of links found at *url*.

    Note: the executor runs synchronously inside the Promise constructor —
    loading and parsing happen before this function returns.

    :param loadFn: callable ``url -> (status, html)``
    :param parseFn: callable ``(url, html) -> list of links``
    """
    def actualOp(resolve, reject):
        try:
            _, html = loadFn(url)
            resolve(parseFn(url, html))
        except Exception as ex:
            reject(ex)

    return Promise(actualOp)
Beispiel #9
0
def get_program_detail_content_promise(page_counter, counter, link):
    def get_program_detail(resolve, reject):
        # print(str(page_counter * 100 - 100 + counter + 1) + " Getting Program Lists")
        program_form_page = urllib.request.urlopen(for_my_program_url + link)
        program_form_content = program_form_page.read()

        # it get another form and we need to submit it
        input_form_data = {}
        input_soup = BeautifulSoup(program_form_content, "html.parser")
        for form_input in input_soup.findAll('input'):
            input_tokens = re.search(
                '\<input name=\"(.*?)\" type=\"hidden\" value\=\"(.*?)\"\/\>',
                str(form_input))
            input_form_data[input_tokens.group(1)] = input_tokens.group(2)

        print(
            str(page_counter * 100 - 100 + counter + 1) +
            " Acquiring Program Information")
        complete_form_data = urllib.parse.urlencode(input_form_data).encode()
        program_detail_page = urllib.request.urlopen(for_my_program_url,
                                                     complete_form_data)
        program_detail_content = program_detail_page.read()

        print(
            str(page_counter * 100 - 100 + counter + 1) +
            " Gathering Program Information")
        project_category = BeautifulSoup(program_detail_content, "html.parser")

        info = ""
        for project_info in project_category.findAll('td',
                                                     attrs={"width": "75%"}):
            info_tokens = re.search('\<td width=\"75%\"\>([\w\W]+?)\<\/td\>',
                                    str(project_info))
            if info_tokens is not None:
                # add_word(info_tokens.group(1).split(" "))
                info += info_tokens.group(1).lstrip().rstrip()

        resolve(info)

    return Promise(get_program_detail)
Beispiel #10
0
def get_job_promise(job):
    """Return a Promise that fetches ``job.organization``'s stock price and
    saves the job record.

    Best-effort: failures are logged via ``error_log`` and the promise still
    settles, so a ``Promise.all`` over many jobs cannot hang on one failure.
    Throttles creation while more than 200 threads are alive.
    """
    def action(resolve, reject):
        try:
            print("Attemping To Get " + job.organization)
            job.organization_price = get_stoke_price(job.organization)
            print(str(job.fake_id) + " - " + job.organization + " Got")
            job.save()
        except Exception as e:
            error_log(e)
        # BUG FIX: the executor previously never called resolve/reject,
        # so the returned promise never settled and Promise.all hung.
        resolve(job)

    # Throttle: cap the number of live worker threads.
    while threading.active_count() > 200:
        time.sleep(2)

    # Retry promise creation until it succeeds (e.g. thread start failures).
    success = False
    while success is False:
        try:
            p = Promise(action)
            success = True
        except Exception:
            time.sleep(2)
            success = False

    return p
Beispiel #11
0
def get_program_detail_content_promise(page_counter, counter, link):
    """Return a Promise that scrapes one posting's detail page into a dict.

    Resolves with ``info``: a dict mapping cleaned field labels to cleaned
    values, plus ``info["ID"]`` (the numeric Job ID). ``page_counter`` and
    ``counter`` only feed the progress messages; ``link`` is the
    "?action=..." suffix of the posting's form page.
    """
    def get_program_detail(resolve, reject):
        # Step 1: fetch the intermediate form page for this posting.
        program_form_page = urllib.request.urlopen(for_my_program_url + link)
        program_form_content = program_form_page.read()

        # Step 2: the page contains another form; collect its hidden inputs
        # so the form can be re-submitted.
        input_form_data = {}
        input_soup = BeautifulSoup(program_form_content, "html.parser")
        for form_input in input_soup.findAll('input'):
            # NOTE(review): assumes every <input> matches this pattern —
            # a non-matching tag would make input_tokens None and .group raise.
            input_tokens = re.search('\<input name=\"(.*?)\" type=\"hidden\" value\=\"(.*?)\"\/\>',
                                     str(form_input))
            input_form_data[input_tokens.group(1)] = input_tokens.group(2)

        print(str(page_counter * 100 - 100 + counter + 1) + " Acquiring Program Information")
        # Step 3: submit the hidden-field form to obtain the detail page.
        complete_form_data = urllib.parse.urlencode(input_form_data).encode()
        program_detail_page = urllib.request.urlopen(for_my_program_url, complete_form_data)
        program_detail_content = program_detail_page.read()

        info = {}

        # The numeric Job ID appears right after the "Job ID" label.
        info["ID"] = str(re.search(r"Job ID[\w\W]+?(\d+)", str(program_detail_content)).group(1))
        # print(str(program_detail_content))

        # Unwrap value cells (<td class="">...</td>) so the label/value regex
        # below can pair them up.
        program_detail_content = re.sub(r'\<td class\=\"\"\>(.+?)\<\/td\>[\w\W]+?\<\/tr\>', r'\g<1>',
                                        str(program_detail_content))

        # Step 4: pull (label, value) pairs out of each table row.
        res = re.findall(
            r'\<tr\>[\w\W]+?\<td style\=\"width\: 25\%\;\"\>(?:[\\rtn]|\<strong\>)+([\w\W]+?)\\[\w\W]+?\<td width=\"75%\"\>(?:[\\rtn]|<strong>)+([\w\W]+?)\<\/tr\>',
            str(program_detail_content))
        for i in res:
            info[clean_up_word(i[0])] = clean_up_word(i[1])
        # NOTE(review): assumes a 'Job Title:' field was found — KeyError otherwise.
        print(str(page_counter * 100 - 100 + counter + 1) + " Gathered Program Information For " + info['Job Title:'])
        resolve(info)

    return Promise(get_program_detail)
Beispiel #12
0
 def on_mic_button_clicked(self, button):
     """Button handler: run ``app_window.on_mic_pressed()`` via a Promise.

     NOTE(review): the promise is neither stored nor chained, so any
     rejection goes unobserved — confirm this fire-and-forget use is
     intentional.
     """
     Promise(lambda resolve, reject: resolve(self.app_window.
                                             on_mic_pressed()))
Beispiel #13
0
        g = gdataDict[a['properties']['LOTPLAN']]
    except KeyError:
        print('NO MATCH:', a['properties']['ADDRESS'])
        continue
    except KeyboardInterrupt:
        print(KeyboardInterrupt)
        break

    geojsonAddress = a
    geojsonGeometry = g
    print(f'({i}):    ', a['properties']['ADDRESS'])

    promiseAll = Promise.all([
        Promise(lambda resolve, reject: createGeojsonGeometry(
            resolve, reject, geojsonGeometry)),
        Promise(lambda resolve, reject: createGeojsonProperties(
            resolve, reject, geojsonAddress)),
    ]).then(lambda results: Promise(lambda resolve, reject: createGeojson(
        resolve, reject, results, geojsonAddress))).then(
            lambda res: print(res.text))
    time.sleep(0.1)

if false:
    #######
    len(gdat['features'])
    # 575226
    len(address['features'])
    # 602593

    #### only 2 missing matches
    for a in address['features'][:50]:
        try:
Beispiel #14
0
def log_in():
    """Log in to WaterlooWorks and build a word dictionary from the
    'Viewed' program pages.

    Flow: authenticate against CAS, locate the 'Viewed' section token,
    POST for the first listing page, derive the page-switch token and page
    count, then fan out one promise per page and aggregate the words when
    all pages resolve.
    """
    # The action/ target from the form
    log_in_url = 'https://cas.uwaterloo.ca/cas/login?service=https://waterlooworks.uwaterloo.ca/waterloo.htm'

    # BUG FIX: the original credential line was corrupted by a scrape
    # redaction into `input("UserName: "******"Password: "******...)`,
    # which is a syntax error. Reconstructed after the sibling log_in()
    # variant in this file — TODO confirm against the original source.
    username = input("UserName: ")
    password = input("Password: ")
    data = urllib.parse.urlencode(
        {'username': username, 'password': password, '_eventId': 'submit',
         'submit': 'LOGIN', 'lt': 'e1s1'}).encode()

    # BUG FIX: start_time was used in final_call_back but never defined.
    start_time = time.time()

    print("Log in Into Waterloo Website")
    # First GET establishes the session; the POST submits the CAS form.
    urllib.request.urlopen(log_in_url)
    urllib.request.urlopen(log_in_url, data)

    # go in to the page of "For My Program"
    for_my_program_page = urllib.request.urlopen(for_my_program_url)
    for_my_program_page_content = for_my_program_page.read()

    token = ""
    token_soup = BeautifulSoup(for_my_program_page_content, "html.parser")
    for link in token_soup.findAll('a'):
        # Match the anchor whose label (with template whitespace) is "Viewed".
        if link.string == "\r\n\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tViewed\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t":
            token = re.search("action':'(.+?)'", str(link)).group(1)

    print("Getting Program Lists")
    # try post to "For My Program"
    program_data = urllib.parse.urlencode({
        'action': token,
        'rand': '1'
    }).encode()
    # get the default first page
    first_page = urllib.request.urlopen(for_my_program_url, program_data)
    first_page_content = first_page.read()
    program_list_page_token = re.search(
        r"loadPostingTable\(orderBy, oldOrderBy, sortDirection, page.+?action.+?'(.+?)\\'",
        str(first_page_content), re.DOTALL).group(1)
    # Highest "null <n>" number on the page is the page count.
    page_count = max(
        map(int, re.findall(r'null\W+?(\d+)\W+?', str(first_page_content))))

    # BUG FIX: range(1, page_count) skipped the last page; the sibling
    # variant of this function uses page_count + 1.
    all_pages_programs_promise_list = [
        get_all_page_program_detail_content_promise(program_list_page_token, i,
                                                    first_page_content)
        for i in range(1, page_count + 1)
    ]

    def final_call_back(data):
        # Runs once every page promise has resolved.
        print("Organizing data...")
        for words in data:
            add_word(words)
        create_dictionary()
        print("Done! ")
        print("---- %s seconds ----" % (time.time() - start_time))

    Promise.all(all_pages_programs_promise_list).then(
        lambda res: final_call_back(res))
Beispiel #15
0
def log_in():
    """Log in to WaterlooWorks and store every program of the target
    section in the database.

    Flow: authenticate against CAS, locate the ``target_section`` token,
    POST for the first listing page, derive the page-switch token and page
    count, then fan out one promise per page and persist each program when
    all pages resolve.
    """
    # The action/ target from the form
    log_in_url = 'https://cas.uwaterloo.ca/cas/login?service=https://waterlooworks.uwaterloo.ca/waterloo.htm'

    # BUG FIX: the scrape redacted the credential lines into a comment,
    # leaving `username` undefined (NameError below). Restored from the
    # remnant ("l78zhu"); substitute real credentials or an input() prompt.
    username = "l78zhu"
    password = "******"
    data = urllib.parse.urlencode(
        {'username': username, 'password': password, '_eventId': 'submit', 'submit': 'LOGIN',
         'lt': 'e1s1'}).encode()

    start_time = time.time()

    print("Log in Into Waterloo Website")
    # First GET establishes the session; the POST submits the CAS form.
    urllib.request.urlopen(log_in_url)
    urllib.request.urlopen(log_in_url, data)

    # go in to the page of "For My Program"
    for_my_program_page = urllib.request.urlopen(for_my_program_url)
    for_my_program_page_content = for_my_program_page.read()

    token = ""
    token_soup = BeautifulSoup(for_my_program_page_content, "html.parser")
    for link in token_soup.findAll('a'):
        if link.string.strip() == target_section:
            token = re.search("action':'(.+?)'", str(link)).group(1)

    # BUG FIX: `token is ""` compared object identity and only worked by
    # accident of string interning; compare by value instead.
    if token == "":
        error_log("Error: " + target_section + " Token Unfound")
        exit()

    print("Getting Target Section Program List")
    # try post to target section
    program_data = urllib.parse.urlencode(
        {'action': token, 'rand': '1'}).encode()
    # get the default first page
    first_page = urllib.request.urlopen(for_my_program_url, program_data)
    first_page_content = first_page.read()

    try:
        program_list_page_token = re.search(
            r"loadPostingTable\(orderBy\, oldOrderBy\, sortDirection\, page[\w\W]+?action[\w\W]+?\'([\w\W]+?)\\\'",
            str(first_page_content)).group(1)
    except Exception:
        error_log("Program Page Switch Token Unfound")
        exit()

    # get the number of pages
    page_count = max(map(int, re.findall(r'null\W+?(\d+)\W+?', str(first_page_content))))

    print("Attempting to Get All Pages")

    all_pages_programs_promise_list = [
        get_all_page_program_detail_content_promise(program_list_page_token, i, first_page_content)
        for i in range(1, page_count + 1)
        ]

    def final_call_back(data):
        # Runs once every page promise has resolved.
        print("Organizing data...")
        print("Done! ")

        for each_page in data:
            for each_program in each_page:
                organize_program_info_to_database(each_program)

        print("---- %s seconds ----" % (time.time() - start_time))

    Promise.all(all_pages_programs_promise_list).then(lambda res: final_call_back(res))
Beispiel #16
0
from async_promises import Promise
#from promise import Promise
from time import sleep

def thing(resolve, reject):
    """Toy promise executor: block for ten seconds, then fulfil with "CHEESE!"."""
    sleep(10)
    result = "CHEESE!"
    return resolve(result)

# Build 100 promises over the same slow executor, then print the combined
# results once every one of them has fulfilled.
promices = [Promise(thing) for _ in range(100)]

Promise.all(promices).then(print)