예제 #1
0
def __search_by_config__(config):
    """Run the configured search and return the refined, limited results.

    Opens the URL that ``__get_url__`` builds from ``config["QUERY_TEXT"]``,
    walks every ``<div>`` of the page and assembles one dict per result.

    :param config: mapping providing ``QUERY_TEXT`` (passed to
        ``__get_url__``), ``REFINE_REGEX`` (passed to ``__match_exp__``
        together with each result's detail text) and ``RESULT_LIMIT``
        (maximum number of results returned).
    :return: list of dicts; each has ``detail`` and, when the matching
        elements were found on the page, ``name`` and ``url``.
    """
    # ----- start browser -----
    browser = RoboBrowser(history=True)
    browser.parser = "lxml"

    query_url = __get_url__(config["QUERY_TEXT"])
    log("Query Url: \t\t" + query_url)
    browser.open(query_url)

    all_content = str(browser.find_all())
    # NOTE(review): this is True when NO_RESULT_STR *is* present, which looks
    # inverted for a "has results" flag. It is only logged, so it is left
    # unchanged here -- confirm the intended meaning.
    has_init_results = NO_RESULT_STR in all_content
    log("Has initial results: \t" + str(has_init_results))

    # ----- create raw results list -----
    results = []
    current_result = {}
    for div in browser.select("div"):
        div_id = div.get("id")

        # A title div starts a new result ...
        if div_id == "menu_detalle_buscador":
            current_result = {
                "name": div.select("#menu_titulo_buscador a")[0].text,
            }

        # ... and the detail div that follows completes it.
        if div_id == "buscador_detalle":
            current_result["detail"] = div.select(
                "#buscador_detalle_sub")[0].text
            for link in div.select("#buscador_detalle_sub_datos a"):
                href = str(link.get("href"))
                if "bajar.php" in href:
                    current_result["url"] = href
            results.append(current_result)

    # ----- apply regex -----
    # A list comprehension (instead of filter()) keeps the result sliceable
    # on Python 3, where filter() returns a lazy iterator.
    result_divs = [
        item for item in results
        if __match_exp__(config["REFINE_REGEX"], item["detail"])
    ]

    # ----- apply limit -----
    return result_divs[:config["RESULT_LIMIT"]]
    def authenticate(self, username=None, password=None):
        """Authenticate against MyMedicare.gov and return the local user.

        Submits the MyMedicare.gov login form with the given credentials.
        When the site redirects to the dashboard, the matching ``User`` is
        looked up by ``username``; if none exists one is created, given a
        ``UserProfile`` of type ``'BEN'`` and added to the ``'BlueButton'``
        group.

        :param username: MyMedicare.gov user name.
        :param password: MyMedicare.gov password.
        :return: the ``User`` on success, ``None`` when credentials are
            missing or the site did not redirect to the dashboard.
        """
        # Without both credentials there is nothing to submit.
        if username is None or password is None:
            return None

        login_url = 'https://www.mymedicare.gov/default.aspx'
        rb = RoboBrowser()
        rb.parser = 'lxml'
        rb.open(login_url)
        # Get the form content
        form = rb.get_form()
        if settings.DEBUG:
            print("Page:", rb)
        # We will be working with these form fields.
        # Set them as variables for easier re-use
        form_pwd = "ctl00$ContentPlaceHolder1$ctl00$HomePage$SWEPassword"
        form_usr = "******"
        form_agree = "ctl00$ContentPlaceHolder1$ctl00$HomePage$Agree"
        form_create_acc = "ctl00$ContentPlaceHolder1$ctl00$HomePage$lnk" \
                          "CreateAccount"
        # Set the form field values
        form.fields[form_usr].value = username
        form.fields[form_pwd].value = password
        # There is a javascript popup after hitting submit that sets the
        # agree field to "True". Default in form is "False".
        form.fields[form_agree].value = "True"
        # Remove the CreateAccount field. It seems to drive the form
        # to the registration page.
        form.fields.pop(form_create_acc)
        # Capture the dynamic ASP.NET form elements and feed them back so
        # the form validates.
        for field_name in ('__VIEWSTATEGENERATOR', '__VIEWSTATE',
                           '__EVENTVALIDATION'):
            field = form.fields[field_name]
            field.value = field._value
        # Prepare the form for submission
        form.serialize()
        # submit the form
        rb.submit_form(form)

        # A successful login redirects to the dashboard.
        if rb.url != "https://www.mymedicare.gov/dashboard.aspx":
            # The MyMedicare login failed.
            return None

        # Get the name, e.g. "Welcome, JOHN A DOE ". The welcome element may
        # be absent, in which case both names stay empty.
        welcome = rb.find("li", {"id": "welcomeli"})
        my_name = ""
        if welcome is not None and welcome.contents:
            my_name = welcome.contents[0].replace("Welcome, ", "")

        # split() without arguments ignores leading/trailing/repeated
        # whitespace, so no empty name parts need special-casing.
        name_parts = my_name.split()
        first_name = name_parts[0] if name_parts else ""
        last_name = name_parts[-1] if name_parts else ""

        try:
            user = User.objects.get(username=username)
        except User.DoesNotExist:
            # Create a new user. Note that we can set password
            # to anything, because it won't be checked; the password
            # from the external backend is checked (coming from settings).
            user = User(username=username, password='******',
                        first_name=first_name,
                        last_name=last_name)
            user.save()
            UserProfile.objects.get_or_create(user=user, user_type='BEN')
            group = Group.objects.get(name='BlueButton')
            user.groups.add(group)

        return user
    def authenticate(self, username=None, password=None):
        """Authenticate against MyMedicare.gov and return the local user.

        The user name is stripped of surrounding whitespace and lower-cased,
        then submitted with the password through the MyMedicare.gov login
        form. When the site redirects to the dashboard, the matching
        ``User`` is looked up by the normalized user name; if none exists
        one is created, given a ``UserProfile`` of type ``'BEN'`` and added
        to the ``'BlueButton'`` group.

        :param username: MyMedicare.gov user name.
        :param password: MyMedicare.gov password.
        :return: the ``User`` on success, ``None`` when credentials are
            missing or the site did not redirect to the dashboard.
        """
        # The defaults are None; normalizing None would raise, and without
        # both credentials there is nothing to submit.
        if username is None or password is None:
            return None

        username = username.strip().lower()
        login_url = 'https://www.mymedicare.gov/default.aspx'
        rb = RoboBrowser()
        rb.parser = 'lxml'
        rb.open(login_url)
        # Get the form content
        form = rb.get_form()
        if settings.DEBUG:
            print("Page:", rb)
        # We will be working with these form fields.
        # Set them as variables for easier re-use
        form_pwd = "ctl00$ContentPlaceHolder1$ctl00$HomePage$SWEPassword"
        form_usr = "******"
        form_agree = "ctl00$ContentPlaceHolder1$ctl00$HomePage$Agree"
        form_create_acc = "ctl00$ContentPlaceHolder1$ctl00$HomePage$lnk" \
                          "CreateAccount"
        # Set the form field values
        form.fields[form_usr].value = username
        form.fields[form_pwd].value = password
        # There is a javascript popup after hitting submit that sets the
        # agree field to "True". Default in form is "False".
        form.fields[form_agree].value = "True"
        # Remove the CreateAccount field. It seems to drive the form
        # to the registration page.
        form.fields.pop(form_create_acc)
        # Capture the dynamic ASP.NET form elements and feed them back so
        # the form validates.
        for field_name in ('__VIEWSTATEGENERATOR', '__VIEWSTATE',
                           '__EVENTVALIDATION'):
            field = form.fields[field_name]
            field.value = field._value
        # Prepare the form for submission
        form.serialize()
        # submit the form
        rb.submit_form(form)

        # A successful login redirects to the dashboard.
        if rb.url != "https://www.mymedicare.gov/dashboard.aspx":
            # The MyMedicare login failed.
            return None

        # Get the name, e.g. "Welcome, JOHN A DOE ". The welcome element may
        # be absent, in which case both names stay empty.
        welcome = rb.find("li", {"id": "welcomeli"})
        my_name = ""
        if welcome is not None and welcome.contents:
            my_name = welcome.contents[0].replace("Welcome, ", "")

        # split() without arguments ignores leading/trailing/repeated
        # whitespace, so no empty name parts need special-casing.
        name_parts = my_name.split()
        first_name = name_parts[0] if name_parts else ""
        last_name = name_parts[-1] if name_parts else ""

        try:
            user = User.objects.get(username=username)
        except User.DoesNotExist:
            # Create a new user. Note that we can set password
            # to anything, because it won't be checked; the password
            # from the external backend is checked (coming from settings).
            user = User(username=username,
                        password='******',
                        first_name=first_name,
                        last_name=last_name)
            user.save()
            UserProfile.objects.get_or_create(user=user, user_type='BEN')
            group = Group.objects.get(name='BlueButton')
            user.groups.add(group)

        return user
예제 #4
0
def connect(request, mmg):
    """
    Login to MyMedicare.gov using RoboBrowser.

    :param request: request object handed to ``messages.error`` when the
        login does not reach the dashboard.
    :param mmg: dict holding the credentials under ``'mmg_user'`` and
        ``'mmg_pwd'``. It is updated in place and returned.
    :return: the same dict with ``'status'`` (``"OK"`` or ``"FAIL"``),
        ``'url'`` (URL reached after submitting the form) and
        ``'robobrowser'`` (the browser used). On success it also carries
        ``'mmg_name'`` and ``'mmg_account'``, each ``None`` when the
        corresponding page element was not found.
    """
    mmg_back = mmg
    mmg_back['status'] = "FAIL"
    PARSER = BS_PARSER
    if not PARSER:
        # Lazy %-formatting: an extra positional argument without a
        # placeholder makes logging report a formatting error.
        logger.debug('Default Parser for BeautifulSoup: %s', 'lxml')
        PARSER = 'lxml'

    login_url = 'https://www.mymedicare.gov/default.aspx'
    dashboard_url = "https://www.mymedicare.gov/dashboard.aspx"

    username = mmg['mmg_user']
    password = mmg['mmg_pwd']

    # Call the default page.
    # We will then want to get the Viewstate and eventvalidation entries;
    # we need to submit them with the form.
    rb = RoboBrowser()
    mmg_back['robobrowser'] = rb

    # Set the parser explicitly.
    # This avoids BeautifulSoup reporting an issue in the console/log.
    rb.parser = PARSER

    # Open the form to start the login
    rb.open(login_url)

    # Get the form content
    form = rb.get_form()

    # We will be working with these form fields.
    # Set them as variables for easier re-use
    form_pwd = "ctl00$ContentPlaceHolder1$ctl00$HomePage$SWEPassword"
    form_usr = "******"
    form_agree = "ctl00$ContentPlaceHolder1$ctl00$HomePage$Agree"
    form_create_acc = "ctl00$ContentPlaceHolder1$ctl00$HomePage$lnk" \
                      "CreateAccount"

    # Set the form field values
    form.fields[form_usr].value = username
    form.fields[form_pwd].value = password

    # There is a javascript popup after hitting submit.
    # It seems to set the following field to "True".
    # Default in form is "False".
    form.fields[form_agree].value = "True"

    # Remove the CreateAccount field. It seems to drive the form
    # to the registration page.
    form.fields.pop(form_create_acc)

    # Capture the dynamic ASP.NET form elements and feed them back so the
    # form validates.
    for field_name in ('__VIEWSTATEGENERATOR', '__VIEWSTATE',
                       '__EVENTVALIDATION'):
        field = form.fields[field_name]
        field.value = field._value

    # Prepare the form for submission
    form.serialize()

    # submit the form
    rb.submit_form(form)

    if rb.url != dashboard_url:
        err_span = rb.find("span",
                           {"id": "ctl00_ContentPlaceHolder1"
                                  "_ctl00_HomePage_lblError"})
        if err_span is not None:
            # Report the element's text rather than its raw child list.
            messages.error(request, err_span.get_text())
        messages.error(request, "We had a problem connecting to your "
                                "Medicare account")
        mmg_back['status'] = "FAIL"
        mmg_back['url'] = rb.url
        return mmg_back

    # <ul id="headertoolbarright">
    #    <li class="welcometxt" id="welcomeli">Welcome, JOHN A DOE </li>
    my_name = rb.find("li", {"id": "welcomeli"})
    if my_name:
        my_name = my_name.contents[0].replace("Welcome, ", "")

    my_account = rb.find("div", {"id": "RightContent"})
    if my_account:
        # Turn site-relative links into absolute ones that open in a new
        # tab.
        my_account = my_account.prettify().replace(
            'href="/',
            'target="_blank" '
            'href="https://www.mymedicare.gov/')

    # Hand the retrieved data back to the caller.
    mmg_back['status'] = "OK"
    mmg_back['url'] = rb.url
    mmg_back['mmg_account'] = my_account
    mmg_back['mmg_name'] = my_name

    mmg_back['robobrowser'] = rb

    return mmg_back
예제 #5
0
def connect(request, mmg):
    """
    Login to MyMedicare.gov using RoboBrowser.

    :param request: request object handed to ``messages.error`` when the
        login does not reach the dashboard.
    :param mmg: dict holding the credentials under ``'mmg_user'`` and
        ``'mmg_pwd'``. It is updated in place and returned.
    :return: the same dict with ``'status'`` (``"OK"`` or ``"FAIL"``),
        ``'url'`` (URL reached after submitting the form) and
        ``'robobrowser'`` (the browser used). On success it also carries
        ``'mmg_name'`` and ``'mmg_account'``, each ``None`` when the
        corresponding page element was not found.
    """
    mmg_back = mmg
    mmg_back['status'] = "FAIL"
    PARSER = BS_PARSER
    if not PARSER:
        # Lazy %-formatting: an extra positional argument without a
        # placeholder makes logging report a formatting error.
        logger.debug('Default Parser for BeautifulSoup: %s', 'lxml')
        PARSER = 'lxml'

    login_url = 'https://www.mymedicare.gov/default.aspx'
    dashboard_url = "https://www.mymedicare.gov/dashboard.aspx"

    username = mmg['mmg_user']
    password = mmg['mmg_pwd']

    # Call the default page.
    # We will then want to get the Viewstate and eventvalidation entries;
    # we need to submit them with the form.
    rb = RoboBrowser()
    mmg_back['robobrowser'] = rb

    # Set the parser explicitly.
    # This avoids BeautifulSoup reporting an issue in the console/log.
    rb.parser = PARSER

    # Open the form to start the login
    rb.open(login_url)

    # Get the form content
    form = rb.get_form()

    # We will be working with these form fields.
    # Set them as variables for easier re-use
    form_pwd = "ctl00$ContentPlaceHolder1$ctl00$HomePage$SWEPassword"
    form_usr = "******"
    form_agree = "ctl00$ContentPlaceHolder1$ctl00$HomePage$Agree"
    form_create_acc = "ctl00$ContentPlaceHolder1$ctl00$HomePage$lnk" \
                      "CreateAccount"

    # Set the form field values
    form.fields[form_usr].value = username
    form.fields[form_pwd].value = password

    # There is a javascript popup after hitting submit.
    # It seems to set the following field to "True".
    # Default in form is "False".
    form.fields[form_agree].value = "True"

    # Remove the CreateAccount field. It seems to drive the form
    # to the registration page.
    form.fields.pop(form_create_acc)

    # Capture the dynamic ASP.NET form elements and feed them back so the
    # form validates.
    for field_name in ('__VIEWSTATEGENERATOR', '__VIEWSTATE',
                       '__EVENTVALIDATION'):
        field = form.fields[field_name]
        field.value = field._value

    # Prepare the form for submission
    form.serialize()

    # submit the form
    rb.submit_form(form)

    if rb.url != dashboard_url:
        err_span = rb.find(
            "span",
            {"id": "ctl00_ContentPlaceHolder1"
             "_ctl00_HomePage_lblError"})
        if err_span is not None:
            # Report the element's text rather than its raw child list.
            messages.error(request, err_span.get_text())
        messages.error(
            request, "We had a problem connecting to your "
            "Medicare account")
        mmg_back['status'] = "FAIL"
        mmg_back['url'] = rb.url
        return mmg_back

    # <ul id="headertoolbarright">
    #    <li class="welcometxt" id="welcomeli">Welcome, JOHN A DOE </li>
    my_name = rb.find("li", {"id": "welcomeli"})
    if my_name:
        my_name = my_name.contents[0].replace("Welcome, ", "")

    my_account = rb.find("div", {"id": "RightContent"})
    if my_account:
        # Turn site-relative links into absolute ones that open in a new
        # tab.
        my_account = my_account.prettify().replace(
            'href="/', 'target="_blank" '
            'href="https://www.mymedicare.gov/')

    # Hand the retrieved data back to the caller.
    mmg_back['status'] = "OK"
    mmg_back['url'] = rb.url
    mmg_back['mmg_account'] = my_account
    mmg_back['mmg_name'] = my_name

    mmg_back['robobrowser'] = rb

    return mmg_back
예제 #6
0
파일: tg.py 프로젝트: bingsong/codesnippet
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

from robobrowser import RoboBrowser
from requests import Session
from fake_useragent import UserAgent
import re

# Search Baidu for a keyword and print the result URL followed by the href
# of every link on the result page that points at baidu.php.
url = 'http://www.baidu.com'
ua = UserAgent()
keyword = 'sp68'

s = Session()
try:
    br = RoboBrowser(session=s, history=True, user_agent=ua.chrome)
    br.parser = 'lxml'
    br.timeout = 1
    br.open(url)

    # Fill in and submit the search form (the one posting to "/s").
    form = br.get_form(action='/s')
    form['wd'].value = keyword
    br.submit_form(form)

    # print() with a single argument behaves the same on Python 2 and 3.
    print(br.url)
    # Dots are escaped so they match literally, not "any character".
    link_pattern = re.compile(r"^http://www\.baidu\.com/baidu\.php")
    for link in br.find_all('a', href=link_pattern):
        print(link['href'])
finally:
    # Release the session even when a request or lookup above fails.
    s.close()

예제 #7
-1
def get_medicare_email(request, mmg):
    """
    Fetch the e-mail address shown on the MyMedicare.gov account page.

    :param request: not used by this function.
    :param mmg: dict that is updated in place and returned.
    :return: the same dict with ``'mmg_email'`` set to the address found
        (empty string when none was found) and ``'status'`` set to
        ``"OK"`` (plus ``'url'``) only when the browser ended up on the
        account page; otherwise ``'status'`` stays ``"FAIL"``.
    """

    mmg_back = mmg
    mmg_back['status'] = "FAIL"
    mmg_back['mmg_email'] = ""

    # Fall back to lxml when the setting is missing or empty.
    PARSER = getattr(settings, 'BS_PARSER', None)
    if not PARSER:
        if settings.DEBUG:
            print('Default Parser for BeautifulSoup:', 'lxml')
        PARSER = 'lxml'

    # NOTE(review): this opens the account page with a brand-new browser,
    # not one that has already logged in -- confirm the page is reachable
    # this way.
    rb = RoboBrowser()

    # Set the parser explicitly.
    # This avoids BeautifulSoup reporting an issue in the console/log.
    rb.parser = PARSER

    target_page = "https://www.mymedicare.gov/myaccount.aspx"
    rb.open(target_page)
    page = rb.parsed

    if settings.DEBUG:
        print("===============================")
        print("on page:", rb.url)
        print("MyAccount:", page)

    # NOTE(review): the value looks like an element id but is matched
    # against the class attribute -- verify against the real page.
    my_email = rb.find(
        "div",
        attrs={"class": "ctl00_ctl00_ContentPlaceHolder1_ctl00_ctl00_ctl00_ctl01_UserInfo_pnlEmailSettings"})

    if settings.DEBUG:
        print("What email information:", my_email)

    # Default to an empty address so a missing panel or data element does
    # not raise.
    mail_address = ""
    if my_email is not None:
        mail_addr = my_email.find("div",
                                  attrs={"class": "myaccount-data"})
        if mail_addr is not None:
            mail_address = mail_addr.text

    mmg_back['mmg_email'] = mail_address
    if rb.url == target_page:
        mmg_back['url'] = rb.url
        mmg_back['status'] = "OK"

    if settings.DEBUG:
        print("Email:", mail_address)
        print("url:", rb.url)

    return mmg_back