Example 1
import urllib
import urllib2
import cookielib

import parser  # project-local helper that parses the result pages (assumed; not the stdlib parser module)


def get_highest_id():
    search_url = 'http://www.fema.gov/photolibrary/photo_search.do'
    post_payload = {
        'pageStart': '1',
        'SKeywords': '',
        'SLocation': '',
        'SDisasterNumber': '',
        'SPhotographer': '',
        'SCategoryComboId': '',
        'SStartDate': '',
        'SEndDate': '',
        'sortBy': 'date',
        'pageSize': '15',
        'action': 'Search',
    }
    urlopen = urllib2.urlopen
    Request = urllib2.Request
    cj = cookielib.LWPCookieJar()
    # LWPCookieJar is a FileCookieJar subclass with handy load() and save() methods.

    # Install the cookie jar in an opener: wrap it in an HTTPCookieProcessor,
    # build an opener around that handler, and install the opener globally in
    # urllib2 so every subsequent urlopen() call carries the session cookies.
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)
    # Fake a browser user agent; some sites (Google, for example) block
    # clients that look like automated crawlers.
    txheaders = {
        'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    }

    # We have to step through the landing page and the search results pages,
    # otherwise the site raises session errors; doing so picks up the
    # necessary cookies along the way.

    #req = Request('http://phil.cdc.gov/phil/home.asp', None, txheaders)
    #handle = urlopen(req)

    # Passing an encoded payload as the data argument turns this into a POST.
    req = Request(search_url, urllib.urlencode(post_payload), txheaders)
    handle = urlopen(req)

    search_results_html = handle.read()

    return parser.get_first_result_index_from_quick_search_results(
        search_results_html)
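
These examples are Python 2 code: urllib2 and cookielib were merged into urllib.request and http.cookiejar in Python 3, and urllib.urlencode became urllib.parse.urlencode. The following is a minimal Python 3 sketch of the same cookie-jar pattern, assuming the URL, payload, and headers from the example above; the function name is chosen here, and it returns the raw HTML instead of calling the project-local parser helper.

import urllib.parse
import urllib.request
from http.cookiejar import LWPCookieJar


def fetch_search_results_html_py3(search_url, post_payload, headers):
    # Same pattern as above: a cookie jar wrapped in an HTTPCookieProcessor,
    # installed globally so every urlopen() call shares the session cookies.
    cj = LWPCookieJar()
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
    urllib.request.install_opener(opener)

    # urlencode() returns str in Python 3; Request's data argument must be
    # bytes, and supplying data makes the request a POST.
    data = urllib.parse.urlencode(post_payload).encode('ascii')
    req = urllib.request.Request(search_url, data, headers)
    with urllib.request.urlopen(req) as handle:
        return handle.read().decode('utf-8', errors='replace')

The returned HTML would then be handed to the same parsing step used in the examples.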
Example 2
import urllib
import urllib2
import cookielib

import parser  # project-local helper that parses the result pages (assumed; not the stdlib parser module)


def get_highest_id():
    quicksearch_page_post_values = {
        'formaction': 'SEARCH',
        'illustrations': 'on',
        'keywords': ' ',
        'keywordstext': ' ',
        'photos': 'on',
        'searchtype': 'photo|illustration|video',
        'video': 'on',
    }
    urlopen = urllib2.urlopen
    Request = urllib2.Request
    cj = cookielib.LWPCookieJar()
    # LWPCookieJar is a FileCookieJar subclass with handy load() and save() methods.

    # Install the cookie jar in an opener: wrap it in an HTTPCookieProcessor,
    # build an opener around that handler, and install the opener globally in
    # urllib2 so every subsequent urlopen() call carries the session cookies.
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)
    # Fake a browser user agent; some sites (Google, for example) block
    # clients that look like automated crawlers.
    txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}

    # We have to step through the landing page and the search results pages,
    # otherwise the site raises session errors; doing so picks up the
    # necessary cookies along the way.
    req = Request('http://phil.cdc.gov/phil/home.asp', None, txheaders)
    #cj.save(COOKIEFILE)                     # save the cookies
    handle = urlopen(req)
    # Posting the encoded quick-search form reuses the cookies collected above.
    req = Request('http://phil.cdc.gov/phil/quicksearch.asp',
                  urllib.urlencode(quicksearch_page_post_values), txheaders)
    #cj.save(COOKIEFILE)                     # save the cookies again
    handle = urlopen(req)

    search_results_html = handle.read()

    return parser.get_first_result_index_from_quick_search_results(search_results_html)
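
The distinctive part of this example is the two-step flow: a GET against the PHIL landing page to establish the session, then a POST of the quick-search form through the same cookie jar. For comparison, here is a minimal sketch of that flow using the third-party requests library (not used in the examples above), whose Session object keeps cookies across calls; the URLs, form fields, and user agent come from the example, while the function name is chosen here.

import requests


def fetch_quicksearch_results_html(post_values):
    # A Session plays the role of the LWPCookieJar plus installed opener:
    # cookies set by one request are sent automatically on the next.
    session = requests.Session()
    session.headers.update(
        {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'})

    # Step 1: hit the landing page so the site hands out its session cookies.
    session.get('http://phil.cdc.gov/phil/home.asp').raise_for_status()

    # Step 2: submit the quick-search form; the cookies ride along automatically.
    response = session.post('http://phil.cdc.gov/phil/quicksearch.asp',
                            data=post_values)
    response.raise_for_status()
    return response.text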