Example #1
def traverse(node):
    """ Pre-order depth-first search of Mountain Project tree """

    children = []
    for href in node.children_href:
        # initialize Scraper for this page
        scrap = Scraper(href)
        if scrap.soup is None:
            # page could not be fetched/parsed; skip it
            continue
        # grab features from the soup
        dest = scrap.create_destination()
        # find children in the soup, if any
        dest.children_href = scrap.get_children()
        # recurse deeper down the tree if this is an area
        if dest.children_href is not None:
            print()
            print('**' + dest.nickname + '**')
            traverse(dest)
        # inner traverse call has returned with a populated destination object
        print(dest.nickname + ' | ' + dest.href)
        children.append(dest)

    node.children = children
    return node
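All three examples lean on a Scraper class and the Destination objects it builds, neither of which appears in these snippets. The sketch below is a hypothetical reconstruction of that interface, inferred only from how the examples use it; the attribute names come from the examples, but every default and method body here is an assumption, not the project's actual code.

import requests
from bs4 import BeautifulSoup


class Destination(object):
    """ Node in the area/route tree (assumed shape) """

    def __init__(self, nickname, href):
        self.nickname = nickname    # short name, used in output filenames
        self.href = href            # URL of the page this node came from
        self.children_href = None   # child URLs; None marks a leaf (route)
        self.children = []          # Destination objects, set by traverse()


class Scraper(object):
    """ Fetches and parses one Mountain Project page (assumed shape) """

    def __init__(self, href):
        self.href = href
        try:
            resp = requests.get(href, timeout=10)
            resp.raise_for_status()
            self.soup = BeautifulSoup(resp.text, 'html.parser')
        except requests.RequestException:
            self.soup = None        # traverse() skips unreachable pages

    def create_destination(self):
        # real code would parse a proper name out of the page
        title = self.soup.title.string.strip() if self.soup.title else self.href
        return Destination(title, self.href)

    def get_children(self):
        # real code would pick out child-area/route links from the page;
        # returning None marks this page as a leaf
        hrefs = [a['href'] for a in self.soup.select('a[href]')
                 if '/area/' in a['href']]
        return hrefs or None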
Example #2
import json
import os
import pickle


def save_info_from(href, data_dir):
    """ Crawl one child area and save its tree as JSON and pickle """

    # initialize child destination
    scrap = Scraper(href)
    dest = scrap.create_destination()
    dest.children_href = scrap.get_children()

    # check if we have already crawled this area
    OBJECT_OUTFILE = data_dir + dest.nickname + '.pickle'
    if os.path.exists(OBJECT_OUTFILE):
        print(dest.nickname + ' has already been crawled')
        return

    if not os.path.isdir(os.path.dirname(OBJECT_OUTFILE)):
        os.makedirs(os.path.dirname(OBJECT_OUTFILE))

    # traverse tree of areas --> routes; returns a destination object
    all_dest = traverse(dest)

    # write out to JSON, e.g. for visualization
    BIG_JSON = data_dir + dest.nickname + '.json'
    with open(BIG_JSON, 'w+') as dump:
        flat = json.dumps(all_dest, default=lambda o: o.__dict__)
        dump.write(flat)

    # save destination object as a pickle (the crawl marker checked above)
    with open(OBJECT_OUTFILE, 'wb') as handle:
        pickle.dump(all_dest, handle)

    flourish = '<<<' + '-' * 25
    print(flourish + dest.nickname + flourish[::-1])
    print()
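Reading the results back is symmetric: the pickle restores the full Destination tree, while the JSON dump gives plain nested dicts. A minimal sketch; the 'data/' directory and the 'Colorado' nickname are illustrative assumptions, not values from the examples.

import json
import pickle

# reload the full Destination tree saved by save_info_from()
with open('data/Colorado.pickle', 'rb') as handle:
    tree = pickle.load(handle)

# or load the flattened JSON dump (plain dicts, handy for visualization)
with open('data/Colorado.json') as dump:
    flat = json.load(dump)

print(tree.nickname, '|', len(tree.children), 'children')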
Example #3
def scrape_all(root_href, data_dir):
    """ Scrape Mountain Project and save Destination objects """
    
    scrap = Scraper(root_href)

    # iterate over children of the root (e.g. states in the US)
    for href in scrap.get_children():
        save_info_from(href, data_dir)
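Tying the three examples together, a top-level run only needs the root URL and an output directory. A minimal sketch; the route-guide URL and 'data/' path are illustrative assumptions, not values from the source. Note that save_info_from() builds output paths by plain string concatenation, so data_dir must end with a slash.

if __name__ == '__main__':
    # hypothetical root whose children are the US states
    ROOT_HREF = 'https://www.mountainproject.com/route-guide'
    DATA_DIR = 'data/'  # trailing slash required: paths are concatenated

    scrape_all(ROOT_HREF, DATA_DIR)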