import ConfigParser
from os.path import expanduser, join

# Assumed import: main() below builds the graph with Config(NEO4J_URI) and
# Graph(config), which is the Bulbs Neo4j API. TabDownloader, UserDownloader,
# TabSpider and save_comment are expected to be defined elsewhere in this module.
from bulbs.neo4jserver import Graph, Config, NEO4J_URI

conf_file = join(expanduser("~"), "scrape.conf")  # Config
config = ConfigParser.ConfigParser()
config.read(conf_file)
salt = config.get('info', 'salt')
tab_page = config.get('info', 'tab_page')
domain = config.get('info', 'domain')
delay = int(config.get('info', 'delay'))

# Leftover single-page test harness
#tdl = TabDownloader(domain, tab_page, salt)
#tu = UserDownloader(domain)
#test_page = "http://tabs."+domain+"/m/megadeth/symphony_of_destruction_ver6_guitar_pro.htm"
#test_page = "http://tabs."+domain+"/r/ryan_clough/love_lust_power_tab.htm"
#test_page = "http://tabs."+domain+"/l/lynyrd_skynyrd/free_bird_guitar_pro.htm"
#test_page = "http://tabs."+domain+"/j/justin_bieber/all_that_matters_crd.htm"
#tab_data = tdl.tab_download(test_page, True)
#tu.load_user("Bonsaischaap")

# Exercise the spider: walk every URL it can find
ts = TabSpider(domain, delay)
while ts.has_more():
    ts.next_url()

# if tab_data:
#     for comment in tab_data.comments:
#         print "level 1 comment by " + comment.author
#         if hasattr(comment, "child_comments"):
#             print "\tlevel 2 comments: " + str(comment.child_comments)
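# For reference, a scrape.conf shape the reads above would accept. The
# section and key names come from the config.get() calls; every value here
# is a made-up placeholder:
#
#   [info]
#   salt = some_site_specific_salt
#   tab_page = /tab/page/
#   domain = example.com
#   delay = 2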
def main(argv=None):
    # Config processing
    conf_file = join(expanduser("~"), "scrape.conf")
    config = ConfigParser.ConfigParser()
    config.read(conf_file)
    salt = config.get('info', 'salt')
    tab_page = config.get('info', 'tab_page')
    domain = config.get('info', 'domain')
    delay = int(config.get('info', 'delay'))

    # Start graph
    config = Config(NEO4J_URI)
    g = Graph(config)
    g.clear()  # Wipes the whole graph -- change this if you're working with a persistent data store!

    # Set up local indices so we only create one vertex per user/instrument
    users = {}
    instruments = {}

    # Make page crawlers
    tab_loader = TabDownloader(domain, tab_page, salt, delay)
    user_loader = UserDownloader(domain, delay)

    # Unofficial iterator
    resource = TabSpider(domain)

    # Start crawling!
    while resource.has_more():
        # Get tab info
        tab_info = tab_loader.tab_download(resource.next_url())

        # has_more doesn't actually work because of the convoluted logic
        # needed to keep track of tabs, so stop at the first failed download
        if not tab_info:
            break

        # Store base tab
        tab_node = g.vertices.create(name=tab_info.tab_file)
        tab_node.tab_file = tab_info.tab_file
        tab_node.title = tab_info.title
        tab_node.version = tab_info.version
        tab_node.rating = tab_info.rating
        tab_node.num_ratings = tab_info.num_ratings
        tab_node.num_comments = tab_info.num_comments
        tab_node.label = "tab"
        tab_node.save()

        # Add instruments, creating each instrument vertex only once
        for instrument in tab_info.instruments:
            if instrument not in instruments:
                i_node = g.vertices.create(name=instrument)
                i_node.label = "instrument"
                i_node.save()
                instruments[instrument] = i_node
            i_node = instruments[instrument]
            g.edges.create(tab_node, "has_instrument", i_node)

        # Add comments (recursive)
        if tab_info.comments:
            for comment in tab_info.comments:
                g.edges.create(tab_node, "has_comment", save_comment(g, comment))

        # Get info on the tabber if we don't have it yet
        if tab_info.tabber:
            if tab_info.tabber not in users:
                tabber = user_loader.load_user(tab_info.tabber)
                if not tabber:
                    continue

                # Create user node for tabber
                tempname = tabber.name
                if not tempname:
                    tempname = ""
                u_node = g.vertices.create(name=tempname)
                u_node.registration_date = tabber.registration_date
                u_node.num_contributions = tabber.num_contributions
                u_node.rank = tabber.rank
                u_node.save()
                users[tab_info.tabber] = u_node

            # Add tab to tabber's transcriptions
            tabber = users[tab_info.tabber]
            g.edges.create(tabber, "tabbed", tab_node)

    print "Finished crawl! Woah!"
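# Minimal sketch of the save_comment helper the loop above relies on, in case
# it is not already defined elsewhere in this module. It assumes comment
# objects carry an author and an optional child_comments list (as the
# commented-out debug code at the top suggests); the "comment" label and
# "has_child" edge name are made up for illustration.
def save_comment(g, comment):
    # Create a vertex for this comment
    c_node = g.vertices.create(name=comment.author)
    c_node.author = comment.author
    c_node.label = "comment"
    c_node.save()
    # Recurse into replies, linking each child comment back to its parent
    if hasattr(comment, "child_comments"):
        for child in comment.child_comments:
            g.edges.create(c_node, "has_child", save_comment(g, child))
    return c_node

# Assumed entry point -- the original never shows how main() is invoked,
# so this guard is a guess at the intended usage.
if __name__ == "__main__":
    main()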