Ejemplo n.º 1
0
 def web_voyage(self):  # Recursive crawling
     """Visit randomly picked pending URLs until none are left.

     Each iteration picks one entry of ``self.pendingURLMemory`` with
     ``mx.pickrandom``, hands it to ``self.visit`` and then pauses for
     0.2 seconds.

     NOTE(review): termination relies on ``self.visit`` (or something it
     calls) eventually emptying ``self.pendingURLMemory`` -- confirm.
     """
     # Loop on the pending collection rather than ``while True`` so the
     # method can actually finish and the closing message is reachable.
     while self.pendingURLMemory:
         url_current = mx.pickrandom(self.pendingURLMemory)
         self.visit(url_current)
         time.sleep(0.2)  # short pause between consecutive visits
     print('>>> Voyage ENDED')
Ejemplo n.º 2
0
 def web_voyage(self):  # Recursive crawling
     """Visit randomly picked pending URLs until none are left.

     Each iteration picks one entry of ``self.pendingURLMemory`` with
     ``mx.pickrandom`` and hands it to ``self.visit``.

     Raises:
         Whatever ``mx.pickrandom`` or ``self.visit`` raises; the first
         failure aborts the crawl and propagates to the caller.

     NOTE(review): termination relies on ``self.visit`` (or something it
     calls) eventually emptying ``self.pendingURLMemory`` -- confirm.
     """
     # No try/except here: the previous handler only re-raised the same
     # exception, so letting it propagate directly is equivalent.
     while self.pendingURLMemory:
         url_current = mx.pickrandom(self.pendingURLMemory)
         self.visit(url_current)
     print('>>> Voyage ENDED')
Ejemplo n.º 3
0
def get_random_article():
    """Load a randomly chosen stored page from a random scraper profile.

    Profiles are the entries of ``hyperScraper_profiles/``; the pages of a
    profile are the files in its ``data/`` sub-directory.  Profiles are tried
    in random order and the first one whose data directory is non-empty
    supplies the page, so the profile is picked uniformly among the
    non-empty ones.

    Returns:
        The object produced by ``mx.jload`` for the chosen page file, with
        its ``'localpath'`` key set to that file's path, or ``None`` when no
        profile contains any page.

    Raises:
        OSError: if ``hyperScraper_profiles/`` or a profile's ``data/``
            directory cannot be listed.
    """
    navdir = 'hyperScraper_profiles/'
    domains = os.listdir(navdir)
    # Walk the profiles in shuffled order instead of re-drawing recursively:
    # every profile is inspected at most once, so the search always ends,
    # and the result is actually returned to the caller.
    random.shuffle(domains)
    for domain in domains:
        data_dir = navdir + domain + '/data/'
        pages = os.listdir(data_dir)  # listed once, reused for check and pick
        if not pages:
            continue
        page_path = data_dir + random.choice(pages)
        pagedict = mx.jload(page_path)
        pagedict['localpath'] = page_path
        return pagedict
    return None
Ejemplo n.º 4
0
    def web_voyage(self):  # Recursive crawling
        """Visit randomly picked pending URLs until none are left.

        Each iteration picks one entry of ``self.pendingURLMemory`` with
        ``mx.pickrandom``, hands it to ``self.visit`` and then pauses for
        0.2 seconds.

        Raises:
            Whatever ``mx.pickrandom`` or ``self.visit`` raises; the first
            failure aborts the crawl and propagates to the caller.

        NOTE(review): termination relies on ``self.visit`` (or something it
        calls) eventually emptying ``self.pendingURLMemory`` -- confirm.
        """
        # No try/except here: the previous handler re-raised the same
        # exception and its trailing print could never run.
        while self.pendingURLMemory:
            url_current = mx.pickrandom(self.pendingURLMemory)
            self.visit(url_current)
            time.sleep(0.2)  # short pause between consecutive visits
        print('>>> Voyage ENDED')