def run(self):
    """
    Entry point that subclasses call to launch the crawler.

    Repeats crawl cycles without end. Every cycle queues one type-0 task
    per URL in ``self.url_list``, starts ``self.number_of_threads`` worker
    threads running ``self.threader`` and waits for all of them to return.
    """
    while True:
        # Announce the beginning of a crawl cycle
        banner = 'Starting new crawl with {0}.'.format(self.name)
        print_util.print_info(banner, Colors.BLACK)

        # Seed the queue with one fresh task per start URL
        for start_url in self.url_list:
            seed_task = {
                'type': 0,
                'url': start_url,
                'n_errors': 0,  # Nothing has failed yet
            }
            self.task_queue.put(seed_task)

        # Launch the workers; thread ids run from 1 to number_of_threads
        workers = []
        for worker_id in range(1, self.number_of_threads + 1):
            worker = Thread(target=self.threader, args=(worker_id,))
            workers.append(worker)
            worker.start()

        # Block until every worker of this cycle has returned
        for worker in workers:
            worker.join()
def run(self):
    """
    Start the crawler; meant to be invoked by subclasses.

    Loops forever. Each pass fills the task queue with one type-0 task per
    entry of ``self.url_list``, spawns ``self.number_of_threads`` threads
    that execute ``self.threader`` and joins them before the next pass.
    """
    while True:
        # A new crawl cycle begins here
        print_util.print_info(
            'Starting crawl with {0}'.format(self.name),
            Colors.BLACK,
        )

        # Queue every configured URL as an error-free type-0 task
        for url in self.url_list:
            self.task_queue.put({'type': 0, 'url': url, 'n_errors': 0})

        # Spawn the worker threads, numbering them from 1
        pool = []
        for thread_id in range(1, self.number_of_threads + 1):
            worker = Thread(target=self.threader, args=(thread_id,))
            pool.append(worker)
            worker.start()

        # Hold the cycle open until all workers are done
        for worker in pool:
            worker.join()
def run(self):
    """
    Kick off the crawling process; subclasses call this method.

    Runs crawl cycles in an endless loop. A cycle enqueues a type-0 task
    for each URL in ``self.url_list``, then runs ``self.threader`` on
    ``self.number_of_threads`` threads and waits for them all to end.
    """
    while True:
        # Log the start of the cycle
        start_message = 'Starting new crawl with {0}'.format(self.name)
        print_util.print_info(start_message, Colors.BLACK)

        # Enqueue the initial task for each URL
        for root_url in self.url_list:
            self.task_queue.put(
                {'type': 0, 'url': root_url, 'n_errors': 0}
            )

        # Create and start one thread per id in 1..number_of_threads
        running = []
        for ident in range(1, self.number_of_threads + 1):
            crawler_thread = Thread(target=self.threader, args=(ident,))
            running.append(crawler_thread)
            crawler_thread.start()

        # Wait for the whole batch before starting over
        for crawler_thread in running:
            crawler_thread.join()
def run(self):
    """
    Launch the crawler. Subclasses are expected to call this.

    The method never returns: it keeps running crawl cycles. In each cycle
    every URL of ``self.url_list`` is queued as a type-0 task, then
    ``self.number_of_threads`` threads run ``self.threader`` and are joined.
    """
    while True:
        # Crawl cycle begins
        print_util.print_info(
            'Starting new crawl with {0}.'.format(self.name),
            Colors.BLACK,
        )

        # Put every URL on the task queue
        for entry_url in self.url_list:
            initial_task = {
                'type': 0,
                'url': entry_url,
                'n_errors': 0,  # Starts with a clean error count
            }
            self.task_queue.put(initial_task)

        # Bring up all worker threads
        started = []  # Threads started during this cycle
        for number in range(1, self.number_of_threads + 1):
            thread = Thread(
                target=self.threader,  # Function each worker executes
                args=(number,),        # The worker's id
            )
            started.append(thread)
            thread.start()

        # Join them all before the next cycle
        for thread in started:
            thread.join()
def run(self):
    """
    Begin crawling; this is the method subclasses invoke.

    Executes an endless series of crawl cycles. Per cycle: one type-0 task
    is queued for each URL in ``self.url_list``, ``self.number_of_threads``
    threads are started on ``self.threader`` and then joined.
    """
    while True:
        # Start of a crawl cycle
        cycle_notice = 'Starting new crawl with {0}'.format(self.name)
        print_util.print_info(cycle_notice, Colors.BLACK)

        # Load the task queue with the configured URLs
        for link in self.url_list:
            first_task = {'type': 0, 'url': link, 'n_errors': 0}
            self.task_queue.put(first_task)

        # Start the threads, ids counting up from 1
        active_threads = []
        for tid in range(1, self.number_of_threads + 1):
            new_thread = Thread(target=self.threader, args=(tid,))
            active_threads.append(new_thread)
            new_thread.start()

        # Wait until each started thread has finished
        for new_thread in active_threads:
            new_thread.join()
def run(self):
    """
    Run the crawler; called by subclasses to get things going.

    Cycles indefinitely. In every cycle the URLs in ``self.url_list`` are
    queued as type-0 tasks, after which ``self.number_of_threads`` threads
    executing ``self.threader`` are started and waited for.
    """
    while True:
        # Crawl cycle starts
        notice = 'Starting crawl with {0}'.format(self.name)
        print_util.print_info(notice, Colors.BLACK)

        # Fill the task queue from the URL list
        for page_url in self.url_list:
            self.task_queue.put(
                {'type': 0, 'url': page_url, 'n_errors': 0}
            )

        # Fire up the worker threads (ids 1..number_of_threads)
        thread_pool = []
        for index in range(1, self.number_of_threads + 1):
            task_thread = Thread(target=self.threader, args=(index,))
            thread_pool.append(task_thread)
            task_thread.start()

        # Wait for the threads to complete
        for task_thread in thread_pool:
            task_thread.join()
def threader(self, thread_id):
    """
    Worker function: process tasks until the task queue is drained.

    Tasks are dicts read through their 'type', 'n_errors' and (depending
    on the type) 'url', 'movie', 'song' and 'movie_url' keys. The type
    selects the handler:

    * 0 -> ``self.get_movies(thread_id, url)``
    * 1 -> ``self.download_movie(thread_id, url, movie)``
    * 2 -> ``self.download_song(thread_id, url, song, movie, movie_url)``

    A task whose handler raises is put back on the queue with 'n_errors'
    incremented; once that counter reaches ``self.max_allowed_errors`` the
    task is dropped with a warning.

    :param thread_id: Assigned ID of thread, used in the log messages.
    """
    # Local import keeps this method independent of the file's import block.
    try:
        from queue import Empty
    except ImportError:  # Python 2 names the module 'Queue'
        from Queue import Empty

    while True:
        # Fetch without blocking. Checking empty() and then calling a
        # blocking get() is a race: another worker can take the last task
        # in between, leaving this thread stuck in get() forever.
        # NOTE(review): assumes task_queue follows the queue.Queue API.
        try:
            task = self.task_queue.get_nowait()
        except Empty:
            break  # No tasks left for this worker

        if task['n_errors'] >= self.max_allowed_errors:
            # Too many errors: drop the task instead of retrying it
            print_util.print_warning(
                '{0} --> Too many errors in task {1}. Skipping.'.format(
                    thread_id, task))
            continue

        # Log the task
        print_util.print_info('{0} --> New task : {1}'.format(
            thread_id, task))

        try:
            # Call the handler matching the task type
            if task['type'] == 0:
                self.get_movies(thread_id, task['url'])
            elif task['type'] == 1:
                self.download_movie(thread_id, task['url'], task['movie'])
            elif task['type'] == 2:
                self.download_song(thread_id, task['url'], task['song'],
                                   task['movie'], task['movie_url'])
            # Log success
            print_util.print_info('{0} --> Task complete : {1}'.format(
                thread_id, task), Colors.GREEN)
        except Exception as e:
            # Any failure is logged and the task is queued again for retry
            print_util.print_error('{0} --> Error : {1}'.format(
                thread_id, e))
            task['n_errors'] += 1
            self.task_queue.put(task)
def threader(self, thread_id):
    """
    Worker function: process tasks until the task queue is drained.

    Tasks are dicts read through their 'type', 'n_errors' and (depending
    on the type) 'url', 'artist' and 'song' keys. The type selects the
    handler:

    * 0 -> ``self.get_artists(thread_id, url)``
    * 1 -> ``self.get_artist(thread_id, url, artist)``
    * 2 -> ``self.get_songs_from_page(thread_id, url, artist)``
    * 3 -> ``self.get_song(thread_id, url, song, artist)``

    A task whose handler raises is put back on the queue with 'n_errors'
    incremented; once that counter reaches ``self.max_allowed_errors`` the
    task is dropped with a warning.

    :param thread_id: Assigned ID of thread, used in the log messages.
    """
    # Local import keeps this method independent of the file's import block.
    try:
        from queue import Empty
    except ImportError:  # Python 2 names the module 'Queue'
        from Queue import Empty

    while True:
        # Fetch without blocking. Checking empty() and then calling a
        # blocking get() is a race: another worker can take the last task
        # in between, leaving this thread stuck in get() forever.
        # NOTE(review): assumes task_queue follows the queue.Queue API.
        try:
            task = self.task_queue.get_nowait()
        except Empty:
            break  # No tasks left for this worker

        if task['n_errors'] >= self.max_allowed_errors:
            # Too many errors: drop the task instead of retrying it
            print_util.print_warning(
                '{0} --> Too many errors in task {1}. Skipping.'.format(
                    thread_id, task))
            continue

        print_util.print_info('{0} --> New task : {1}'.format(
            thread_id, task))

        try:
            # Call the handler matching the task type
            if task['type'] == 0:
                self.get_artists(thread_id, task['url'])
            elif task['type'] == 1:
                self.get_artist(thread_id, task['url'], task['artist'])
            elif task['type'] == 2:
                self.get_songs_from_page(thread_id, task['url'],
                                         task['artist'])
            elif task['type'] == 3:
                self.get_song(thread_id, task['url'], task['song'],
                              task['artist'])
            print_util.print_info(
                '{0} --> Task complete : {1}'.format(thread_id, task),
                Colors.GREEN)
        except Exception as e:
            # Any failure is logged and the task is queued again for retry
            print_util.print_error('{0} --> Error : {1}'.format(
                thread_id, e))
            task['n_errors'] += 1
            self.task_queue.put(task)
def threader(self, thread_id):
    """
    Worker function: consume tasks from the queue until none are left.

    Every task is a dict. 'n_errors' and 'type' are always read; 'url',
    'artist' and 'song' are read as required by the handler chosen
    through 'type':

    * 0 -> ``self.get_artists(thread_id, url)``
    * 1 -> ``self.get_artist(thread_id, url, artist)``
    * 2 -> ``self.get_songs_from_page(thread_id, url, artist)``
    * 3 -> ``self.get_song(thread_id, url, song, artist)``

    When a handler raises, the error is logged, the task's 'n_errors' is
    incremented and the task is queued again. Tasks whose 'n_errors' has
    reached ``self.max_allowed_errors`` are skipped with a warning.

    :param thread_id: Assigned ID of thread, used in the log messages.
    """
    # Imported here so the method does not rely on the file-level imports.
    try:
        from queue import Empty
    except ImportError:  # Python 2 names the module 'Queue'
        from Queue import Empty

    while True:
        # Non-blocking fetch: with empty() followed by a blocking get(),
        # a second worker could grab the final task between the two calls
        # and this worker would then wait in get() forever.
        # NOTE(review): assumes task_queue follows the queue.Queue API.
        try:
            task = self.task_queue.get_nowait()
        except Empty:
            break  # Queue drained, worker is done

        if task['n_errors'] >= self.max_allowed_errors:
            # Give up on tasks that keep failing
            print_util.print_warning(
                '{0} --> Too many errors in task {1}. Skipping.'.format(
                    thread_id, task
                )
            )
            continue

        print_util.print_info(
            '{0} --> New task : {1}'.format(thread_id, task)
        )

        try:
            # Dispatch on the task type
            if task['type'] == 0:
                self.get_artists(thread_id, task['url'])
            elif task['type'] == 1:
                self.get_artist(thread_id, task['url'], task['artist'])
            elif task['type'] == 2:
                self.get_songs_from_page(
                    thread_id, task['url'], task['artist']
                )
            elif task['type'] == 3:
                self.get_song(
                    thread_id, task['url'], task['song'], task['artist']
                )
            print_util.print_info(
                '{0} --> Task complete : {1}'.format(thread_id, task),
                Colors.GREEN
            )
        except Exception as e:
            # Log the failure and schedule the task for another attempt
            print_util.print_error(
                '{0} --> Error : {1}'.format(thread_id, e)
            )
            task['n_errors'] += 1
            self.task_queue.put(task)
def threader(self, thread_id):
    """
    Worker function: consume tasks from the queue until none are left.

    Every task is a dict. 'n_errors' and 'type' are always read; 'url',
    'movie', 'song' and 'movie_url' are read as required by the handler
    chosen through 'type':

    * 0 -> ``self.get_movies(thread_id, url)``
    * 1 -> ``self.download_movie(thread_id, url, movie)``
    * 2 -> ``self.download_song(thread_id, url, song, movie, movie_url)``

    When a handler raises, the error is logged, the task's 'n_errors' is
    incremented and the task is queued again. Tasks whose 'n_errors' has
    reached ``self.max_allowed_errors`` are skipped with a warning.

    :param thread_id: Assigned ID of thread, used in the log messages.
    """
    # Imported here so the method does not rely on the file-level imports.
    try:
        from queue import Empty
    except ImportError:  # Python 2 names the module 'Queue'
        from Queue import Empty

    while True:
        # Non-blocking fetch: with empty() followed by a blocking get(),
        # a second worker could grab the final task between the two calls
        # and this worker would then wait in get() forever.
        # NOTE(review): assumes task_queue follows the queue.Queue API.
        try:
            task = self.task_queue.get_nowait()
        except Empty:
            break  # Queue drained, worker is done

        if task['n_errors'] >= self.max_allowed_errors:
            # Too many errors
            print_util.print_warning(
                '{0} --> Too many errors in task {1}. Skipping.'.format(
                    thread_id, task
                )
            )
            continue

        # Log the task
        print_util.print_info(
            '{0} --> New task : {1}'.format(thread_id, task)
        )

        try:
            # Call corresponding function
            if task['type'] == 0:
                self.get_movies(thread_id, task['url'])
            elif task['type'] == 1:
                self.download_movie(
                    thread_id, task['url'], task['movie']
                )
            elif task['type'] == 2:
                self.download_song(
                    thread_id,
                    task['url'],
                    task['song'],
                    task['movie'],
                    task['movie_url']
                )
            # Log success
            print_util.print_info(
                '{0} --> Task complete : {1}'.format(thread_id, task),
                Colors.GREEN
            )
        except Exception as e:
            # Log the error, count it and put the task back for a retry
            print_util.print_error(
                '{0} --> Error : {1}'.format(thread_id, e)
            )
            task['n_errors'] += 1
            self.task_queue.put(task)