Exemple #1
0
    def run(self):
        """
        Entry point used by subclasses to start the crawler.

        There is no exit condition: the loop repeats until an exception
        propagates. Each pass queues one ``type`` 0 task per entry of
        ``self.url_list``, launches ``self.number_of_threads`` threads
        running ``self.threader`` (ids start at 1) and waits for all of them
        before the next pass begins.
        """
        while True:
            # Announce the beginning of a crawl cycle
            banner = 'Starting new crawl with {0}.'.format(self.name)
            print_util.print_info(banner, Colors.BLACK)

            # Seed the task queue; every task starts with a clean error count
            for seed_url in self.url_list:
                seed_task = {'type': 0, 'url': seed_url, 'n_errors': 0}
                self.task_queue.put(seed_task)

            # Build the workers, each one receiving its id as sole argument
            workers = [
                Thread(target=self.threader, args=(worker_id, ))
                for worker_id in range(1, self.number_of_threads + 1)
            ]
            for worker in workers:
                worker.start()

            # Block until every worker has returned
            for worker in workers:
                worker.join()
Exemple #2
0
 def run(self):
     """
     Start the crawler; meant to be called by subclasses.

     Runs crawl cycles in an endless loop. A cycle puts one ``type`` 0 task
     on ``self.task_queue`` for each URL in ``self.url_list``, starts
     ``self.number_of_threads`` threads on ``self.threader`` (numbered from
     1) and joins them all.
     """
     while True:
         # A new crawl cycle begins here
         start_message = 'Starting crawl with {0}'.format(self.name)
         print_util.print_info(start_message, Colors.BLACK)

         # Queue the start URLs
         for start_url in self.url_list:
             self.task_queue.put(
                 {'type': 0, 'url': start_url, 'n_errors': 0})

         # Launch the worker pool
         pool = []
         last_id = self.number_of_threads
         for worker_id in range(1, last_id + 1):
             worker = Thread(target=self.threader, args=(worker_id,))
             pool.append(worker)
             worker.start()

         # Do not begin another cycle before all workers are done
         for worker in pool:
             worker.join()
Exemple #3
0
 def run(self):
     """
     Method called from subclasses to start the crawling process.

     Repeats without an exit condition: enqueue a ``type`` 0 task for every
     URL of ``self.url_list``, run ``self.threader`` in
     ``self.number_of_threads`` threads (ids 1..N), wait for all of them,
     then start over.
     """
     while True:
         # Log the start of this crawl cycle
         print_util.print_info(
             'Starting new crawl with {0}'.format(self.name),
             Colors.BLACK
         )

         # Fill the task queue with the initial URLs
         for initial_url in self.url_list:
             initial_task = {
                 'type': 0,
                 'url': initial_url,
                 'n_errors': 0
             }
             self.task_queue.put(initial_task)

         # Create one thread per id, then start them
         crawlers = [
             Thread(target=self.threader, args=(crawler_id,))
             for crawler_id in range(1, self.number_of_threads + 1)
         ]
         for crawler in crawlers:
             crawler.start()

         # Wait for every thread to finish
         for crawler in crawlers:
             crawler.join()
Exemple #4
0
    def run(self):
        """
        Function to be called by subclasses to start the crawler.

        Executes crawl cycles back to back with no exit condition. One cycle
        consists of queueing a ``type`` 0 task per URL in ``self.url_list``,
        starting ``self.number_of_threads`` threads that run
        ``self.threader`` with ids counted from 1, and joining them.
        """
        while True:
            # Start of a crawl cycle
            cycle_message = 'Starting new crawl with {0}.'.format(self.name)
            print_util.print_info(cycle_message, Colors.BLACK)

            # Every URL becomes a task that has not failed yet
            for page_url in self.url_list:
                self.task_queue.put(
                    {'type': 0, 'url': page_url, 'n_errors': 0})

            # Spawn the workers; the thread id is the only argument passed
            running = []
            for thread_no in range(1, self.number_of_threads + 1):
                worker = Thread(target=self.threader, args=(thread_no,))
                running.append(worker)
                worker.start()

            # Hold here until all workers have returned
            for worker in running:
                worker.join()
Exemple #5
0
 def run(self):
     """
     Method called from subclasses to start the crawling process.

     Loops with no exit condition. Per iteration it queues a ``type`` 0
     task for each entry of ``self.url_list``, starts
     ``self.number_of_threads`` threads executing ``self.threader`` (thread
     ids begin at 1) and joins all of them.
     """
     while True:
         # Report that a crawl cycle is starting
         announcement = 'Starting new crawl with {0}'.format(self.name)
         print_util.print_info(announcement, Colors.BLACK)

         # Enqueue the root URLs
         for root_url in self.url_list:
             root_task = {'type': 0, 'url': root_url, 'n_errors': 0}
             self.task_queue.put(root_task)

         # Start one worker per thread id
         started = []
         thread_ids = range(1, self.number_of_threads + 1)
         for thread_id in thread_ids:
             worker = Thread(
                 target=self.threader,
                 args=(thread_id, )
             )
             started.append(worker)
             worker.start()

         # Join them all before looping again
         for worker in started:
             worker.join()
Exemple #6
0
 def run(self):
     """
     Function to be called by subclasses to start the crawler.

     Keeps crawling in an endless loop: queue one ``type`` 0 task per URL in
     ``self.url_list``, start ``self.number_of_threads`` threads on
     ``self.threader`` (ids from 1 upwards), wait for them, repeat.
     """
     while True:
         # Crawl cycle starts
         print_util.print_info(
             'Starting crawl with {0}'.format(self.name),
             Colors.BLACK
         )

         # Put the configured URLs on the task queue
         for target_url in self.url_list:
             task = {
                 'type': 0,
                 'url': target_url,
                 'n_errors': 0
             }
             self.task_queue.put(task)

         # Create the threads first, then start them
         thread_pool = [
             Thread(target=self.threader, args=(number, ))
             for number in range(1, self.number_of_threads + 1)
         ]
         for member in thread_pool:
             member.start()

         # Wait for threads to finish
         for member in thread_pool:
             member.join()
Exemple #7
0
    def threader(self, thread_id):
        """
        Worker function: process tasks from ``self.task_queue`` until it is
        empty.

        A task is a dict carrying at least ``'type'``, ``'url'`` and
        ``'n_errors'``. Type 0 is handed to ``get_movies``, type 1 to
        ``download_movie`` and type 2 to ``download_song``; any other type
        is logged as complete without a handler being called. A task whose
        handler raises gets its ``'n_errors'`` incremented and is queued
        again; once ``'n_errors'`` reaches ``self.max_allowed_errors`` the
        task is dropped with a warning.

        :param thread_id: Assigned ID of thread; forwarded to the handlers
            and used as prefix of every log message.
        :return: None
        """
        # Imported locally so the method does not rely on a module-level
        # import; the module is named ``Queue`` on Python 2.
        try:
            from queue import Empty
        except ImportError:
            from Queue import Empty

        while True:
            try:
                # Fetch without blocking. Testing empty() and then calling a
                # blocking get() lets another worker take the last task in
                # between, which would leave this thread waiting forever.
                # NOTE(review): assumes task_queue is a stdlib Queue or a
                # compatible object -- confirm.
                task = self.task_queue.get_nowait()
            except Empty:
                break  # No tasks left

            if task['n_errors'] >= self.max_allowed_errors:  # Too many errors
                print_util.print_warning(
                    '{0} --> Too many errors in task {1}. Skipping.'.format(
                        thread_id, task))
                continue

            print_util.print_info('{0} --> New task : {1}'.format(
                thread_id, task))  # Log the task

            try:
                # Call corresponding function
                if task['type'] == 0:
                    self.get_movies(thread_id, task['url'])
                elif task['type'] == 1:
                    self.download_movie(thread_id, task['url'], task['movie'])
                elif task['type'] == 2:
                    self.download_song(thread_id, task['url'], task['song'],
                                       task['movie'], task['movie_url'])

                print_util.print_info('{0} --> Task complete : {1}'.format(
                    thread_id, task), Colors.GREEN)  # Log success

            except Exception as e:  # Handler failed: log it and retry later
                print_util.print_error('{0} --> Error : {1}'.format(
                    thread_id, e))
                task['n_errors'] += 1  # Increment number of errors
                self.task_queue.put(task)  # Put back in queue
Exemple #8
0
    def threader(self, thread_id):
        """
        Worker function: process tasks from ``self.task_queue`` until it is
        empty.

        A task is a dict carrying at least ``'type'``, ``'url'`` and
        ``'n_errors'``. Type 0 is handed to ``get_artists``, type 1 to
        ``get_artist``, type 2 to ``get_songs_from_page`` and type 3 to
        ``get_song``; any other type is logged as complete without a handler
        being called. A task whose handler raises gets its ``'n_errors'``
        incremented and is queued again; once ``'n_errors'`` reaches
        ``self.max_allowed_errors`` the task is dropped with a warning.

        :param thread_id: Assigned ID of thread; forwarded to the handlers
            and used as prefix of every log message.
        :return: None
        """
        # Imported locally so the method does not rely on a module-level
        # import; the module is named ``Queue`` on Python 2.
        try:
            from queue import Empty
        except ImportError:
            from Queue import Empty

        while True:
            try:
                # Fetch without blocking. Testing empty() and then calling a
                # blocking get() lets another worker take the last task in
                # between, which would leave this thread waiting forever.
                # NOTE(review): assumes task_queue is a stdlib Queue or a
                # compatible object -- confirm.
                task = self.task_queue.get_nowait()
            except Empty:
                break  # No tasks left

            if task['n_errors'] >= self.max_allowed_errors:
                # Give up on tasks that failed too often
                print_util.print_warning(
                    '{0} --> Too many errors in task {1}. Skipping.'.format(
                        thread_id, task))
                continue

            print_util.print_info('{0} --> New task : {1}'.format(
                thread_id, task))

            try:
                # Dispatch on the task type
                if task['type'] == 0:
                    self.get_artists(thread_id, task['url'])
                elif task['type'] == 1:
                    self.get_artist(thread_id, task['url'], task['artist'])
                elif task['type'] == 2:
                    self.get_songs_from_page(thread_id, task['url'],
                                             task['artist'])
                elif task['type'] == 3:
                    self.get_song(thread_id, task['url'], task['song'],
                                  task['artist'])
                print_util.print_info(
                    '{0} --> Task complete : {1}'.format(thread_id, task),
                    Colors.GREEN)
            except Exception as e:
                # Handler failed: log it, count the error and retry later
                print_util.print_error('{0} --> Error : {1}'.format(
                    thread_id, e))
                task['n_errors'] += 1
                self.task_queue.put(task)
Exemple #9
0
    def threader(self, thread_id):
        """
        Worker function: process tasks from ``self.task_queue`` until it is
        empty.

        A task is a dict carrying at least ``'type'``, ``'url'`` and
        ``'n_errors'``. Type 0 is handed to ``get_artists``, type 1 to
        ``get_artist``, type 2 to ``get_songs_from_page`` and type 3 to
        ``get_song``; any other type is logged as complete without a handler
        being called. A task whose handler raises gets its ``'n_errors'``
        incremented and is queued again; once ``'n_errors'`` reaches
        ``self.max_allowed_errors`` the task is dropped with a warning.

        :param thread_id: Assigned ID of thread; forwarded to the handlers
            and used as prefix of every log message.
        :return: None
        """
        # Imported locally so the method does not rely on a module-level
        # import; the module is named ``Queue`` on Python 2.
        try:
            from queue import Empty
        except ImportError:
            from Queue import Empty

        while True:
            try:
                # Fetch without blocking. Testing empty() and then calling a
                # blocking get() lets another worker take the last task in
                # between, which would leave this thread waiting forever.
                # NOTE(review): assumes task_queue is a stdlib Queue or a
                # compatible object -- confirm.
                task = self.task_queue.get_nowait()
            except Empty:
                break  # No tasks left

            if task['n_errors'] >= self.max_allowed_errors:
                # Give up on tasks that failed too often
                print_util.print_warning(
                    '{0} --> Too many errors in task {1}. Skipping.'.format(
                        thread_id,
                        task
                    )
                )
                continue

            print_util.print_info(
                '{0} --> New task : {1}'.format(
                    thread_id,
                    task
                )
            )

            try:
                # Dispatch on the task type
                if task['type'] == 0:
                    self.get_artists(
                        thread_id,
                        task['url']
                    )
                elif task['type'] == 1:
                    self.get_artist(
                        thread_id,
                        task['url'],
                        task['artist']
                    )
                elif task['type'] == 2:
                    self.get_songs_from_page(
                        thread_id,
                        task['url'],
                        task['artist']
                    )
                elif task['type'] == 3:
                    self.get_song(
                        thread_id,
                        task['url'],
                        task['song'],
                        task['artist']
                    )
                print_util.print_info(
                    '{0} --> Task complete : {1}'.format(
                        thread_id,
                        task
                    ),
                    Colors.GREEN
                )
            except Exception as e:
                # Handler failed: log it, count the error and retry later
                print_util.print_error(
                    '{0} --> Error : {1}'.format(
                        thread_id,
                        e
                    )
                )
                task['n_errors'] += 1
                self.task_queue.put(task)
Exemple #10
0
    def threader(self, thread_id):
        """
        Worker function: process tasks from ``self.task_queue`` until it is
        empty.

        A task is a dict carrying at least ``'type'``, ``'url'`` and
        ``'n_errors'``. Type 0 is handed to ``get_movies``, type 1 to
        ``download_movie`` and type 2 to ``download_song``; any other type
        is logged as complete without a handler being called. A task whose
        handler raises gets its ``'n_errors'`` incremented and is queued
        again; once ``'n_errors'`` reaches ``self.max_allowed_errors`` the
        task is dropped with a warning.

        :param thread_id: Assigned ID of thread; forwarded to the handlers
            and used as prefix of every log message.
        :return: None
        """
        # Imported locally so the method does not rely on a module-level
        # import; the module is named ``Queue`` on Python 2.
        try:
            from queue import Empty
        except ImportError:
            from Queue import Empty

        while True:
            try:
                # Fetch without blocking. Testing empty() and then calling a
                # blocking get() lets another worker take the last task in
                # between, which would leave this thread waiting forever.
                # NOTE(review): assumes task_queue is a stdlib Queue or a
                # compatible object -- confirm.
                task = self.task_queue.get_nowait()
            except Empty:
                break  # No tasks left

            if task['n_errors'] >= self.max_allowed_errors:  # Too many errors
                print_util.print_warning(
                    '{0} --> Too many errors in task {1}. Skipping.'.format(
                        thread_id,
                        task
                    )
                )
                continue

            print_util.print_info(
                '{0} --> New task : {1}'.format(
                    thread_id,
                    task
                )
            )  # Log the task

            try:
                # Call corresponding function
                if task['type'] == 0:
                    self.get_movies(
                        thread_id,
                        task['url']
                    )
                elif task['type'] == 1:
                    self.download_movie(
                        thread_id,
                        task['url'],
                        task['movie']
                    )
                elif task['type'] == 2:
                    self.download_song(
                        thread_id,
                        task['url'],
                        task['song'],
                        task['movie'],
                        task['movie_url']
                    )

                print_util.print_info(
                    '{0} --> Task complete : {1}'.format(
                        thread_id,
                        task
                    ),
                    Colors.GREEN
                )  # Log success

            except Exception as e:  # Handler failed: log it and retry later
                print_util.print_error(
                    '{0} --> Error : {1}'.format(
                        thread_id,
                        e
                    )
                )
                task['n_errors'] += 1  # Increment number of errors
                self.task_queue.put(task)  # Put back in queue