def run(self, features, number_of_clusters=2, restarts=10,
        precompute_distances=True, initialization='k-means++',
        compute_in_parallel=True):
    # a single cluster is trivial: every sample gets label 0.
    if number_of_clusters == 1:
        result = numpy.zeros(len(features), dtype=numpy.int32)
        return [result]
    # use every configured worker, or a single process when parallel
    # execution is disabled.
    if compute_in_parallel:
        num_cpus = config_manager.get_num_workers()
    else:
        num_cpus = 1
    # scikit-learn's KMeans takes n_clusters, not k; note that
    # precompute_distances and n_jobs were removed in scikit-learn 1.0.
    return [KMeans(n_clusters=number_of_clusters, init=initialization,
                   n_init=restarts, precompute_distances=precompute_distances,
                   n_jobs=num_cpus).fit_predict(features)]
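For reference, a standalone sketch of the underlying scikit-learn call; the array shape and parameter values below are illustrative only, not taken from the original project.

import numpy
from sklearn.cluster import KMeans

# made-up data: 100 samples with 4 features each.
features = numpy.random.rand(100, 4)
labels = KMeans(n_clusters=3, init='k-means++',
                n_init=10).fit_predict(features)
print(labels[:10])   # one cluster label per sample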
Example #2
    def open_files(self, fullpaths, **kwargs):
        '''
            Open multiple data files. Returns a single list of
        the trials created across all files.
        '''
        file_interpreters = plugin_manager.file_interpreters
        if len(fullpaths) == 1:
            try:
                results = open_data_file(fullpaths[0], file_interpreters, 
                        **kwargs)
            except Exception:
                results = []
                traceback.print_exc()
            return results

        # never start more workers than there are files to open.
        num_process_workers = min(config_manager.get_num_workers(),
                                  len(fullpaths))

        # set up the input and results queues.
        input_queue = multiprocessing.Queue()
        for fullpath in fullpaths:
            input_queue.put(fullpath)
        for i in range(num_process_workers):
            input_queue.put(None)   # one sentinel per worker signals exit
        results_queue = multiprocessing.Queue()

        # start the jobs
        jobs = []
        for i in range(num_process_workers):
            job = multiprocessing.Process(target=open_file_worker, 
                                          args=(input_queue, 
                                                results_queue))
            job.start()
            jobs.append(job)

        # collect the results, waiting for all the jobs to complete
        results_list = []
        for i in range(len(fullpaths)):
            # each file interpreter returns a list of trial objects.
            results_list.extend(results_queue.get())

        for job in jobs:
            job.join() # halt this thread until processes are all complete.

        return results_list
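The open_file_worker target is not shown in this example. Below is a minimal sketch of what it could look like, inferred from the queue protocol above (one None sentinel per worker, one list of trials put on the results queue per file); the loop body is an assumption, not the project's actual code, and open_data_file and plugin_manager come from the surrounding module.

import traceback

def open_file_worker(input_queue, results_queue):
    # consume fullpaths until the None sentinel arrives.
    while True:
        fullpath = input_queue.get()
        if fullpath is None:    # sentinel: no more files to open
            break
        try:
            # open_data_file is assumed to return a list of trial objects,
            # matching the single-file branch of open_files above.
            trials = open_data_file(fullpath, plugin_manager.file_interpreters)
        except Exception:
            traceback.print_exc()
            trials = []         # an empty list keeps the result count correct
        results_queue.put(trials)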
Example #3
    def run_tasks(self, message_queue=None):
        '''
            Run all the tasks in self.task_manager
        (see self.prepare_to_run_strategy()).
        '''
        # a default argument of multiprocessing.Queue() would be created
        # once at definition time and shared by every call, so build the
        # queue here instead.
        if message_queue is None:
            message_queue = multiprocessing.Queue()
        num_process_workers = config_manager.get_num_workers()
        num_tasks = self.task_manager.num_tasks
        if num_tasks == 0:
            raise NoTasksError('There are no tasks to run')
        num_process_workers = min(num_process_workers, num_tasks)

        input_queue = multiprocessing.Queue()
        results_queue = multiprocessing.Queue()

        # start the jobs
        jobs = []
        results_list = []
        for i in range(num_process_workers):
            job = multiprocessing.Process(target=task_worker, 
                                          args=(input_queue, 
                                                results_queue))
            job.start()
            jobs.append(job)

        task_index = {task.task_id: task for task in self.task_manager.tasks}
        message_queue.put(('TASKS', [str(t) for t in task_index.values()]))
            
        results_index = {}
        queued_tasks = 0
        sentinels_sent = False
        base_time = time.time()
        while True:
            # queue up ready tasks
            ready_tasks = self.task_manager.get_ready_tasks()
            while ready_tasks:
                picked_task = random.choice(ready_tasks)
                task_info = self.task_manager.checkout_task(picked_task)
                message_queue.put(('RUNNING_TASK', str(picked_task)))
                message_queue.put(('DISPLAY_GRAPH', 
                        (self.task_manager.get_plot_dict(), 
                        time.time()-base_time)))
                input_queue.put(task_info)
                queued_tasks += 1

                ready_tasks = self.task_manager.get_ready_tasks()

            # wait for one result
            if queued_tasks > 0:
                result = results_queue.get()
                finished_task_id = result['task_id']
                finished_task = task_index[finished_task_id]
                results_index[finished_task_id] = result['result']
                if result['result'] is None:
                    message_queue.put(('TASK_ERROR', 
                            {'task':str(finished_task),
                             'traceback':result['traceback'],
                             'runtime':result['runtime']}))
                    self.task_manager.complete_task(finished_task)
                    self.task_manager.remove_all_tasks()
                else:
                    message_queue.put(('FINISHED_TASK', 
                            {'task':str(finished_task), 
                             'runtime':result['runtime']}))
                    self.task_manager.complete_task(finished_task, 
                            result['result'])
                message_queue.put(('DISPLAY_GRAPH', 
                        (self.task_manager.get_plot_dict(),
                        time.time()-base_time)))

            # done queueing up tasks? then add in the sentinels, once only,
            # so each worker receives exactly one None and exits.
            if self.task_manager.num_tasks == 0 and not sentinels_sent:
                for i in range(num_process_workers):
                    input_queue.put(None)
                sentinels_sent = True

            # are we done getting results? then exit.
            if sentinels_sent and len(results_index) == queued_tasks:
                break

        for job in jobs:
            job.join() # halt this thread until processes are all complete.
        message_queue.put(('FINISHED_RUN', None))

        return task_index, results_index
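Like open_file_worker, the task_worker target is not defined in this example. Here is a hypothetical sketch consistent with the protocol run_tasks relies on: a None sentinel ends the loop, and every result is a dict carrying the task_id, result, traceback, and runtime keys read above. How a checked-out task_info is actually executed is an assumption; execute_task below is a made-up stand-in.

import time
import traceback

def task_worker(input_queue, results_queue):
    # run checked-out tasks until the None sentinel arrives.
    while True:
        task_info = input_queue.get()
        if task_info is None:       # sentinel: no more tasks
            break
        start_time = time.time()
        result = None
        error_traceback = None
        try:
            result = execute_task(task_info)    # hypothetical helper
        except Exception:
            # a None result signals TASK_ERROR to run_tasks above.
            error_traceback = traceback.format_exc()
        results_queue.put({'task_id': task_info['task_id'],  # assumed layout
                           'result': result,
                           'traceback': error_traceback,
                           'runtime': time.time() - start_time})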