def run(self, features, number_of_clusters=2, restarts=10,
        precompute_distances=True, initialization='k-means++',
        compute_in_parallel=True):
    # With a single cluster every feature trivially gets label 0, so
    # skip KMeans entirely.
    if number_of_clusters == 1:
        result = numpy.zeros(len(features), dtype=numpy.int32)
        return [result]

    if compute_in_parallel:
        num_cpus = config_manager.get_num_workers()
    else:
        num_cpus = 1
    # scikit-learn's KMeans takes n_clusters, not k.
    return [KMeans(n_clusters=number_of_clusters, init=initialization,
                   n_init=restarts,
                   precompute_distances=precompute_distances,
                   n_jobs=num_cpus).fit_predict(features)]
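# The method above is tied to its surrounding stage class; this is a
# minimal standalone sketch of the same computation, assuming a modern
# scikit-learn (>= 1.0), where KMeans no longer accepts the
# precompute_distances and n_jobs keywords used above.  The name
# cluster_features is illustrative, not part of the original module.
def cluster_features(features, number_of_clusters=2, restarts=10):
    import numpy
    from sklearn.cluster import KMeans
    if number_of_clusters == 1:
        # One cluster: every row gets label 0, no fitting needed.
        return numpy.zeros(len(features), dtype=numpy.int32)
    return KMeans(n_clusters=number_of_clusters, init='k-means++',
                  n_init=restarts).fit_predict(features)

# e.g.  labels = cluster_features(numpy.random.rand(100, 4), 3)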
def open_files(self, fullpaths, **kwargs):
    '''
    Open multiple data files.  Returns a flat list of the trials
    created.
    '''
    file_interpreters = plugin_manager.file_interpreters

    # A single file doesn't warrant spinning up worker processes.
    if len(fullpaths) == 1:
        try:
            results = open_data_file(fullpaths[0], file_interpreters,
                                     **kwargs)
        except Exception:
            results = []
            traceback.print_exc()
        return results

    # Never start more workers than there are files to open.
    num_process_workers = config_manager.get_num_workers()
    if len(fullpaths) < num_process_workers:
        num_process_workers = len(fullpaths)

    # Set up the input and results queues; one None sentinel per
    # worker tells that worker to shut down.
    input_queue = multiprocessing.Queue()
    for fullpath in fullpaths:
        input_queue.put(fullpath)
    for i in xrange(num_process_workers):
        input_queue.put(None)
    results_queue = multiprocessing.Queue()

    # Start the jobs.
    jobs = []
    for i in xrange(num_process_workers):
        job = multiprocessing.Process(target=open_file_worker,
                                      args=(input_queue, results_queue))
        job.start()
        jobs.append(job)

    # Collect the results, waiting for all the jobs to complete.
    results_list = []
    for i in xrange(len(fullpaths)):
        # File interpreters return a list of trial objects.
        results_list.extend(results_queue.get())
    for job in jobs:
        job.join()  # Halt this thread until processes are all complete.
    return results_list
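# open_file_worker runs in each child process but is not shown above;
# this is a minimal sketch of the protocol open_files() relies on,
# reusing open_data_file and plugin_manager from the surrounding
# module.  The worker drains paths from the input queue, exits on the
# None sentinel, and always puts exactly one list on the results queue
# per path so the parent's collection loop cannot stall.
def open_file_worker(input_queue, results_queue):
    file_interpreters = plugin_manager.file_interpreters
    while True:
        fullpath = input_queue.get()
        if fullpath is None:
            break  # sentinel: no more files for this worker
        try:
            trials = open_data_file(fullpath, file_interpreters)
        except Exception:
            traceback.print_exc()
            trials = []  # still report, so the parent's count stays correct
        results_queue.put(trials)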
def run_tasks(self, message_queue=None):
    '''
    Run all the tasks in self.task_manager
    (see self.prepare_to_run_strategy()).
    '''
    # A multiprocessing.Queue() default argument would be built once at
    # definition time and shared across calls; create it per call.
    if message_queue is None:
        message_queue = multiprocessing.Queue()

    num_process_workers = config_manager.get_num_workers()
    num_tasks = self.task_manager.num_tasks
    if num_tasks == 0:
        raise NoTasksError('There are no tasks to run')
    # Never start more workers than there are tasks.
    if num_tasks < num_process_workers:
        num_process_workers = num_tasks

    input_queue = multiprocessing.Queue()
    results_queue = multiprocessing.Queue()

    # Start the jobs.
    jobs = []
    for i in xrange(num_process_workers):
        job = multiprocessing.Process(target=task_worker,
                                      args=(input_queue, results_queue))
        job.start()
        jobs.append(job)

    task_index = {}
    for task in self.task_manager.tasks:
        task_index[task.task_id] = task
    message_queue.put(('TASKS', [str(t) for t in task_index.values()]))

    results_index = {}
    queued_tasks = 0
    base_time = time.time()
    while True:
        # Queue up whatever tasks have their dependencies satisfied.
        ready_tasks = self.task_manager.get_ready_tasks()
        while ready_tasks:
            picked_task = random.choice(ready_tasks)
            task_info = self.task_manager.checkout_task(picked_task)
            message_queue.put(('RUNNING_TASK', str(picked_task)))
            message_queue.put(('DISPLAY_GRAPH',
                               (self.task_manager.get_plot_dict(),
                                time.time() - base_time)))
            input_queue.put(task_info)
            queued_tasks += 1
            ready_tasks = self.task_manager.get_ready_tasks()

        # Wait for one result.
        if queued_tasks > 0:
            result = results_queue.get()
            finished_task_id = result['task_id']
            finished_task = task_index[finished_task_id]
            results_index[finished_task_id] = result['result']
            if result['result'] is None:
                # The task failed; report the traceback and abandon the
                # remaining tasks.
                message_queue.put(('TASK_ERROR',
                                   {'task': str(finished_task),
                                    'traceback': result['traceback'],
                                    'runtime': result['runtime']}))
                self.task_manager.complete_task(finished_task)
                self.task_manager.remove_all_tasks()
            else:
                message_queue.put(('FINISHED_TASK',
                                   {'task': str(finished_task),
                                    'runtime': result['runtime']}))
                self.task_manager.complete_task(finished_task,
                                                result['result'])
            message_queue.put(('DISPLAY_GRAPH',
                               (self.task_manager.get_plot_dict(),
                                time.time() - base_time)))

        # Are we done queueing up tasks?  Then add in the sentinels.
        # (Workers exit on the first None they see, so putting
        # sentinels on more than one pass through this loop is
        # harmless.)
        if self.task_manager.num_tasks == 0:
            for i in xrange(num_process_workers):
                input_queue.put(None)

        # Are we done getting results?  Then exit.
        if len(results_index) == queued_tasks:
            break

    for job in jobs:
        job.join()  # Halt this thread until processes are all complete.
    message_queue.put(('FINISHED_RUN', None))
    return task_index, results_index
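# task_worker is likewise not shown above; this is a minimal sketch of
# the contract run_tasks() depends on.  The unpacking of task_info is
# an assumption (checkout_task()'s payload is not shown), but the
# result dictionary keys ('task_id', 'result', 'traceback', 'runtime')
# are exactly the ones run_tasks() reads, with a None result
# signalling failure.
def task_worker(input_queue, results_queue):
    while True:
        task_info = input_queue.get()
        if task_info is None:
            break  # sentinel: shut this worker down
        task_id, function, args = task_info  # assumed payload layout
        start_time = time.time()
        result = {'task_id': task_id, 'result': None, 'traceback': None}
        try:
            result['result'] = function(*args)
        except Exception:
            result['traceback'] = traceback.format_exc()
        result['runtime'] = time.time() - start_time
        results_queue.put(result)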