def create(cls, operation_spec, result_directory, debug=False, input_paths=[]):
    """ Factory method that creates a WEKA operation

    The operation is built from the information given in the operation
    specification *operation_spec*, whose "type" entry must be
    "weka_classification".

    **Parameters**

        :operation_spec: mapping that is read for the keys "type" and
                         "template" and, optionally, "runs"; it is also
                         handed to ``cls._get_parameter_space``.
        :result_directory: forwarded unchanged to the created processes
                           and to the returned operation object.
        :debug: if true, all processes are created in the calling
                process before the operation is returned. Otherwise a
                separate process fills a queue limited to 100 entries.
        :input_paths: dataset directories; the number of processes is
                      proportional to the number of paths. The default
                      list is never modified here.

    **Returns**

        The object produced by calling *cls*. In non-debug mode the
        already started creation process is passed as extra argument.
    """
    assert(operation_spec["type"] == "weka_classification")

    # Determine all parameter combinations that should be tested
    parameter_settings = cls._get_parameter_space(operation_spec)

    # Read the command template from a file. The context manager makes sure
    # the file handle is released even when reading fails.
    template_path = os.path.join(pySPACE.configuration.spec_dir,
                                 "operations", "weka_templates",
                                 operation_spec["template"])
    with open(template_path, 'r') as template_file:
        command_template = template_file.read()

    # Number of runs per (dataset, parameter setting) pair
    if "runs" in operation_spec:
        runs = operation_spec["runs"]
    else:
        # Approximate it by the largest run count stored in the meta data
        # of the input datasets (raises ValueError without input paths,
        # exactly like the previous implementation).
        runs = max([BaseDataset.load(dataset_dir).meta_data["runs"]
                    for dataset_dir in input_paths])
    number_processes = len(input_paths) * len(parameter_settings) * runs

    if debug:
        # To better debug creation of processes we don't limit the queue
        # and create all processes before executing them
        processes = processing.Queue()
        cls._createProcesses(processes, result_directory, operation_spec,
                             parameter_settings, input_paths,
                             command_template)
        # create and return the weka operation object
        return cls(processes, operation_spec, result_directory,
                   number_processes)

    # Create all processes by calling a helper method in another process so
    # that already created processes can be executed in parallel. The queue
    # size is limited so that not too many objects are created at once
    # (because this costs memory). The actual number of 100 is arbitrary
    # and might be reviewed.
    processes = processing.Queue(100)
    create_process = processing.Process(
        target=cls._createProcesses,
        args=(processes, result_directory, operation_spec,
              parameter_settings, input_paths, command_template))
    create_process.start()
    # create and return the weka operation object
    return cls(processes, operation_spec, result_directory,
               number_processes, create_process)
def create(cls, operation_spec, result_directory, debug=False, input_paths=[]):
    """ Factory method that builds an Analysis operation

    The result summary stored below ``pySPACE.configuration.storage`` at
    ``operation_spec["input_path"]`` is loaded, and the plot processes are
    created for the "parameters" and "metrics" named in *operation_spec*
    (whose "type" must be "analysis").

    With ``debug == True`` the processes are created in the calling process;
    otherwise a separate process fills a queue limited to 100 entries and is
    handed to the operation object as additional argument.
    *input_paths* is not read by this method.
    """
    assert (operation_spec["type"] == "analysis")

    summary_location = os.path.join(pySPACE.configuration.storage,
                                    operation_spec["input_path"])
    data_dict = BaseDataset.load(summary_location).data

    # Parameters to analyze and metrics to plot
    parameters = operation_spec["parameters"]
    metrics = operation_spec["metrics"]

    # The number of processes follows from the number of distinct values
    # every analyzed parameter takes in the loaded data
    distinct_value_counts = []
    for param in parameters:
        distinct_value_counts.append(len(set(data_dict[param])))
    number_processes = cls._numberOfProcesses(0, distinct_value_counts) + 1

    if debug == True:
        # Unlimited queue, everything is created before execution starts;
        # this makes debugging the process creation easier
        processes = processing.Queue()
        cls._createProcesses(processes, result_directory, data_dict,
                             parameters, metrics, True)
        return cls(processes, operation_spec, result_directory,
                   number_processes)

    # Normal mode: a helper process creates the plot processes so that the
    # ones already available can be executed while creation is still going
    # on. The queue is bounded to limit memory usage; the bound of 100 is
    # arbitrary and may be adapted to the system at hand.
    processes = processing.Queue(100)
    creator = processing.Process(
        target=cls._createProcesses,
        args=(processes, result_directory, data_dict, parameters, metrics,
              True))
    creator.start()
    # create and return the operation object
    return cls(processes, operation_spec, result_directory,
               number_processes, creator)
def __init__(self):
    """Fill ``self.my_comps`` with fake component queues for testing.

    Five entries ('compA' .. 'compE') are created, each holding a
    ``Queue(0)`` taken from ``processing`` when ``my_version`` is below
    2.6 and from ``multiprocessing`` otherwise.
    """
    self.my_comps = {}
    # Only the module that matches the interpreter version is looked up
    if my_version < 2.6:
        queue_provider = processing
    else:
        queue_provider = multiprocessing
    for component in ('compA', 'compB', 'compC', 'compD', 'compE'):
        self.my_comps[component] = queue_provider.Queue(0)
def create(cls, operation_spec, result_directory, debug=False, input_paths=[]):
    """ [Factory method] Create a ConcatenateOperation

    Builds the operation from the operation specification *operation_spec*,
    whose "type" must be "concatenate". The optional entries
    "dataset_constraints" (default: no constraints), "name_pattern"
    (default: None) and "change_time" (default: False) are forwarded to
    ``cls._createProcesses`` together with *input_paths* and
    *result_directory*. *debug* is not read by this method.
    """
    assert(operation_spec["type"] == "concatenate")

    # Constraints for the datasets that are combined, copied into a new list
    dataset_constraints = list(operation_spec.get("dataset_constraints", []))
    # Remaining optional parameters with their defaults
    name_pattern = operation_spec.get("name_pattern", None)
    change_time = operation_spec.get("change_time", False)

    # Merging is not distributed over different processes, so the
    # Concatenate process is created directly in an unbounded queue
    processes = processing.Queue()
    cls._createProcesses(processes, input_paths, result_directory,
                         dataset_constraints, name_pattern, change_time)

    # create and return the Concatenate operation object
    number_processes = 1
    return cls(processes, operation_spec, result_directory,
               number_processes)
def create(cls, operation_spec, result_directory, debug=False, input_paths=[]):
    """ Factory method for creating a :class:`GenericOperation`.

    One :class:`GenericProcess` is queued for every parameter combination
    returned by ``cls._get_parameter_space`` for *operation_spec* (whose
    "type" must be "generic"). Every process receives the
    "configuration_template" and the "code" entry of the specification.
    The queue is terminated with ``False``. *debug* and *input_paths* are
    not read by this method.
    """
    assert (operation_spec["type"] == "generic")

    template = operation_spec["configuration_template"]
    # All parameter combinations that have to be processed
    settings = cls._get_parameter_space(operation_spec)

    processes = processing.Queue()
    for index, setting in enumerate(settings):
        processes.put(
            GenericProcess(process_id=index,
                           configuration_template=template,
                           parameter_setting=setting,
                           result_directory=result_directory,
                           code_string=operation_spec["code"]))

    # End-of-queue marker: no more jobs will be added
    processes.put(False)

    # create and return the operation object
    return cls(processes, operation_spec, result_directory,
               operation_spec["code"], len(settings))
def create(cls, operation_spec, result_directory, debug=False, input_paths=[]):
    """ [factory method] Create a MergeOperation object.

    Builds the operation from the operation specification *operation_spec*,
    whose "type" must be "merge". The optional entries
    "collection_constraints" (default: no constraints), "reverse"
    (default: False), "set_flag" (default: True) and "name_pattern"
    (default: "Rest") are forwarded to ``cls._createProcesses`` together
    with *input_paths* and *result_directory*. *debug* is not read by this
    method.
    """
    assert (operation_spec["type"] == "merge")

    # Constraints for the collections that are combined (fresh list)
    collection_constraints = list(
        operation_spec.get("collection_constraints", []))

    # Optional settings and their defaults
    reverse = operation_spec.get("reverse", False)
    set_flag = operation_spec.get("set_flag", True)
    name_pattern = operation_spec.get("name_pattern", "Rest")

    # Merging is not distributed over different processes: the Merge
    # process is created right here in an unbounded queue
    processes = processing.Queue()
    cls._createProcesses(processes, input_paths, result_directory,
                         collection_constraints, reverse, set_flag,
                         name_pattern)

    # create and return the merge operation object
    number_processes = 1
    return cls(processes, operation_spec, result_directory,
               number_processes)
def create(cls, operation_spec, result_directory, debug=False, input_paths=[]):
    """ Factory method that creates a ShuffleOperation

    Builds the operation from the operation specification *operation_spec*,
    whose "type" must be "shuffle". The optional entry
    "dataset_constraints" (default: no constraints) is forwarded to
    ``cls._createProcesses`` together with *result_directory* and
    *input_paths*. *debug* is not read by this method.
    """
    assert (operation_spec["type"] == "shuffle")

    # Constraints on the datasets that are combined (fresh list)
    dataset_constraints = list(operation_spec.get("dataset_constraints", []))

    # Shuffling is not distributed over different processes, so the
    # ShuffleProcess is created directly in an unbounded queue
    processes = processing.Queue()
    cls._createProcesses(processes, result_directory, input_paths,
                         dataset_constraints)

    # create and return the shuffle operation object
    number_processes = 1
    return cls(processes, operation_spec, result_directory,
               number_processes)
def __init__(self, numThreads):
    """Initialise the base class, create the output queue and start workers.

    *numThreads* is passed to the base class initialiser and is also the
    number of times the private ``__startWorker`` method is called.
    """
    super(ThreadPool, self).__init__(numThreads)
    self.out = pc.Queue()
    # One call of the worker start-up routine per requested thread
    for _unused in range(numThreads):
        self.__startWorker()
def __init__(self, numThreads):
    """Store the thread count and set up empty bookkeeping state.

    Sets ``numth`` to *numThreads*, the counters ``jid`` and ``cnt`` to 0,
    ``results`` to an empty dict and ``q`` to a new ``pc.Queue()``.
    """
    object.__init__(self)
    self.numth, self.jid = numThreads, 0
    self.results = dict()
    self.q = pc.Queue()
    self.cnt = 0
def create(cls, operation_spec, result_directory, debug=False, input_paths=[]):
    """ Factory method that creates a statistic operation

    The operation is built from the information given in the operation
    specification *operation_spec*, whose "type" must be "statistic".

    The tabular data found below ``pySPACE.configuration.storage`` at
    ``operation_spec["input_path"]`` is loaded, optionally filtered,
    reduced with ``cls.reduce_tabular`` and handed to
    ``cls._createProcesses``. Afterwards "results.csv" and "metadata.yaml"
    are copied from the input directory to *result_directory*.

    **Parameters**

        :operation_spec: mapping with the required keys "type" and
                         "input_path" and the optional keys "filter",
                         "metric" (default "Balanced_accuracy"),
                         "parameter" (default "__Dataset__"),
                         "related_parameters" (default
                         ["__Dataset__", "Key_Run", "Key_Fold"]) and
                         "average" (default None). The mapping and the
                         lists stored in it are not modified.
        :result_directory: target of the created processes and of the
                           copied files.
        :debug: not read by this method; the processes are always
                created in the calling process.
        :input_paths: not read by this method.

    **Returns**

        The object produced by ``cls(processes, operation_spec,
        result_directory, 1)``.
    """
    assert (operation_spec["type"] == "statistic")
    input_path = operation_spec["input_path"]
    source_directory = os.path.join(pySPACE.configuration.storage,
                                    input_path)
    tabular = BaseDataset.load(source_directory).data

    if "filter" in operation_spec:
        # Restrict the table to the rows that fulfil the given conditions
        conditions = csv_analysis.empty_dict(tabular)
        for key, allowed_values in operation_spec["filter"].items():
            conditions[key].extend(allowed_values)
        tabular = csv_analysis.strip_dict(tabular, conditions)

    metric = operation_spec.get("metric", "Balanced_accuracy")
    parameter = operation_spec.get("parameter", "__Dataset__")
    average = operation_spec.get("average", None)

    # Work on a copy: removing entries from the list stored in the
    # specification would silently alter the caller's operation_spec.
    rel_par = list(operation_spec.get(
        "related_parameters", ["__Dataset__", "Key_Run", "Key_Fold"]))
    for excluded in (average, metric, parameter):
        if excluded in rel_par:
            rel_par.remove(excluded)

    reduced_tabular = cls.reduce_tabular(tabular, rel_par, metric,
                                         parameter, average)

    number_processes = 1
    processes = processing.Queue()
    cls._createProcesses(processes, result_directory, reduced_tabular)

    # Keep the original results and meta data next to the statistic output
    import shutil
    for file_name in ("results.csv", "metadata.yaml"):
        shutil.copy2(os.path.join(source_directory, file_name),
                     os.path.join(result_directory, file_name))

    # create and return the statistic operation object
    return cls(processes, operation_spec, result_directory,
               number_processes)
def test(): manager = processing.Manager() gc.disable() print '\n\t######## testing Queue.Queue\n' test_queuespeed(threading.Thread, Queue.Queue(), threading.Condition()) print '\n\t######## testing processing.Queue\n' test_queuespeed(processing.Process, processing.Queue(), processing.Condition()) print '\n\t######## testing Queue managed by server process\n' test_queuespeed(processing.Process, manager.Queue(), manager.Condition()) print '\n\t######## testing processing.Pipe\n' test_pipespeed() print print '\n\t######## testing list\n' test_seqspeed(range(10)) print '\n\t######## testing list managed by server process\n' test_seqspeed(manager.list(range(10))) print '\n\t######## testing Array("i", ..., lock=False)\n' test_seqspeed(processing.Array('i', range(10), lock=False)) print '\n\t######## testing Array("i", ..., lock=True)\n' test_seqspeed(processing.Array('i', range(10), lock=True)) print print '\n\t######## testing threading.Lock\n' test_lockspeed(threading.Lock()) print '\n\t######## testing threading.RLock\n' test_lockspeed(threading.RLock()) print '\n\t######## testing processing.Lock\n' test_lockspeed(processing.Lock()) print '\n\t######## testing processing.RLock\n' test_lockspeed(processing.RLock()) print '\n\t######## testing lock managed by server process\n' test_lockspeed(manager.Lock()) print '\n\t######## testing rlock managed by server process\n' test_lockspeed(manager.RLock()) print print '\n\t######## testing threading.Condition\n' test_conditionspeed(threading.Thread, threading.Condition()) print '\n\t######## testing processing.Condition\n' test_conditionspeed(processing.Process, processing.Condition()) print '\n\t######## testing condition managed by a server process\n' test_conditionspeed(processing.Process, manager.Condition()) gc.enable()
def test_queue(): q = processing.Queue() p = processing.Process(target=queue_func, args=(q, )) p.start() o = None while o != 'STOP': try: o = q.get(timeout=0.3) print o, sys.stdout.flush() except Empty: print 'TIMEOUT' print
def create(cls, operation_spec, result_directory, debug=False, input_paths=[]):
    """ Factory method that creates an MMLF operation

    The operation is built from the information given in the operation
    specification *operation_spec*, whose "type" must be "mmlf". For every
    parameter setting one world configuration file is written to
    ``<result_directory>/config/<world_name>`` and the processes are
    created for these files.

    **Parameters**

        :operation_spec: mapping with the keys "type", "world_name",
                         "environment_template", "agent_template" and
                         "runs" and the optional keys "monitor_conf" and
                         "generalized_domain"; it is also handed to
                         ``cls._get_parameter_space``. A missing required
                         key raises ``KeyError``.
        :result_directory: directory below which the world files are
                           stored; forwarded to the created processes.
        :debug: if true, all processes are created in the calling
                process before the operation is returned. Otherwise a
                separate process fills a queue limited to 100 entries.
        :input_paths: not read by this method.

    **Returns**

        The object produced by calling *cls*. In non-debug mode the
        already started creation process is passed as extra argument.
    """
    assert (operation_spec["type"] == "mmlf")

    # The generic world configuration YAML file
    # NOTE(review): the literal is reproduced exactly as it is visible in
    # the reviewed source; a YAML document would normally need line breaks
    # between these keys -- confirm against the canonical file.
    world_conf = """ worldPackage : %s environment: %s agent: %s monitor: policyLogFrequency : 100000 %s """

    # Create directory for the experiment
    world_name = operation_spec['world_name']
    world_path = "%s/config/%s" % (result_directory, world_name)
    if not os.path.exists(world_path):
        os.makedirs(world_path)

    # Determine all parameter combinations that should be tested
    parameter_settings = cls._get_parameter_space(operation_spec)

    # If the operation spec defines parameters for a generalized domain:
    if "generalized_domain" in operation_spec:
        # We have to test each parameter setting in each instantiation of
        # the generalized domain. This can be achieved by computing the
        # crossproduct of parameter settings and domain settings
        augmented_parameter_settings = []
        for parameter_setting in parameter_settings:
            for domain_parameter_setting in \
                    operation_spec["generalized_domain"]:
                instantiation = dict(parameter_setting)
                instantiation.update(domain_parameter_setting)
                augmented_parameter_settings.append(instantiation)
        parameter_settings = augmented_parameter_settings

    def indent_lines(text):
        # Prefix every line so that the text nests below its YAML key.
        # NOTE(review): the original comment speaks of 4 blanks while the
        # literal visible in the reviewed source is a single blank --
        # confirm against the canonical file.
        return "\n".join(" " + line for line in text.split("\n"))

    def get_parameter_str(parameter_name):
        # Abbreviate a name to the first 4 characters of each "_" part
        return "".join(subpart[:4]
                       for subpart in parameter_name.split("_"))

    # The indented templates do not depend on the parameter setting, so
    # they are prepared once instead of once per setting
    environment_template = indent_lines(
        operation_spec['environment_template'])
    agent_template = indent_lines(operation_spec['agent_template'])
    monitor_conf = indent_lines(operation_spec.get('monitor_conf', ""))

    # Create and remember all worlds for the given parameter_settings
    world_pathes = []
    for parameter_setting in parameter_settings:
        # Instantiate the templates
        environment_conf = environment_template
        agent_conf = agent_template
        for parameter, value in parameter_setting.iteritems():
            environment_conf = environment_conf.replace(parameter,
                                                        str(value))
            agent_conf = agent_conf.replace(parameter, str(value))

        # Short, file name friendly description of the parameter setting
        configuration_str = "{" + "}{".join([
            "%s:%s" % (get_parameter_str(parameter), str(value)[:6])
            for parameter, value in parameter_setting.iteritems()
        ]) + "}"
        configuration_str = configuration_str.replace('_', '')

        world_file_name = "world_%s.yaml" % configuration_str
        world_file_path = os.path.join(world_path, world_file_name)
        # The context manager guarantees that the file is flushed and
        # closed before any process tries to read it
        with open(world_file_path, 'w') as world_file:
            world_file.write(world_conf % (world_name, environment_conf,
                                           agent_conf, monitor_conf))
        world_pathes.append(world_file_path)

    number_processes = len(world_pathes) * int(operation_spec["runs"])

    if debug:
        # To better debug creation of processes we don't limit the queue
        # and create all processes before executing them
        processes = processing.Queue()
        cls._createProcesses(processes, world_pathes, operation_spec,
                             result_directory)
        return cls(processes, operation_spec, result_directory, world_name,
                   number_processes)

    # Create all processes by calling a helper method in another process
    # so that already created processes can be executed although creation
    # of processes is not finished yet. Therefore a queue is used whose
    # size is limited to guarantee that not too many objects are created
    # (since this costs memory). However, the actual number of 100 is
    # arbitrary and might be changed according to the system at hand.
    processes = processing.Queue(100)
    create_process = processing.Process(
        target=cls._createProcesses,
        args=(processes, world_pathes, operation_spec, result_directory))
    create_process.start()
    # create and return the operation object
    return cls(processes, operation_spec, result_directory, world_name,
               number_processes, create_process)
95, 252, 44, 46, 47, 42, 28, ]) #packet.SetOption("client_identifier") # ignored #packet.SetOption("maximum_message_size") # ignored return type, packet if __name__ == '__main__': NUM_WORKERS = 10 db_requests = processing.Queue(NUM_WORKERS) server = PacketGenerator(db_requests) db_pool = processing.Pool(processes=NUM_WORKERS, initializer=dhcp_server.db_consumer, initargs=(db_requests, server.send_packet)) server.connect() nreq = 2000 start = datetime.datetime.now() print "starting at %s" % start for i in range(nreq): #while True: # Sleep an appropriate amount of time to get the appropriate number of packets per second
def create(cls, operation_spec, result_directory, debug=False, input_paths=[]):
    """ Factory method that creates a comp_analysis operation

    The operation is built from the information given in the operation
    specification *operation_spec*, whose "type" must be "comp_analysis".

    **Parameters**

        :operation_spec: mapping with the required keys "type",
                         "input_path", "parameters" and "metrics" and the
                         optional keys "dep_par" (default: empty list),
                         "logscale" (default: False) and "markertype"
                         (default: 'x').
        :result_directory: forwarded to the created processes and to the
                           returned operation object.
        :debug: if true, the creation of the analysis processes is done
                in the calling process. Otherwise a separate process
                fills a queue limited to 100 entries.
        :input_paths: not read by this method.

    Metrics from the specification that are not contained in the loaded
    data are skipped with a warning. If no metric remains, the first key
    of the loaded data is used instead (again with a warning).

    **Returns**

        The object produced by calling *cls*. In non-debug mode the
        already started creation process is passed as extra argument.
    """
    # Imported up front: an import inside only one branch makes the name
    # local to the function and leaves it unbound on every other path
    # (UnboundLocalError when the fallback warning below is reached).
    import warnings

    assert (operation_spec["type"] == "comp_analysis")
    input_path = operation_spec["input_path"]
    summary = BaseDataset.load(
        os.path.join(pySPACE.configuration.storage, input_path))
    data_dict = summary.data

    # Determine the parameters that should be analyzed
    parameters = operation_spec["parameters"]
    # Determine dependent parameters, which don't get extra resolution
    dep_par = operation_spec.get("dep_par", [])

    # Determine the metrics that should be plotted
    metrics = []
    for metric in operation_spec["metrics"]:
        if metric in data_dict:
            metrics.append(metric)
        else:
            warnings.warn('The metric "' + metric +
                          '" is not contained in the results csv file.')
    if not metrics:
        warnings.warn(
            'No metric available from spec file, default to first dict entry.'
        )
        metrics.append(list(data_dict.keys())[0])

    # Determine how many processes will be created
    number_parameter_values = [
        len(set(data_dict[param])) for param in parameters
    ]
    number_processes = cls._numberOfProcesses(0, number_parameter_values) + 1

    # Optional plot settings
    logscale = operation_spec.get('logscale', False)
    markertype = operation_spec.get('markertype', 'x')

    if debug:
        # To better debug creation of processes we don't limit the queue
        # and create all processes before executing them
        processes = processing.Queue()
        cls._createProcesses(processes, result_directory, data_dict,
                             parameters, dep_par, metrics, logscale,
                             markertype, True)
        return cls(processes, operation_spec, result_directory,
                   number_processes)

    # Create all plot processes by calling a helper method in another
    # process so that already created processes can be executed although
    # creation of processes is not finished yet. Therefore a queue is used
    # whose size is limited to guarantee that not too many objects are
    # created (since this costs memory). However, the actual number of 100
    # is arbitrary and might be reviewed.
    processes = processing.Queue(100)
    create_process = processing.Process(
        target=cls._createProcesses,
        args=(processes, result_directory, data_dict, parameters, dep_par,
              metrics, logscale, markertype, True))
    create_process.start()
    # create and return the comp_analysis operation object
    return cls(processes, operation_spec, result_directory,
               number_processes, create_process)
def __init__(self, sendq):
    """Remember the outgoing queue and create the incoming one.

    *sendq* is stored as the queue used to send to the backend; a new
    ``processing.Queue()`` is created for receiving from the backend and
    the counter of sent packets starts at zero.
    """
    # Channel towards the backend (provided by the caller)
    self.sendq = sendq
    # Channel coming back from the backend (owned by this object)
    self.recvq = processing.Queue()
    # Nothing has been sent yet
    self.packets_sent = 0
def create(cls, operation_spec, result_directory, debug=False, input_paths=[]):
    """ Factory method creating the processes that form a node chain operation

    The processes are derived from the operation specification
    *operation_spec* (whose "type" must be "node_chain").

    In debug mode all processes are created serially in the calling
    process. Otherwise a separate process creates them and puts them into
    a queue limited to 4 entries, so that creation and execution of
    processes overlap. This kind of process creation is independent of the
    backend. For huge parameter spaces it is necessary: without it,
    numerous processes would be created and their data loaded while only
    one processor is busy, and nothing could be executed until every
    process exists.

    If the specification has no "templates" entry, its "node_chain" entry
    is used as the single template. An exception is raised when
    *input_paths* is empty.

    .. todo:: Use :class:`~pySPACE.resources.dataset_defs.dummy.DummyDataset`
              for empty data, when no input_path is given.
    """
    assert (operation_spec["type"] == "node_chain")

    # All parameter combinations that should be tested
    parameter_settings = cls._get_parameter_space(operation_spec)

    ## Use node_chain parameter if no templates are given ##
    if "templates" not in operation_spec:
        if "node_chain" in operation_spec:
            operation_spec["templates"] = [operation_spec.pop("node_chain")]
        else:
            warnings.warn(
                "Specify parameter 'templates' or 'node_chain' in your operation spec!"
            )
    elif "node_chain" in operation_spec:
        operation_spec.pop("node_chain")
        warnings.warn(
            "node_chain parameter is ignored. Templates are used.")
    # Templates given as file names are replaced by the file contents
    elif type(operation_spec["templates"][0]) == str:
        operation_spec["template_files"] = \
            copy.deepcopy(operation_spec["templates"])
        template_list = operation_spec["templates"]
        for position, rel_node_chain_file in enumerate(template_list):
            abs_node_chain_file_name = os.sep.join([
                pySPACE.configuration.spec_dir, "node_chains",
                rel_node_chain_file
            ])
            with open(abs_node_chain_file_name, "r") as read_file:
                # The raw text is stored, it is not parsed here
                template_list[position] = read_file.read()

    storage = pySPACE.configuration.storage
    if not input_paths:
        raise Exception("No input datasets found in input_path %s in %s!" %
                        (operation_spec["input_path"], storage))

    # Input paths relative to the storage directory
    rel_input_paths = [path[len(storage):] for path in input_paths]

    # Approximate number of runs: taken from the specification or, if it
    # is not given there, the maximum found in the datasets' meta data
    if "runs" in operation_spec:
        runs = operation_spec["runs"]
    else:
        runs = max([
            BaseDataset.load_meta_data(storage + os.sep + rel_path).get(
                'runs', 1) for rel_path in rel_input_paths
        ])

    # The number of splits is read from the first dataset only
    first_collection_path = storage + os.sep + rel_input_paths[0]
    splits = BaseDataset.load_meta_data(first_collection_path).get(
        'splits', 1)

    # Number of processes that will be created
    number_processes = len(operation_spec["templates"]) * \
        len(parameter_settings) * len(rel_input_paths) * \
        runs * splits

    if debug:
        # Unlimited queue, everything is created before execution starts;
        # this makes debugging the process creation easier
        processes = processing.Queue()
        cls._createProcesses(processes, result_directory, operation_spec,
                             parameter_settings, rel_input_paths)
        # create and return the operation object
        return cls(processes, operation_spec, result_directory,
                   number_processes)

    # A helper process creates the processes so that the ones already
    # available can be executed in parallel. The queue is bounded to keep
    # the number of simultaneously existing objects (and thus memory
    # usage) low; the bound of 4 is arbitrary and might be changed
    # according to the system at hand.
    processes = processing.Queue(4)
    creator = processing.Process(
        target=cls._createProcesses,
        args=(processes, result_directory, operation_spec,
              parameter_settings, rel_input_paths))
    creator.start()
    # create and return the operation object
    return cls(processes, operation_spec, result_directory,
               number_processes, creator)