def load_problem(path=(DATASET_CAMROT % 1), number=35, balance=True):
    if path in ['sd', 'sr']:
        return DatasetLoader._load_psvrt(path, number)

    button1 = 'Category 1'
    button2 = 'Category 2'

    try:
        with open(path + 'labels.txt', 'r') as file:
            lines = file.readlines()
    except FileNotFoundError as fnfe:
        print(path + 'labels.txt does not exist')
        raise fnfe
    except Exception as exc:
        print("Unexpected error:", sys.exc_info()[0])
        raise exc

    if '[config]' in lines[0]:
        button1 = lines[1].split('=')[1].strip()
        button2 = lines[2].split('=')[1].strip()
        lines = lines[4:]

    splitted_lines = [[path + line.split()[0], int(line.split()[1])] for line in lines]

    # check if all files are present
    not_existing = []
    for index, line in enumerate(splitted_lines):
        if not os.path.isfile(line[0]):
            not_existing.append(index)
    if not_existing:
        print("Files that aren't available:")
        for index in not_existing:
            print(splitted_lines[index][0])
        splitted_lines = [
            line for index, line in enumerate(splitted_lines)
            if index not in not_existing
        ]

    if balance:
        set_false = [x for x in splitted_lines if x[1] == 0]
        set_true = [x for x in splitted_lines if x[1] == 1]
        random.shuffle(set_false)
        random.shuffle(set_true)
        half = int((number + 1) / 2)
        splitted_lines = set_false[:half] + set_true[:half]
        random.shuffle(splitted_lines)
        if number % 2 == 1:
            del splitted_lines[-1]

    return Dataset(splitted_lines, text1=button1, text2=button2)

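# Hedged usage sketch (added for illustration, not part of the original loader). It assumes
# load_problem is exposed on DatasetLoader and that `path` points at a folder containing a
# labels.txt file whose rows are "<relative_image_path> <label>".
def _example_load_problem():
    # with balance=True the returned Dataset holds `number` samples,
    # roughly half labelled 0 and half labelled 1, in shuffled order
    return DatasetLoader.load_problem(path=(DATASET_CAMROT % 1), number=20, balance=True)
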
def load_network_with_dummy_x(dummy_x, sess, is_training, conf_file, global_step, base_path):
    dataset_conf, model_parameters, config = ArgoLauncher.process_conf_file(conf_file)
    dataset = Dataset.load_dataset(dataset_conf)
    check_dataset_shapes(dataset.x_shape_eval, dummy_x.shape[1:])

    full_class_path = base_path + "." + model_parameters["model"]
    model_class = load_class(full_class_path)
    network, checkpoint_name = load_network(model_class,
                                            conf_file=conf_file,
                                            dataset=dataset,
                                            global_step=global_step)

    # LOAD NETWORK: build the graph on the dummy input, then restore the weights
    stuff = network(dummy_x, is_training=is_training)
    network.restore(sess, checkpoint_name)

    return network

def _load_psvrt(type='sd', number=35):
    samples = []
    data_lines = []

    if number % 4 == 0:
        samples = [number // 4] * 4
    else:
        q, r = divmod(number, 4)
        samples = [q + 1] * r + [q] * (4 - r)

    for index, (dirpath, val) in enumerate(DatasetLoader.PSVRT[type]):
        files = glob.glob(dirpath + '*.png')
        random.shuffle(files)
        files = files[:samples[index]]
        data_lines += [[f.replace('\\', '/'), val] for f in files]

    random.shuffle(data_lines)
    return Dataset(data_lines)

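# Worked example of the split above (illustrative only): for a `number` that is not divisible
# by 4, divmod spreads the remainder over the first directories, e.g. number=35 gives
# q, r = divmod(35, 4) -> (8, 3), so samples = [9, 9, 9, 8] and sum(samples) == 35.
def _example_psvrt_split(number=35):
    q, r = divmod(number, 4)
    return [q + 1] * r + [q] * (4 - r)   # -> [9, 9, 9, 8] for number=35
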
def load_model(conf_file, dataset=None, gpu=0, seed=0, model_class_base_path=''):
    """Load a TFDeepLearningModel and optionally save its network.

    Args:
        conf_file (str): the conf file of the model where to find the experiment.
        dataset (datasets.Dataset): (optional) the argo Dataset of the model for the training.
            If not passed it will be reloaded.
        gpu (int): the gpu on which the model will create the session.
        seed (int): the seed that the model will set.
        model_class_base_path (str): the base path where to look for the model class.

    Returns:
        TFDeepLearningModel: the loaded Argo TFDeepLearningModel.
        datasets.Dataset: the argo Dataset of the model for the training.
    """
    dataset_conf, model_parameters, config = ArgoLauncher.process_conf_file(conf_file)

    if not dataset:
        dataset = Dataset.load_dataset(dataset_conf)

    ArgoTFDeepLearningModelClass = load_class(model_parameters["model"],
                                              base_path=model_class_base_path)

    update_model_params(model_parameters, dataset)

    # baseDir = config["dirName"]+"/"+dataset.id
    model_dir = os.path.split(os.path.dirname(conf_file))[0]
    model = ArgoTFDeepLearningModelClass(model_parameters, model_dir, gpu=gpu, seed=seed)

    return model, dataset

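# Hedged usage sketch (illustrative, not from the original file). It assumes an experiment
# laid out so that conf_file sits one directory below the model directory, as load_model
# expects, and that 'prediction.core' is a valid base path for the model class (the same base
# path used elsewhere in this codebase); both the path and the base path here are assumptions.
def _example_load_model(conf_file="experiments/my_run/sub/experiment.conf"):
    model, dataset = load_model(conf_file, gpu=0, seed=0,
                                model_class_base_path='prediction.core')
    return model, dataset
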
def __init__(self, dataset: Dataset, fully_connected, bigger_is_more_similar, k):
    super().__init__(MAPEvaluator(SolutionComparator(dataset.solution_matrix()),
                                  dataset.all_original_req_file_names(),
                                  dataset.all_original_code_file_names(),
                                  fully_connected, bigger_is_more_similar, k))

def create_callgraph_from_raw_file(dataset: Dataset, create_class_callgraph=False):
    """
    Extract class and method call graphs from a raw call graph file generated by the java call graph tool.
    The input raw call graph file is automatically retrieved from dataset.raw_call_graph_path().
    Saves the call graphs as json files at dataset.method_callgraph_path() and dataset.class_callgraph_path().

    Resulting class call graph:
        dict["classname"] = dict{
            called_by=[str]
            calls=[str]
        }

    Resulting method call graph:
        dict["classname.methodname(paramtyp1,paramtyp2)"] = dict{
            called_by=[classname.methodname(paramtyp1,paramtyp2), ...]
            calls=[classname.methodname(paramtyp1,paramtyp2), ...]
            class_name=str
            method_name=str
            params=[str]
        }
    """
    raw_txt_path = dataset.raw_call_graph_path()
    output_class_callgraph = dataset.class_callgraph_path()
    output_method_callgraph = dataset.method_callgraph_path()

    text_rows = []
    try:
        with open(raw_txt_path, 'r', encoding='utf8') as file:
            text_rows = file.readlines()
    except IOError:
        log.error("Unable to read " + str(raw_txt_path))

    class_call_graph = dict()
    method_call_graph = dict()

    def insert_class(class_name, calls=set(), called_by=set()):
        if class_name in class_call_graph:
            class_call_graph[class_name][CALLS] |= calls
            class_call_graph[class_name][CALLED_BY] |= called_by
        else:
            class_ref = dict()
            class_ref[CALLED_BY] = called_by
            class_ref[CALLS] = calls
            class_call_graph[class_name] = class_ref

    def insert_entry(dict_key, class_name, method_name, param_list, called_by=set(), calls=set()):
        if dict_key in method_call_graph:
            method_call_graph[dict_key][CALLS] |= calls
            method_call_graph[dict_key][CALLED_BY] |= called_by
        else:
            method_dict = dict()
            method_dict[CALLS] = calls
            method_dict[CALLED_BY] = called_by
            method_dict[CLASS_NAME] = class_name
            method_dict[METHOD_NAME] = method_name
            method_dict[PARAMS] = param_list
            method_call_graph[dict_key] = method_dict

    def remove_external_calls():
        for dict_key in method_call_graph:
            method_call_graph[dict_key][CALLS] = [
                callee for callee in method_call_graph[dict_key][CALLS]
                if callee in method_call_graph
            ]
            method_call_graph[dict_key][CALLED_BY] = [
                caller for caller in method_call_graph[dict_key][CALLED_BY]
                if caller in method_call_graph
            ]

    for row in text_rows:
        row_split = row.split(":")
        if row_split[0] == "C":  # class level call
            classes = row_split[1].split(" ")
            class_1 = _clean(classes[0])
            class_2 = _clean(classes[1])
            if _is_external_class(dataset, class_1) or _is_external_class(dataset, class_2):
                continue
            caller_class_name = _extract_name(classes[0])
            callee_class_name = _extract_name(classes[1].replace('\r', '').replace('\n', ''))
            if caller_class_name == callee_class_name:
                continue
            if "$" in caller_class_name or "$" in callee_class_name:
                continue  # leave out inner classes
            if create_class_callgraph:
                insert_class(caller_class_name, set([callee_class_name]), set())
                insert_class(callee_class_name, set(), set([caller_class_name]))
        elif row_split[0] == "M":  # method level call
            # row_split[1] = class of caller method
            # row_split[2] = caller method<whitespace>calltype and class of callee method
            # row_split[3] = callee method
            split_2 = row_split[2].split(" ")
            split_3 = split_2[1].split(")")
            if _is_external_class(dataset, row_split[1]) or _is_external_class(dataset, split_3[1]):
                continue
            caller_method = split_2[0]
            callee_method = row_split[3]
            if _is_constructor(caller_method) or _is_constructor(callee_method):
                continue
            if _is_access(caller_method) or _is_access(callee_method):
                continue
            caller_class = _extract_name(row_split[1])
            callee_class = _extract_name(split_3[1])
            if "$" in caller_class or "$" in callee_class:
                continue  # leave out references to inner classes
            # call_type = split_3[0][1]
            split_4 = caller_method.split("(")
            caller_name = split_4[0]
            caller_param = []
            if not split_4[1].startswith(")"):  # params existing
                caller_param = _split_param(split_4[1][:-1])  # leave out last character, which is a )
            split_5 = callee_method.split("(")
            callee_name = split_5[0]
            callee_param = []
            if not split_5[1].startswith(")"):  # params existing
                callee_param = _split_param(
                    split_5[1].replace('\r', '').replace('\n', '')[:-1])  # leave out last character, which is a )
            caller_dict_key = build_class_method_param_dict_key(caller_class, caller_name, caller_param)
            callee_dict_key = build_class_method_param_dict_key(callee_class, callee_name, callee_param)
            # called_by = caller_dict_key
            # calls = callee_dict_key
            insert_entry(caller_dict_key, caller_class, caller_name, caller_param,
                         set(), set([callee_dict_key]))
            insert_entry(callee_dict_key, callee_class, callee_name, callee_param,
                         set([caller_dict_key]), set())
        else:
            log.error("Unknown start character: " + row_split[0])

    remove_external_calls()

    # convert all sets to lists since set is not json serializable
    if create_class_callgraph:
        for entry in class_call_graph:
            class_call_graph[entry][CALLS] = list(class_call_graph[entry][CALLS])
            class_call_graph[entry][CALLED_BY] = list(class_call_graph[entry][CALLED_BY])
        FileUtil.write_to_json(output_class_callgraph, class_call_graph)

    for entry in method_call_graph:
        method_call_graph[entry][CALLS] = list(method_call_graph[entry][CALLS])
        method_call_graph[entry][CALLED_BY] = list(method_call_graph[entry][CALLED_BY])
    FileUtil.write_to_json(output_method_callgraph, method_call_graph)

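# Hedged consumer sketch (added for illustration). It assumes the file written by
# FileUtil.write_to_json above can be read back with the standard json module and that the
# CALLS / CALLED_BY constants are the plain strings "calls" / "called_by"; both are assumptions.
import json

def _example_read_method_callgraph(dataset: Dataset):
    with open(dataset.method_callgraph_path(), 'r', encoding='utf8') as f:
        method_cg = json.load(f)
    # each key looks like "classname.methodname(paramtyp1,paramtyp2)"
    some_key = next(iter(method_cg))
    return method_cg[some_key].get("calls", []), method_cg[some_key].get("called_by", [])
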
def create_network(self):
    """
    It gets the input nodes from the dataset and creates the network
    starting from the input nodes created by `create_input_nodes`.

    Sets:
        network nodes depending on the specific child class
    """
    ffconffile = self._prediction_conf
    global_step_ff = self._prediction_global_step

    ff_dataset_conf, ff_model_parameters, ff_config = ArgoLauncher.process_conf_file(ffconffile)
    ff_dataset = Dataset.load_dataset(ff_dataset_conf)
    check_dataset_shapes(ff_dataset.x_shape_eval, self.x_shape)

    full_class_path = "prediction.core." + ff_model_parameters["model"]
    prediction_model_class = load_class(full_class_path)

    ff_network, ff_checkpoint_name = load_network(prediction_model_class,
                                                  ffconffile,
                                                  ff_dataset,
                                                  global_step=global_step_ff)

    x_shape = (None,) + self.x_shape
    dummy_x = tf.placeholder(tf.float32, shape=x_shape, name='dummy_input')

    # LOAD FF NETWORK
    dummy_output = ff_network(dummy_x, is_training=self.is_training)
    ff_network.restore(self.sess, ff_checkpoint_name)

    # CALLABLE
    if isinstance(dummy_output, tfp.distributions.Distribution):
        def ff_module(inputs, is_training):
            return ff_network(inputs, is_training=is_training).logits
    else:
        ff_module = ff_network  # .module

    self._ff_module = ff_module

    # CREATE TRANSFORM MODULES, one for attack with default params and one for accuracy calculation
    self.transform_name, self.transform_kwargs = self._transform_tuple
    self.transform_kwargs.update({
        "dummy_x": dummy_x,
        "sess": self.sess,
        "is_training": self.is_training
    })

    self._transform_module, self._transform_feedable = get_transform_module(
        self.transform_name, self.transform_kwargs)

    def _build_net(inputs):
        _x = self._transform_module(inputs)
        return self._ff_module(_x, is_training=self.is_training)

    self._build_net = _build_net

    # tile for better accuracy estimation
    self._logits = self._build_net(self.x_tiled)
    self._accuracy = 100. * tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(self._logits, axis=1),
                         tf.cast(self.y_tiled, dtype=tf.int64)),
                dtype=tf.float32))

def load_and_run_hook(conf_file, global_steps_list):
    import tensorflow as tf
    from datasets.Dataset import Dataset
    from argo.core.ArgoLauncher import ArgoLauncher

    tf.reset_default_graph()

    # ###################################################### #
    #   LOAD THE WHOLE MODEL WITH ITS OWN MONITOREDSESSION   #
    # ###################################################### #

    dataset_conf, model_parameters, config = ArgoLauncher.process_conf_file(conf_file)

    if 'WavReconstructHook' not in hooks:
        config.pop('WavReconstructHook')
    if 'WavGenerateHook' not in hooks:
        config.pop('WavGenerateHook')
    config.update(hooks)

    # remove hooks that I do not want to trigger
    config["save_summaries"] = False
    config["save_model"] = False
    config["stats_period"] = 17e300  # an insanely large number, one of the biggest floats before inf

    hooks_to_remove = [
        'LoggingMeanTensorsHook',
        'GradientsHook',
    ]
    for key in hooks_to_remove:
        config.pop(key, None)

    dataset = Dataset.load_dataset(dataset_conf)
    ArgoTFDeepLearningModelClass = load_class(model_parameters["model"],
                                              base_path=model_class_base_path)

    model_dir = os.path.split(os.path.dirname(conf_file))[0]
    model = ArgoTFDeepLearningModelClass(model_parameters, model_dir, gpu=gpu, seed=seed)
    model.init(dataset)

    # network = model._network
    # network.init_saver()

    x_shape = (1,) + tuple(model.x_shape['train'])

    model._init_session_saver()
    model.create_session(model_parameters, config)

    # if global_step is None it will restore the last checkpoint in the folder
    # model._checkpoint_dir; you can pass global_step to restore a particular checkpoint
    for global_step in global_steps_list:
        tf_logging.info('...Running global step... ' + str(global_step))
        try:
            model.restore(global_step=global_step)
        except Exception:
            print('-----LOAD EXCEPTION: could not LOAD model at step', global_step)
            continue

        # this is needed in case global_step was None, to load last step
        global_step = model.get_raw_session().run(model.global_step)

        # I force the trigger for the hooks in the config file
        max_steps = model._get_steps(fix_period, model._time_reference_str)

        # need extra list cos cannot remove elements while iterating
        to_remove = []
        for hook in model.hooks:
            if type(hook).__name__ in hook_keys:
                hook._timer.reset()
                hook.before_training(model.sess)
                hook._timer.update_last_triggered_step(global_step - max_steps)
            else:
                to_remove.append(hook)
        for h in to_remove:
            model.hooks.remove(h)

        # two times to trigger the hooks, since first step they are disabled by design
        gs = model.sess.run(model.global_step, feed_dict={model.raw_x: np.zeros(x_shape)})
        gs = model.sess.run(model.global_step, feed_dict={model.raw_x: np.zeros(x_shape)})

    tf_logging.info('Finished with model...')

def __init__(self, dataset: Dataset):
    super().__init__(dataset)
    self.callgraph_aggregator = CallGraphTraceLinkAggregator(
        0.9, NeighborStrategy.both, dataset.method_callgraph())

    raise ValueError(
        "Either you chose the wrong model or you didn't specify an autoencoder conf file!"
    )

if model_choice[0] == 'f':
    if autoencconffile is not None:
        raise ValueError("FF requires no autoencoder!")

###################
## LOAD DATASETS ##
###################

ffmodeldir = os.path.dirname(ffconffile)
ff_dataset_conf, ff_model_parameters, ff_config = ArgoLauncher.process_conf_file(args.ffconffile)
ff_dataset = Dataset.load_dataset(ff_dataset_conf)

if model_choice[0] == 'v':  # i.e. there is a VAE model
    vaemodeldir = os.path.dirname(autoencconffile)
    vae_dataset_conf, vae_model_parameters, vae_config = ArgoLauncher.process_conf_file(args.autoencconffile)
    vae_dataset = Dataset.load_dataset(vae_dataset_conf)

    # if the datasets' x_shape differ, raise an exception: what would a comparison mean otherwise?
    assert ff_dataset.x_shape == vae_dataset.x_shape, \
        "the VAE and FF networks that you are trying to load have been " \
        "trained on datasets with different x_shape: `%s` and `%s`" % (str(ff_dataset.x_shape),
                                                                       str(vae_dataset.x_shape))

x_shape = (None,) + ff_dataset.x_shape

mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

def build_graph(dataset_path: str,
                log_dir: str,
                graph_type: GraphType,
                epoch_num: int,
                batch_size: int,
                cnn_model: NerualNetwork.__class__,
                cnn_param: dict,
                sess: tf.Session,
                data_param: dict,
                data_loader: Dataset,
                base_lr=1e-4,
                base_lambda=1000):
    with sess.graph.as_default():
        global_step = tf.train.get_or_create_global_step(graph=sess.graph)
        weight_regularizer = tf.contrib.layers.l2_regularizer(0.0005)
        is_training = graph_type == GraphType.TRAIN

        dataset, num_class = data_loader.load_data(dataset_path, is_training,
                                                   epoch_num, batch_size,
                                                   data_param=data_param)
        image, label = dataset
        model = cnn_model()

        if graph_type == GraphType.EVAL:
            logits = model.inference(image, num_class, label=None,
                                     param={
                                         **cnn_param,
                                         'global_steps': global_step,
                                         'weight_regularizer': weight_regularizer,
                                         'graph_type': graph_type
                                     })
            saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True)

            # initialize variables
            if os.path.isdir(log_dir):
                latest_ckpt = tf.train.latest_checkpoint(os.path.expanduser(log_dir))
            else:
                latest_ckpt = log_dir
            if latest_ckpt is not None:
                # restore model
                saver.restore(sess, latest_ckpt)
            else:
                raise ValueError(f"No model to evaluate in {log_dir}")

            # return normalized features and corresponding labels
            return logits / tf.reshape(tf.norm(logits, axis=1), (-1, 1)), label, global_step

        logits, base_loss = model.inference(image, num_class, label=label,
                                            param={
                                                **cnn_param,
                                                'global_steps': global_step,
                                                'weight_regularizer': weight_regularizer,
                                                'graph_type': graph_type,
                                                'base_lambda': base_lambda
                                            })
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        loss = tf.add_n([base_loss] + reg_losses, name="loss")
        tf.summary.scalar("loss", loss)

        # SGD training strategy
        # base_lr = 1e-5
        #
        # def lr_decay(step):
        #     """
        #     calculate learning rate
        #     same as multistep in caffe
        #     see https://github.com/BVLC/caffe/blob/master/src/caffe/solvers/sgd_solver.cpp#L54
        #     :param step: stepvalue
        #     :return: learning rate for corresponding stepvalue
        #     """
        #     gamma = 0.1
        #     return base_lr * math.pow(gamma, step)
        #
        # boundaries = [16000, 24000, 28000]
        # values = [base_lr, *list(map(lr_decay, boundaries))]
        # learning_rate = tf.train.piecewise_constant(global_step, boundaries, values)
        # tf.summary.scalar("learning_rate", learning_rate)
        # train_op = tf.train.MomentumOptimizer(momentum=0.9,
        #                                       name='optimizer',
        #                                       learning_rate=learning_rate).minimize(loss, global_step=global_step)

        with tf.name_scope("learning_rate"):
            # Adam strategy
            def lr_decay_values(boundaries, gamma=0.1):
                v = [base_lr]
                for i in range(len(boundaries)):
                    if i == 0:
                        last_boundary = 0
                    else:
                        last_boundary = boundaries[i - 1]
                    this_boundary = boundaries[i]
                    interval = this_boundary - last_boundary
                    v.append(base_lr * math.pow(gamma, math.floor(this_boundary / interval)))
                return v

            boundaries = [16000, 24000, 28000]
            values = lr_decay_values(boundaries)
            learning_rate = tf.train.piecewise_constant(global_step, boundaries, values)
            tf.summary.scalar("learning_rate", learning_rate)

        optimizer = tf.train.AdamOptimizer(name='optimizer', learning_rate=learning_rate)
        train_op = optimizer.minimize(loss, global_step=global_step)
        # gvs = optimizer.compute_gradients(loss)
        # grads_and_vars = [(tf.clip_by_norm(grad, clip_norm), var) for grad, var in gradients]

        summary_op = tf.summary.merge_all()
        saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True)

        # initialize variables
        if os.path.isdir(log_dir):
            latest_ckpt = tf.train.latest_checkpoint(os.path.expanduser(log_dir))
        else:
            latest_ckpt = log_dir
        sess.run(tf.global_variables_initializer())
        if latest_ckpt is not None:
            # restore model
            tf.logging.info(f"loading variables from {latest_ckpt}")
            saver.restore(sess, latest_ckpt)

        # add accuracy node
        with tf.name_scope("accuracy"):
            if graph_type == GraphType.TRAIN:
                acc = tf.reduce_mean(
                    tf.cast(tf.equal(tf.argmax(label, axis=1),
                                     tf.argmax(logits, axis=1)),
                            tf.float32))
                acc_op = None
            elif graph_type == GraphType.TEST:
                acc, acc_op = tf.metrics.accuracy(tf.argmax(label, axis=1),
                                                  tf.argmax(logits, axis=1))
            else:
                raise ValueError(f"Illegal argument graph_type: {graph_type}")
            tf.summary.scalar("acc", acc)
            acc_summary = tf.summary.merge_all()

        sess.run(tf.local_variables_initializer())

        # return train_op, acc, acc_op, loss, global_step, summary_op, saver
        return train_op, acc, acc_op, loss, global_step, summary_op, saver, acc_summary

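# Hedged usage sketch (illustrative, not part of the original file). It assumes a concrete
# NerualNetwork subclass `MyCnn`, a compatible `data_loader`, and hypothetical paths; with
# GraphType.TRAIN the function returns the training op plus metrics, with GraphType.EVAL it
# returns normalized features instead.
def _example_train_step(sess, data_loader, cnn_param, data_param):
    ops = build_graph(dataset_path="data/train.tfrecord",   # hypothetical path
                      log_dir="logs/my_run",                # hypothetical path
                      graph_type=GraphType.TRAIN,
                      epoch_num=10, batch_size=64,
                      cnn_model=MyCnn,                      # hypothetical NerualNetwork subclass
                      cnn_param=cnn_param, sess=sess,
                      data_param=data_param, data_loader=data_loader)
    train_op, acc, acc_op, loss, global_step, summary_op, saver, acc_summary = ops
    _, step_loss = sess.run([train_op, loss])
    return step_loss
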
from scipy.special import expit
import sys

# from testingVAE_tools.l2_average_error import l2_reconstr_error
from testingVAE_tools.accuracy_testing import ff, accuracy

# folder to save weights
vae_dir = sys.argv[1]

# ---------------------------- LOADING VAE -----------------------------------------------------
launcher = VAELauncher()
parallelism = 1  # 1 is equivalent to pool
dataset_conf, params, config = launcher.process_conf_file(vae_dir)
dataset = Dataset.load_dataset(dataset_conf)

# train
launcher.run(dataset, params, config, parallelism)  # uncomment this line
# model is now trained
print("Model(s) trained")

# create the object, it is empty
run = 0  # run of the algorithm
epoch = 1000  # checkpoint from which to load
gpu = 0  # number of the GPU to which I want to allocate the tensorflow graph (-1 = CPU)

# load the model
vae = TFModelSaver.restore_model_from_conf_filename(
    VAE.GaussianVariationalAutoEncoder, vae_dir, epoch, gpu=gpu)

# ------------------------------------ END OF LOADING VAE ---------------------------------------

def load_and_run_hook(conf_file, global_step):
    import tensorflow as tf
    from datasets.Dataset import Dataset
    from argo.core.ArgoLauncher import ArgoLauncher

    tf.reset_default_graph()

    # ###################################################### #
    #   LOAD THE WHOLE MODEL WITH ITS OWN MONITOREDSESSION   #
    # ###################################################### #

    dataset_conf, model_parameters, config = ArgoLauncher.process_conf_file(conf_file)
    config.update(hooksconfig)

    # remove hooks that I do not want to trigger
    config["save_summaries"] = False
    config["save_model"] = False
    config["stats_period"] = 17e300  # an insanely large number, one of the biggest floats before inf

    hooks_to_remove = [
        'LoggingMeanTensorsHook',
        'GradientsHook',
    ]
    for key in hooks_to_remove:
        config.pop(key, None)

    dataset = Dataset.load_dataset(dataset_conf)
    ArgoTFDeepLearningModelClass = load_class(model_parameters["model"],
                                              base_path=model_class_base_path)

    # add information about the dataset for the launchable construction,
    # needed in view of future keras compatibility;
    # try/except to allow compatibility for datasets which do not have labels (see Dataset interface)
    try:
        output_shape = dataset.y_shape
    except ValueError:
        output_shape = None

    dataset_info = {
        "output_shape": output_shape,
        "input_shape": dataset.x_shape_train
    }
    model_parameters.update(dataset_info)

    model_dir = os.path.split(os.path.dirname(conf_file))[0]
    model = ArgoTFDeepLearningModelClass(model_parameters, model_dir, gpu=gpu, seed=seed)
    model.init(dataset)

    # network = model._network
    # network.init_saver()

    x_shape = (1,) + model.x_shape['train']

    # I want the input shape, but I don't want to go through the handle, which might have more
    # None shapes (if the loop dataset has cropping)
    # train_loop_iter, _ = dataset.get_dataset_iterator(1, "train", shuffle=1, repeat=1, augment=1)
    # x_shape = train_loop_iter.get_next()[0].shape.as_list()
    # for i, d in enumerate(x_shape):
    #     if d is None:
    #         x_shape[i] = 1

    model._init_session_saver()
    model.create_session(model_parameters, config)

    # if global_step is None it will restore the last checkpoint in the folder
    # model._checkpoint_dir; you can pass global_step to restore a particular checkpoint
    model.restore(global_step=global_step)

    # this is needed in case global_step was None, to load last step
    global_step = model.get_raw_session().run(model.global_step)

    # I force the trigger for the hooks in the config file
    max_steps = model._get_steps(fix_period, model._time_reference_str)

    # need extra list cos cannot remove elements while iterating
    to_remove = []
    for hook in model.hooks:
        if type(hook).__name__ in hook_keys:
            hook._timer.reset()
            hook._timer.update_last_triggered_step(global_step - max_steps)
        else:
            to_remove.append(hook)
    for h in to_remove:
        model.hooks.remove(h)

    # two times to trigger the hooks, since first step they are disabled by design
    gs = model.sess.run(model.global_step, feed_dict={model.raw_x: np.zeros(x_shape)})
    gs = model.sess.run(model.global_step, feed_dict={model.raw_x: np.zeros(x_shape)})

def task_execute(self, dm_params, config, gpu=-1, dependencies=None, lock=None, message_prefix=""):
    """
    This method takes care of executing a task passed as a parameter.
    The task is defined in opts, while config contains other information necessary to run the task,
    including information about loggers.

    Args:
        dm_params:
        config:
        gpu:
        dependencies:
        lock:
        message_prefix:

    Returns:

    """

    # There is a reason to import tensorflow only here, but I don't remember it.
    # Since I am a responsible person, I provide you a link to satisfy your curiosity,
    # see https://zhuanlan.zhihu.com/p/24311810
    # (oppps unfortunately it's in Chinese ;-( )
    # Since I am extremely nice (and feel guilty) I provide a translation
    #
    # It should be noted that some side effects occur when Cuda tools such as import theano
    # or import tensorflow are called, and the side effects are copied to the child processes
    # as they are and errors then occur, such as:
    #
    # could not retrieve CUDA device count: CUDA_ERROR_NOT_INITIALIZED
    #
    # The solution is to ensure that the parent process does not introduce these tools,
    # but after the child process is created, let each child process introduce them itself.
    import tensorflow as tf

    # I have to reset the graph here, because there might be leftovers from previous jobs;
    # it happens if you use tf.variable_scope(., reuse=None) somewhere you see the error
    tf.reset_default_graph()

    dataset_params, model_params = dm_params

    # TODO this has to be fixed when the optimization module will be created
    dataset = Dataset.load_dataset(dataset_params)

    # I need to copy configs here in case some algorithms are adding stuff to the dictionaries,
    # e.g. regularizers etcetera... Since python passes by reference they get modified before
    # writing to file, resulting in unreadable confs in experiment folders. Please leave it here (Riccardo)
    model_params_orig = copy.deepcopy(model_params)
    dataset_params_orig = copy.deepcopy(dataset_params)
    config_orig = copy.deepcopy(config)

    # this message_prefix is used for debug purposes, in particular to distinguish the prints
    # coming from different consumers. It is a prefix added to each print
    if message_prefix != "":
        message_prefix += " "

    # setting the seed for numpy for the task, which is specified in opts["seed"],
    # set in the function create_opts_list where the Cartesian product is computed
    print(message_prefix + "setting seed=" + str(model_params["seed"]))
    np.random.seed(model_params["seed"])

    # create the full_id, which includes
    # ALGORITHM-NAME_DATASET-NAME-WITH-OPTIONS_ALGORITHM-OPTIONS
    # notice that this method may be overwritten by some Launchers, such as
    # TestAdvExamplesLauncher, which implements more sophisticated naming conventions for the
    # algorithm

    # get the class to load
    launchableClass = self._load_model_class(model_params["model"])

    # add information about the dataset for the launchable construction, needed in view of future keras compatibility;
    # try/except to allow compatibility for datasets which do not have labels (see Dataset interface)
    try:
        output_shape = dataset.y_shape
    except ValueError:
        output_shape = None

    dataset_info = {
        "output_shape": output_shape,
        "input_shape": dataset.x_shape_train
    }
    model_params.update(dataset_info)

    baseDir = config["dirName"] + "/" + dataset.id

    # TODO why is check_ops so high in the hierarchy?
    check_ops = getattr(config, "check_ops", False)

    self._launchable = launchableClass(model_params,
                                       baseDir,
                                       check_ops=check_ops,
                                       gpu=gpu,
                                       seed=model_params['seed'])

    dirName = self._launchable.dirName
    full_id = get_full_id(dataset, self._launchable)

    print(message_prefix + "got a new job, checking " + full_id)

    # check if the algorithm has been executed previously and successfully completed;
    # this is certified by the existence of a log file in dirName
    log_file = dirName + '/experiment.log'

    if not os.path.isfile(log_file):
        # if not, I need to prepare to execute the algorithm, by first creating the necessary
        # directories, such as those to save models and log general purpose quantities, in case
        # this is specified in the config

        # if lock is None, there is no need to lock resources, since there is no parallelism
        if lock:
            print(message_prefix + "consumer " + str(os.getpid()) +
                  " checking locks for " + full_id)
            lock_resources = self.lock_resources(lock, dependencies, model_params, config)
        else:
            lock_resources = True

        # in case lock_resources is false, then I cannot run the algorithm, thus I return false
        if not lock_resources:
            print(message_prefix + "consumer " + str(os.getpid()) +
                  " lock not available " + full_id)
            return False
        else:
            print(message_prefix + "consumer " + str(os.getpid()) +
                  " available or locked " + full_id)

        # create directories where the conf and txt files are saved; notice that in case of more
        # sophisticated algorithms the function can be overwritten by the child Launcher,
        # as in TestAdvExamplesLauncher
        # dirName, launchable_id = self.create_path_directories(path)
        os.makedirs(dirName, exist_ok=True)

        # choose between running on GPU or CPU
        if gpu == -1:
            print(message_prefix + "running on cpu")
            device = '/cpu:0'
            os.environ["CUDA_VISIBLE_DEVICES"] = ""
        else:
            os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
            print(message_prefix + "running on gpu = " + str(gpu))
            device = '/gpu:' + '0'

        # create a single conf file which allows to execute only the specific instance extracted
        # by the Cartesian product, independently from the contents of the original conf file
        ArgoLauncher.write_conf_file(dirName + '/experiment.conf',
                                     dataset_params_orig,
                                     model_params_orig,
                                     config_orig)

        # instantiate algorithm to be run
        self.initialize(self._launchable, config)

        # start timer
        start = strftime("%Y-%m-%d %H:%M:%S\n", gmtime())
        startTime = time.time()

        assert (self._launchable is not None), \
            "the Launchable object has to be instantiated before executing"

        self.execute(self._launchable, dataset, model_params, config)

        # stop timer
        end = strftime("%Y-%m-%d %H:%M:%S\n", gmtime())
        endTime = time.time()

        self._launchable.release()
        del dataset
        gc.collect()
        print("Released")

        print(message_prefix + "consumer " + str(os.getpid()) +
              " unlocking resources for " + full_id)
        if lock:
            self.unlock_resources(lock, dependencies, model_params, config)

        f = open(log_file, 'w')
        f.write("started at " + start)
        f.write("done at " + end)
        f.write("duration " + str(endTime - startTime) + "\n")
        f.write("seed used is " + str(model_params["seed"]) + "\n")
        f.write("gpu " + str(gpu) + "\n")
        f.close()

        print(message_prefix + "completed job " + full_id)
    else:
        print(message_prefix + "found completed job " + full_id)

    return True