def _validate(self):
    '''get the validation loss

    returns:
        - the validation loss
        - an op to update the validation loss
        - the number of validation batches
    '''

    #create the evaluator
    evaltype = self.evaluatorconf.get('evaluator', 'evaluator')
    if evaltype != 'None':
        evaluator = evaluator_factory.factory(evaltype)(
            conf=self.evaluatorconf,
            dataconf=self.dataconf,
            model=self.model)

        return evaluator.evaluate()
def _validate(self):
    '''get the validation loss

    returns:
        - the validation loss for a batch
        - the number of validation batches
    '''

    #create the evaluator
    evaltype = self.evaluatorconf.get('evaluator', 'evaluator')
    if evaltype != 'None':
        evaluator = evaluator_factory.factory(evaltype)(
            conf=self.evaluatorconf,
            dataconf=self.dataconf,
            model=self.model)

        #compute the loss
        val_batch_loss, valbatches = evaluator.evaluate()

        return val_batch_loss, valbatches
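# A minimal, self-contained sketch (an illustration, not part of the trainer's
# actual API) of the validation policy used further down in this file: average
# the per-batch validation losses and halve the learning rate when validation
# stops improving. The dictionary keys 'best_loss' and 'learning_rate' are
# hypothetical names chosen for this example.
def update_after_validation(val_batch_losses, state):
    '''val_batch_losses: list of per-batch validation losses
    state: dict with hypothetical keys 'best_loss' and 'learning_rate'
    '''
    val_loss = sum(val_batch_losses) / float(len(val_batch_losses))
    if val_loss >= state['best_loss']:
        #no improvement: halve the learning rate, mirroring the half_lr op
        #used by the trainer below
        state['learning_rate'] /= 2.0
    else:
        state['best_loss'] = val_loss
    return state

#example: a worse validation loss triggers the halving
state = update_after_validation(
    [3.2, 3.4], {'best_loss': 3.0, 'learning_rate': 0.001})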
def __init__(self, conf, tasksconf, dataconf, modelconf, evaluatorconf, expdir, init_filename, server, task_index): ''' NnetTrainer constructor, creates the training graph Args: conf: the trainer config taskconf: the config file for each task dataconf: the data configuration as a ConfigParser modelconf: the neural net model configuration evaluatorconf: the evaluator configuration for evaluating if None no evaluation will be done expdir: directory where the summaries will be written init_filename: filename of the network that should be used to initialize the model. Put to None if no network is available/wanted. server: optional server to be used for distributed training task_index: optional index of the worker task in the cluster ''' self.expdir = expdir self.server = server self.conf = conf self.tasksconf = tasksconf self.task_index = task_index self.init_filename = init_filename self.batch_size = int(conf['batch_size']) cluster = tf.train.ClusterSpec(server.server_def.cluster) #create the graph self.graph = tf.Graph() #3 model types for multi task: single one to one; single one to many; multiple one to one #single one to one: the whole model is shared for all tasks, only loss function can be different #single one to many: each task has a separate output so only part of the network is shared, eg evrything but the output layer #multiple one to one: each task has its own network. Possibly the outputs are combined in a loss function #create the model modelfile = os.path.join(expdir, 'model', 'model.pkl') with open(modelfile, 'wb') as fid: self.model = model_factory.factory( modelconf.get('model', 'architecture'))(conf=modelconf) pickle.dump(self.model, fid) evaltype = evaluatorconf.get('evaluator', 'evaluator') #get the database configurations input_dataconfs = dict() target_dataconfs = dict() loss_computers = dict() nr_input_sections = dict() if evaltype != 'None': evaluators = dict() for task in self.conf['tasks'].split(' '): taskconf = self.tasksconf[task] #get the database configurations input_names = modelconf.get('io', 'inputs').split(' ') if input_names == ['']: input_names = [] input_sections = [taskconf[i].split(' ') for i in input_names] nr_input_sections[task] = len(input_sections) task_input_dataconfs = [] for sectionset in input_sections: task_input_dataconfs.append([]) for section in sectionset: task_input_dataconfs[-1].append( dict(dataconf.items(section))) input_dataconfs[task] = task_input_dataconfs output_names = taskconf['targets'].split(' ') if output_names == ['']: output_names = [] target_sections = [taskconf[o].split(' ') for o in output_names] task_target_dataconfs = [] for sectionset in target_sections: task_target_dataconfs.append([]) for section in sectionset: task_target_dataconfs[-1].append( dict(dataconf.items(section))) target_dataconfs[task] = task_target_dataconfs #create the loss computer loss_computer = loss_computer_factory.factory( taskconf['loss_type'])(self.batch_size) loss_computers[task] = loss_computer if evaltype != 'None': evaluator = evaluator_factory.factory(evaltype)( conf=evaluatorconf, dataconf=dataconf, model=self.model, task=task) evaluators[task] = evaluator if 'local' in cluster.as_dict(): num_replicas = 1 device = tf.DeviceSpec(job='local') else: #distributed training num_replicas = len(cluster.as_dict()['worker']) num_servers = len(cluster.as_dict()['ps']) ps_strategy = tf.contrib.training.GreedyLoadBalancingStrategy( num_tasks=num_servers, load_fn=tf.contrib.training.byte_size_load_fn) device = 
tf.train.replica_device_setter(ps_tasks=num_servers, ps_strategy=ps_strategy) chief_ps = tf.DeviceSpec(job='ps', task=0) self.is_chief = task_index == 0 #define the placeholders in the graph with self.graph.as_default(): #create a local num_steps variable self.num_steps = tf.get_variable( name='num_steps', shape=[], dtype=tf.int32, initializer=tf.constant_initializer(0), trainable=False) #a variable to hold the amount of steps already taken self.global_step = tf.get_variable( name='global_step', shape=[], dtype=tf.int32, initializer=tf.constant_initializer(0), trainable=False) should_terminate = tf.get_variable( name='should_terminate', shape=[], dtype=tf.bool, initializer=tf.constant_initializer(False), trainable=False) self.terminate = should_terminate.assign(True).op #create a check if training should continue self.should_stop = tf.logical_or( tf.greater_equal(self.global_step, self.num_steps), should_terminate) with tf.device(device): data_queues = dict() num_steps = [] done_ops = [] for task in self.conf['tasks'].split(' '): #check if running in distributed model if 'local' in cluster.as_dict(): #get the filenames data_queue_elements, _ = input_pipeline.get_filenames( input_dataconfs[task] + target_dataconfs[task]) #create the data queue and queue runners (inputs get shuffled! I already did this so set to False) data_queue = tf.train.string_input_producer( string_tensor=data_queue_elements, shuffle=False, seed=None, capacity=self.batch_size * 2, shared_name='data_queue_' + task) data_queues[task] = data_queue #compute the number of steps if int(conf['numbatches_to_aggregate']) == 0: task_num_steps = (int(conf['num_epochs']) * len(data_queue_elements) / self.batch_size) else: task_num_steps = ( int(conf['num_epochs']) * len(data_queue_elements) / (self.batch_size * int(conf['numbatches_to_aggregate']))) #set the number of steps num_steps.append(task_num_steps) done_ops.append(tf.no_op()) else: with tf.device(chief_ps): #get the data queue data_queue = tf.FIFOQueue( capacity=self.batch_size * (num_replicas + 1), shared_name='data_queue_' + task, name='data_queue_' + task, dtypes=[tf.string], shapes=[[]]) data_queues[task] = data_queue #get the number of steps from the parameter server num_steps_queue = tf.FIFOQueue( capacity=num_replicas, dtypes=[tf.int32], shared_name='num_steps_queue', name='num_steps_queue', shapes=[[]]) #set the number of steps task_num_steps = num_steps_queue.dequeue() #get the done queues for i in range(num_servers): with tf.device('job:ps/task:%d' % i): done_queue = tf.FIFOQueue( capacity=num_replicas, dtypes=[tf.bool], shapes=[[]], shared_name='done_queue%d' % i, name='done_queue%d' % i) done_ops.append(done_queue.enqueue(True)) self.set_num_steps = self.num_steps.assign(min(num_steps)).op self.done = tf.group(*done_ops) #training part with tf.variable_scope('train'): #a variable to scale the learning rate (used to reduce the #learning rate in case validation performance drops) learning_rate_fact = tf.get_variable( name='learning_rate_fact', shape=[], initializer=tf.constant_initializer(1.0), trainable=False) #compute the learning rate with exponential decay and scale #with the learning rate factor self.learning_rate = (tf.train.exponential_decay( learning_rate=float(conf['initial_learning_rate']), global_step=self.global_step, decay_steps=self.num_steps, decay_rate=float(conf['learning_rate_decay'])) * learning_rate_fact) #create the optimizer optimizer = tf.train.AdamOptimizer(self.learning_rate) self.total_loss = tf.get_variable( name='total_loss', shape=[], 
dtype=tf.float32, initializer=tf.constant_initializer(0), trainable=False) self.reset_loss = self.total_loss.assign(0.0) loss = [] for task in self.conf['tasks'].split(' '): with tf.variable_scope(task): #create the input pipeline data, seq_length = input_pipeline.input_pipeline( data_queue=data_queues[task], batch_size=self.batch_size, numbuckets=int(conf['numbuckets']), dataconfs=input_dataconfs[task] + target_dataconfs[task]) inputs = { input_names[i]: d for i, d in enumerate( data[:nr_input_sections[task]]) } seq_length = { input_names[i]: d for i, d in enumerate( seq_length[:nr_input_sections[task]]) } targets = { output_names[i]: d for i, d in enumerate( data[nr_input_sections[task]:]) } #target_seq_length = { #output_names[i]: d #for i, d in enumerate(seq_length[nr_input_sections[task]:])} #compute the training outputs of the model logits = self.model(inputs=inputs, input_seq_length=seq_length, is_training=True) #TODO: The proper way to exploit data paralellism is via the #SyncReplicasOptimizer defined below. However for some reason it hangs #and I have not yet found a solution for it. For the moment the gradients #are accumulated in a way that does not allow data paralellism and there # is no advantage on having multiple workers. (We also accumulate the loss) #create an optimizer that aggregates gradients #if int(conf['numbatches_to_aggregate']) > 0: #optimizer = tf.train.SyncReplicasOptimizer( #opt=optimizer, #replicas_to_aggregate=int( #conf['numbatches_to_aggregate'])#, ##total_num_replicas=num_replicas #) #compute the loss task_loss = loss_computers[task](targets, logits, seq_length) #append the task loss to the global loss loss.append(task_loss) #accumulate losses from tasks with tf.variable_scope('accumulate_loss_from_tasks'): loss = tf.reduce_mean(loss) #accumulate losses from batches self.acc_loss = self.total_loss.assign_add(loss) ##compute the gradients #grads_and_vars = optimizer.compute_gradients(self.loss) #with tf.variable_scope('clip'): #clip_value = float(conf['clip_grad_value']) ##clip the gradients #grads_and_vars = [(tf.clip_by_value(grad, -clip_value, clip_value), var) #for grad, var in grads_and_vars] self.params = tf.trainable_variables() grads = [ tf.get_variable(param.op.name, param.get_shape().as_list(), initializer=tf.constant_initializer(0), trainable=False) for param in self.params ] self.reset_grad = tf.variables_initializer(grads) #compute the gradients minibatch_grads_and_vars = optimizer.compute_gradients( loss) with tf.variable_scope('clip'): clip_value = float(conf['clip_grad_value']) #clip the gradients minibatch_grads_and_vars = [ (tf.clip_by_value(grad, -clip_value, clip_value), var) for grad, var in minibatch_grads_and_vars ] (minibatchgrads, minibatchvars) = zip(*minibatch_grads_and_vars) #update gradients by accumulating them self.update_gradients = [ grad.assign_add(batchgrad) for batchgrad, grad in zip(minibatchgrads, grads) ] #opperation to apply the gradients grads_and_vars = list(zip(grads, minibatchvars)) apply_gradients_op = optimizer.apply_gradients( grads_and_vars=grads_and_vars, global_step=self.global_step, name='apply_gradients') #all remaining operations with the UPDATE_OPS GraphKeys update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) #create an operation to update the gradients, the batch_loss #and do all other update ops self.update_op = tf.group(*([apply_gradients_op] + update_ops), name='update') if evaltype != 'None': #validation part with tf.variable_scope('validate'): #create a variable to hold the validation loss 
self.validation_loss = tf.get_variable( name='validation_loss', shape=[], dtype=tf.float32, initializer=tf.constant_initializer(0), trainable=False) #create a variable to save the last step where the model #was validated validated_step = tf.get_variable( name='validated_step', shape=[], dtype=tf.int32, initializer=tf.constant_initializer( -int(conf['valid_frequency'])), trainable=False) #a check if validation is due self.should_validate = tf.greater_equal( self.global_step - validated_step, int(conf['valid_frequency'])) val_batch_loss = [] valbatches = [] for task in self.conf['tasks'].split(' '): with tf.variable_scope(task): task_val_batch_loss, task_valbatches, _, _ = evaluators[ task].evaluate() val_batch_loss.append(task_val_batch_loss) valbatches.append(task_valbatches) val_batch_loss = tf.reduce_mean(val_batch_loss) self.valbatches = min(valbatches) self.update_loss = self.validation_loss.assign( self.validation_loss + val_batch_loss #/self.valbatches ).op #update the learning rate factor self.half_lr = learning_rate_fact.assign( learning_rate_fact / 2).op #create an operation to updated the validated step self.update_validated_step = validated_step.assign( self.global_step).op #variable to hold the best validation loss so far self.best_validation = tf.get_variable( name='best_validation', shape=[], dtype=tf.float32, initializer=tf.constant_initializer(1.79e+308), trainable=False) #op to update the best velidation loss self.update_best = self.best_validation.assign( self.validation_loss).op #a variable that holds the amount of workers at the #validation point waiting_workers = tf.get_variable( name='waiting_workers', shape=[], dtype=tf.int32, initializer=tf.constant_initializer(0), trainable=False) #an operation to signal a waiting worker self.waiting = waiting_workers.assign_add(1).op #an operation to set the waiting workers to zero self.reset_waiting = waiting_workers.initializer #an operation to check if all workers are waiting self.all_waiting = tf.equal(waiting_workers, num_replicas - 1) tf.summary.scalar('validation loss', self.validation_loss) else: self.update_loss = None tf.summary.scalar('learning rate', self.learning_rate) #create a histogram for all trainable parameters for param in tf.trainable_variables(): tf.summary.histogram(param.name, param) #create the scaffold self.scaffold = tf.train.Scaffold()
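# A standalone TF1-style sketch of the gradient-accumulation workaround
# described in the TODO above: minibatch gradients are clipped and added into
# non-trainable buffer variables, and only applied to the parameters
# afterwards. This is an assumption-laden illustration; the function and
# variable names here are not the trainer's actual ones.
import tensorflow as tf

def build_accumulated_update(loss, learning_rate, clip_value):
    '''returns (reset_op, accumulate_op, apply_op) for accumulated updates'''
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()

    #one zero-initialized, non-trainable buffer per trainable parameter
    buffers = [
        tf.get_variable(
            p.op.name + '_grad_buffer',
            p.get_shape().as_list(),
            initializer=tf.constant_initializer(0),
            trainable=False)
        for p in params]
    reset_op = tf.variables_initializer(buffers)

    #clip the minibatch gradients and add them to the buffers
    grads_and_vars = optimizer.compute_gradients(loss, var_list=params)
    accumulate_op = tf.group(*[
        buf.assign_add(tf.clip_by_value(grad, -clip_value, clip_value))
        for buf, (grad, _) in zip(buffers, grads_and_vars)])

    #apply the accumulated gradients in a single step
    apply_op = optimizer.apply_gradients(zip(buffers, params))

    return reset_op, accumulate_op, apply_op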
def test(expdir, test_model_checkpoint, task): """does everything for testing""" # read the database config file database_cfg = configparser.ConfigParser() database_cfg.read(os.path.join(expdir, 'database.cfg')) # read the model config file model_cfg = configparser.ConfigParser() model_cfg.read(os.path.join(expdir, 'model.cfg')) # read the evaluator config file evaluator_cfg = configparser.ConfigParser() evaluator_cfg.read(os.path.join(expdir, 'evaluator.cfg')) losses_cfg_file = os.path.join(expdir, 'loss.cfg') if not os.path.isfile(losses_cfg_file): warnings.warn( 'In following versions it will be required to provide a loss config file', Warning) loss_cfg = None else: loss_cfg = configparser.ConfigParser() loss_cfg.read(losses_cfg_file) if evaluator_cfg.has_option(task, 'output_handling_type'): output_handling_type = evaluator_cfg.get(task, 'output_handling_type') else: output_handling_type = 'reconstructor' if output_handling_type == 'reconstructor': # read the reconstructor config file output_handler_cfg = configparser.ConfigParser() output_handler_cfg.read(os.path.join(expdir, 'reconstructor.cfg')) rec_dir = os.path.join(expdir, 'reconstructions', task) # read the scorer config file scorer_cfg = configparser.ConfigParser() scorer_cfg.read(os.path.join(expdir, 'scorer.cfg')) elif output_handling_type == 'speaker_verification': # read the speaker verification output handler config file output_handler_cfg = configparser.ConfigParser() output_handler_cfg.read( os.path.join(expdir, 'speaker_verification_handler.cfg')) store_dir = os.path.join(expdir, 'speaker_verification_data', task) # read the scorer config file scorer_cfg = configparser.ConfigParser() scorer_cfg.read(os.path.join(expdir, 'speaker_verification_scorer.cfg')) else: raise BaseException('Unknown output handling type: %s' % output_handling_type) # read the postprocessor config file, if it exists try: postprocessor_cfg = configparser.ConfigParser() postprocessor_cfg.read(os.path.join(expdir, 'postprocessor.cfg')) if not postprocessor_cfg.sections(): postprocessor_cfg = None except: postprocessor_cfg = None # load the model with open(os.path.join(expdir, 'model', 'model.pkl'), 'rb') as fid: models = pickle.load(fid) if \ '/esat/spchtemp/scratch/jzegers/Nabu-SS2.0/Default17_MERL_DANet_Drude2018_sum_task_losses_sweep' in expdir or \ '/esat/spchtemp/scratch/jzegers/Nabu-SS2.0/Default17_MERL_DANet_Drude2018_acc_step_norm_weights_sweep' in expdir: models['speaker_embeddings_model'].conf['no_bias'] = 'True' models['outlayer'].conf['no_bias'] = 'True' models['id_outlayer'].conf['no_bias'] = 'True' with open(os.path.join(expdir, 'model', 'model.pkl'), 'wb') as fid2: pickle.dump(models, fid2) elif \ '/esat/spchtemp/scratch/jzegers/Nabu-SS2.0/Default17_SREMix_101trspks_DANet_hamming_scipy_Drude2018' in expdir: models['speaker_embeddings_model'].conf['no_bias'] = 'True' models['outlayer'].conf['no_bias'] = 'True' models['id_outlayer'].conf['no_bias'] = 'False' with open(os.path.join(expdir, 'model', 'model.pkl'), 'wb') as fid2: pickle.dump(models, fid2) if os.path.isfile(os.path.join(expdir, 'loss_%s' % task)): print 'Already reconstructed all signals for task %s, going straight to scoring' % task if evaluator_cfg.has_option(task, 'requested_utts'): requested_utts = int(evaluator_cfg.get(task, 'requested_utts')) else: requested_utts = int( evaluator_cfg.get('evaluator', 'requested_utts')) if evaluator_cfg.has_option(task, 'batch_size'): batch_size = int(evaluator_cfg.get(task, 'batch_size')) else: batch_size = 
int(evaluator_cfg.get('evaluator', 'batch_size')) numbatches = int(float(requested_utts) / float(batch_size)) else: print 'Evaluating task %s' % task # create the evaluator if loss_cfg: loss_cfg = dict( loss_cfg.items(evaluator_cfg.get(task, 'loss_type'))) evaltype = evaluator_cfg.get(task, 'evaluator') evaluator = evaluator_factory.factory(evaltype)(conf=evaluator_cfg, lossconf=loss_cfg, dataconf=database_cfg, models=models, task=task) checkpoint_dir = os.path.join(expdir, 'logdir_%s' % task) # create the output handler if output_handling_type == 'reconstructor': # create the reconstructor task_output_handler_cfg = dict(output_handler_cfg.items(task)) reconstruct_type = task_output_handler_cfg['reconstruct_type'] # whether the targets should be used to determine the optimal speaker permutation on frame level. Should # only be used for analysis and not for reporting results. if 'optimal_frame_permutation' in task_output_handler_cfg and \ task_output_handler_cfg['optimal_frame_permutation'] == 'True': optimal_frame_permutation = True else: optimal_frame_permutation = False output_handler = reconstructor_factory.factory(reconstruct_type)( conf=task_output_handler_cfg, evalconf=evaluator_cfg, dataconf=database_cfg, rec_dir=rec_dir, task=task, optimal_frame_permutation=optimal_frame_permutation) if optimal_frame_permutation: opt_frame_perm_op = getattr( output_handler, "reconstruct_signals_opt_frame_perm", None) if not callable(opt_frame_perm_op): raise NotImplementedError( 'The "optimal_frame_permutation" flag was set while the function ' '"reconstruct_signals_opt_frame_perm" is not implemented in the reconstructor' ) elif output_handling_type == 'speaker_verification': task_output_handler_cfg = dict(output_handler_cfg.items(task)) speaker_verification_handler_type = task_output_handler_cfg[ 'speaker_verification_handler_type'] output_handler = speaker_verification_handler_factory.factory( speaker_verification_handler_type)( conf=task_output_handler_cfg, evalconf=evaluator_cfg, dataconf=database_cfg, store_dir=store_dir, exp_dir=expdir, task=task) else: raise BaseException('Unknown output handling type: %s' % output_handling_type) # create the graph with tf.Graph().as_default(): # create a hook that will load the model load_hook = LoadAtBegin(test_model_checkpoint, models) # create a hook for summary writing # summary_hook = SummaryHook(os.path.join(expdir, 'logdir')) # saver_hook = tf.train.CheckpointSaverHook( checkpoint_dir=checkpoint_dir, save_steps=np.ceil(1000.0 / float(evaluator.batch_size))) config = tf.ConfigProto(intra_op_parallelism_threads=6, inter_op_parallelism_threads=2, device_count={ 'CPU': 8, 'GPU': 0 }) options = tf.RunOptions() options.report_tensor_allocations_upon_oom = True # current_batch_ind_tf = tf.get_variable( name='global_step', shape=[], dtype=tf.int32, initializer=tf.constant_initializer(0), trainable=False) current_batch_ind_inc_op = current_batch_ind_tf.assign_add(1) reset_current_batch_ind_op = current_batch_ind_tf.assign(0) # get the current batch_ind with tf.train.SingularMonitoredSession( config=config, checkpoint_dir=checkpoint_dir) as sess: start_batch_ind = sess.run(current_batch_ind_tf) start_utt_ind = start_batch_ind * evaluator.batch_size output_handler.pos = start_utt_ind output_handler.open_scp_files(from_start=start_utt_ind == 0) # compute the loss batch_loss, batch_norm, numbatches, batch_outputs, batch_targets, batch_seq_length = evaluator.evaluate( start_utt_ind=start_utt_ind) # only keep the outputs requested by the reconstructor (usually the 
output of the output layer) batch_outputs = { out_name: out for out_name, out in batch_outputs.iteritems() if out_name in output_handler.requested_output_names } batch_seq_length = { seq_name: seq for seq_name, seq in batch_seq_length.iteritems() if seq_name in output_handler.requested_output_names } hooks = [load_hook] # hooks = [load_hook, summary_hook] if numbatches > 100: hooks.append(saver_hook) # start the session with tf.train.SingularMonitoredSession( hooks=hooks, config=config, checkpoint_dir=checkpoint_dir) as sess: loss = 0.0 loss_norm = 0.0 for batch_ind in range(start_batch_ind, numbatches): print('evaluating batch number %d' % batch_ind) last_time = time.time() [ batch_loss_eval, batch_norm_eval, batch_outputs_eval, batch_targets_eval, batch_seq_length_eval ] = sess.run(fetches=[ batch_loss, batch_norm, batch_outputs, batch_targets, batch_seq_length ], options=options) loss += batch_loss_eval loss_norm += batch_norm_eval print('%f' % (time.time() - last_time)) last_time = time.time() if output_handling_type != 'reconstructor' or not optimal_frame_permutation: output_handler(batch_outputs_eval, batch_seq_length_eval) else: output_handler.opt_frame_perm(batch_outputs_eval, batch_targets_eval, batch_seq_length_eval) sess.run(current_batch_ind_inc_op) print('%f' % (time.time() - last_time)) loss = loss / loss_norm print('task %s: loss = %0.6g' % (task, loss)) # write the loss to disk with open(os.path.join(expdir, 'loss_%s' % task), 'w') as fid: fid.write(str(loss)) if hasattr(output_handler, 'scp_file'): output_handler.scp_fid.close() if hasattr(output_handler, 'masks_pointer_file'): output_handler.masks_pointer_fid.close() if os.path.isdir(checkpoint_dir): try: os.rmdir(checkpoint_dir) except: pass # from here on there is no need for a GPU anymore ==> score script to be run separately on # different machine? if evaluator_cfg.has_option(task, 'scorers_names'): scorers_names = evaluator_cfg.get(task, 'scorers_names').split(' ') else: scorers_names = [task] for scorer_name in scorers_names: task_scorer_cfg = dict(scorer_cfg.items(scorer_name)) score_types = task_scorer_cfg['score_type'].split(' ') for score_type in score_types: if os.path.isfile( os.path.join( expdir, 'results_%s_%s_complete.json' % (scorer_name, score_type))): print( 'Already found a score for score task %s for score type %s, skipping it.' 
% (scorer_name, score_type)) else: print('Scoring task %s for score type %s' % (scorer_name, score_type)) checkpoint_file = os.path.join( expdir, 'checkpoint_results_%s_%s' % (scorer_name, score_type)) if output_handling_type == 'reconstructor': # create the scorer scorer = scorer_factory.factory(score_type)( conf=task_scorer_cfg, evalconf=evaluator_cfg, dataconf=database_cfg, rec_dir=rec_dir, numbatches=numbatches, task=task, scorer_name=scorer_name, checkpoint_file=checkpoint_file) elif output_handling_type == 'speaker_verification': # create the scorer scorer = speaker_verification_scorer_factory.factory( score_type)(conf=task_scorer_cfg, evalconf=evaluator_cfg, dataconf=database_cfg, store_dir=store_dir, numbatches=numbatches, task=task, scorer_name=scorer_name, checkpoint_file=checkpoint_file) # run the scorer scorer() result_summary = scorer.summarize() with open( os.path.join( expdir, 'results_%s_%s_summary.json' % (scorer_name, score_type)), 'w') as fid: json.dump(result_summary, fid) with open( os.path.join( expdir, 'results_%s_%s_complete.json' % (scorer_name, score_type)), 'w') as fid: json.dump(scorer.storable_result(), fid) if os.path.isfile(checkpoint_file): try: os.remove(checkpoint_file) except: pass # legacy code to be removed if postprocessor_cfg != None: # && postprocessing is not done yet for this task from nabu.postprocessing.postprocessors import postprocessor_factory if evaluator_cfg.has_option(task, 'postprocessors_names'): postprocessors_names = evaluator_cfg.get( task, 'postprocessors_names').split(' ') else: postprocessors_names = [task] for postprocessors_name in postprocessors_names: task_postprocessor_cfg = dict( postprocessor_cfg.items(postprocessors_name)) postprocess_types = task_postprocessor_cfg[ 'postprocess_type'].split(' ') for postprocess_type in postprocess_types: print('Postprocessing task %s for postprocessor type %s' % (postprocessors_name, postprocess_type)) # create the postprocessor postprocessor = postprocessor_factory.factory( postprocess_type)(conf=task_postprocessor_cfg, evalconf=evaluator_cfg, expdir=expdir, rec_dir=rec_dir, postprocessors_name=postprocessors_name) # run the postprocessor postprocessor() postprocessor.matlab_eng.quit()
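# A reduced sketch of the resume-from-checkpoint pattern used in the test()
# function above: a non-trainable counter named 'global_step' is checkpointed
# so that an interrupted evaluation can restart at the last saved batch instead
# of at batch 0. The checkpoint directory and the `evaluate_batch` callable are
# placeholders for illustration only.
import tensorflow as tf

def run_resumable_evaluation(numbatches, checkpoint_dir, evaluate_batch):
    with tf.Graph().as_default():
        counter = tf.get_variable(
            'global_step', shape=[], dtype=tf.int32,
            initializer=tf.constant_initializer(0), trainable=False)
        increment = counter.assign_add(1)

        #periodically save the counter so evaluation can be resumed
        saver_hook = tf.train.CheckpointSaverHook(
            checkpoint_dir=checkpoint_dir, save_steps=100)

        with tf.train.SingularMonitoredSession(
                hooks=[saver_hook], checkpoint_dir=checkpoint_dir) as sess:
            start = sess.run(counter)
            for batch_ind in range(start, numbatches):
                evaluate_batch(sess, batch_ind)
                sess.run(increment)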
def __init__(self, task_name, trainerconf, taskconf, models, modelconf,
             dataconf, evaluatorconf, lossconf, batch_size):
    """TaskTrainer constructor, gathers the dataconfigs and sets the
    loss_computer and evaluator for this task.

    Args:
        task_name: a name for the training task
        trainerconf: the trainer config
        taskconf: the config file for each task
        models: the neural net models
        modelconf: the neural net models configuration
        dataconf: the data configuration as a ConfigParser
        evaluatorconf: the evaluator configuration for evaluating,
            if None no evaluation will be done
        lossconf: the configuration of the loss function
        batch_size: the size of the batch
    """

    self.task_name = task_name
    self.trainerconf = trainerconf
    self.taskconf = taskconf
    self.models = models
    self.modelconf = modelconf
    self.evaluatorconf = evaluatorconf
    self.batch_size = batch_size

    # get the database configurations for all inputs, outputs, intermediate
    # model nodes and models
    self.output_names = taskconf['outputs'].split(' ')
    self.input_names = taskconf['inputs'].split(' ')
    self.target_names = taskconf['targets'].split(' ')
    if self.target_names == ['']:
        self.target_names = []
    self.model_nodes = taskconf['nodes'].split(' ')

    if 'linkedsets' in taskconf:
        set_names = taskconf['linkedsets'].split(' ')
        self.linkedsets = dict()
        for set_name in set_names:
            set_input_names = ['%s_%s' % (set_name, in_name)
                               for in_name in self.input_names]
            set_target_names = ['%s_%s' % (set_name, tar_name)
                                for tar_name in self.target_names]
            self.linkedsets[set_name] = {
                'inputs': set_input_names,
                'targets': set_target_names}

        if 'linkedset_weighting' in taskconf:
            linkedset_weighting = np.array(
                map(float, taskconf['linkedset_weighting'].split(' ')))
            # the first set has the reference weight
            linkedset_weighting /= linkedset_weighting[0]
        else:
            linkedset_weighting = np.array([1.0] * len(self.linkedsets))
        self.linkedset_weighting = {
            set_name: weight
            for set_name, weight in zip(set_names, linkedset_weighting)}
    else:
        self.linkedsets = {'set0': {'inputs': self.input_names,
                                    'targets': self.target_names}}
        self.linkedset_weighting = {'set0': 1.0}

    self.input_dataconfs = dict()
    self.target_dataconfs = dict()
    for linkedset in self.linkedsets:
        self.input_dataconfs[linkedset] = []
        for input_name in self.linkedsets[linkedset]['inputs']:
            # input config
            dataconfs_for_input = []
            sections = taskconf[input_name].split(' ')
            for section in sections:
                dataconfs_for_input.append(dict(dataconf.items(section)))
            self.input_dataconfs[linkedset].append(dataconfs_for_input)

        self.target_dataconfs[linkedset] = []
        for target_name in self.linkedsets[linkedset]['targets']:
            # target config
            dataconfs_for_target = []
            sections = taskconf[target_name].split(' ')
            for section in sections:
                dataconfs_for_target.append(dict(dataconf.items(section)))
            self.target_dataconfs[linkedset].append(dataconfs_for_target)

    self.model_links = dict()
    self.inputs_links = dict()
    self.nodes_output_names = dict()
    for node in self.model_nodes:
        self.model_links[node] = taskconf['%s_model' % node]
        self.inputs_links[node] = taskconf['%s_inputs' % node].split(' ')
        if '%s_output_names' % node in taskconf:
            self.nodes_output_names[node] = \
                taskconf['%s_output_names' % node].split(' ')
        else:
            self.nodes_output_names[node] = node

    # create the loss computer
    if lossconf:
        loss_type = lossconf['loss_type']
    else:
        loss_type = taskconf['loss_type']
    self.loss_computer = loss_computer_factory.factory(loss_type)(
        lossconf, self.batch_size)

    # create the validation evaluator
    evaltype = evaluatorconf.get('evaluator', 'evaluator')
    if evaltype != 'None':
        self.evaluator = evaluator_factory.factory(evaltype)(
            conf=evaluatorconf,
            dataconf=dataconf,
            lossconf=lossconf,
            models=self.models,
            task=task_name)
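# A small standalone helper illustrating the linkedset weighting convention
# used in the constructor above: the weights are read as floats and normalized
# so the first linked set acts as the reference (weight 1.0). The function name
# is illustrative and not part of the toolkit.
def parse_linkedset_weighting(set_names, weighting_string=None):
    '''returns a dict mapping each set name to its relative weight'''
    if weighting_string:
        weights = [float(w) for w in weighting_string.split(' ')]
        # the first set has the reference weight
        weights = [w / weights[0] for w in weights]
    else:
        weights = [1.0] * len(set_names)
    return dict(zip(set_names, weights))

# example: 'set0 set1' with weighting '2 1' gives {'set0': 1.0, 'set1': 0.5}
weighting = parse_linkedset_weighting(['set0', 'set1'], '2 1')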
def test(expdir):
    '''does everything for testing'''

    #read the database config file
    database_cfg = configparser.ConfigParser()
    database_cfg.read(os.path.join(expdir, 'database.cfg'))

    #read the model config file
    model_cfg = configparser.ConfigParser()
    model_cfg.read(os.path.join(expdir, 'model.cfg'))

    #read the evaluator config file
    evaluator_cfg = configparser.ConfigParser()
    evaluator_cfg.read(os.path.join(expdir, 'evaluator.cfg'))
    #quick fix
    #evaluator_cfg.set('evaluator', 'batch_size', '5')

    #read the reconstructor config file
    reconstructor_cfg = configparser.ConfigParser()
    reconstructor_cfg.read(os.path.join(expdir, 'reconstructor.cfg'))

    #read the scorer config file
    scorer_cfg = configparser.ConfigParser()
    scorer_cfg.read(os.path.join(expdir, 'scorer.cfg'))

    #read the postprocessor config file, if it exists
    try:
        postprocessor_cfg = configparser.ConfigParser()
        postprocessor_cfg.read(os.path.join(expdir, 'postprocessor.cfg'))
        if not postprocessor_cfg.sections():
            postprocessor_cfg = None
    except:
        postprocessor_cfg = None
    postprocessor_cfg = None

    if evaluator_cfg.get('evaluator', 'evaluator') == 'multi_task':
        tasks = evaluator_cfg.get('evaluator', 'tasks').split(' ')
    else:
        raise Exception('unknown type of evaluation %s' %
                        evaluator_cfg.get('evaluator', 'evaluator'))

    #evaluate each task separately
    for task in tasks:

        rec_dir = os.path.join(expdir, 'reconstructions', task)

        #load the model
        with open(os.path.join(expdir, 'model', 'model.pkl'), 'rb') as fid:
            models = pickle.load(fid)

        if os.path.isfile(os.path.join(expdir, 'loss_%s' % task)):
            print 'Already reconstructed all signals for task %s, going straight to scoring' % task

            if evaluator_cfg.has_option(task, 'requested_utts'):
                requested_utts = int(evaluator_cfg.get(task, 'requested_utts'))
            else:
                requested_utts = int(
                    evaluator_cfg.get('evaluator', 'requested_utts'))
            if evaluator_cfg.has_option(task, 'batch_size'):
                batch_size = int(evaluator_cfg.get(task, 'batch_size'))
            else:
                batch_size = int(evaluator_cfg.get('evaluator', 'batch_size'))
            numbatches = int(float(requested_utts) / float(batch_size))

        else:
            print 'Evaluating task %s' % task

            #create the evaluator
            evaltype = evaluator_cfg.get(task, 'evaluator')
            evaluator = evaluator_factory.factory(evaltype)(
                conf=evaluator_cfg,
                dataconf=database_cfg,
                models=models,
                task=task)

            #create the reconstructor
            task_reconstructor_cfg = dict(reconstructor_cfg.items(task))
            reconstruct_type = task_reconstructor_cfg['reconstruct_type']
            reconstructor = reconstructor_factory.factory(reconstruct_type)(
                conf=task_reconstructor_cfg,
                evalconf=evaluator_cfg,
                dataconf=database_cfg,
                rec_dir=rec_dir,
                task=task)

            #create the graph
            graph = tf.Graph()

            with graph.as_default():
                #compute the loss
                batch_loss, batch_norm, numbatches, batch_outputs, \
                    batch_seq_length = evaluator.evaluate()

                #create a hook that will load the model
                load_hook = LoadAtBegin(
                    os.path.join(expdir, 'model', 'network.ckpt'), models)

                #create a hook for summary writing
                summary_hook = SummaryHook(os.path.join(expdir, 'logdir'))

                config = tf.ConfigProto(device_count={'CPU': 1, 'GPU': 0})

                options = tf.RunOptions()
                options.report_tensor_allocations_upon_oom = True

                #start the session
                with tf.train.SingularMonitoredSession(
                        hooks=[load_hook, summary_hook],
                        config=config) as sess:

                    loss = 0.0
                    loss_norm = 0.0

                    for batch_ind in range(0, numbatches):
                        print 'evaluating batch number %d' % batch_ind

                        last_time = time.time()
                        [batch_loss_eval,
                         batch_norm_eval,
                         batch_outputs_eval,
                         batch_seq_length_eval] = sess.run(
                             fetches=[batch_loss,
                                      batch_norm,
                                      batch_outputs,
                                      batch_seq_length],
                             options=options)

                        loss += batch_loss_eval
                        loss_norm += batch_norm_eval

                        print '%f' % (time.time() - last_time)
                        last_time = time.time()

                        #choosing the first seq_length
                        reconstructor(batch_outputs_eval,
                                      batch_seq_length_eval)

                        print '%f' % (time.time() - last_time)

                    loss = loss / loss_norm

            print 'task %s: loss = %0.6g' % (task, loss)

            #write the loss to disk
            with open(os.path.join(expdir, 'loss_%s' % task), 'w') as fid:
                fid.write(str(loss))

        #from here on there is no need for a GPU anymore ==> score script to be
        #run separately on a different machine?

        task_scorer_cfg = dict(scorer_cfg.items(task))
        score_types = task_scorer_cfg['score_type'].split(' ')

        for score_type in score_types:
            if os.path.isfile(os.path.join(
                    expdir,
                    'results_%s_%s_complete.json' % (task, score_type))):
                print 'Already found a score for task %s for score type %s, skipping it.' % (
                    task, score_type)
            else:
                print 'Scoring task %s for score type %s' % (task, score_type)

                #create the scorer
                scorer = scorer_factory.factory(score_type)(
                    conf=task_scorer_cfg,
                    evalconf=evaluator_cfg,
                    dataconf=database_cfg,
                    rec_dir=rec_dir,
                    numbatches=numbatches,
                    task=task)

                #run the scorer
                scorer()

                with open(os.path.join(
                        expdir,
                        'results_%s_%s_complete.json' % (task, score_type)),
                        'w') as fid:
                    json.dump(scorer.results, fid)

                result_summary = scorer.summarize()

                with open(os.path.join(
                        expdir,
                        'results_%s_%s_summary.json' % (task, score_type)),
                        'w') as fid:
                    json.dump(result_summary, fid)

        if postprocessor_cfg is not None:
            # && postprocessing is not done yet for this task
            task_postprocessor_cfg = dict(postprocessor_cfg.items(task))
            task_processor_cfg = dict(
                postprocessor_cfg.items('processor_' + task))

            postprocess_types = task_postprocessor_cfg[
                'postprocess_type'].split(' ')

            for postprocess_type in postprocess_types:
                #create the postprocessor
                postprocessor = postprocessor_factory.factory(
                    postprocess_type)(
                        conf=task_postprocessor_cfg,
                        proc_conf=task_processor_cfg,
                        evalconf=evaluator_cfg,
                        expdir=expdir,
                        rec_dir=rec_dir,
                        task=task)

                #run the postprocessor
                postprocessor()
                postprocessor.matlab_eng.quit()
def __init__(self, task_name, trainerconf, taskconf, models, modelconf,
             dataconf, evaluatorconf, batch_size):
    """TaskTrainer constructor, gathers the dataconfigs and sets the
    loss_computer and evaluator for this task.

    Args:
        task_name: a name for the training task
        trainerconf: the trainer config
        taskconf: the config file for each task
        models: the neural net models
        modelconf: the neural net models configuration
        dataconf: the data configuration as a ConfigParser
        evaluatorconf: the evaluator configuration for evaluating,
            if None no evaluation will be done
        batch_size: the size of the batch
    """

    self.task_name = task_name
    self.trainerconf = trainerconf
    self.taskconf = taskconf
    self.models = models
    self.modelconf = modelconf
    self.evaluatorconf = evaluatorconf
    self.batch_size = batch_size

    # get the database configurations for all inputs, outputs, intermediate
    # model nodes and models
    self.output_names = taskconf['outputs'].split(' ')
    self.input_names = taskconf['inputs'].split(' ')
    self.target_names = taskconf['targets'].split(' ')
    if self.target_names == ['']:
        self.target_names = []
    self.model_nodes = taskconf['nodes'].split(' ')

    if 'linkedsets' in taskconf:
        set_names = taskconf['linkedsets'].split(' ')
        self.linkedsets = dict()
        for set_name in set_names:
            inp_indices = map(int,
                              taskconf['%s_inputs' % set_name].split(' '))
            tar_indices = map(int,
                              taskconf['%s_targets' % set_name].split(' '))
            set_inputs = [inp for ind, inp in enumerate(self.input_names)
                          if ind in inp_indices]
            set_targets = [tar for ind, tar in enumerate(self.target_names)
                           if ind in tar_indices]
            self.linkedsets[set_name] = {
                'inputs': set_inputs,
                'targets': set_targets}
    else:
        self.linkedsets = {'set0': {'inputs': self.input_names,
                                    'targets': self.target_names}}

    self.input_dataconfs = dict()
    self.target_dataconfs = dict()
    for linkedset in self.linkedsets:
        self.input_dataconfs[linkedset] = []
        for input_name in self.linkedsets[linkedset]['inputs']:
            # input config
            dataconfs_for_input = []
            sections = taskconf[input_name].split(' ')
            for section in sections:
                dataconfs_for_input.append(dict(dataconf.items(section)))
            self.input_dataconfs[linkedset].append(dataconfs_for_input)

        self.target_dataconfs[linkedset] = []
        for target_name in self.linkedsets[linkedset]['targets']:
            # target config
            dataconfs_for_target = []
            sections = taskconf[target_name].split(' ')
            for section in sections:
                dataconfs_for_target.append(dict(dataconf.items(section)))
            self.target_dataconfs[linkedset].append(dataconfs_for_target)

    self.model_links = dict()
    self.inputs_links = dict()
    for node in self.model_nodes:
        self.model_links[node] = taskconf['%s_model' % node]
        self.inputs_links[node] = taskconf['%s_inputs' % node].split(' ')

    # create the loss computer
    self.loss_computer = loss_computer_factory.factory(
        taskconf['loss_type'])(self.batch_size)

    # create the validation evaluator
    evaltype = evaluatorconf.get('evaluator', 'evaluator')
    if evaltype != 'None':
        self.evaluator = evaluator_factory.factory(evaltype)(
            conf=evaluatorconf,
            dataconf=dataconf,
            models=self.models,
            task=task_name)
def test(expdir, testing=False): '''does everything for testing args: expdir: the experiments directory testing: if true only the graph will be created for debugging purposes ''' #read the database config file database_cfg = configparser.ConfigParser() database_cfg.read(os.path.join(expdir, 'database.cfg')) if testing: model_cfg = configparser.ConfigParser() model_cfg.read(os.path.join(expdir, 'model.cfg')) trainer_cfg = configparser.ConfigParser() trainer_cfg.read(os.path.join(expdir, 'trainer.cfg')) model = Model(conf=model_cfg, trainlabels=int(trainer_cfg.get('trainer', 'trainlabels')), constraint=None) else: #load the model with open(os.path.join(expdir, 'model', 'model.pkl'), 'rb') as fid: model = pickle.load(fid) #read the evaluator config file evaluator_cfg = configparser.ConfigParser() evaluator_cfg.read(os.path.join(expdir, 'test_evaluator.cfg')) #create the evaluator evaltype = evaluator_cfg.get('evaluator', 'evaluator') evaluator = evaluator_factory.factory(evaltype)(conf=evaluator_cfg, dataconf=database_cfg, model=model) #create the graph graph = tf.Graph() with graph.as_default(): #compute the loss loss, update_loss, numbatches = evaluator.evaluate() if testing: return #create a histogram for all trainable parameters for param in tf.trainable_variables(): tf.summary.histogram(param.name, param, collections=['variable_summaries']) eval_summary = tf.summary.merge_all('eval_summaries') variable_summary = tf.summary.merge_all('variable_summaries') #create a hook that will load the model load_hook = LoadAtBegin(os.path.join(expdir, 'model', 'network.ckpt'), model.variables) #start the session with tf.train.SingularMonitoredSession(hooks=[load_hook]) as sess: summary_writer = tf.summary.FileWriter( os.path.join(expdir, 'logdir')) summary = variable_summary.eval(session=sess) summary_writer.add_summary(summary) print 'TENSORFLOW ITEMS' print '---Errors----' op = sess.graph.get_operations() print 'errors:' test = tf.get_default_graph().get_tensor_by_name( "evaluate/evaluate_decoder/Sum_1:0") print test.eval(session=sess) print 'new_num_targets' test = tf.get_default_graph().get_tensor_by_name( "evaluate/evaluate_decoder/add:0") print test.eval(session=sess) print 'batch_targets' test = tf.get_default_graph().get_tensor_by_name( "evaluate/evaluate_decoder/Sum_3:0") print test.eval(session=sess) print '--CTC DECODER ---' print 'loss:' test = tf.get_default_graph().get_tensor_by_name( "validation_loss:0") print test.eval(session=sess) print 'outputs:' print 'references:' test = tf.get_default_graph().get_tensor_by_name( "evaluate/input_pipeline/batch:2") print test.eval(session=sess) print 'references_seq_length' test = tf.get_default_graph().get_tensor_by_name( "evaluate/input_pipeline/batch:3") print test.eval(session=sess) for i in range(numbatches): if eval_summary is not None: _, summary = sess.run([update_loss, eval_summary]) summary_writer.add_summary(summary, i) else: update_loss.run(session=sess) print 'loss: ' temploss = loss.eval(session=sess) print temploss loss = loss.eval(session=sess) print 'loss = %f' % loss #write the result to disk with open(os.path.join(expdir, 'result'), 'w') as fid: fid.write(str(loss))
def test(expdir):
    '''does everything for testing'''

    #read the database config file
    database_cfg = configparser.ConfigParser()
    database_cfg.read(os.path.join(expdir, 'database.cfg'))

    #load the model
    with open(os.path.join(expdir, 'model', 'model.pkl'), 'rb') as fid:
        model = pickle.load(fid)

    #read the evaluator config file
    evaluator_cfg = configparser.ConfigParser()
    evaluator_cfg.read(os.path.join(expdir, 'evaluator.cfg'))

    #create the evaluator
    evaltype = evaluator_cfg.get('evaluator', 'evaluator')
    evaluator = evaluator_factory.factory(evaltype)(
        conf=evaluator_cfg,
        dataconf=database_cfg,
        model=model)

    #create the reconstructor
    reconstruct_type = evaluator_cfg.get('reconstructor', 'reconstruct_type')
    reconstructor = reconstructor_factory.factory(reconstruct_type)(
        conf=evaluator_cfg,
        dataconf=database_cfg,
        expdir=expdir)

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    #create the graph
    graph = tf.Graph()

    with graph.as_default():
        #compute the loss
        batch_loss, numbatches, batch_outputs, batch_seq_length = \
            evaluator.evaluate()

        #create a hook that will load the model
        load_hook = LoadAtBegin(
            os.path.join(expdir, 'model', 'network.ckpt'), model)

        #create a hook for summary writing
        summary_hook = SummaryHook(os.path.join(expdir, 'logdir'))

        #start the session
        with tf.train.SingularMonitoredSession(
                hooks=[load_hook, summary_hook]) as sess:

            loss = 0.0
            for batch_ind in range(0, numbatches):
                print 'evaluating batch number %d' % batch_ind

                batch_loss_eval, batch_outputs_eval, batch_seq_length_eval = \
                    sess.run(
                        fetches=[batch_loss, batch_outputs, batch_seq_length])

                loss += batch_loss_eval

                reconstructor(batch_outputs_eval['outputs'],
                              batch_seq_length_eval['features'])

            loss = loss  #/numbatches

    print 'loss = %0.6g' % loss

    #write the loss to disk
    with open(os.path.join(expdir, 'loss'), 'w') as fid:
        fid.write(str(loss))

    #from here on there is no need for a GPU anymore ==> score script to be run
    #separately on a different machine? reconstructor.rec_dir has to be known
    #though, it can be put in evaluator_cfg

    score_type = evaluator_cfg.get('scorer', 'score_type')

    #sometimes scoring fails and it is not clear why, so just retry (at most
    #10 times)
    for i in range(10):
        try:
            #create the scorer
            scorer = scorer_factory.factory(score_type)(
                conf=evaluator_cfg,
                dataconf=database_cfg,
                rec_dir=reconstructor.rec_dir,
                numbatches=numbatches)

            #run the scorer
            scorer()
        except Exception:
            if i == 9:
                #out of retries: re-raise the original exception
                raise
            else:
                continue
        break

    with open(os.path.join(expdir, 'results_complete.json'), 'w') as fid:
        json.dump(scorer.results, fid)

    result_summary = scorer.summarize()

    with open(os.path.join(expdir, 'results_summary.json'), 'w') as fid:
        json.dump(result_summary, fid)
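# A generic version of the retry loop used above for the scorer: call a
# function a limited number of times and re-raise the original error on the
# final attempt. This is only an illustration of the pattern, not part of the
# toolkit's API.
def run_with_retries(func, max_attempts=10):
    '''call func() until it succeeds or max_attempts is reached'''
    for attempt in range(max_attempts):
        try:
            return func()
        except Exception:
            if attempt == max_attempts - 1:
                #out of attempts: propagate the original exception
                raise

# example usage with a hypothetical scoring callable
# result = run_with_retries(lambda: scorer())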
def test(expdir, testing=False): '''does everything for testing args: expdir: the experiments directory testing: if true only the graph will be created for debugging purposes ''' #read the database config file database_cfg = configparser.ConfigParser() database_cfg.read(os.path.join(expdir, 'database.conf')) if testing: model_cfg = configparser.ConfigParser() model_cfg.read(os.path.join(expdir, 'model.cfg')) trainer_cfg = configparser.ConfigParser() trainer_cfg.read(os.path.join(expdir, 'trainer.cfg')) model = Model(conf=model_cfg, trainlabels=int(trainer_cfg.get('trainer', 'trainlabels')), constraint=None) else: #load the model with open(os.path.join(expdir, 'model', 'model.pkl'), 'rb') as fid: model = pickle.load(fid) #read the evaluator config file evaluator_cfg = configparser.ConfigParser() evaluator_cfg.read(os.path.join(expdir, 'test_evaluator.cfg')) #create the evaluator evaltype = evaluator_cfg.get('evaluator', 'evaluator') evaluator = evaluator_factory.factory(evaltype)(conf=evaluator_cfg, dataconf=database_cfg, model=model) #create the graph graph = tf.Graph() with graph.as_default(): #compute the loss loss, update_loss, numbatches = evaluator.evaluate() if testing: return #create a histogram for all trainable parameters for param in tf.trainable_variables(): tf.summary.histogram(param.name, param, collections=['variable_summaries']) eval_summary = tf.summary.merge_all('eval_summaries') variable_summary = tf.summary.merge_all('variable_summaries') #create a hook that will load the model load_hook = LoadAtBegin(os.path.join(expdir, 'model', 'network.ckpt'), model.variables) #start the session with tf.train.SingularMonitoredSession(hooks=[load_hook]) as sess: summary_writer = tf.summary.FileWriter( os.path.join(expdir, 'logdir')) summary = variable_summary.eval(session=sess) summary_writer.add_summary(summary) for i in range(numbatches): if eval_summary is not None: _, summary = sess.run([update_loss, eval_summary]) summary_writer.add_summary(summary, i) else: update_loss.run(session=sess) loss = loss.eval(session=sess) print 'loss = %f' % loss #write the result to disk with open(os.path.join(expdir, 'result'), 'w') as fid: fid.write(str(loss))
def test(expdir, testing=False):
    '''does everything for testing

    args:
        expdir: the experiments directory
        testing: if true only the graph will be created for debugging purposes
    '''

    #read the database config file
    database_cfg = configparser.ConfigParser()
    database_cfg.read(os.path.join(expdir, 'database.conf'))

    if testing:
        model_cfg = configparser.ConfigParser()
        model_cfg.read(os.path.join(expdir, 'model.cfg'))
        trainer_cfg = configparser.ConfigParser()
        trainer_cfg.read(os.path.join(expdir, 'trainer.cfg'))
        model = Model(
            conf=model_cfg,
            trainlabels=int(trainer_cfg.get('trainer', 'trainlabels')))
    else:
        #load the model
        with open(os.path.join(expdir, 'model', 'model.pkl'), 'rb') as fid:
            model = pickle.load(fid)

    #read the evaluator config file
    evaluator_cfg = configparser.ConfigParser()
    evaluator_cfg.read(os.path.join(expdir, 'test_evaluator.cfg'))

    #create the evaluator
    evaltype = evaluator_cfg.get('evaluator', 'evaluator')
    evaluator = evaluator_factory.factory(evaltype)(
        conf=evaluator_cfg,
        dataconf=database_cfg,
        model=model)

    #create the graph
    graph = tf.Graph()

    with graph.as_default():

        #compute the loss
        batch_loss, numbatches = evaluator.evaluate()

        if testing:
            return

        #create a histogram for all trainable parameters
        for param in model.variables:
            tf.summary.histogram(param.name, param)

        #create a hook that will load the model
        load_hook = LoadAtBegin(
            os.path.join(expdir, 'model', 'network.ckpt'), model.variables)

        #create a hook for summary writing
        summary_hook = SummaryHook(os.path.join(expdir, 'logdir'))

        #start the session
        with tf.train.SingularMonitoredSession(
                hooks=[load_hook, summary_hook]) as sess:

            loss = 0.0
            for _ in range(numbatches):
                loss += batch_loss.eval(session=sess)
            loss = loss / numbatches

    print 'loss = %f' % loss

    #write the result to disk
    with open(os.path.join(expdir, 'result'), 'w') as fid:
        fid.write(str(loss))
def __init__(self, task_name, trainerconf, taskconf, models, modelconf,
             dataconf, evaluatorconf, batch_size):
    '''TaskTrainer constructor, gathers the dataconfigs and sets the
    loss_computer and evaluator for this task.

    Args:
        task_name: a name for the training task
        trainerconf: the trainer config
        taskconf: the config file for each task
        models: the neural net models
        modelconf: the neural net models configuration
        dataconf: the data configuration as a ConfigParser
        evaluatorconf: the evaluator configuration for evaluating,
            if None no evaluation will be done
        batch_size: the size of the batch
    '''

    self.task_name = task_name
    self.trainerconf = trainerconf
    self.taskconf = taskconf
    self.models = models
    self.modelconf = modelconf
    self.evaluatorconf = evaluatorconf
    self.batch_size = batch_size

    #get the database configurations for all inputs, outputs, intermediate
    #model nodes and models
    self.output_names = taskconf['outputs'].split(' ')
    self.input_names = taskconf['inputs'].split(' ')
    self.model_nodes = taskconf['nodes'].split(' ')

    self.input_dataconfs = []
    for input_name in self.input_names:
        #input config
        self.input_dataconfs.append(
            dict(dataconf.items(taskconf[input_name])))

    self.target_names = taskconf['targets'].split(' ')
    if self.target_names == ['']:
        self.target_names = []
    self.target_dataconfs = []
    for target_name in self.target_names:
        #target config
        self.target_dataconfs.append(
            dict(dataconf.items(taskconf[target_name])))

    self.model_links = dict()
    self.inputs_links = dict()
    for node in self.model_nodes:
        self.model_links[node] = taskconf['%s_model' % node]
        self.inputs_links[node] = taskconf['%s_inputs' % node].split(' ')

    #create the loss computer
    self.loss_computer = loss_computer_factory.factory(
        taskconf['loss_type'])(self.batch_size)

    #create the validation evaluator
    evaltype = evaluatorconf.get('evaluator', 'evaluator')
    if evaltype != 'None':
        self.evaluator = evaluator_factory.factory(evaltype)(
            conf=evaluatorconf,
            dataconf=dataconf,
            models=self.models,
            task=task_name)
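# For reference, a tiny self-contained illustration of the
# `dict(dataconf.items(section))` pattern used throughout these constructors:
# one section of a ConfigParser is turned into a plain dictionary. The section
# and option names below are made up for this example.
import configparser

dataconf = configparser.ConfigParser()
dataconf.add_section('trainfeatures')
dataconf.set('trainfeatures', 'store_dir', '/path/to/features')
dataconf.set('trainfeatures', 'dimension', '40')

#equivalent of the dataconfs built in the constructors above
section_conf = dict(dataconf.items('trainfeatures'))
assert section_conf['dimension'] == '40'  #values remain strings until cast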