def dump_data(self, out_fn, out_featdim):
    """Transform the whole dataset with out_fn and write it back out.

    A single reader walks self.data_spec (zero-padded so every batch is
    full); each batch of features is mapped through out_fn and written
    by an "NP" writer rooted at self.export_path.
    """
    reader = read_dataset(self.data_spec, pad_zeros=True)  # taking only one reader

    # Writer options mirror the input spec, re-rooted at the export path.
    writer_spec = self.data_spec.copy()
    writer_spec['base_path'] = self.export_path
    writer_spec['featdim'] = out_featdim
    writer_spec['writer_type'] = "NP"
    writer = write_dataset(writer_spec)

    bsize = reader.batch_size
    while not reader.is_finish():
        for b in xrange(reader.nBatches):
            lo = b * bsize
            hi = lo + bsize
            feats = out_fn(reader.feat[lo:hi])
            labels = reader.label[lo:hi]
            # Drop the zero padding that was appended to fill the last batch.
            if b == reader.nBatches - 1 and reader.num_pad_frames != 0:
                feats = feats[:-reader.num_pad_frames]
                labels = labels[:-reader.num_pad_frames]
            writer.write_data(feats, labels)
        reader.read_next_partition_data(pad_zeros=True)
    logger.debug('NP Dataexporter : data is exported to %s' % self.export_path)
def testing(nnetModel, data_spec, saveLabel=True, outFile='test.out'):
    """Run the test pass for nnetModel and optionally dump predicted labels.

    Skips (with a log line) when data_spec has no 'testing' entry.
    """
    try:
        test_sets = read_dataset(data_spec['testing'])
    except KeyError:
        # No testing spec configured; nothing to do.
        logger.info("No testing set:Skiping Testing")
        return
    _testing(nnetModel, test_sets)
    if saveLabel:
        saveLabels(nnetModel, outFile, data_spec['testing'])
def fineTunning(nnetModel,model_config,data_spec): try: train_sets = read_dataset(data_spec['training']) valid_sets = read_dataset(data_spec['validation']) except KeyError: #raise e logger.info("No validation/training set:Skiping Fine tunning"); else: try: outFile=model_config['output_file'] saveFeq=model_config['save_feq'] finetune_config = model_config['finetune_params'] momentum = finetune_config['momentum'] lrate = LearningRate.get_instance(finetune_config); except KeyError, e: logger.error("KeyMissing:"+str(e)); logger.critical("Fine tunning Paramters Missing") exit(2) _fineTunning(nnetModel,train_sets,valid_sets,lrate,momentum,saveFeq,outFile)
def dump_data(self, out_fn, out_featdim):
    """Export a two-level indexed (T2) dataset through out_fn.

    The top-level index file lists one level-1 index per class; each
    level-1 index lists the data files of that class.  Index files are
    copied verbatim to the export path; every listed data file is read
    with a "TD" reader, transformed by out_fn and rewritten under
    self.export_path with a "TD" writer.

    Fixes: the per-class level-1 index handle is now closed each
    iteration (one leaked per class before) and the top-level handle is
    closed when the export finishes; `x.__len__() != 0` replaced with
    the idiomatic len() test.
    """
    filepath = self.data_spec['base_path'] + os.sep + self.data_spec['filename']
    copy_path = create_folder_structure_if_not_exists(
        self.export_path + os.sep + self.data_spec['filename'])
    shutil.copy(filepath, copy_path)  # copies the top-level index directly

    self.filehandle = open(filepath, 'rb')
    try:
        header = self.filehandle.readline().split()  # reading file header
        num_classes = int(header[1])
        for idx in xrange(num_classes):
            level1_filename = self.filehandle.readline().strip()
            # filename of individual classes
            level1_filepath = self.data_spec['base_path'] + os.sep + level1_filename
            copy_path = create_folder_structure_if_not_exists(
                self.export_path + os.sep + level1_filename)
            shutil.copy(level1_filepath, copy_path)  # copies the index file directly
            self.level1FileHandle = open(level1_filepath, 'rb')
            try:
                self._export_class_files(out_fn, out_featdim, idx)
            finally:
                self.level1FileHandle.close()
    finally:
        self.filehandle.close()
    logger.debug('T2 Dataexporter : data is exported to %s' % self.export_path)

def _export_class_files(self, out_fn, out_featdim, idx):
    """Export every data file listed in self.level1FileHandle as class `idx`."""
    level2_filepath = self.level1FileHandle.readline().strip()
    while len(level2_filepath) != 0:
        in_child_options = self.data_spec.copy()
        in_child_options['filename'] = level2_filepath  # one data file of this class
        in_child_options['reader_type'] = "TD"
        in_child_options['label'] = idx
        file_reader = read_dataset(in_child_options, pad_zeros=True)  # taking only one reader

        out_child_options = in_child_options.copy()
        out_child_options['base_path'] = self.export_path  # updating the base_path
        out_child_options['featdim'] = out_featdim
        out_child_options['writer_type'] = "TD"
        file_writer = write_dataset(out_child_options)

        batch_size = file_reader.batch_size
        while not file_reader.is_finish():
            for batch_index in xrange(file_reader.nBatches):
                s_idx = batch_index * batch_size
                e_idx = s_idx + batch_size
                data = out_fn(file_reader.feat[s_idx:e_idx])
                label = file_reader.label[s_idx:e_idx]
                # Strip the zero padding that filled the final batch.
                if (batch_index == file_reader.nBatches - 1
                        and file_reader.num_pad_frames != 0):
                    data = data[:-file_reader.num_pad_frames]
                    label = label[:-file_reader.num_pad_frames]
                file_writer.write_data(data, label)
            file_reader.read_next_partition_data(pad_zeros=True)
        level2_filepath = self.level1FileHandle.readline().strip()
def dump_data(self, out_fn, out_featdim):
    """Export a one-level indexed (T1) dataset through out_fn.

    The index file lists one data file per class.  The index is copied
    verbatim to the export path; each listed file is read with a "TD"
    reader, transformed by out_fn and rewritten under self.export_path
    with a "TD" writer.

    Fix: the index file handle is now closed when the export finishes
    (it used to leak).
    """
    filepath = self.data_spec['base_path'] + os.sep + self.data_spec['filename']
    copy_path = create_folder_structure_if_not_exists(
        self.export_path + os.sep + self.data_spec['filename'])
    shutil.copy(filepath, copy_path)  # copies the index file directly

    self.filehandle = open(filepath, 'rb')
    try:
        header = self.filehandle.readline().split()  # reading file header
        num_classes = int(header[1])
        for idx in xrange(num_classes):
            in_child_options = self.data_spec.copy()
            # Next index line names the data file for class `idx`.
            in_child_options['filename'] = self.filehandle.readline().strip()
            in_child_options['reader_type'] = "TD"
            in_child_options['label'] = idx
            file_reader = read_dataset(in_child_options, pad_zeros=True)  # taking only one reader

            out_child_options = in_child_options.copy()
            out_child_options['base_path'] = self.export_path  # updating the base_path
            out_child_options['featdim'] = out_featdim
            out_child_options['writer_type'] = "TD"
            file_writer = write_dataset(out_child_options)

            batch_size = file_reader.batch_size
            while not file_reader.is_finish():
                for batch_index in xrange(file_reader.nBatches):
                    s_idx = batch_index * batch_size
                    e_idx = s_idx + batch_size
                    data = out_fn(file_reader.feat[s_idx:e_idx])
                    label = file_reader.label[s_idx:e_idx]
                    # Strip the zero padding that filled the final batch.
                    if (batch_index == file_reader.nBatches - 1
                            and file_reader.num_pad_frames != 0):
                        data = data[:-file_reader.num_pad_frames]
                        label = label[:-file_reader.num_pad_frames]
                    file_writer.write_data(data, label)
                file_reader.read_next_partition_data(pad_zeros=True)
    finally:
        self.filehandle.close()
    logger.debug('T1 Dataexporter : data is exported to %s' % self.export_path)
def plot_layer_output(self, plot_spec, plot_path, max_images=10):
    """Plot up to max_images outputs of each convolutional layer.

    Feeds batches from the dataset described by plot_spec through every
    layer's output function and writes images to
    <plot_path>/layer_<l>/batch_<b>/img_<i>.png.
    """
    batch_size = plot_spec['batch_size']
    plot_path = plot_path + os.sep + 'layer_%d' + os.sep + 'batch_%d' + os.sep + 'img_%d.png'
    for layer_idx in xrange(self.conv_layer_num):
        img_plot_remaining = max_images
        layer_out_fn = self.getLayerOutFunction(layer_idx)
        logger.info('Plotting the layer %d' % layer_idx)
        file_reader = read_dataset(plot_spec, pad_zeros=True)
        while not file_reader.is_finish():
            for batch_index in xrange(file_reader.cur_frame_num / batch_size):
                s_idx = batch_index * batch_size
                e_idx = s_idx + batch_size
                data = layer_out_fn(file_reader.feat[s_idx:e_idx])
                # Clamp the batch end so zero-padded frames are not plotted.
                e_idx = min(file_reader.cur_frame_num - file_reader.num_pad_frames,
                            s_idx + batch_size)
                # BUG FIX: `data` holds only this batch, so it must be sliced
                # with batch-relative indices; the old data[s_idx:e_idx] used
                # absolute indices and was empty for every batch after the first.
                img_plot_remaining = plot(data[:e_idx - s_idx], plot_path,
                                          layer_idx, batch_index, img_plot_remaining)
                if img_plot_remaining == 0:
                    break
            if img_plot_remaining == 0:
                break
            file_reader.read_next_partition_data(pad_zeros=True)
def saveLabels(nnetModel, export_path, data_spec):
    """Write "predicted actual" label pairs for the whole test set.

    Runs the model's label function over every batch (zero padding is
    trimmed from the final batch) and writes the pairs to export_path
    as two integer columns.
    """
    logger.info('Getting the Test(Get Label) function')
    test_sets = read_dataset(data_spec, pad_zeros=True, makeShared=False)
    # get the label function for the model
    getLabel = nnetModel.getLabelFunction()
    bsize = test_sets.batch_size
    with open(export_path, 'w') as fp:
        while not test_sets.is_finish():
            for b in xrange(test_sets.nBatches):
                lo = b * bsize
                hi = lo + bsize
                pred = getLabel(test_sets.feat[lo:hi])
                act = test_sets.label[lo:hi]
                # Drop the padded frames from the final batch before dumping.
                if b == test_sets.nBatches - 1 and test_sets.num_pad_frames != 0:
                    pred = pred[:-test_sets.num_pad_frames]
                    act = act[:-test_sets.num_pad_frames]
                numpy.savetxt(fp, zip(pred.T, act), fmt='%d %d')
            test_sets.read_next_partition_data(pad_zeros=True)
def dump_data(self, out_fn, out_featdim):
    """Run the dataset through out_fn and re-write it with an NP writer."""
    src = read_dataset(self.data_spec, pad_zeros=True)  # one reader is enough
    opts = self.data_spec.copy()
    opts['base_path'] = self.export_path  # write under the export root
    opts['featdim'] = out_featdim
    opts['writer_type'] = "NP"
    sink = write_dataset(opts)
    n = src.batch_size
    while not src.is_finish():
        for i in xrange(src.nBatches):
            start = i * n
            stop = start + n
            out = out_fn(src.feat[start:stop])
            lab = src.label[start:stop]
            if i == src.nBatches - 1 and src.num_pad_frames != 0:
                # The last batch was zero-padded to full size; trim it back.
                out = out[:-src.num_pad_frames]
                lab = lab[:-src.num_pad_frames]
            sink.write_data(out, lab)
        src.read_next_partition_data(pad_zeros=True)
    logger.debug('NP Dataexporter : data is exported to %s' % self.export_path)
ptr_file = model_config['input_file'] dbn.load(filename=ptr_file) except KeyError, e: logger.info("KeyMissing:" + str(e)) logger.info( "Pretrained network Missing in configFile: Skipping Loading") except IOError, e: logger.error("IOError:" + str(e)) logger.error('Model cannot be initialize from input file ') sys.exit(2) ######################### # PRETRAINING THE MODEL # ######################### if model_config['processes']['pretraining']: train_sets = read_dataset(data_spec['training']) preTraining(dbn, train_sets, model_config['pretrain_params']) del train_sets ######################## # FINETUNING THE MODEL # ######################## if model_config['processes']['finetuning']: fineTunning(dbn, model_config, data_spec) ######################## # TESTING THE MODEL # ######################## if model_config['processes']['testing']: testing(dbn, data_spec) ##########################
# pretraining ptr_file = model_config['input_file'] dbn.load(filename=ptr_file) except KeyError, e: logger.info("KeyMissing:"+str(e)); logger.info("Pretrained network Missing in configFile: Skipping Loading"); except IOError, e: logger.error("IOError:"+str(e)); logger.error('Model cannot be initialize from input file ') sys.exit(2) ######################### # PRETRAINING THE MODEL # ######################### if model_config['processes']['pretraining']: train_sets = read_dataset(data_spec['training']) preTraining(dbn,train_sets,model_config['pretrain_params']) del train_sets; ######################## # FINETUNING THE MODEL # ######################## if model_config['processes']['finetuning']: fineTunning(dbn,model_config,data_spec) ######################## # TESTING THE MODEL # ######################## if model_config['processes']['testing']: testing(dbn,data_spec) ##########################
def runSdA(arg):
    """Build, pretrain, fine-tune, test and export an SDA model.

    `arg` is either an already-loaded model-config dict or a value
    accepted by load_model(..., 'SDA').  The phases that run are chosen
    by model_config['processes']; the trained network is always saved
    to model_config['output_file'] at the end.

    Idiom fixes: isinstance() instead of `type(arg) is dict` (also
    accepts dict subclasses), removed dead commented-out code and the
    unused `batch_size` local.
    """
    # Accept either a ready config dict or something load_model understands.
    if isinstance(arg, dict):
        model_config = arg
    else:
        model_config = load_model(arg, 'SDA')
    sda_config = load_sda_spec(model_config['nnet_spec'])
    data_spec = load_data_spec(model_config['data_spec'], model_config['batch_size'])

    # numpy random generator, seeded for reproducibility
    numpy_rng = numpy.random.RandomState(model_config['random_seed'])

    # get Activation function
    activationFn = parse_activation(sda_config['activation'])
    createDir(model_config['wdir'])  # create working dir

    logger.info('building the model')
    # construct the stacked denoising autoencoder class
    sda = SDA(numpy_rng=numpy_rng, n_ins=model_config['n_ins'],
              hidden_layers_sizes=sda_config['hidden_layers'],
              n_outs=model_config['n_outs'], activation=activationFn)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    if model_config['processes']['pretraining']:
        train_sets = read_dataset(data_spec['training'])
        pretraining_config = model_config['pretrain_params']
        corruption_levels = sda_config['corruption_levels']
        preTraining(sda, train_sets, corruption_levels, pretraining_config)
        del train_sets  # release the training reader before the next phase

    ########################
    # FINETUNING THE MODEL #
    ########################
    if model_config['processes']['finetuning']:
        fineTunning(sda, model_config, data_spec)

    ########################
    #  TESTING THE MODEL   #
    ########################
    if model_config['processes']['testing']:
        testing(sda, data_spec)

    ##########################
    ##   Export Features    ##
    ##########################
    if model_config['processes']['export_data']:
        exportFeatures(sda, model_config, data_spec)

    # save the pretrained nnet to file
    logger.info('Saving model to ' + str(model_config['output_file']) + '....')
    sda.save(filename=model_config['output_file'], withfinal=True)
    logger.info('Saved model to ' + str(model_config['output_file']))