Example #1
    def dump_data(self, out_fn, out_featdim):
        file_reader = read_dataset(self.data_spec,
                                   pad_zeros=True)  #taking only one reader
        out_options = self.data_spec.copy()
        out_options['base_path'] = self.export_path
        #updating the base_path
        out_options['featdim'] = out_featdim
        out_options['writer_type'] = "NP"
        file_writer = write_dataset(out_options)
        batch_size = file_reader.batch_size

        while not file_reader.is_finish():
            for batch_index in xrange(file_reader.nBatches):
                s_idx = batch_index * batch_size
                e_idx = s_idx + batch_size
                data = out_fn(file_reader.feat[s_idx:e_idx])
                label = file_reader.label[s_idx:e_idx]

                if ((batch_index == file_reader.nBatches - 1)
                        and (not file_reader.num_pad_frames == 0)):
                    data = data[:-file_reader.num_pad_frames]
                    label = label[:-file_reader.num_pad_frames]

                file_writer.write_data(data, label)

            file_reader.read_next_partition_data(pad_zeros=True)
        logger.debug('NP Dataexporter : data is exported to %s' %
                     self.export_path)
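
Examples #1, #4, #5 and #8 all share the same inner loop: drain one partition of the reader at a time, slice it into fixed-size batches, trim the zero-padded frames from the final batch, and hand the transformed batch to a writer. A minimal sketch of that shared loop follows; it is not a function from the repository, and it assumes only the reader/writer attributes visible in these examples (batch_size, nBatches, feat, label, num_pad_frames, is_finish, read_next_partition_data, write_data).

def export_partitioned_dataset(reader, writer, transform):
    """Drain a padded, partitioned reader through `transform` into `writer`.

    Sketch of the loop shared by the dump_data methods above; assumes the
    reader/writer interface shown in the examples on this page.
    """
    batch_size = reader.batch_size
    while not reader.is_finish():
        for batch_index in range(reader.nBatches):
            s_idx = batch_index * batch_size
            e_idx = s_idx + batch_size
            data = transform(reader.feat[s_idx:e_idx])
            label = reader.label[s_idx:e_idx]
            # the last batch of a partition may carry zero-padding;
            # drop those frames so they are not written out
            if batch_index == reader.nBatches - 1 and reader.num_pad_frames != 0:
                data = data[:-reader.num_pad_frames]
                label = label[:-reader.num_pad_frames]
            writer.write_data(data, label)
        reader.read_next_partition_data(pad_zeros=True)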
Example #2
def testing(nnetModel,data_spec,saveLabel=True,outFile='test.out'):
	try:
		test_sets = read_dataset(data_spec['testing']) 
	except KeyError:
		#raise e
		logger.info("No testing set:Skiping Testing");
	else:
		_testing(nnetModel,test_sets)
		if saveLabel:
			saveLabels(nnetModel,outFile,data_spec['testing'])
Example #3
def fineTunning(nnetModel,model_config,data_spec):
	try:
		train_sets = read_dataset(data_spec['training'])
		valid_sets = read_dataset(data_spec['validation'])
	except KeyError:
		#raise e
		logger.info("No validation/training set:Skiping Fine tunning");
	else:
		try:
			outFile=model_config['output_file']
			saveFeq=model_config['save_feq']
			finetune_config = model_config['finetune_params']
			momentum = finetune_config['momentum']
			lrate = LearningRate.get_instance(finetune_config);
		except KeyError, e:
			logger.error("KeyMissing:"+str(e));
			logger.critical("Fine tunning Paramters Missing")
			exit(2)

		_fineTunning(nnetModel,train_sets,valid_sets,lrate,momentum,saveFeq,outFile)
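
The only configuration keys fineTunning reads above are output_file, save_feq, finetune_params and, inside finetune_params, momentum (the rest of finetune_params is consumed by LearningRate.get_instance). A hypothetical fragment illustrating that shape is below; the values and any extra learning-rate fields are placeholders, not keys confirmed by the source.

# Hypothetical model_config fragment for fineTunning (illustrative only;
# values are placeholders and the learning-rate field names inside
# finetune_params depend on LearningRate.get_instance).
model_config = {
    'output_file': 'final.nnet',   # where the fine-tuned model is written
    'save_feq': 5,                 # assumed meaning: save frequency in epochs
    'finetune_params': {
        'momentum': 0.5,
        # learning-rate schedule parameters go here
    },
}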
Example #4
	def dump_data(self,out_fn,out_featdim):
		filepath = self.data_spec['base_path'] + os.sep + self.data_spec['filename']
		
		copy_path = create_folder_structure_if_not_exists(self.export_path + os.sep + self.data_spec['filename'])
		shutil.copy(filepath,copy_path);	#copies the file directly
		
		self.filehandle = open(filepath,'rb')
		line = self.filehandle.readline(); # reading file header
		header = line.split();
		num_classes = int(header[1]);
		
		for idx in xrange(num_classes):
			level1_filename = self.filehandle.readline().strip();
			level1_filepath  = self.data_spec['base_path'] + os.sep + level1_filename	#filename of individual classes
			
			copy_path = create_folder_structure_if_not_exists(self.export_path + os.sep + level1_filename)
			shutil.copy(level1_filepath,copy_path);	#copies the index file directly from the source directory
			
			self.level1FileHandle = open(level1_filepath,'rb');
			level2_filepath = self.level1FileHandle.readline().strip();
			while len(level2_filepath) != 0:
				in_child_options = self.data_spec.copy();
				in_child_options['filename'] = level2_filepath	#filename of individual classes
				in_child_options['reader_type'] = "TD"
				in_child_options['label'] = idx;
				file_reader  = read_dataset(in_child_options,pad_zeros=True)	#taking only one reader 
				out_child_options = in_child_options.copy();
				out_child_options['base_path'] = self.export_path;	#updating the base_path
				out_child_options['featdim'] = out_featdim;
				out_child_options['writer_type'] = "TD"
				file_writer =  write_dataset(out_child_options);
				batch_size=file_reader.batch_size

				while not file_reader.is_finish():
					for batch_index in xrange(file_reader.nBatches):
						s_idx = batch_index * batch_size; e_idx = s_idx + batch_size
						data = out_fn(file_reader.feat[s_idx:e_idx])
						label = file_reader.label[s_idx:e_idx];

						if ((batch_index == file_reader.nBatches-1) and (not file_reader.num_pad_frames == 0)) :
							data=data[:-file_reader.num_pad_frames]
							label = label[:-file_reader.num_pad_frames]

						file_writer.write_data(data,label);
					
					file_reader.read_next_partition_data(pad_zeros=True);
			
				level2_filepath = self.level1FileHandle.readline().strip();
		logger.debug('T2 Dataexporter : data is exported to %s' % self.export_path);
Example #5
    def dump_data(self, out_fn, out_featdim):
        filepath = self.data_spec['base_path'] + os.sep + self.data_spec[
            'filename']

        copy_path = create_folder_structure_if_not_exists(
            self.export_path + os.sep + self.data_spec['filename'])
        shutil.copy(filepath, copy_path)
        #copies the file directly

        self.filehandle = open(filepath, 'rb')
        line = self.filehandle.readline()
        # reading file header
        header = line.split()
        num_classes = int(header[1])

        for idx in xrange(num_classes):
            in_child_options = self.data_spec.copy()
            in_child_options['filename'] = self.filehandle.readline().strip()  #filename of individual classes
            in_child_options['reader_type'] = "TD"
            in_child_options['label'] = idx
            file_reader = read_dataset(in_child_options,
                                       pad_zeros=True)  #taking only one reader
            out_child_options = in_child_options.copy()
            out_child_options['base_path'] = self.export_path
            out_child_options['featdim'] = out_featdim
            out_child_options['writer_type'] = "TD"
            file_writer = write_dataset(out_child_options)
            batch_size = file_reader.batch_size

            while (not file_reader.is_finish()):
                for batch_index in xrange(file_reader.nBatches):
                    s_idx = batch_index * batch_size
                    e_idx = s_idx + batch_size
                    data = out_fn(file_reader.feat[s_idx:e_idx])
                    label = file_reader.label[s_idx:e_idx]

                    if ((batch_index == file_reader.nBatches - 1)
                            and (not file_reader.num_pad_frames == 0)):
                        data = data[:-file_reader.num_pad_frames]
                        label = label[:-file_reader.num_pad_frames]

                    file_writer.write_data(data, label)

                file_reader.read_next_partition_data(pad_zeros=True)
        logger.debug('T1 Dataexporter : data is exported to %s' %
                     self.export_path)
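
Examples #4 and #5 both walk a small text index: the first line is a header whose second whitespace-separated field is the number of classes, and each following line names a per-class file that is read as a "TD" dataset and labelled with the class index (in the T2 variant of Example #4, each per-class file is itself a list of data files, one path per line). A tiny, hypothetical index written in that layout, just to illustrate what the readline/split calls above expect, could be produced like this:

# Hypothetical two-class index in the layout parsed above: a header whose
# second token is the class count, then one per-class filename per line.
# The header's first token and the filenames are placeholders; only their
# positions matter to the parsing code in Examples #4 and #5.
with open('class_index.txt', 'w') as fh:
    fh.write('classes 2\n')
    fh.write('class0.txt\n')
    fh.write('class1.txt\n')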
Example #6
	def plot_layer_output(self,plot_spec,plot_path,max_images=10):
		#default all nodes set to value 1
		#inp = numpy.random.random(self.conv_input_dim).astype(theano.config.floatX);
		batch_size = plot_spec['batch_size'];
		plot_path = plot_path +os.sep +'layer_%d'+os.sep +'batch_%d'+os.sep+'img_%d.png'
		for layer_idx in xrange(self.conv_layer_num):	
			img_plot_remaining = max_images;
			layer_out_fn = self.getLayerOutFunction(layer_idx);
			logger.info('Plotting the layer %d'%layer_idx);
			file_reader =read_dataset(plot_spec,pad_zeros=True);
			while not file_reader.is_finish():
				for batch_index in xrange(file_reader.cur_frame_num/batch_size):
					s_idx = batch_index * batch_size; e_idx = s_idx + batch_size
					data = layer_out_fn(file_reader.feat[s_idx:e_idx])
					# clip e_idx so zero-padded frames at the end are not plotted, then slice within the current batch
					e_idx = min(file_reader.cur_frame_num - file_reader.num_pad_frames, s_idx + batch_size)
					img_plot_remaining = plot(data[:e_idx - s_idx], plot_path, layer_idx, batch_index, img_plot_remaining)
					if img_plot_remaining == 0:
						break;
				if img_plot_remaining == 0:
					break;
				file_reader.read_next_partition_data(pad_zeros=True);
Example #7
def saveLabels(nnetModel,export_path,data_spec):
	logger.info('Getting the Test(Get Label) function')
	#fp = open(out_path, "w");
	test_sets  = read_dataset(data_spec,pad_zeros=True,makeShared=False)
	# get the label function for the model
	getLabel = nnetModel.getLabelFunction()

	batch_size = test_sets.batch_size
	with open(export_path,'w') as fp:
		while (not test_sets.is_finish()):
			for batch_index in xrange(test_sets.nBatches):
				s_idx = batch_index*batch_size;
				e_idx = s_idx+batch_size;
				pred = getLabel(test_sets.feat[s_idx:e_idx])

				act = test_sets.label[s_idx:e_idx]
				if ((batch_index == test_sets.nBatches-1) and
					(not test_sets.num_pad_frames == 0)) :
						pred=pred[:-test_sets.num_pad_frames]
						act= act[:-test_sets.num_pad_frames]
				labels = zip(pred.T,act)
				numpy.savetxt(fp, labels,fmt='%d %d')
			test_sets.read_next_partition_data(pad_zeros=True);
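
saveLabels writes one '%d %d' line per frame: the predicted label followed by the reference label. Scoring that output afterwards needs nothing beyond numpy; a small sketch, assuming the file written above (here test.out) exists and holds more than one row, is:

import numpy

# Each row written by saveLabels is "<predicted> <actual>".
pairs = numpy.loadtxt('test.out', dtype=int)
pred, act = pairs[:, 0], pairs[:, 1]
print('frame accuracy: %.2f%%' % (100.0 * (pred == act).mean()))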
Example #8
	def dump_data(self,out_fn,out_featdim):
		file_reader  = read_dataset(self.data_spec,pad_zeros=True)	#taking only one reader 
		out_options = self.data_spec.copy();
		out_options['base_path'] = self.export_path;	#updating the base_path
		out_options['featdim'] = out_featdim;
		out_options['writer_type'] = "NP"
		file_writer =  write_dataset(out_options);
		batch_size=file_reader.batch_size

		while not file_reader.is_finish():
			for batch_index in xrange(file_reader.nBatches):
				s_idx = batch_index * batch_size; e_idx = s_idx + batch_size
				data = out_fn(file_reader.feat[s_idx:e_idx])
				label = file_reader.label[s_idx:e_idx];

				if ((batch_index == file_reader.nBatches-1) and (not file_reader.num_pad_frames == 0)) :
					data=data[:-file_reader.num_pad_frames]
					label = label[:-file_reader.num_pad_frames]

				file_writer.write_data(data,label);

			file_reader.read_next_partition_data(pad_zeros=True);
		logger.debug('NP Dataexporter : data is exported to %s' % self.export_path);
Example #9
    try:
        ptr_file = model_config['input_file']
        dbn.load(filename=ptr_file)
    except KeyError, e:
        logger.info("KeyMissing:" + str(e))
        logger.info(
            "Pretrained network Missing in configFile: Skipping Loading")
    except IOError, e:
        logger.error("IOError:" + str(e))
        logger.error('Model cannot be initialized from input file')
        sys.exit(2)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    if model_config['processes']['pretraining']:
        train_sets = read_dataset(data_spec['training'])
        preTraining(dbn, train_sets, model_config['pretrain_params'])
        del train_sets

    ########################
    # FINETUNING THE MODEL #
    ########################
    if model_config['processes']['finetuning']:
        fineTunning(dbn, model_config, data_spec)

    ########################
    #  TESTING THE MODEL   #
    ########################
    if model_config['processes']['testing']:
        testing(dbn, data_spec)
    ##########################
Example #10
    # pretraining
    try:
        ptr_file = model_config['input_file']
        dbn.load(filename=ptr_file)
    except KeyError, e:
        logger.info("KeyMissing:"+str(e));
        logger.info("Pretrained network Missing in configFile: Skipping Loading");
    except IOError, e:
        logger.error("IOError:"+str(e));
        logger.error('Model cannot be initialized from input file')
        sys.exit(2)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    if model_config['processes']['pretraining']:
        train_sets = read_dataset(data_spec['training'])
        preTraining(dbn,train_sets,model_config['pretrain_params'])
        del train_sets;

    ########################
    # FINETUNING THE MODEL #
    ########################
    if model_config['processes']['finetuning']:
        fineTunning(dbn,model_config,data_spec)

    ########################
    #  TESTING THE MODEL   #
    ########################
    if model_config['processes']['testing']:
        testing(dbn,data_spec)
    ##########################
Example #11
def runSdA(arg):

    if type(arg) is dict:
        model_config = arg
    else:
        model_config = load_model(arg,'SDA')
        
    sda_config = load_sda_spec(model_config['nnet_spec'])
    data_spec =  load_data_spec(model_config['data_spec'],model_config['batch_size']);

    # numpy random generator
    numpy_rng = numpy.random.RandomState(model_config['random_seed'])
    #theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    #get Activation function
    activationFn = parse_activation(sda_config['activation']);

    createDir(model_config['wdir']);
    #create working dir

    logger.info('building the model')
    # construct the stacked denoising autoencoder class
    sda = SDA(numpy_rng=numpy_rng, n_ins=model_config['n_ins'],
              hidden_layers_sizes=sda_config['hidden_layers'],
              n_outs=model_config['n_outs'],activation=activationFn)

    batch_size = model_config['batch_size'];


    #########################
    # PRETRAINING THE MODEL #
    #########################
    if model_config['processes']['pretraining']:
        
        train_sets = read_dataset(data_spec['training'])
        pretraining_config = model_config['pretrain_params']
        corruption_levels = sda_config['corruption_levels']

        preTraining(sda,train_sets,corruption_levels,pretraining_config);
        del train_sets;

    ########################
    # FINETUNING THE MODEL #
    ########################
    if model_config['processes']['finetuning']:
        fineTunning(sda,model_config,data_spec)

    ########################
    #  TESTING THE MODEL   #
    ########################
    if model_config['processes']['testing']:
        testing(sda,data_spec)

    ##########################
    ##   Export Features    ##
    ##########################
    if model_config['processes']['export_data']:
        exportFeatures(sda,model_config,data_spec)

    # save the pretrained nnet to file
    logger.info('Saving model to ' + str(model_config['output_file']) + '....')
    sda.save(filename=model_config['output_file'], withfinal=True)
    logger.info('Saved model to ' + str(model_config['output_file']))
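
Reading Example #11 end to end, the keys it touches in model_config, either directly or through fineTunning/testing/exportFeatures above, are collected below as a hypothetical minimal configuration; the values are placeholders, and the formats of the files behind nnet_spec and data_spec (parsed by load_sda_spec and load_data_spec) are assumptions, not documented here.

# Hypothetical minimal model_config for runSdA, listing only keys the
# example above reads. Values are placeholders; the nnet_spec/data_spec
# file formats and the contents of pretrain_params are assumed.
model_config = {
    'nnet_spec': 'sda.cfg',        # parsed by load_sda_spec; must yield
                                   # 'hidden_layers', 'activation',
                                   # 'corruption_levels'
    'data_spec': 'data.cfg',       # parsed by load_data_spec; must yield
                                   # 'training'/'validation'/'testing' specs
    'batch_size': 256,
    'random_seed': 1234,
    'wdir': 'work',                # working directory created by createDir
    'n_ins': 440,
    'n_outs': 10,
    'output_file': 'sda_final.nnet',
    'pretrain_params': {},                  # forwarded to preTraining
    'finetune_params': {'momentum': 0.5},   # used by fineTunning (Example #3)
    'save_feq': 5,                          # used by fineTunning (Example #3)
    'processes': {
        'pretraining': True,
        'finetuning': True,
        'testing': True,
        'export_data': False,
    },
}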