def pretrain_layerwise(m):
    """Greedily pretrain the NADE one hidden layer at a time.

    Returns the pretrained NADE and the trainer used for the last layer.
    """
    options = m.options
    textfile_log = m.textfile_log
    hdf5_backend = m.hdf5_backend
    console = m.console
    nade_class = m.nade_class
    nade = m.nade
    loss_function = m.loss_function
    training_dataset = m.training_dataset
    masks_dataset = m.masks_dataset
    # Pretrain layerwise
    for l in xrange(1, options.hlayers + 1):
        if l == 1:
            nade.initialize_parameters_from_dataset(training_dataset)
        else:
            nade = nade_class.create_from_smaller_NADE(nade, add_n_hiddens=1)
        # Configure training
        trainer = Optimization.MomentumSGD(nade, nade.__getattribute__(loss_function))
        trainer.set_datasets([training_dataset, masks_dataset])
        trainer.set_learning_rate(options.lr)
        trainer.set_datapoints_as_columns(True)
        trainer.add_controller(TrainingController.AdaptiveLearningRate(options.lr, 0, epochs=options.pretraining_epochs))
        trainer.add_controller(TrainingController.MaxIterations(options.pretraining_epochs))
        trainer.add_controller(TrainingController.ConfigurationSchedule("momentum", [(2, 0), (float('inf'), options.momentum)]))
        trainer.set_updates_per_epoch(options.epoch_size)
        trainer.set_minibatch_size(options.batch_size)
        # trainer.set_weight_decay_rate(options.wd)
        trainer.add_controller(TrainingController.NaNBreaker())
        # Instrument the training
        trainer.add_instrumentation(Instrumentation.Instrumentation([console, textfile_log, hdf5_backend],
                                                                    Instrumentation.Function("training_loss", lambda ins: ins.get_training_loss())))
        trainer.add_instrumentation(Instrumentation.Instrumentation([console, textfile_log, hdf5_backend], Instrumentation.Configuration()))
        trainer.add_instrumentation(Instrumentation.Instrumentation([console, textfile_log, hdf5_backend], Instrumentation.Timestamp()))
        # Train
        trainer.set_context("pretraining_%d" % l)
        trainer.train()
    return nade, trainer
def pre_training_configs(m):
    """Before training the model, configure it."""
    ordering = range(m.n_visible)
    np.random.shuffle(ordering)
    trainer = Optimization.MomentumSGD(m.nade, m.nade.__getattribute__(m.loss_function))
    trainer.set_datasets([m.training_dataset, m.masks_dataset])
    trainer.set_learning_rate(m.options.lr)
    trainer.set_datapoints_as_columns(True)
    trainer.add_controller(TrainingController.AdaptiveLearningRate(m.options.lr, 0, epochs=m.options.epochs))
    trainer.add_controller(TrainingController.MaxIterations(m.options.epochs))
    if m.options.training_ll_stop < np.inf:
        # Assumes that we're doing minimization so negative ll
        trainer.add_controller(TrainingController.TrainingErrorStop(-m.options.training_ll_stop))
    trainer.add_controller(TrainingController.ConfigurationSchedule("momentum", [(2, 0), (float('inf'), m.options.momentum)]))
    trainer.set_updates_per_epoch(m.options.epoch_size)
    trainer.set_minibatch_size(m.options.batch_size)
    # trainer.set_weight_decay_rate(options.wd)
    trainer.add_controller(TrainingController.NaNBreaker())
    # Instrument the training
    trainer.add_instrumentation(Instrumentation.Instrumentation([m.console, m.textfile_log, m.hdf5_backend],
                                                                Instrumentation.Function("training_loss", lambda ins: ins.get_training_loss())))
    if not m.options.no_validation:
        trainer.add_instrumentation(Instrumentation.Instrumentation([m.console], m.validation_loss_measurement))
        trainer.add_instrumentation(Instrumentation.Instrumentation([m.hdf5_backend], m.validation_loss_measurement,
                                                                    at_lowest=[Instrumentation.Parameters()]))
    trainer.add_instrumentation(Instrumentation.Instrumentation([m.console, m.textfile_log, m.hdf5_backend], Instrumentation.Configuration()))
    # trainer.add_instrumentation(Instrumentation.Instrumentation([hdf5_backend], Instrumentation.Parameters(), every=10))
    trainer.add_instrumentation(Instrumentation.Instrumentation([m.console, m.textfile_log, m.hdf5_backend], Instrumentation.Timestamp()))
    return trainer
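# Hedged helper (not in the original script): builds the attribute holder that
# pretrain_layerwise and pre_training_configs expect as `m`. The holder class and
# this constructor are hypothetical; the attribute names mirror exactly what the
# two functions above read, and the values come from the surrounding training code.
class ModelHolder(object):
    """Plain attribute container passed as `m` to the training helpers."""
    pass


def make_model_holder(options, console, textfile_log, hdf5_backend,
                      nade_class, nade, loss_function,
                      training_dataset, masks_dataset,
                      n_visible=None, validation_loss_measurement=None):
    # Bundle everything the helpers read from `m` into one object.
    m = ModelHolder()
    m.options = options
    m.console = console
    m.textfile_log = textfile_log
    m.hdf5_backend = hdf5_backend
    m.nade_class = nade_class
    m.nade = nade
    m.loss_function = loss_function
    m.training_dataset = training_dataset
    m.masks_dataset = masks_dataset
    m.n_visible = n_visible
    m.validation_loss_measurement = validation_loss_measurement
    return m

# Typical (hypothetical) call sequence:
#   m = make_model_holder(options, console, textfile_log, hdf5_backend,
#                         nade_class, nade, loss_function,
#                         training_dataset, masks_dataset,
#                         n_visible=n_visible,
#                         validation_loss_measurement=validation_loss_measurement)
#   nade, trainer = pretrain_layerwise(m)   # layerwise pretraining
#   trainer = pre_training_configs(m)       # or configure a single full training run
#   trainer.train()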
def nade(options, args):
    gc.set_threshold(gc.get_threshold()[0] / 5)
    # gc.set_debug(gc.DEBUG_UNCOLLECTABLE | gc.DEBUG_INSTANCES | gc.DEBUG_OBJECTS)
    if options.theano:
        import NADE
    else:
        import npNADE as NADE
        raise Exception("Not implemented yet")
    results_route = os.path.join(os.environ["RESULTSPATH"], args[0])
    try:
        os.makedirs(results_route)
    except OSError:
        pass
    console = Backends.Console()
    textfile_log = Backends.TextFile(os.path.join(results_route, "NADE_training.log"))
    hdf5_backend = Backends.HDF5(results_route, "NADE")
    hdf5_backend.write([], "options", options)
    hdf5_backend.write([], "svn_revision", Utils.svn.svnversion())
    hdf5_backend.write([], "svn_status", Utils.svn.svnstatus())
    hdf5_backend.write([], "svn_diff", Utils.svn.svndiff())
    # Read datasets
    dataset_file = os.path.join(os.environ["DATASETSPATH"], options.dataset)
    training_dataset = Data.BigDataset(dataset_file, options.training_route, options.samples_name)
    if not options.no_validation:
        validation_dataset = Data.BigDataset(dataset_file, options.validation_route, options.samples_name)
    test_dataset = Data.BigDataset(dataset_file, options.test_route, options.samples_name)
    n_visible = training_dataset.get_dimensionality(0)
    # Calculate normalisation constants
    if options.normalize:
        # Normalise all datasets
        mean, std = Data.utils.get_dataset_statistics(training_dataset)
        training_dataset = Data.utils.normalise_dataset(training_dataset, mean, std)
        if not options.no_validation:
            validation_dataset = Data.utils.normalise_dataset(validation_dataset, mean, std)
        test_dataset = Data.utils.normalise_dataset(test_dataset, mean, std)
        hdf5_backend.write([], "normalisation/mean", mean)
        hdf5_backend.write([], "normalisation/std", std)
    # Dataset of masks
    try:
        masks_filename = options.dataset + "." + floatX + ".masks"
        masks_route = os.path.join(os.environ["DATASETSPATH"], masks_filename)
        masks_dataset = Data.BigDataset(masks_route + ".hdf5", "masks/.*", "masks")
    except:
        create_dropout_masks(os.environ["DATASETSPATH"], masks_filename, n_visible, ks=1000)
        masks_dataset = Data.BigDataset(masks_route + ".hdf5", "masks/.*", "masks")
    l = 1 if options.layerwise else options.hlayers
    if options.form == "MoG":
        nade_class = NADE.OrderlessMoGNADE
        nade = nade_class(n_visible, options.units, l, options.n_components, nonlinearity=options.nonlinearity)
        loss_function = "sym_masked_neg_loglikelihood_gradient"
        validation_loss_measurement = Instrumentation.Function("validation_loss",
                                                               lambda ins: -ins.model.estimate_average_loglikelihood_for_dataset_using_masks(validation_dataset, masks_dataset, loops=options.validation_loops))
    elif options.form == "Bernoulli":
        nade_class = NADE.OrderlessBernoulliNADE
        nade = nade_class(n_visible, options.units, l, nonlinearity=options.nonlinearity)
        loss_function = "sym_masked_neg_loglikelihood_gradient"
        validation_loss_measurement = Instrumentation.Function("validation_loss",
                                                               lambda ins: -ins.model.estimate_average_loglikelihood_for_dataset_using_masks(validation_dataset, masks_dataset, loops=options.validation_loops))
    elif options.form == "QR":
        nade_class = NADE.OrderlessQRNADE
        nade = nade_class(n_visible, options.units, l, options.n_quantiles, nonlinearity=options.nonlinearity)
        loss_function = "sym_masked_pinball_loss_gradient"
        validation_loss_measurement = Instrumentation.Function("validation_loss",
                                                               lambda ins: ins.model.estimate_average_pinball_loss_for_dataset(validation_dataset, masks_dataset, loops=options.validation_loops))
    else:
        raise Exception("Unknown form")
    if options.layerwise:
        # Pretrain layerwise
        for l in xrange(1, options.hlayers + 1):
            if l == 1:
                nade.initialize_parameters_from_dataset(training_dataset)
            else:
                nade = nade_class.create_from_smaller_NADE(nade, add_n_hiddens=1)
            # Configure training
            trainer = Optimization.MomentumSGD(nade, nade.__getattribute__(loss_function))
            trainer.set_datasets([training_dataset, masks_dataset])
            trainer.set_learning_rate(options.lr)
            trainer.set_datapoints_as_columns(True)
            trainer.add_controller(TrainingController.AdaptiveLearningRate(options.lr, 0, epochs=options.pretraining_epochs))
            trainer.add_controller(TrainingController.MaxIterations(options.pretraining_epochs))
            trainer.add_controller(TrainingController.ConfigurationSchedule("momentum", [(2, 0), (float('inf'), options.momentum)]))
            trainer.set_updates_per_epoch(options.epoch_size)
            trainer.set_minibatch_size(options.batch_size)
            # trainer.set_weight_decay_rate(options.wd)
            trainer.add_controller(TrainingController.NaNBreaker())
            # Instrument the training
            trainer.add_instrumentation(Instrumentation.Instrumentation([console, textfile_log, hdf5_backend],
                                                                        Instrumentation.Function("training_loss", lambda ins: ins.get_training_loss())))
            trainer.add_instrumentation(Instrumentation.Instrumentation([console, textfile_log, hdf5_backend], Instrumentation.Configuration()))
            trainer.add_instrumentation(Instrumentation.Instrumentation([console, textfile_log, hdf5_backend], Instrumentation.Timestamp()))
            # Train
            trainer.set_context("pretraining_%d" % l)
            trainer.train()
    else:
        # No pretraining
        nade.initialize_parameters_from_dataset(training_dataset)
        # Configure training
        ordering = range(n_visible)
        np.random.shuffle(ordering)
        trainer = Optimization.MomentumSGD(nade, nade.__getattribute__(loss_function))
        trainer.set_datasets([training_dataset, masks_dataset])
        trainer.set_learning_rate(options.lr)
        trainer.set_datapoints_as_columns(True)
        trainer.add_controller(TrainingController.AdaptiveLearningRate(options.lr, 0, epochs=options.epochs))
        trainer.add_controller(TrainingController.MaxIterations(options.epochs))
        if options.training_ll_stop < np.inf:
            # Assumes that we're doing minimization so negative ll
            trainer.add_controller(TrainingController.TrainingErrorStop(-options.training_ll_stop))
        trainer.add_controller(TrainingController.ConfigurationSchedule("momentum", [(2, 0), (float('inf'), options.momentum)]))
        trainer.set_updates_per_epoch(options.epoch_size)
        trainer.set_minibatch_size(options.batch_size)
        # trainer.set_weight_decay_rate(options.wd)
        trainer.add_controller(TrainingController.NaNBreaker())
        # Instrument the training
        trainer.add_instrumentation(Instrumentation.Instrumentation([console, textfile_log, hdf5_backend],
                                                                    Instrumentation.Function("training_loss", lambda ins: ins.get_training_loss())))
        if not options.no_validation:
            trainer.add_instrumentation(Instrumentation.Instrumentation([console], validation_loss_measurement))
            trainer.add_instrumentation(Instrumentation.Instrumentation([hdf5_backend], validation_loss_measurement,
                                                                        at_lowest=[Instrumentation.Parameters()]))
        trainer.add_instrumentation(Instrumentation.Instrumentation([console, textfile_log, hdf5_backend], Instrumentation.Configuration()))
        # trainer.add_instrumentation(Instrumentation.Instrumentation([hdf5_backend], Instrumentation.Parameters(), every=10))
        trainer.add_instrumentation(Instrumentation.Instrumentation([console, textfile_log, hdf5_backend], Instrumentation.Timestamp()))
        # Train
        trainer.train()
    # ------------------------------------------------------------------------------
    # Report some final performance measurements
    if trainer.was_successful():
        np.random.seed(8341)
        hdf5_backend.write(["final_model"], "parameters", nade.get_parameters())
        if not options.no_validation:
            nade.set_parameters(hdf5_backend.read("/lowest_validation_loss/parameters"))
        config = {"wd": options.wd, "h": options.units, "lr": options.lr, "q": options.n_quantiles}
        log_message([console, textfile_log], "Config %s" % str(config))
        if options.show_training_stop:
            training_likelihood = nade.estimate_loglikelihood_for_dataset(training_dataset)
            log_message([console, textfile_log], "Training average loss\t%f" % training_likelihood)
            hdf5_backend.write([], "training_loss", training_likelihood)
        val_ests = []
        test_ests = []
        for i in xrange(options.summary_orderings):
            nade.setup_n_orderings(n=1)
            if not options.no_validation:
                val_ests.append(nade.estimate_loglikelihood_for_dataset(validation_dataset))
            test_ests.append(nade.estimate_loglikelihood_for_dataset(test_dataset))
        if not options.no_validation:
            val_avgs = map(lambda x: x.estimation, val_ests)
            val_mean, val_se = (np.mean(val_avgs), scipy.stats.sem(val_avgs))
            log_message([console, textfile_log], "*Validation mean\t%f \t(se: %f)" % (val_mean, val_se))
            hdf5_backend.write([], "validation_likelihood", val_mean)
            hdf5_backend.write([], "validation_likelihood_se", val_se)
            for i, est in enumerate(val_ests):
                log_message([console, textfile_log], "Validation detail #%d mean\t%f \t(se: %f)" % (i + 1, est.estimation, est.se))
                hdf5_backend.write(["results", "orderings", str(i + 1)], "validation_likelihood", est.estimation)
                hdf5_backend.write(["results", "orderings", str(i + 1)], "validation_likelihood_se", est.se)
        test_avgs = map(lambda x: x.estimation, test_ests)
        test_mean, test_se = (np.mean(test_avgs), scipy.stats.sem(test_avgs))
        log_message([console, textfile_log], "*Test mean\t%f \t(se: %f)" % (test_mean, test_se))
        hdf5_backend.write([], "test_likelihood", test_mean)
        hdf5_backend.write([], "test_likelihood_se", test_se)
        for i, est in enumerate(test_ests):
            log_message([console, textfile_log], "Test detail #%d mean\t%f \t(se: %f)" % (i + 1, est.estimation, est.se))
            hdf5_backend.write(["results", "orderings", str(i + 1)], "test_likelihood", est.estimation)
            hdf5_backend.write(["results", "orderings", str(i + 1)], "test_likelihood_se", est.se)
        hdf5_backend.write([], "final_score", test_mean)
        # Report results on ensembles of NADEs
        if options.report_mixtures:
            for components in [2, 4, 8, 16, 32]:
                nade.setup_n_orderings(n=components)
                est = nade.estimate_loglikelihood_for_dataset(test_dataset)
                log_message([console, textfile_log], "Test ll mixture of nades %d components: mean\t%f \t(se: %f)" % (components, est.estimation, est.se))
                hdf5_backend.write(["results", "mixtures", str(components)], "test_likelihood", est.estimation)
                hdf5_backend.write(["results", "mixtures", str(components)], "test_likelihood_se", est.se)
    return nade
def run(self, quiet=False, testing=True):
    # Set garbage collection threshold
    gc.set_threshold(gc.get_threshold()[0] / 5)
    # Create result directory if it doesn't already exist
    results_route = self.params['resultspath']
    if not os.path.isdir(results_route):
        os.makedirs(results_route)
    # Write all the parameters to file
    console = Backends.Console()
    textfile_log = Backends.TextFile(os.path.join(results_route, "NADE_training.log"))
    hdf5_backend = Backends.HDF5(results_route, "NADE")
    hdf5_backend.write([], "params", self.params)
    hdf5_backend.write([], "svn_revision", Utils.svn.svnversion())
    hdf5_backend.write([], "svn_status", Utils.svn.svnstatus())
    hdf5_backend.write([], "svn_diff", Utils.svn.svndiff())
    if quiet:
        report = [textfile_log, hdf5_backend]
    else:
        report = [console, textfile_log, hdf5_backend]
    # Read datasets
    dataset_file = os.path.join(self.params['datasetspath'], self.params['dataset'])
    training_dataset = Data.BigDataset(dataset_file, self.params['training_route'], self.params['samples_name'])
    if not self.params['no_validation']:
        validation_dataset = Data.BigDataset(dataset_file, self.params['validation_route'], self.params['samples_name'])
    test_dataset = Data.BigDataset(dataset_file, self.params['test_route'], self.params['samples_name'])
    n_visible = training_dataset.get_dimensionality(0)
    # Calculate normalisation constants
    mean, std = Data.utils.get_dataset_statistics(training_dataset)
    self.params['mean'] = mean.reshape(len(mean), 1)
    self.params['std'] = std.reshape(len(std), 1)
    if self.params['normalize']:
        # Normalise all datasets
        training_dataset = Data.utils.normalise_dataset(training_dataset, mean, std)
        if not self.params['no_validation']:
            validation_dataset = Data.utils.normalise_dataset(validation_dataset, mean, std)
        test_dataset = Data.utils.normalise_dataset(test_dataset, mean, std)
        hdf5_backend.write([], "normalisation/mean", mean)
        hdf5_backend.write([], "normalisation/std", std)
    # Dataset of masks
    try:
        masks_filename = self.params['dataset'] + "." + floatX + ".masks"
        masks_route = os.path.join(self.params['datasetspath'], masks_filename)
        masks_dataset = Data.BigDataset(masks_route + ".hdf5", "masks/.*", "masks")
    except:
        create_dropout_masks(self.params['datasetspath'], masks_filename, n_visible, ks=1000)
        masks_dataset = Data.BigDataset(masks_route + ".hdf5", "masks/.*", "masks")
    l = 1 if self.params['layerwise'] else self.params['hlayers']
    if self.params['form'] == "MoG":
        nade_class = NADE.OrderlessMoGNADE
        self.nade = nade_class(n_visible, self.params['units'], l, self.params['n_components'], nonlinearity=self.params['nonlinearity'])
        loss_function = "sym_masked_neg_loglikelihood_gradient"
        validation_loss_measurement = Instrumentation.Function("validation_loss",
                                                               lambda ins: -ins.model.estimate_average_loglikelihood_for_dataset_using_masks(validation_dataset, masks_dataset, loops=self.params['validation_loops']))
    elif self.params['form'] == "Bernoulli":
        nade_class = NADE.OrderlessBernoulliNADE
        self.nade = nade_class(n_visible, self.params['units'], l, nonlinearity=self.params['nonlinearity'])
        loss_function = "sym_masked_neg_loglikelihood_gradient"
        validation_loss_measurement = Instrumentation.Function("validation_loss",
                                                               lambda ins: -ins.model.estimate_average_loglikelihood_for_dataset_using_masks(validation_dataset, masks_dataset, loops=self.params['validation_loops']))
    elif self.params['form'] == "QR":
        nade_class = NADE.OrderlessQRNADE
        self.nade = nade_class(n_visible, self.params['units'], l, self.params['n_quantiles'], nonlinearity=self.params['nonlinearity'])
        loss_function = "sym_masked_pinball_loss_gradient"
        validation_loss_measurement = Instrumentation.Function("validation_loss",
                                                               lambda ins: ins.model.estimate_average_pinball_loss_for_dataset(validation_dataset, masks_dataset, loops=self.params['validation_loops']))
    else:
        raise Exception("Unknown form")
    if self.params['layerwise']:
        # Pretrain layerwise
        for l in xrange(1, self.params['hlayers'] + 1):
            if l == 1:
                self.nade.initialize_parameters_from_dataset(training_dataset)
            else:
                self.nade = nade_class.create_from_smaller_NADE(self.nade, add_n_hiddens=1)
            # Configure training
            trainer = Optimization.MomentumSGD(self.nade, self.nade.__getattribute__(loss_function))
            trainer.set_datasets([training_dataset, masks_dataset])
            trainer.set_learning_rate(self.params['lr'])
            trainer.set_datapoints_as_columns(True)
            trainer.add_controller(TrainingController.AdaptiveLearningRate(self.params['lr'], 0, epochs=self.params['pretraining_epochs']))
            trainer.add_controller(TrainingController.MaxIterations(self.params['pretraining_epochs']))
            trainer.add_controller(TrainingController.ConfigurationSchedule("momentum", [(2, 0), (float('inf'), self.params['momentum'])]))
            trainer.set_updates_per_epoch(self.params['epoch_size'])
            trainer.set_minibatch_size(self.params['batch_size'])
            # trainer.set_weight_decay_rate(options.wd)
            trainer.add_controller(TrainingController.NaNBreaker())
            # Instrument the training
            trainer.add_instrumentation(Instrumentation.Instrumentation(report, Instrumentation.Function("training_loss", lambda ins: ins.get_training_loss())))
            trainer.add_instrumentation(Instrumentation.Instrumentation(report, Instrumentation.Configuration()))
            trainer.add_instrumentation(Instrumentation.Instrumentation(report, Instrumentation.Timestamp()))
            # Train
            trainer.set_context("pretraining_%d" % l)
            trainer.train()
    else:
        # No pretraining
        self.nade.initialize_parameters_from_dataset(training_dataset)
        # Configure training
        ordering = range(n_visible)
        np.random.shuffle(ordering)
        trainer = Optimization.MomentumSGD(self.nade, self.nade.__getattribute__(loss_function))
        trainer.set_datasets([training_dataset, masks_dataset])
        trainer.set_learning_rate(self.params['lr'])
        trainer.set_datapoints_as_columns(True)
        trainer.add_controller(TrainingController.AdaptiveLearningRate(self.params['lr'], 0, epochs=self.params['epochs']))
        trainer.add_controller(TrainingController.MaxIterations(self.params['epochs']))
        if self.params['training_ll_stop'] < np.inf:
            # Assumes that we're doing minimization so negative ll
            trainer.add_controller(TrainingController.TrainingErrorStop(-self.params['training_ll_stop']))
        trainer.add_controller(TrainingController.ConfigurationSchedule("momentum", [(2, 0), (float('inf'), self.params['momentum'])]))
        trainer.set_updates_per_epoch(self.params['epoch_size'])
        trainer.set_minibatch_size(self.params['batch_size'])
        # trainer.set_weight_decay_rate(options.wd)
        trainer.add_controller(TrainingController.NaNBreaker())
        # Instrument the training
        trainer.add_instrumentation(Instrumentation.Instrumentation(report, Instrumentation.Function("training_loss", lambda ins: ins.get_training_loss())))
        if not self.params['no_validation']:
            temp = [textfile_log]
            if not quiet:
                temp.append(console)
            trainer.add_instrumentation(Instrumentation.Instrumentation(temp, validation_loss_measurement))
            trainer.add_instrumentation(Instrumentation.Instrumentation([hdf5_backend], validation_loss_measurement,
                                                                        at_lowest=[Instrumentation.Parameters()]))
        trainer.add_instrumentation(Instrumentation.Instrumentation(report, Instrumentation.Configuration()))
        # trainer.add_instrumentation(Instrumentation.Instrumentation([hdf5_backend], Instrumentation.Parameters(), every=10))
        trainer.add_instrumentation(Instrumentation.Instrumentation(report, Instrumentation.Timestamp()))
        # Train
        trainer.train()
    # ------------------------------------------------------------------------------
    # Report some final performance measurements
    flag = False  # indicates whether training was successful
    if trainer.was_successful():
        flag = True
        np.random.seed(8341)
        hdf5_backend.write(["final_model"], "parameters", self.nade.get_parameters())
        if not self.params['no_validation']:
            self.nade.set_parameters(hdf5_backend.read("/lowest_validation_loss/parameters"))
        config = {"wd": self.params['wd'], "units": self.params['units'], "lr": self.params['lr'], "n_quantiles": self.params['n_quantiles']}
        log_message([console, textfile_log], "Config %s" % str(config))
        if self.params['show_training_stop']:
            training_likelihood = self.nade.estimate_loglikelihood_for_dataset(training_dataset)
            log_message([console, textfile_log], "Training average loss\t%f" % training_likelihood)
            hdf5_backend.write([], "training_loss", training_likelihood)
        if testing:
            val_ests = []
            test_ests = []
            for i in xrange(self.params['summary_orderings']):
                self.nade.setup_n_orderings(n=1)
                if not self.params['no_validation']:
                    val_ests.append(self.nade.estimate_loglikelihood_for_dataset(validation_dataset))
                test_ests.append(self.nade.estimate_loglikelihood_for_dataset(test_dataset))
            if not self.params['no_validation']:
                val_avgs = map(lambda x: x.estimation, val_ests)
                val_mean, val_se = (np.mean(val_avgs), scipy.stats.sem(val_avgs))
                log_message([console, textfile_log], "*Validation mean\t%f \t(se: %f)" % (val_mean, val_se))
                hdf5_backend.write([], "validation_likelihood", val_mean)
                hdf5_backend.write([], "validation_likelihood_se", val_se)
                for i, est in enumerate(val_ests):
                    log_message([console, textfile_log], "Validation detail #%d mean\t%f \t(se: %f)" % (i + 1, est.estimation, est.se))
                    hdf5_backend.write(["results", "orderings", str(i + 1)], "validation_likelihood", est.estimation)
                    hdf5_backend.write(["results", "orderings", str(i + 1)], "validation_likelihood_se", est.se)
            test_avgs = map(lambda x: x.estimation, test_ests)
            test_mean, test_se = (np.mean(test_avgs), scipy.stats.sem(test_avgs))
            log_message([console, textfile_log], "*Test mean\t%f \t(se: %f)" % (test_mean, test_se))
            hdf5_backend.write([], "test_likelihood", test_mean)
            hdf5_backend.write([], "test_likelihood_se", test_se)
            for i, est in enumerate(test_ests):
                log_message([console, textfile_log], "Test detail #%d mean\t%f \t(se: %f)" % (i + 1, est.estimation, est.se))
                hdf5_backend.write(["results", "orderings", str(i + 1)], "test_likelihood", est.estimation)
                hdf5_backend.write(["results", "orderings", str(i + 1)], "test_likelihood_se", est.se)
            hdf5_backend.write([], "final_score", test_mean)
            # Report results on ensembles of NADEs
            if self.params['report_mixtures']:
                for components in [2, 4, 8, 16, 32]:
                    self.nade.setup_n_orderings(n=components)
                    est = self.nade.estimate_loglikelihood_for_dataset(test_dataset)
                    log_message([console, textfile_log], "Test ll mixture of nades %d components: mean\t%f \t(se: %f)" % (components, est.estimation, est.se))
                    hdf5_backend.write(["results", "mixtures", str(components)], "test_likelihood", est.estimation)
                    hdf5_backend.write(["results", "mixtures", str(components)], "test_likelihood_se", est.se)
    # Set the training log file and the hdf5 file
    self.results = {
        'training_log': textfile_log.filename,
        'result_file': os.path.join(hdf5_backend.path, hdf5_backend.filename),
        'successful': flag
    }
    print self.results
def main():
    parser = OptionParser(usage="usage: %prog [options] results_route")
    parser.add_option("--theano", dest="theano", default=False, action="store_true")
    # Model options
    parser.add_option("--form", dest="form", default="")
    parser.add_option("--n_quantiles", dest="n_quantiles", default=20, type="int")
    parser.add_option("--n_components", dest="n_components", default=10, type="int")
    parser.add_option("--hlayers", dest="hlayers", default=1, type="int")
    parser.add_option("--units", dest="units", default=100, type="int")
    parser.add_option("--nonlinearity", dest="nonlinearity", default="RLU")
    # Training options
    parser.add_option("--layerwise", dest="layerwise", default=False, action="store_true")
    parser.add_option("--training_ll_stop", dest="training_ll_stop", default=np.inf, type="float")
    parser.add_option("--lr", dest="lr", default=0.01, type="float")
    parser.add_option("--decrease_constant", dest="decrease_constant", default=0.1, type="float")
    parser.add_option("--wd", dest="wd", default=0.00, type="float")
    parser.add_option("--momentum", dest="momentum", default=0.9, type="float")
    parser.add_option("--epochs", dest="epochs", default=200, type="int")
    parser.add_option("--pretraining_epochs", dest="pretraining_epochs", default=20, type="int")
    parser.add_option("--epoch_size", dest="epoch_size", default=10, type="int")
    parser.add_option("--batch_size", dest="batch_size", default=100, type="int")
    # Dataset options
    parser.add_option("--dataset", dest="dataset", default="")
    parser.add_option("--training_route", dest="training_route", default="train")
    parser.add_option("--validation_route", dest="validation_route", default="validation")
    parser.add_option("--test_route", dest="test_route", default="test")
    parser.add_option("--samples_name", dest="samples_name", default="data")
    parser.add_option("--normalize", dest="normalize", default=False, action="store_true")
    parser.add_option("--validation_loops", dest="validation_loops", default=16, type="int")
    parser.add_option("--no_validation", dest="no_validation", default=False, action="store_true")
    # Reports
    parser.add_option("--show_training_stop", dest="show_training_stop", default=False, action="store_true")
    parser.add_option("--summary_orderings", dest="summary_orderings", default=10, type="int")
    parser.add_option("--report_mixtures", dest="report_mixtures", default=False, action="store_true")
    gc.set_threshold(gc.get_threshold()[0] / 5)
    # gc.set_debug(gc.DEBUG_UNCOLLECTABLE | gc.DEBUG_INSTANCES | gc.DEBUG_OBJECTS)
    (options, args) = parser.parse_args()
    if options.theano:
        import NADE
    else:
        import npNADE as NADE
        raise Exception("Not implemented yet")
    results_route = os.path.join(os.environ["RESULTSPATH"], args[0])
    try:
        os.makedirs(results_route)
    except OSError:
        pass
    console = Backends.Console()
    textfile_log = Backends.TextFile(os.path.join(results_route, "NADE_training.log"))
    hdf5_backend = Backends.HDF5(results_route, "NADE")
    hdf5_backend.write([], "options", options)
    hdf5_backend.write([], "svn_revision", Utils.svn.svnversion())
    hdf5_backend.write([], "svn_status", Utils.svn.svnstatus())
    hdf5_backend.write([], "svn_diff", Utils.svn.svndiff())
    # Read datasets
    dataset_file = os.path.join(os.environ["DATASETSPATH"], options.dataset)
    training_dataset = Data.BigDataset(dataset_file, options.training_route, options.samples_name)
    if not options.no_validation:
        validation_dataset = Data.BigDataset(dataset_file, options.validation_route, options.samples_name)
    test_dataset = Data.BigDataset(dataset_file, options.test_route, options.samples_name)
    n_visible = training_dataset.get_dimensionality(0)
    # Calculate normalisation constants
    if options.normalize:
        # Normalise all datasets
        mean, std = Data.utils.get_dataset_statistics(training_dataset)
        training_dataset = Data.utils.normalise_dataset(training_dataset, mean, std)
        if not options.no_validation:
            validation_dataset = Data.utils.normalise_dataset(validation_dataset, mean, std)
        test_dataset = Data.utils.normalise_dataset(test_dataset, mean, std)
        hdf5_backend.write([], "normalisation/mean", mean)
        hdf5_backend.write([], "normalisation/std", std)
    l = 1 if options.layerwise else options.hlayers
    if options.form == "MoG":
        nade_class = NADE.MoGNADE
        nade = nade_class(n_visible, options.units, options.n_components, nonlinearity=options.nonlinearity)
        loss_function = "sym_neg_loglikelihood_gradient"
        validation_loss_measurement = Instrumentation.Function("validation_loss",
                                                               lambda ins: -ins.model.estimate_loglikelihood_for_dataset(validation_dataset))
    elif options.form == "Laplace":
        nade_class = NADE.MoLaplaceNADE
        nade = nade_class(n_visible, options.units, options.n_components, nonlinearity=options.nonlinearity)
        loss_function = "sym_neg_loglikelihood_gradient"
        validation_loss_measurement = Instrumentation.Function("validation_loss",
                                                               lambda ins: -ins.model.estimate_loglikelihood_for_dataset(validation_dataset))
    elif options.form == "Bernoulli":
        nade_class = NADE.BernoulliNADE
        nade = nade_class(n_visible, options.units, nonlinearity=options.nonlinearity)
        loss_function = "sym_neg_loglikelihood_gradient"
        validation_loss_measurement = Instrumentation.Function("validation_loss",
                                                               lambda ins: -ins.model.estimate_loglikelihood_for_dataset(validation_dataset))
    else:
        raise Exception("Unknown form")
    # ------------------------------------------------------------------------------
    # Training
    nade.initialize_parameters_from_dataset(training_dataset)
    # Configure training
    ordering = range(n_visible)
    np.random.shuffle(ordering)
    trainer = Optimization.NAGSGD(nade, nade.__getattribute__(loss_function))
    trainer.set_datasets([training_dataset])
    trainer.set_datapoints_as_columns(True)
    trainer.add_controller(TrainingController.MontrealLearningRate(options.lr, options.decrease_constant))
    # trainer.add_controller(TrainingController.AdaptiveLearningRate(options.lr, 0, epochs=options.epochs))
    trainer.add_controller(TrainingController.MaxIterations(options.epochs))
    if options.training_ll_stop < np.inf:
        # Assumes that we're doing minimization so negative ll
        trainer.add_controller(TrainingController.TrainingErrorStop(-options.training_ll_stop))
    trainer.add_controller(TrainingController.ConfigurationSchedule("momentum", [(2, 0), (float('inf'), options.momentum)]))
    trainer.set_updates_per_epoch(options.epoch_size)
    trainer.set_minibatch_size(options.batch_size)
    # trainer.set_weight_decay_rate(options.wd)
    trainer.add_controller(TrainingController.NaNBreaker())
    # Instrument the training
    trainer.add_instrumentation(Instrumentation.Instrumentation([console, textfile_log, hdf5_backend],
                                                                Instrumentation.Function("training_loss", lambda ins: ins.get_training_loss())))
    if not options.no_validation:
        trainer.add_instrumentation(Instrumentation.Instrumentation([console], validation_loss_measurement))
        trainer.add_instrumentation(Instrumentation.Instrumentation([hdf5_backend], validation_loss_measurement,
                                                                    at_lowest=[Instrumentation.Parameters()]))
    trainer.add_instrumentation(Instrumentation.Instrumentation([console, textfile_log, hdf5_backend], Instrumentation.Configuration()))
    # trainer.add_instrumentation(Instrumentation.Instrumentation([hdf5_backend], Instrumentation.Parameters(), every=10))
    trainer.add_instrumentation(Instrumentation.Instrumentation([console, textfile_log, hdf5_backend], Instrumentation.Timestamp()))
    # Train
    trainer.train()
    # ------------------------------------------------------------------------------
    # Report some final performance measurements
    if trainer.was_successful():
        np.random.seed(8341)
        hdf5_backend.write(["final_model"], "parameters", nade.get_parameters())
        if not options.no_validation:
            nade.set_parameters(hdf5_backend.read("/lowest_validation_loss/parameters"))
        config = {"wd": options.wd, "h": options.units, "lr": options.lr, "q": options.n_quantiles}
        log_message([console, textfile_log], "Config %s" % str(config))
        if options.show_training_stop:
            training_likelihood = nade.get_average_loglikelihood_for_dataset(training_dataset)
            log_message([console, textfile_log], "Training average loss\t%f" % training_likelihood)
            hdf5_backend.write([], "training_loss", training_likelihood)
        if not options.no_validation:
            val_est = nade.estimate_loglikelihood_for_dataset(validation_dataset)
            log_message([console, textfile_log], "*Validation mean\t%f \t(se: %f)" % (val_est.estimation, val_est.se))
            hdf5_backend.write([], "validation_likelihood", val_est.estimation)
            hdf5_backend.write([], "validation_likelihood_se", val_est.se)
        test_est = nade.estimate_loglikelihood_for_dataset(test_dataset)
        log_message([console, textfile_log], "*Test mean\t%f \t(se: %f)" % (test_est.estimation, test_est.se))
        hdf5_backend.write([], "test_likelihood", test_est.estimation)
        hdf5_backend.write([], "test_likelihood_se", test_est.se)
        hdf5_backend.write([], "final_score", test_est.estimation)
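# Script entry point. The original extract does not show one, so this guard is an
# assumption about how the command-line tool is launched, e.g.:
#   RESULTSPATH=/tmp/results DATASETSPATH=/tmp/data python this_script.py \
#       --theano --form MoG --dataset my_dataset.hdf5 my_experiment
if __name__ == "__main__":
    main()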