Example #1
    def abalone_data(config):
        method = config['method']
        sparsify_factor = config['sparse_factor']
        np.random.seed(12000)
        data = DataSource.abalone_data()
        names = []
        d = data[config['run_id'] - 1]
        Xtrain = d['train_X']
        Ytrain = d['train_Y']
        Xtest = d['test_X']
        Ytest = d['test_Y']
        name = 'abalone'
        kernel = ExperimentSetup.get_kernels(Xtrain.shape[1], 1, False)

        # number of inducing points
        num_inducing = int(Xtrain.shape[0] * sparsify_factor)
        num_samples = 2000

        cond_ll = WarpLL(np.array([-2.0485, 1.7991, 1.5814]),
                         np.array([2.7421, 0.9426, 1.7804]),
                         np.array([0.1856, 0.7024, -0.7421]),
                         np.log(0.1))

        names.append(
            ModelLearn.run_model(Xtest, Xtrain, Ytest, Ytrain, cond_ll, kernel, method, name, d['id'], num_inducing,
                                 num_samples, sparsify_factor, ['mog', 'hyp', 'll'], MinTransformation, True,
                                 config['log_level'], False, latent_noise=0.001,
                                 opt_per_iter={'mog': 25, 'hyp': 25, 'll': 25},
                                 max_iter=200))
        return names
Example #2
    def boston_data(config):
        method = config['method']
        sparsify_factor = config['sparse_factor']
        np.random.seed(12000)
        data = DataSource.boston_data()
        d = data[config['run_id'] - 1]
        names = []
        Xtrain = d['train_X']
        Ytrain = d['train_Y']
        Xtest = d['test_X']
        Ytest = d['test_Y']
        name = 'boston'
        kernel = ExperimentSetup.get_kernels(Xtrain.shape[1], 1, True)
        # gaussian_sigma = np.var(Ytrain)/4 + 1e-4
        gaussian_sigma = 1.0
        # number of inducing points
        num_inducing = int(Xtrain.shape[0] * sparsify_factor)
        cond_ll = UnivariateGaussian(np.array(gaussian_sigma))
        num_samples = 2000

        names.append(
            ModelLearn.run_model(Xtest, Xtrain, Ytest, Ytrain, cond_ll, kernel, method, name, d['id'], num_inducing,
                                 num_samples, sparsify_factor, ['hyp', 'mog', 'll'], MeanTransformation, True,
                                 config['log_level'], False, latent_noise=0.001,
                                 opt_per_iter={'mog': 25, 'hyp': 25, 'll': 25},
                                 max_iter=200))
        return names
Example #3
    def creep_data(config):
        method = config['method']
        sparsify_factor = config['sparse_factor']
        np.random.seed(12000)
        data = DataSource.creep_data()

        names = []
        d = data[config['run_id'] - 1]
        Xtrain = d['train_X']
        Ytrain = d['train_Y']
        Xtest = d['test_X']
        Ytest = d['test_Y']
        name = 'creep'
        scaler = preprocessing.StandardScaler().fit(Xtrain)
        Xtrain = scaler.transform(Xtrain)
        Xtest = scaler.transform(Xtest)
        kernel = ExperimentSetup.get_kernels(Xtrain.shape[1], 1, True)

        # number of inducing points
        num_inducing = int(Xtrain.shape[0] * sparsify_factor)
        num_samples = 2000

        cond_ll = WarpLL(np.array([3.8715, 3.8898, 2.8759]),
                         np.array([1.5925, -1.3360, -2.0289]),
                         np.array([0.7940, -4.1855, -3.0289]),
                         np.log(0.01))

        names.append(
            ModelLearn.run_model(Xtest, Xtrain, Ytest, Ytrain, cond_ll, kernel, method, name, d['id'], num_inducing,
                                 num_samples, sparsify_factor, ['mog', 'hyp', 'll'], MinTransformation, True,
                                 config['log_level'], False, latent_noise=0.001,
                                 opt_per_iter={'mog': 25, 'hyp': 25, 'll': 25},
                                 max_iter=200))
        return names
Example #4
    def wisconsin_breast_cancer_data(config):
        method = config['method']
        sparsify_factor = config['sparse_factor']
        np.random.seed(12000)
        data = DataSource.wisconsin_breast_cancer_data()
        names = []
        d = data[config['run_id'] - 1]
        Xtrain = d['train_X']
        Ytrain = d['train_Y']
        Xtest = d['test_X']
        Ytest = d['test_Y']
        name = 'breast_cancer'

        # uncomment these lines to use softmax
        # kernel = Experiments.get_kernels(Xtrain.shape[1], 2, False)
        # Ytrain = np.array([(Ytrain[:,0] + 1) / 2, (-Ytrain[:,0] + 1) / 2]).T
        # Ytest = np.array([(Ytest[:,0] + 1) / 2, (-Ytest[:,0] + 1) / 2]).T
        # cond_ll = SoftmaxLL(2)

        # uncomment these lines to use logistic
        cond_ll = LogisticLL()
        kernel = ExperimentSetup.get_kernels(Xtrain.shape[1], 1, False)

        # number of inducing points
        num_inducing = int(Xtrain.shape[0] * sparsify_factor)
        num_samples = 2000
        names.append(
            ModelLearn.run_model(Xtest, Xtrain, Ytest, Ytrain, cond_ll, kernel, method, name, d['id'], num_inducing,
                                 num_samples, sparsify_factor, ['mog', 'hyp'], IdentityTransformation, True,
                                 config['log_level'], False, latent_noise=0.001,
                                 opt_per_iter={'mog': 25, 'hyp': 25, 'll': 25},
                                 max_iter=200))
        return names
Example #5
    def mining_data(config):
        method = config['method']
        sparsify_factor = config['sparse_factor']
        np.random.seed(12000)
        data = DataSource.mining_data()
        names = []
        d = data[config['run_id'] - 1]
        Xtrain = d['train_X']
        Ytrain = d['train_Y']
        Xtest = d['test_X']
        Ytest = d['test_Y']
        name = 'mining'
        kernel = ExperimentSetup.get_kernels(Xtrain.shape[1], 1, False)

        # number of inducing points
        num_inducing = int(Xtrain.shape[0] * sparsify_factor)
        num_samples = 2000
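        # the hard-coded constant below appears to be the empirical event
        # rate (191 events over 811 intervals) on a log scale -- an
        # assumption; it initialises the log-Gaussian Cox intensity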
        cond_ll = LogGaussianCox(math.log(191. / 811))
        kernel[0].variance = 1.0
        kernel[0].lengthscale = 13516.

        names.append(
            ModelLearn.run_model(Xtest, Xtrain, Ytest, Ytrain, cond_ll, kernel, method, name, d['id'], num_inducing,
                                 num_samples, sparsify_factor, ['mog'], IdentityTransformation, True,
                                 config['log_level'], True, latent_noise=0.001,
                                 opt_per_iter={'mog': 15000},
                                 max_iter=1))
        return names
Example #6
    def _handle_sources(self):
        """
        Sort out sources and eqs for them
        :return:
        """
        assert len(self.fhosts) > 0
        _feng_cfg = self.configd['fengine']
        source_names = _feng_cfg['source_names'].strip().split(',')
        source_mcast = _feng_cfg['source_mcast_ips'].strip().split(',')
        assert len(source_mcast) == len(source_names), (
            'Source names (%d) must be paired with multicast source '
            'addresses (%d)' % (len(source_names), len(source_mcast)))

        # match eq polys to source names
        eq_polys = {}
        for src_name in source_names:
            eq_polys[src_name] = utils.process_new_eq(
                _feng_cfg['eq_poly_%s' % src_name])

        # assemble the sources given into a list
        _fengine_sources = []
        for source_ctr, address in enumerate(source_mcast):
            new_source = DataSource.from_mcast_string(address)
            new_source.name = source_names[source_ctr]
            assert new_source.ip_range == self.ports_per_fengine, (
                'F-engines should be receiving from %d streams.' %
                self.ports_per_fengine)
            _fengine_sources.append(new_source)

        # assign sources and eqs to fhosts
        self.logger.info('Assigning DataSources, EQs and DelayTrackers to '
                         'f-engines...')
        source_ctr = 0
        self.fengine_sources = []
        for fhost in self.fhosts:
            self.logger.info('\t%s:' % fhost.host)
            _eq_dict = {}
            for fengnum in range(0, self.f_per_fpga):
                _source = _fengine_sources[source_ctr]
                _eq_dict[_source.name] = {'eq': eq_polys[_source.name],
                                          'bram_num': fengnum}
                assert _source.ip_range == _fengine_sources[0].ip_range, (
                    'All f-engines should be receiving from %d streams.' %
                    self.ports_per_fengine)
                self.fengine_sources.append({'source': _source,
                                             'source_num': source_ctr,
                                             'host': fhost,
                                             'numonhost': fengnum})
                fhost.add_source(_source)
                self.logger.info('\t\t%s' % _source)
                source_ctr += 1
            fhost.eqs = _eq_dict
        if source_ctr != len(self.fhosts) * self.f_per_fpga:
            raise RuntimeError('We have different numbers of sources (%d) and '
                               'f-engines (%d). Problem.' %
                               (source_ctr,
                                len(self.fhosts) * self.f_per_fpga))
        self.logger.info('done.')
Example #7
    def test_gp(plot=False, method='full'):
        """
        Compares model prediction with an exact GP (without optimisation)
        """
        # note that this test fails without latent noise in the case of full Gaussian
        np.random.seed(111)
        num_input_samples = 10
        num_samples = 10000
        gaussian_sigma = .2
        X, Y, kernel = DataSource.normal_generate_samples(num_input_samples, gaussian_sigma, 1)
        kernel = [GPy.kern.RBF(1, variance=1., lengthscale=np.array((1.,)))]

        if method == 'full':
            m = SAVIGP_SingleComponent(X, Y, num_input_samples,
                                       UnivariateGaussian(np.array(gaussian_sigma)),
                                       kernel, num_samples, None, 0.001, True, True)

        if method == 'diag':
            m = SAVIGP_Diag(X, Y, num_input_samples, 1,
                            UnivariateGaussian(np.array(gaussian_sigma)),
                            kernel, num_samples, None, 0.001, True, True)

        # update model using optimal parameters
        # gp = SAVIGP_Test.gpy_prediction(X, Y, gaussian_sigma, kernel[0])
        # gp_mean, gp_var = gp.predict(X, full_cov=True)
        # m.MoG.m[0,0] = gp_mean[:,0]
        # m.MoG.update_covariance(0, gp_var - gaussian_sigma * np.eye(10))

        try:
            folder_name = 'test' + '_' + ModelLearn.get_ID()
            logger = ModelLearn.get_logger(folder_name, logging.DEBUG)

            Optimizer.optimize_model(m, 10000, logger, ['mog'])
        except KeyboardInterrupt:
            pass
        sa_mean, sa_var = m.predict(X)
        gp = SAVIGP_Test.gpy_prediction(X, Y, gaussian_sigma, deepcopy(kernel[0]))
        gp_mean, gp_var = gp.predict(X)
        mean_error = (np.abs(sa_mean - gp_mean)).sum() / sa_mean.shape[0]
        var_error = (np.abs(sa_var - gp_var)).sum() / gp_var.T.shape[0]
        if mean_error < 0.1:
            print(bcolors.OKBLUE, "passed: mean gp prediction ", mean_error)
        else:
            print(bcolors.WARNING, "failed: mean gp prediction ", mean_error)
        print(bcolors.ENDC)
        if var_error < 0.1:
            print(bcolors.OKBLUE, "passed: var gp prediction ", var_error)
        else:
            print(bcolors.WARNING, "failed: var gp prediction ", var_error)
        print(bcolors.ENDC)
        if plot:
            plot_fit(m)
            gp.plot()
            show(block=True)
Example #8
    def sarcos_all_joints_data(config):
        method = config['method']
        sparsify_factor = config['sparse_factor']
        np.random.seed(12000)
        data = DataSource.sarcos_all_joints_data()

        names = []
        d = data[0]
        Xtrain = d['train_X']
        Ytrain = d['train_Y']
        Xtest = d['test_X']
        Ytest = d['test_Y']
        name = 'sarcos_all_joints'

        scaler = preprocessing.StandardScaler().fit(Xtrain)
        Xtrain = scaler.transform(Xtrain)
        Xtest = scaler.transform(Xtest)

        kernel = ExperimentSetup.get_kernels(Xtrain.shape[1], 8, False)

        # number of inducing points
        num_inducing = int(Xtrain.shape[0] * sparsify_factor)
        num_samples = 2000

        cond_ll = CogLL(0.1, 7, 1)

        if 'n_thread' in config.keys():
            n_threads = config['n_thread']
        else:
            n_threads = 1

        if 'partition_size' in config.keys():
            partition_size = config['partition_size']
        else:
            partition_size = 3000

        image = None
        if 'image' in config.keys():
            image = config['image']

        names.append(
            ModelLearn.run_model(Xtest, Xtrain, Ytest, Ytrain, cond_ll, kernel, method, name, d['id'], num_inducing,
                                 num_samples, sparsify_factor, ['mog', 'll', 'hyp'], MeanStdYTransformation, True,
                                 config['log_level'], False, latent_noise=0.001,
                                 opt_per_iter={'mog': 50, 'hyp': 10, 'll': 10},
                                 max_iter=200,
                                 partition_size=partition_size,
                                 ftol=10,
                                 n_threads=n_threads,
                                 model_image_file=image))
        return names
Example #9
    def initialise_pre_gbe(self):
        """
        Set up f-engines on this device. This is done after programming the
        devices in the instrument.
        :return:
        """

        if 'x_setup' in self.hosts[0].registers.names():
            self.logger.info('Found num_x independent f-engines')
            # set up the x-engine information in the f-engine hosts
            num_x_hosts = len(self.corr.xhosts)
            num_x = num_x_hosts * int(self.corr.configd['xengine']['x_per_fpga'])
            f_per_x = self.corr.n_chans / num_x
            ip_per_x = 1.0
            THREADED_FPGA_OP(
                self.hosts, timeout=10,
                target_function=(
                    lambda fpga_:
                    fpga_.registers.x_setup.write(f_per_x=f_per_x,
                                                  ip_per_x=ip_per_x,
                                                  num_x=num_x,),))
            time.sleep(1)
        else:
            self.logger.info('Found FIXED num_x f-engines')

        # set eq and shift
        self.eq_write_all()
        self.set_fft_shift_all()

        # set up the fpga comms
        self.tx_disable()
        THREADED_FPGA_OP(
            self.hosts, timeout=10,
            target_function=(
                lambda fpga_: fpga_.registers.control.write(gbe_rst=True),))
        self.clear_status_all()

        # where does the f-engine data go?
        self.corr.fengine_output = DataSource.from_mcast_string(
            self.corr.configd['fengine']['destination_mcast_ips'])
        self.corr.fengine_output.name = 'fengine_destination'
        fdest_ip = int(self.corr.fengine_output.ip_address)
        THREADED_FPGA_OP(self.hosts, timeout=5, target_function=(
            lambda fpga_: fpga_.registers.iptx_base.write_int(fdest_ip),))

        # set the sample rate on the Fhosts
        for host in self.hosts:
            host.rx_data_sample_rate_hz = self.corr.sample_rate_hz
Example #10
    def __init__(self, json):
        self._json = json

        self.title = json['title']
        self.description = json.get('description', None)
        self.children = []
        self.data_source = None

        if 'dashboard-type' in json:
            self.children = \
                [Module(child) for child in json['modules']]
        elif json['module-type'] == 'tab':
            self.children = \
                [Module(child) for child in json['tabs']]
        else:
            self.data_source = DataSource(json['data-source'])
Example #11
def parse_sources(name_string, ip_string):
    """
    Parse lists of source names and IPs into a list of DataSource objects.
    :return:
    """
    source_names = name_string.strip().split(',')
    source_mcast = ip_string.strip().split(',')
    assert len(source_mcast) == len(source_names), (
        'Source names (%i) must be paired with multicast source '
        'addresses (%i)' % (len(source_names), len(source_mcast)))
    _sources = []
    source_ctr = 0
    for counter, address in enumerate(source_mcast):
        new_source = DataSource.from_mcast_string(address)
        new_source.name = source_names[counter]
        new_source.source_number = source_ctr
        _sources.append(new_source)
        if source_ctr > 0:
            assert new_source.ip_range == _sources[0].ip_range,\
                'DataSources have to offer the same IP range.'
        source_ctr += 1
    return _sources
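
A usage sketch (the source names and multicast strings below are made up, and the "+N" range suffix is only an assumption about the format DataSource.from_mcast_string accepts):

sources = parse_sources('ant0_x,ant0_y', '239.2.0.64+1,239.2.0.66+1')
for src in sources:
    print(src.name, src.source_number, src.ip_range)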
Example #12
    def test_model_learn(config):
        """
        Compares the model output with exact GP
        """
        method = config['method']
        sparsify_factor = config['sparse_factor']
        np.random.seed(12000)
        names = []
        num_input_samples = 20
        gaussian_sigma = .2

        X, Y, kernel = DataSource.normal_generate_samples(num_input_samples, gaussian_sigma)
        train_n = int(0.5 * num_input_samples)

        Xtrain = X[:train_n, :]
        Ytrain = Y[:train_n, :]
        Xtest = X[train_n:, :]
        Ytest = Y[train_n:, :]
        kernel1 = ModelLearn.get_kernels(Xtrain.shape[1], 1, True)
        kernel2 = ModelLearn.get_kernels(Xtrain.shape[1], 1, True)
        gaussian_sigma = 1.0

        # number of inducing points
        num_inducing = int(Xtrain.shape[0] * sparsify_factor)
        num_samples = 10000
        cond_ll = UnivariateGaussian(np.array(gaussian_sigma))

        n1, _ = ModelLearn.run_model(Xtest, Xtrain, Ytest, Ytrain, cond_ll, kernel1, method,
                                     'test_' + ModelLearn.get_ID(), 'test', num_inducing,
                                     num_samples, sparsify_factor, ['mog', 'll', 'hyp'],
                                     IdentityTransformation, True, logging.DEBUG, True)

        n2, _ = ModelLearn.run_model(Xtest, Xtrain, Ytest, Ytrain, cond_ll, kernel2, 'gp',
                                     'test_' + ModelLearn.get_ID(), 'test', num_inducing,
                                     num_samples, sparsify_factor, ['mog', 'll', 'hyp'],
                                     IdentityTransformation)

        PlotOutput.plot_output('test', ModelLearn.get_output_path(), [n1, n2], None, False)
Example #13
    def test_grad_single(config, verbose, sparse, likelihood_type):
        num_input_samples = 3
        num_samples = 100000
        cov, gaussian_sigma, ll, num_process = SAVIGP_Test.get_cond_ll(likelihood_type)
        np.random.seed(111)
        if sparse:
            num_inducing = num_input_samples - 1
        else:
            num_inducing = num_input_samples
        X, Y, kernel = DataSource.normal_generate_samples(num_input_samples, cov)
        s1 = SAVIGP_SingleComponent(X, Y, num_inducing, ll,
                                    [deepcopy(kernel) for j in range(num_process)],
                                    num_samples, config, 0, True, True)

        s1.rand_init_mog()

        def f(x):
            s1.set_params(x)
            return s1.objective_function()

        def f_grad(x):
            s1.set_params(x)
            return s1.objective_function_gradients()

        return GradChecker.check(f, f_grad, s1.get_params(), s1.get_param_names(), verbose=verbose)
Example #14
    def MNIST_data(config):
        method = config['method']
        sparsify_factor = config['sparse_factor']
        np.random.seed(12000)
        data = DataSource.mnist_data()
        names = []
        d = data[config['run_id'] - 1]
        Xtrain = d['train_X']
        Ytrain = d['train_Y']
        Xtest = d['test_X']
        Ytest = d['test_Y']
        name = 'mnist'

        # uncomment these lines to delete unused features
        # features_rm = np.array([])
        # for n in range(Xtrain.shape[1]):
        #     if Xtrain[:, n].sum() == 0:
        #         features_rm = np.append(features_rm, n)
        # Xtrain = np.delete(Xtrain, features_rm.astype(int), 1)
        # Xtest = np.delete(Xtest, features_rm.astype(int), 1)


        # uncomment these lines to change the resolution
        # res = 13
        # current_res = int(np.sqrt(Xtrain.shape[1]))
        # X_train_resized = np.empty((Xtrain.shape[0], res * res))
        # X_test_resized = np.empty((Xtest.shape[0], res * res))
        # for n in range(Xtrain.shape[0]):
        #     im = Image.fromarray(Xtrain[n, :].reshape((current_res, current_res)))
        #     im = im.resize((res, res))
        #     X_train_resized[n] = np.array(im).flatten()
        #
        # for n in range(Xtest.shape[0]):
        #     im = Image.fromarray(Xtest[n, :].reshape((current_res, current_res)))
        #     im = im.resize((res, res))
        #     X_test_resized[n] = np.array(im).flatten()
        #
        #
        # Xtrain = X_train_resized
        # Xtest = X_test_resized

        kernel = [ExtRBF(Xtrain.shape[1], variance=11, lengthscale=np.array((9.,)), ARD=False) for j in range(10)]
        # number of inducing points
        num_inducing = int(Xtrain.shape[0] * sparsify_factor)
        num_samples = 2000
        cond_ll = SoftmaxLL(10)

        if 'n_thread' in config.keys():
            n_threads = config['n_thread']
        else:
            n_threads = 1

        if 'partition_size' in config.keys():
            partition_size = config['partition_size']
        else:
            partition_size = 3000

        image = None
        if 'image' in config.keys():
            image = config['image']

        names.append(
            ModelLearn.run_model(Xtest, Xtrain, Ytest, Ytrain, cond_ll, kernel, method, name, d['id'], num_inducing,
                                 num_samples, sparsify_factor, ['mog', 'hyp'], IdentityTransformation, False,
                                 config['log_level'], False, latent_noise=0.001,
                                 opt_per_iter={'mog': 50, 'hyp': 10},
                                 max_iter=300, n_threads=n_threads, ftol=10,
                                 model_image_file=image, partition_size=partition_size))
        return names
Example #15
    def MNIST_binary_inducing_data(config):
        method = config['method']
        sparsify_factor = config['sparse_factor']
        np.random.seed(12000)
        data = DataSource.mnist_data()
        names = []
        d = data[config['run_id'] - 1]
        Xtrain = d['train_X']
        Ytrain_full = d['train_Y']
        Xtest = d['test_X']
        Ytest_full = d['test_Y']
        name = 'mnist_binary'

        # uncomment these lines to change the resolution
        # res = 13
        # current_res = int(np.sqrt(Xtrain.shape[1]))
        # X_train_resized = np.empty((Xtrain.shape[0], res * res))
        # X_test_resized = np.empty((Xtest.shape[0], res * res))
        # for n in range(Xtrain.shape[0]):
        #     im = Image.fromarray(Xtrain[n, :].reshape((current_res, current_res)))
        #     im = im.resize((res, res))
        #     X_train_resized[n] = np.array(im).flatten()
        #
        # for n in range(Xtest.shape[0]):
        #     im = Image.fromarray(Xtest[n, :].reshape((current_res, current_res)))
        #     im = im.resize((res, res))
        #     X_test_resized[n] = np.array(im).flatten()
        #
        #
        # Xtrain = X_train_resized
        # Xtest = X_test_resized

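        # collapse the 10-dim one-hot digit labels into a single binary
        # label: +1 when the digit is odd, -1 when it is even (assumes
        # columns 0-9 one-hot encode digits 0-9)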
        Ytrain = np.apply_along_axis(lambda x: x[1:10:2].sum() - x[0:10:2].sum(),
                                     1, Ytrain_full).astype(int)[:, np.newaxis]
        Ytest = np.apply_along_axis(lambda x: x[1:10:2].sum() - x[0:10:2].sum(),
                                    1, Ytest_full).astype(int)[:, np.newaxis]

        kernel = [ExtRBF(Xtrain.shape[1], variance=11, lengthscale=np.array((9.,)), ARD=False) for j in range(1)]
        # number of inducing points
        num_inducing = int(Xtrain.shape[0] * sparsify_factor)
        num_samples = 2000
        cond_ll = LogisticLL()

        if 'n_thread' in config.keys():
            n_threads = config['n_thread']
        else:
            n_threads = 1

        if 'partition_size' in config.keys():
            partition_size = config['partition_size']
        else:
            partition_size = 3000

        image = None
        if 'image' in config.keys():
            image = config['image']

        names.append(
            ModelLearn.run_model(Xtest, Xtrain, Ytest, Ytrain, cond_ll, kernel, method, name, d['id'], num_inducing,
                                 num_samples, sparsify_factor, ['mog', 'hyp', 'inducing'], IdentityTransformation, False,
                                 config['log_level'], False, latent_noise=0.001,
                                 opt_per_iter={'mog': 60, 'hyp': 15, 'inducing': 6},
                                 max_iter=9, n_threads=n_threads, ftol=10,
                                 model_image_file=image, partition_size=partition_size))
        return names
Example #16
from collections import defaultdict

from data_source import DataSource

data_source = DataSource()

base_keys = defaultdict(list)
dream_keys = defaultdict(list)
for dream_night in data_source.get_entries_for_stat_over_range(
        "dreams", "earliest", "latest"):
    for key in dream_night["data"].keys():
        if key == "dreams":
            continue
        base_keys[key].append(dream_night["date"].date())
    for dream in dream_night["data"]["dreams"]:
        for key in dream.keys():
            dream_keys[key].append(dream_night["date"].date())

print(base_keys)
for key, val in base_keys.items():
    print(f"{key}: {len(val)}")
print(dream_keys)
for key, val in dream_keys.items():
    print(f"{key}: {len(val)}")
Example #17
        if (d_neg_cpu < 0).any() or torch.isnan(d_neg_cpu).any():
            print('Distance to negative is smaller than 0 or NaN')
            print(f'It is: {distance_negative}')
            distance_negative[torch.isnan(distance_negative)] = 0
        losses = F.relu(distance_positive - distance_negative +
                        self.margin) + self.alpha * F.relu(distance_positive -
                                                           self.margin2)
        if size_average:
            if batch_all:
                num_active = (losses > 1e-16).sum().float()
                return (distance_positive, distance_negative,
                        losses.sum() / (num_active + 1e-16), losses.mean())
            else:
                return distance_positive, distance_negative, losses.mean()
        else:
            return distance_positive, distance_negative, losses.sum()


if __name__ == "__main__":
    from data_source import DataSource
    from training_set import TrainingSet
    ds = DataSource('/tmp/training')
    ds.loadAll()
    ts = TrainingSet(ds)
    a, p, n = ts[0]

    loss = TripletLoss(1.0)
    dp, dn, loss_sum = loss.forward(a, p, n, False)
    print("Distance positive: \t", dp)
    print("Distance negative: \t", dn)
    print("loss sum: \t", loss_sum)
Example #18
def _get_cur_data_data(ds: DataSource):
    """Fetch the current (real-time) data."""
    res = ds.query("SELECT * FROM cur_data")
    print("Finished reading cur_data from the database")
    return res
Example #19
    def evaluate_set_accuracy(
        files_path,
        file_names,
        image_depth,
        descriptor_additional_parameters,
        build_descriptor_builder,
        finder_parameters_set,
        build_finder,
    ):
        data_source = DataSource(files_path, file_names, image_depth)
        descriptor_builder = build_descriptor_builder(image_depth, descriptor_additional_parameters)
        finder = build_finder(data_source, descriptor_builder, finder_parameters_set)
        classifier = kNNClassifier(5, finder)

        # print('learning/indexing in progress ...')
        learning_start = time.time()
        classifier.learn(data_source, None)
        learning_end = time.time()
        learning_time_secs = learning_end - learning_start
        # print('learning/indexing completed')

        # print('classification in progress ...')

        progress_counter = 0
        mistakes_count = 0
        current_total_attempts = 0

        local_attempts_count = 0
        local_mistakes_count = 0

        total_classification_time_secs = 0

        total_images_count = data_source.get_count()
        for image_index in range(total_images_count):

            # Dirty hack: element of data_source is excluded during classification to prevent comparing similar images.
            data_source.excluded_index = -1
            image = data_source.get_image(image_index)
            actual_class = data_source.get_image_class(image_index)
            data_source.excluded_index = image_index

            single_classification_start = time.time()
            calculated_class = classifier.classify_image(image)
            single_classification_end = time.time()
            single_classification_time_secs = single_classification_end - single_classification_start
            total_classification_time_secs += single_classification_time_secs

            is_correct = calculated_class == actual_class
            if not is_correct:
                mistakes_count += 1
                local_mistakes_count += 1

            current_total_attempts += 1
            local_attempts_count += 1

            progress_counter += 1
            if progress_counter % 10 == 0:
                current_correct_results = current_total_attempts - mistakes_count
                current_accuracy = (float(current_correct_results) / current_total_attempts) * 100
                # print repr(progress_counter) \
                # + ' already classified... (current accuracy = ' \
                # + repr(current_accuracy)\
                # + ') (increment: ' \
                # + repr(local_attempts_count-local_mistakes_count) \
                # + ' out of ' \
                # + repr(local_attempts_count) \
                # + ')'

                local_attempts_count = 0
                local_mistakes_count = 0

        data_source.excluded_index = -1

        # print('classification completed')

        correct_results = current_total_attempts - mistakes_count
        accuracy = (float(correct_results) / current_total_attempts) * 100
        average_classification_time_ms = (total_classification_time_secs / current_total_attempts) * 1000

        return accuracy, learning_time_secs, average_classification_time_ms, total_classification_time_secs
Example #20
    sin_gen = SinGenerator()
    mod_gen = ModGenerator()

    # Create reactive data generator
    reactive_data_generator = DummyDataGenerator(1000, sin_gen, mod_gen)

    # Create the data publishers
    sin_publisher = NumpyPublisher(
        reactive_data_generator, 'sin', frequency=30,
    )
    mod_publisher = NumpyPublisher(
        reactive_data_generator, 'mod', frequency=30,
    )

    # Create the data sources
    sin_data_source = DataSource(publisher=sin_publisher, buffer_size=2250)
    mod_data_source = DataSource(publisher=mod_publisher, buffer_size=2250)

    # Bind the data sources to their publishers
    sin_data_source.bind()
    mod_data_source.bind()

    #--------------------------------------------------------------------------
    # Plot Generation
    #--------------------------------------------------------------------------
    sin_index = ArrayDataSource([])
    sin_value = ArrayDataSource([])

    mod_index = ArrayDataSource([])
    mod_value = ArrayDataSource([])
Example #21
    def __init__(self, client):
        DataSource.__init__(self, client)
        self.tags = ['fashion/series/stylewatch']
        # self.page_size = 1
        self.show_elements = 'image'
Example #22
class Module(object):

    @staticmethod
    def from_slug(slug):
        url = '{0}/public/dashboards?slug={1}'.format(config.METADATA_URL, slug)
        response = requests.get(url, verify=False)

        if response.status_code == 200:
            return Module(response.json())
        elif response.status_code == 404:
            return None
        else:
            # should raise a proper error here
            print(response.status_code, response.text)
            return None

    def __init__(self, json):
        self._json = json

        self.title = json['title']
        self.description = json.get('description', None)
        self.children = []
        self.data_source = None

        if 'dashboard-type' in json:
            self.children = \
                [Module(child) for child in json['modules']]
        elif json['module-type'] == 'tab':
            self.children = \
                [Module(child) for child in json['tabs']]
        else:
            self.data_source = DataSource(json['data-source'])


    def axes(self):
        module_type = self._json['module-type']
        json_axes = self._json.get('axes', None)

        if module_type == 'kpi':
            axes = {
                'x': {
                    'label': 'Quarter',
                    'key': '_quarter_start_at',
                    'format': 'date',
                },
                'y': [{
                    'label': self.title,
                    'key': self._json['value-attribute'],
                    'format': self._json['format'],
                }]
            }
        elif module_type == 'realtime':
            axes = merge_axes({
                'x': {
                    'label': 'Time',
                    'key': '_timestamp',
                    'format': 'time'
                },
                'y': [
                    {
                        'label': 'Number of unique visitors',
                        'key': 'unique_visitors',
                        'format': 'integer'
                    }
                ]
            }, json_axes)
        elif module_type == 'grouped_timeseries':
            axes = merge_axes({
                'x': {
                    'label': 'Date',
                    'key': '_start_at',
                    'format': {
                        'type': 'date',
                        'format': 'MMMM YYYY'
                    }
                }
            }, json_axes)

            for axis in axes['y']:
                axis['groupValue'] = axis['groupId']
                axis['groupKey'] = 'channel'
                axis['key'] = self._json['value-attribute']
                del axis['groupId']
        else:
            axes = None

        return axes


    def data(self):
        if self.data_source is None:
            return None

        raw_data = self.data_source.get()

        if raw_data is None:
            return None

        return data_to_table(self.axes(), raw_data)

    def all_data_sources(self):
        if self.data_source is None:
            return list(itertools.chain.from_iterable(
                [child.all_data_sources() for child in self.children]))
        else:
            return [self.data_source]

    def fetch(self):
        data_sources = self.all_data_sources()
        reqs = [grequests.get(ds.url()) for ds in data_sources]
        responses = grequests.map(reqs)

        for data_source, response in zip(data_sources, responses):
            data_source.parse_response(response)

    def render(self, depth=1):
        rendered_children = \
            '\n'.join([m.render(depth=depth+1) for m in self.children])

        return render_template('module.html',
            depth=depth,
            title=self.title,
            description=self.description,
            children=rendered_children,
            data=self.data(),
        )
Example #23
    def MNIST_binary_inducing_data(config):
        method = config['method']
        sparsify_factor = config['sparse_factor']
        np.random.seed(12000)
        data = DataSource.mnist_data()
        names = []
        d = data[config['run_id'] - 1]
        Xtrain = d['train_X']
        Ytrain_full = d['train_Y']
        Xtest = d['test_X']
        Ytest_full = d['test_Y']
        name = 'mnist_binary'

        # uncomment these lines to change the resolution
        # res = 13
        # current_res = int(np.sqrt(Xtrain.shape[1]))
        # X_train_resized = np.empty((Xtrain.shape[0], res * res))
        # X_test_resized = np.empty((Xtest.shape[0], res * res))
        # for n in range(Xtrain.shape[0]):
        #     im = Image.fromarray(Xtrain[n, :].reshape((current_res, current_res)))
        #     im = im.resize((res, res))
        #     X_train_resized[n] = np.array(im).flatten()
        #
        # for n in range(Xtest.shape[0]):
        #     im = Image.fromarray(Xtest[n, :].reshape((current_res, current_res)))
        #     im = im.resize((res, res))
        #     X_test_resized[n] = np.array(im).flatten()
        #
        #
        # Xtrain = X_train_resized
        # Xtest = X_test_resized

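        # collapse the 10-dim one-hot digit labels into a single binary
        # label: +1 when the digit is odd, -1 when it is even (assumes
        # columns 0-9 one-hot encode digits 0-9)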
        Ytrain = np.apply_along_axis(
            lambda x: x[1:10:2].sum() - x[0:10:2].sum(), 1,
            Ytrain_full).astype(int)[:, np.newaxis]
        Ytest = np.apply_along_axis(
            lambda x: x[1:10:2].sum() - x[0:10:2].sum(), 1,
            Ytest_full).astype(int)[:, np.newaxis]

        kernel = [
            ExtRBF(Xtrain.shape[1],
                   variance=11,
                   lengthscale=np.array((9., )),
                   ARD=False) for j in range(1)
        ]
        # number of inducing points
        num_inducing = int(Xtrain.shape[0] * sparsify_factor)
        num_samples = 2000
        cond_ll = LogisticLL()

        if 'n_thread' in config.keys():
            n_threads = config['n_thread']
        else:
            n_threads = 1

        if 'partition_size' in config.keys():
            partition_size = config['partition_size']
        else:
            partition_size = 3000

        image = None
        if 'image' in config.keys():
            image = config['image']

        names.append(
            ModelLearn.run_model(Xtest,
                                 Xtrain,
                                 Ytest,
                                 Ytrain,
                                 cond_ll,
                                 kernel,
                                 method,
                                 name,
                                 d['id'],
                                 num_inducing,
                                 num_samples,
                                 sparsify_factor, ['mog', 'hyp', 'inducing'],
                                 IdentityTransformation,
                                 False,
                                 config['log_level'],
                                 False,
                                 latent_noise=0.001,
                                 opt_per_iter={
                                     'mog': 60,
                                     'hyp': 15,
                                     'inducing': 6
                                 },
                                 max_iter=9,
                                 n_threads=n_threads,
                                 ftol=10,
                                 model_image_file=image,
                                 partition_size=partition_size))
        return names
Example #24
    def sarcos_all_joints_data(config):
        method = config['method']
        sparsify_factor = config['sparse_factor']
        np.random.seed(12000)
        data = DataSource.sarcos_all_joints_data()

        names = []
        d = data[0]
        Xtrain = d['train_X']
        Ytrain = d['train_Y']
        Xtest = d['test_X']
        Ytest = d['test_Y']
        name = 'sarcos_all_joints'

        scaler = preprocessing.StandardScaler().fit(Xtrain)
        Xtrain = scaler.transform(Xtrain)
        Xtest = scaler.transform(Xtest)

        kernel = ExperimentSetup.get_kernels(Xtrain.shape[1], 8, False)

        # number of inducing points
        num_inducing = int(Xtrain.shape[0] * sparsify_factor)
        num_samples = 2000

        cond_ll = CogLL(0.1, 7, 1)

        if 'n_thread' in config.keys():
            n_threads = config['n_thread']
        else:
            n_threads = 1

        if 'partition_size' in config.keys():
            partition_size = config['partition_size']
        else:
            partition_size = 3000

        image = None
        if 'image' in config.keys():
            image = config['image']

        names.append(
            ModelLearn.run_model(Xtest,
                                 Xtrain,
                                 Ytest,
                                 Ytrain,
                                 cond_ll,
                                 kernel,
                                 method,
                                 name,
                                 d['id'],
                                 num_inducing,
                                 num_samples,
                                 sparsify_factor, ['mog', 'll', 'hyp'],
                                 MeanStdYTransformation,
                                 True,
                                 config['log_level'],
                                 False,
                                 latent_noise=0.001,
                                 opt_per_iter={
                                     'mog': 50,
                                     'hyp': 10,
                                     'll': 10
                                 },
                                 max_iter=200,
                                 partition_size=partition_size,
                                 ftol=10,
                                 n_threads=n_threads,
                                 model_image_file=image))
        return names
Example #25
def _get_aqi_info_data(ds: DataSource):
    """Fetch historical AQI data."""
    res = ds.query("SELECT * FROM aqi_info")
    print("Finished reading aqi_info from the database")
    return res
Example #26
import logging
from ExtRBF import ExtRBF
from model_learn import ModelLearn
from data_transformation import MeanTransformation
from likelihood import UnivariateGaussian
from data_source import DataSource
import numpy as np

# defining model type. It can be "mix1", "mix2", or "full"
method = "full"

# number of inducing points
num_inducing = 30

# loading data
data = DataSource.boston_data()

d = data[0]
Xtrain = d['train_X']
Ytrain = d['train_Y']
Xtest = d['test_X']
Ytest = d['test_Y']

# this is just a name that will be used for the folders and files when exporting results
name = 'boston'

# defining the likelihood function
cond_ll = UnivariateGaussian(np.array(1.0))

# number of samples used for approximating the likelihood and its gradients
num_samples = 2000
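
The snippet stops before the model is actually built and trained. A minimal sketch of the remaining steps, following the call pattern of the boston example above (the positional argument order of ModelLearn.run_model and the kernel construction are assumptions carried over from the other examples):

# one ExtRBF kernel over all input dimensions, with ARD
kernel = [ExtRBF(Xtrain.shape[1], ARD=True)]

# sparsification factor implied by the fixed number of inducing points
sparsify_factor = num_inducing / float(Xtrain.shape[0])

# train and evaluate the model
ModelLearn.run_model(Xtest, Xtrain, Ytest, Ytrain, cond_ll, kernel,
                     method, name, d['id'], num_inducing, num_samples,
                     sparsify_factor, ['mog', 'hyp', 'll'], MeanTransformation,
                     True, logging.DEBUG, False, latent_noise=0.001,
                     opt_per_iter={'mog': 25, 'hyp': 25, 'll': 25},
                     max_iter=200)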
Example #27
    def __init__(self, client):
        DataSource.__init__(self, client)
        self.content_type = 'video'
        self.tags = ['theguardian/series/how-to-dress']
        # self.page_size = 1
        self.show_elements = 'video'
Example #28
    def MNIST_data(config):
        method = config['method']
        sparsify_factor = config['sparse_factor']
        np.random.seed(12000)
        data = DataSource.mnist_data()
        names = []
        d = data[config['run_id'] - 1]
        Xtrain = d['train_X']
        Ytrain = d['train_Y']
        Xtest = d['test_X']
        Ytest = d['test_Y']
        name = 'mnist'

        # uncomment these lines to delete unused features
        # features_rm = np.array([])
        # for n in range(Xtrain.shape[1]):
        #     if Xtrain[:, n].sum() == 0:
        #         features_rm = np.append(features_rm, n)
        # Xtrain = np.delete(Xtrain, features_rm.astype(int), 1)
        # Xtest = np.delete(Xtest, features_rm.astype(int), 1)

        # uncomment these lines to change the resolution
        # res = 13
        # current_res = int(np.sqrt(Xtrain.shape[1]))
        # X_train_resized = np.empty((Xtrain.shape[0], res * res))
        # X_test_resized = np.empty((Xtest.shape[0], res * res))
        # for n in range(Xtrain.shape[0]):
        #     im = Image.fromarray(Xtrain[n, :].reshape((current_res, current_res)))
        #     im = im.resize((res, res))
        #     X_train_resized[n] = np.array(im).flatten()
        #
        # for n in range(Xtest.shape[0]):
        #     im = Image.fromarray(Xtest[n, :].reshape((current_res, current_res)))
        #     im = im.resize((res, res))
        #     X_test_resized[n] = np.array(im).flatten()
        #
        #
        # Xtrain = X_train_resized
        # Xtest = X_test_resized

        kernel = [
            ExtRBF(Xtrain.shape[1],
                   variance=11,
                   lengthscale=np.array((9., )),
                   ARD=False) for j in range(10)
        ]
        # number of inducing points
        num_inducing = int(Xtrain.shape[0] * sparsify_factor)
        num_samples = 2000
        cond_ll = SoftmaxLL(10)

        if 'n_thread' in config.keys():
            n_threads = config['n_thread']
        else:
            n_threads = 1

        if 'partition_size' in config.keys():
            partition_size = config['partition_size']
        else:
            partition_size = 3000

        image = None
        if 'image' in config.keys():
            image = config['image']

        names.append(
            ModelLearn.run_model(Xtest,
                                 Xtrain,
                                 Ytest,
                                 Ytrain,
                                 cond_ll,
                                 kernel,
                                 method,
                                 name,
                                 d['id'],
                                 num_inducing,
                                 num_samples,
                                 sparsify_factor, ['mog', 'hyp'],
                                 IdentityTransformation,
                                 False,
                                 config['log_level'],
                                 False,
                                 latent_noise=0.001,
                                 opt_per_iter={
                                     'mog': 50,
                                     'hyp': 10
                                 },
                                 max_iter=300,
                                 n_threads=n_threads,
                                 ftol=10,
                                 model_image_file=image,
                                 partition_size=partition_size))
        return names
Example #29
    def __init__(self, client):
        DataSource.__init__(self, client)
        self.content_type = 'gallery'
        self.tags = ['(fashion/series/fashion-for-all-ages|fashion/series/key-fashion-trends-of-the-season|fashion/series/fashion-line-up)']
        # self.page_size = 1
        self.show_elements = 'image'
Example #30
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Entry point to execute data quality scripts.')
    parser.add_argument(
        'method',
        type=str,
        help='Method to be executed: execute_batch, test_data_source')
    parser.add_argument(
        'id',
        type=int,
        help='Id of the object on which to execute the method.')
    arguments = parser.parse_args()

    method = arguments.method
    if method == 'execute_batch':
        batch_id = arguments.id
        batch = Batch()
        batch.execute(batch_id)

    elif method == 'test_data_source':
        data_source_id = arguments.id
        data_source = DataSource()
        data_source.test(data_source_id)

    else:
        error_message = f'Invalid method {method}'
        log.error(error_message)
        raise Exception(error_message)
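
Hypothetical command lines this entry point accepts (the script name here is made up):

# python run_data_quality.py execute_batch 1
# python run_data_quality.py test_data_source 42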
Example #31
    def __init__(self, client):
        DataSource.__init__(self, client)
        self.tags = ['fashion/series/stylewatch']
        # self.page_size = 1
        self.show_elements = 'image'
Example #32
def get_data_source(conf='db.ini'):
    """获取数据源"""
    return DataSource(conf)
Example #33
    def __init__(self, client):
        DataSource.__init__(self, client)
        self.content_type = 'video'
        self.tags = ['technology/technology']
        self.show_elements = 'video'
Example #34
    def __init__(self):
        self.data = DataSource()
        self.preprocessing = None