def test_eval(self):
    with self.test_session() as sess:
        x = Normal(0.0, 0.1)
        x_ph = tf.placeholder(tf.float32, [])
        y = Normal(x_ph, 0.1)
        self.assertLess(x.eval(), 5.0)
        self.assertLess(x.eval(sess), 5.0)
        self.assertLess(x.eval(feed_dict={x_ph: 100.0}), 5.0)
        self.assertGreater(y.eval(feed_dict={x_ph: 100.0}), 5.0)
        self.assertGreater(y.eval(sess, feed_dict={x_ph: 100.0}), 5.0)
        self.assertRaises(tf.errors.InvalidArgumentError, y.eval)
        self.assertRaises(tf.errors.InvalidArgumentError, y.eval, sess)
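For context, .eval() on an Edward random variable draws a sample of its value in the active session. The following is a minimal standalone sketch of the same behaviour, assuming Edward 1.x and TensorFlow 1.x in graph mode; it is not part of the original test.

import tensorflow as tf
from edward.models import Normal

x = Normal(0.0, 0.1)                        # scalar random variable
x_ph = tf.placeholder(tf.float32, [])
y = Normal(x_ph, 0.1)                       # depends on a placeholder

with tf.Session():
    print(x.eval())                         # one draw from x
    print(y.eval(feed_dict={x_ph: 100.0}))  # y needs the placeholder fed
    # y.eval() without the feed raises InvalidArgumentError, as the test above checks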
Example #2
D = np.float32(irt_data.values)

model.init_inference(data=D, n_iter=niter)
model.fit()

# generate output files #

# output ability
ability = pd.DataFrame(index=irt_data.columns)
ability['ability'] = tf.nn.sigmoid(model.qtheta.distribution.loc).eval()
ability.loc['stddev'] = ability.ability.std()
ability.to_csv(result_path+'/irt_ability_vi_'+partial_save_name+'.csv') 

# output difficulty and discrimination
if args.fixed_a:
    discrimination = a.eval()
else:   
    discrimination = model.qa.loc.eval()

difficulty = tf.nn.sigmoid(model.qdelta.distribution.loc).eval()
if dataset not in ['fashion', 'mnist']:
    #if not args.fixed_a:
    fig = vs.plot_parameters(xtest.values[:,:-1], difficulty, discrimination)
    fig.savefig(result_path+'/irt_parameters_vi_'+partial_save_name+'.pdf') 

parameters = pd.DataFrame(index=irt_data.index)
parameters['difficulty'] = difficulty
parameters['discrimination'] = discrimination
parameters.to_csv(result_path+'/irt_parameters_vi_'+partial_save_name+'.csv',index=False)

# visualize correlation between difficulty and response
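The snippet ends at this comment. Below is a minimal sketch of one way such a plot could look, assuming irt_data has items as rows and binary responses as columns (as the index usage above suggests); the matplotlib usage and file name are illustrative, not from the original.

import matplotlib.pyplot as plt

# Hypothetical: per-item empirical response rate vs. estimated difficulty.
response_rate = irt_data.mean(axis=1).values
plt.scatter(difficulty, response_rate, s=10, alpha=0.6)
plt.xlabel('estimated difficulty')
plt.ylabel('mean response')
plt.savefig(result_path + '/irt_difficulty_vs_response_' + partial_save_name + '.png')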
Example #3
def save(arr,xdata,ydata):
	tf.reset_default_graph()

	trainSetNumber = round(FLAGS.T* 0.8)

	x_train = xdata[:trainSetNumber]
	y_train = ydata[:trainSetNumber]
	x_test = xdata[trainSetNumber:]
	y_test = ydata[trainSetNumber:]

	x_train = np.asarray(x_train)
	x_test = np.asarray(x_test)

	# print(x_test)
	# print(y_test)
	pos = 0
	name = arr[pos]
	pos +=1
	H1 = int(arr[pos])
	pos+=1
	H2 = int(arr[pos])
	pos+=1
	param1 = float(arr[pos])
	pos += 1
	param2 = float(arr[pos])

	graph1 = tf.Graph()
	with graph1.as_default():
		with tf.name_scope("model"):
			W_0 = Normal(loc=tf.zeros([FLAGS.D, H1]), scale=param1*tf.ones([FLAGS.D,H1 ]),name="W_0")
			W_1 = Normal(loc=tf.zeros([H1, H2]), scale=param2*tf.ones([H1, H2]), name="W_1")
			W_2 = Normal(loc=tf.zeros([H2, FLAGS.O]), scale=param2*tf.ones([H2, FLAGS.O]), name="W_2")
			b_0 = Normal(loc=tf.zeros(H1), scale=param1 *tf.ones(H1), name="b_0")
			b_1 = Normal(loc=tf.zeros(H2), scale=param2* tf.ones(H2), name="b_1")
			b_2 = Normal(loc=tf.zeros(FLAGS.O), scale=param2* tf.ones(FLAGS.O), name="b_2")

			X = tf.placeholder(tf.float32, [trainSetNumber, FLAGS.D], name="X")
			y = Normal(loc=neural_network(x_train,W_0, W_1, W_2, b_0, b_1, b_2, trainSetNumber), scale=0.1*tf.ones([trainSetNumber,FLAGS.O]), name="y")
		
		with tf.variable_scope("posterior",reuse=tf.AUTO_REUSE):
			with tf.variable_scope("qW_0",reuse=tf.AUTO_REUSE):
			    loc = tf.get_variable("loc", [FLAGS.D, H1])
			    scale = param1*tf.nn.softplus(tf.get_variable("scale", [FLAGS.D, H1]))
			    qW_0 = Normal(loc=loc, scale=scale)
			with tf.variable_scope("qW_1",reuse=tf.AUTO_REUSE):
			    loc = tf.get_variable("loc", [H1, H2])
			    scale = param2*tf.nn.softplus(tf.get_variable("scale", [H1, H2]))
			    qW_1 = Normal(loc=loc, scale=scale)
			with tf.variable_scope("qW_2",reuse=tf.AUTO_REUSE):
			    loc = tf.get_variable("loc", [H2, FLAGS.O])
			    scale = param2*tf.nn.softplus(tf.get_variable("scale", [H2, FLAGS.O]))
			    qW_2 = Normal(loc=loc, scale=scale)
			with tf.variable_scope("qb_0",reuse=tf.AUTO_REUSE):
			    loc = tf.get_variable("loc", [H1])
			    scale =param1 * tf.nn.softplus(tf.get_variable("scale", [H1]))
			    qb_0 = Normal(loc=loc, scale=scale)
			with tf.variable_scope("qb_1",reuse=tf.AUTO_REUSE):
			    loc = tf.get_variable("loc", [H2])
			    scale =param2 * tf.nn.softplus(tf.get_variable("scale", [H2]))
			    qb_1 = Normal(loc=loc, scale=scale)
			with tf.variable_scope("qb_2",reuse=tf.AUTO_REUSE):
			    loc = tf.get_variable("loc", [FLAGS.O])
			    scale =param2 * tf.nn.softplus(tf.get_variable("scale", [FLAGS.O]))
			    qb_2 = Normal(loc=loc, scale=scale)
	#inference
	with tf.Session(graph=graph1) as sess:
		# Set up the inference method, mapping the prior to the posterior variables
		inference = ed.KLqp({W_0: qW_0, b_0: qb_0,W_1: qW_1, b_1: qb_1,W_2: qW_2, b_2: qb_2}, data={X: x_train, y: y_train})
		# Set up the adam optimizer
		global_step = tf.Variable(0, trainable=False)
		starter_learning_rate = 0.1
		learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,100, 0.3, staircase=True)
		optimizer = tf.train.AdamOptimizer(learning_rate)

		# Run the inference method
		pos += 1
		iter1 = int(arr[pos])
		inference.run(n_iter=iter1, optimizer=optimizer, n_samples=5)

		#Run the test data through the neural network
		infered = neural_network(x_test, qW_0, qW_1, qW_2, qb_0, qb_1, qb_2, len(x_test))
		inferedList = infered.eval()

		# Accuracy checks on the test data
		# To work with PPC and other metrics, the prediction must be a random variable;
		# Normal creates this random variable by placing a normal distribution over the posterior network's output
		NormalTest = Normal(loc=neural_network(x_test, qW_0, qW_1, qW_2, qb_0, qb_1, qb_2, len(x_test)), scale=0.1*tf.ones([len(x_test), FLAGS.O]), name="y_other")
		NormalTestList = NormalTest.eval()
		
		# Copy the graph so that the output random variable depends on the fitted posterior variables
		y_post = ed.copy(NormalTest, {W_0: qW_0, b_0: qb_0,W_1: qW_1, b_1: qb_1,W_2: qW_2, b_2: qb_2})
		X = tf.placeholder(tf.float32, [len(x_test), FLAGS.D], name="X")
		y_test_tensor = tf.convert_to_tensor(y_test)
		MSE = ed.evaluate('mean_squared_error', data={X: x_test, NormalTest: y_test_tensor})
		MAE = ed.evaluate('mean_absolute_error', data={X: x_test, NormalTest: y_test_tensor})
		# PPC calculation
		PPCMean = ed.ppc(lambda xs, zs: tf.reduce_mean(xs[y_post]), data={y_post:  y_test, X:x_test}, latent_vars={W_0: qW_0, b_0: qb_0,W_1: qW_1, b_1: qb_1,W_2: qW_2, b_2: qb_2}, n_samples=5)
		# Change the graph again, this is done to do epistemic uncertainty calculations
		posterior = ed.copy(NormalTest, dict_swap={W_0: qW_0.mean(), b_0: qb_0.mean(),W_1: qW_1.mean(), b_1: qb_1.mean(),W_2: qW_2.mean(), b_2: qb_2.mean()})
		Y_post1 = sess.run(posterior.sample(len(x_test)), feed_dict={X: x_test, posterior: y_test})
		mean_prob_over_samples=np.mean(Y_post1, axis=0) ## prediction means
		prediction_variances = np.apply_along_axis(predictive_entropy, axis=1, arr=mean_prob_over_samples)
		
		# Run analysis on test data, to see how many records were correct
		classes, actualClass, cor, firsts, seconds, thirds, fails, perCorrect = Analysis(inferedList, y_test)
		# Save the model through TF saver
		saver = tf.train.Saver()
		dir_path = os.path.dirname(os.path.realpath(__file__))
		save_path = saver.save(sess, dir_path +"/"+name+"/model.ckpt")
		print("Model saved in path: %s" % save_path)

		file = open(dir_path+"/"+name +"/"+name+".csv",'w')
		file.write("MSE = " + str(MSE))
		file.write("\nMAE = " + str(MAE))
		file.write("\nPPC mean = " + str(PPCMean))
		file.write("; Predicted First;Predicted Second; Predicted Third; Predicted Fail \n")
		classNames = ['First','Second', 'Third', 'Fail']
		for x in range(len(firsts)):
			file.write(classNames[x] + ";" + str(firsts[x]) + ";" + str(seconds[x])+ ";" + str(thirds[x])+ ";" + str(fails[x]) + "\n")
		file.write("Num;Class 1;Class 2;Class 3;Class 4;Epi;Predicted Class;Correct Class\n ")
		for x in range(len(inferedList)):
			line = str(x) 
			for i in range(len(inferedList[x])):
				line += ";" + str(round(inferedList[x][i],2))
			line += ";" + str(round(prediction_variances[x],2)) + ";" + str(classes[x]+1) + ";" + str(actualClass[x]+1) + "\n"
			file.write(line) 
		file.close()

		return perCorrect
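predictive_entropy is used above but not defined in this snippet. The following is a minimal sketch of one common definition (Shannon entropy of a vector of class probabilities); it is an assumption about what the original helper computes.

def predictive_entropy(probs, eps=1e-12):
    # Shannon entropy of a 1-D vector of predicted class probabilities.
    probs = np.clip(np.asarray(probs, dtype=np.float64), eps, 1.0)
    return -np.sum(probs * np.log(probs))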
Example #4
plt.scatter(x_train[:,0], np.log(mu[:,which_species]));
for _ in range(20):
    plt.scatter(x_train[:,0], np.log(qyhat.eval()[:,which_species]), alpha=0.1, c="darkred", s=10);


# In[ ]:

plt.hist(qz.stddev().eval(), bins=50);


# In[ ]:

# Approximate posterior distributions of z, given x. There should be no large (low-frequency?) trends or gaps.
for i in range(n_z):
    for _ in range(25):
        plt.scatter(x_train[:,0], qz.eval()[:,i], alpha=0.1, c="darkred", s=5)
    plt.show()


# In[ ]:

nn = 1000
xx = 0
qxx = 0
for _ in range(nn):
    xx += yhat.eval() / nn
    qxx += qyhat.eval() / nn
    
# Predicted mean versus x
plt.scatter(x_train[:,0], np.log(mu)[:,which_species])
plt.scatter(x_train[:,0], np.log(xx)[:,which_species], s=10, alpha=0.5, c="black");
Example #5
def main():
    try:
        if '.csv' not in args.input: raise Exception('input_format')
        if '.pkl' not in args.output: raise Exception('output_format')

        with open(args.input, 'rb') as input:

            # DATA
            reader = csv.reader(input, delimiter=';')
            next(reader)  # skip the CSV header row
            n = 0
            xn = []
            for track in reader:
                print('Track {}'.format(n))
                track = format_track(track[0])
                xn.append(track)
                n += 1
            xn = np.asarray(xn)  # N x D
            xn = xn.T  # D x N

            D = len(xn)
            N = len(xn[0])

            # MODEL
            ds = tf.contrib.distributions
            sigma = ed.models.Gamma(1.0, 1.0)

            alpha = ed.models.Gamma(tf.ones([K]), tf.ones([K]))
            w = Normal(mu=tf.zeros([D, K]),
                       sigma=tf.reshape(tf.tile(alpha, [D]), [D, K]))
            z = Normal(mu=tf.zeros([K, N]), sigma=tf.ones([K, N]))
            mu = Normal(mu=tf.zeros([D]), sigma=tf.ones([D]))
            x = Normal(mu=tf.matmul(w, z) +
                       tf.transpose(tf.reshape(tf.tile(mu, [N]), [N, D])),
                       sigma=sigma * tf.ones([D, N]))

            # INFERENCE
            qalpha = ed.models.TransformedDistribution(
                distribution=ed.models.NormalWithSoftplusSigma(
                    mu=tf.Variable(tf.random_normal([K])),
                    sigma=tf.Variable(tf.random_normal([K]))),
                bijector=ds.bijector.Exp(),
                name='qalpha')

            qw = Normal(mu=tf.Variable(tf.random_normal([D, K])),
                        sigma=tf.nn.softplus(
                            tf.Variable(tf.random_normal([D, K]))))
            qz = Normal(mu=tf.Variable(tf.random_normal([K, N])),
                        sigma=tf.nn.softplus(
                            tf.Variable(tf.random_normal([K, N]))))

            data_mean = np.mean(xn, axis=1).astype(np.float32, copy=False)

            qmu = Normal(mu=tf.Variable(data_mean + tf.random_normal([D])),
                         sigma=tf.nn.softplus(
                             tf.Variable(tf.random_normal([D]))))

            qsigma = ed.models.TransformedDistribution(
                distribution=ed.models.NormalWithSoftplusSigma(
                    mu=tf.Variable(0.0), sigma=tf.Variable(1.0)),
                bijector=ds.bijector.Exp(),
                name='qsigma')

            inference = ed.KLqp(
                {
                    alpha: qalpha,
                    w: qw,
                    z: qz,
                    mu: qmu,
                    sigma: qsigma
                },
                data={x: xn})
            inference.run(n_iter=N_ITERS, n_samples=N_SAMPLES)

            alphas = tf.exp(qalpha.distribution.mean()).eval()
            alphas.sort()
            # mean_alphas = np.mean(alphas)
            print('Alphas: {}'.format(alphas))

            points = qz.eval()
            xn_new = []
            for i in range(len(alphas)):
                # if alphas[i] > (mean_alphas * 1.2):
                xn_new.append(points[i])
            xn_new = np.asarray(xn_new).T

            # Normalization
            maxs = np.max(xn_new, axis=0)
            mins = np.min(xn_new, axis=0)
            rng = maxs - mins
            high = 100.0
            low = 0.0
            xn_new = high - (((high - low) * (maxs - xn_new)) / rng)

            print('New points: {}'.format(xn_new))
            print('Number of points: {}'.format(len(xn_new)))
            print('Point dimensions: {}'.format(len(xn_new[0])))

            with open(args.output, 'w') as output:
                pkl.dump({'xn': np.array(xn_new)}, output)

    except IOError:
        print('File not found!')
    except Exception as e:
        if e.args[0] == 'input_format': print('Input must be a CSV file')
        elif e.args[0] == 'output_format': print('Output must be a PKL file')
        else:
            print('Unexpected error: {}'.format(sys.exc_info()[0]))
            raise
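For reference, the normalization block above is min-max scaling of each column into [low, high]. A tiny standalone check with made-up numbers (the values here are illustrative only):

import numpy as np

col = np.array([2.0, 4.0, 10.0])
high, low = 100.0, 0.0
scaled = high - ((high - low) * (col.max() - col)) / (col.max() - col.min())
print(scaled)  # [  0.  25. 100.] -- the minimum maps to low, the maximum to high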
class BayesianRegression:
    def __init__(self, in_dim=1, n_classes=2):
        """
        Bayesian Logistic regression based on Edward lib (http://edwardlib.org).

        y = W * x + b

        :param in_dim:
        :param n_classes:
        """

        self.in_dim = in_dim
        self.n_classes = n_classes

        self.X = tf.placeholder(tf.float32, [None, self.in_dim])
        self.W = Normal(loc=tf.zeros([self.in_dim, self.n_classes]),
                        scale=tf.ones([self.in_dim, self.n_classes]))
        self.b = Normal(loc=tf.zeros(self.n_classes),
                        scale=tf.ones(self.n_classes))

        h = tf.matmul(self.X, self.W) + self.b
        self.y = Normal(loc=tf.sigmoid(-h), scale=0.1)

        self.qW = Normal(loc=tf.get_variable("qW/loc",
                                             [self.in_dim, self.n_classes]),
                         scale=tf.nn.softplus(
                             tf.get_variable("qW/scale",
                                             [self.in_dim, self.n_classes])))
        self.qb = Normal(loc=tf.get_variable("qb/loc", [self.n_classes]),
                         scale=tf.nn.softplus(
                             tf.get_variable("qb/scale", [self.n_classes])))

    def infer(self, X, y, n_samples=5, n_iter=250):

        inference = ed.KLqp({
            self.W: self.qW,
            self.b: self.qb,
        },
                            data={
                                self.y: y,
                                self.X: X
                            })

        inference.run(n_samples=n_samples, n_iter=n_iter)

    def predict(self, X):

        self.qW_mean = self.qW.mean().eval()
        self.qb_mean = self.qb.mean().eval()

        h = tf.matmul(X, self.qW_mean) + self.qb_mean

        return tf.sigmoid(-h).eval()

    def sample_boudary(self, X):

        qW = self.qW.eval()
        qb = self.qb.eval()

        w = -qW[0][0] / qW[1][0]
        b = (0.5 - qb[0]) / qW[0][0]

        return w, b

    def predict_std(self, X):
        self.qW_stddev = self.qW.stddev().eval()
        self.qb_stddev = self.qb.stddev().eval()

        h = tf.matmul(X, self.qW_stddev) + self.qb_stddev

        return tf.sigmoid(-h).eval()

    def get_coef(self):
        return self.qW.mean().eval().T[0]
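A minimal usage sketch for the class above, assuming Edward 1.x and TensorFlow 1.x; the toy data, shapes, and one-hot targets are assumptions, not part of the original.

import numpy as np

# Hypothetical toy problem: 1-D inputs, two classes encoded as one-hot rows.
X_toy = np.random.randn(100, 1).astype(np.float32)
y_toy = np.hstack([(X_toy > 0), (X_toy <= 0)]).astype(np.float32)

model = BayesianRegression(in_dim=1, n_classes=2)
model.infer(X_toy, y_toy, n_samples=5, n_iter=250)  # variational fit with KLqp
preds = model.predict(X_toy)                        # posterior-mean predictions
print(preds.shape)                                  # (100, 2)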