Example 1
 def test(self, test_file):
     """
     Tests the model for accuracy against a hold-out testing file.
     Returns a tuple of (# correct, # total, percent correct, percent nodes correct)
     """
     correct = 0
     total = 0
     nodewise_correct = 0
     for sample in get_samples(test_file):
         print total
         for i in range(0, sample.shape[0] - 1):
             current_nodes = sample[i, 0:self.num_sites]
             next = sample[i + 1, 0:self.num_sites]
             predicted = self.predict(current_nodes)
             true_state = np.matrix(np.zeros((self.num_nodes)))
             true_state[0, :self.num_sites] = current_nodes
             true_state[0, self.num_sites:] = next
             #print "Ground: {0}".format(true_state)
             # print "Ground prob: {0}".format(self.prob(true_state))
             #print "Next: {0} Predicted: {1}".format(next, predicted)
             if (next == predicted).all():
                 correct += 1
                 nodewise_correct += 1
             else:
                 for j in range(0, self.num_sites):
                     if next[0, j] == predicted[0, j]:
                         nodewise_correct += 1 / float(self.num_sites)
             total += 1
         print test_file
         pretty_print((correct, total, correct / float(total),
                       nodewise_correct / float(total)))
     return (correct, total, correct / float(total),
             nodewise_correct / float(total))
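The CSV-based get_samples(filename) called in Examples 1 through 5 is not shown on this page. Below is a minimal sketch, assuming the blank-line-separated block format that create_holdouts in Example 2 writes, with one np.matrix yielded per block; the project's real implementation may differ. (Examples 6, 7, and 10 use unrelated functions that happen to share the name.)

import numpy as np

def get_samples(filename):
    # Hypothetical sketch: yield one np.matrix per blank-line-separated
    # block of comma-separated rows in `filename`.
    rows = []
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if line:
                rows.append([float(x) for x in line.split(',')])
            elif rows:
                yield np.matrix(rows)
                rows = []
    if rows:
        yield np.matrix(rows)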
Example 2
def create_holdouts(filename, num_splits):
    print "Creating testing data"
    count = 0
    splits = [
        open(filename.replace('.csv', '_test{0}.csv'.format(i)), 'wb')
        for i in range(0, num_splits)
    ]
    for sample in get_samples(filename):
        f = splits[count % num_splits]
        for row in range(0, sample.shape[0]):
            f.write(','.join([str(x) for x in sample[row, :].tolist()[0]]))
            f.write('\n')
        f.write('\n')
        count += 1
        print count
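A hypothetical invocation; with num_splits=5 the samples are dealt round-robin into infections_daily_test0.csv through infections_daily_test4.csv:

create_holdouts('data/infections_daily.csv', 5)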
Example 3
    def calc_mu(self, samples):
        mu_s = np.matrix(np.zeros((1, self.num_nodes)))
        s = np.zeros((self.num_nodes, self.num_nodes))
        mu_st11 = np.matrix(s)
        mu_st10 = np.matrix(s)
        mu_st01 = np.matrix(s)
        mu_st00 = np.matrix(s)
        total_samples = 0
        for m1 in get_samples(samples):
            m2 = np.roll(m1, -1, axis=0)
            total_samples += m1.shape[0] - 1
            print total_samples
            for i in range(0, m1.shape[0] - 1):
                # Calculate the edges from n->n'.
                # This is the upper-right quadrant.
                n1 = m1[i]
                n2 = m2[i]
                upright = self.calc_mu_quadrant(n1, n2, False)
                mu_s[0, 0:self.num_sites] += n1
                mu_st11[0:self.num_sites, self.num_sites:] += upright[3]  # nn11
                mu_st10[0:self.num_sites, self.num_sites:] += upright[2]  # nn10
                mu_st01[0:self.num_sites, self.num_sites:] += upright[1]  # nn01
                mu_st00[0:self.num_sites, self.num_sites:] += upright[0]  # nn00
                # Calculate the edges from n'->n'.
                # This is the lower-right quadrant.
                lowright = self.calc_mu_quadrant(n2, n2, True)
                mu_s[0, self.num_sites:] += n2
                mu_st11[self.num_sites:, self.num_sites:] += lowright[3]  # nn11
                mu_st10[self.num_sites:, self.num_sites:] += lowright[2]  # nn10
                mu_st01[self.num_sites:, self.num_sites:] += lowright[1]  # nn01
                mu_st00[self.num_sites:, self.num_sites:] += lowright[0]  # nn00
        mu_st11 = np.triu(mu_st11)
        mu_st10 = np.triu(mu_st10)
        mu_st01 = np.triu(mu_st01)
        mu_st00 = np.triu(mu_st00)
        mu_s /= total_samples
        mu_st11 /= total_samples
        mu_st10 /= total_samples
        mu_st01 /= total_samples
        mu_st00 /= total_samples
        print "Mu_s:\n{0}".format(mu_s)
        print "Mu_st11:\n{0}\n\n".format(mu_st11)
        print "Mu_st10:\n{0}\n\n".format(mu_st10)
        print "Mu_st01:\n{0}\n\n".format(mu_st01)
        print "Mu_st00:\n{0}\n\n".format(mu_st00)
        print "Summed:\n{0}\n\n".format(mu_st11 + mu_st10 + mu_st01 + mu_st00)
        self.num_samples = total_samples
        return [mu_s, mu_st00, mu_st01, mu_st10, mu_st11]
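The slicing above partitions each num_nodes x num_nodes moment matrix into quadrants: indices below num_sites address the state n at time t, and the rest address n' at time t+1. A small standalone illustration of the two quadrants calc_mu fills (num_sites chosen arbitrarily):

import numpy as np

num_sites = 3
num_nodes = 2 * num_sites
M = np.zeros((num_nodes, num_nodes))
M[0:num_sites, num_sites:] = 1  # upper-right quadrant: n -> n' edges
M[num_sites:, num_sites:] = 2   # lower-right quadrant: n' -> n' edges
print(M)  # np.triu, as used above, would keep only the upper triangle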
Example 4
def percent_split(filename, percent, num_splits):
    print 'Creating {0} splits of {1}% each from {2}'.format(
        num_splits, percent * 100, filename)
    training = [
        open(filename.replace('.csv', '_train{0}.csv'.format(i)), 'wb')
        for i in range(0, num_splits)
    ]
    testing = [
        open(filename.replace('.csv', '_test{0}.csv'.format(i)), 'wb')
        for i in range(0, num_splits)
    ]
    count = 0
    for sample in get_samples(filename):
        for i in range(num_splits):
            if random.random() < percent:
                write_sample(sample, testing[i])
            else:
                write_sample(sample, training[i])
        count += 1
        print count
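write_sample is not shown on this page. A plausible sketch that mirrors the row format create_holdouts writes (comma-separated rows, one blank line between samples); treat it as an assumption, not the project's actual helper:

def write_sample(sample, f):
    # Hypothetical helper: write one sample matrix as CSV rows,
    # then a blank separator line.
    for row in range(sample.shape[0]):
        f.write(','.join(str(x) for x in sample[row, :].tolist()[0]))
        f.write('\n')
    f.write('\n')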
Example 5
def transform_to_netinf(fname):
    f = open(fname.replace('.csv', '.netinf'), 'wb')
    # Determine the number of variables (columns) from the samples.
    for sample in get_samples(fname):
        numvars = sample.shape[1]

    # Write the node list: one "id,id" line per variable.
    for i in range(numvars):
        f.write("%s,%s\n" % (i, i))

    total = 0

    # For each sample, record the first row (time step) at which each
    # column (node) is 1, then write the pairs as "node,time;" entries.
    for sample in get_samples(fname):
        netinf = {}
        print total
        for i in range(sample.shape[0]):
            for j in range(sample.shape[1]):
                if sample[i, j] == 1:
                    if j not in netinf:
                        netinf[j] = i
        f.write('\n')
        for item in netinf.keys():
            f.write('%s,%s;' % (item, netinf[item]))
        total += 1
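The netinf dictionary thus holds each node's earliest infection time within a sample. A hypothetical call:

transform_to_netinf('data/infections_daily.csv')  # writes data/infections_daily.netinf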
Example 6
def get_truncation_samples():
    generated = ['church', 'kitchen']

    for dataset in generated:
        imgs = pt_to_np(get_samples('stylegan', dataset, N, truncated=True))
        save_stats(imgs, 'truncated', dataset)
        del imgs

    for dataset in generated:
        with np.load(f'fid_stats/{dataset}_gt_stats.npz') as data:
            m1, s1 = data['m'], data['s']
        with np.load(f'{dataset}_truncated_stats.npz') as data:
            m2, s2 = data['m'], data['s']
        print('stylegan', dataset, fid.calculate_frechet_distance(m1, s1, m2, s2))
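pt_to_np, save_stats, and the constant N are defined elsewhere in this project. Given the 'm' and 's' keys loaded above, save_stats presumably stores the mean and covariance of Inception activations; a rough sketch under that assumption (it takes precomputed activations rather than raw images, which is a simplification):

import numpy as np

def save_stats(activations, model, dataset):
    # Hypothetical: `activations` is an (N, D) array of Inception features.
    m = np.mean(activations, axis=0)
    s = np.cov(activations, rowvar=False)
    np.savez(f'{dataset}_{model}_stats.npz', m=m, s=s)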
Example 7
def get_dataset_statistics():
    generated = []
    gt = ['ffhq']

    for model, dataset in generated:
        imgs = pt_to_np(get_samples(model, dataset, N))
        save_stats(imgs, model, dataset)
        del imgs

    for dataset in gt:
        imgs = pt_to_np(get_gt_samples(dataset, N))
        save_stats(imgs, 'gt', dataset)
        del imgs

    for model, dataset in generated:
        for dataset_gt in gt:
            with np.load(f'{dataset}_{model}_stats.npz') as data:
                m1, s1 = data['m'], data['s']
            with np.load(f'{dataset}_gt_stats.npz') as data:
                m2, s2 = data['m'], data['s']
            print(model, dataset, dataset_gt, fid.calculate_frechet_distance(m1, s1, m2, s2))
Example 8
import operator
import pickle

from get_samples import get_samples

def create_counts(filename):
    data = pickle.load(open(filename, 'rb'))
    counts = {}
    for site in data.keys():
        for bigram in data[site]:
            if not bigram in counts:
                counts[bigram] = 0
            counts[bigram] += 1
    #Debug
    sorted_bigrams = sorted(counts.iteritems(), key=operator.itemgetter(1))
    for bigram, count in sorted_bigrams[:50]:
        print '{0}: {1}'.format(bigram, count)
    return counts


# site_ids = pickle.load(open('data/site_ids.data', "rb"))
# files = [x for x in os.listdir('data/users') if not x.startswith('.')]
# for f in files:
#     data = pickle.load(open(f,'rb'))
#     print '{0}: {1} sites'.format(f, len(data))

# create_counts('data/ngrams/2010_11.ngrams')

if __name__ == "__main__":
    total = 0
    print 'here'
    for sample in get_samples('data/infections_daily_test5.csv'):
        total += sample.shape[1]
        print total
Example 9
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import random
import numpy as np
from get_samples import get_samples
from noisy import noisy


# get blocks of training data
samples = get_samples('lfw-deepfunneled', 11000, 0)  # image folder path, number of samples to collect, 1 for grayscale
print(samples)
samples_size = samples.shape
print(samples_size)


#path for full image to test on
path = '/Users/matthewkonyndyk/Desktop/K-SVD/lfw-deepfunneled/William_Genego/William_Genego_0001.jpg'

# original image
original_img = mpimg.imread(path)
imgplot = plt.imshow(original_img)
plt.show()

# depixelated image
noisy_img = noisy(path, 100, 1)  # image path; r is the fraction of pixels to change to 0 per block
imgplot = plt.imshow(noisy_img)
plt.show()
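noisy comes from a local module and is not shown. One plausible sketch, reading r as the percentage of pixels to zero out; the third parameter's meaning is unclear from the call site, so it is accepted but unused here. All names and parameters below are guesses:

import matplotlib.image as mpimg
import numpy as np

def noisy(path, r, block):
    # Hypothetical sketch: zero a random fraction r/100 of the pixels.
    # `block` is kept for signature compatibility but ignored.
    img = mpimg.imread(path).copy()
    mask = np.random.random(img.shape[:2]) < (r / 100.0)
    img[mask] = 0
    return img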

Example 10
import gzip
import pickle

import numpy as np
from sklearn import preprocessing

n_chains = 15
n_samples = 8
plot_every = 5
image_data = np.zeros((29 * n_samples + 1, 29 * n_chains - 1), dtype='uint8')

dataset = 'mnist.pkl.gz'
f = gzip.open(dataset, 'rb')
train_set, valid_set, test_set = pickle.load(f, encoding="bytes")
f.close()

binarizer = preprocessing.Binarizer(threshold=0.5)
training_data = binarizer.transform(train_set[0])
train_data = test_set[0]

feed_samplor = get_samples(hidden_list=hidden_list, W=W, b=b)
feed_data = feed_samplor.get_mean_activation(input_data=training_data)

#feed_data = sigmoid(np.dot(training_data,W) + b[784:])

feed_mean_activation = np.mean(feed_data, axis=0)

#seeds = []

a = np.load(savepath1 + 'seeds.npy')

for idx in range(n_samples):
    persistent_vis_chain = np.random.binomial(n=1,
                                              p=feed_mean_activation,
                                              size=(n_chains, hidden_list[-1]))
    # persistent_vis_chain2 = np.random.binomial(n=1, p= feed_mean_activation, size=(n_chains, hidden_list[-1]))