def test(self, test_file):
    """Test the model for accuracy against a hold-out testing file.

    Walks every consecutive pair of time steps in each sample, predicts the
    next-step site states from the current ones, and scores both exact-match
    and per-site accuracy.

    Returns a tuple of
    (# correct, # total, percent correct, percent nodes correct).
    """
    correct = 0
    total = 0
    nodewise_correct = 0
    for sample in get_samples(test_file):
        print(total)
        for i in range(sample.shape[0] - 1):
            current_nodes = sample[i, 0:self.num_sites]
            # Renamed from `next` to avoid shadowing the builtin next().
            next_nodes = sample[i + 1, 0:self.num_sites]
            predicted = self.predict(current_nodes)
            # Full ground-truth state vector (current + next); only used by
            # the commented-out debug prints below.
            true_state = np.matrix(np.zeros((self.num_nodes)))
            true_state[0, :self.num_sites] = current_nodes
            true_state[0, self.num_sites:] = next_nodes
            # print("Ground: {0}".format(true_state))
            # print("Ground prob: {0}".format(self.prob(true_state)))
            # print("Next: {0} Predicted: {1}".format(next_nodes, predicted))
            if (next_nodes == predicted).all():
                correct += 1
                nodewise_correct += 1
            else:
                # Partial credit: fraction of individual sites predicted right.
                for j in range(self.num_sites):
                    if next_nodes[0, j] == predicted[0, j]:
                        nodewise_correct += 1 / float(self.num_sites)
            total += 1
    print(test_file)
    results = (correct, total, correct / float(total),
               nodewise_correct / float(total))
    pretty_print(results)
    return results
def test(self, test_file):
    """Test the model for accuracy against a hold-out testing file.

    Walks every consecutive pair of time steps in each sample, predicts the
    next-step site states from the current ones, and scores both exact-match
    and per-site accuracy.

    Returns a tuple of
    (# correct, # total, percent correct, percent nodes correct).
    """
    correct = 0
    total = 0
    nodewise_correct = 0
    for sample in get_samples(test_file):
        print(total)
        for i in range(sample.shape[0] - 1):
            current_nodes = sample[i, 0:self.num_sites]
            # Renamed from `next` to avoid shadowing the builtin next().
            next_nodes = sample[i + 1, 0:self.num_sites]
            predicted = self.predict(current_nodes)
            # Full ground-truth state vector (current + next); only used by
            # the commented-out debug prints below.
            true_state = np.matrix(np.zeros((self.num_nodes)))
            true_state[0, :self.num_sites] = current_nodes
            true_state[0, self.num_sites:] = next_nodes
            # print("Ground: {0}".format(true_state))
            # print("Ground prob: {0}".format(self.prob(true_state)))
            # print("Next: {0} Predicted: {1}".format(next_nodes, predicted))
            if (next_nodes == predicted).all():
                correct += 1
                nodewise_correct += 1
            else:
                # Partial credit: fraction of individual sites predicted right.
                for j in range(self.num_sites):
                    if next_nodes[0, j] == predicted[0, j]:
                        nodewise_correct += 1 / float(self.num_sites)
            total += 1
    print(test_file)
    results = (correct, total, correct / float(total),
               nodewise_correct / float(total))
    pretty_print(results)
    return results
def create_holdouts(filename, num_splits):
    """Round-robin split of the samples in `filename` into `num_splits` test files.

    Sample k is written to split file (k % num_splits) as CSV rows, with a
    blank line terminating each sample.
    """
    print("Creating testing data")
    count = 0
    # Text mode ('w', not 'wb'): we write str, which raises TypeError on a
    # binary handle under Python 3.
    splits = [open(filename.replace('.csv', '_test{0}.csv'.format(i)), 'w')
              for i in range(num_splits)]
    try:
        for sample in get_samples(filename):
            f = splits[count % num_splits]
            for row in range(sample.shape[0]):
                f.write(','.join([str(x) for x in sample[row, :].tolist()[0]]))
                f.write('\n')
            f.write('\n')  # blank line marks the end of a sample
            count += 1
    finally:
        # The original leaked all of these handles.
        for f in splits:
            f.close()
    print(count)
def calc_mu(self, samples):
    """Compute empirical moment statistics from the sample file.

    For every consecutive pair of time steps (n -> n'), accumulates:
      - mu_s: per-node activation counts, and
      - mu_stXY: pairwise counts for each of the four binary joint states,
    over the upper-right (n -> n') and lower-right (n' -> n') quadrants of
    the num_nodes x num_nodes matrix, then normalizes everything by the
    total number of transitions.

    Returns [mu_s, mu_st00, mu_st01, mu_st10, mu_st11].
    """
    mu_s = np.matrix(np.zeros((1, self.num_nodes)))
    s = np.zeros((self.num_nodes, self.num_nodes))
    mu_st11 = np.matrix(s)
    mu_st10 = np.matrix(s)
    mu_st01 = np.matrix(s)
    mu_st00 = np.matrix(s)
    total_samples = 0
    for m1 in get_samples(samples):
        # m2 is m1 shifted up one row, so m2[i] == m1[i + 1].
        m2 = np.roll(m1, -1, axis=0)
        total_samples += m1.shape[0] - 1
        print(total_samples)
        for i in range(m1.shape[0] - 1):
            n1 = m1[i]
            n2 = m2[i]
            # Edges from n -> n': the upper-right quadrant.
            upright = self.calc_mu_quadrant(n1, n2, False)
            mu_s[0, 0:self.num_sites] += n1
            mu_st11[0:self.num_sites, self.num_sites:] += upright[3]  # nn11
            mu_st10[0:self.num_sites, self.num_sites:] += upright[2]  # nn10
            mu_st01[0:self.num_sites, self.num_sites:] += upright[1]  # nn01
            mu_st00[0:self.num_sites, self.num_sites:] += upright[0]  # nn00
            # Edges from n' -> n': the lower-right quadrant.
            lowright = self.calc_mu_quadrant(n2, n2, True)
            mu_s[0, self.num_sites:] += n2
            mu_st11[self.num_sites:, self.num_sites:] += lowright[3]  # nn11
            mu_st10[self.num_sites:, self.num_sites:] += lowright[2]  # nn10
            mu_st01[self.num_sites:, self.num_sites:] += lowright[1]  # nn01
            mu_st00[self.num_sites:, self.num_sites:] += lowright[0]  # nn00
    # Keep only the upper triangle of each pairwise matrix (presumably each
    # undirected pair should be counted once -- confirm against
    # calc_mu_quadrant).
    mu_st11 = np.triu(mu_st11)
    mu_st10 = np.triu(mu_st10)
    mu_st01 = np.triu(mu_st01)
    mu_st00 = np.triu(mu_st00)
    mu_s /= total_samples
    mu_st11 /= total_samples
    mu_st10 /= total_samples
    mu_st01 /= total_samples
    mu_st00 /= total_samples
    print("Mu_s:\n{0}".format(mu_s))
    print("Mu_st11:\n{0}\n\n".format(mu_st11))
    print("Mu_st10:\n{0}\n\n".format(mu_st10))
    print("Mu_st01:\n{0}\n\n".format(mu_st01))
    print("Mu_st00:\n{0}\n\n".format(mu_st00))
    print("Summed:\n{0}\n\n".format(mu_st11 + mu_st10 + mu_st01 + mu_st00))
    self.num_samples = total_samples
    return [mu_s, mu_st00, mu_st01, mu_st10, mu_st11]
def percent_split(filename, percent, num_splits):
    """Create `num_splits` independent random train/test splits.

    For each split, every sample independently goes to the test file with
    probability `percent`, otherwise to the training file.
    """
    print('Creating {0} splits of {1}% each from {2}'.format(
        num_splits, percent * 100, filename))
    # Text mode ('w', not 'wb'): write_sample presumably writes str (the
    # companion create_holdouts does) -- binary handles would raise under
    # Python 3. TODO confirm against write_sample.
    training = [open(filename.replace('.csv', '_train{0}.csv'.format(i)), 'w')
                for i in range(num_splits)]
    testing = [open(filename.replace('.csv', '_test{0}.csv'.format(i)), 'w')
               for i in range(num_splits)]
    try:
        count = 0
        for sample in get_samples(filename):
            for i in range(num_splits):
                if random.random() < percent:
                    write_sample(sample, testing[i])
                else:
                    write_sample(sample, training[i])
            count += 1
    finally:
        # The original leaked all of these handles.
        for f in training + testing:
            f.close()
    print(count)
def transform_to_netinf(fname):
    """Convert a CSV sample file to NETINF cascade format.

    Writes one "i,i" node declaration per column, then for each sample a
    line of "node,first-infection-timestep;" pairs recording the first time
    step at which each column becomes 1.
    """
    # 'w' (text) instead of 'wb': we write str. Closed via the context
    # manager; the original handle was never closed.
    with open(fname.replace('.csv', '.netinf'), 'w') as f:
        # Node declarations, from the column count of the sample file.
        numvars = 0
        for sample in get_samples(fname):
            numvars = sample.shape[1]
        for i in range(numvars):
            f.write("%s,%s\n" % (i, i))
        total = 0
        for sample in get_samples(fname):
            netinf = {}
            print(total)
            # Record the first time step at which each column flips to 1.
            for i in range(sample.shape[0]):
                for j in range(sample.shape[1]):
                    if sample[i, j] == 1 and j not in netinf:
                        netinf[j] = i
            f.write('\n')
            for node, step in netinf.items():
                f.write('%s,%s;' % (node, step))
            total += 1
def get_truncation_samples():
    """Sample truncated StyleGAN images for each dataset, persist their
    statistics, then print the Frechet distance between ground-truth and
    truncated statistics for each dataset."""
    datasets = ['church', 'kitchen']

    # Pass 1: draw truncated samples and save their activation statistics.
    for name in datasets:
        batch = pt_to_np(get_samples('stylegan', name, N, truncated=True))
        save_stats(batch, 'truncated', name)
        del batch  # release the image batch before the next dataset

    # Pass 2: compare the saved truncated stats against the ground truth.
    for name in datasets:
        with np.load(f'fid_stats/{name}_gt_stats.npz') as data:
            gt_mu, gt_sigma = data['m'], data['s']
        with np.load(f'{name}_truncated_stats.npz') as data:
            tr_mu, tr_sigma = data['m'], data['s']
        score = fid.calculate_frechet_distance(gt_mu, gt_sigma, tr_mu, tr_sigma)
        print('stylegan', name, score)
def transform_to_netinf(fname):
    """Convert a CSV sample file to NETINF cascade format.

    Writes one "i,i" node declaration per column, then for each sample a
    line of "node,first-infection-timestep;" pairs recording the first time
    step at which each column becomes 1.
    """
    # 'w' (text) instead of 'wb': we write str. Closed via the context
    # manager; the original handle was never closed.
    with open(fname.replace('.csv', '.netinf'), 'w') as f:
        # Node declarations, from the column count of the sample file.
        numvars = 0
        for sample in get_samples(fname):
            numvars = sample.shape[1]
        for i in range(numvars):
            f.write("%s,%s\n" % (i, i))
        total = 0
        for sample in get_samples(fname):
            netinf = {}
            print(total)
            # Record the first time step at which each column flips to 1.
            for i in range(sample.shape[0]):
                for j in range(sample.shape[1]):
                    if sample[i, j] == 1 and j not in netinf:
                        netinf[j] = i
            f.write('\n')
            for node, step in netinf.items():
                f.write('%s,%s;' % (node, step))
            total += 1
def create_holdouts(filename, num_splits):
    """Round-robin split of the samples in `filename` into `num_splits` test files.

    Sample k is written to split file (k % num_splits) as CSV rows, with a
    blank line terminating each sample.
    """
    print("Creating testing data")
    count = 0
    # Text mode ('w', not 'wb'): we write str, which raises TypeError on a
    # binary handle under Python 3.
    splits = [open(filename.replace('.csv', '_test{0}.csv'.format(i)), 'w')
              for i in range(num_splits)]
    try:
        for sample in get_samples(filename):
            f = splits[count % num_splits]
            for row in range(sample.shape[0]):
                f.write(','.join([str(x) for x in sample[row, :].tolist()[0]]))
                f.write('\n')
            f.write('\n')  # blank line marks the end of a sample
            count += 1
    finally:
        # The original leaked all of these handles.
        for f in splits:
            f.close()
    print(count)
def percent_split(filename, percent, num_splits):
    """Create `num_splits` independent random train/test splits.

    For each split, every sample independently goes to the test file with
    probability `percent`, otherwise to the training file.
    """
    print('Creating {0} splits of {1}% each from {2}'.format(
        num_splits, percent * 100, filename))
    # Text mode ('w', not 'wb'): write_sample presumably writes str (the
    # companion create_holdouts does) -- binary handles would raise under
    # Python 3. TODO confirm against write_sample.
    training = [open(filename.replace('.csv', '_train{0}.csv'.format(i)), 'w')
                for i in range(num_splits)]
    testing = [open(filename.replace('.csv', '_test{0}.csv'.format(i)), 'w')
               for i in range(num_splits)]
    try:
        count = 0
        for sample in get_samples(filename):
            for i in range(num_splits):
                if random.random() < percent:
                    write_sample(sample, testing[i])
                else:
                    write_sample(sample, training[i])
            count += 1
    finally:
        # The original leaked all of these handles.
        for f in training + testing:
            f.close()
    print(count)
def get_dataset_statistics():
    """Save activation statistics for generated and ground-truth datasets,
    then print a Frechet-distance score for every (generated, gt) pairing."""
    generated = []  # (model, dataset) pairs; currently none are configured
    gt = ['ffhq']

    def _load_ms(path):
        # Pull the mean/covariance pair out of a saved stats archive.
        with np.load(path) as data:
            return data['m'], data['s']

    # Save stats for each configured generated (model, dataset) pair.
    for model, dataset in generated:
        batch = pt_to_np(get_samples(model, dataset, N))
        save_stats(batch, model, dataset)
        del batch  # free the batch before the next sampling run

    # Save stats for each ground-truth dataset.
    for dataset in gt:
        batch = pt_to_np(get_gt_samples(dataset, N))
        save_stats(batch, 'gt', dataset)
        del batch

    # Score every generated pair against every ground-truth dataset.
    for model, dataset in generated:
        for dataset_gt in gt:
            m1, s1 = _load_ms(f'{dataset}_{model}_stats.npz')
            # NOTE(review): this loads gt stats keyed by `dataset`, not
            # `dataset_gt` -- looks suspicious for a cross-dataset
            # comparison, but `generated` is empty so the loop never runs;
            # confirm intent before changing.
            m2, s2 = _load_ms(f'{dataset}_gt_stats.npz')
            print(model, dataset, dataset_gt,
                  fid.calculate_frechet_distance(m1, s1, m2, s2))
from get_samples import get_samples

import operator
import pickle


def create_counts(filename):
    """Count bigram occurrences across all sites in a pickled ngram file.

    Args:
        filename: path to a pickle of {site: iterable-of-bigrams}.

    Returns:
        dict mapping bigram -> total number of occurrences observed.
    """
    # BUG FIX: the original called open(f, 'rb') where `f` is undefined --
    # the parameter is `filename` -- raising NameError on every call.
    with open(filename, 'rb') as fh:
        data = pickle.load(fh)
    counts = {}
    for site in data.keys():
        for bigram in data[site]:
            if bigram not in counts:
                counts[bigram] = 0
            counts[bigram] += 1
    # Debug: show the least-frequent bigrams (ascending count order).
    # Fixes from the original: iteritems() is Python 2 only; each entry of
    # sorted_bigrams is a (bigram, count) pair, so indexing `counts` with
    # the whole pair raised KeyError; and slicing avoids IndexError when
    # fewer than 50 bigrams exist.
    sorted_bigrams = sorted(counts.items(), key=operator.itemgetter(1))
    for bigram, count in sorted_bigrams[:50]:
        print('{0}: {1}'.format(bigram, count))
    return counts


# site_ids = pickle.load(open('data/site_ids.data', "rb"))
# files = [x for x in os.listdir('data/users') if not x.startswith('.')]
# for f in files:
#     data = pickle.load(open(f, 'rb'))
#     print '{0}: {1} sites'.format(f, len(data))
# create_counts('data/ngrams/2010_11.ngrams')

if __name__ == "__main__":
    total = 0
    print('here')
    for sample in get_samples('data/infections_daily_test5.csv'):
        total += sample.shape[1]
    print(total)
def calc_mu(self, samples):
    """Compute empirical moment statistics from the sample file.

    For every consecutive pair of time steps (n -> n'), accumulates:
      - mu_s: per-node activation counts, and
      - mu_stXY: pairwise counts for each of the four binary joint states,
    over the upper-right (n -> n') and lower-right (n' -> n') quadrants of
    the num_nodes x num_nodes matrix, then normalizes everything by the
    total number of transitions.

    Returns [mu_s, mu_st00, mu_st01, mu_st10, mu_st11].
    """
    mu_s = np.matrix(np.zeros((1, self.num_nodes)))
    s = np.zeros((self.num_nodes, self.num_nodes))
    mu_st11 = np.matrix(s)
    mu_st10 = np.matrix(s)
    mu_st01 = np.matrix(s)
    mu_st00 = np.matrix(s)
    total_samples = 0
    for m1 in get_samples(samples):
        # m2 is m1 shifted up one row, so m2[i] == m1[i + 1].
        m2 = np.roll(m1, -1, axis=0)
        total_samples += m1.shape[0] - 1
        print(total_samples)
        for i in range(m1.shape[0] - 1):
            n1 = m1[i]
            n2 = m2[i]
            # Edges from n -> n': the upper-right quadrant.
            upright = self.calc_mu_quadrant(n1, n2, False)
            mu_s[0, 0:self.num_sites] += n1
            mu_st11[0:self.num_sites, self.num_sites:] += upright[3]  # nn11
            mu_st10[0:self.num_sites, self.num_sites:] += upright[2]  # nn10
            mu_st01[0:self.num_sites, self.num_sites:] += upright[1]  # nn01
            mu_st00[0:self.num_sites, self.num_sites:] += upright[0]  # nn00
            # Edges from n' -> n': the lower-right quadrant.
            lowright = self.calc_mu_quadrant(n2, n2, True)
            mu_s[0, self.num_sites:] += n2
            mu_st11[self.num_sites:, self.num_sites:] += lowright[3]  # nn11
            mu_st10[self.num_sites:, self.num_sites:] += lowright[2]  # nn10
            mu_st01[self.num_sites:, self.num_sites:] += lowright[1]  # nn01
            mu_st00[self.num_sites:, self.num_sites:] += lowright[0]  # nn00
    # Keep only the upper triangle of each pairwise matrix (presumably each
    # undirected pair should be counted once -- confirm against
    # calc_mu_quadrant).
    mu_st11 = np.triu(mu_st11)
    mu_st10 = np.triu(mu_st10)
    mu_st01 = np.triu(mu_st01)
    mu_st00 = np.triu(mu_st00)
    mu_s /= total_samples
    mu_st11 /= total_samples
    mu_st10 /= total_samples
    mu_st01 /= total_samples
    mu_st00 /= total_samples
    print("Mu_s:\n{0}".format(mu_s))
    print("Mu_st11:\n{0}\n\n".format(mu_st11))
    print("Mu_st10:\n{0}\n\n".format(mu_st10))
    print("Mu_st01:\n{0}\n\n".format(mu_st01))
    print("Mu_st00:\n{0}\n\n".format(mu_st00))
    print("Summed:\n{0}\n\n".format(mu_st11 + mu_st10 + mu_st01 + mu_st00))
    self.num_samples = total_samples
    return [mu_s, mu_st00, mu_st01, mu_st10, mu_st11]
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import random
import numpy as np
from get_samples import get_samples
from noisy import noisy

# Get blocks of training data.
# get_samples(image folder path, number of samples to collect, 1 for grayscale).
samples = get_samples('lfw-deepfunneled', 11000, 0)
print(samples)
samples_size = samples.shape
print(samples_size)

# Path for the full image to test on.
# NOTE(review): hard-coded absolute user path -- will only run on the
# original author's machine.
path = '/Users/matthewkonyndyk/Desktop/K-SVD/lfw-deepfunneled/William_Genego/William_Genego_0001.jpg'

# Display the original image.
original_img = mpimg.imread(path)
imgplot = plt.imshow(original_img)
plt.show()

# Display the depixelated image.
# noisy(image path, r, flag): per the original note, r is the fraction of
# pixels to change to 0 per block; presumably flag=1 selects grayscale as
# in get_samples above -- TODO confirm against noisy().
noisy_img = noisy(path, 100, 1)
#img=mpimg.imread(noisy_img)
imgplot = plt.imshow(noisy_img)
plt.show()
# Sampling-visualization setup: n_chains parallel chains, n_samples rows,
# plotting every plot_every steps.
n_chains = 15
n_samples = 8
plot_every = 5
# Canvas for tiling the chain images; 29-pixel cells suggest 28x28 MNIST
# digits plus 1px spacing -- TODO confirm.
image_data = np.zeros((29 * n_samples + 1, 29 * n_chains - 1), dtype='uint8')

# Load the pickled MNIST dataset and binarize the training images at 0.5.
dataset = 'mnist.pkl.gz'
f = gzip.open(dataset, 'rb')
train_set, valid_set, test_set = pickle.load(f, encoding="bytes")
f.close()
binarizer = preprocessing.Binarizer(threshold=0.5)
training_data = binarizer.transform(train_set[0])
train_data = test_set[0]

# Build the sampler from the trained weights (hidden_list, W, b are defined
# earlier in the file) and take the mean activation over the training data --
# presumably the mean top-layer hidden activation; confirm against
# get_samples.get_mean_activation.
feed_samplor = get_samples(hidden_list=hidden_list, W=W, b=b)
feed_data = feed_samplor.get_mean_activation(input_data=training_data)
#feed_data = sigmoid(np.dot(training_data,W) + b[784:])
feed_mean_activation = np.mean(feed_data, axis=0)

#seeds = []
a = np.load(savepath1 + 'seeds.npy')
for idx in range(n_samples):
    # Seed each chain's state by Bernoulli-sampling from the mean activation,
    # one row per chain, width = size of the last hidden layer.
    persistent_vis_chain = np.random.binomial(n=1, p=feed_mean_activation, size=(n_chains, hidden_list[-1]))
    # persistent_vis_chain2 = np.random.binomial(n=1, p= feed_mean_activation, size=(n_chains, hidden_list[-1]))