from pylearn.io import filetensor as ft

def save_params(n, params):
    # Dump each parameter tensor to one filetensor file per epoch,
    # writing the records sequentially in the order given by `params`.
    fname = 'params_epoch_' + str(n) + '.ft'
    f = open(fname, 'w')
    for p in params:
        ft.write(f, p.get_value(borrow=True))
    f.close()
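# Hedged sketch (not part of the original code): one way to load the saved
# parameters back, assuming ft.read() consumes one record per call and that
# `params` is the same list of shared variables, in the same order, as when
# save_params() was called.
def load_params(n, params):
    f = open('params_epoch_' + str(n) + '.ft')
    for p in params:
        p.set_value(ft.read(f), borrow=True)
    f.close()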
import numpy
from pylearn.io import filetensor as ft
# `datasets` is the project's dataset module providing nist_all(), nist_P07() and PNIST07().

def frequency_table():
    # Count how many training examples fall in each of the 62 classes
    # (digits, uppercase and lowercase letters) for the three datasets,
    # and save one frequency table per dataset.
    filenames = ['nist_train_class_freq.ft', 'p07_train_class_freq.ft', 'pnist_train_class_freq.ft']
    iterators = [datasets.nist_all(), datasets.nist_P07(), datasets.PNIST07()]
    for dataset, filename in zip(iterators, filenames):
        freq_table = numpy.zeros(62)
        for x, y in dataset.train(1):
            freq_table[int(y)] += 1
        f = open(filename, 'w')
        ft.write(f, freq_table)
        f.close()
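# Hedged usage sketch (not in the original): turning one of the saved
# frequency tables into class priors, assuming the .ft file sits in the
# current directory.
f = open('nist_train_class_freq.ft')
freq = ft.read(f)
f.close()
priors = freq / freq.sum()
print 'class priors:', priors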
import os
import time
import numpy
from pylearn.io import filetensor

def save_mat(fname, mat, save_dir=''):
    # Write a numpy array to a filetensor file, retrying the close in a
    # loop (with a random back-off) in case the shared filesystem is
    # temporarily unavailable.
    assert isinstance(mat, numpy.ndarray)
    print 'save ndarray to file: ', os.path.join(save_dir, fname)
    file_handle = open(os.path.join(save_dir, fname), 'w')
    filetensor.write(file_handle, mat)
    writebool = False
    while not writebool:
        try:
            file_handle.close()
            writebool = True
        except:
            print 'save model error'
            time.sleep((numpy.random.randint(10) + 2) * 10)
the valid set is trainorig[:80000]
the test set is trainorig[80000:160000]
the train set is trainorig[160000:]
trainorig is already shuffled
'''
from pylearn.io import filetensor as ft
import numpy, os

dir1 = '/data/lisa/data/ocr_breuel/filetensor/'
dir2 = "/data/lisa/data/ift6266h10/"

# Split the shuffled UNLV OCR data into valid / test / train files.
f = open(dir1 + 'unlv-corrected-2010-02-01-shuffled.ft')
d = ft.read(f)
f = open(dir2 + "ocr_valid_data.ft", 'wb')
ft.write(f, d[:80000])
f = open(dir2 + "ocr_test_data.ft", 'wb')
ft.write(f, d[80000:160000])
f = open(dir2 + "ocr_train_data.ft", 'wb')
ft.write(f, d[160000:])

# Same split for the labels.
f = open(dir1 + 'unlv-corrected-2010-02-01-labels-shuffled.ft')
d = ft.read(f)
f = open(dir2 + "ocr_valid_labels.ft", 'wb')
ft.write(f, d[:80000])
f = open(dir2 + "ocr_test_labels.ft", 'wb')
ft.write(f, d[80000:160000])
f = open(dir2 + "ocr_train_labels.ft", 'wb')
ft.write(f, d[160000:])

for i in ["train", "valid", "test"]:
from pylearn.io import filetensor as ft

def generate_ft_file(data, labels, ft_name):
    # Write a (data, labels) pair to '<ft_name>_data.ft' and '<ft_name>_labels.ft'.
    fdata = open(ft_name + '_data.ft', 'w')
    flabels = open(ft_name + '_labels.ft', 'w')
    ft.write(fdata, data)
    ft.write(flabels, labels)
    fdata.close()
    flabels.close()
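# Hedged usage sketch (not in the original): writing a toy dataset; the array
# shapes and the 'toy' prefix are chosen only for illustration.
import numpy
data = numpy.zeros((10, 32 * 32), dtype=numpy.uint8)
labels = numpy.zeros(10, dtype=numpy.int32)
generate_ft_file(data, labels, 'toy')  # creates toy_data.ft and toy_labels.ft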
# Keep only the digit examples (labels 0-9) from the dataset `ds`
# and write them out as filetensor files.
all_x = []
all_y = []
all_count = 0
for mb_x, mb_y in ds.train(1):
    if mb_y[0] <= 9:
        all_x.append(mb_x[0])
        all_y.append(mb_y[0])
    if (all_count + 1) % 100000 == 0:
        print "Done next 100k"
    all_count += 1

# Data is stored as uint8 in [0, 255].
merged_x = numpy.asarray(all_x, dtype=numpy.uint8)
merged_y = numpy.asarray(all_y, dtype=numpy.int32)

print "Kept", len(all_x), "(shape ", merged_x.shape, ") examples from", all_count

f = open(output_data_file, 'wb')
ft.write(f, merged_x)
f.close()
f = open(output_labels_file, 'wb')
ft.write(f, merged_y)
f.close()
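# Hedged sanity check (not in the original): re-read the filtered labels and
# confirm every one is a digit; assumes output_labels_file is the same path
# used above.
f = open(output_labels_file)
check_y = ft.read(f)
f.close()
assert (check_y >= 0).all() and (check_y <= 9).all()
print "Filtered label range OK:", check_y.min(), "-", check_y.max()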
from pylearn.io import filetensor as ft
import pylab
import random as r
from numpy import *

nombre = 20000  # sample size

f = open('all_train_data.ft')  # the dataset is stored locally
d = ft.read(f)
f.close()
print len(d)

random.seed(3525)  # seeds numpy's RNG (r.sample below uses Python's `random`, which is not seeded here)
echantillon = r.sample(xrange(len(d)), nombre)
# Copy, so that filling `nouveau` does not overwrite rows of `d` that may still be sampled.
nouveau = d[0:nombre].copy()
for i in xrange(nombre):
    nouveau[i] = d[echantillon[i]]

f2 = open('echantillon_occlusion.ft', 'w')
ft.write(f2, nouveau)
f2.close()

## Check that it worked
f3 = open('echantillon_occlusion.ft')
d2 = ft.read(f3)
pylab.imshow(d2[0].reshape((32, 32)))
pylab.show()
f3.close()