Beispiel #1
0
 def save_params(n, params):
     fname   =   'params_epoch_'+str(n)+'.ft'
     f       =   open(fname, 'w')
     
     for p in params:
         ft.write(f, p.get_value(borrow=True))
    
     f.close() 
def frequency_table():
    """Count per-class frequencies (62 classes: digits + upper/lower case)
    over the train split of three NIST-derived datasets and write one
    frequency vector per dataset as a filetensor file.
    """
    filenames = ['nist_train_class_freq.ft',
                 'p07_train_class_freq.ft',
                 'pnist_train_class_freq.ft']
    iterators = [datasets.nist_all(), datasets.nist_P07(), datasets.PNIST07()]
    for dataset, filename in zip(iterators, filenames):
        freq_table = numpy.zeros(62)
        # train(1) yields size-1 minibatches; y holds a single class label
        for x, y in dataset.train(1):
            freq_table[int(y)] += 1
        # 'wb': filetensor is binary; `with` closes the handle even if
        # ft.write raises (the original leaked it on error).
        with open(filename, 'wb') as f:
            ft.write(f, freq_table)
def save_mat(fname, mat, save_dir=''):
    assert isinstance(mat, numpy.ndarray)
    print 'save ndarray to file: ', save_dir + fname
    file_handle = open(os.path.join(save_dir, fname), 'w')
    filetensor.write(file_handle, mat)
    writebool = False
    while not writebool:
        try:
            file_handle.close()
            writebool = True
        except:
            print 'save model error'
            time.sleep((numpy.random.randint(10)+2)*10)
Beispiel #4
0
def save_mat(fname, mat, save_dir=''):
    assert isinstance(mat, numpy.ndarray)
    print 'save ndarray to file: ', save_dir + fname
    file_handle = open(os.path.join(save_dir, fname), 'w')
    filetensor.write(file_handle, mat)
    writebool = False
    while not writebool:
        try:
            file_handle.close()
            writebool = True
        except:
            print 'save model error'
            time.sleep((numpy.random.randint(10) + 2) * 10)
the valid set is trainorig[:80000]
the test set is trainorig[80000:160000]
the train set is trainorig[160000:]
trainorig is already shuffled
'''

from pylearn.io import filetensor as ft
import numpy, os

dir1 = '/data/lisa/data/ocr_breuel/filetensor/'
dir2 = "/data/lisa/data/ift6266h10/"

# Split boundaries over the pre-shuffled source tensor:
# valid = [:80000], test = [80000:160000], train = [160000:]
_SPLITS = [("valid", 0, 80000), ("test", 80000, 160000), ("train", 160000, None)]


def _split_and_write(src_fname, suffix):
    """Read one shuffled filetensor from dir1 and write its three splits
    to dir2 as ocr_<split>_<suffix>.ft."""
    # 'rb'/'wb': filetensor is binary.  `with` closes every handle; the
    # original rebound `f` repeatedly without ever closing it.
    with open(dir1 + src_fname, 'rb') as f:
        d = ft.read(f)
    for split_name, lo, hi in _SPLITS:
        with open(dir2 + "ocr_%s_%s.ft" % (split_name, suffix), 'wb') as f:
            ft.write(f, d[lo:hi])


_split_and_write('unlv-corrected-2010-02-01-shuffled.ft', 'data')
_split_and_write('unlv-corrected-2010-02-01-labels-shuffled.ft', 'labels')

for i in ["train", "valid", "test"]:
def generate_ft_file(data,labels,ft_name):
    """Write (data, labels) to <ft_name>_data.ft / <ft_name>_labels.ft,
    then build a digit-only subset of a dataset and dump it as well.

    NOTE(review): this body reads like two scraped snippets fused into one
    function -- everything after the first pair of close() calls uses free
    names (`ds`, `output_data_file`, `output_labels_file`) that are neither
    parameters nor defined in this file.  Confirm before using.
    """
    # NOTE(review): opened in text mode 'w' although filetensor data is
    # binary; 'wb' would be safer on non-POSIX platforms -- confirm.
    fdata = open(ft_name+'_data.ft','w')
    flabels = open(ft_name+'_labels.ft','w')
    ft.write(fdata,data)
    ft.write(flabels,labels)
    fdata.close();flabels.close()
    # Accumulate only examples whose label is <= 9 (i.e. digit classes).
    all_x = []
    all_y = []

    all_count = 0

    # `ds` is a free variable here; its train(1) iterator yields size-1
    # minibatches (mb_x, mb_y) -- presumably a project dataset object.
    for mb_x,mb_y in ds.train(1):
        if mb_y[0] <= 9:
            all_x.append(mb_x[0])
            all_y.append(mb_y[0])

        # progress report every 100k examples scanned
        if (all_count+1) % 100000 == 0:
            print "Done next 100k"

        all_count += 1
   
    # data is stored as uint8 on 0-255
    merged_x = numpy.asarray(all_x, dtype=numpy.uint8)
    merged_y = numpy.asarray(all_y, dtype=numpy.int32)

    print "Kept", len(all_x), "(shape ", merged_x.shape, ") examples from", all_count

    # `output_data_file` / `output_labels_file` are also free variables
    # (not defined in this file) -- TODO confirm where they come from.
    f = open(output_data_file, 'wb')
    ft.write(f, merged_x)
    f.close()

    f = open(output_labels_file, 'wb')
    ft.write(f, merged_y)
    f.close()
    
from pylearn.io import filetensor as ft
import pylab
import random as r
from numpy import *

nombre = 20000  #La grandeur de l'echantillon

f = open('all_train_data.ft')  #Le jeu de donnees est en local.  
d = ft.read(f)
f.close()
print len(d)
random.seed(3525)

echantillon=r.sample(xrange(len(d)),nombre)
nouveau=d[0:nombre]
for i in xrange(nombre):
    nouveau[i]=d[echantillon[i]]


f2 = open('echantillon_occlusion.ft', 'w')
ft.write(f2,nouveau)
f2.close()


##Tester si ca a fonctionne
f3 = open('echantillon_occlusion.ft')

d2=ft.read(f3)
pylab.imshow(d2[0].reshape((32,32)))
pylab.show()
f3.close()