def val_iter(self, count, recorder):
    '''run one validation iteration using the compiled val_iter_fn'''

    if self.current_v == 0 and self.subb_v == 0:
        self.data.shuffle_data(mode='val')
        self.data.shard_data(mode='val', rank=self.rank, size=self.size)

    img = self.data.val_img_shard
    labels = self.data.val_labels_shard

    img_mean = self.data.rawdata[4]
    mode = 'val'
    function = self.val_iter_fn

    if self.subb_v == 0:
        # load the whole file into shared_x when loading sub-batch 0 of each file.
        arr = img[self.current_v]  # - img_mean
        arr = np.rollaxis(arr, 0, 4)
        self.shared_x.set_value(arr)
        self.shared_y.set_value(labels[self.current_v])

        if self.current_v == self.data.n_batch_val - 1:
            self.last_one_v = True
        else:
            self.last_one_v = False

    from theanompi.models.layers2 import Dropout, Crop

    # disable stochastic layers for evaluation
    Dropout.SetDropoutOff()
    Crop.SetRandCropOff()

    cost, error, error_top5 = function(self.subb_v)

    Dropout.SetDropoutOn()
    Crop.SetRandCropOn()

    recorder.val_error(count, cost, error, error_top5)

    if (self.subb_v + 1) // self.n_subb == 1:
        # the next sub-batch is in another file
        if self.last_one_v == False:
            self.current_v += 1
        else:
            self.current_v = 0
        self.subb_v = 0
    else:
        self.subb_v += 1
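# The compiled val_iter_fn is not shown in this section. Below is a minimal
# sketch of how such a function could be compiled, assuming shared_x holds a
# whole file batch in c01b layout (batch on the last axis) and each sub-batch
# holds batch_size samples; the compile_val_iter_fn helper and its slicing
# scheme are assumptions for illustration, not theanompi's actual compile step.
import theano
import theano.tensor as T

def compile_val_iter_fn(model):
    # subb is the sub-batch index passed as function(self.subb_v) above
    subb = T.iscalar('subb')
    bs = model.batch_size
    return theano.function(
        [subb],
        [model.cost, model.error, model.error_top_5],
        givens={
            # c01b layout: slice the batch (last) axis of the file batch
            model.x: model.shared_x[:, :, :, subb * bs:(subb + 1) * bs],
            model.y: model.shared_y[subb * bs:(subb + 1) * bs],
        },
    )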
def val_iter(self, count, recorder):
    '''run one validation iteration using the compiled val_iter_fn'''

    if self.current_v == 0:
        self.data.shard_data(file_batch_size, self.rank, self.size)

    img = self.data.val_img_shard
    labels = self.data.val_labels_shard

    mode = 'val'
    function = self.val_iter_fn

    if self.subb_v == 0:
        # load the whole file into shared_x when loading sub-batch 0 of each file.

        if self.data.para_load:
            # parallel loading of shared_x through the loader process
            icomm = self.data.icomm

            if self.current_v == 0:
                # 3.0 give mode signal to adjust loading mode between train and val
                icomm.isend('val', dest=0, tag=40)
                # 3.1 give load signal to load the very first file
                icomm.isend(img[self.current_v], dest=0, tag=40)

            if self.current_v == self.data.n_batch_val - 1:
                self.last_one_v = True
                # only to get the last copy_finished signal from the loader
                icomm.isend(img[self.current_v], dest=0, tag=40)
            else:
                self.last_one_v = False
                # 4. give preload signal to load the next file
                icomm.isend(img[self.current_v + 1], dest=0, tag=40)

            # 5. wait for the batch to be loaded into shared_x
            msg = icomm.recv(source=0, tag=55)
            # assert msg == 'copy_finished'
        else:
            # direct loading of shared_x
            arr = hkl.load(img[self.current_v])  # - img_mean
            # arr = np.rollaxis(arr, 0, 4)
            self.shared_x.set_value(arr)

        # direct loading of shared_y
        self.shared_y.set_value(labels[self.current_v])

        if self.current_v == self.data.n_batch_val - 1:
            self.last_one_v = True
        else:
            self.last_one_v = False

    from theanompi.models.layers2 import Dropout, Crop

    # disable stochastic layers for evaluation
    Dropout.SetDropoutOff()
    Crop.SetRandCropOff()

    cost, error, error_top5 = function(self.subb_v)

    Dropout.SetDropoutOn()
    Crop.SetRandCropOn()

    recorder.val_error(count, cost, error, error_top5)

    if (self.subb_v + 1) // self.n_subb == 1:
        # the next sub-batch is in another file
        if self.last_one_v == False:
            self.current_v += 1
        else:
            self.current_v = 0
        self.subb_v = 0
    else:
        self.subb_v += 1
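# Both val_iter variants advance current_v over file batches and subb_v over
# sub-batches within a file, so one full validation pass takes exactly
# n_batch_val * n_subb calls. A minimal sketch of the driving loop; the
# validate helper below is an assumption for illustration (the real loop
# lives in the worker/rule code):
def validate(model, recorder, count):
    # val_iter resets current_v/subb_v back to zero itself once the last
    # sub-batch of the last file has been consumed.
    for _ in range(model.data.n_batch_val * model.n_subb):
        model.val_iter(count, recorder)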
def build_model(self):

    if self.verbose: print(self.name)

    # start graph construction from scratch
    import theano.tensor as T
    if seed_weight_on_pid:
        import theanompi.models.layers2 as layers
        import os
        layers.rng = np.random.RandomState(os.getpid())
    from theanompi.models.layers2 import (ConvPoolLRN, Dropout, FC,
                                          Dimshuffle, Crop, Subtract,
                                          Softmax, Flatten, LRN,
                                          Constant, Normal)

    self.x = T.ftensor4('x')
    self.y = T.lvector('y')
    self.lr = T.scalar('lr')

    subtract_layer = Subtract(input=self.x,
                              input_shape=(self.channels,
                                           self.data.width,
                                           self.data.height,
                                           self.batch_size),
                              subtract_arr=self.data.rawdata[4],
                              printinfo=self.verbose)

    crop_layer = Crop(input=subtract_layer,
                      output_shape=(self.channels,
                                    self.input_width,
                                    self.input_height,
                                    self.batch_size),
                      flag_batch=batch_crop_mirror,
                      printinfo=self.verbose)

    convpool_layer1 = ConvPoolLRN(input=crop_layer,
                                  input_shape=(self.channels,
                                               self.input_width,
                                               self.input_height,
                                               self.batch_size),
                                  filter_shape=(3, 11, 11, 96),
                                  convstride=4, padsize=0, group=1,
                                  poolsize=3, poolstride=2,
                                  b=0.0, lrn=True,
                                  lib_conv=lib_conv,
                                  printinfo=self.verbose
                                  # output_shape = (96, 27, 27, batch_size)
                                  )

    convpool_layer2 = ConvPoolLRN(input=convpool_layer1,
                                  # input_shape=(96, 27, 27, batch_size),
                                  filter_shape=(96, 5, 5, 256),
                                  convstride=1, padsize=2, group=2,
                                  poolsize=3, poolstride=2,
                                  b=0.1, lrn=True,
                                  lib_conv=lib_conv,
                                  printinfo=self.verbose
                                  # output_shape=(256, 13, 13, batch_size),
                                  )

    convpool_layer3 = ConvPoolLRN(input=convpool_layer2,
                                  # input_shape=(256, 13, 13, batch_size),
                                  filter_shape=(256, 3, 3, 384),
                                  convstride=1, padsize=1, group=1,
                                  poolsize=1, poolstride=0,
                                  b=0.0, lrn=False,
                                  lib_conv=lib_conv,
                                  printinfo=self.verbose
                                  # output_shape=(384, 13, 13, batch_size),
                                  )

    convpool_layer4 = ConvPoolLRN(input=convpool_layer3,
                                  # input_shape=(384, 13, 13, batch_size),
                                  filter_shape=(384, 3, 3, 384),
                                  convstride=1, padsize=1, group=2,
                                  poolsize=1, poolstride=0,
                                  b=0.1, lrn=False,
                                  lib_conv=lib_conv,
                                  printinfo=self.verbose
                                  # output_shape=(384, 13, 13, batch_size),
                                  )

    convpool_layer5 = ConvPoolLRN(input=convpool_layer4,
                                  # input_shape=(384, 13, 13, batch_size),
                                  filter_shape=(384, 3, 3, 256),
                                  convstride=1, padsize=1, group=2,
                                  poolsize=3, poolstride=2,
                                  b=0.0, lrn=False,
                                  lib_conv=lib_conv,
                                  printinfo=self.verbose
                                  # output_shape=(256, 6, 6, batch_size),
                                  )

    shuffle = Dimshuffle(input=convpool_layer5,
                         new_axis_order=(3, 0, 1, 2),
                         printinfo=self.verbose)

    fc_layer6_input = Flatten(input=shuffle,
                              # input_shape=(batch_size, 256, 6, 6),
                              axis=2,
                              printinfo=self.verbose)

    fc_layer6 = FC(input=fc_layer6_input,
                   # n_in=9216,
                   n_out=4096,
                   W=Normal((fc_layer6_input.output_shape[1], 4096), std=0.005),
                   b=Constant((4096,), val=0.1),
                   printinfo=self.verbose)

    dropout_layer6 = Dropout(input=fc_layer6,
                             # n_in=4096,
                             n_out=fc_layer6.output_shape[1],
                             prob_drop=0.5,
                             printinfo=self.verbose)

    fc_layer7 = FC(input=dropout_layer6,
                   # n_in=4096,
                   n_out=4096,
                   W=Normal((dropout_layer6.output_shape[1], 4096), std=0.005),
                   b=Constant((4096,), val=0.1),
                   printinfo=self.verbose)

    dropout_layer7 = Dropout(input=fc_layer7,
                             # n_in=4096,
                             n_out=fc_layer7.output_shape[1],
                             prob_drop=0.5,
                             printinfo=self.verbose)

    softmax_layer8 = Softmax(input=dropout_layer7,
                             # n_in=4096,
                             n_out=self.n_softmax_out,
                             W=Normal((dropout_layer7.output_shape[1],
                                       self.n_softmax_out),
                                      mean=0, std=0.01),
                             b=Constant((self.n_softmax_out,), val=0),
                             printinfo=self.verbose)

    self.output_layer = softmax_layer8

    self.cost = softmax_layer8.negative_log_likelihood(self.y)
    self.error = softmax_layer8.errors(self.y)
    self.error_top_5 = softmax_layer8.errors_top_x(self.y)
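# build_model only assembles the symbolic graph; the update step is compiled
# elsewhere in theanompi. Below is a minimal sketch of what such a compile
# step could look like, assuming a flat params list gathered from the layers;
# the compile_train_fn name and the plain-SGD rule are assumptions for
# illustration, not the library's actual update scheme.
import theano
import theano.tensor as T

def compile_train_fn(model, params):
    # backprop through the cost defined above and apply vanilla SGD
    grads = T.grad(model.cost, params)
    updates = [(p, p - model.lr * g) for p, g in zip(params, grads)]
    return theano.function([model.x, model.y, model.lr],
                           [model.cost, model.error],
                           updates=updates)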
def build_model(self):

    if self.verbose: print(self.name)

    import theano.tensor as T
    if seed_weight_on_pid:
        import theanompi.models.layers2 as layers
        import os
        layers.rng = np.random.RandomState(os.getpid())

    self.x = T.ftensor4('x')  # c01b
    self.y = T.lvector('y')
    self.lr = T.scalar('lr')

    input_shuffle = Dimshuffle(self.x,
                               input_shape=(self.channels,
                                            self.input_width,
                                            self.input_height,
                                            self.batch_size),
                               new_axis_order=(3, 0, 1, 2),
                               printinfo=True)

    conv_7x7 = ConvPoolLRN_bc01(input=input_shuffle,
                                convstride=2, padsize=3,
                                poolsize=3, poolstride=2, poolpad=1,
                                W=Normal((64, 3, 7, 7), mean=0.0, std=0.1),
                                b=Constant((64,), val=0.2),
                                lrn=True,
                                lib_conv='cudnn',
                                printinfo=self.verbose)
    # conv output shape = (112x112x64); after pooling = (56x56x64)

    conv_r3x3 = Conv(input=conv_7x7,
                     # image_shape=(batch_size, 64, 56, 56),
                     convstride=1, padsize=0,
                     W=Normal((64, 64, 1, 1), mean=0.0, std=0.1),
                     b=Constant((64,), val=0.2),
                     lib_conv='cudnn',
                     printinfo=self.verbose)
    # output shape = (56x56x64)

    conv_3x3 = ConvPoolLRN_bc01(input=conv_r3x3,
                                # image_shape=(batch_size, 64, 56, 56),
                                convstride=1, padsize=1,
                                poolsize=3, poolstride=2, poolpad=1,
                                W=Normal((192, 64, 3, 3), mean=0.0, std=0.03),
                                b=Constant((192,), val=0.2),
                                lrn=True,
                                lib_conv='cudnn',
                                printinfo=self.verbose)
    # conv output shape = (56x56x192); after pooling = (28x28x192)

    incep3a = Incept(input=conv_3x3,
                     # input_shape = (batch_size, 192, 28, 28)
                     n1x1=64, nr3x3=96, n3x3=128,
                     nr5x5=16, n5x5=32, npj=32,
                     lib_conv=lib_conv,
                     printinfo=self.verbose)
    # output shape = (28x28x256)

    incep3b = Incept(input=incep3a,
                     # input_shape = (256, 28, 28, batch_size),
                     n1x1=128, nr3x3=128, n3x3=192,
                     nr5x5=32, n5x5=96, npj=64,
                     lib_conv=lib_conv,
                     printinfo=self.verbose)
    # output shape = (28x28x480)

    # lrn3 = self.lrn_func(incep3b.output)  # optional LRN here

    pool3 = Pool(input=incep3b,
                 poolsize=3, poolstride=2, poolpad=1,
                 printinfo=self.verbose)
    # output shape = (14x14x480)

    incep4a = Incept(input=pool3,
                     # input_shape = (480, 14, 14, batch_size),
                     n1x1=192, nr3x3=96, n3x3=208,
                     nr5x5=16, n5x5=48, npj=64,
                     lib_conv=lib_conv,
                     printinfo=self.verbose)
    # output shape = (14x14x512)

    incep4b = Incept(input=incep4a,
                     # input_shape = (512, 14, 14, batch_size),
                     n1x1=160, nr3x3=112, n3x3=224,
                     nr5x5=24, n5x5=64, npj=64,
                     lib_conv=lib_conv,
                     printinfo=self.verbose)
    # output shape = (14x14x512)

    incep4c = Incept(input=incep4b,
                     # input_shape = (512, 14, 14, batch_size),
                     n1x1=128, nr3x3=128, n3x3=256,
                     nr5x5=24, n5x5=64, npj=64,
                     lib_conv=lib_conv,
                     printinfo=self.verbose)
    # output shape = (14x14x512)

    incep4d = Incept(input=incep4c,
                     # input_shape = (512, 14, 14, batch_size),
                     n1x1=112, nr3x3=144, n3x3=288,
                     nr5x5=32, n5x5=64, npj=64,
                     lib_conv=lib_conv,
                     printinfo=self.verbose)
    # output shape = (14x14x528)

    incep4e = Incept(input=incep4d,
                     # input_shape = (528, 14, 14, batch_size),
                     n1x1=256, nr3x3=160, n3x3=320,
                     nr5x5=32, n5x5=128, npj=128,
                     lib_conv=lib_conv,
                     printinfo=self.verbose)
    # output shape = (14x14x832)

    lrn4 = LRN(input=incep4e,
               printinfo=self.verbose)  # turn on only this for 16data, 53s/5120images

    pool4 = Pool(input=lrn4,  # incep4e,
                 poolsize=3, poolstride=2, poolpad=1,
                 printinfo=self.verbose)
    # output shape = (7x7x832)

    incep5a = Incept(input=pool4,
                     # input_shape = (832, 7, 7, batch_size),
                     n1x1=256, nr3x3=160, n3x3=320,
                     nr5x5=32, n5x5=128, npj=128,
                     lib_conv=lib_conv,
                     printinfo=self.verbose)
    # output shape = (7x7x832)

    incep5b = Incept(input=incep5a,
                     # input_shape = (832, 7, 7, batch_size),
                     n1x1=384, nr3x3=192, n3x3=384,
                     nr5x5=48, n5x5=128, npj=128,
                     lib_conv=lib_conv,
                     printinfo=self.verbose)
    # output shape = (7x7x1024)

    # lrn5 = self.lrn_func(incep5b.output)  # turn on only this for 16data, 51s/5120images

    poolx = Pool(input=incep5b,
                 poolsize=7, poolstride=1, poolpad=0,
                 mode='average',
                 printinfo=self.verbose)
    # output shape = (1x1x1024)

    l_flatten = Flatten(input=poolx,
                        axis=2,
                        printinfo=self.verbose)
    # output shape = (1024)

    dropout = Dropout(input=l_flatten,
                      # n_in=1024,
                      n_out=l_flatten.output_shape[1],
                      prob_drop=0.4,
                      printinfo=self.verbose)
    # output shape = (1024)

    softmax_layer = Softmax(input=dropout,
                            # n_in=1024,
                            n_out=self.n_softmax_out,
                            printinfo=self.verbose)
    # output shape = (n_softmax_out)

    # auxiliary classifiers
    aux1 = Aux_tower(input=incep4a,
                     # input_shape=(512, 14, 14, batch_size),
                     n_softmax_out=self.n_softmax_out,
                     lib_conv=lib_conv,
                     printinfo=self.verbose)

    aux2 = Aux_tower(input=incep4d,
                     # input_shape=(528, 14, 14, batch_size),
                     n_softmax_out=self.n_softmax_out,
                     lib_conv=lib_conv,
                     printinfo=self.verbose)

    self.output_layer = softmax_layer

    self.cost = softmax_layer.negative_log_likelihood(self.y) + \
                0.3 * aux1.negative_log_likelihood(self.y) + \
                0.3 * aux2.negative_log_likelihood(self.y)
    self.error = softmax_layer.errors(self.y)
    self.error_top_5 = softmax_layer.errors_top_x(self.y)

    self.layers = get_layers(lastlayer=self.output_layer)
    self.layers.extend([aux1, aux2])
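# The 0.3 weights on the two auxiliary losses follow the GoogLeNet paper's
# discounted auxiliary classifiers; appending aux1/aux2 to self.layers is
# what exposes their parameters to the optimizer. A minimal sketch of that
# flow, assuming get_params aggregates over a layer list as it does in
# Aux_tower.__init__ below (the import location is an assumption):
import theano.tensor as T
from theanompi.models.layers2 import get_params  # assumed location

def collect_grads(model):
    # The combined cost is L_total = L_main + 0.3*L_aux1 + 0.3*L_aux2, so
    # gradients reach the shared trunk from all three classifiers; at
    # inference only model.output_layer (the main softmax) is read.
    params, weight_types = get_params(model.layers)
    return T.grad(model.cost, params)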
def __init__(self, input, n_softmax_out,
             input_shape=None, output_shape=None,
             lib_conv='cudnn', printinfo=False):

    self.get_input_shape(input, input_shape)
    self.verbose = printinfo

    layers = []
    outlayers = []

    # input shape = (14x14x512 or 528)
    pool = Pool(input=input,
                poolsize=5, poolstride=3, poolpad=0,
                mode='average',
                printinfo=self.verbose)
    # output shape = (4x4x512 or 528)

    conv1x1 = Conv(input=pool,
                   convstride=1, padsize=0,
                   W=Normal((128, self.input_shape[1], 1, 1),
                            mean=0.0, std=0.1),
                   b=Constant((128,), val=0.2),
                   lib_conv='cudnn',
                   printinfo=self.verbose)
    layers.append(conv1x1)
    # output shape = (4x4x128)

    l_flatten = Flatten(input=conv1x1,
                        axis=2,  # expand dimensions after the first dimension
                        printinfo=self.verbose)
    # output shape = (2048)

    fc = FC(input=l_flatten,
            # n_in=2048,
            n_out=1024,
            W=Normal((l_flatten.output_shape[1], 1024), mean=0, std=0.01),
            b=Constant((1024,), val=0),
            printinfo=self.verbose)
    layers.append(fc)

    drp = Dropout(input=fc,
                  # n_in=1024,
                  n_out=fc.output_shape[1],
                  prob_drop=0.7,
                  printinfo=self.verbose)

    softmax_layer = Softmax(input=drp,
                            n_out=n_softmax_out,
                            W=Normal((drp.output_shape[1], n_softmax_out),
                                     mean=0, std=0.01),
                            b=Constant((n_softmax_out,), val=0),
                            printinfo=self.verbose)
    layers.append(softmax_layer)

    self.output = softmax_layer.p_y_given_x
    self.negative_log_likelihood = softmax_layer.negative_log_likelihood
    self.params, self.weight_type = get_params(layers)

    if output_shape:
        self.output_shape = output_shape
    else:
        self.output_shape = self.get_output_shape(self.input_shape)

    self.name = 'AuxTower ({})'.format(lib_conv)
    if printinfo:
        self.print_shape()
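# A minimal usage sketch, mirroring how build_model above hangs the towers
# off intermediate Inception blocks; the shapes and the n_softmax_out value
# are assumptions taken from the comments there.
aux = Aux_tower(input=incep4a,        # (512, 14, 14, batch_size) feature map
                n_softmax_out=1000,   # e.g. ImageNet classes; an assumption here
                lib_conv='cudnn',
                printinfo=True)
aux_loss = aux.negative_log_likelihood(y)  # discounted by 0.3 in the total cost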
def build_model(self):

    if self.verbose: print(self.name)

    # start graph construction from scratch
    import theano.tensor as T
    if seed_weight_on_pid:
        import theanompi.models.layers2 as layers
        import os
        layers.rng = np.random.RandomState(os.getpid())
    from theanompi.models.layers2 import (Conv, Pool, Dropout, FC,
                                          Subtract, Crop, Dimshuffle,
                                          Softmax, Flatten, LRN,
                                          Constant, Normal)

    self.x = T.ftensor4('x')
    self.y = T.lvector('y')
    self.lr = T.scalar('lr')

    subtract_layer = Subtract(input=self.x,
                              input_shape=(self.channels,
                                           self.data.width,
                                           self.data.height,
                                           self.batch_size),
                              subtract_arr=self.data.rawdata[4],
                              printinfo=self.verbose)

    crop_layer = Crop(input=subtract_layer,
                      output_shape=(self.channels,
                                    self.input_width,
                                    self.input_height,
                                    self.batch_size),
                      flag_batch=self.batch_crop_mirror,
                      printinfo=self.verbose)

    shuffle = Dimshuffle(input=crop_layer,
                         new_axis_order=(3, 0, 1, 2),
                         printinfo=self.verbose)
    # bc01 from now on

    conv_5x5 = Conv(input=shuffle,
                    input_shape=(self.batch_size,
                                 self.channels,
                                 self.input_width,
                                 self.input_height),  # (b, 3, 28, 28)
                    convstride=1, padsize=0,
                    W=Normal((64, self.channels, 5, 5), std=0.05),  # bc01
                    b=Constant((64,), val=0),
                    printinfo=self.verbose
                    # output_shape = (b, 64, 24, 24)
                    )

    pool_2x2 = Pool(input=conv_5x5,
                    # input_shape=conv_5x5.output_shape,  # (b, 64, 24, 24)
                    poolsize=2, poolstride=2, poolpad=0,
                    mode='max',
                    printinfo=self.verbose
                    # output_shape = (b, 64, 12, 12)
                    )

    conv_5x5 = Conv(input=pool_2x2,
                    # input_shape=pool_2x2.output_shape,  # (b, 64, 12, 12)
                    convstride=1, padsize=0,
                    W=Normal((128, pool_2x2.output_shape[1], 5, 5), std=0.05),  # bc01
                    b=Constant((128,), val=0),
                    printinfo=self.verbose
                    # output_shape = (b, 128, 8, 8)
                    )

    pool_2x2 = Pool(input=conv_5x5,
                    # input_shape=conv_5x5.output_shape,  # (b, 128, 8, 8)
                    poolsize=2, poolstride=2, poolpad=0,
                    mode='max',
                    printinfo=self.verbose
                    # output_shape = (b, 128, 4, 4)
                    )

    # note: this layer uses 3x3 filters (it was misleadingly named conv_5x5)
    conv_3x3 = Conv(input=pool_2x2,
                    # input_shape=pool_2x2.output_shape,  # (b, 128, 4, 4)
                    convstride=1, padsize=0,
                    W=Normal((64, pool_2x2.output_shape[1], 3, 3), std=0.05),  # bc01
                    b=Constant((64,), val=0),
                    printinfo=self.verbose
                    # output_shape = (b, 64, 2, 2)
                    )

    flatten = Flatten(input=conv_3x3,
                      # input_shape=conv_3x3.output_shape,  # (b, 64, 2, 2)
                      axis=2,  # expand dimensions after the first dimension
                      printinfo=self.verbose
                      # output_shape = (b, 64*2*2)
                      )

    fc_256 = FC(input=flatten,
                n_out=256,
                W=Normal((flatten.output_shape[1], 256), std=0.001),
                b=Constant((256,), val=0),
                printinfo=self.verbose)

    dropout = Dropout(input=fc_256,
                      n_out=fc_256.output_shape[1],
                      prob_drop=0.5,
                      printinfo=self.verbose)

    softmax = Softmax(input=dropout,
                      n_out=self.n_softmax_out,
                      W=Normal((dropout.output_shape[1], self.n_softmax_out),
                               std=0.005),
                      b=Constant((self.n_softmax_out,), val=0),
                      printinfo=self.verbose)

    self.output_layer = softmax

    self.cost = softmax.negative_log_likelihood(self.y)
    self.error = softmax.errors(self.y)
    self.error_top_5 = softmax.errors_top_x(self.y)
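# The shape comments above follow the standard valid-convolution/pooling
# arithmetic, out = (in + 2*pad - k) // stride + 1. A quick standalone check
# of the chain from the 28x28 crop down to the final 2x2 map (the out_size
# helper is illustrative only, not part of the model):
def out_size(n, k, stride=1, pad=0):
    # output spatial size of a conv/pool window of size k
    return (n + 2 * pad - k) // stride + 1

assert out_size(28, 5) == 24             # conv 5x5, pad 0, stride 1
assert out_size(24, 2, stride=2) == 12   # max-pool 2x2
assert out_size(12, 5) == 8              # conv 5x5
assert out_size(8, 2, stride=2) == 4     # max-pool 2x2
assert out_size(4, 3) == 2               # final conv, 3x3 filters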