def conv2D_fixed(I, W, FL_O, WL_O=16):
    '''
    Stride-1, unpadded 2-D convolution of fixed-point operands, written to
    mimic the 16-b MAC array hardware implementation.

    The kernel is NOT flipped: output pixel (x, y) pairs input pixel
    (x + xw, y + yw) with kernel tap (xw, yw), i.e. a cross-correlation.

    Number formats used by the active code path:
        product P:        word length I.WL + W.WL, fraction length I.FL + W.FL
        accumulator:      word length 32,          fraction length I.FL + W.FL
        O after rounding: word length WL_O,        fraction length FL_O

    (A variant that rounds each product to 16 b, uses FL_P = I.FL+W.FL-16 and
    a 22-b accumulator exists only as code the original author disabled.)

    Args:
        I: fixed-point input whose ``shape`` unpacks to (NB, NI, XI, YI);
            ``value``, ``WL``, ``FL`` and ``device`` are read from it.
        W: fixed-point kernels whose ``shape`` unpacks to (NO, NI, XW, YW);
            ``value``, ``WL`` and ``FL`` are read from it.
        FL_O: fraction length of the rounded output.
        WL_O: word length of the rounded output (default 16).

    Returns:
        The rounded result reshaped to (NB, XO, YO, NO) and permuted to
        (NB, NO, XO, YO), where XO = XI + 1 - XW and YO = YI + 1 - YW.
    '''
    NB, NI, XI, YI = I.shape
    NO, NI, XW, YW = W.shape
    XO = XI + 1 - XW
    YO = YI + 1 - YW
    FL_P = I.FL + W.FL

    # Channel-major view (NI, NB, XI, YI) so one slice copies a whole tap.
    IV = I.value.transpose(0, 1)
    WV = W.value

    # im2col: one (NI, NB, XO, YO) window per kernel tap.
    I_reshaped = torch.empty(XW * YW, NI, NB, XO, YO,
                             dtype=torch.float32, device=I.device)
    # `range` instead of the Python-2-only `xrange`, which raises NameError
    # on Python 3; iteration order and values are identical.
    for i, (xw, yw) in enumerate(itertools.product(range(XW), range(YW))):
        I_reshaped[i, :, :, :, :] = IV[:, :, xw:xw + XO, yw:yw + YO]

    # Rows are ordered (channel, tap) to line up with W's flattened
    # (NI, XW, YW) axes below.
    I_reshaped = I_reshaped.transpose(0, 1).reshape(NI * XW * YW,
                                                    NB * XO * YO)
    W_reshaped = WV.reshape(NO, NI * XW * YW).t()

    # One outer product per (channel, tap) row; every individual product is
    # converted to int64 before the accumulation.
    P = fixed(
        torch.bmm(I_reshaped.unsqueeze(2).to(torch.float64),
                  W_reshaped.unsqueeze(1).to(torch.float64)).to(torch.int64),
        I.WL + W.WL, I.FL + W.FL)

    # NOTE(review): the meaning of the third `round` argument (True) is
    # defined by the `fixed` class, which is not visible here.
    O = fixed(torch.sum(P.value, 0), 32, FL_P).round(WL_O, FL_O, True)
    return O.reshape(NB, XO, YO, NO).permute(0, 3, 1, 2)
def apply_weight_gradients(self, learning_rate, momentum, batch_size,
                           last_group):
    """Fold scaled weight gradients into the momentum buffer and, once the
    last group has been seen, update the weights.

    Args:
        learning_rate: learning rate; divided by ``self.scale`` and
            ``batch_size`` before being quantised with ``self.FL_L_WG``.
        momentum: momentum factor, quantised with ``self.FL_M_WU``.
        batch_size: mini-batch size. When it equals ``self.num_images``,
            ``self.weight_gradients`` is indexed as a sequence of per-group
            gradients and the weight update is always performed.
        last_group: whether this call closes the mini-batch; forced to True
            in the per-group-sequence case above.
    """
    lr_fp = fixed(learning_rate / self.scale / batch_size, 16, self.FL_L_WG)

    if batch_size == self.num_images:
        group_count = len(self.weight_gradients)
        # Scale every group first, then accumulate them one after another.
        per_group = [
            (self.weight_gradients[g] * lr_fp).round(16, self.FL_WM)
            for g in range(group_count)
        ]
        for update in per_group:
            self.W_momentum += update
        last_group = True
    else:
        self.W_momentum += (self.weight_gradients * lr_fp).round(
            16, self.FL_WM)

    if not last_group:
        return

    # Undo the learning-rate scaling and bring the step to the weight format.
    scale_fp = fixed(self.scale, 16, self.FL_L_WU)
    scaled_WM = (scale_fp * self.W_momentum).round(16, self.FL_W)

    # Diagnostic only: warn when more than 95% of the step is exactly zero.
    step_values = scaled_WM.value.cpu().numpy()
    total_params = np.size(step_values)
    zero_grad = total_params - np.count_nonzero(step_values)
    wtgrad_sparsity = zero_grad * 100.0 / total_params
    if wtgrad_sparsity > 95.0:
        print('WARNING..!%s has almost zero wt gradients' % (self.name))

    self.W -= scaled_WM

    # Decay the momentum buffer for the next mini-batch.
    momentum_fp = fixed(momentum, 16, self.FL_M_WU)
    self.W_momentum = (self.W_momentum * momentum_fp).round(16, self.FL_WM)
def feed_forward(self, input, train_or_test):
    """Forward pass of a 2x2, stride-2 max-pooling layer.

    The four members of every 2x2 window are gathered with strided slices,
    flattened, stacked along a new leading axis and reduced with max/argmax
    over that axis.

    Args:
        input: fixed-point activations; ``shape[0]`` is taken as the number
            of images and ``WL``/``FL`` are copied to the output.
            NOTE(review): the strided slices of ``input`` must be accepted
            by ``torch.reshape``; confirm against the ``fixed`` class.
        train_or_test: accepted for interface compatibility; not used here.

    Returns:
        ``self.output``: the pooled maxima wrapped as ``fixed`` with shape
        (num_images, num_channels, output_map_size[0], output_map_size[1]).

    Side effects:
        Stores ``input``, ``num_images``, ``num_values``,
        ``max_pooling_index`` (position 0..3 of the winner inside each
        window, in the order 00, 01, 10, 11), its flat form
        ``max_pooling_index_reshaped`` and ``output`` on ``self``.
    """
    self.input = input
    self.num_images = input.shape[0]

    out_rows, out_cols = self.output_map_size[0], self.output_map_size[1]
    self.num_values = int(self.num_images * self.num_channels *
                          out_rows * out_cols)
    pooled_shape = (self.num_images, self.num_channels, out_rows, out_cols)

    # Window members in the order (0,0), (0,1), (1,0), (1,1).
    corners = tuple(
        torch.reshape(self.input[:, :, row::2, col::2], (self.num_values, ))
        for row in (0, 1) for col in (0, 1))
    stacked = torch.stack(corners, dim=0)

    winner_flat = torch.argmax(stacked, dim=0)
    self.max_pooling_index = torch.reshape(winner_flat, pooled_shape)
    self.max_pooling_index_reshaped = winner_flat

    maxima_flat, _ = torch.max(stacked, dim=0)
    self.output = fixed(torch.reshape(maxima_flat, pooled_shape),
                        input.WL, input.FL)
    return self.output
def matmul_fixed(I, W, FL_O, WL_O=16):
    '''
    Fixed-point matrix product I @ W, written to mimic the 16-b MAC array
    hardware implementation.

    The product is accumulated one inner index at a time: for each ni the
    outer product of column ni of I and row ni of W is converted to int64,
    wrapped as a fixed-point value and added to the accumulator.

    Number formats used by the active code path:
        product P:        word length I.WL + W.WL, fraction length I.FL + W.FL
        accumulator:      word length 32,          fraction length I.FL + W.FL
        O after rounding: word length WL_O,        fraction length FL_O

    Args:
        I: fixed-point matrix whose ``shape`` unpacks to (NB, NI).
        W: fixed-point matrix whose ``shape`` unpacks to (NI, NO).
        FL_O: fraction length of the rounded output.
        WL_O: word length of the rounded output (default 16).

    Returns:
        The accumulator after ``round(WL_O, FL_O, True)``; it was created
        with shape (NB, NO).
    '''
    NB, NI = I.shape
    NI, NO = W.shape

    product_WL = I.WL + W.WL
    product_FL = I.FL + W.FL

    lhs = I.value.to(torch.float64)
    rhs = W.value.to(torch.float64)

    accumulator = zeros((NB, NO), 32, product_FL)
    for ni in range(NI):
        partial = torch.ger(lhs[:, ni], rhs[ni, :]).to(torch.int64)
        accumulator += fixed(partial, product_WL, product_FL)

    return accumulator.round(WL_O, FL_O, True)
def process_dataset(train_X, test_X, valid_X, train_y, test_y, valid_y):
    """Convert the dataset splits to their training-time representation.

    Image arrays are wrapped with ``fixed(..., 16, FL_A_input)``; label
    arrays are turned into int64 torch tensors via ``torch.from_numpy``.
    The shape of every converted split is logged at INFO level.

    Returns:
        (train_X, test_X, valid_X, train_y, test_y, valid_y) after
        conversion, in the same order as the parameters.
    """
    train_X, valid_X, test_X = (
        fixed(images, 16, FL_A_input)
        for images in (train_X, valid_X, test_X))
    train_y, valid_y, test_y = (
        torch.from_numpy(labels).to(torch.int64)
        for labels in (train_y, valid_y, test_y))

    shape_report = (
        ('train_X.shape %s', train_X),
        ('test_X.shape %s ', test_X),
        ('valid_X.shape %s', valid_X),
        ('train_y.shape %s', train_y),
        ('test_y.shape %s ', test_y),
        ('valid_y.shape %s\n', valid_y),
    )
    for template, split in shape_report:
        logging.info(template % str(split.shape))

    return train_X, test_X, valid_X, train_y, test_y, valid_y
def apply_weight_gradients_mask(self, learning_rate, momentum, batch_size,
                                last_group, layer_mask):
    """Masked variant of the weight update, used for segmented training.

    Scaled gradients are accumulated into ``self.W_momentum``. On the last
    group the resulting step is multiplied element-wise by ``layer_mask``
    before being subtracted from ``self.W``: per the original author,
    mask = 0 freezes a weight and mask = 1 lets it be updated.

    Args:
        learning_rate: learning rate; divided by ``self.scale`` and
            ``batch_size`` before being quantised with ``self.FL_L_WG``.
        momentum: momentum factor, quantised with ``self.FL_M_WU``.
        batch_size: mini-batch size. When it equals ``self.num_images``,
            ``self.weight_gradients`` is indexed as a sequence of per-group
            gradients and the weight update is always performed.
        last_group: whether this call closes the mini-batch; forced to True
            in the per-group-sequence case above.
        layer_mask: update mask, quantised with ``fixed(layer_mask, 16, 15)``.
    """
    lr_fp = fixed(learning_rate / self.scale / batch_size, 16, self.FL_L_WG)

    if self.num_images == batch_size:
        for g in range(len(self.weight_gradients)):
            # scaled weight gradient of group g
            self.W_momentum += (self.weight_gradients[g] * lr_fp).round(
                16, self.FL_WM)
        last_group = True
    else:
        self.W_momentum += (self.weight_gradients * lr_fp).round(
            16, self.FL_WM)

    if not last_group:
        return

    # Undo the learning-rate scaling and bring the step to the weight format.
    scale_fp = fixed(self.scale, 16, self.FL_L_WU)
    scaled_WM = (scale_fp * self.W_momentum).round(16, self.FL_W)

    # Diagnostic only: warn when more than 95% of the step is exactly zero.
    step_values = scaled_WM.value.cpu().numpy()
    total_params = np.size(step_values)
    zero_grad = total_params - np.count_nonzero(step_values)
    wtgrad_sparsity = zero_grad * 100.0 / total_params
    if wtgrad_sparsity > 95.0:
        print('Warning..!%s has almost zero wt gradients' % (self.name))

    # Zero out the step wherever the mask is 0, then apply it.
    fixed_layer_mask = fixed(layer_mask, 16, 15)
    self.W -= (fixed_layer_mask * scaled_WM).round(16, self.FL_W)

    # The momentum buffer itself is decayed unmasked.
    momentum_fp = fixed(momentum, 16, self.FL_M_WU)
    self.W_momentum = (self.W_momentum * momentum_fp).round(16, self.FL_WM)
def rmac(x, h, precision):
    """Convolve the sample list x with the coefficient list h, rounding
    every product to `precision` before it is accumulated.

    x is padded on both sides with len(h) zero samples built from the base
    of x[0], so x must be a non-empty list. One accumulated value is
    produced for each padded position from len(h) up to the end, giving
    len(x) + len(h) results.

    Returns:
        list of accumulators, each started as fixed(0, precision).
    """
    taps = len(h)
    xbase = x[0].base

    # Zero padding makes every x[xpos - k] lookup below valid.
    padding = [fixed(0, xbase) for _ in range(taps)]
    padded = padding + x + padding

    y = []
    for xpos in range(taps, len(padded)):
        acc = fixed(0, precision)
        for k, coeff in enumerate(h):
            acc += (coeff * padded[xpos - k]).convrnd(precision)
        y.append(acc)
    return y
def xtobuf(x, xbase):
    """Lay x out as a 256-entry buffer that can be read starting at xbase.

    The list is modified IN PLACE and also returned: it is padded with
    fixed(0, 15) up to 256 entries (longer lists are left as they are),
    reversed, and then rotated right by xbase + 1 positions (no rotation
    when xbase < 0).
    """
    missing = 256 - len(x)
    x.extend(fixed(0, 15) for _ in range(missing))
    x.reverse()
    # Rotate right one step at a time: the last entry moves to the front.
    for _ in range(xbase + 1):
        x.insert(0, x.pop())
    return x
def weight_gradient(self, groups=0):
    """Compute the weight gradients as input^T @ local_gradients.

    Args:
        groups: when > 0, the stored batch is cut into `groups` consecutive
            slices of int(num_images / groups) images and
            ``self.weight_gradients`` becomes a list with one gradient per
            slice; otherwise a single gradient over the whole batch is
            stored.
    """
    if not groups > 0:
        self.weight_gradients = matmul_fixed(
            self.input.copy().transpose(0, 1), self.local_gradients,
            self.FL_WG)
        return

    group_size = int(self.num_images / groups)
    per_group = []
    for g in range(groups):
        rows = slice(g * group_size, (g + 1) * group_size)
        inputs_g = fixed(self.input[rows], self.input.WL, self.input.FL)
        grads_g = fixed(self.local_gradients[rows],
                        self.local_gradients.WL, self.local_gradients.FL)
        per_group.append(
            matmul_fixed(inputs_g.transpose(0, 1), grads_g, self.FL_WG))
    self.weight_gradients = per_group
def valid_single_image(image_idx, X, y):
    """Run the network on the single image X[image_idx] and score it.

    The image is sliced with its batch axis kept, wrapped with
    ``fixed(..., 16, FL_A_input)`` and passed to ``cnn.feed_forward`` with
    ``train_or_test=0`` and ``record=True``.

    Returns:
        1 minus the number of mismatches between prediction and label, so
        1 for a correct prediction and 0 for a wrong one.
    """
    one = slice(image_idx, image_idx + 1)
    target = y[one]

    predictions, _ = cnn.feed_forward(
        fixed(X[one, ...], 16, FL_A_input),
        target,
        train_or_test=0,
        record=True)

    valid_error = 0. + torch.sum(predictions.cpu() != target).numpy()
    valid_acc = 1 - valid_error
    logging.info('Testing one single image, idx {}'.format(image_idx))
    return valid_acc
def feed_backward(self, output_gradients, skip=False):
    """Backward pass of the convolution layer.

    The incoming gradients are masked by the stored activation and dropout
    derivatives, zero-padded by kernel_size - 1 - pads on each spatial side
    and convolved with the weights after swapping their first two axes and
    reversing both spatial axes.

    Args:
        output_gradients: fixed-point gradients w.r.t. this layer's output;
            its WL/FL are reused for ``self.local_gradients``.
        skip: when true, only ``self.local_gradients`` is computed and None
            is returned.

    Returns:
        ``self.input_gradients`` (rounded with ``self.FL_DI``), or None when
        `skip` is set.
    """
    grad_WL, grad_FL = output_gradients.WL, output_gradients.FL
    self.local_gradients = fixed(
        output_gradients.value * self.activation_derivatives,
        grad_WL, grad_FL)
    self.local_gradients = fixed(
        self.local_gradients.value * self.dropout_derivatives,
        grad_WL, grad_FL)
    if skip:
        return None

    pads_out = int(self.kernel_size - 1 - self.pads)
    if pads_out > 0:
        out_rows, out_cols = self.output_map_size[0], self.output_map_size[1]
        padded = zeros(
            (self.num_images, self.num_filters,
             out_rows + 2 * pads_out, out_cols + 2 * pads_out),
            self.local_gradients.WL, self.local_gradients.FL)
        padded[:, :,
               pads_out:pads_out + out_rows,
               pads_out:pads_out + out_cols] = self.local_gradients.value
    else:
        padded = self.local_gradients.copy()

    # Work on a copy so self.W itself is never modified.
    W_transposed = self.W.copy().transpose(0, 1)
    flip_index = torch.arange(start=self.kernel_size - 1, end=-1, step=-1,
                              dtype=torch.long, device=self.W.device)
    # Reverse both spatial axes of the kernel.
    for axis in (2, 3):
        W_transposed.value = torch.index_select(W_transposed.value, axis,
                                                flip_index)

    self.input_gradients = conv2D_fixed(padded, W_transposed, self.FL_DI)
    return self.input_gradients
def weight_gradient(self, groups=0):
    """Compute the convolution weight gradients.

    The padded input and the local gradients both have their first two axes
    swapped and are then convolved with each other; the result has its
    first two axes swapped again before it is stored.

    Args:
        groups: when > 0, the stored batch is cut into `groups` consecutive
            slices of int(num_images / groups) images and
            ``self.weight_gradients`` becomes a list with one gradient per
            slice; otherwise a single gradient over the whole batch is
            stored.
    """
    if groups > 0:
        group_size = int(self.num_images / groups)
        per_group = []
        for g in range(groups):
            rows = slice(g * group_size, (g + 1) * group_size)
            inputs_g = fixed(self.input_padded[rows],
                             self.input_padded.WL, self.input_padded.FL)
            grads_g = fixed(self.local_gradients[rows],
                            self.local_gradients.WL,
                            self.local_gradients.FL)
            per_group.append(
                conv2D_fixed(inputs_g.transpose(0, 1),
                             grads_g.transpose(0, 1),
                             self.FL_WG).transpose(0, 1))
        self.weight_gradients = per_group
    else:
        input_padded_transposed = self.input_padded.copy().transpose(0, 1)
        local_gradients_transposed = self.local_gradients.copy().transpose(
            0, 1)
        gradients = conv2D_fixed(input_padded_transposed,
                                 local_gradients_transposed, self.FL_WG)
        # Keep the value returned by transpose, exactly as the grouped
        # branch does. The previous code discarded it, which is only
        # correct if `fixed.transpose` works in place; assigning the result
        # is right for both in-place and copying implementations.
        self.weight_gradients = gradients.transpose(0, 1)
def feed_forward(self, input, train_or_test):
    '''
    Forward pass of the convolution layer: zero padding, fixed-point
    convolution, dropout (or its test-time scaling) and ReLU.

    input shape: (batch_size, num_channels, input_map_size[0],
                  input_map_size[1])

    Note from the original author: the weights are not flipped before the
    convolution, unlike numpy and other implementations.

    train_or_test == 0 selects the test path, where the convolved values
    are multiplied by dropout_prob; any other value selects the training
    path, which calls ``self.convolved.dropout(self.dropout_prob)`` and
    stores the returned derivatives as int64. The test path does not assign
    ``self.dropout_derivatives``.

    Returns ``self.output``. ``self.drop_convolved``, ``self.biased`` and
    ``self.output`` all refer to the same object, whose ``value`` is
    rectified in place; no bias is added.
    '''
    self.num_images = input.shape[0]

    # Pad zeros
    border = self.pads
    if border > 0:
        rows, cols = input.shape[2], input.shape[3]
        self.input_padded = zeros(
            (self.num_images, self.num_channels,
             rows + 2 * border, cols + 2 * border),
            WL=input.WL, FL=input.FL)
        self.input_padded[:, :,
                          border:border + rows,
                          border:border + cols] = input.value
    else:
        self.input_padded = input.copy()

    # Convolution
    self.convolved = conv2D_fixed(self.input_padded, self.W, self.FL_AO)

    if train_or_test == 0:
        # Testing: scale by the dropout probability instead of dropping.
        p_r = fixed(self.dropout_prob, self.convolved.WL, self.convolved.FL)
        self.drop_convolved = (self.convolved * p_r).round(
            self.convolved.WL, self.convolved.FL, True)
    else:
        dropped, derivatives = self.convolved.dropout(self.dropout_prob)
        self.drop_convolved = dropped
        self.dropout_derivatives = derivatives.to(torch.int64)

    activated = self.drop_convolved
    self.biased = activated
    # Taken before rectification; note the mask uses >= 0 while the ReLU
    # below keeps only values > 0.
    self.activation_derivatives = (activated.value >= 0).to(torch.int64)
    self.output = activated
    activated.value = torch.where(activated.value > 0, activated.value,
                                  torch.zeros_like(activated.value))
    return self.output
def __init__(self,
             name,
             input_dim,
             num_units,
             FL_W,
             FL_WG,
             FL_AO,
             FL_DI,
             FL_WM,
             FL_L_WG,
             FL_L_WU,
             FL_M_WU,
             scale,
             relu=True,
             dropout_prob=1):
    '''
    Set up a simple fully connected layer.

    Weights of shape (input_dim, num_units) are drawn uniformly from
    [-b, b) with b = sqrt(6 / (input_dim + num_units)) and quantised with
    ``fixed(..., 16, FL_W)``; the momentum buffer starts as zeros with
    fraction length FL_WM. No bias term is created here.

    The FL_* arguments, `scale`, `relu`, `name` and `dropout_prob` are
    stored unchanged as attributes of the same name; ``self.type`` is set
    to 'FC'.
    '''
    self.name = name
    self.type = 'FC'
    self.input_dim = input_dim
    self.num_units = num_units
    self.relu = relu
    self.dropout_prob = dropout_prob
    self.scale = scale

    # Fraction lengths, kept for the forward/backward/update methods.
    self.FL_W = FL_W
    self.FL_WG = FL_WG
    self.FL_AO = FL_AO
    self.FL_DI = FL_DI
    self.FL_WM = FL_WM
    self.FL_L_WG = FL_L_WG
    self.FL_L_WU = FL_L_WU
    self.FL_M_WU = FL_M_WU

    weight_shape = (input_dim, num_units)
    weight_bound = np.sqrt(6. / (input_dim + num_units))
    initial_weights = np.random.uniform(low=-weight_bound,
                                        high=weight_bound,
                                        size=weight_shape)
    self.W = fixed(initial_weights, 16, FL_W)
    self.W_momentum = zeros(weight_shape, 16, FL_WM)
def feed_backward(self, output_gradients):
    """Backward pass of the fully connected layer.

    With ``self.relu`` set, the incoming gradients are masked by the stored
    activation derivatives (keeping their WL/FL); otherwise they are used
    as they are. The input gradients are the local gradients multiplied by
    the transpose of a copy of the weights, rounded with ``self.FL_DI``.

    Returns:
        ``self.input_gradients``.
    """
    gradients = output_gradients
    if self.relu:
        gradients = fixed(
            output_gradients.value * self.activation_derivatives,
            output_gradients.WL, output_gradients.FL)
    self.local_gradients = gradients

    # Transpose a copy so self.W keeps its layout.
    weights_transposed = self.W.copy().transpose(0, 1)
    self.input_gradients = matmul_fixed(self.local_gradients,
                                        weights_transposed, self.FL_DI)
    return self.input_gradients
def valid(num_image, X, y):
    """Evaluate the network on (X, y) in batches of 40 images.

    Each batch is wrapped with ``fixed(..., 16, FL_A_input)`` and passed to
    ``cnn.feed_forward`` with ``train_or_test=0``.

    Returns:
        The accuracy in percent, 100 - 100 * errors / num_image. The mean
        batch loss is computed as well but not returned.

    NOTE(review): only int(num_image / 40) full batches are evaluated, yet
    the error count is divided by num_image, so images beyond the last full
    batch count as correct. With fewer than 40 images the loss division
    raises ZeroDivisionError.
    """
    batch_size_valid = 40
    num_batches = int(num_image / batch_size_valid)

    valid_error = 0.
    valid_loss = 0.
    for start in range(0, num_batches * batch_size_valid, batch_size_valid):
        stop = start + batch_size_valid
        targets = y[start:stop]
        predictions, batch_loss = cnn.feed_forward(
            fixed(X[start:stop], 16, FL_A_input),
            targets,
            train_or_test=0)
        valid_error += torch.sum(predictions.cpu() != targets).numpy()
        valid_loss += batch_loss

    valid_error /= num_image
    valid_loss /= num_batches
    return 100 - (valid_error * 100)
def sim_rmac(x, h):
    """Simulate one rounding multiply-accumulate pass over 144 taps.

    Per the original author, x is assumed to be 16-bit and h 22-bit
    fixed-point. Each product x[i] * h[i] is truncated with trunc(n - 1)
    (n = 26) before being added to an accumulator created as
    fixed(0, 7 + n). The total is rounded with convrnd(15) and its ``val``
    is saturated to the signed 16-bit range [-32768, 32767].

    A one-line summary (raw sum, rounded value, saturated value) is
    printed.

    Returns:
        The rounded object with its ``val`` saturated.
    """
    # n is the precision.
    n = 26
    acc = fixed(0, 7 + n)
    for i in range(144):
        p = x[i] * h[i]
        # n - 1 rather than n because of the way the accumulation behaves
        # (original author's note).
        acc += p.trunc(n - 1)

    yrnd = acc.convrnd(15)
    # Remember the rounded value now: ytrunk is the SAME object as yrnd, so
    # saturating it below would otherwise make the report print the clamped
    # value twice.
    rounded_val = yrnd.val
    ytrunk = yrnd
    if yrnd.val > 32767:
        ytrunk.val = 32767
    elif yrnd.val < -32768:
        ytrunk.val = -32768

    # Single-argument print(...) works on both Python 2 and Python 3.
    print("we have a sum %d, convrnd to %d, with trunk %d" %
          (acc.val, rounded_val, ytrunk.val))
    return ytrunk
def feed_forward(self, logits, labels=None):
    '''
    Make predictions and, if labels are provided, evaluate the loss.

    logits: fixed-point scores, (num_images, num_classes)
    labels: (num_images,) integer class indices, or None

    The labels are encoded as +1 for the true class and -1 elsewhere,
    expressed in the logits' fixed-point format. The per-element loss is
    (logit - target)^2 / 2, set to zero wherever logit * target > 1, and
    the stored loss is its mean.

    Returns ``self.predictions`` alone when labels is None, otherwise the
    tuple (``self.predictions``, ``self.loss``).
    '''
    self.num_images = logits.shape[0]
    self.logits = logits
    self.predictions = torch.argmax(logits.value, dim=1)
    if labels is None:
        return self.predictions

    # One row of the identity matrix per label gives the one-hot encoding.
    one_hot = torch.eye(self.num_classes, dtype=torch.int64).index_select(
        0, labels.type(torch.long))
    self.labels = fixed((2 * one_hot - 1) * (2**logits.FL),
                        logits.WL, logits.FL)

    logits_real = self.logits.get_real_torch()
    targets_real = self.labels.get_real_torch()

    squared_error = (logits_real - targets_real)**2 / 2.
    self.conditions = logits_real * targets_real
    clipped_loss = torch.where(self.conditions > 1.,
                               torch.zeros_like(squared_error),
                               squared_error)
    self.loss = torch.mean(clipped_loss)
    return self.predictions, self.loss
IX = np.random.permutation(np.arange(edge_image_train)) train_X_shuffled = train_edge_x[IX, :] train_y_shuffled = train_edge_y[IX] wrong_predictions = 0 train_loss = 0 logging.info('Epoch {} train_loss {:.4f}'.format(i, train_loss)) print('\nTraining.....Mask applied\n') # weight update function for j in range(int(num_batches)): # logging.info("Epoch %d (%d/%d)" % (i+1, j+1, num_batches)) train_X_mb = train_X_shuffled[j * batch_size:(j + 1) * batch_size] # mini-batch train_y_mb = train_y_shuffled[j * batch_size:(j + 1) * batch_size] for k in range(num_groups): # number_gropu = batch_size train_X_mg = fixed( train_X_mb[k * group_size:(k + 1) * group_size], 16, FL_A_input ) # convert dataset to fixed-point, 16+FL_A_input train_y_mg = train_y_mb[k * group_size:(k + 1) * group_size] predictions, loss = cnn.feed_forward(train_X_mg, train_y_mg, train_or_test=1) # logging.info('Epoch {} Batch {} Loss {:.4f}'.format(i, j, loss)) cnn.feed_backward() cnn.weight_gradient() if k == num_groups - 1: cnn.apply_weight_gradients(Learning_Rate, args.momentum, batch_size, True, mask) else: cnn.apply_weight_gradients(Learning_Rate, args.momentum,
def main():
    """Run two small rmac smoke tests and print their results.

    Every print uses the single-argument call form, which produces the
    same output on Python 2 and is valid syntax on Python 3 (the previous
    print statements were a SyntaxError there).
    """
    # simple test:
    x = [fixed(2**15, 16), fixed(2**14, 16), fixed(2**15, 16)]
    h = [fixed(2**21, 22), fixed(2**21, 22), fixed(0, 22)]
    print(rmac(x, h, 24))

    x = [fixed(2**17, 16), fixed(2**15, 16), fixed(2**15, 16)]
    h = [fixed(2**21, 22), fixed(2**21, 22), fixed(2**21, 22)]
    y = rmac(x, h, 24)
    print(y)

    print("test")
    print(overf(convrnd(y, 16), 1))
def __init__(self, input_map_size, num_filters, num_channels, kernel_size,
             pads, FL_W, FL_WM, FL_WG, FL_AO, FL_DI, FL_L_WG, FL_L_WU,
             FL_M_WU, scale, name, dropout_prob):
    '''
    Set up a simple 2D convolutional layer with stride 1.

    Weights of shape (num_filters, num_channels, kernel_size, kernel_size)
    are drawn from a zero-mean normal distribution with
    std = sqrt(2 / fan_in), fan_in = num_channels * kernel_size^2, and
    quantised with ``fixed(..., 16, FL_W)``. The momentum buffer starts as
    zeros with fraction length FL_WM. No bias term is created here.

    Per the original author, the convolution is implemented as matmul,
    inspired by https://github.com/wiseodd/hipsternet

    input_map_size: (rows, cols) of the unpadded input map
    pads:    zero padding added on every spatial side
    FL_W:    fraction length of weights
    FL_WM:   fraction length of weight momentum
    FL_WG:   presumably the fraction length of weight gradients
             (not described by the original author)
    FL_AO:   fraction length of output activations
    FL_DI:   fraction length of previous-layer local gradients
    FL_L_WG: fraction length of learning rate used in weight gradient
             scaling
    FL_L_WU: fraction length of learning rate used in weight update
    FL_M_WU: fraction length of momentum factor used in weight update
    scale:   scaling factor of learning rate used in weight gradient
             scaling and weight update to make full use of 16-b
             representation
    '''
    self.name = name
    self.type = 'Conv'
    self.input_map_size = input_map_size
    self.num_filters = num_filters
    self.num_channels = num_channels
    self.kernel_size = kernel_size
    self.pads = pads
    self.dropout_prob = dropout_prob
    self.scale = scale

    # Fraction lengths, kept for the forward/backward/update methods.
    self.FL_W = FL_W
    self.FL_WM = FL_WM
    self.FL_WG = FL_WG
    self.FL_AO = FL_AO
    self.FL_DI = FL_DI
    self.FL_L_WG = FL_L_WG
    self.FL_L_WU = FL_L_WU
    self.FL_M_WU = FL_M_WU

    # Stride-1 output extent: in + 2 * pads + 1 - kernel, per spatial axis.
    self.output_map_size = tuple(
        int(extent + 2 * pads + 1 - kernel_size)
        for extent in (input_map_size[0], input_map_size[1]))

    weight_shape = (num_filters, num_channels, kernel_size, kernel_size)
    fan_in = num_channels * kernel_size * kernel_size
    # fan_out is computed as in the original but not used by the
    # initialisation below.
    fan_out = num_filters * kernel_size * kernel_size
    weight_std = np.sqrt(2. / fan_in)

    initial_weights = np.random.normal(loc=0.0, scale=weight_std,
                                       size=weight_shape)
    self.W = fixed(initial_weights, 16, FL_W)
    self.W_momentum = zeros(weight_shape, 16, FL_WM)
""" % (__version__, date.strftime('%Y-%m-%d %H:%M:%S'), SAMPLE_RATE/DOWNSAMPLE_RATE, args.fran[0], args.fran[1], args.step, N_CHANNEL, sample, args.raw, RHEA_temp) for freq in fran: try: # s.set_freq(freq, 0, 0) # s.set_freq(freq) s.set_freq(freq, 0, 0, False) s.set_freq(freq, 0, 1) r.clear() # s.iq_tgl # sleep(args.time + args.time*0.5) s.iq_toggle('start') sleep(args.time*1.5*N_CHANNEL) ts, i, q = fixed(r, dsize) # i = zip(*[iter(i)]*N_CHANNEL) # q = zip(*[iter(q)]*N_CHANNEL) _i = np.array(i) _q = np.array(q) _i = _i.reshape(-1, N_CHANNEL) _q = _q.reshape(-1, N_CHANNEL) i_mean, q_mean, i_std, q_std = [], [], [], [] for k in range(N_CHANNEL): i_mean.append(np.mean(_i[:,k])) q_mean.append(np.mean(_q[:,k])) i_std.append(np.std(_i[:,k])) q_std.append(np.std(_q[:,k])) # i_mean = np.mean(i) # q_mean = np.mean(q) # i_std = np.std(i)
FL_M_WU_conv_4 = 16 FL_M_WU_conv_5 = 16 FL_M_WU_fc = 16 scale = 2 # Load and preprocess CIFAR10 dataset data = sio.loadmat('../CIFAR10.mat') train_X = data['train_X'] valid_X = data['valid_X'] test_X = data['test_X'] train_y = np.argmax(data['train_y'], axis=1) valid_y = np.argmax(data['valid_y'], axis=1) test_y = np.argmax(data['test_y'], axis=1) train_X = fixed(train_X, 16, FL_A_input) valid_X = fixed(valid_X, 16, FL_A_input) test_X = fixed(test_X, 16, FL_A_input) train_y = torch.from_numpy(train_y).to(torch.int64) valid_y = torch.from_numpy(valid_y).to(torch.int64) test_y = torch.from_numpy(test_y).to(torch.int64) # import pdb; pdb.set_trace() # Build CNN for CIFAR10 cnn.append_layer('Conv_fixed', name='conv_0', input_map_size=(32,32), num_filters=16, num_channels=3, kernel_size=3, pads=1, FL_AO=FL_A_conv_0,
task_division = list(map(int, args.task_division.split(","))) cloud_list = task_list[0:task_division[0]] logging.info('cloud list %s\n' % (cloud_list)) num_image_train = 45000 / 10 * task_division[0] num_image_test = 10000 / 10 * task_division[0] num_image_valid = 5000 / 10 * task_division[0] train_X = train_X[0:num_image_train] valid_X = valid_X[0:num_image_valid] test_X = test_X[0:num_image_test] train_y = train_y[0:num_image_train] valid_y = valid_y[0:num_image_valid] test_y = test_y[0:num_image_test] train_X = fixed(train_X, 16, FL_A_input) valid_X = fixed(valid_X, 16, FL_A_input) test_X = fixed(test_X, 16, FL_A_input) train_y = torch.from_numpy(train_y).to(torch.int64) valid_y = torch.from_numpy(valid_y).to(torch.int64) test_y = torch.from_numpy(test_y).to(torch.int64) logging.info('train_X.shape %s' % str(train_X.shape)) logging.info('test_X.shape %s ' % str(test_X.shape)) logging.info('valid_X.shape %s' % str(valid_X.shape)) logging.info('train_y.shape %s' % str(train_y.shape)) logging.info('test_y.shape %s ' % str(test_y.shape)) logging.info('valid_y.shape %s' % str(valid_y.shape)) # Build CNN for CIFAR10 cnn.append_layer('Conv_fixed', name='conv_0',
for i in range(xbase + 1): r = p.pop() p.insert(0, r) return p x = [] h = [] # impulse and impulse for i in range (256): x.append(fixed(0, 15)) h.append(fixed(0,21)) x[0] = fixed(32767, 15) h[0] = fixed(2097151, 21) bufs = bufferset() bufs.add_vector(x, 0, h, fixed(32767, 15)) # h[n] = 1-e, x[n] = lsb # to check and make sure we're only MACing 120 times for i in range (256): x[i] = fixed(1, 15) h[i] = fixed(2097151, 22)
# test_y = test_y[0: num_image_test] cloud_image_train = 45000 / 10 * task_division[0] cloud_image_test = 10000 / 10 * task_division[0] cloud_image_valid = 5000 / 10 * task_division[0] # cloud data # cloud data train_cloud_x = train_X[0:cloud_image_train] valid_cloud_x = valid_X[0:cloud_image_valid] test_cloud_x = test_X[0:cloud_image_test] train_cloud_y = train_y[0:cloud_image_train] valid_cloud_y = valid_y[0:cloud_image_valid] test_cloud_y = test_y[0:cloud_image_test] train_cloud_x = fixed(train_cloud_x, 16, FL_A_input) valid_cloud_x = fixed(valid_cloud_x, 16, FL_A_input) test_cloud_x = fixed(test_cloud_x, 16, FL_A_input) train_cloud_y = torch.from_numpy(train_cloud_y).to(torch.int64) valid_cloud_y = torch.from_numpy(valid_cloud_y).to(torch.int64) test_cloud_y = torch.from_numpy(test_cloud_y).to(torch.int64) logging.info('train_cloud_x.shape %s' % str(train_cloud_x.shape)) logging.info('test_cloud_x.shape %s ' % str(test_cloud_x.shape)) logging.info('valid_cloud_x.shape %s' % str(valid_cloud_x.shape)) logging.info('train_cloud_y.shape %s' % str(train_cloud_y.shape)) logging.info('test_cloud_y.shape %s ' % str(test_cloud_y.shape)) logging.info('valid_cloud_y.shape %s' % str(valid_cloud_y.shape)) # Build CNN for CIFAR10 cnn.append_layer('Conv_fixed',
scale_fc = 1.05 * 2.**(FL_L_WG_fc - 15) * args.LR_start / args.batch_size scale = 2 global_vals = dict(globals().items()) # for name in sorted(global_vals.iterkeys()): # if name.startswith('FL') or name.startswith('scale') or name.startswith('LR'): # print("%s: %s" % (name, global_vals[name])) # Load and preprocess CIFAR10 dataset data = sio.loadmat('../CIFAR10.mat') train_X = data['train_X'] valid_X = data['valid_X'] test_X = data['test_X'] train_y = np.argmax(data['train_y'], axis=1) valid_y = np.argmax(data['valid_y'], axis=1) test_y = np.argmax(data['test_y'], axis=1) train_X = fixed(train_X, 16, FL_A_input) valid_X = fixed(valid_X, 16, FL_A_input) test_X = fixed(test_X, 16, FL_A_input) train_y = torch.from_numpy(train_y).to(torch.int64) valid_y = torch.from_numpy(valid_y).to(torch.int64) test_y = torch.from_numpy(test_y).to(torch.int64) # Build CNN for CIFAR10 cnn.append_layer('Conv_fixed', name='conv_0', input_map_size=(32, 32), num_filters=16 * args.filter_mult, num_channels=3, kernel_size=3, pads=1, FL_AO=FL_A_conv_0, FL_DI=FL_D_input,