def tick(self):
    """Receive at most one output word and validate once the map is complete.

    Nothing happens once ``self.pass_done`` reads true or while
    ``self.arch_output_chn`` has no valid word.  Otherwise one word is
    popped and its first ``self.chn_per_word`` values are written to
    ``self.ofmap[x, y, c]``, where ``(x, y)`` is derived from
    ``self.fmap_idx`` and the channel range from ``self.curr_set``.

    After ``self.arr_x // self.chn_per_word`` words the pixel index
    advances.  When every pixel of ``self.image_size`` has been received,
    ``self.pass_done`` is set and the result is compared to
    ``self.reference``.

    Raises:
        Finish: ``"Success"`` if ``self.ofmap`` equals ``self.reference``
            element-wise, ``"Validation Failed"`` otherwise (after
            printing both arrays and their difference).
    """
    if self.pass_done.rd():
        return

    # Words needed to cover all channels of a single pixel.
    out_sets = self.arr_x // self.chn_per_word
    fmap_per_iteration = self.image_size[0] * self.image_size[1]

    if not self.arch_output_chn.valid():
        return

    data = list(self.arch_output_chn.pop())

    # fmap_idx walks the first image dimension fastest.
    x = self.fmap_idx % self.image_size[0]
    y = self.fmap_idx // self.image_size[0]

    if self.curr_set < out_sets:
        cmin = self.curr_set * self.chn_per_word
        for offset in range(self.chn_per_word):
            self.ofmap[x, y, cmin + offset] = data[offset]
    self.curr_set += 1

    # All channel groups of this pixel received: move on to the next pixel.
    if self.curr_set == out_sets:
        self.curr_set = 0
        self.fmap_idx += 1

    if self.fmap_idx != fmap_per_iteration:
        return

    # Whole output map received: latch completion, then validate.
    self.fmap_idx = 0
    self.pass_done.wr(True)
    if np.all(self.ofmap == self.reference):
        raise Finish("Success")

    print(self.ofmap)
    print(self.reference)
    print(self.ofmap - self.reference)
    raise Finish("Validation Failed")
def tick(self):
    """Receive one output word of a multi-pass run, forwarding psums on even passes.

    Nothing happens once ``self.pass_done`` reads true.  A word is only
    popped from ``self.arch_output_chn`` when it is valid and either
    ``self.psum_chn`` has vacancy or the current pass is odd (odd passes
    do not forward anything).

    On even passes the popped word is pushed to ``self.psum_chn``.  In
    every pass the word is also stored in ``self.ofmap[x, y, c]``; for
    ``self.curr_pass > 1`` the channel range is shifted by 8.

    ``self.curr_pass`` is only read here; it is not advanced by this
    method.

    Raises:
        Finish: when the last pixel of the final pass
            (``self.curr_pass == self.num_passes - 1``) has been
            received: ``"Success"`` if ``self.ofmap`` equals
            ``self.reference``, ``"Validation Failed"`` otherwise.
    """
    if self.pass_done.rd():
        return

    out_sets = self.arr_x // self.chn_per_word
    fmap_per_iteration = self.image_size[0] * self.image_size[1]

    odd_pass = self.curr_pass % 2 == 1
    if not self.arch_output_chn.valid():
        return
    # Even passes forward the word, so they must wait for room downstream.
    if not (self.psum_chn.vacancy() or odd_pass):
        return

    data = list(self.arch_output_chn.pop())
    if not odd_pass:
        # push ofmap psum to serializer on pass 0 and 2
        self.psum_chn.push(data)

    x = self.fmap_idx % self.image_size[0]
    y = self.fmap_idx // self.image_size[0]

    if self.curr_set < out_sets:
        # NOTE(review): the offset 8 is hard-coded; presumably it is the
        # number of output channels produced by the earlier passes -- confirm.
        channel_offset = 8 if self.curr_pass > 1 else 0
        cmin = self.curr_set * self.chn_per_word + channel_offset
        for offset in range(self.chn_per_word):
            self.ofmap[x, y, cmin + offset] = data[offset]
    self.curr_set += 1

    if self.curr_set == out_sets:
        self.curr_set = 0
        self.fmap_idx += 1

    if self.fmap_idx != fmap_per_iteration:
        return

    self.fmap_idx = 0
    # NOTE(review): validation is taken to run only after the final pass;
    # the flattened source left this nesting ambiguous.
    if self.curr_pass != self.num_passes - 1:
        return

    self.pass_done.wr(True)
    if np.all(self.ofmap == self.reference):
        raise Finish("Success")

    print(self.ofmap)
    print(self.reference)
    print(self.ofmap - self.reference)
    raise Finish("Validation Failed")
def tick(self):
    """Collect one word of final psums per call and check the finished map.

    Returns immediately when ``self.pass_done`` reads true or when
    ``self.arch_output_chn`` has nothing valid.  Otherwise a word is popped
    and written into ``self.ofmap`` at the pixel selected by
    ``self.fmap_idx`` and the channel group selected by ``self.curr_set``.

    Raises:
        Finish: once all ``image_size[0] * image_size[1]`` pixels have
            been received -- ``"Success"`` when ``self.ofmap`` matches
            ``self.reference``, ``"Validation Failed"`` otherwise (both
            arrays and their difference are printed first).
    """
    if self.pass_done.rd():
        return

    # How many psum packets we expect to receive per pixel
    out_sets = self.arr_x // self.chn_per_word
    fmap_per_iteration = self.image_size[0] * self.image_size[1]

    if not self.arch_output_chn.valid():
        return

    data = list(self.arch_output_chn.pop())

    # Calculate the coordinates where these final psums must go
    x = self.fmap_idx % self.image_size[0]
    y = self.fmap_idx // self.image_size[0]

    if self.curr_set < out_sets:
        first_chn = self.curr_set * self.chn_per_word
        for k in range(self.chn_per_word):
            self.ofmap[x, y, first_chn + k] = data[k]
    self.curr_set += 1

    # After receiving all the elements for pixel 0, do pixel 1, etc.
    if self.curr_set == out_sets:
        self.curr_set = 0
        self.fmap_idx += 1

    if self.fmap_idx != fmap_per_iteration:
        return

    # Every pixel is in: mark the pass finished before reporting the verdict.
    self.fmap_idx = 0
    self.pass_done.wr(True)
    if np.all(self.ofmap == self.reference):
        raise Finish("Success")

    print(self.ofmap)
    print(self.reference)
    print(self.ofmap - self.reference)
    raise Finish("Validation Failed")
def tick(self):
    """Feed one slice of test data and drain one output word per call.

    Input side: while ``self.in_chn`` has vacancy and ``self.iteration``
    has not reached ``self.iterations + 1``, a slice of
    ``self.input_size`` values from ``self.test_data[self.iteration]`` is
    pushed.  After ``self.in_sets`` slices the iteration counter advances.

    Output side: a valid word on ``self.out_chn`` is popped and printed,
    and ``self.out_counter`` is incremented.

    Raises:
        Finish: ``"Check manually"`` once ``self.out_counter`` equals
            ``self.iterations``; outputs are only printed, never compared.
    """
    # NOTE(review): feeding continues through iteration == self.iterations,
    # so test_data[self.iterations] is read too -- confirm that row exists
    # (possibly an intentional flush iteration).
    if self.in_chn.vacancy() and self.iteration != self.iterations + 1:
        row = self.test_data[self.iteration]
        imin = self.curr_set * self.input_size
        imax = imin + self.input_size
        self.in_chn.push([row[i] for i in range(imin, imax)])

        self.curr_set += 1
        if self.curr_set == self.in_sets:
            self.curr_set = 0
            self.iteration += 1

    if self.out_chn.valid():
        data = self.out_chn.pop()
        print(data)
        self.out_counter += 1
        if self.out_counter == self.iterations:
            raise Finish("Check manually")
def tick(self):
    """Store one location-tagged output word per call.

    Each element of a popped word is indexed as ``(tag, value)``.  The tag
    of the first element selects the pixel, so placement follows the tag
    and not the arrival order: ``x = tag // image_size[1]``,
    ``y = tag % image_size[1]``, and ``self.fmap_idx`` is overwritten with
    ``x + y * image_size[0]``.  The values are written to
    ``self.ofmap[x, y, c]`` for the channel group selected by
    ``self.curr_set``.

    Raises:
        AssertionError: if a target ``self.ofmap`` entry is already
            non-zero (an existing value would be replaced).
        Finish: ``"Done processing"`` when, after a pixel's last channel
            group, ``self.fmap_idx + 1`` reaches
            ``image_size[0] * image_size[1]``; no comparison against a
            reference is made here.
    """
    if self.pass_done.rd():
        return

    out_sets = self.out_chn // self.block_size
    fmap_per_iteration = self.image_size[0] * self.image_size[1]

    if not self.arch_output_chn.valid():
        return

    rcvd = self.arch_output_chn.pop()
    # Only the first element's tag is used to locate the whole word.
    position_tag = rcvd[0][0]
    data = [e[1] for e in rcvd]

    x = position_tag // self.image_size[1]
    y = position_tag % self.image_size[1]
    self.fmap_idx = x + y * self.image_size[0]

    if self.curr_set < out_sets:
        cmin = self.curr_set * self.block_size
        for offset in range(self.block_size):
            c = cmin + offset
            assert self.ofmap[x, y, c] == 0  # should never replace an existing value
            self.ofmap[x, y, c] = data[offset]
    self.curr_set += 1

    if self.curr_set == out_sets:
        self.curr_set = 0
        self.fmap_idx += 1

    # NOTE(review): completion is detected from the index of the pixel just
    # received, i.e. it assumes the highest-indexed pixel arrives last.
    if self.fmap_idx == fmap_per_iteration:
        self.fmap_idx = 0
        self.pass_done.wr(True)
        raise Finish("Done processing")
def tick(self):
    """Receive tiled output words, then validate on the tick after completion.

    While ``self.pass_done`` reads false, one valid word is popped from
    ``self.arch_output_chn`` per call and written to
    ``self.ofmap[x, y, c]``.  ``self.curr_tile`` (0..3) selects a 2x2 tile
    origin and ``self.fmap_idx`` the position inside the tile.  The tile
    index advances after each complete channel group, and the in-tile
    position advances after all four tiles.  ``self.pass_done`` is set
    after four in-tile positions.

    Once ``self.pass_done`` reads true, the call compares ``self.ofmap``
    with ``self.reference`` and does not receive anything.

    Raises:
        Finish: ``"Success"`` if the arrays are element-wise equal,
            ``"Validation Failed"`` otherwise (after printing both arrays
            and their difference).
    """
    if self.pass_done.rd():
        # Disabled post-processing, kept for reference.
        # partly parallelized to be on chip:
        # x_idx = (self.curr_tile // 2)*2
        # y_idx = (self.curr_tile % 2)*2
        # self.ofmap_transformed[x_idx:x_idx+2, y_idx:y_idx+2, self.curr_chn] += np.dot(self.A_T, np.dot(self.ofmap[:,:,self.curr_chn, self.curr_tile],self.A))
        # self.curr_tile += 1
        # if self.curr_tile == 4:
        #     self.curr_tile = 0
        #     self.ofmap_transformed[:,:,self.curr_chn] += self.bias[self.curr_chn] # add bias
        #     self.curr_chn += 1
        # if self.curr_chn == 8:
        #     print ("reference shape: ", self.reference.shape)
        #     print ("ofmap shape: ", self.ofmap.shape)
        # FOR LOOPS USED B/C NOT COUNTING OFF CHIP PROCESSING IN PERFORMANCE STATISTICS (will unroll loops in on chip processing)
        # for k in range(8):
        #     self.ofmap_transformed[:,:,k] += self.bias[k] # add bias
        #     for t in range(self.num_tiles):
        #         x_idx = (t // 2)*2
        #         y_idx = (t % 2)*2
        #         self.ofmap_transformed[x_idx:x_idx+2,y_idx:y_idx+2,k] += np.dot(self.A_T,np.dot(self.ofmap[:,:,k,t],self.A))
        # self.finish_signal_chn.push(True)
        if np.all(self.ofmap == self.reference):
            raise Finish("Success")

        print("ofmap: ")
        print(self.ofmap)
        print("reference: ")
        print(self.reference)
        print("difference: ")
        print(self.ofmap - self.reference)
        raise Finish("Validation Failed")

    out_sets = self.arr_x // self.chn_per_word  # 2
    fmap_per_iteration = 4  # ofmap size, parametrize .. TODO

    if not self.arch_output_chn.valid():
        return

    data = list(self.arch_output_chn.pop())

    # Origin of the current 2x2 tile, then the position inside that tile.
    x_idx = (self.curr_tile // 2) * 2
    y_idx = (self.curr_tile % 2) * 2
    x = (self.fmap_idx % 2) + x_idx
    y = self.fmap_idx // 2 + y_idx

    if self.curr_set < out_sets:
        cmin = self.curr_set * self.chn_per_word
        for offset in range(self.chn_per_word):
            self.ofmap[x, y, cmin + offset] = data[offset]
    self.curr_set += 1

    # The same in-tile position is filled for every tile before moving on.
    if self.curr_set == out_sets:
        self.curr_set = 0
        self.curr_tile += 1

    if self.curr_tile == 4:
        self.fmap_idx += 1
        self.curr_tile = 0

    if self.fmap_idx == fmap_per_iteration:
        self.fmap_idx = 0
        self.curr_tile = 0
        # self.ofmap = self.ofmap//(128*128)
        self.pass_done.wr(True)
def tick(self):
    """Step through ``self.layers``, configuring a sub-testbench per layer.

    Work is done only on the first call (``self.started`` false) or when
    ``self.done_chn`` holds a completion flag.

    On completion, the finished layer's output is collected.  A ``Conv``
    layer stores one output per batch element and, after
    ``self.batch_size`` of them, applies the layer's activation and
    advances to the next layer.  Any other layer takes the FC testbench
    output, applies the activation and advances immediately.

    The current layer is then configured.  Weights and biases come from
    ``configure`` when there is no input yet, from
    ``configure_fixed_image`` when an input exists but no weights are
    cached, and from the cached values via ``configure_fixed`` otherwise.

    Raises:
        Finish: ``'Validation Failed'`` if a popped completion flag is
            falsy; ``'Success'`` once every layer has completed.
        Exception: ``'layer not valid'`` if the current layer is neither
            ``Conv`` nor ``FC``.
    """
    if self.started and not self.done_chn.valid():
        return

    self.started = True
    old_layer = self.layers[self.layer_step]

    if self.done_chn.valid():
        valid = self.done_chn.pop()
        if not valid:
            raise Finish('Validation Failed')

        if isinstance(old_layer, Conv):
            self.conv_inputs[self.batch_step] = self.conv_tb.get_output()
            self.batch_step += 1
            if self.batch_step == self.batch_size:
                # Whole batch done: activate it and hand it to the next layer.
                self.conv_inputs = list(
                    old_layer.activation(np.array(self.conv_inputs)))
                self.batch_step = 0
                self.layer_step += 1
                self.cur_conv += 1
        else:
            self.fc_input = self.fc_tb.get_output()
            self.fc_input = old_layer.activation(self.fc_input)
            self.layer_step += 1
            self.cur_fc += 1

        if self.layer_step == len(self.layers):
            raise Finish('Success')

    layer = self.layers[self.layer_step]

    # handle conv to fc transition: flatten each conv output into one row
    if (isinstance(layer, FC) and self.fc_input is None
            and self.conv_inputs[0] is not None):
        if self.name is not None:
            self.output_file.write("FC MODE\n")
        self.fc_input = np.zeros(
            (self.batch_size, layer.input_size), dtype=np.int64)
        for i in range(self.batch_size):
            self.fc_input[i] = self.conv_inputs[i].reshape(layer.input_size)

    if isinstance(layer, Conv):
        if self.name is not None:
            self.output_file.write("CONV MODE\n")
        if self.conv_inputs[self.batch_step] is None:
            _, weights, bias = self.conv_tb.configure(
                layer.image_size, layer.filter_size, layer.in_chn,
                layer.out_chn)
            self.conv_weights[self.cur_conv] = weights
            self.conv_bias[self.cur_conv] = bias
        elif (self.conv_weights[self.cur_conv] is None
                or self.conv_bias[self.cur_conv] is None):
            weights, bias = self.conv_tb.configure_fixed_image(
                self.conv_inputs[self.batch_step], layer.filter_size,
                layer.in_chn, layer.out_chn)
            self.conv_weights[self.cur_conv] = weights
            self.conv_bias[self.cur_conv] = bias
        else:
            self.conv_tb.configure_fixed(
                self.conv_inputs[self.batch_step],
                self.conv_weights[self.cur_conv],
                self.conv_bias[self.cur_conv])
    elif isinstance(layer, FC):
        if self.fc_input is None:
            _, weights, bias = self.fc_tb.configure(
                self.batch_size, layer.input_size, layer.output_size)
            self.fc_weights[self.cur_fc] = weights
            self.fc_bias[self.cur_fc] = bias
        elif (self.fc_weights[self.cur_fc] is None
                or self.fc_bias[self.cur_fc] is None):
            weights, bias = self.fc_tb.configure_fixed_image(
                self.fc_input, layer.output_size)
            self.fc_weights[self.cur_fc] = weights
            self.fc_bias[self.cur_fc] = bias
        else:
            self.fc_tb.configure_fixed(
                self.fc_input,
                self.fc_weights[self.cur_fc],
                self.fc_bias[self.cur_fc])
    else:
        raise Exception('layer not valid')
def tick(self):
    """Drive the input traces one entry per call and check the collected output.

    For each of the weights, ifmap and psum input channels: if the channel
    has vacancy and its trace is not exhausted, the current entry is pushed
    as a one-element list unless it is NaN, and the trace index advances
    either way.  Once a trace is exhausted its ``*_done`` flag is set.

    A valid word on ``self.psum_out_chn`` is popped and its elements are
    appended to ``self.result``.

    Raises:
        Finish: when ``self.result`` has as many entries as
            ``self.psum_otrace`` -- ``'Success'`` if they are equal;
            otherwise both lists and their element-wise difference are
            printed and ``Finish()`` is raised without a message.
    """
    # ------------------------------------------------------------------
    # Send traces into the design's input channels, cycle by cycle
    # ------------------------------------------------------------------
    # weights input trace
    if self.weights_in_chn.vacancy():
        if self.weights_itrace_idx < len(self.weights_itrace):
            value = self.weights_itrace[self.weights_itrace_idx]
            if not np.isnan(value):  # NaN entries send nothing
                self.weights_in_chn.push([value])
            self.weights_itrace_idx += 1
        else:
            self.weights_done = True

    # ifmap input trace
    if self.ifmap_chn.vacancy():
        if self.ifmap_itrace_idx < len(self.ifmap_itrace):
            value = self.ifmap_itrace[self.ifmap_itrace_idx]
            if not np.isnan(value):
                self.ifmap_chn.push([value])
            self.ifmap_itrace_idx += 1
        else:
            self.ifmap_done = True

    # psum input trace
    if self.psum_in_chn.vacancy():
        if self.psum_itrace_idx < len(self.psum_itrace):
            value = self.psum_itrace[self.psum_itrace_idx]
            if not np.isnan(value):
                self.psum_in_chn.push([value])
            self.psum_itrace_idx += 1
        else:
            self.psum_done = True

    # ------------------------------------------------------------------
    # Collect output from the design's output channel
    # ------------------------------------------------------------------
    if self.psum_out_chn.valid():
        self.result.extend(self.psum_out_chn.pop())

    # ------------------------------------------------------------------
    # Compare the design's output with the generated output trace
    # ------------------------------------------------------------------
    # NOTE(review): the check is taken to run on every call, not only after
    # a pop; the flattened source left this nesting ambiguous.
    if len(self.result) != len(self.psum_otrace):
        return

    if self.result == self.psum_otrace:
        raise Finish('Success')

    print('Failed')
    print('---------ofmap-----------------')
    print(self.result)
    print('----------ofmap_trace------------')
    print(self.psum_otrace)
    print('---------difference------------')
    # Build a fresh list: aliasing self.result here would overwrite the
    # collected outputs with the differences.
    difference = [got - want
                  for got, want in zip(self.result, self.psum_otrace)]
    print(difference)
    raise Finish()