def generate(self, model_len=Constant.MODEL_LEN, model_width=Constant.MODEL_WIDTH):
    pooling_len = int(model_len / 4)
    graph = Graph(self.input_shape, False)
    temp_input_channel = self.input_shape[-1]
    output_node_id = 0
    # Stack ReLU-Conv-BatchNorm blocks, pooling every `pooling_len` blocks.
    for i in range(model_len):
        output_node_id = graph.add_layer(StubReLU(), output_node_id)
        output_node_id = graph.add_layer(
            StubConv(temp_input_channel, model_width, kernel_size=3), output_node_id)
        output_node_id = graph.add_layer(
            StubBatchNormalization(model_width), output_node_id)
        temp_input_channel = model_width
        if pooling_len == 0 or ((i + 1) % pooling_len == 0 and i != model_len - 1):
            output_node_id = graph.add_layer(StubPooling(), output_node_id)

    # Classifier head: flatten, dropout, then two dense layers ending in softmax.
    output_node_id = graph.add_layer(StubFlatten(), output_node_id)
    output_node_id = graph.add_layer(
        StubDropout(Constant.CONV_DROPOUT_RATE), output_node_id)
    output_node_id = graph.add_layer(
        StubDense(graph.node_list[output_node_id].shape[0], model_width),
        output_node_id)
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(
        StubDense(model_width, self.n_classes), output_node_id)
    graph.add_layer(StubSoftmax(), output_node_id)
    return graph

def wider_pre_dense(layer, n_add, weighted=True): """Get previous dense layer for current layer Args: weighted: layer: the layer from which we get wide previous dense layer n_add: output shape Returns: The previous dense layer """ if not weighted: return StubDense(layer.units + n_add, layer.activation) n_units2 = layer.units teacher_w, teacher_b = layer.get_weights() rand = np.random.randint(n_units2, size=n_add) student_w = teacher_w.copy() student_b = teacher_b.copy() # target layer update (i) for i in range(n_add): teacher_index = rand[i] new_weight = teacher_w[:, teacher_index] new_weight = new_weight[:, np.newaxis] student_w = np.concatenate( (student_w, add_noise(new_weight, student_w)), axis=1) student_b = np.append(student_b, add_noise(teacher_b[teacher_index], student_b)) new_pre_layer = StubDense(n_units2 + n_add, layer.activation) new_pre_layer.set_weights((student_w, student_b)) return new_pre_layer
def wider_pre_dense(layer, n_add, weighted=True):
    if not weighted:
        return StubDense(layer.input_units, layer.units + n_add)

    n_units2 = layer.units
    teacher_w, teacher_b = layer.get_weights()
    rand = np.random.randint(n_units2, size=n_add)
    student_w = teacher_w.copy()
    student_b = teacher_b.copy()

    # Append a copy of a random teacher unit (one row per new unit,
    # since the weight layout here is (units, input_units)).
    for i in range(n_add):
        teacher_index = rand[i]
        new_weight = teacher_w[teacher_index, :]
        new_weight = new_weight[np.newaxis, :]
        student_w = np.concatenate(
            (student_w, add_noise(new_weight, student_w)), axis=0)
        student_b = np.append(student_b, add_noise(teacher_b[teacher_index], student_b))

    new_pre_layer = StubDense(layer.input_units, n_units2 + n_add)
    new_pre_layer.set_weights((student_w, student_b))
    return new_pre_layer

def wider_next_dense(layer, start_dim, total_dim, n_add, weighted=True):
    """Widen the input of the dense layer that follows a widened layer.

    Args:
        layer: the dense layer whose input dimension grows.
        start_dim: the dimension at which the new units were inserted.
        total_dim: the total dimension of the previous layer's output.
        n_add: the number of units that were added.
        weighted: whether to copy (and pad) the teacher weights.

    Returns:
        The dense layer with the widened input.
    """
    if not weighted:
        return StubDense(layer.units, layer.activation)

    n_units = layer.units
    teacher_w, teacher_b = layer.get_weights()
    student_w = teacher_w.copy()
    n_units_each_channel = int(teacher_w.shape[0] / total_dim)

    # Insert zero rows for the new units so the layer's output is unchanged.
    new_weight = np.zeros((n_add * n_units_each_channel, teacher_w.shape[1]))
    student_w = np.concatenate(
        (student_w[:start_dim * n_units_each_channel],
         add_noise(new_weight, student_w),
         student_w[start_dim * n_units_each_channel:total_dim * n_units_each_channel]))

    new_layer = StubDense(n_units, layer.activation)
    new_layer.set_weights((student_w, teacher_b))
    return new_layer

def get_conv_dense_model():
    graph = Graph((32, 32, 3), False)
    output_node_id = 0

    # Two ReLU-Conv-BatchNorm blocks.
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)

    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)

    # Dense head.
    output_node_id = graph.add_layer(StubFlatten(), output_node_id)
    output_node_id = graph.add_layer(StubDropout(Constant.DENSE_DROPOUT_RATE), output_node_id)
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(
        StubDense(graph.node_list[output_node_id].shape[0], 5), output_node_id)
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubDense(5, 5), output_node_id)
    graph.add_layer(StubSoftmax(), output_node_id)

    graph.produce_model().set_weight_to_graph()
    return graph

def dense_to_deeper_block(dense_layer, weighted=True):
    units = dense_layer.units
    # Identity weights and zero bias keep the network function unchanged.
    weight = np.eye(units)
    bias = np.zeros(units)
    new_dense_layer = StubDense(units, units)
    if weighted:
        new_dense_layer.set_weights(
            (add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1]))))
    return [StubReLU(), new_dense_layer]

def dense_to_deeper_block(dense_layer, weighted=True):
    units = dense_layer.units
    # Identity weights and zero bias keep the network function unchanged.
    weight = np.eye(units, dtype=np.float32)
    bias = np.zeros(units, dtype=np.float32)
    new_dense_layer = StubDense(units, units)
    if weighted:
        new_dense_layer.set_weights(
            (add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1]))))
    return [StubReLU(), new_dense_layer]

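# Why the identity initialization above preserves the network function: the
# block is [StubReLU(), identity-dense], and ReLU is a no-op on activations
# that are already non-negative (i.e. the output of an existing ReLU). A
# minimal numpy sketch of that property, independent of the Stub classes and
# ignoring the small add_noise perturbation:
import numpy as np

rng = np.random.default_rng(0)
h = np.maximum(rng.standard_normal(8), 0)  # activations after an existing ReLU
weight, bias = np.eye(8), np.zeros(8)      # the block's identity initialization
assert np.allclose(np.maximum(weight @ h + bias, 0), h)  # relu(I @ h + 0) == h
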
def get_add_skip_model():
    graph = Graph((5, 5, 3), False)
    output_node_id = 0

    # Two ReLU-Conv-BatchNorm-Dropout blocks.
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)
    output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id)

    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)
    output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id)

    # First skip connection: 1x1 conv on the skip path, then add.
    temp_node_id = output_node_id
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)
    output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id)
    temp_node_id = graph.add_layer(StubConv(3, 3, 1), temp_node_id)
    output_node_id = graph.add_layer(StubAdd(), [output_node_id, temp_node_id])

    # Second skip connection.
    temp_node_id = output_node_id
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)
    output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id)
    temp_node_id = graph.add_layer(StubConv(3, 3, 1), temp_node_id)
    output_node_id = graph.add_layer(StubAdd(), [output_node_id, temp_node_id])

    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)
    output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id)

    # Dense head.
    output_node_id = graph.add_layer(StubFlatten(), output_node_id)
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(
        StubDense(graph.node_list[output_node_id].shape[0], 5), output_node_id)
    output_node_id = graph.add_layer(StubDropout(constant.DENSE_DROPOUT_RATE), output_node_id)
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubDense(5, 5), output_node_id)
    output_node_id = graph.add_layer(StubDropout(constant.DENSE_DROPOUT_RATE), output_node_id)
    graph.add_layer(StubSoftmax(), output_node_id)

    graph.produce_model().set_weight_to_graph()
    return graph

def wider_next_dense(layer, start_dim, total_dim, n_add, weighted=True):
    if not weighted:
        return StubDense(layer.input_units + n_add, layer.units)

    teacher_w, teacher_b = layer.get_weights()
    student_w = teacher_w.copy()
    n_units_each_channel = int(teacher_w.shape[1] / total_dim)

    # Insert zero columns for the new units so the layer's output is unchanged.
    new_weight = np.zeros((teacher_w.shape[0], n_add * n_units_each_channel))
    student_w = np.concatenate(
        (student_w[:, :start_dim * n_units_each_channel],
         add_noise(new_weight, student_w),
         student_w[:, start_dim * n_units_each_channel:total_dim * n_units_each_channel]),
        axis=1)

    new_layer = StubDense(layer.input_units + n_add, layer.units)
    new_layer.set_weights((student_w, teacher_b))
    return new_layer

def generate(self, model_len=constant.MODEL_LEN, model_width=constant.MODEL_WIDTH):
    pool = self._get_pool_layer_func()
    conv = get_conv_layer_func(len(self._get_shape(3)))
    ave = get_ave_layer_func(len(self._get_shape(3)))

    pooling_len = int(model_len / 4)
    model = StubModel()
    model.input_shape = self.input_shape
    model.inputs = [0]
    model.layers.append(StubInput())

    # Stack ReLU-Conv-BatchNorm-Dropout blocks, pooling every `pooling_len` blocks.
    for i in range(model_len):
        model.layers += [
            StubActivation('relu'),
            StubConv(model_width, kernel_size=3, func=conv),
            StubBatchNormalization(),
            StubDropout(constant.CONV_DROPOUT_RATE)
        ]
        if pooling_len == 0 or ((i + 1) % pooling_len == 0 and i != model_len - 1):
            model.layers.append(StubPooling(func=pool))

    # Global average pooling followed by the softmax classifier.
    model.layers.append(StubGlobalPooling(ave))
    model.layers.append(StubDense(self.n_classes, activation='softmax'))
    model.outputs = [len(model.layers)]

    # Chain the layers by assigning consecutive node ids.
    for index, layer in enumerate(model.layers):
        layer.input = index
        layer.output = index + 1
    return Graph(model, False)

def wider_next_dense(layer, start_dim, total_dim, n_add, weighted=True):
    if not weighted:
        return StubDense(layer.input_units + n_add, layer.units)

    teacher_w, teacher_b = layer.get_weights()
    student_w = teacher_w.copy()
    n_units_each_channel = int(teacher_w.shape[1] / total_dim)

    # Insert zero columns for the new units so the layer's output is unchanged.
    new_weight = np.zeros((teacher_w.shape[0], n_add * n_units_each_channel),
                          dtype=np.float32)
    student_w = np.concatenate(
        (student_w[:, :start_dim * n_units_each_channel],
         add_noise(new_weight, student_w),
         student_w[:, start_dim * n_units_each_channel:total_dim * n_units_each_channel]),
        axis=1)

    new_layer = StubDense(layer.input_units + n_add, layer.units)
    new_layer.set_weights((student_w, teacher_b))
    return new_layer

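# How wider_pre_dense and wider_next_dense fit together: the pre layer appends
# copies of randomly chosen units, and the next layer gives those new units
# zero incoming weights, so the composite function is preserved (exactly, when
# the add_noise perturbation is dropped as it is here). A minimal numpy sketch
# of the dense-to-dense case with the new units appended at the end (i.e.
# start_dim == total_dim); all dimensions are hypothetical:
import numpy as np

rng = np.random.default_rng(0)
in_dim, units, out_dim, n_add = 4, 3, 2, 2
w1 = rng.standard_normal((units, in_dim))   # pre layer, (units, input_units) layout
b1 = rng.standard_normal(units)
w2 = rng.standard_normal((out_dim, units))  # next layer
b2 = rng.standard_normal(out_dim)

x = rng.standard_normal(in_dim)
teacher_out = w2 @ np.maximum(w1 @ x + b1, 0) + b2

# Widen: duplicate random pre-layer units, give them zero next-layer weights.
idx = rng.integers(units, size=n_add)
w1_wide = np.concatenate((w1, w1[idx, :]), axis=0)
b1_wide = np.append(b1, b1[idx])
w2_wide = np.concatenate((w2, np.zeros((out_dim, n_add))), axis=1)

student_out = w2_wide @ np.maximum(w1_wide @ x + b1_wide, 0) + b2
assert np.allclose(teacher_out, student_out)  # function preserved
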
def to_stub_model(model, weighted=False):
    node_count = 0
    tensor_dict = {}
    ret = StubModel()
    ret.input_shape = model.input_shape
    for layer in model.layers:
        # Collect the layer's input and output tensors into the dictionary.
        if isinstance(layer.input, list):
            input_nodes = layer.input
        else:
            input_nodes = [layer.input]

        for node in input_nodes + [layer.output]:
            if node not in tensor_dict:
                tensor_dict[node] = StubTensor(get_int_tuple(node.shape))
                node_count += 1  # counts distinct tensors; not used below

        if isinstance(layer.input, list):
            input_id = []
            for node in layer.input:
                input_id.append(tensor_dict[node])
        else:
            input_id = tensor_dict[layer.input]
        output_id = tensor_dict[layer.output]

        # Map each Keras layer to its stub counterpart.
        if is_conv_layer(layer):
            temp_stub_layer = StubConv(layer.filters, layer.kernel_size, layer.__class__, input_id, output_id)
        elif isinstance(layer, Dense):
            temp_stub_layer = StubDense(layer.units, layer.activation, input_id, output_id)
        elif isinstance(layer, WeightedAdd):
            temp_stub_layer = StubWeightedAdd(input_id, output_id)
        elif isinstance(layer, Concatenate):
            temp_stub_layer = StubConcatenate(input_id, output_id)
        elif isinstance(layer, BatchNormalization):
            temp_stub_layer = StubBatchNormalization(input_id, output_id)
        elif isinstance(layer, Activation):
            temp_stub_layer = StubActivation(layer.activation, input_id, output_id)
        elif isinstance(layer, InputLayer):
            temp_stub_layer = StubLayer(input_id, output_id)
        elif isinstance(layer, Flatten):
            temp_stub_layer = StubFlatten(input_id, output_id)
        elif isinstance(layer, Dropout):
            temp_stub_layer = StubDropout(layer.rate, input_id, output_id)
        elif is_pooling_layer(layer):
            temp_stub_layer = StubPooling(layer.__class__, input_id, output_id)
        elif is_global_pooling_layer(layer):
            temp_stub_layer = StubGlobalPooling(layer.__class__, input_id, output_id)
        else:
            raise TypeError("The layer {} is illegal.".format(layer))

        if weighted:
            temp_stub_layer.set_weights(layer.get_weights())
        ret.add_layer(temp_stub_layer)

    ret.inputs = [tensor_dict[model.inputs[0]]]
    ret.outputs = [tensor_dict[model.outputs[0]]]
    return ret

def dense_to_deeper_block(dense_layer, weighted=True): """Get deeper layer for dense layer Args: weighted: dense_layer: the dense layer from which we get deeper layer Returns: The deeper dense layer """ units = dense_layer.units weight = np.eye(units) bias = np.zeros(units) new_dense_layer = StubDense(units, dense_layer.activation) if weighted: new_dense_layer.set_weights( (add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))) return [new_dense_layer, StubDropout(constant.DENSE_DROPOUT_RATE)]
def to_stub_model(model):
    node_count = 0
    node_to_id = {}
    ret = StubModel()
    ret.input_shape = model.input_shape
    for layer in model.layers:
        # Assign an integer id to each of the layer's input and output tensors.
        if isinstance(layer.input, list):
            input_nodes = layer.input
        else:
            input_nodes = [layer.input]

        for node in input_nodes + [layer.output]:
            if node not in node_to_id:
                node_to_id[node] = node_count
                node_count += 1

        if isinstance(layer.input, list):
            input_id = []
            for node in layer.input:
                input_id.append(node_to_id[node])
        else:
            input_id = node_to_id[layer.input]
        output_id = node_to_id[layer.output]

        # Map each Keras layer to its stub counterpart.
        if is_conv_layer(layer):
            temp_stub_layer = StubConv(layer.filters, input_id, output_id)
        elif isinstance(layer, Dense):
            temp_stub_layer = StubDense(layer.units, input_id, output_id)
        elif isinstance(layer, WeightedAdd):
            temp_stub_layer = StubWeightedAdd(input_id, output_id)
        elif isinstance(layer, Concatenate):
            temp_stub_layer = StubConcatenate(input_id, output_id)
        elif isinstance(layer, BatchNormalization):
            temp_stub_layer = StubBatchNormalization(input_id, output_id)
        elif isinstance(layer, Activation):
            temp_stub_layer = StubActivation(input_id, output_id)
        elif isinstance(layer, InputLayer):
            temp_stub_layer = StubLayer(input_id, output_id)
        elif isinstance(layer, Flatten):
            temp_stub_layer = StubLayer(input_id, output_id)
        elif isinstance(layer, Dropout):
            temp_stub_layer = StubLayer(input_id, output_id)
        elif is_pooling_layer(layer):
            temp_stub_layer = StubPooling(input_id, output_id)
        else:
            raise TypeError("The layer {} is illegal.".format(layer))
        ret.add_layer(temp_stub_layer)
    return ret

def _dense_to_deeper_layer(self, target):
    return StubDense(self._layer_width(target))

def _wider_pre_dense(self, layer, n_add):
    return StubDense(self._layer_width(layer) + n_add)

def _wider_next_dense(self, layer, start_dim, total_dim, n_add):
    return StubDense(self._layer_width(layer))