def _get_end_layer_probs_for_skipping(nn, start_layer):
    """ Returns the end layer probabilities to be used in skipping.

    Each layer of nn is marked as either eligible or ineligible to be the end
    point of a skip connection that starts at start_layer. Eligible layers share
    the probability mass equally.

    Args:
      nn: the network. This function reads nn.nn_class, nn.layer_labels,
        nn.get_distances_from_ip(), nn.get_distances_to_op(), nn.get_parents(idx)
        and, when nn.nn_class starts with 'cnn', nn.post_img_inv_sizes and
        nn.pre_img_inv_sizes.
      start_layer: index of the layer the skip connection would start from.

    Returns:
      None if no layer is eligible. Otherwise a numpy array with one entry per
      layer in nn.layer_labels; ineligible layers get 0 and the entries sum to 1.
    """
    dists_from_ip = nn.get_distances_from_ip()
    dists_to_op = nn.get_distances_to_op()
    is_a_cnn = nn.nn_class.startswith('cnn')
    end_layer_prob = []
    for layer_idx, layer_label in enumerate(nn.layer_labels):
        if (dists_from_ip[layer_idx] - 1 <= dists_from_ip[start_layer] or
                dists_to_op[layer_idx] + 1 >= dists_to_op[start_layer] or
                layer_label in ('ip', 'op', 'softmax')):
            # The end layer must be more than one step further from the input and
            # more than one step closer to the output than the start layer, and it
            # can never be an input, output or softmax layer.
            can_assign = False
        elif is_a_cnn and \
                nn.post_img_inv_sizes[start_layer] > nn.pre_img_inv_sizes[layer_idx]:
            # If the layer has an input image size *larger* than the output of the
            # start layer, then do not assign.
            can_assign = False
        elif layer_label == 'fc':
            # If it is a fully connected layer, connect with it only if it is the
            # first fc layer, i.e. all of its parents are pooling or conv layers.
            parent_labels = [nn.layer_labels[x] for x in nn.get_parents(layer_idx)]
            can_assign = all(
                is_a_pooling_layer_label(pl) or is_a_conv_layer_label(pl)
                for pl in parent_labels)
        else:
            can_assign = True
        end_layer_prob.append(1.0 if can_assign else 0.0)
    if sum(end_layer_prob) == 0:
        # No eligible end layer at all.
        return None
    end_layer_prob = np.array(end_layer_prob)
    return end_layer_prob / end_layer_prob.sum()
def get_vgg_net(num_conv_layers_per_block=4, cnn_layer_labels=None):
    """ Returns a VGG net.

    The layer sequence is: input, two conv3 layers with 64 filters, max-pool, two
    conv3 layers with 128 filters, max-pool, then three blocks (128, 256 and 512
    filters) of num_conv_layers_per_block conv3 layers each followed by a
    max-pool, and finally three fc layers (128, 256, 512 units), softmax and
    output. Layers are connected via get_feedforward_adj_mat.

    Args:
      num_conv_layers_per_block: number of conv3 layers in each of the three
        trailing blocks.
      cnn_layer_labels: passed on to ConvNeuralNetwork; when None the result of
        get_cnn_layer_labels() is used.

    Returns:
      The constructed ConvNeuralNetwork.
    """
    # NOTE(review): a second definition of get_vgg_net (different defaults,
    # avg-pool) appears later in this file; if both live in the same module the
    # later one is the one bound after import. Confirm which is intended.
    if cnn_layer_labels is None:
        cnn_layer_labels = get_cnn_layer_labels()
    # Fixed stem of the network.
    layer_labels = ['ip', 'conv3', 'conv3', 'max-pool', 'conv3', 'conv3', 'max-pool']
    num_filters_each_layer = [None, 64, 64, None, 128, 128, None]
    # Now append the repeated conv blocks, each closed by a pooling layer.
    for block_filters in (128, 256, 512):
        layer_labels += ['conv3'] * num_conv_layers_per_block + ['max-pool']
        num_filters_each_layer += [block_filters] * num_conv_layers_per_block + [None]
    # Classifier head.
    layer_labels += ['fc', 'fc', 'fc', 'softmax', 'op']
    num_filters_each_layer += [128, 256, 512, None, None]
    # Construct the connectivity matrix
    conn_mat = get_feedforward_adj_mat(len(layer_labels))
    # Conv layers get stride 1; every other layer has no stride.
    strides = [1 if is_a_conv_layer_label(label) else None for label in layer_labels]
    return ConvNeuralNetwork(layer_labels, conn_mat, num_filters_each_layer, strides,
                             cnn_layer_labels)
def get_vgg_net(num_conv_layers_per_block=2, cnn_layer_labels=None):
    """ Returns a VGG net.

    The layer sequence is: input, two conv3 layers with 64 filters, avg-pool, two
    conv3 layers with 128 filters, avg-pool, then two blocks (256 and 512
    filters) of num_conv_layers_per_block conv3 layers each followed by an
    avg-pool, and finally one fc layer with 512 units, softmax and output.
    Layers are connected via get_feedforward_adj_mat.

    Args:
      num_conv_layers_per_block: number of conv3 layers in each of the two
        trailing blocks.
      cnn_layer_labels: passed on to ConvNeuralNetwork; when None the result of
        get_cnn_layer_labels() is used.

    Returns:
      The constructed ConvNeuralNetwork.
    """
    # NOTE(review): an earlier definition of get_vgg_net (different defaults,
    # max-pool) appears above in this file; if both live in the same module this
    # later one is the one bound after import. Confirm which is intended.
    if cnn_layer_labels is None:
        cnn_layer_labels = get_cnn_layer_labels()
    # Fixed stem of the network.
    layer_labels = ['ip', 'conv3', 'conv3', 'avg-pool', 'conv3', 'conv3', 'avg-pool']
    num_filters_each_layer = [None, 64, 64, None, 128, 128, None]
    # Now append the repeated conv blocks, each closed by a pooling layer.
    for block_filters in (256, 512):
        layer_labels += ['conv3'] * num_conv_layers_per_block + ['avg-pool']
        num_filters_each_layer += [block_filters] * num_conv_layers_per_block + [None]
    # Classifier head.
    layer_labels += ['fc', 'softmax', 'op']
    num_filters_each_layer += [512, None, None]
    # Construct the connectivity matrix
    conn_mat = get_feedforward_adj_mat(len(layer_labels))
    # Conv layers get stride 1; every other layer has no stride.
    strides = [1 if is_a_conv_layer_label(label) else None for label in layer_labels]
    return ConvNeuralNetwork(layer_labels, conn_mat, num_filters_each_layer, strides,
                             cnn_layer_labels)
def _get_multidepth_cnn_eg12_common():
    """ A network with 2 softmax layers mostly for debugging common operations.

    Returns:
      A tuple (layer_labels, edges, num_filters_each_layer, cnn_layer_labels,
      strides). layer_labels and num_filters_each_layer are lists indexed by
      layer, edges is a list of (from_idx, to_idx) pairs, cnn_layer_labels is
      the result of get_cnn_layer_labels(), and strides holds 1 for conv layers
      and None for all other layers.
    """
    cnn_layer_labels = get_cnn_layer_labels()
    # One (label, number of filters) row per layer; the row position is the
    # layer index used in the edge list below.
    layer_spec = [
        ('ip', None),        # 0
        ('op', None),        # 1
        ('softmax', None),   # 2
        ('fc', 64),          # 3
        ('softmax', None),   # 4
        ('fc', 64),          # 5
        ('conv5', 128),      # 6
        ('avg-pool', None),  # 7
        ('max-pool', None),  # 8
        ('conv3', 64),       # 9
        ('conv3', 64),       # 10
        ('max-pool', None),  # 11
        ('max-pool', None),  # 12
        ('conv3', 128),      # 13
        ('conv7', 64),       # 14
    ]
    layer_labels = [label for label, _ in layer_spec]
    num_filters_each_layer = [num_filters for _, num_filters in layer_spec]
    edges = [
        (0, 14), (14, 6), (14, 9), (14, 10),
        (6, 7), (7, 3), (3, 2), (2, 1),
        (9, 8), (8, 5), (5, 4), (4, 1),
        (10, 11), (11, 13), (13, 12), (12, 5),
    ]
    strides = [1 if is_a_conv_layer_label(label) else None for label in layer_labels]
    return layer_labels, edges, num_filters_each_layer, cnn_layer_labels, strides
def get_vgg_net_chen(num_conv_layers_per_block=3, cnn_layer_labels=None):
    """ Returns a VGG-style net with stride-2 pooling layers.

    The layer sequence is: input, two conv3 layers with 64 filters, max-pool, two
    conv3 layers with 128 filters, max-pool, then three blocks (256, 512 and 512
    filters) of num_conv_layers_per_block conv3 layers each followed by a
    max-pool, and finally three fc layers (4096, 4096, 1000 units), softmax and
    output. With the default argument this gives 13 conv3 and 3 fc layers.

    Args:
      num_conv_layers_per_block: number of conv3 layers in each of the three
        trailing blocks.
      cnn_layer_labels: passed on to ConvNeuralNetwork; when None the result of
        get_cnn_layer_labels() is used.

    Returns:
      The constructed ConvNeuralNetwork.
    """
    if cnn_layer_labels is None:
        cnn_layer_labels = get_cnn_layer_labels()
    # Fixed stem of the network.
    layer_labels = ['ip', 'conv3', 'conv3', 'max-pool', 'conv3', 'conv3', 'max-pool']
    num_filters_each_layer = [None, 64, 64, None, 128, 128, None]
    # Now append the repeated conv blocks, each closed by a pooling layer.
    for block_filters in (256, 512, 512):
        layer_labels += ['conv3'] * num_conv_layers_per_block + ['max-pool']
        num_filters_each_layer += [block_filters] * num_conv_layers_per_block + [None]
    # Classifier head.
    layer_labels += ['fc', 'fc', 'fc', 'softmax', 'op']
    num_filters_each_layer += [4096, 4096, 1000, None, None]
    conn_mat = get_feedforward_adj_mat(len(layer_labels))
    # Stride 1 for conv layers, stride 2 for pooling layers, no stride otherwise.
    # The conv check runs first, as in a plain if/elif chain.
    strides = [
        1 if is_a_conv_layer_label(label)
        else (2 if is_a_pooling_layer_label(label) else None)
        for label in layer_labels
    ]
    return ConvNeuralNetwork(layer_labels, conn_mat, num_filters_each_layer, strides,
                             cnn_layer_labels)
def get_list_of_swap_layer_modifiers(nn, num_modifications='all',
                                     change_stride_with_prob=0.05,
                                     rectifier_swap_candidates=None,
                                     sigmoid_swap_candidates=None):
    """ Returns a list of modifiers for swapping a layer with another.

    Args:
      nn: the network whose layers are considered. This function reads
        nn.nn_class, nn.num_layers, nn.layer_labels and (for nn_class == 'cnn')
        nn.strides.
      num_modifications: 'all' or an integer upper bound on the number of
        modifiers returned. When it is smaller than nn.num_layers, the layers
        are visited in a random order.
      change_stride_with_prob: for a CNN, probability of toggling the stride
        between 1 and 2 when a conv layer is swapped.
      rectifier_swap_candidates: replacement candidates for layers in
        MLP_SIGMOIDS; for 'mlp*' networks defaults to _DFLT_RECTIFIER_SWAP.
      sigmoid_swap_candidates: replacement candidates for layers in
        MLP_RECTIFIERS; for 'mlp*' networks defaults to _DFLT_SIGMOID_SWAP.

    Returns:
      A list of callables. Each takes a network arg_nn and returns
      swap_layer_type(arg_nn, layer_idx, replace_with, new_stride).

    Raises:
      ValueError: if a layer label is not one of the handled types.
    """
    # pylint: disable=too-many-statements
    def _get_swap_layer_modifier(_layer_idx, _replace_with, _new_stride):
        """ Returns a modifier for swapping a layer. """
        # Going through a function call freezes the current loop values.
        return lambda arg_nn: swap_layer_type(arg_nn, _layer_idx, _replace_with,
                                              _new_stride)

    # Preprocessing: fill in the MLP defaults.
    if nn.nn_class.startswith('mlp'):
        if rectifier_swap_candidates is None:
            rectifier_swap_candidates = _DFLT_RECTIFIER_SWAP
        if sigmoid_swap_candidates is None:
            sigmoid_swap_candidates = _DFLT_SIGMOID_SWAP

    # Determine the order of the layers; only shuffle when a strict subset of the
    # layers is going to be used.
    layer_order = list(range(nn.num_layers))
    if num_modifications == 'all' or num_modifications >= nn.num_layers:
        num_modifications = nn.num_layers
    else:
        np.random.shuffle(layer_order)

    # Replacement candidates and their sampling weights for conv/res/pool layers.
    # A weight of None means the candidates are sampled uniformly.
    # NOTE: per the original author, these weights were chosen ad hoc.
    swap_table = {
        'conv3': (['res3', 'res5', 'conv5', 'conv7', 'max-pool', 'avg-pool'],
                  [0.25, 0.25, 0.15, 0.25, 0.05, 0.05]),
        'conv5': (['res3', 'res5', 'conv3', 'conv7', 'max-pool', 'avg-pool'],
                  [0.25, 0.25, 0.2, 0.2, 0.05, 0.05]),
        'conv7': (['res3', 'res5', 'conv3', 'conv5', 'max-pool', 'avg-pool'],
                  [0.25, 0.25, 0.25, 0.15, 0.05, 0.05]),
        'conv9': (['res3', 'res5', 'conv3', 'conv5', 'conv7', 'max-pool', 'avg-pool'],
                  [0.2, 0.2, 0.2, 0.2, 0.1, 0.05, 0.05]),
        'res3': (['conv3', 'conv5', 'res5', 'res7', 'max-pool', 'avg-pool'],
                 [0.25, 0.25, 0.15, 0.25, 0.05, 0.05]),
        'res5': (['conv3', 'conv5', 'res3', 'res7', 'max-pool', 'avg-pool'],
                 [0.25, 0.25, 0.2, 0.2, 0.05, 0.05]),
        'res7': (['conv3', 'conv5', 'res3', 'res5', 'max-pool', 'avg-pool'],
                 [0.25, 0.25, 0.25, 0.15, 0.05, 0.05]),
        'res9': (['conv3', 'conv5', 'res3', 'res5', 'res7', 'max-pool', 'avg-pool'],
                 [0.2, 0.2, 0.2, 0.2, 0.1, 0.05, 0.05]),
        'avg-pool': (['max-pool'], None),
        'max-pool': (['avg-pool'], None),
    }

    # Iterate through the layers and collect the modifiers.
    ret = []
    for idx in layer_order:
        ll = nn.layer_labels[idx]
        if ll in ['ip', 'op', 'fc', 'softmax', 'linear']:
            continue  # don't swap out any of these
        # Determine candidates for swapping out
        if ll in swap_table:
            candidates, cand_probs = swap_table[ll]
        elif ll in MLP_RECTIFIERS:
            candidates, cand_probs = sigmoid_swap_candidates, None
        elif ll in MLP_SIGMOIDS:
            candidates, cand_probs = rectifier_swap_candidates, None
        else:
            raise ValueError('Unidentified layer_type: %s.'%(ll))
        # Choose replace_with, normalising the weights when there are any.
        if cand_probs is not None:
            cand_probs = np.array(cand_probs)
            cand_probs = cand_probs / cand_probs.sum()
        replace_with = np.random.choice(candidates, 1, p=cand_probs)[0]
        # Determine the stride
        if nn.nn_class != 'cnn':
            new_stride = None
        elif is_a_pooling_layer_label(replace_with):
            new_stride = None
        elif is_a_conv_layer_label(replace_with) and is_a_pooling_layer_label(ll):
            new_stride = 2
        elif is_a_conv_layer_label(ll) and np.random.random() < change_stride_with_prob:
            # Toggle the stride of the existing layer.
            new_stride = 1 if nn.strides[idx] == 2 else 2
        else:
            new_stride = nn.strides[idx]
        # Create modifier and append
        ret.append(_get_swap_layer_modifier(idx, replace_with, new_stride))
        # Stop once the requested number of modifiers has been reached.
        if len(ret) >= num_modifications:
            break
    return ret
def get_list_of_wedge_layer_modifiers(nn, num_modifications='all',
                                      internal_layer_type_candidates=None,
                                      choose_pool_with_prob=0.05,
                                      choose_stride_2_with_prob=0.05):
    """ Returns a list of operations for adding a layer in between two layers.

    Args:
      nn: the network whose edges are considered. This function reads
        nn.nn_class, nn.layer_labels, nn.num_units_in_each_layer and calls
        nn.get_edges(), nn.get_op_layer_idx() and nn.get_ip_layer_idx().
      num_modifications: 'all' (one candidate per edge) or an integer upper
        bound on the number of modifiers returned.
      internal_layer_type_candidates: layer types to sample the new layer from.
        Defaults to _DFLT_WEDGE_LAYER_CNN_CANDIDATES when nn.nn_class == 'cnn'
        and to _DFLT_WEDGE_LAYER_MLP_CANDIDATES otherwise.
      choose_pool_with_prob: probability of sampling a pooling layer instead.
        Forced to 0 for non-CNNs and for edges leaving the input layer of a CNN.
      choose_stride_2_with_prob: probability that a new conv layer gets stride 2
        rather than stride 1.

    Returns:
      A list of callables. Each takes a network arg_nn and returns
      wedge_layer(arg_nn, layer_type, num_units, edge[0], edge[1], nl_attrs).
    """
    # A local function for creating a modifier
    def _get_wedge_modifier(_layer_type, _num_units, _edge, _nl_attributes):
        """ Returns a modifier which wedges a new layer into the given edge. """
        # Going through a function call freezes the current loop values.
        return lambda arg_nn: wedge_layer(arg_nn, _layer_type, _num_units,
                                          _edge[0], _edge[1], _nl_attributes)

    # Pre-process arguments
    nn_is_a_cnn = nn.nn_class == 'cnn'
    if internal_layer_type_candidates is None:
        if nn_is_a_cnn:
            internal_layer_type_candidates = _DFLT_WEDGE_LAYER_CNN_CANDIDATES
        else:
            internal_layer_type_candidates = _DFLT_WEDGE_LAYER_MLP_CANDIDATES
    if not nn_is_a_cnn:
        choose_pool_with_prob = 0
    all_edges = nn.get_edges()
    if num_modifications == 'all':
        num_modifications = len(all_edges)
    op_layer_idx = nn.get_op_layer_idx()  # Output layer
    ip_layer_idx = nn.get_ip_layer_idx()  # Input layer
    # These are never modified below, so the same objects are shared by all
    # modifiers.
    nonconv_nl_attrs = Namespace(stride=None)
    conv_nl_attrs_w_stride_1 = Namespace(stride=1)
    conv_nl_attrs_w_stride_2 = Namespace(stride=2)

    # Iterate through all edges
    ret = []
    for edge in all_edges:
        if edge[1] == op_layer_idx:
            # Never add a new layer just before the output. This applies to every
            # network type, so the original MLP-only check for the same condition
            # further down could never be reached and has been dropped.
            continue
        if nn_is_a_cnn and nn.layer_labels[edge[0]] == 'fc':
            # After an fc layer in a CNN, only another fc layer of the same width
            # is wedged in.
            curr_layer_type = 'fc'
            curr_num_units = nn.num_units_in_each_layer[edge[0]]
            nl_attrs = nonconv_nl_attrs
        else:
            if nn_is_a_cnn and edge[0] == ip_layer_idx:
                curr_pool_prob = 0  # No pooling layer right after the input for a CNN
            else:
                curr_pool_prob = choose_pool_with_prob
            # Sample the layer type.
            if np.random.random() < curr_pool_prob:
                curr_layer_candidates = ['avg-pool', 'max-pool']
            else:
                curr_layer_candidates = internal_layer_type_candidates
            curr_layer_type = np.random.choice(curr_layer_candidates, 1)[0]
            # These layer types carry no unit count.
            if curr_layer_type in ['max-pool', 'avg-pool', 'linear', 'softmax']:
                curr_num_units = None
            else:
                curr_num_units = _determine_num_units_for_wedge_layer(nn, edge)
            # Determine stride
            if is_a_conv_layer_label(curr_layer_type):
                if np.random.random() < choose_stride_2_with_prob:
                    nl_attrs = conv_nl_attrs_w_stride_2
                else:
                    nl_attrs = conv_nl_attrs_w_stride_1
            else:
                nl_attrs = nonconv_nl_attrs
        ret.append(_get_wedge_modifier(curr_layer_type, curr_num_units, edge, nl_attrs))
        # Break once the requested number of modifications has been reached
        if len(ret) >= num_modifications:
            break
    return ret