def enas_repeat_fn(inputs, outputs, layer_id, out_filters, weight_sharer):
    h_enas_op = D(
        ['conv3', 'conv5', 'dsep_conv3', 'dsep_conv5', 'avg_pool', 'max_pool'],
        name='op_' + str(layer_id))
    #h_enas_op = D(['max_pool'], name='op_' + str(layer_id))
    op_inputs, op_outputs = enas_op(h_enas_op, out_filters,
                                    'op_' + str(layer_id), weight_sharer)
    outputs[list(outputs.keys())[-1]].connect(op_inputs['in'])

    # Skip connections
    h_connects = [
        D([True, False], name='skip_' + str(idx) + '_' + str(layer_id))
        for idx in range(layer_id - 1)
    ]
    skip_inputs, skip_outputs = concatenate_skip_layers(h_connects,
                                                        weight_sharer)
    for i in range(len(h_connects)):
        outputs[list(outputs.keys())[i]].connect(skip_inputs['in' + str(i)])
    op_outputs['out'].connect(skip_inputs['in' + str(len(h_connects))])

    # Batch norm after skip
    bn_inputs, bn_outputs = keras_batch_normalization(
        name='skip_bn_' + str(len(h_connects)), weight_sharer=weight_sharer)
    skip_outputs['out'].connect(bn_inputs['in'])
    outputs['out' + str(len(outputs))] = bn_outputs['out']
    return inputs, outputs

def intermediate_node_fn(num_inputs, filters):
    return mo.siso_sequential([
        add(num_inputs),
        conv2d(D([filters]), D([3])),
        batch_normalization(),
        relu()
    ])

def generate(filters):
    return cell(
        lambda channels: mo.siso_sequential([
            conv2d(D([channels]), D([1])),
            batch_normalization(),
            relu()
        ]),
        lambda num_inputs, node_id, channels: intermediate_node_fn(
            num_inputs, node_id, channels, cell_ops),
        concat, h_connections, 5, filters)

def conv_op(filters, filter_size, stride, dilation_rate, spatial_separable):
    if spatial_separable:
        return mo.siso_sequential([
            conv2d(D([filters]), D([[1, filter_size]]), D([[1, stride]])),
            batch_normalization(),
            relu(),
            conv2d(D([filters]), D([[filter_size, 1]]), D([[stride, 1]])),
        ])
    else:
        return conv2d(D([filters]), D([filter_size]), D([stride]),
                      D([dilation_rate]))

def intermediate_node_fn(num_inputs, node_id, filters, cell_ops):
    return mo.siso_sequential([
        add(num_inputs),
        mo.siso_or(
            {
                'conv1': lambda: conv2d(D([filters]), D([1])),
                'conv3': lambda: conv2d(D([filters]), D([3])),
                'max3': lambda: max_pool2d(D([3]))
            }, cell_ops[node_id]),
        batch_normalization(),
        relu()
    ])

def generate_search_space(stacks, num_cells_per_stack, num_nodes_per_cell,
                          num_init_filters):
    search_space = [stem()]
    cell_fn = create_cell_generator(num_nodes_per_cell)
    num_filters = num_init_filters
    for i in range(stacks):
        if i > 0:
            search_space.append(max_pool2d(D([2]), D([2])))
            num_filters *= 2
        for j in range(num_cells_per_stack):
            search_space.append(cell_fn(num_filters))
    search_space += [global_pool2d(), fc_layer(D([10]))]
    return mo.siso_sequential(search_space)

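# Usage sketch (illustrative only; the argument values below are assumptions,
# not taken from this listing). The function returns the (inputs, outputs)
# dictionaries produced by mo.siso_sequential, which can then be passed to a
# searcher or evaluator.
def example_cell_search_space():
    # 3 stacks of 2 cells each, 4 intermediate nodes per cell, 32 initial filters.
    return generate_search_space(
        stacks=3, num_cells_per_stack=2, num_nodes_per_cell=4,
        num_init_filters=32)
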
def cell_input_fn(filters):
    prev_input = mo.identity()
    cur_input = wrap_relu_batch_norm(conv2d(D([filters]), D([1])))
    transformed_prev_input = maybe_factorized_reduction(add_relu=True)

    transformed_prev_input[0]['In0'].connect(prev_input[1]['Out'])
    transformed_prev_input[0]['In1'].connect(cur_input[1]['Out'])
    return {
        'In0': prev_input[0]['In'],
        'In1': cur_input[0]['In']
    }, {
        'Out0': transformed_prev_input[1]['Out'],
        'Out1': cur_input[1]['Out']
    }

def generate_search_space(num_nodes_per_cell, num_normal_cells,
                          num_reduction_cells, init_filters, stem_multiplier):
    global global_vars, hp_sharer
    global_vars = {}
    hp_sharer = hp.HyperparameterSharer()
    hp_sharer.register('drop_path_keep_prob',
                       lambda: D([.7], name='drop_path_keep_prob'))
    stem_in, stem_out = stem(int(init_filters * stem_multiplier))
    progress_in, progress_out = mo.identity()
    global_vars['progress'] = progress_out['Out']

    normal_cell_fn = create_cell_generator(num_nodes_per_cell, False)
    reduction_cell_fn = create_cell_generator(num_nodes_per_cell, True)

    total_cells = num_normal_cells + num_reduction_cells
    hasReduction = [False] * num_normal_cells
    for i in range(num_reduction_cells):
        hasReduction[int(
            float(i + 1) / (num_reduction_cells + 1) * num_normal_cells)] = True

    inputs = [stem_out, stem_out]
    filters = init_filters
    aux_loss_idx = int(
        float(num_reduction_cells) /
        (num_reduction_cells + 1) * num_normal_cells) - 1

    outs = {}
    cells_created = 0.0
    for i in range(num_normal_cells):
        if hasReduction[i]:
            filters *= 2
            connect_new_cell(
                reduction_cell_fn(filters, (cells_created + 1) / total_cells),
                inputs)
            cells_created += 1.0
        connect_new_cell(
            normal_cell_fn(filters, (cells_created + 1) / total_cells), inputs)
        cells_created += 1.0
        if i == aux_loss_idx:
            aux_in, aux_out = aux_logits()
            aux_in['In'].connect(inputs[-1]['Out'])
            outs['Out0'] = aux_out['Out']
    _, final_out = mo.siso_sequential([(None, inputs[-1]),
                                       relu(),
                                       global_pool2d(),
                                       dropout(D([1.0])),
                                       fc_layer(D([10]))])
    outs['Out1'] = final_out['Out']
    return {'In0': stem_in['In'], 'In1': progress_in['In']}, outs

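# Usage sketch (a minimal example; the cell counts and filter settings are
# illustrative assumptions). The returned dictionaries expose two inputs
# ('In0' for the image path through the stem, 'In1' for the training-progress
# signal consumed by drop_path) and two outputs ('Out0' for the auxiliary
# logits when the auxiliary head is placed, 'Out1' for the final logits).
def example_nasnet_style_search_space():
    return generate_search_space(
        num_nodes_per_cell=5, num_normal_cells=6, num_reduction_cells=2,
        init_filters=32, stem_multiplier=3.0)
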
def generate_stage(stage_num, num_nodes, filters, filter_size):
    h_connections = [
        Bool(name='%d_in_%d_%d' % (stage_num, in_id, out_id))
        for (in_id, out_id) in itertools.combinations(
            range(1, num_nodes + 1), 2)
    ]

    return genetic_stage(
        lambda: mo.siso_sequential([
            conv2d(D([filters]), D([filter_size])),
            batch_normalization(),
            relu()
        ]),
        lambda num_inputs: intermediate_node_fn(num_inputs, filters),
        lambda num_inputs: intermediate_node_fn(num_inputs, filters),
        h_connections, num_nodes)

def enas_op(h_op_name, out_filters, name, weight_sharer):
    return mo.siso_or(
        {
            'conv3': lambda: enas_conv(out_filters, 3, False, weight_sharer, name),
            'conv5': lambda: enas_conv(out_filters, 5, False, weight_sharer, name),
            'dsep_conv3': lambda: enas_conv(out_filters, 3, True, weight_sharer, name),
            'dsep_conv5': lambda: enas_conv(out_filters, 5, True, weight_sharer, name),
            'avg_pool': lambda: avg_pool(D([3]), D([1])),
            'max_pool': lambda: max_pool(D([3]), D([1]))
        }, h_op_name)

def intermediate_node_fn(reduction, input_id, node_id, op_num, filters,
                         cell_ratio, cell_ops):
    stride = 2 if reduction and input_id < 2 else 1
    h_is_not_none = co.DependentHyperparameter(
        lambda op: op != 'none', {'op': cell_ops[node_id * 2 + op_num]})

    op_in, op_out = mo.siso_or(
        {
            'none': lambda: check_filters(filters, stride),
            'conv1': lambda: wrap_relu_batch_norm(
                conv2d(D([filters]), D([1]), h_stride=D([stride]))),
            'conv3': lambda: full_conv_op(filters, 3, stride, 1, False),
            'depth_sep3': lambda: separable_conv_op(filters, 3, stride),
            'depth_sep5': lambda: separable_conv_op(filters, 5, stride),
            'depth_sep7': lambda: separable_conv_op(filters, 7, stride),
            'dilated_3x3_rate_2': lambda: full_conv_op(filters, 3, stride, 2, False),
            'dilated_3x3_rate_4': lambda: full_conv_op(filters, 3, stride, 4, False),
            'dilated_3x3_rate_6': lambda: full_conv_op(filters, 3, stride, 6, False),
            '1x3_3x1': lambda: full_conv_op(filters, 3, stride, 1, True),
            '1x7_7x1': lambda: full_conv_op(filters, 7, stride, 1, True),
            'avg2': lambda: pool_op(filters, 2, stride, 'avg'),
            'avg3': lambda: pool_op(filters, 3, stride, 'avg'),
            'max2': lambda: pool_op(filters, 2, stride, 'max'),
            'max3': lambda: pool_op(filters, 3, stride, 'max'),
            'min2': lambda: pool_op(filters, 2, stride, 'min')
        }, cell_ops[node_id * 2 + op_num])

    drop_in, drop_out = miso_optional(lambda: drop_path(cell_ratio),
                                      h_is_not_none)
    drop_in['In0'].connect(op_out['Out'])
    drop_in['In1'].connect(global_vars['progress'])
    return op_in, drop_out

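# Note on the stride logic above: in a reduction cell only the connections that
# read directly from the two cell inputs (input_id < 2) use stride 2; outputs
# of earlier intermediate nodes in the same cell are already at the reduced
# resolution, so they are consumed with stride 1.
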
def full_conv_op(filters, filter_size, stride, dilation_rate,
                 spatial_separable):
    # Add bottleneck layer according to
    # https://github.com/tensorflow/tpu/blob/master/models/official/amoeba_net/network_utils.py
    if filter_size == 3 and spatial_separable:
        reduced_filter_size = int(3 * filters / 8)
    else:
        reduced_filter_size = int(filters / 4)
    if reduced_filter_size < 1:
        return wrap_relu_batch_norm(
            conv_op(filters, filter_size, stride, dilation_rate,
                    spatial_separable))
    else:
        return mo.siso_sequential([
            wrap_relu_batch_norm(conv2d(D([reduced_filter_size]), D([1]))),
            wrap_relu_batch_norm(
                conv_op(reduced_filter_size, filter_size, stride,
                        dilation_rate, spatial_separable)),
            wrap_relu_batch_norm(conv2d(D([filters]), D([1])))
        ])

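# Worked example of the bottleneck sizing above (the filter counts here are
# illustrative): with filters=32 and a spatially separable 3x3 op,
# reduced_filter_size is int(3 * 32 / 8) = 12; for any other op it is
# int(32 / 4) = 8. With very few filters (e.g. filters=3, giving
# int(3 / 4) = 0) the bottleneck is skipped and the convolution runs at full
# width.
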
def separable_conv_op(filters, filter_size, stride):
    return mo.siso_sequential([
        wrap_relu_batch_norm(
            separable_conv2d(D([filters]), D([filter_size]), D([stride]))),
        wrap_relu_batch_norm(
            separable_conv2d(D([filters]), D([filter_size]), D([1])))
    ])

def SP1_ops(name=None, reduction=False):
    if reduction:
        # Dilated convolution can't be done with strides
        ops = [
            'none', 'depth_sep3', 'depth_sep5', 'depth_sep7', 'avg3', 'max3',
            '1x7_7x1'
        ]
    else:
        ops = [
            'none', 'depth_sep3', 'depth_sep5', 'depth_sep7', 'avg3', 'max3',
            'dilated_3x3_rate_2', '1x7_7x1'
        ]
    return D(ops, name=name)

def pool_op(filters, filter_size, stride, pool_type):
    if pool_type == 'avg':
        pool = avg_pool2d(D([filter_size]), D([stride]))
    elif pool_type == 'max':
        pool = max_pool2d(D([filter_size]), D([stride]))
    else:
        pool = min_pool2d(D([filter_size]), D([stride]))

    return mo.siso_sequential([pool, check_filters(filters)])

def aux_logits():
    return mo.siso_sequential([
        relu(),
        avg_pool2d(D([5]), D([3]), D(['VALID'])),
        conv2d(D([128]), D([1])),
        batch_normalization(),
        relu(),
        global_convolution(D([768])),
        batch_normalization(),
        relu(),
        flatten(),
        fc_layer(D([10]))
    ])

def __init__(self, name, name_to_hyperp, compile_fn, input_names, output_names,
             scope=None):
    co.Module.__init__(self, scope, name)

    for h in name_to_hyperp:
        if not isinstance(name_to_hyperp[h], co.Hyperparameter):
            vs = name_to_hyperp[h] if isinstance(name_to_hyperp[h],
                                                 list) else [name_to_hyperp[h]]
            name_to_hyperp[h] = D(vs)

    self._register(input_names, output_names, name_to_hyperp)
    self._compile_fn = compile_fn
    self.isTraining = True

def __init__(self, name, compile_fn, name_to_hyperp, input_names, output_names,
             scope=None):
    co.Module.__init__(self, scope, name)
    hyperparam_dict = {}
    for h in name_to_hyperp:
        if not isinstance(name_to_hyperp[h], co.Hyperparameter):
            hyperparam_dict[h] = D([name_to_hyperp[h]])
        else:
            hyperparam_dict[h] = name_to_hyperp[h]

    self._register(input_names, output_names, hyperparam_dict)
    self._compile_fn = compile_fn

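# Note: unlike the constructor above, this variant wraps any value v that is
# not already a co.Hyperparameter as the singleton D([v]); a raw list is kept
# as a single choice rather than being expanded into multiple choices.
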
def get_enas_search_space(num_classes, num_layers, out_filters, weight_sharer):
    h_N = D([num_layers], name='num_layers')
    return mo.siso_sequential([
        enas_space(
            h_N,
            out_filters,
            #mo.empty,
            lambda: wrap_batch_norm_relu(
                conv2D(3, 'stem', weight_sharer, out_filters=out_filters),
                add_relu=False,
                weight_sharer=weight_sharer,
                name='stem'),
            enas_repeat_fn, ['in'], ['out'], weight_sharer),
        global_pool(),
        dropout(keep_prob=.9),
        fc_layer(num_classes, 'softmax', weight_sharer),
    ])

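# Usage sketch (the argument values are illustrative assumptions; the
# weight-sharer object comes from the surrounding ENAS code and is passed in
# rather than constructed here). Returns the (inputs, outputs) dictionaries
# for a network of `num_layers` searchable layers with shared weights.
def example_enas_search_space(weight_sharer):
    # 10 output classes, 12 searchable layers, 36 filters per layer.
    return get_enas_search_space(
        num_classes=10, num_layers=12, out_filters=36,
        weight_sharer=weight_sharer)
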
def create_cell_generator(num_nodes, reduction):
    prefix = 'reduction' if reduction else 'normal'
    cell_ops = [
        SP1_ops('%s_op_%d_%d' % (prefix, i // 2, i % 2), reduction)
        for i in range(2 * num_nodes)
    ]
    connection_hparams = [
        D(list(range(i // 2 + 2)),
          name='%s_in_%d_%d' % (prefix, i // 2, i % 2))
        for i in range(2 * num_nodes)
    ]

    def generate(filters, cell_ratio):
        return cell(
            lambda: cell_input_fn(filters),
            lambda in_id, node_id, op_num: intermediate_node_fn(
                reduction, in_id, node_id, op_num, filters, cell_ratio,
                cell_ops),
            lambda: add(2), combine_unused, num_nodes, connection_hparams)

    return generate

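# Note on the connection hyperparameters above: for intermediate node j
# (j = i // 2), each of its two inputs is drawn from D(range(j + 2)), i.e. from
# the two cell inputs (indices 0 and 1) or from any previously constructed
# node. For example, the first node may only pick from {0, 1}, while the third
# node may pick from {0, 1, 2, 3}.
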
def generate_search_space(nodes_per_stage, filters_per_stage,
                          filter_size_per_stage):
    search_space = []
    for i in range(len(nodes_per_stage)):
        search_space.append(
            generate_stage(i, nodes_per_stage[i], filters_per_stage[i],
                           filter_size_per_stage[i]))
        search_space.append(max_pool2d(D([3]), D([2]), D(['SAME'])))
    search_space += [
        flatten(),
        fc_layer(D([1024])),
        dropout(D([.5])),
        fc_layer(D([10]))
    ]
    return mo.siso_sequential(search_space)

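# Usage sketch (a minimal example; the per-stage settings are illustrative
# assumptions). Three stages with 3, 4, and 5 nodes, widening filter counts,
# and a fixed 3x3 filter size in every stage.
def example_genetic_search_space():
    return generate_search_space(
        nodes_per_stage=[3, 4, 5],
        filters_per_stage=[64, 128, 256],
        filter_size_per_stage=[3, 3, 3])
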
def create_cell_generator(num_nodes):
    h_connections = [
        Bool(name='in_%d_%d' % (in_id, out_id))
        for (in_id, out_id) in itertools.combinations(range(num_nodes + 2), 2)
    ]

    cell_ops = [
        D(['conv1', 'conv3', 'max3'], name='node_%d' % i)
        for i in range(num_nodes)
    ]

    def generate(filters):
        return cell(
            lambda channels: mo.siso_sequential([
                conv2d(D([channels]), D([1])),
                batch_normalization(),
                relu()
            ]),
            lambda num_inputs, node_id, channels: intermediate_node_fn(
                num_inputs, node_id, channels, cell_ops),
            concat, h_connections, 5, filters)

    return generate

def stem():
    return mo.siso_sequential([
        conv2d(D([128]), D([3])),
        batch_normalization(),
        relu(),
    ])

def stem(filters):
    return mo.siso_sequential(
        [conv2d(D([filters]), D([3])),
         batch_normalization()])

def relu():
    return activation(D(['relu']))