def __init__(self, in_features, out_features, aggregators, scalers, avg_d, towers=1, self_loop=False, pretrans_layers=1, posttrans_layers=1, divide_input=True, device='cpu'):
    """Multi-tower PNA layer: each tower aggregates (a slice of) the node
    features with every aggregator/scaler combination, and a small FC
    network mixes the concatenated tower outputs.

    :param in_features: size of the input per node
    :param out_features: size of the output per node
    :param aggregators: iterable of aggregation function identifiers (keys of AGGREGATORS)
    :param scalers: iterable of scaling function identifiers (keys of SCALERS)
    :param avg_d: average degree of nodes in the training set, used by scalers to normalize
    :param towers: number of parallel towers
    :param self_loop: whether to add a self loop in the adjacency matrix when aggregating
    :param pretrans_layers: number of layers in the transformation before the aggregation
    :param posttrans_layers: number of layers in the transformation after the aggregation
    :param divide_input: whether the input features should be split between towers or not
    :param device: device used for computation
    """
    super(PNALayer, self).__init__()
    assert ((not divide_input) or in_features % towers == 0), "if divide_input is set the number of towers has to divide in_features"
    assert (out_features % towers == 0), "the number of towers has to divide the out_features"

    # resolve the identifiers into the actual aggregation / scaling callables
    aggregator_fns = [AGGREGATORS[name] for name in aggregators]
    scaler_fns = [SCALERS[name] for name in scalers]

    self.divide_input = divide_input
    # per-tower widths: the input is optionally partitioned, the output always is
    self.input_tower = in_features // towers if divide_input else in_features
    self.output_tower = out_features // towers

    # one independent PNATower per tower
    self.towers = nn.ModuleList(
        PNATower(in_features=self.input_tower, out_features=self.output_tower,
                 aggregators=aggregator_fns, scalers=scaler_fns, avg_d=avg_d,
                 self_loop=self_loop, pretrans_layers=pretrans_layers,
                 posttrans_layers=posttrans_layers, device=device)
        for _ in range(towers))

    # mixing network: linear recombination of the concatenated tower outputs
    self.mixing_network = FCLayer(out_features, out_features, activation='LeakyReLU')
def set_from_config(config_json, init=None, return_class=True):
    """Build a LayerCascade of layers identified by ``config_json['_id']``.

    NOTE(review): the original config-driven construction loop (iterating
    ``config_json['layers']``) was commented out and replaced by this
    hard-coded DenseNet-style topology, so ``init`` and any layer list in
    the config are currently ignored.  That behavior is preserved here;
    confirm whether the config-driven path should be restored.

    :param config_json: dict holding at least the '_id' key
    :param init: unused (kept for interface compatibility)
    :param return_class: if True return a LayerCascade, else the tuple (_id, layers)
    """
    _id = config_json['_id']
    layers = []
    # Hard-coded topology: 3 dense stages of n_layers DenseNet layers each
    # (growth rate k), separated by 1x1-conv + avg-pool transitions, ending
    # in a 10-way fully-connected classifier.
    n_layers = 2
    k = 48
    input_filter_num = k * 2
    layers.append(ConvLayer("conv_adjust", k * 2))
    for i in range(3):
        for j in range(n_layers):
            layers.append(DenseNetLayer("DenseNetLayer_" + str(i * n_layers + j), k, 4, 3))
        if i < 2:
            # each dense stage adds k feature maps per DenseNet layer
            input_filter_num = (input_filter_num + k * n_layers)
            layers.append(ConvLayer("conv_" + str(i), input_filter_num, 1, 1))
            layers.append(PoolLayer("pool_" + str(i), 'avg', 2))
    layers.append(FCLayer("fc_0", 10))
    if return_class:
        return LayerCascade(_id, layers)
    else:
        return _id, layers
def __init__(self, in_dim, out_dim, aggregators, scalers, avg_d, dropout, graph_norm, batch_norm, towers=1, pretrans_layers=1, posttrans_layers=1, divide_input=True, residual=False, edge_features=False, edge_dim=0, parallel_towers=False):
    """Multi-tower PNA layer with optional residual connection and edge features.

    :param in_dim: size of the input per node
    :param out_dim: size of the output per node
    :param aggregators: space-separated aggregation identifiers (keys of AGGREGATORS)
    :param scalers: space-separated scaler identifiers (keys of SCALERS)
    :param avg_d: average degree of nodes in the training set, used by scalers to normalize
    :param dropout: dropout used
    :param graph_norm: whether to use graph normalisation
    :param batch_norm: whether to use batch normalisation
    :param towers: number of towers to use
    :param pretrans_layers: number of layers in the transformation before the aggregation
    :param posttrans_layers: number of layers in the transformation after the aggregation
    :param divide_input: whether the input features should be split between towers or not
    :param residual: whether to add a residual connection (forced off when in_dim != out_dim)
    :param edge_features: whether to use the edge features
    :param edge_dim: size of the edge features
    :param parallel_towers: stored flag; tower construction is identical either way
    """
    super().__init__()
    assert ((not divide_input) or in_dim % towers == 0), "if divide_input is set the number of towers has to divide in_dim"
    assert (out_dim % towers == 0), "the number of towers has to divide the out_dim"
    assert avg_d is not None

    # resolve the space-separated identifiers into the actual callables
    aggregator_fns = [AGGREGATORS[name] for name in aggregators.split()]
    scaler_fns = [SCALERS[name] for name in scalers.split()]

    self.divide_input = divide_input
    # per-tower widths: the input is optionally partitioned, the output always is
    self.input_tower = in_dim // towers if divide_input else in_dim
    self.output_tower = out_dim // towers
    self.in_dim = in_dim
    self.out_dim = out_dim
    self.edge_features = edge_features
    self.parallel_towers = parallel_towers
    # a residual connection is only possible when input and output shapes match
    self.residual = False if in_dim != out_dim else residual

    # one independent PNATower per tower
    self.towers = nn.ModuleList(
        PNATower(in_dim=self.input_tower, out_dim=self.output_tower,
                 aggregators=aggregator_fns, scalers=scaler_fns, avg_d=avg_d,
                 pretrans_layers=pretrans_layers, posttrans_layers=posttrans_layers,
                 batch_norm=batch_norm, dropout=dropout, graph_norm=graph_norm,
                 edge_features=edge_features, edge_dim=edge_dim)
        for _ in range(towers))

    # mixing network: linear recombination of the concatenated tower outputs
    self.mixing_network = FCLayer(out_dim, out_dim, activation='LeakyReLU')
def set_standard_convnet(self, data_provider, conv_blocks_config, fc_block_config, weight_decay, drop_scheme, bn_epsilon, bn_decay, print_info=True, **kwargs):
    """Configure ``self.layer_cascade`` as a plain conv-net.

    The net is a sequence of conv blocks (each ``(num_layers, kernel_size,
    filter_num)``) separated by max-pool layers, a global average pool, a
    set of FC layers, and a final linear classifier.

    :param data_provider: supplies ``data_shape`` (input image size) and ``n_classes``
    :param conv_blocks_config: list of (num_layers, kernel_size, filter_num) triples
    :param fc_block_config: list of hidden-unit counts for the FC tail
    :param weight_decay, bn_epsilon, bn_decay: stored in ``self.net_config``
    :param drop_scheme: dict with 'type' naming where dropout applies
        ('conv'/'pool'/'fc') and matching '*_drop' keep probabilities
    :param print_info: accepted for interface compatibility (no effect)
    :return: self, for chaining
    """
    assert (isinstance(data_provider, DataProvider))
    self.net_config = {
        'weight_decay': weight_decay,
        'bn_epsilon': bn_epsilon,
        'bn_decay': bn_decay,
        'drop_scheme': drop_scheme,
    }
    image_size = data_provider.data_shape[0]
    layers = []
    conv_id = 0
    last_block = len(conv_blocks_config) - 1
    for block_idx, (num_layers, kernel_size, filter_num) in enumerate(conv_blocks_config):
        for layer_idx in range(num_layers):
            # the very first conv layer never gets dropout
            conv_keep = 1.0
            if 'conv' in drop_scheme['type'] and block_idx + layer_idx != 0:
                conv_keep = drop_scheme.get('conv_drop', 1.0)
            layers.append(ConvLayer('conv_%d' % conv_id, filter_num,
                                    kernel_size=kernel_size, keep_prob=conv_keep,
                                    pre_activation=False))
            conv_id += 1
        if block_idx < last_block:
            # downsample between blocks (no pool after the last conv block)
            pool_keep = drop_scheme.get('pool_drop', 1.0) if 'pool' in drop_scheme['type'] else 1.0
            layers.append(PoolLayer('pool_%d' % block_idx, 'max',
                                    keep_prob=pool_keep, pre_activation=False))
            image_size = image_size // 2
    # global average pooling collapses the remaining spatial extent
    layers.append(PoolLayer('pool_%d' % len(conv_blocks_config), 'avg',
                            kernel_size=image_size, strides=image_size,
                            pre_activation=False))
    for fc_idx, units in enumerate(fc_block_config):
        fc_keep = drop_scheme.get('fc_drop', 1.0) if 'fc' in drop_scheme['type'] else 1.0
        layers.append(FCLayer('fc_%d' % fc_idx, units, keep_prob=fc_keep))
    # linear classifier head: no BN, no activation, with bias
    layers.append(FCLayer('fc_%d' % len(fc_block_config), data_provider.n_classes,
                          use_bn=False, use_bias=True, activation=None))
    self.layer_cascade = LayerCascade('SimpleConvNet', layers)
    return self
def deepen(self, idx, new_layer_config, input_dim):
    """Insert a new layer immediately after position ``idx`` in the cascade.

    Supports inserting an 'fc' layer (only where the next layer is FC or
    the end of the cascade, and only after an FC/Pool layer) or a 'conv'
    layer (only where the next layer is not FC, after a Conv/FC layer).
    The new layer copies its width (units / filter_num) from the nearest
    preceding FC/Conv layer, falling back to ``input_dim``; the ids of all
    subsequent same-type layers are shifted up by one to keep numbering
    consecutive.  NOTE(review): the new layer is created with
    ``ready=False`` and the nearest preceding ``ready`` layer is returned —
    presumably the caller uses that pair to initialise the new weights
    (net2deeper style); confirm against the caller.

    :param idx: index of the existing layer after which to insert
    :param new_layer_config: dict with key 'name' in {'fc', 'conv'}; the
        remaining entries are forwarded to the new layer's constructor
        (note: 'name' itself is forwarded too via ``**new_layer_config`` —
        presumably accepted/ignored by FCLayer/ConvLayer; verify)
    :param input_dim: fallback width when no preceding FC/Conv layer exists
    :return: tuple (new_layer, prev_layer)
    :raises ValueError: when new_layer_config['name'] is unsupported
    """
    assert idx < len(self.layers), 'Index out of range: %d' % idx
    if new_layer_config['name'] == 'fc':
        # an FC layer may only be inserted inside the FC tail of the cascade
        assert idx == len(self.layers) - 1 or isinstance(
            self.layers[idx + 1], FCLayer), 'Invalid'
        assert isinstance(self.layers[idx], FCLayer) or isinstance(
            self.layers[idx], PoolLayer), 'Invalid'
        # prepare the new fc layer: inherit the width of the closest
        # preceding FC (units) or Conv (filter_num) layer
        units = input_dim
        for _i in range(idx, -1, -1):
            if isinstance(self.layers[_i], FCLayer):
                units = self.layers[_i].units
                break
            elif isinstance(self.layers[_i], ConvLayer):
                units = self.layers[_i].filter_num
                break
        # count the FC layers up to (and including) idx — the new layer
        # takes the next id in sequence
        fc_idx = 0
        for _i in range(0, idx + 1):
            if isinstance(self.layers[_i], FCLayer):
                fc_idx += 1
        _id = 'fc_%d' % fc_idx
        # change the id of following fc layers (shift each up by one)
        for _i in range(idx + 1, len(self.layers)):
            if isinstance(self.layers[_i], FCLayer):
                self.layers[_i].id = 'fc_%d' % (fc_idx + 1)
                fc_idx += 1
        # nearest preceding layer whose weights are ready
        prev_layer = None
        for _i in range(idx, -1, -1):
            if self.layers[_i].ready:
                prev_layer = self.layers[_i]
                break
        assert prev_layer is not None, 'Invalid'
        new_fc_layer = FCLayer(_id, units, ready=False, **new_layer_config)
        # insert the new layer into the cascade
        self.layers = self.layers[:idx + 1] + [new_fc_layer] + self.layers[idx + 1:]
        return new_fc_layer, prev_layer
    elif new_layer_config['name'] == 'conv':
        # a Conv layer may only be inserted before the FC tail begins
        assert idx == len(self.layers) - 1 or not isinstance(
            self.layers[idx + 1], FCLayer), 'Invalid'
        assert isinstance(self.layers[idx], ConvLayer) or isinstance(
            self.layers[idx], FCLayer), 'Invalid'
        # prepare the new conv layer: inherit filter_num from the closest
        # preceding Conv layer
        filter_num = input_dim
        for _i in range(idx, -1, -1):
            if isinstance(self.layers[_i], ConvLayer):
                filter_num = self.layers[_i].filter_num
                break
        # count the Conv layers up to (and including) idx for the new id
        conv_idx = 0
        for _i in range(0, idx + 1):
            if isinstance(self.layers[_i], ConvLayer):
                conv_idx += 1
        _id = 'conv_%d' % conv_idx
        # change the id of following conv layers (shift each up by one)
        for _i in range(idx + 1, len(self.layers)):
            if isinstance(self.layers[_i], ConvLayer):
                self.layers[_i].id = 'conv_%d' % (conv_idx + 1)
                conv_idx += 1
        # nearest preceding layer whose weights are ready
        prev_layer = None
        for _i in range(idx, -1, -1):
            if self.layers[_i].ready:
                prev_layer = self.layers[_i]
                break
        assert prev_layer is not None, 'Invalid'
        new_conv_layer = ConvLayer(_id, filter_num, ready=False, **new_layer_config)
        self.layers = self.layers[:idx + 1] + [new_conv_layer] + self.layers[idx + 1:]
        return new_conv_layer, prev_layer
    else:
        raise ValueError('Not support to insert a %s layer' % new_layer_config['name'])
def set_standard_dense_net(self, data_provider: DataProvider, growth_rate, depth, total_blocks, keep_prob, weight_decay, model_type, first_ratio=2, reduction=1.0, bc_ratio=4, bn_epsilon=1e-5, bn_decay=0.9, print_info=True, pre_activation=True, **kwargs):
    """Configure ``self.blocks`` as a standard DenseNet or DenseNet-BC.

    Builds: an initial conv transition, ``total_blocks`` dense blocks
    (each with ``layers_per_block`` mini-blocks of growth_rate 3x3 convs,
    plus a 1x1 bottleneck per mini-block in BC mode), 1x1-conv + avg-pool
    transitions between blocks (with channel ``reduction``), and a final
    global-avg-pool + linear-classifier transition.

    :param data_provider: supplies ``data_shape`` (input size) and ``n_classes``
    :param growth_rate: feature maps added per composite layer
    :param depth: total depth; layers per block is derived from it
    :param total_blocks: number of dense blocks
    :param keep_prob: dropout keep probability inside composite layers
    :param model_type: 'DenseNet' or 'DenseNet-BC' (enables bottlenecks)
    :param first_ratio: initial conv width as a multiple of growth_rate
    :param reduction: channel compression factor at middle transitions
    :param bc_ratio: bottleneck width as a multiple of growth_rate (BC mode)
    :param pre_activation: BN-ReLU-conv ordering vs conv-first ordering
    :return: self, for chaining
    """
    self.net_config = {
        'model_type': model_type,
        'weight_decay': weight_decay,
        'first_ratio': first_ratio,
        'reduction': reduction,
        'bc_ratio': bc_ratio,
        'bn_epsilon': bn_epsilon,
        'bn_decay': bn_decay,
        'pre_activation': pre_activation,
    }
    image_size = data_provider.data_shape[0]
    first_output_features = growth_rate * first_ratio
    bc_mode = (model_type == 'DenseNet-BC')
    # composite layers per dense block; BC mode halves it because each
    # composite layer is a bottleneck + conv pair
    layers_per_block = (depth - (total_blocks + 1)) // total_blocks
    if bc_mode:
        layers_per_block = layers_per_block // 2
    # initial conv
    if pre_activation:
        init_conv_layer = ConvLayer('conv_0', first_output_features, kernel_size=3,
                                    activation=None, use_bn=False)
    else:
        init_conv_layer = ConvLayer('conv_0', first_output_features, kernel_size=3,
                                    pre_activation=False)
    init_transition = TransitionBlock('T_0_first', [init_conv_layer])
    self.blocks = [init_transition]
    # Dense Blocks
    in_features_dim = first_output_features
    for block_idx in range(1, total_blocks + 1):
        miniblocks = []
        block_id = 'D_%d' % block_idx
        for miniblock_idx in range(1, layers_per_block + 1):
            miniblock_id = 'M_%d' % miniblock_idx
            in_bottle = None
            if bc_mode:
                # 1x1 bottleneck before the 3x3 conv (DenseNet-BC)
                bottelneck_layer = ConvLayer('conv_0', growth_rate * bc_ratio,
                                             kernel_size=1, keep_prob=keep_prob,
                                             pre_activation=pre_activation)
                in_bottle = LayerCascade('in_bottle', [bottelneck_layer])
            branch_0 = LayerCascade('B_0', [
                ConvLayer('conv_0', growth_rate, kernel_size=3, keep_prob=keep_prob,
                          pre_activation=pre_activation)
            ])
            miniblocks.append(LayerMultiBranch(miniblock_id, [branch_0],
                                               in_bottle=in_bottle))
        dense_block = DenseBlock(block_id, miniblocks)
        self.blocks += [dense_block]
        # track channel count so middle transitions can compress it
        out_features_dim = dense_block.out_features_dim(in_features_dim)
        if block_idx != total_blocks:
            out_features_dim = int(out_features_dim * reduction)
            transition_id = 'T_%d_middle' % block_idx
            conv_layer = ConvLayer('conv_0', out_features_dim, kernel_size=1,
                                   keep_prob=keep_prob, pre_activation=pre_activation)
            avg_pool_layer = PoolLayer('pool_0', 'avg', kernel_size=2, strides=2)
            transition = TransitionBlock(transition_id, [conv_layer, avg_pool_layer])
            self.blocks.append(transition)
            image_size = image_size // 2
        in_features_dim = out_features_dim
    # Transition to classes
    if pre_activation:
        # pre-activation nets need a final BN+ReLU before pooling
        global_avg_pool = PoolLayer('pool_0', 'avg', kernel_size=image_size,
                                    strides=image_size, activation='relu', use_bn=True)
    else:
        global_avg_pool = PoolLayer('pool_0', 'avg', kernel_size=image_size,
                                    strides=image_size, pre_activation=False)
    final_fc_layer = FCLayer('fc_0', data_provider.n_classes, use_bn=False,
                             use_bias=True, activation=None)
    transition_to_classes = TransitionBlock('T_to_classes',
                                            [global_avg_pool, final_fc_layer])
    self.blocks.append(transition_to_classes)
    # print information about the network
    if print_info:
        print('Set Standard %s' % model_type)
        if not bc_mode:
            print('Build %s model with %d blocks, '
                  '%d composite layers each.' % (model_type, total_blocks, layers_per_block))
        if bc_mode:
            print('Build %s model with %d blocks, '
                  '%d bottleneck layers and %d composite layers each.' % (
                      model_type, total_blocks, layers_per_block, layers_per_block))
        print('Reduction at transition layers: %.2f' % reduction)
    return self
def __init__(self, in_channels, out_channels, aggregators, scalers, avg_d, towers=1, pretrans_layers=1, posttrans_layers=1, divide_input=False, **kwargs):
    """Multi-tower PNA message-passing convolution (MessagePassing subclass
    with custom aggregation: ``aggr=None``).

    :param in_channels: size of the input per node
    :param out_channels: size of the output per node
    :param aggregators: set of aggregation function identifiers
    :param scalers: set of scaling functions identifiers
    :param avg_d: average degree of nodes in the training set, used by scalers to normalize
    :param towers: number of towers to use
    :param pretrans_layers: number of layers in the transformation before the aggregation
    :param posttrans_layers: number of layers in the transformation after the aggregation
    :param divide_input: whether the input features should be split between towers or not
    """
    super(PNAConv, self).__init__(aggr=None, **kwargs)
    assert (
        (not divide_input) or in_channels % towers == 0
    ), "if divide_input is set the number of towers has to divide in_features"
    assert (
        out_channels % towers == 0), "the number of towers has to divide the out_features"
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.towers = towers
    self.divide_input = divide_input
    # per-tower widths: the input is optionally partitioned, the output always is
    self.input_tower = self.in_channels // towers if divide_input else self.in_channels
    self.output_tower = self.out_channels // towers
    # retrieve the aggregators and scalers functions
    self.aggregators = [AGGREGATORS[aggr] for aggr in aggregators]
    self.scalers = [SCALERS[scale] for scale in scalers]
    self.avg_d = avg_d
    # projects edge features to the per-tower input width
    # NOTE(review): assumes edge features have in_channels dimensions — confirm
    self.edge_encoder = FCLayer(in_size=in_channels, out_size=self.input_tower,
                                activation='none')
    # build pre-transformations and post-transformation MLP for each tower
    self.pretrans = nn.ModuleList()
    self.posttrans = nn.ModuleList()
    for _ in range(towers):
        # pre-transformation input: source, target and (encoded) edge features
        # concatenated, hence 3 * input_tower
        self.pretrans.append(
            MLP(in_size=3 * self.input_tower, hidden_size=self.input_tower,
                out_size=self.input_tower, layers=pretrans_layers,
                mid_activation='relu', last_activation='none'))
        # post-transformation input: one slot per (aggregator, scaler) pair
        # plus the node's own features, hence (|A|*|S| + 1) * input_tower
        self.posttrans.append(
            MLP(in_size=(len(self.aggregators) * len(self.scalers) + 1) *
                self.input_tower, hidden_size=self.output_tower,
                out_size=self.output_tower, layers=posttrans_layers,
                mid_activation='relu', last_activation='none'))
    # mixing network: linear recombination of the concatenated tower outputs
    self.mixing_network = FCLayer(self.out_channels, self.out_channels,
                                  activation='LeakyReLU')