def cherry_net(_input_shape, _n_classes):
    '''Build the Cherry Network classifier for CIFAR10-sized input.

    Expected input: 32x32xd, channel last (assumed from the CIFAR10 note --
    confirm with caller). Pipeline:

        Conv2D 3x3 (depth _d_init)
        -> 3 cherry blocks, each of 2 cherry layers; every cherry layer maps
           D_IN -> (1x1xD_IN, GROUP(3x3xDENSE_OUT)) -> D_IN + C*g_rate*n_dense
        -> a compressing dense transition (with 2x2 spatial downsample) after
           every block except the last
        -> preactivation + 1x1 Conv2D down to _max_d_out
        -> GlobalAveragePooling2D
        -> Dense softmax over _n_classes

    Depth bookkeeping: growth_rate is chosen as d // (C * n_dense) so that a
    single cherry layer adds C * growth_rate * n_dense == d channels, i.e.
    doubles the depth; the transition compresses back down to _max_d_out once
    the depth exceeds it.

    Args:
        _input_shape: input tensor shape (channel last).
        _n_classes: number of softmax output classes.

    Returns:
        Keras Model named 'Cherry_Net'.
    '''
    print('Architecture: Cherry Network')
    _d_init = 128  # Initial Depth
    _C = 8  # Cardinality
    _n_cherry_blocks = 3  # Number of cherry blocks (comment previously said "spore")
    # Number of dense layers per cherry block
    _n_dense_layers_per_cherry_block = 2
    _n_cherry_layers_per_block = 2  # Number of Cherry Layers per block
    # Growth Rate for dense layers: sized so each cherry layer doubles depth
    growth_rate = (_d_init // (_C * _n_dense_layers_per_cherry_block))
    _growth_rate_mul = 2  # Growth Rate Multiplier
    _max_d_out = 256  # Max Depth Out of Transition

    input_layer = _create_input_layer(_input_shape)
    intermed = Conv2D(_d_init, kernel_size=(3, 3),
                      padding='same')(input_layer)
    # new_depth must always stay divisible by _C (cardinality)
    new_depth = _d_init
    for cherry_block_idx in range(_n_cherry_blocks):
        print('Block IDX = {0}'.format(cherry_block_idx))
        # Cherry (dense) layers: each one doubles the working depth, and the
        # growth rate doubles with it so the invariant holds next layer too.
        for cherry_layer_idx in range(_n_cherry_layers_per_block):
            intermed = _cherry_layer(intermed, _C, new_depth, growth_rate,
                                     _n_dense_layers_per_cherry_block)
            growth_rate = int(growth_rate * _growth_rate_mul)
            new_depth *= 2
        # Dynamic Compress Factor: no compression until depth exceeds _max_d_out
        compress_factor = min(1, _max_d_out / new_depth)
        # Once compression kicks in, reset the growth rate so the next block
        # again doubles its (compressed, == _max_d_out) input depth per layer.
        growth_rate = (growth_rate if compress_factor == 1 else
                       (_max_d_out // (_C * _n_dense_layers_per_cherry_block)))
        if cherry_block_idx != (_n_cherry_blocks - 1):
            # Dense Transition for every block except the last
            intermed = _dense_transition(intermed, new_depth,
                                         int(compress_factor * new_depth))
            new_depth = int(new_depth * compress_factor)
    intermed = _preactivation_layers(intermed)
    intermed = Conv2D(_max_d_out, kernel_size=(1, 1))(intermed)
    gap_out = GlobalAveragePooling2D()(intermed)
    final_out = Dense(_n_classes, activation='softmax')(gap_out)
    model = Model(inputs=input_layer, outputs=final_out, name='Cherry_Net')
    return model
def mer_spectro_net(_input_shape, _n_out=120):
    '''Build the MER Spectro Net for music emotion (valence/arousal) regression.

    Expects a transposed spectrogram, nominally 120 x 240 where 120 is time
    and 240 is frequency (per the original design note -- confirm with
    caller). Pipeline:

        Conv2D 2x2 -> preactivation -> max-pool along the time axis only
        -> 4 x (preact + Conv(1x3) + preact + Conv(1x3)
                + frequency max-pool + BatchNorm)
           (1x3 kernels convolve only the frequency axis, so these stages
            are time invariant)
        -> TimeDistributed Flatten / Dense(1024, tanh) / Dropout
        -> two parallel two-layer bidirectional LSTM stacks, one for
           valence and one for arousal
        -> Dense(_n_out // 2, tanh) per stack, concatenated.

    Args:
        _input_shape: spectrogram input shape.
        _n_out: total output width; arousal + valence values
            (default 60 + 60 = 120).

    Returns:
        Keras Model named 'MER_Spectro_Net'.
    '''
    _d_init = 64
    _freq_d = [128, 256, 256, 384]
    _hidden_units_LSTM = 256

    input_layer = _create_input_layer(_input_shape)
    down_sample_time = Conv2D(_d_init, kernel_size=(2, 2), strides=(1, 1),
                              padding='same')(input_layer)
    conv1_out = _preactivation_layers(down_sample_time)
    # Pool across the time axis only: halves the number of time steps
    mp_out = MaxPooling2D(pool_size=(2, 1), strides=(2, 1))(conv1_out)
    intermed_out = mp_out
    # Create frequency scaling time invariant layers.
    # (The enumerate() index here was never used, so iterate directly.)
    for num_filters in _freq_d:
        intermed_out = _preactivation_layers(intermed_out)
        intermed_out = Conv2D(num_filters, kernel_size=(1, 3))(intermed_out)
        intermed_out = _preactivation_layers(intermed_out)
        intermed_out = Conv2D(num_filters, kernel_size=(1, 3))(intermed_out)
        intermed_out = MaxPooling2D(pool_size=(1, 2),
                                    strides=(1, 2))(intermed_out)
        intermed_out = BatchNormalization()(intermed_out)
    # Distribute Output with respect to Time
    f_time_dis_out = TimeDistributed(Flatten())(intermed_out)
    f_time_dis_out = TimeDistributed(
        Dense(1024, activation='tanh'))(f_time_dis_out)
    f_time_dis_out = TimeDistributed(Dropout(0.2))(f_time_dis_out)
    # Feed the time-distributed features to 2 BLSTM stacks: valence & arousal
    valence_lstm_out = Bidirectional(
        LSTM(_hidden_units_LSTM, return_sequences=True,
             activation='tanh'))(f_time_dis_out)
    v_dropout_out = Dropout(0.2)(valence_lstm_out)
    valence_lstm_out = Bidirectional(
        LSTM(_hidden_units_LSTM // 2, activation='tanh'))(v_dropout_out)
    arousal_lstm_out = Bidirectional(
        LSTM(_hidden_units_LSTM, return_sequences=True,
             activation='tanh'))(f_time_dis_out)
    a_dropout_out = Dropout(0.2)(arousal_lstm_out)
    arousal_lstm_out = Bidirectional(
        LSTM(_hidden_units_LSTM // 2, activation='tanh'))(a_dropout_out)
    # Arousal and Valence values are between -1 and 1, use tanh activation
    valence_dense = Dense(_n_out // 2, activation='tanh')(valence_lstm_out)
    arousal_dense = Dense(_n_out // 2, activation='tanh')(arousal_lstm_out)
    final_dense = concatenate([valence_dense, arousal_dense])
    model = Model(inputs=input_layer, outputs=final_dense,
                  name='MER_Spectro_Net')
    return model
def agg_res_net(_input_shape, _n_classes):
    '''Assemble an Aggregated Residual Network (ResNeXt-style) classifier.

    Layout for CIFAR10-sized (32x32xd, channel-last) input:

        Conv2D 3x3 (depth _d_init)
        -> 3 aggregated-residual blocks of 6 layers each; the first layer of
           every block uses stride 2 (halving the spatial size), and the
           working depth doubles from block to block
        -> GlobalAveragePooling2D
        -> Dense softmax over _n_classes

    Returns a Keras Model named 'Agg_Res_Net'.
    '''
    print('Architecture: Aggregated Residual Network')
    _d_init = 32                # depth of the stem convolution
    _C = 8                      # cardinality of each aggregated layer
    _n_agg_blocks = 3           # number of aggregated residual blocks
    _n_agg_layer_per_block = 6  # aggregated residual layers per block

    stem_in = _create_input_layer(_input_shape)
    features = Conv2D(_d_init, kernel_size=(3, 3), padding='same')(stem_in)
    depth = _d_init * 2
    for block_no in range(_n_agg_blocks):
        print('Block IDX = {0}'.format(block_no))
        for layer_no in range(_n_agg_layer_per_block):
            # Stride 2 exactly once per block (its first layer) to halve the
            # output size; no shortcut can bridge the downsampled input.
            if layer_no == 0:
                strides = 2
            else:
                strides = 1
            features = _agg_res_layer(features, depth, 2 * depth, _C, strides)
        # Double the depth after each block to widen the receptive field,
        # compensating for the stride-2 downsample the block performed.
        depth *= 2
    pooled = GlobalAveragePooling2D()(features)
    logits = Dense(_n_classes, activation='softmax')(pooled)
    return Model(inputs=stem_in, outputs=logits, name='Agg_Res_Net')
def dense_net(_input_shape, _n_classes):
    '''Assemble a DenseNet-style classifier for CIFAR10-sized input.

    Layout (32x32xd, channel last):

        Conv2D 3x3 (depth _d_init)
        -> 4 dense blocks of 4 dense layers each, every layer adding
           growth_rate channels (bottlenecked at _growth_limit)
        -> after each block except the last: a compressing dense transition
           with a 2x2 spatial downsample, and the growth rate doubles
        -> GlobalAveragePooling2D
        -> Dense softmax over _n_classes

    With the defaults (_compress_factor = 1.0, no compression) the depth
    grows 64 -> 128 -> 256 -> 512 -> 1024 across the four blocks.

    Returns a Keras Model named 'Dense_Net'.
    '''
    print('Architecture: Dense Network')
    _d_init = 64                    # stem depth
    _growth_limit = 376             # 1x1 bottleneck growth limit
    _n_dense_blocks = 4             # dense blocks
    _n_dense_layers_per_block = 4   # dense layers per block
    _compress_factor = 1.0          # transition compression (1.0 = none)
    growth_rate = 16                # channels added per dense layer
    _growth_rate_mul = 2            # growth-rate multiplier between blocks

    net_in = _create_input_layer(_input_shape)
    feat = Conv2D(_d_init, kernel_size=(3, 3), padding='same')(net_in)
    depth = _d_init
    last_block = _n_dense_blocks - 1
    for block_no in range(_n_dense_blocks):
        print('Block IDX = {0}'.format(block_no))
        for _ in range(_n_dense_layers_per_block):
            feat = _dense_layer(feat, depth, growth_rate, _growth_limit)
            depth += growth_rate
        if block_no != last_block:
            # Compressing transition between blocks; none after the last.
            feat = _dense_transition(feat, depth,
                                     int(_compress_factor * depth))
            # Dynamic growth rate: doubled for the following block.
            growth_rate = int(growth_rate * _growth_rate_mul)
            depth = int(_compress_factor * depth)
    pooled = GlobalAveragePooling2D()(feat)
    probs = Dense(_n_classes, activation='softmax')(pooled)
    return Model(inputs=net_in, outputs=probs, name='Dense_Net')
def spore_net(_input_shape, _n_classes):
    '''Build the Spore Network: alternating dense and aggregated-residual
    blocks for CIFAR10-sized (32x32xd, channel-last) input.

    Each of the 3 spore blocks is:

        2 dense layers (each adds growth_rate channels)
        -> dense transition: dynamic compression down to at most _max_d_out,
           with a 2x2 spatial downsample
        -> 2 aggregated residual layers (cardinality _C); the working depth
           doubles after them

    followed by GlobalAveragePooling2D and a softmax Dense head.

    With the defaults the depth evolves per the design table:
        block 1: 64 -> (dense) -> transition -> 128 -> (agg res) -> 256
        block 2: 256 -> ... -> 256 -> ... -> 512
        block 3: 512 -> ... -> 256 -> ... -> 512

    Args:
        _input_shape: input tensor shape (channel last).
        _n_classes: number of softmax output classes.

    Returns:
        Keras Model named 'Spore_Net'.
    '''
    print('Architecture: Spore Network')
    _d_init = 64  # Initial Depth
    _C = 8  # Cardinality
    _n_spore_blocks = 3  # Number of spore blocks
    _n_agg_layer_per_block = 2  # Number of Aggregate Residual Layers per Block
    _growth_limit = 512  # Growth Limit
    _max_d_out = 256  # Max depth allowed out of a dense transition
    _n_dense_layers_per_block = 2  # Number of Dense Layers per block
    _strides = 1  # Strides (2 = halve the output size, 1 = retain it)
    growth_rate = 32  # Growth Rate for dense layers
    _growth_rate_mul = 2  # Growth Rate Multiplier

    input_layer = _create_input_layer(_input_shape)
    intermed = Conv2D(_d_init, kernel_size=(3, 3),
                      padding='same')(input_layer)
    # new_depth must always stay divisible by _C (cardinality)
    new_depth = _d_init
    for spore_block_idx in range(_n_spore_blocks):
        print('Block IDX = {0}'.format(spore_block_idx))
        # Dense Block
        for dense_layer_idx in range(_n_dense_layers_per_block):
            intermed = _dense_layer(intermed, new_depth, growth_rate,
                                    _growth_limit)
            new_depth += growth_rate
        # Dynamic Compress Factor: no compression until depth exceeds
        # _max_d_out (compress_factor is always recomputed here, so no
        # top-of-function initializer is needed)
        compress_factor = min(1, _max_d_out / new_depth)
        # Dense Transition
        intermed = _dense_transition(intermed, new_depth,
                                     int(compress_factor * new_depth))
        new_depth = int(new_depth * compress_factor)
        growth_rate = int(growth_rate * _growth_rate_mul)
        # Aggregated Residual Block
        for agg_layer_idx in range(_n_agg_layer_per_block):
            intermed = _agg_res_layer(intermed, new_depth, new_depth * 2,
                                      _C, _strides)
        new_depth *= 2
    gap_out = GlobalAveragePooling2D()(intermed)
    final_out = Dense(_n_classes, activation='softmax')(gap_out)
    model = Model(inputs=input_layer, outputs=final_out, name='Spore_Net')
    return model