import tensorflow as tf  # TF 1.x graph-mode API

# Helper builders (conv_layer, maxPool2x2, fc_layer, drop_layer) are assumed to be
# defined elsewhere in this repository and imported into this module.


def cnnModel4(trainSize, input_placeholder, activation, init, keep_prob1, keep_prob2):
    # For a different trainSize the input spectrogram has a different time dimension,
    # so the flattened input to the FC layer has a different length.
    # TODO: generalise the fc_input computation; for now it is hard-coded per trainSize
    # and only verified for 3 seconds.
    network_weights = list()
    activationList = list()
    biasList = list()

    time_dim = 300    # default (3 seconds)
    fc_input = 2720   # 5*17*32, flattened size when time_dim = 300
    if trainSize == '4sec':
        time_dim = 400
        fc_input = 3808   # 7*17*32
    elif trainSize == '5sec':
        time_dim = 500
        fc_input = 4352   # 8*17*32

    print('------------ Using cnnModel4 architecture ...')

    # Convolution layer 1
    conv1, w1, b1 = conv_layer(input_placeholder, [5, 5, 1, 32], [32], [1, 1, 1, 1],
                               'conv1', act=activation, init_type=init)
    pool1 = maxPool2x2(conv1, [1, 2, 2, 1], [1, 2, 2, 1])
    print('Conv1 layer after pooling: shape = ', pool1)

    # The original (Russian) paper uses a NIN layer here; we use a plain conv layer
    # without max pooling instead.
    # Convolution layer 2
    conv2, w2, b2 = conv_layer(pool1, [3, 3, 32, 32], [32], [1, 1, 1, 1],
                               'conv2', act=activation, init_type=init)
    print('Conv2 layer shape = ', conv2)

    # Convolution layer 3
    conv3, w3, b3 = conv_layer(conv2, [3, 3, 32, 48], [48], [1, 1, 1, 1],
                               'conv3', act=activation, init_type=init)
    pool2 = maxPool2x2(conv3, [1, 2, 2, 1], [1, 2, 2, 1])
    print('After pooling in Conv3 layer, shape = ', pool2)

    # Convolution layer 4
    conv4, w4, b4 = conv_layer(pool2, [3, 3, 48, 48], [48], [1, 1, 1, 1],
                               'conv4', act=activation, init_type=init)
    print('Conv4 layer shape = ', conv4)

    # Convolution layer 5
    conv5, w5, b5 = conv_layer(conv4, [3, 3, 48, 64], [64], [1, 1, 1, 1],
                               'conv5', act=activation, init_type=init)
    pool3 = maxPool2x2(conv5, [1, 2, 2, 1], [1, 2, 2, 1])
    print('After pooling in Conv5 layer, shape = ', pool3)

    # Convolution layer 6
    conv6, w6, b6 = conv_layer(pool3, [3, 3, 64, 64], [64], [1, 1, 1, 1],
                               'conv6', act=activation, init_type=init)
    print('Conv6 layer shape = ', conv6)

    # Convolution layer 7
    conv7, w7, b7 = conv_layer(conv6, [3, 3, 64, 32], [32], [1, 1, 1, 1],
                               'conv7', act=activation, init_type=init)
    pool4 = maxPool2x2(conv7, [1, 2, 2, 1], [1, 2, 2, 1])
    print('After pooling in Conv7 layer, shape = ', pool4)

    # Convolution layer 8
    conv8, w8, b8 = conv_layer(pool4, [3, 3, 32, 32], [32], [1, 1, 1, 1],
                               'conv8', act=activation, init_type=init)
    print('Conv8 layer shape = ', conv8)

    # Convolution layer 9
    conv9, w9, b9 = conv_layer(conv8, [3, 3, 32, 32], [32], [1, 1, 1, 1],
                               'conv9', act=activation, init_type=init)
    pool5 = maxPool2x2(conv9, [1, 2, 2, 1], [1, 2, 2, 1])
    print('After pooling in Conv9 layer, shape = ', pool5)

    # Convolution layer 10
    conv10, w10, b10 = conv_layer(pool5, [3, 3, 32, 32], [32], [1, 1, 1, 1],
                                  'conv10', act=activation, init_type=init)
    print('Conv10 layer shape = ', conv10)

    # Convolution layer 11
    conv11, w11, b11 = conv_layer(conv10, [3, 3, 32, 32], [32], [1, 1, 1, 1],
                                  'conv11', act=activation, init_type=init)
    pool6 = maxPool2x2(conv11, [1, 2, 2, 1], [1, 2, 2, 1])
    print('After pooling in Conv11 layer, shape = ', pool6)

    # Fully connected layer 1 with dropout
    flattened = tf.reshape(pool6, shape=[-1, fc_input])  # 5*17*32 = 2720 for a 3-second spectrogram
    dropped_1 = drop_layer(flattened, keep_prob1, 'dropout1')
    print('Dropped1 shape = ', dropped_1)
    fc1, w_fc1, b_fc1 = fc_layer(dropped_1, fc_input, 64, 'FC_Layer1', activation)

    # Output layer with dropout
    dropped_2 = drop_layer(fc1, keep_prob2, 'dropout2')
    output, w_out, b_out = fc_layer(dropped_2, 64, 2, 'Output_Layer', 'no-activation')  # raw logits
    print('Output layer shape = ', output.shape)

    return fc1, output, network_weights, activationList, biasList
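# A minimal sketch addressing the TODO above: instead of hard-coding fc_input per
# trainSize, the flattened size can be derived from the input dimensions, assuming
# 'SAME' padding and stride-2 pooling (each maxPool2x2 then ceil-halves both axes).
# The helper name flattened_size and the example input sizes are ours, not part of
# the original code.
def flattened_size(time_frames, freq_bins, out_channels, num_pools):
    """Flattened length after `num_pools` 2x2/stride-2 poolings with SAME padding."""
    t, f = time_frames, freq_bins
    for _ in range(num_pools):
        t = (t + 1) // 2  # ceil division by 2
        f = (f + 1) // 2
    return t * f * out_channels


# Example: cnnModel4 has six pooling stages and 32 final channels, so for a 4-second,
# 1025-bin spectrogram flattened_size(400, 1025, 32, 6) == 3808, i.e. 7*17*32.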
def cnnModel5(trainSize, input_placeholder, activation, init, targets, fftSize,
              padding, keep_prob1, keep_prob2, keep_prob3):
    # Exact replication of the Russian paper's architecture using the
    # Max-Feature-Map (MFM) activation.
    print('TrainSize input to architecture is: ', trainSize)
    trainSize = str(trainSize) + 'sec'

    network_weights = list()
    activationList = list()
    biasList = list()

    if trainSize == '1sec':
        time_dim = 100
        if activation == 'mfm':
            if fftSize == 512:
                fc_input = 576    # 4*9*16
            elif fftSize == 256:
                fc_input = 320    # 4*5*16
        else:
            if fftSize == 512:
                fc_input = 1152   # 4*9*32
            elif fftSize == 256:
                fc_input = 640    # 4*5*32
    elif trainSize == '3sec':
        time_dim = 300
        if activation == 'mfm':
            fc_input = 5280       # 10*33*16
        else:
            fc_input = 10560      # 10*33*32
    elif trainSize == '4sec':
        time_dim = 400
        if activation == 'mfm':
            if fftSize == 2048:
                fc_input = 6864   # 13*33*16
            elif fftSize == 512:
                fc_input = 0      # not computed yet
            elif fftSize == 256:
                fc_input = 0      # not computed yet
        else:
            if fftSize == 2048:
                fc_input = 13728  # 13*33*32
            elif fftSize == 512:
                fc_input = 0      # not computed yet
            elif fftSize == 256:
                fc_input = 0      # not computed yet

    # MFM halves the channel dimension, so the input depth of every following layer
    # depends on the activation.
    if activation == 'mfm':
        in_conv2 = 16
        in_conv3 = 16
        in_conv4 = 24
        in_conv5 = 24
        in_conv6 = 32
        in_conv7 = 32
        in_conv8 = 16
        in_conv9 = 16
        in_outputLayer = 32
    else:
        in_conv2 = 32
        in_conv3 = 32
        in_conv4 = 48
        in_conv5 = 48
        in_conv6 = 64
        in_conv7 = 64
        in_conv8 = 32
        in_conv9 = 32
        in_outputLayer = 64

    print('------------ Using cnnModel5, replicating the Russian architecture with MFM activation !!')

    # Convolution layer 1
    conv1, w1, b1 = conv_layer(input_placeholder, [5, 5, 1, 32], [32], [1, 1, 1, 1],
                               'conv1', padding, activation, init)
    pool1 = maxPool2x2(conv1, [1, 2, 2, 1], [1, 2, 2, 1])
    print('Conv1 layer after pooling: shape = ', pool1)

    # NIN layer
    conv2, w2, b2 = conv_layer(pool1, [1, 1, in_conv2, 32], [32], [1, 1, 1, 1],
                               'conv2', padding, activation, init)
    print('Conv2 (NIN) layer shape = ', conv2)

    # Convolution layer 3
    conv3, w3, b3 = conv_layer(conv2, [3, 3, in_conv3, 48], [48], [1, 1, 1, 1],
                               'conv3', padding, activation, init)
    pool2 = maxPool2x2(conv3, [1, 2, 2, 1], [1, 2, 2, 1])
    print('Conv3 after pooling, shape = ', pool2)

    # NIN layer
    conv4, w4, b4 = conv_layer(pool2, [1, 1, in_conv4, 48], [48], [1, 1, 1, 1],
                               'conv4', padding, activation, init)
    print('Conv4 layer shape = ', conv4)

    # Convolution layer 5
    conv5, w5, b5 = conv_layer(conv4, [3, 3, in_conv5, 64], [64], [1, 1, 1, 1],
                               'conv5', padding, activation, init)
    pool3 = maxPool2x2(conv5, [1, 2, 2, 1], [1, 2, 2, 1])
    print('Conv5 layer after pooling, shape = ', pool3)

    # NIN layer
    conv6, w6, b6 = conv_layer(pool3, [1, 1, in_conv6, 64], [64], [1, 1, 1, 1],
                               'conv6', padding, activation, init)
    print('Conv6 layer shape = ', conv6)

    # Convolution layer 7
    conv7, w7, b7 = conv_layer(conv6, [3, 3, in_conv7, 32], [32], [1, 1, 1, 1],
                               'conv7', padding, activation, init)
    pool4 = maxPool2x2(conv7, [1, 2, 2, 1], [1, 2, 2, 1])
    print('Conv7 after pooling, shape = ', pool4)

    # NIN layer
    conv8, w8, b8 = conv_layer(pool4, [1, 1, in_conv8, 32], [32], [1, 1, 1, 1],
                               'conv8', padding, activation, init)
    print('Conv8 layer shape = ', conv8)

    # Convolution layer 9
    conv9, w9, b9 = conv_layer(conv8, [3, 3, in_conv9, 32], [32], [1, 1, 1, 1],
                               'conv9', padding, activation, init)
    pool5 = maxPool2x2(conv9, [1, 2, 2, 1], [1, 2, 2, 1])
    print('Conv9 after pooling, shape = ', pool5)

    # Dropout on the large flattened conv output
    flattened = tf.reshape(pool5, shape=[-1, fc_input])
    dropped_1 = drop_layer(flattened, keep_prob1, 'dropout1')

    # Fully connected layer 1 with 64 neurons (split into 32 by MFM)
    fc1, w_fc1, b_fc1 = fc_layer(dropped_1, fc_input, 64, 'FC_Layer1', activation)
    print('Shape of FC1 = ', fc1.shape)

    # Output layer: 2 neurons, one for genuine and one for spoof. Dropout applied first.
    dropped_2 = drop_layer(fc1, keep_prob2, 'dropout2')
    output, w_out, b_out = fc_layer(dropped_2, in_outputLayer, targets,
                                    'Output_Layer', 'no-activation')  # raw logits
    print('Output layer shape = ', output.shape)
    print('Targets in arch is : ', targets)

    return fc1, output, network_weights, activationList, biasList
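# For reference: a minimal sketch of the Max-Feature-Map (MFM) activation referred to
# above ("64 neurons split into 32 due to MFM"). It splits the channel/unit axis into
# two halves and takes their element-wise max, which is why every in_conv*/in_fc*/
# in_outputLayer value is halved when activation == 'mfm'. The real implementation is
# assumed to live inside conv_layer/fc_layer; the name mfm_sketch is ours.
def mfm_sketch(x):
    """Max-Feature-Map over the last axis: [..., 2k] -> [..., k]."""
    a, b = tf.split(x, num_or_size_splits=2, axis=-1)
    return tf.maximum(a, b)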
def cnnModel0(trainSize, input_placeholder, activation, init, keep_prob1, keep_prob2):
    # Adds one extra FC layer of 256 neurons.
    network_weights = list()
    activationList = list()
    biasList = list()

    print('------------ Using cnnModel0 New Architecture -----------')

    # Convolution layer 1
    conv1, w1, b1 = conv_layer(input_placeholder, [5, 5, 1, 32], [32], [1, 1, 1, 1],
                               'conv1', act=activation, init_type=init)
    pool1 = maxPool2x2(conv1, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool1.shape = ', pool1)
    network_weights.append(w1)
    activationList.append(pool1)
    biasList.append(b1)

    # Convolution layer 2
    conv2, w2, b2 = conv_layer(pool1, [3, 3, 32, 32], [32], [1, 1, 1, 1],
                               'conv2', act=activation, init_type=init)
    pool2 = maxPool2x2(conv2, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool2.shape = ', pool2)
    network_weights.append(w2)
    activationList.append(pool2)
    biasList.append(b2)

    # Convolution layer 3
    conv3, w3, b3 = conv_layer(pool2, [3, 3, 32, 32], [32], [1, 1, 1, 1],
                               'conv3', act=activation, init_type=init)
    pool3 = maxPool2x2(conv3, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool3.shape = ', pool3)
    network_weights.append(w3)
    activationList.append(pool3)
    biasList.append(b3)

    # Convolution layer 4
    conv4, w4, b4 = conv_layer(pool3, [3, 3, 32, 32], [32], [1, 1, 1, 1],
                               'conv4', act=activation, init_type=init)
    pool4 = maxPool2x2(conv4, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool4.shape = ', pool4)
    network_weights.append(w4)
    activationList.append(pool4)
    biasList.append(b4)

    # Fully connected layer 1 with dropout
    flattened = tf.reshape(pool4, shape=[-1, 65 * 19 * 32])
    dropped_1 = drop_layer(flattened, keep_prob1, 'dropout1')
    fc1, fcw1, b5 = fc_layer(dropped_1, 65 * 19 * 32, 256, 'FC_Layer1',
                             act=activation, init_type=init)
    network_weights.append(fcw1)

    # Fully connected layer 2 with dropout
    dropped_2 = drop_layer(fc1, keep_prob2, 'dropout2')
    fc2, fcw2, b6 = fc_layer(dropped_2, 256, 64, 'FC_Layer2',
                             act=activation, init_type=init)
    network_weights.append(fcw2)
    activationList.append(dropped_2)
    biasList.append(b6)

    # Output layer: 2 neurons, one for genuine and one for spoof
    dropped_3 = drop_layer(fc2, keep_prob2, 'dropout3')
    output, fcw3, b7 = fc_layer(dropped_3, 64, 2, 'Output_Layer', 'no-activation',
                                init_type=init)  # raw logits
    print('Final output layer shape = ', output.shape)
    network_weights.append(fcw3)
    activationList.append(output)
    biasList.append(b7)

    return dropped_2, output, network_weights, activationList, biasList
def cnnModel3(trainSize, input_placeholder, activation, init, keep_prob1, keep_prob2):
    network_weights = list()
    activationList = list()
    biasList = list()

    time_dim = 300     # default (3 seconds)
    fc_input = 10560   # 10*33*32, flattened size when time_dim = 300
    if trainSize == '4sec':
        time_dim = 400
        fc_input = 13728   # 13*33*32
    elif trainSize == '5sec':
        time_dim = 500
        fc_input = 16896   # 16*33*32

    print('------------ Using cnnModel3 architecture ...')

    # Convolution layer 1
    conv1, w1, b1 = conv_layer(input_placeholder, [5, 5, 1, 32], [32], [1, 1, 1, 1],
                               'conv1', act=activation, init_type=init)
    pool1 = maxPool2x2(conv1, [1, 2, 2, 1], [1, 2, 2, 1])
    print('Conv1 layer after pooling: shape = ', pool1)

    # The original (Russian) paper uses a NIN layer here; we use a plain conv layer
    # without max pooling instead.
    # Convolution layer 2
    conv2, w2, b2 = conv_layer(pool1, [3, 3, 32, 32], [32], [1, 1, 1, 1],
                               'conv2', act=activation, init_type=init)
    print('Conv2 layer shape = ', conv2)

    # Convolution layer 3
    conv3, w3, b3 = conv_layer(conv2, [3, 3, 32, 48], [48], [1, 1, 1, 1],
                               'conv3', act=activation, init_type=init)
    pool2 = maxPool2x2(conv3, [1, 2, 2, 1], [1, 2, 2, 1])
    print('After pooling in Conv3 layer, shape = ', pool2)

    # Convolution layer 4
    conv4, w4, b4 = conv_layer(pool2, [3, 3, 48, 48], [48], [1, 1, 1, 1],
                               'conv4', act=activation, init_type=init)
    print('Conv4 layer shape = ', conv4)

    # Convolution layer 5
    conv5, w5, b5 = conv_layer(conv4, [3, 3, 48, 64], [64], [1, 1, 1, 1],
                               'conv5', act=activation, init_type=init)
    pool3 = maxPool2x2(conv5, [1, 2, 2, 1], [1, 2, 2, 1])
    print('After pooling in Conv5 layer, shape = ', pool3)

    # Convolution layer 6
    conv6, w6, b6 = conv_layer(pool3, [3, 3, 64, 64], [64], [1, 1, 1, 1],
                               'conv6', act=activation, init_type=init)
    print('Conv6 layer shape = ', conv6)

    # Convolution layer 7
    conv7, w7, b7 = conv_layer(conv6, [3, 3, 64, 32], [32], [1, 1, 1, 1],
                               'conv7', act=activation, init_type=init)
    pool4 = maxPool2x2(conv7, [1, 2, 2, 1], [1, 2, 2, 1])
    print('After pooling in Conv7 layer, shape = ', pool4)

    # Convolution layer 8
    conv8, w8, b8 = conv_layer(pool4, [3, 3, 32, 32], [32], [1, 1, 1, 1],
                               'conv8', act=activation, init_type=init)
    print('Conv8 layer shape = ', conv8)

    # Convolution layer 9
    conv9, w9, b9 = conv_layer(conv8, [3, 3, 32, 32], [32], [1, 1, 1, 1],
                               'conv9', act=activation, init_type=init)
    pool5 = maxPool2x2(conv9, [1, 2, 2, 1], [1, 2, 2, 1])
    print('After pooling in Conv9 layer, shape = ', pool5)

    # Fully connected layer 1 with dropout
    flattened = tf.reshape(pool5, shape=[-1, fc_input])  # 10*33*32 = 10560 for a 3-second spectrogram
    dropped_1 = drop_layer(flattened, keep_prob1, 'dropout1')
    fc1, w_fc1, b_fc1 = fc_layer(dropped_1, fc_input, 64, 'FC_Layer1', activation)

    # Output layer: 2 neurons, one for genuine and one for spoof
    dropped_2 = drop_layer(fc1, keep_prob2, 'dropout2')
    output, w_out, b_out = fc_layer(dropped_2, 64, 2, 'Output_Layer', 'no-activation')  # raw logits
    print(output.shape)

    return fc1, output, network_weights, activationList, biasList
def cnnModel1(trainSize, input_placeholder, activation, init, keep_prob1, keep_prob2):
    network_weights = list()
    activationList = list()
    biasList = list()

    print('------------ Using cnnModel1 architecture ...')

    # Convolution layer 1
    conv1, w1, b1 = conv_layer(input_placeholder, [5, 5, 1, 32], [32], [1, 1, 1, 1],
                               'conv1', act=activation, init_type=init)
    pool1 = maxPool2x2(conv1, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool1.shape = ', pool1)
    network_weights.append(w1)
    activationList.append(pool1)
    biasList.append(b1)

    # Convolution layer 2
    conv2, w2, b2 = conv_layer(pool1, [3, 3, 32, 48], [48], [1, 1, 1, 1],
                               'conv2', act=activation, init_type=init)
    pool2 = maxPool2x2(conv2, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool2.shape = ', pool2)
    network_weights.append(w2)
    activationList.append(pool2)
    biasList.append(b2)

    # Convolution layer 3
    conv3, w3, b3 = conv_layer(pool2, [3, 3, 48, 64], [64], [1, 1, 1, 1],
                               'conv3', act=activation, init_type=init)
    pool3 = maxPool2x2(conv3, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool3.shape = ', pool3)
    network_weights.append(w3)
    activationList.append(pool3)
    biasList.append(b3)

    # Convolution layer 4
    conv4, w4, b4 = conv_layer(pool3, [3, 3, 64, 32], [32], [1, 1, 1, 1],
                               'conv4', act=activation, init_type=init)
    pool4 = maxPool2x2(conv4, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool4.shape = ', pool4)
    network_weights.append(w4)
    activationList.append(pool4)
    biasList.append(b4)

    # Convolution layer 5
    conv5, w5, b5 = conv_layer(pool4, [3, 3, 32, 32], [32], [1, 1, 1, 1],
                               'conv5', act=activation, init_type=init)
    pool5 = maxPool2x2(conv5, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool5.shape = ', pool5.shape)
    network_weights.append(w5)
    activationList.append(pool5)
    biasList.append(b5)

    # Fully connected layer 1 with dropout
    flattened = tf.reshape(pool5, shape=[-1, 33 * 10 * 32])  # 33*10*32 = 10560
    dropped_1 = drop_layer(flattened, keep_prob1, 'dropout1')
    fc1, w6, b6 = fc_layer(dropped_1, 33 * 10 * 32, 256, 'FC_Layer1', activation)
    network_weights.append(w6)
    activationList.append(fc1)
    biasList.append(b6)
    # NOTE: the flattened conv output is 10560-dimensional, which is huge, and FC1 has
    # only 256 neurons, so a lot of information may be lost. Maybe try 1024 or more
    # neurons to capture more information. Think about this architecture!

    # Fully connected layer 2 with dropout
    dropped_2 = drop_layer(fc1, keep_prob2, 'dropout2')
    fc2, w7, b7 = fc_layer(dropped_2, 256, 64, 'FC_Layer2', activation)
    network_weights.append(w7)
    activationList.append(fc2)
    biasList.append(b7)

    # Output layer: 2 neurons, one for genuine and one for spoof
    dropped_3 = drop_layer(fc2, keep_prob2, 'dropout3')
    output, w8, b8 = fc_layer(dropped_3, 64, 2, 'Output_Layer', 'no-activation')  # raw logits
    print(output.shape)
    network_weights.append(w8)
    activationList.append(output)
    biasList.append(b8)

    return fc2, output, network_weights, activationList, biasList
def cnnModel2(trainSize, input_placeholder, activation, init, keep_prob1, keep_prob2):
    network_weights = list()
    activationList = list()
    biasList = list()

    time_dim = 300           # default (3 seconds)
    fc_input = 5 * 17 * 32   # flattened size when time_dim = 300
    if trainSize == '4sec':
        time_dim = 400
        fc_input = 0   # TODO: compute before using this architecture with 4-second input
    elif trainSize == '5sec':
        time_dim = 500
        fc_input = 0   # TODO: compute before using this architecture with 5-second input

    print(' Using cnnModel2 architecture ...')

    # Convolution layer 1
    conv1, w1, b1 = conv_layer(input_placeholder, [5, 5, 1, 32], [32], [1, 1, 1, 1],
                               'conv1', act=activation, init_type=init)
    pool1 = maxPool2x2(conv1, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool1.shape = ', pool1)
    network_weights.append(w1)
    activationList.append(pool1)
    biasList.append(b1)

    # Convolution layer 2
    conv2, w2, b2 = conv_layer(pool1, [3, 3, 32, 48], [48], [1, 1, 1, 1],
                               'conv2', act=activation, init_type=init)
    pool2 = maxPool2x2(conv2, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool2.shape = ', pool2)
    network_weights.append(w2)
    activationList.append(pool2)
    biasList.append(b2)

    # Convolution layer 3
    conv3, w3, b3 = conv_layer(pool2, [3, 3, 48, 64], [64], [1, 1, 1, 1],
                               'conv3', act=activation, init_type=init)
    pool3 = maxPool2x2(conv3, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool3.shape = ', pool3)
    network_weights.append(w3)
    activationList.append(pool3)
    biasList.append(b3)

    # Convolution layer 4
    conv4, w4, b4 = conv_layer(pool3, [3, 3, 64, 64], [64], [1, 1, 1, 1],
                               'conv4', act=activation, init_type=init)
    pool4 = maxPool2x2(conv4, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool4.shape = ', pool4)
    network_weights.append(w4)
    activationList.append(pool4)
    biasList.append(b4)

    # Convolution layer 5
    conv5, w5, b5 = conv_layer(pool4, [3, 3, 64, 48], [48], [1, 1, 1, 1],
                               'conv5', act=activation, init_type=init)
    pool5 = maxPool2x2(conv5, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool5.shape = ', pool5.shape)
    network_weights.append(w5)
    activationList.append(pool5)
    biasList.append(b5)

    # Convolution layer 6
    conv6, w6, b6 = conv_layer(pool5, [3, 3, 48, 32], [32], [1, 1, 1, 1],
                               'conv6', act=activation, init_type=init)
    pool6 = maxPool2x2(conv6, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool6.shape = ', pool6.shape)
    network_weights.append(w6)
    activationList.append(pool6)
    biasList.append(b6)

    # Fully connected layer 1 with dropout
    flattened = tf.reshape(pool6, shape=[-1, fc_input])  # 5*17*32 = 2720 for a 3-second input
    dropped_1 = drop_layer(flattened, keep_prob1, 'dropout1')
    fc1, w7, b7 = fc_layer(dropped_1, fc_input, 64, 'FC_Layer1', activation)
    network_weights.append(w7)
    activationList.append(dropped_1)
    biasList.append(b7)

    # Output layer: 2 neurons, one for genuine and one for spoof
    dropped_2 = drop_layer(fc1, keep_prob2, 'dropout2')
    output, w8, b8 = fc_layer(dropped_2, 64, 2, 'Output_Layer', 'no-activation')  # raw logits
    print('Output layer = ', output.shape)
    network_weights.append(w8)
    activationList.append(output)
    biasList.append(b8)

    return fc1, output, network_weights, activationList, biasList
def cnnModel1(input_type, trainSize, input_placeholder, activation, init, targets,
              fftSize, padding, keep_prob1, keep_prob2, keep_prob3):
    # NOTE: this redefinition of cnnModel1 supersedes the earlier one above
    # (in Python the last definition with a given name wins).
    # TODO: come back to this for code cleaning. Affected files:
    # feature_extraction.py, extract_cnn_scores.py, nn_architecture.py,
    # extract_cnn_features.py
    t = trainSize
    trainSize = str(trainSize) + 'sec'

    # Frequency dimension of the input features
    if input_type == 'mel_spec':
        f = 80
    elif input_type == 'cqt_spec':
        f = 84
    elif input_type == 'mag_spec':
        if fftSize == 512:
            f = 257
        elif fftSize == 256:
            f = 129
        elif fftSize == 1024:
            f = 513
        elif fftSize == 2048:
            f = 1025
    else:
        concatenate = False
        if concatenate:
            f = 80   # two feature types concatenated (e.g. CQCC+SCMC)
        else:
            f = 40   # just delta + acceleration (40-dimensional)

    weight_list = list()
    activation_list = list()
    bias_list = list()

    # MFM halves the channel/unit dimension of every layer output.
    if activation == 'mfm':
        fc_input = f * 64    # e.g. 1*257*64 = 16448
        in_conv2 = 64
        in_conv3 = 64
        in_conv4 = 64
        in_fc2 = 128
        in_fc3 = 128
        in_outputLayer = 128
    else:
        fc_input = f * 128   # e.g. 1*257*128 = 32896
        in_conv2 = 128
        in_conv3 = 128
        in_conv4 = 128
        in_fc2 = 256
        in_fc3 = 256
        in_outputLayer = 256

    print('======================== CNN ARCHITECTURE ==============================\n')

    # Convolution layers 1, 2, 3
    conv1, w1, b1 = conv_layer(input_placeholder, [3, f, 1, 128], [128], [1, 1, 1, 1],
                               'conv1', padding, activation, init)
    weight_list.append(w1)
    bias_list.append(b1)
    print('Conv1 ', conv1)

    conv2, w2, b2 = conv_layer(conv1, [3, 1, in_conv2, 128], [128], [1, 1, 1, 1],
                               'conv2', padding, activation, init)
    weight_list.append(w2)
    bias_list.append(b2)
    print('Conv2 ', conv2)

    conv3, w3, b3 = conv_layer(conv2, [3, 1, in_conv3, 128], [128], [1, 1, 1, 1],
                               'conv3', padding, activation, init)
    weight_list.append(w3)
    bias_list.append(b3)
    print('Conv3 ', conv3)

    if input_type == 'cqt_spec':
        time_dim = 32
    else:
        time_dim = t * 100

    # Max-pooling over the whole time axis:
    # e.g. 100x257x64 in -> 1x257x64 out (1*257*64 = 16448)
    pool1 = maxPool2x2(conv3, [1, time_dim, 1, 1], [1, time_dim, 1, 1])
    print('Pool1 layer shape = ', pool1)

    # Dropout on the large flattened conv output
    flattened = tf.reshape(pool1, shape=[-1, fc_input])
    dropped_1 = drop_layer(flattened, keep_prob1, 'dropout1')

    # Fully connected layer 1 with 256 neurons (split into 128 by MFM)
    fc1, w4, b4 = fc_layer(dropped_1, fc_input, 256, 'FC_Layer1', activation)
    weight_list.append(w4)
    bias_list.append(b4)
    print('Shape of FC1 = ', fc1.shape)

    # Dropout followed by FC layer 2 with 256 neurons (split into 128 by MFM)
    dropped_2 = drop_layer(fc1, keep_prob2, 'dropout2')
    fc2, w5, b5 = fc_layer(dropped_2, in_fc2, 256, 'FC_Layer2', activation)
    weight_list.append(w5)
    bias_list.append(b5)
    print('Shape of FC2 = ', fc2.shape)

    # Dropout followed by FC layer 3 with 256 neurons (split into 128 by MFM)
    dropped_3 = drop_layer(fc2, keep_prob2, 'dropout3')
    fc3, w6, b6 = fc_layer(dropped_3, in_fc3, 256, 'FC_Layer3', activation)
    weight_list.append(w6)
    bias_list.append(b6)
    print('Shape of FC3 = ', fc3.shape)

    # Output layer: one neuron per target class (genuine / spoof). Dropout applied first.
    dropped_4 = drop_layer(fc3, keep_prob3, 'dropout4')
    output, w7, b7 = fc_layer(dropped_4, in_outputLayer, targets,
                              'Output_Layer', 'no-activation')  # raw logits
    weight_list.append(w7)
    bias_list.append(b7)
    print('Output layer shape = ', output.shape)

    print('======================== CNN ARCHITECTURE ==============================\n')

    return fc3, output, weight_list, activation_list, bias_list
def cnnModel2(input_type, trainSize, input_placeholder, activation, init, targets,
              fftSize, padding, keep_prob1, keep_prob2, keep_prob3):
    # NOTE: this redefinition of cnnModel2 supersedes the earlier one above.
    t = trainSize
    trainSize = str(trainSize) + 'sec'

    # Frequency dimension of the input features
    if input_type == 'mel_spec':
        f = 80
    elif input_type == 'cqt_spec':
        f = 84
    elif input_type == 'mag_spec':
        if fftSize == 512:
            f = 257
        elif fftSize == 256:
            f = 129
        elif fftSize == 1024:
            f = 513
        elif fftSize == 2048:
            f = 1025
    else:
        concatenate = False
        if concatenate:
            f = 80   # two feature types concatenated (e.g. CQCC+SCMC)
        else:
            f = 40   # just delta + acceleration (40-dimensional)

    weight_list = list()
    activation_list = list()
    bias_list = list()

    # MFM halves the channel/unit dimension of every layer output.
    if activation == 'mfm':
        fc_input = 13 * 17 * 8    # 1768
        in_conv2 = 8
        in_conv3 = 8
        in_conv4 = 8
        in_fc2 = 128
        in_fc3 = 128
        in_outputLayer = 128
    else:
        fc_input = 13 * 17 * 16   # 3536
        in_conv2 = 16
        in_conv3 = 16
        in_conv4 = 16
        in_fc2 = 256
        in_fc3 = 256
        in_outputLayer = 256

    print('======================== CNN ARCHITECTURE ==============================\n')

    # Convolution layers 1, 2, 3 with 2x2 pooling after each
    conv1, w1, b1 = conv_layer(input_placeholder, [3, 10, 1, 16], [16], [1, 1, 1, 1],
                               'conv1', padding, activation, init)
    weight_list.append(w1)
    bias_list.append(b1)
    print('Conv1 ', conv1)
    pool1 = maxPool2x2(conv1, [1, 2, 2, 1], [1, 2, 2, 1])

    conv2, w2, b2 = conv_layer(pool1, [3, 10, in_conv2, 16], [16], [1, 1, 1, 1],
                               'conv2', padding, activation, init)
    weight_list.append(w2)
    bias_list.append(b2)
    print('Conv2 ', conv2)
    pool2 = maxPool2x2(conv2, [1, 2, 2, 1], [1, 2, 2, 1])

    conv3, w3, b3 = conv_layer(pool2, [3, 10, in_conv3, 16], [16], [1, 1, 1, 1],
                               'conv3', padding, activation, init)
    weight_list.append(w3)
    bias_list.append(b3)
    # print('Conv3 ', conv3)
    pool3 = maxPool2x2(conv3, [1, 2, 2, 1], [1, 2, 2, 1])
    print('pool3 shape: ', pool3)

    if input_type == 'cqt_spec':
        time_dim = 32
    else:
        time_dim = t * 100

    # Dropout on the flattened conv output
    flattened = tf.reshape(pool3, shape=[-1, fc_input])
    dropped_1 = drop_layer(flattened, keep_prob1, 'dropout1')

    # Fully connected layer 1 with 256 neurons (split into 128 by MFM)
    fc1, w4, b4 = fc_layer(dropped_1, fc_input, 256, 'FC_Layer1', activation)
    weight_list.append(w4)
    bias_list.append(b4)
    print('Shape of FC1 = ', fc1.shape)

    '''
    # Dropout followed by FC layer 2 with 256 neurons (split into 128 by MFM)
    dropped_2 = drop_layer(fc1, keep_prob2, 'dropout2')
    fc2, w5, b5 = fc_layer(dropped_2, in_fc2, 256, 'FC_Layer2', activation)
    weight_list.append(w5)
    bias_list.append(b5)
    print('Shape of FC2 = ', fc2.shape)

    # Dropout followed by FC layer 3 with 256 neurons (split into 128 by MFM)
    dropped_3 = drop_layer(fc2, keep_prob2, 'dropout3')
    fc3, w6, b6 = fc_layer(dropped_3, in_fc3, 256, 'FC_Layer3', activation)
    weight_list.append(w6)
    bias_list.append(b6)
    print('Shape of FC3 = ', fc3.shape)
    '''

    # Output layer: one neuron per target class (genuine / spoof). Dropout applied first.
    dropped_4 = drop_layer(fc1, keep_prob3, 'dropout4')
    output, w7, b7 = fc_layer(dropped_4, in_outputLayer, targets,
                              'Output_Layer', 'no-activation')  # raw logits
    weight_list.append(w7)
    bias_list.append(b7)
    print('Output layer shape = ', output.shape)

    print('======================== CNN ARCHITECTURE ==============================\n')

    return fc1, output, weight_list, activation_list, bias_list
def cnnModel1(trainSize, input_placeholder, activation, init, targets, fftSize,
              padding, keep_prob1, keep_prob2, keep_prob3):
    # Replicating the Bulbul architecture of Thomas Grill. Note that they use a
    # mel-spectrogram; we are using a power spectrogram at the moment.
    # NOTE: this redefinition of cnnModel1 supersedes the earlier ones above.
    print('FFT size used in this run is: ', fftSize)
    print('TrainSize input to architecture is: ', trainSize)
    time_dim = trainSize * 100   # number of frames; computed before the string conversion below
    trainSize = str(trainSize) + 'sec'

    f = 512   # default frequency dimension
    if fftSize == 512:
        f = 257
    elif fftSize == 256:
        f = 129
    elif fftSize == 1024:
        f = 513
    elif fftSize == 2048:
        f = 1025

    weight_list = list()
    activation_list = list()
    bias_list = list()

    # if trainSize == '1sec':
    #     time_dim = 100

    if activation == 'mfm':
        if fftSize == 512:
            fc_input = 464   # 2*29*8
        elif fftSize == 256:
            fc_input = 240
    else:
        if fftSize == 512:
            fc_input = 928   # 2*29*16
        elif fftSize == 256:
            fc_input = 480

    # MFM halves the channel/unit dimension of every layer output.
    if activation == 'mfm':
        in_conv2 = 8
        in_conv3 = 8
        in_conv4 = 8
        in_fc2 = 128
        in_outputLayer = 16
    else:
        in_conv2 = 16
        in_conv3 = 16
        in_conv4 = 16
        in_fc2 = 256
        in_outputLayer = 32

    # Convolution layer 1
    conv1, w1, b1 = conv_layer(input_placeholder, [3, 3, 1, 16], [16], [1, 1, 1, 1],
                               'conv1', padding, activation, init)
    weight_list.append(w1)
    bias_list.append(b1)
    print('Conv1 ', conv1)
    pool1 = maxPool2x2(conv1, [1, 3, 3, 1], [1, 3, 3, 1])
    print('Pool1 layer shape = ', pool1)

    # Convolution layer 2
    conv2, w2, b2 = conv_layer(pool1, [3, 3, in_conv2, 16], [16], [1, 1, 1, 1],
                               'conv2', padding, activation, init)
    weight_list.append(w2)
    bias_list.append(b2)
    print('Conv2 ', conv2)
    pool2 = maxPool2x2(conv2, [1, 3, 3, 1], [1, 3, 3, 1])
    print('Pool2 layer shape = ', pool2)

    # Convolution layer 3
    conv3, w3, b3 = conv_layer(pool2, [3, 1, in_conv3, 16], [16], [1, 1, 1, 1],
                               'conv3', padding, activation, init)
    weight_list.append(w3)
    bias_list.append(b3)
    print('Conv3 ', conv3)
    pool3 = maxPool2x2(conv3, [1, 3, 1, 1], [1, 3, 1, 1])
    print('Pool3 layer shape = ', pool3)

    # Convolution layer 4
    conv4, w4, b4 = conv_layer(pool3, [3, 1, in_conv4, 16], [16], [1, 1, 1, 1],
                               'conv4', padding, activation, init)
    weight_list.append(w4)
    bias_list.append(b4)
    print('Conv4 ', conv4)
    pool4 = maxPool2x2(conv4, [1, 3, 1, 1], [1, 3, 1, 1])
    print('Pool4 layer shape = ', pool4)

    # Dropout on the flattened conv output
    flattened = tf.reshape(pool4, shape=[-1, fc_input])
    dropped_1 = drop_layer(flattened, keep_prob1, 'dropout1')

    # Fully connected layer 1 with 256 neurons (split into 128 by MFM)
    fc1, w_fc1, b_fc1 = fc_layer(dropped_1, fc_input, 256, 'FC_Layer1', activation)
    weight_list.append(w_fc1)
    bias_list.append(b_fc1)
    print('Shape of FC1 = ', fc1.shape)

    # Dropout followed by FC layer 2 with 32 neurons (split into 16 by MFM)
    dropped_2 = drop_layer(fc1, keep_prob2, 'dropout2')
    fc2, w5, b5 = fc_layer(dropped_2, in_fc2, 32, 'FC_Layer2', activation)
    print('Shape of FC2 = ', fc2.shape)
    weight_list.append(w5)
    bias_list.append(b5)

    # Output layer: one neuron per target class (genuine / spoof). Dropout applied first.
    dropped_3 = drop_layer(fc2, keep_prob3, 'dropout3')
    output, w6, b6 = fc_layer(dropped_3, in_outputLayer, targets,
                              'Output_Layer', 'no-activation')  # raw logits
    weight_list.append(w6)
    bias_list.append(b6)
    print('Output layer shape = ', output.shape)

    # Idea: train an SVM classifier on the 256-dim FC1 output and a GMM on the FC2 output.
    # fc = list()
    # fc.append(fc1)
    # fc.append(fc2)

    return fc2, output, weight_list, activation_list, bias_list
def cnnModel2(trainSize, input_placeholder, activation, init, targets, fftSize,
              padding, keep_prob1, keep_prob2, keep_prob3):
    # Replicating the Sparrow architecture of Thomas Grill.
    # It uses only conv layers; no FC layer is used here (apart from the output layer).
    # NOTE: this redefinition of cnnModel2 supersedes the earlier ones above.
    print('TrainSize input to architecture is: ', trainSize)
    trainSize = str(trainSize) + 'sec'

    # Note: if this architecture works somehow, it will have to be explored more deeply.
    '''
    Layer    Filter    Depth x Time x Frequency
    Input    -         1 x 100 x 127   (assuming 1 sec, 256-point FFT, no padding)
    Conv1    (3x3)     32 x 98 x 127
    Conv2    (3x3)     32 x 96 x 125
    Pool1    (3x3)     32 x 32 x 42
    Conv3    (3x3)     32 x 30 x 40
    Conv4    (3x3)     32 x 28 x 38
    Conv5    (3x20)    64 x 26 x 19
    Pool2    (3x3)     64 x 9 x 7
    Conv6    (9x1)     256 x 1 x 7
    Conv7    (1x1)     64 x 1 x 7
    Conv8    (1x1)     16 x 1 x 7
    Output   -         2 neurons
    '''

    print('FFT size used in this run is: ', fftSize)
    f = 512   # default frequency dimension (input is e.g. 100x257)
    if fftSize == 512:
        f = 257
    elif fftSize == 256:
        f = 129
    elif fftSize == 1024:
        f = 513
    elif fftSize == 2048:
        f = 1025

    weight_list = list()
    activation_list = list()
    bias_list = list()

    if trainSize == '1sec':
        time_dim = 100
        if activation == 'mfm':
            if fftSize == 512:
                fc_input = 168   # 1*21*8
            elif fftSize == 256:
                fc_input = 56    # 1*7*8
        else:
            if fftSize == 512:
                fc_input = 336   # 1*21*16
            elif fftSize == 256:
                fc_input = 112   # 1*7*16

    # MFM halves the channel dimension of every layer output.
    if activation == 'mfm':
        in_conv2 = 16
        in_conv3 = 16
        in_conv4 = 16
        in_conv5 = 16
        in_conv6 = 32
        in_conv7 = 128
        in_conv8 = 32
    else:
        in_conv2 = 32
        in_conv3 = 32
        in_conv4 = 32
        in_conv5 = 32
        in_conv6 = 64
        in_conv7 = 256
        in_conv8 = 64

    freq_inConv5 = 20   # chosen from their paper; let's see the impact
    time_inConv6 = 9

    # Convolution layer 1
    conv1, w1, b1 = conv_layer(input_placeholder, [3, 3, 1, 32], [32], [1, 1, 1, 1],
                               'conv1', padding, activation, init)
    weight_list.append(w1)
    bias_list.append(b1)
    print('Conv1 ', conv1)

    conv2, w2, b2 = conv_layer(conv1, [3, 3, in_conv2, 32], [32], [1, 1, 1, 1],
                               'conv2', padding, activation, init)
    weight_list.append(w2)
    bias_list.append(b2)
    print('Conv2 ', conv2)

    pool1 = maxPool2x2(conv2, [1, 3, 3, 1], [1, 3, 3, 1])
    print('Pool1 layer shape = ', pool1)

    conv3, w3, b3 = conv_layer(pool1, [3, 3, in_conv3, 32], [32], [1, 1, 1, 1],
                               'conv3', padding, activation, init)
    weight_list.append(w3)
    bias_list.append(b3)
    print('Conv3 ', conv3)

    conv4, w4, b4 = conv_layer(conv3, [3, 3, in_conv4, 32], [32], [1, 1, 1, 1],
                               'conv4', padding, activation, init)
    weight_list.append(w4)
    bias_list.append(b4)
    print('Conv4 ', conv4)

    conv5, w5, b5 = conv_layer(conv4, [3, freq_inConv5, in_conv5, 64], [64], [1, 1, 1, 1],
                               'conv5', padding, activation, init)
    weight_list.append(w5)
    bias_list.append(b5)
    print('Conv5 ', conv5)

    pool2 = maxPool2x2(conv5, [1, 3, 3, 1], [1, 3, 3, 1])
    print('Pool2 layer shape = ', pool2)

    conv6, w6, b6 = conv_layer(pool2, [time_inConv6, 1, in_conv6, 256], [256], [1, 1, 1, 1],
                               'conv6', padding, activation, init)
    weight_list.append(w6)
    bias_list.append(b6)
    print('Conv6 ', conv6)

    conv7, w7, b7 = conv_layer(conv6, [1, 1, in_conv7, 64], [64], [1, 1, 1, 1],
                               'conv7', padding, activation, init)
    weight_list.append(w7)
    bias_list.append(b7)
    print('Conv7 ', conv7)

    conv8, w8, b8 = conv_layer(conv7, [1, 1, in_conv8, 16], [16], [1, 1, 1, 1],
                               'conv8', padding, activation, init)
    weight_list.append(w8)
    bias_list.append(b8)
    print('Conv8 ', conv8)

    # Dropout on the flattened conv output
    flattened = tf.reshape(conv8, shape=[-1, fc_input])
    dropped_1 = drop_layer(flattened, keep_prob1, 'dropout1')

    # Output dense layer
    output, w9, b9 = fc_layer(dropped_1, fc_input, targets,
                              'Output_Layer', 'no-activation')  # raw logits
    weight_list.append(w9)
    bias_list.append(b9)
    print('Output layer shape = ', output.shape)

    return flattened, output, weight_list, activation_list, bias_list
def cnnModel3(input_type, trainSize, input_placeholder, activation, init, targets,
              fftSize, padding, keep_prob1, keep_prob2, keep_prob3,
              n_layers=2, fc_neurons=100, fc1_neurons=100):
    # We call this one "google_small" because it uses only 100 neurons in the FC layer.
    t = trainSize
    trainSize = str(trainSize) + 'sec'
    f = 129

    weight_list = list()
    activation_list = list()
    bias_list = list()

    # MFM halves the channel/unit dimension of every layer output.
    if activation == 'mfm':
        if t == 3:
            fc_input = 15 * 7 * 8
        elif t == 1:
            fc_input = 5 * 7 * 8
        in_conv2 = 8
        in_conv3 = 8
        in_conv4 = 8
        in_fc2 = int(fc_neurons / 2)
        in_outputLayer = int(fc_neurons / 2)
    else:
        print('Activation is relu')
        if t == 3:
            fc_input = 15 * 7 * 16
        elif t == 1:
            fc_input = 5 * 7 * 16
        in_conv2 = 16
        in_conv3 = 16
        in_conv4 = 16
        in_fc2 = fc_neurons
        in_fc3 = fc_neurons
        in_outputLayer = fc_neurons

    print('======================== CNN ARCHITECTURE ==============================\n')

    # Convolution layers 1, 2, 3 with pooling after each
    conv1, w1, b1 = conv_layer(input_placeholder, [1, 10, 1, 16], [16], [1, 1, 1, 1],
                               'conv1', padding, activation, init)
    weight_list.append(w1)
    bias_list.append(b1)
    # print('Conv1 ', conv1)
    pool1 = maxPool2x2(conv1, [1, 2, 2, 1], [1, 2, 2, 1])
    print('Pool1: ', pool1)

    conv2, w2, b2 = conv_layer(pool1, [1, 10, in_conv2, 16], [16], [1, 1, 1, 1],
                               'conv2', padding, activation, init)
    weight_list.append(w2)
    bias_list.append(b2)
    # print('Conv2 ', conv2)
    pool2 = maxPool2x2(conv2, [1, 2, 2, 1], [1, 2, 2, 1])
    print('Pool2: ', pool2)

    conv3, w3, b3 = conv_layer(pool2, [1, 10, in_conv3, 16], [16], [1, 1, 1, 1],
                               'conv3', padding, activation, init)
    weight_list.append(w3)
    bias_list.append(b3)
    # print('Conv3 ', conv3)
    pool3 = maxPool2x2(conv3, [1, 5, 5, 1], [1, 5, 5, 1])
    print('pool3 shape: ', pool3)

    if input_type == 'cqt_spec':
        time_dim = 32
    else:
        time_dim = t * 100

    # Dropout on the flattened conv output
    flattened = tf.reshape(pool3, shape=[-1, fc_input])
    dropped_1 = drop_layer(flattened, keep_prob1, 'dropout1')

    # Fully connected layer 1
    fc1, w4, b4 = fc_layer(dropped_1, fc_input, fc_neurons, 'FC_Layer1', activation)
    weight_list.append(w4)
    bias_list.append(b4)
    print('Shape of FC1 = ', fc1.shape)

    # Output layer: one neuron per target class (genuine / spoof). Dropout applied first.
    print('Input to the output layer: ', in_outputLayer)
    dropped_2 = drop_layer(fc1, keep_prob2, 'dropout2')
    output, w7, b7 = fc_layer(dropped_2, in_outputLayer, targets,
                              'Output_Layer', 'no-activation')  # raw logits
    weight_list.append(w7)
    bias_list.append(b7)
    print('Output layer shape = ', output.shape)

    print('======================== CNN ARCHITECTURE ==============================\n')

    return fc1, output, weight_list, activation_list, bias_list
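# A minimal usage sketch (not part of the original module): how one of the builders
# above might be wired into a TF 1.x graph and trained with a cross-entropy loss.
# The input shape (1-second, 257-bin magnitude spectrogram), the 'xavier' init string,
# the 'SAME' padding mode, and the learning rate are illustrative assumptions, not
# values taken from this repo; whether the hard-coded fc_input values match the graph
# depends on the padding mode and frame rate actually used.
if __name__ == '__main__':
    x = tf.placeholder(tf.float32, shape=[None, 100, 257, 1], name='input')
    y = tf.placeholder(tf.int64, shape=[None], name='labels')
    kp1 = tf.placeholder_with_default(1.0, shape=[], name='keep_prob1')
    kp2 = tf.placeholder_with_default(1.0, shape=[], name='keep_prob2')
    kp3 = tf.placeholder_with_default(1.0, shape=[], name='keep_prob3')

    # Build the Bulbul-style model (the last definition of cnnModel1 in this file).
    embedding, logits, weights, activations, biases = cnnModel1(
        1, x, 'mfm', 'xavier', 2, 512, 'SAME', kp1, kp2, kp3)

    # Softmax cross-entropy on the raw logits returned by the builder.
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))
    train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)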