def load_mask_labels(): '''Load both target and style masks. A mask image (nr x nc) with m labels/colors will be loaded as a 4D boolean tensor: (1, m, nr, nc) for 'channels_first' or (1, nr, nc, m) for 'channels_last' ''' target_mask_img = load_img(target_mask_path, target_size=(img_nrows, img_ncols)) target_mask_img = img_to_array(target_mask_img) style_mask_img = load_img(style_mask_path, target_size=(img_nrows, img_ncols)) style_mask_img = img_to_array(style_mask_img) if K.image_data_format() == 'channels_first': mask_vecs = np.vstack([style_mask_img.reshape((3, -1)).T, target_mask_img.reshape((3, -1)).T]) else: mask_vecs = np.vstack([style_mask_img.reshape((-1, 3)), target_mask_img.reshape((-1, 3))]) labels = kmeans(mask_vecs, num_labels) style_mask_label = labels[:img_nrows * img_ncols].reshape((img_nrows, img_ncols)) target_mask_label = labels[img_nrows * img_ncols:].reshape((img_nrows, img_ncols)) stack_axis = 0 if K.image_data_format() == 'channels_first' else -1 style_mask = np.stack([style_mask_label == r for r in range(num_labels)], axis=stack_axis) target_mask = np.stack([target_mask_label == r for r in range(num_labels)], axis=stack_axis) return (np.expand_dims(style_mask, axis=0), np.expand_dims(target_mask, axis=0))
def reference_sequence_into_tensor(reference_seq: str, tensor: np.ndarray, insert_dict: Dict, window_size: int, read_limit: int): ref_offset = len(defines.INPUTS_INDEL) for ii in sorted(insert_dict.keys(), key=int, reverse=True): if ii < 0: reference_seq = defines.INDEL_CHAR*insert_dict[ii] + reference_seq else: reference_seq = reference_seq[:ii] + defines.INDEL_CHAR*insert_dict[ii] + reference_seq[ii:] for ii,b in enumerate(reference_seq): if ii == window_size: break if b in defines.INPUTS_INDEL: if K.image_data_format() == 'channels_last': tensor[:, ii, ref_offset+defines.INPUTS_INDEL[b]] = 1.0 else: tensor[ref_offset+defines.INPUTS_INDEL[b], :, ii] = 1.0 elif b in defines.AMBIGUITY_CODES: if K.image_data_format() == 'channels_last': tensor[:, ii, ref_offset:ref_offset+4] = np.tile(defines.AMBIGUITY_CODES[b], (read_limit, 1)) else: tensor[ref_offset:ref_offset+4, :, ii] = np.transpose( np.tile(defines.AMBIGUITY_CODES[b], (read_limit, 1)))
def test_resnet50_variable_input_channels(): input_shape = (1, None, None) if K.image_data_format() == 'channels_first' else (None, None, 1) model = applications.ResNet50(weights=None, include_top=False, input_shape=input_shape) assert model.output_shape == (None, None, None, 2048) input_shape = (4, None, None) if K.image_data_format() == 'channels_first' else (None, None, 4) model = applications.ResNet50(weights=None, include_top=False, input_shape=input_shape) assert model.output_shape == (None, None, None, 2048)
def test_vgg16_variable_input_channels(): input_shape = (1, None, None) if K.image_data_format() == 'channels_first' else (None, None, 1) model = applications.VGG16(weights=None, include_top=False, input_shape=input_shape) assert model.output_shape == (None, None, None, 512) input_shape = (4, None, None) if K.image_data_format() == 'channels_first' else (None, None, 4) model = applications.VGG16(weights=None, include_top=False, input_shape=input_shape) assert model.output_shape == (None, None, None, 512)
def test_mobilenet_image_size(): valid_image_sizes = [128, 160, 192, 224] for size in valid_image_sizes: input_shape = (size, size, 3) if K.image_data_format() == 'channels_last' else (3, size, size) model = applications.MobileNet(input_shape=input_shape, weights='imagenet', include_top=True) assert model.input_shape == (None,) + input_shape invalid_image_shape = (112, 112, 3) if K.image_data_format() == 'channels_last' else (3, 112, 112) with pytest.raises(ValueError): model = applications.MobileNet(input_shape=invalid_image_shape, weights='imagenet', include_top=True)
def test_nasnet_variable_input_channels(): random.seed(time.time()) fun, dim = random.choice(NASNET_LIST) input_shape = (1, None, None) if K.image_data_format() == 'channels_first' else (None, None, 1) model = fun(weights=None, include_top=False, input_shape=input_shape) assert model.output_shape == (None, None, None, dim) input_shape = (4, None, None) if K.image_data_format() == 'channels_first' else (None, None, 4) model = fun(weights=None, include_top=False, input_shape=input_shape) assert model.output_shape == (None, None, None, dim)
def squeeze_excite_block(input, ratio=16): init = input channel_axis = 1 if K.image_data_format() == "channels_first" else -1 # compute channel axis filters = init._keras_shape[channel_axis] # infer input number of filters se_shape = (1, 1, filters) if K.image_data_format() == 'channels_last' else (filters, 1, 1) # determine Dense matrix shape se = GlobalAveragePooling2D()(init) se = Reshape(se_shape)(se) se = Dense(filters // ratio, activation='relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay), use_bias=False)(se) se = Dense(filters, activation='sigmoid', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay), use_bias=False)(se) x = multiply([init, se]) return x
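# Hedged usage sketch for squeeze_excite_block() above. It assumes a module-level
# `weight_decay` and the old private `_keras_shape` attribute (classic standalone
# Keras); on newer versions K.int_shape(init) would be the substitute.
from keras import backend as K
from keras import regularizers
from keras.layers import Input, Conv2D
from keras.models import Model

weight_decay = 1e-4  # assumed value for the global read by the block

inp = Input(shape=(32, 32, 3) if K.image_data_format() == 'channels_last' else (3, 32, 32))
feat = Conv2D(64, (3, 3), padding='same', activation='relu')(inp)
out = squeeze_excite_block(feat, ratio=16)  # recalibrates the 64 channels, shape unchanged
Model(inp, out).summary()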
def test_nasnet_variable_input_channels(): input_shape = (1, None, None) if K.image_data_format() == 'channels_first' else (None, None, 1) model = applications.NASNetMobile(weights=None, include_top=False, input_shape=input_shape) assert model.output_shape == (None, None, None, 1056) model = applications.NASNetLarge(weights=None, include_top=False, input_shape=input_shape) assert model.output_shape == (None, None, None, 4032) input_shape = (4, None, None) if K.image_data_format() == 'channels_first' else (None, None, 4) model = applications.NASNetMobile(weights=None, include_top=False, input_shape=input_shape) assert model.output_shape == (None, None, None, 1056) model = applications.NASNetLarge(weights=None, include_top=False, input_shape=input_shape) assert model.output_shape == (None, None, None, 4032)
def preprocess_input(x, data_format=None, version=1): x_temp = np.copy(x) if data_format is None: data_format = K.image_data_format() assert data_format in {'channels_last', 'channels_first'} if version == 1: if data_format == 'channels_first': x_temp = x_temp[:, ::-1, ...] x_temp[:, 0, :, :] -= 93.5940 x_temp[:, 1, :, :] -= 104.7624 x_temp[:, 2, :, :] -= 129.1863 else: x_temp = x_temp[..., ::-1] x_temp[..., 0] -= 93.5940 x_temp[..., 1] -= 104.7624 x_temp[..., 2] -= 129.1863 elif version == 2: if data_format == 'channels_first': x_temp = x_temp[:, ::-1, ...] x_temp[:, 0, :, :] -= 91.4953 x_temp[:, 1, :, :] -= 103.8827 x_temp[:, 2, :, :] -= 131.0912 else: x_temp = x_temp[..., ::-1] x_temp[..., 0] -= 91.4953 x_temp[..., 1] -= 103.8827 x_temp[..., 2] -= 131.0912 else: raise NotImplementedError return x_temp
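# Hedged usage sketch for preprocess_input() above: channels are flipped
# RGB -> BGR and per-channel means are subtracted. The random batch is purely
# illustrative.
import numpy as np

batch = np.random.uniform(0, 255, size=(2, 224, 224, 3)).astype('float32')
out = preprocess_input(batch, data_format='channels_last', version=1)
print(out.shape)                               # (2, 224, 224, 3)
print(batch[0, 0, 0, ::-1] - out[0, 0, 0, :])  # ~ [93.594, 104.7624, 129.1863]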
def cifar10_load_data(path): """Loads CIFAR10 dataset. # Returns Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. """ dirname = 'cifar-10-batches-py' # origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' # path = get_file(dirname, origin=origin, untar=True) path_ = os.path.join(path, dirname) num_train_samples = 50000 x_train = np.zeros((num_train_samples, 3, 32, 32), dtype='uint8') y_train = np.zeros((num_train_samples,), dtype='uint8') for i in range(1, 6): fpath = os.path.join(path_, 'data_batch_' + str(i)) data, labels = cifar10.load_batch(fpath) x_train[(i - 1) * 10000: i * 10000, :, :, :] = data y_train[(i - 1) * 10000: i * 10000] = labels fpath = os.path.join(path_, 'test_batch') x_test, y_test = cifar10.load_batch(fpath) y_train = np.reshape(y_train, (len(y_train), 1)) y_test = np.reshape(y_test, (len(y_test), 1)) if KB.image_data_format() == 'channels_last': x_train = x_train.transpose(0, 2, 3, 1) x_test = x_test.transpose(0, 2, 3, 1) return (x_train, y_train), (x_test, y_test)
def test_resnet50_pooling_specified_input_shape(): input_shape = (3, 300, 300) if K.image_data_format() == 'channels_first' else (300, 300, 3) model = applications.ResNet50(weights=None, include_top=False, pooling='avg', input_shape=input_shape) assert model.output_shape == (None, 2048)
def _depthwise_conv_block_detection(input, layer_name, strides = (1,1), kernel_size = 3, pointwise_conv_filters=32, alpha=1.0, depth_multiplier=1, padding = 'valid', data_format = None, activation = None, use_bias = True, depthwise_initializer='glorot_uniform', pointwise_initializer='glorot_uniform', bias_initializer = "zeros", bias_regularizer= None, activity_regularizer = None, depthwise_constraint = None, pointwise_constraint = None, bias_constraint= None, batch_size = None, block_id=1,trainable = None, weights = None): channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 pointwise_conv_filters = int(pointwise_conv_filters * alpha) x = DepthwiseConv2D((kernel_size, kernel_size), padding=padding, depth_multiplier=depth_multiplier, strides=strides, use_bias=False, name=layer_name + '_conv_dw_%d' % block_id)(input) x = BatchNormalization(axis=channel_axis, name=layer_name + '_conv_dw_%d_bn' % block_id)(x) x = Activation(relu6, name=layer_name+'_conv_dw_%d_relu' % block_id)(x) x = Conv2D(pointwise_conv_filters, (1, 1), #padding='same', padding=padding, use_bias=False, strides=(1, 1), name=layer_name + '_conv_pw_%d' % block_id)(x) x = BatchNormalization(axis=channel_axis, name=layer_name+'_conv_pw_%d_bn' % block_id)(x) return Activation(relu6, name=layer_name+ '_conv_pw_%d_relu' % block_id)(x)
def senet_identity_block(input_tensor, kernel_size, filters, stage, block, bias=False): filters1, filters2, filters3 = filters if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv1_reduce_name = 'conv' + str(stage) + "_" + str(block) + "_1x1_reduce" conv1_increase_name = 'conv' + str(stage) + "_" + str( block) + "_1x1_increase" conv3_name = 'conv' + str(stage) + "_" + str(block) + "_3x3" x = Conv2D(filters1, (1, 1), use_bias=bias, name=conv1_reduce_name)(input_tensor) x = BatchNormalization(axis=bn_axis, name=conv1_reduce_name + "/bn")(x) x = Activation('relu')(x) x = Conv2D(filters2, kernel_size, padding='same', use_bias=bias, name=conv3_name)(x) x = BatchNormalization(axis=bn_axis, name=conv3_name + "/bn")(x) x = Activation('relu')(x) x = Conv2D(filters3, (1, 1), name=conv1_increase_name, use_bias=bias)(x) x = BatchNormalization(axis=bn_axis, name=conv1_increase_name + "/bn")(x) se = senet_se_block(x, stage=stage, block=block, bias=True) m = layers.add([x, se]) m = Activation('relu')(m) return m
def _test_application_variable_input_channels(app, last_dim): if K.image_data_format() == 'channels_first': input_shape = (1, None, None) else: input_shape = (None, None, 1) output_shape = _get_output_shape( lambda: app(weights=None, include_top=False, input_shape=input_shape)) assert output_shape == (None, None, None, last_dim) if K.image_data_format() == 'channels_first': input_shape = (4, None, None) else: input_shape = (None, None, 4) output_shape = _get_output_shape( lambda: app(weights=None, include_top=False, input_shape=input_shape)) assert output_shape == (None, None, None, last_dim)
def tensor_shape_from_args(args): in_channels = len(get_tensor_channel_map_from_args(args)) if K.image_data_format() == 'channels_last': tensor_shape = (args.read_limit, args.window_size, in_channels) else: tensor_shape = (in_channels, args.read_limit, args.window_size) return tensor_shape
def resnet_conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2), bias=False): filters1, filters2, filters3 = filters if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv1_reduce_name = 'conv' + str(stage) + "_" + str(block) + "_1x1_reduce" conv1_increase_name = 'conv' + str(stage) + "_" + str( block) + "_1x1_increase" conv1_proj_name = 'conv' + str(stage) + "_" + str(block) + "_1x1_proj" conv3_name = 'conv' + str(stage) + "_" + str(block) + "_3x3" x = Conv2D(filters1, (1, 1), strides=strides, use_bias=bias, name=conv1_reduce_name)(input_tensor) x = BatchNormalization(axis=bn_axis, name=conv1_reduce_name + "/bn")(x) x = Activation('relu')(x) x = Conv2D(filters2, kernel_size, padding='same', use_bias=bias, name=conv3_name)(x) x = BatchNormalization(axis=bn_axis, name=conv3_name + "/bn")(x) x = Activation('relu')(x) x = Conv2D(filters3, (1, 1), name=conv1_increase_name, use_bias=bias)(x) x = BatchNormalization(axis=bn_axis, name=conv1_increase_name + "/bn")(x) shortcut = Conv2D(filters3, (1, 1), strides=strides, use_bias=bias, name=conv1_proj_name)(input_tensor) shortcut = BatchNormalization(axis=bn_axis, name=conv1_proj_name + "/bn")( shortcut) x = layers.add([x, shortcut]) x = Activation('relu')(x) return x
def load_data(label_mode='fine'):
    """Loads CIFAR100 dataset.

    # Arguments
        label_mode: one of "fine", "coarse".

    # Returns
        Dict of Numpy arrays with keys `'x_train'`, `'y_train'`, `'x_test'`, `'y_test'`.

    # Raises
        ValueError: in case of invalid `label_mode`.
    """
    if label_mode not in ['fine', 'coarse']:
        raise ValueError('label_mode must be one of "fine", "coarse".')
    dirname = 'cifar-100-python'
    origin = 'http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
    path = get_file(dirname, origin=origin, untar=True)
    fpath = os.path.join(path, 'train')
    x_train, y_train = load_batch(fpath, label_key=label_mode + '_labels')
    fpath = os.path.join(path, 'test')
    x_test, y_test = load_batch(fpath, label_key=label_mode + '_labels')
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))
    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)
    return {'x_train': x_train, 'y_train': y_train,
            'x_test': x_test, 'y_test': y_test}
def on_epoch_end(self, epoch, logs={}): self.model.save_weights( os.path.join(self.output_dir, 'weights%02d.h5' % (epoch))) self.show_edit_distance(256) word_batch = next(self.text_img_gen)[0] res = decode_batch(self.test_func, word_batch['the_input'][0:self.num_display_words]) if word_batch['the_input'][0].shape[0] < 256: cols = 2 else: cols = 1 for i in range(self.num_display_words): pylab.subplot(self.num_display_words // cols, cols, i + 1) if K.image_data_format() == 'channels_first': the_input = word_batch['the_input'][i, 0, :, :] else: the_input = word_batch['the_input'][i, :, :, 0] pylab.imshow(the_input.T, cmap='Greys_r') pylab.xlabel( 'Truth = \'%s\'\nDecoded = \'%s\'' % (word_batch['source_str'][i], res[i])) fig = pylab.gcf() fig.set_size_inches(10, 13) pylab.savefig(os.path.join(self.output_dir, 'e%02d.png' % (epoch))) pylab.close()
def VGG16_Model(img_rows=224, img_cols=224, train=False): if K.image_data_format() == 'channels_first': shape_ord = (3, img_rows, img_cols) else: # channel_last shape_ord = (img_rows, img_cols, 3) vgg16_model = vgg16.VGG16(weights=None, include_top=False, input_tensor=Input(shape_ord)) # vgg16_model.summary() for layer in vgg16_model.layers: layer.trainable = train # freeze layer #add last fully-connected layers x = Flatten(input_shape=vgg16_model.output.shape)(vgg16_model.output) x = Dense(4096, activation='relu', name='ft_fc1')(x) x = Dropout(0.5)(x) x = BatchNormalization()(x) predictions = Dense(43, activation='softmax')(x) model = Model(inputs=vgg16_model.input, outputs=predictions) #compile the model model.compile(optimizer=optimizers.SGD(lr=1e-4, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy']) for layer in model.layers: layer.trainable = train # freeze layer return model
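# Hedged usage sketch for VGG16_Model() above: builds the frozen VGG16 feature
# extractor with the custom 43-class softmax head and prints the layer stack.
model = VGG16_Model(img_rows=224, img_cols=224, train=False)
model.summary()  # all layers frozen (train=False); compile already done inside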
def load_data(): """Loads CIFAR10 dataset. # Returns Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. """ path = '/Users/zuoyuan/.keras/datasets/cifar-10-batches-py' num_train_samples = 50000 x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8') y_train = np.empty((num_train_samples,), dtype='uint8') for i in range(1, 6): fpath = os.path.join(path, 'data_batch_' + str(i)) (x_train[(i - 1) * 10000: i * 10000, :, :, :], y_train[(i - 1) * 10000: i * 10000]) = load_batch(fpath) fpath = os.path.join(path, 'test_batch') x_test, y_test = load_batch(fpath) y_train = np.reshape(y_train, (len(y_train), 1)) y_test = np.reshape(y_test, (len(y_test), 1)) if K.image_data_format() == 'channels_last': x_train = x_train.transpose(0, 2, 3, 1) x_test = x_test.transpose(0, 2, 3, 1) return (x_train, y_train), (x_test, y_test)
def _conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1e-4): ''' Apply BatchNorm, Relu, 3x3 Conv2D, optional bottleneck block and dropout Args: ip: Input keras tensor nb_filter: number of filters bottleneck: add bottleneck block dropout_rate: dropout rate weight_decay: weight decay factor Returns: keras tensor with batch_norm, relu and convolution2d added (optional bottleneck) ''' concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 with K.name_scope('conv_block'): x = BatchNormalization(axis=concat_axis, momentum=0.1, epsilon=1e-5)(ip) x = Activation('relu')(x) if bottleneck: inter_channel = nb_filter * 4 # Obtained from https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False, kernel_regularizer=l2(weight_decay))(x) x = BatchNormalization(axis=concat_axis, epsilon=1e-5, momentum=0.1)(x) x = Activation('relu')(x) x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_normal', padding='same', use_bias=False)(x) if dropout_rate: x = Dropout(dropout_rate)(x) return x
def conv_scaledown(init, filters=64, factor=1, strides=(1, 1), learnall = True): x = Convolution2D(filters*factor, (3, 3), padding='same', strides=strides, kernel_initializer='he_normal', use_bias=True, trainable = learnall, kernel_regularizer = kernel_regularizer)(init) x = res_adapt_mod(x, filters*factor) channel_axis = 1 if K.image_data_format() == "channels_first" else -1 #x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x) x = BatchNormalization(axis=channel_axis)(x) x = Activation('relu')(x) x = Convolution2D(filters*factor, (3, 3), padding='same', kernel_initializer='he_normal', use_bias=True, trainable = learnall, kernel_regularizer = kernel_regularizer)(x) x = res_adapt_mod(x, filters*factor) #new addition v2 #x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x) x = BatchNormalization(axis=channel_axis)(x) #v3 #skip = AveragePooling2D((2,2), data_format=K.image_data_format())(init) #skip = ZeroPadding3D(padding=(0,0,filters*factor/2), data_format=K.image_data_format())(skip) #skip = ZeroPadding3D(padding=((0,0),(0,int(filters*factor/2)), (int(filters*factor/2),0)), data_format=K.image_data_format())(skip) #skip = Concatenate(axis=3)([skip,init_2]) skip = Convolution2D(filters*factor, (1, 1), padding='same', strides=strides, kernel_initializer='he_normal',use_bias=True, kernel_regularizer = kernel_regularizer)(init) x = Add()([skip, x]) #new addition v2 x = Activation('relu')(x) return x
def compute_input_shape(x_train): img_channels = 1 img_rows, img_cols = x_train.shape[1], x_train.shape[2] if K.image_data_format() == 'channels_first': return (img_channels, img_rows, img_cols) else: # channel_last return (img_rows, img_cols, img_channels)
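# Hedged usage sketch for compute_input_shape() above with MNIST-shaped data
# (the zero array just stands in for real training images).
import numpy as np
from keras import backend as K

x_train = np.zeros((60000, 28, 28), dtype='uint8')  # placeholder for real data
input_shape = compute_input_shape(x_train)
# (1, 28, 28) under 'channels_first', (28, 28, 1) under 'channels_last'
print(K.image_data_format(), input_shape)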
def img_to_array(img, data_format=None): """Converts a PIL Image instance to a Numpy array. # Arguments img: PIL Image instance. data_format: Image data format. # Returns A 3D Numpy array. # Raises ValueError: if invalid `img` or `data_format` is passed. """ if data_format is None: data_format = K.image_data_format() if data_format not in {'channels_first', 'channels_last'}: raise ValueError('Unknown data_format: ', data_format) # Numpy array x has format (height, width, channel) # or (channel, height, width) # but original PIL image has format (width, height, channel) x = np.asarray(img, dtype=K.floatx()) if len(x.shape) == 3: if data_format == 'channels_first': x = x.transpose(2, 0, 1) elif len(x.shape) == 2: if data_format == 'channels_first': x = x.reshape((1, x.shape[0], x.shape[1])) else: x = x.reshape((x.shape[0], x.shape[1], 1)) else: raise ValueError('Unsupported image shape: ', x.shape) return x
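# Hedged usage sketch for img_to_array() above: a synthetic RGB PIL image is
# converted, and the axis order of the result follows data_format.
import numpy as np
from PIL import Image

img = Image.fromarray(np.random.randint(0, 256, (32, 48, 3), dtype=np.uint8))
print(img_to_array(img, data_format='channels_last').shape)   # (32, 48, 3)
print(img_to_array(img, data_format='channels_first').shape)  # (3, 32, 48)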
def build(width, height, depth, classes):
    # initialize the model, assuming "channels last" input ordering
    model = Sequential()
    inputShape = (height, width, depth)

    # if we are using "channels first", update the input shape
    if K.image_data_format() == "channels_first":
        inputShape = (depth, height, width)

    # first set of CONV => RELU => POOL layers
    model.add(Conv2D(20, (5, 5), padding="same", input_shape=inputShape))
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # second set of CONV => RELU => POOL layers
    model.add(Conv2D(50, (5, 5), padding="same"))
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # first (and only) set of FC => RELU layers
    model.add(Flatten())
    model.add(Dense(500))
    model.add(Activation("relu"))

    # softmax classifier
    model.add(Dense(classes))
    model.add(Activation("softmax"))

    # return the constructed network architecture
    return model
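# Hedged usage sketch for build() above (a LeNet-style network): 28x28 grayscale
# input, 10 classes, compiled with a generic optimizer/loss for illustration.
model = build(width=28, height=28, depth=1, classes=10)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()  # Conv/Pool x2 -> Flatten -> Dense(500) -> Dense(10) softmax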
def load_mnist_dataset():
    from keras.datasets import mnist
    num_classes = 10
    # input image dimensions
    img_rows, img_cols = 28, 28
    # the data, shuffled and split between train and test sets
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # reshape depending on whether the channel axis comes second or fourth
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
        input_shape = (1, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255  # normalize to [0, 1]
    x_test /= 255
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')
    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    return x_train, y_train, x_test, y_test
def main(): """Generate different test models and save them to the given directory.""" if len(sys.argv) != 3: print('usage: [model name] [destination file path]') sys.exit(1) else: model_name = sys.argv[1] dest_path = sys.argv[2] get_model_functions = { 'small': get_test_model_small, 'sequential': get_test_model_sequential, 'full': get_test_model_full } if not model_name in get_model_functions: print('unknown model name: ', model_name) sys.exit(2) assert K.backend() == "tensorflow" assert K.floatx() == "float32" assert K.image_data_format() == 'channels_last' np.random.seed(0) model_func = get_model_functions[model_name] model = model_func() model.save(dest_path, include_optimizer=False) # Make sure models can be loaded again, # see https://github.com/fchollet/keras/issues/7682 model = load_model(dest_path) print(model.summary())
def tensor_shape_from_tensor_type(tensor_type: str, window_size: int, read_limit: int): in_channels = len(get_tensor_channel_map_from_tensor_type(tensor_type)) if K.image_data_format() == 'channels_last': tensor_shape = (read_limit, window_size, in_channels) else: tensor_shape = (in_channels, read_limit, window_size) return tensor_shape
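# Hedged usage sketch: allocate an empty read tensor from the shape helper above.
# 'paired_reads' and the 128x128 window/read limits are made-up values; the real
# tensor types come from the project's defines / get_tensor_channel_map_* helpers.
import numpy as np

tensor_shape = tensor_shape_from_tensor_type('paired_reads', window_size=128, read_limit=128)
read_tensor = np.zeros(tensor_shape, dtype=np.float32)
print(read_tensor.shape)  # channels last or first depending on K.image_data_format()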
def setUpClass(cls): # MNIST dataset used for building pre_trained_models/mnist_cnn/model_mnist_cnn_epoch_3 cls.batch_size = 128 cls.num_classes = 10 cls.epochs = 2 # input image dimensions cls.img_rows, cls.img_cols = 28, 28 # shuffled and split between train and test sets (cls.x_train, cls.y_train), (cls.x_test, cls.y_test) = mnist.load_data() if K.image_data_format() == 'channels_first': cls.x_train = cls.x_train.reshape(cls.x_train.shape[0], 1, cls.img_rows, cls.img_cols) cls.x_test = cls.x_test.reshape(cls.x_test.shape[0], 1, cls.img_rows, cls.img_cols) cls.input_shape = (1, cls.img_rows, cls.img_cols) else: cls.x_train = cls.x_train.reshape(cls.x_train.shape[0], cls.img_rows, cls.img_cols, 1) cls.x_test = cls.x_test.reshape(cls.x_test.shape[0], cls.img_rows, cls.img_cols, 1) cls.input_shape = (cls.img_rows, cls.img_cols, 1) cls.x_train = cls.x_train.astype('float32') cls.x_test = cls.x_test.astype('float32') cls.x_train /= 255 cls.x_test /= 255 cls.x_train = (cls.x_train - 0.5) * 2 cls.x_test = (cls.x_test - 0.5) * 2 # convert class vectors to binary class matrices cls.y_train = keras.utils.to_categorical(cls.y_train, cls.num_classes) cls.y_test = keras.utils.to_categorical(cls.y_test, cls.num_classes)
def _test_smoke(channel_order=None):
    # note: the layer actually used below is Convolution2DEnergy_TemporalCorrelation
    from kfs.layers.convolutional import Convolution2DEnergy_TemporalCorrelation
    from keras.models import Sequential
    # from keras.layers import Flatten, Dense
    input_shape = (12, 3, 64, 64)
    if channel_order is None:
        channel_order = K.image_data_format()
    if channel_order == 'channels_last':
        input_shape = (12, 64, 64, 3)
    rng = np.random.RandomState(42)
    datums = rng.randn(6, 12, 3, 64, 64).astype('float32')
    if channel_order == 'channels_last':
        datums = datums.transpose(0, 1, 3, 4, 2)
    nn2 = Sequential()
    nn2.add(Convolution2DEnergy_TemporalCorrelation(8, 16, 4, (5, 5), 7,
                                                    padding='same',
                                                    temporal_kernel_size=5,
                                                    input_shape=input_shape))
    nn2.compile(loss='mse', optimizer='sgd')
    pred2 = nn2.predict(datums)
    return nn2, pred2
def arr3_to_channels_first_format(arr): """Convert a 3-tensor for channels_first""" assert len(arr.shape) == 3 assert K.image_data_format() == 'channels_last' return np.swapaxes(np.swapaxes(arr, 2, 1), 1, 0)
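# Hedged usage sketch for arr3_to_channels_first_format() above: the double
# swapaxes maps (rows, cols, channels) to (channels, rows, cols). The raw
# transform is shown directly so the example runs without a Keras backend check.
import numpy as np

hwc = np.arange(2 * 3 * 4).reshape((2, 3, 4))    # (rows=2, cols=3, channels=4)
chw = np.swapaxes(np.swapaxes(hwc, 2, 1), 1, 0)  # same transform as the function above
print(chw.shape)                                 # (4, 2, 3)
assert np.array_equal(chw, hwc.transpose(2, 0, 1))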
def Inception_Inflated3d(include_top=True, weights=None, input_tensor=None, input_shape=None, dropout_prob=0.0, endpoint_logit=True, classes=400): """Instantiates the Inflated 3D Inception v1 architecture. Optionally loads weights pre-trained on Kinetics. Note that when using TensorFlow, for best performance you should set `image_data_format='channels_last'` in your Keras config at ~/.keras/keras.json. The model and the weights are compatible with both TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. Note that the default input frame(image) size for this model is 224x224. # Arguments include_top: whether to include the the classification layer at the top of the network. weights: one of `None` (random initialization) or 'kinetics_only' (pre-training on Kinetics dataset only). or 'imagenet_and_kinetics' (pre-training on ImageNet and Kinetics datasets). input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(NUM_FRAMES, 224, 224, 3)` (with `channels_last` data format) or `(NUM_FRAMES, 3, 224, 224)` (with `channels_first` data format). It should have exactly 3 inputs channels. NUM_FRAMES should be no smaller than 8. The authors used 64 frames per example for training and testing on kinetics dataset Also, Width and height should be no smaller than 32. E.g. `(64, 150, 150, 3)` would be one valid value. dropout_prob: optional, dropout probability applied in dropout layer after global average pooling layer. 0.0 means no dropout is applied, 1.0 means dropout is applied to all features. Note: Since Dropout is applied just before the classification layer, it is only useful when `include_top` is set to True. endpoint_logit: (boolean) optional. If True, the model's forward pass will end at producing logits. Otherwise, softmax is applied after producing the logits to produce the class probabilities prediction. Setting this parameter to True is particularly useful when you want to combine results of rgb model and optical flow model. - `True` end model forward pass at logit output - `False` go further after logit to produce softmax predictions Note: This parameter is only useful when `include_top` is set to True. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. # Returns A Keras model instance. # Raises ValueError: in case of invalid argument for `weights`, or invalid input shape. 
""" if not (weights in WEIGHTS_NAME or weights is None or os.path.exists(weights)): raise ValueError( 'The `weights` argument should be either ' '`None` (random initialization) or %s' % str(WEIGHTS_NAME) + ' ' 'or a valid path to a file containing `weights` values') if weights in WEIGHTS_NAME and include_top and classes != 400: raise ValueError( 'If using `weights` as one of these %s, with `include_top`' ' as true, `classes` should be 400' % str(WEIGHTS_NAME)) # Determine proper input shape input_shape = _obtain_input_shape(input_shape, default_frame_size=224, min_frame_size=32, default_num_frames=64, min_num_frames=8, data_format=K.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor if K.image_data_format() == 'channels_first': channel_axis = 1 else: channel_axis = 4 # Downsampling via convolution (spatial and temporal) x = conv3d_bn(img_input, 64, 7, 7, 7, strides=(2, 2, 2), padding='same', name='Conv3d_1a_7x7') # Downsampling (spatial only) x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same', name='MaxPool2d_2a_3x3')(x) x = conv3d_bn(x, 64, 1, 1, 1, strides=(1, 1, 1), padding='same', name='Conv3d_2b_1x1') x = conv3d_bn(x, 192, 3, 3, 3, strides=(1, 1, 1), padding='same', name='Conv3d_2c_3x3') # Downsampling (spatial only) x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same', name='MaxPool2d_3a_3x3')(x) # Mixed 3b branch_0 = conv3d_bn(x, 64, 1, 1, 1, padding='same', name='Conv3d_3b_0a_1x1') branch_1 = conv3d_bn(x, 96, 1, 1, 1, padding='same', name='Conv3d_3b_1a_1x1') branch_1 = conv3d_bn(branch_1, 128, 3, 3, 3, padding='same', name='Conv3d_3b_1b_3x3') branch_2 = conv3d_bn(x, 16, 1, 1, 1, padding='same', name='Conv3d_3b_2a_1x1') branch_2 = conv3d_bn(branch_2, 32, 3, 3, 3, padding='same', name='Conv3d_3b_2b_3x3') branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_3b_3a_3x3')(x) branch_3 = conv3d_bn(branch_3, 32, 1, 1, 1, padding='same', name='Conv3d_3b_3b_1x1') x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_3b') # Mixed 3c branch_0 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_3c_0a_1x1') branch_1 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_3c_1a_1x1') branch_1 = conv3d_bn(branch_1, 192, 3, 3, 3, padding='same', name='Conv3d_3c_1b_3x3') branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_3c_2a_1x1') branch_2 = conv3d_bn(branch_2, 96, 3, 3, 3, padding='same', name='Conv3d_3c_2b_3x3') branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_3c_3a_3x3')(x) branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_3c_3b_1x1') x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_3c') # Downsampling (spatial and temporal) x = MaxPooling3D((3, 3, 3), strides=(2, 2, 2), padding='same', name='MaxPool2d_4a_3x3')(x) # Mixed 4b branch_0 = conv3d_bn(x, 192, 1, 1, 1, padding='same', name='Conv3d_4b_0a_1x1') branch_1 = conv3d_bn(x, 96, 1, 1, 1, padding='same', name='Conv3d_4b_1a_1x1') branch_1 = conv3d_bn(branch_1, 208, 3, 3, 3, padding='same', name='Conv3d_4b_1b_3x3') branch_2 = conv3d_bn(x, 16, 1, 1, 1, padding='same', name='Conv3d_4b_2a_1x1') branch_2 = conv3d_bn(branch_2, 48, 3, 3, 3, padding='same', name='Conv3d_4b_2b_3x3') branch_3 = MaxPooling3D((3, 3, 3), 
strides=(1, 1, 1), padding='same', name='MaxPool2d_4b_3a_3x3')(x) branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4b_3b_1x1') x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_4b') # Mixed 4c branch_0 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_4c_0a_1x1') branch_1 = conv3d_bn(x, 112, 1, 1, 1, padding='same', name='Conv3d_4c_1a_1x1') branch_1 = conv3d_bn(branch_1, 224, 3, 3, 3, padding='same', name='Conv3d_4c_1b_3x3') branch_2 = conv3d_bn(x, 24, 1, 1, 1, padding='same', name='Conv3d_4c_2a_1x1') branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4c_2b_3x3') branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4c_3a_3x3')(x) branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4c_3b_1x1') x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_4c') # Mixed 4d branch_0 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_4d_0a_1x1') branch_1 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_4d_1a_1x1') branch_1 = conv3d_bn(branch_1, 256, 3, 3, 3, padding='same', name='Conv3d_4d_1b_3x3') branch_2 = conv3d_bn(x, 24, 1, 1, 1, padding='same', name='Conv3d_4d_2a_1x1') branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4d_2b_3x3') branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4d_3a_3x3')(x) branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4d_3b_1x1') x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_4d') # Mixed 4e branch_0 = conv3d_bn(x, 112, 1, 1, 1, padding='same', name='Conv3d_4e_0a_1x1') branch_1 = conv3d_bn(x, 144, 1, 1, 1, padding='same', name='Conv3d_4e_1a_1x1') branch_1 = conv3d_bn(branch_1, 288, 3, 3, 3, padding='same', name='Conv3d_4e_1b_3x3') branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_4e_2a_1x1') branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4e_2b_3x3') branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4e_3a_3x3')(x) branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4e_3b_1x1') x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_4e') # Mixed 4f branch_0 = conv3d_bn(x, 256, 1, 1, 1, padding='same', name='Conv3d_4f_0a_1x1') branch_1 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_4f_1a_1x1') branch_1 = conv3d_bn(branch_1, 320, 3, 3, 3, padding='same', name='Conv3d_4f_1b_3x3') branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_4f_2a_1x1') branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_4f_2b_3x3') branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4f_3a_3x3')(x) branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_4f_3b_1x1') x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_4f') # Downsampling (spatial and temporal) x = MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding='same', name='MaxPool2d_5a_2x2')(x) # Mixed 5b branch_0 = conv3d_bn(x, 256, 1, 1, 1, padding='same', name='Conv3d_5b_0a_1x1') branch_1 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_5b_1a_1x1') branch_1 = conv3d_bn(branch_1, 320, 3, 3, 3, padding='same', name='Conv3d_5b_1b_3x3') branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_5b_2a_1x1') branch_2 = 
conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5b_2b_3x3') branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5b_3a_3x3')(x) branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5b_3b_1x1') x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_5b') # Mixed 5c branch_0 = conv3d_bn(x, 384, 1, 1, 1, padding='same', name='Conv3d_5c_0a_1x1') branch_1 = conv3d_bn(x, 192, 1, 1, 1, padding='same', name='Conv3d_5c_1a_1x1') branch_1 = conv3d_bn(branch_1, 384, 3, 3, 3, padding='same', name='Conv3d_5c_1b_3x3') branch_2 = conv3d_bn(x, 48, 1, 1, 1, padding='same', name='Conv3d_5c_2a_1x1') branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5c_2b_3x3') branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5c_3a_3x3')(x) branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5c_3b_1x1') x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_5c') if include_top: # Classification block x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid', name='global_avg_pool')(x) x = Dropout(dropout_prob)(x) x = conv3d_bn(x, classes, 1, 1, 1, padding='same', use_bias=True, use_activation_fn=False, use_bn=False, name='Conv3d_6a_1x1') num_frames_remaining = int(x.shape[1]) x = Reshape((num_frames_remaining, classes))(x) # logits (raw scores for each class) x = Lambda(lambda x: K.mean(x, axis=1, keepdims=False), output_shape=lambda s: (s[0], s[2]))(x) if not endpoint_logit: x = Activation('softmax', name='prediction')(x) else: h = int(x.shape[2]) w = int(x.shape[3]) x = AveragePooling3D((2, h, w), strides=(1, 1, 1), padding='valid', name='global_avg_pool')(x) inputs = img_input # create model model = Model(inputs, x, name='i3d_inception') # load weights if weights in WEIGHTS_NAME: if weights == WEIGHTS_NAME[0]: # rgb_kinetics_only if include_top: weights_url = WEIGHTS_PATH['rgb_kinetics_only'] model_name = 'i3d_inception_rgb_kinetics_only.h5' else: weights_url = WEIGHTS_PATH_NO_TOP['rgb_kinetics_only'] model_name = 'i3d_inception_rgb_kinetics_only_no_top.h5' elif weights == WEIGHTS_NAME[1]: # flow_kinetics_only if include_top: weights_url = WEIGHTS_PATH['flow_kinetics_only'] model_name = 'i3d_inception_flow_kinetics_only.h5' else: weights_url = WEIGHTS_PATH_NO_TOP['flow_kinetics_only'] model_name = 'i3d_inception_flow_kinetics_only_no_top.h5' elif weights == WEIGHTS_NAME[2]: # rgb_imagenet_and_kinetics if include_top: weights_url = WEIGHTS_PATH['rgb_imagenet_and_kinetics'] model_name = 'i3d_inception_rgb_imagenet_and_kinetics.h5' else: weights_url = WEIGHTS_PATH_NO_TOP['rgb_imagenet_and_kinetics'] model_name = 'i3d_inception_rgb_imagenet_and_kinetics_no_top.h5' elif weights == WEIGHTS_NAME[3]: # flow_imagenet_and_kinetics if include_top: weights_url = WEIGHTS_PATH['flow_imagenet_and_kinetics'] model_name = 'i3d_inception_flow_imagenet_and_kinetics.h5' else: weights_url = WEIGHTS_PATH_NO_TOP['flow_imagenet_and_kinetics'] model_name = 'i3d_inception_flow_imagenet_and_kinetics_no_top.h5' downloaded_weights_path = get_file(model_name, weights_url, cache_subdir='models') model.load_weights(downloaded_weights_path) if K.backend() == 'theano': layer_utils.convert_all_kernels_in_model(model) if K.image_data_format() == 'channels_first' and K.backend( ) == 'tensorflow': warnings.warn('You are using the TensorFlow backend, yet you ' 'are using the Theano ' 'image data format convention ' 
'(`image_data_format="channels_first"`). ' 'For best performance, set ' '`image_data_format="channels_last"` in ' 'your keras config ' 'at ~/.keras/keras.json.') elif weights is not None: model.load_weights(weights) return model
def DenseNet(nb_classes, img_dim, depth, nb_dense_block, growth_rate, nb_filter, dropout_rate=None, weight_decay=1E-4): """ Build the DenseNet model :param nb_classes: int -- number of classes :param img_dim: tuple -- (channels, rows, columns) :param depth: int -- how many layers :param nb_dense_block: int -- number of dense blocks to add to end :param growth_rate: int -- number of filters to add :param nb_filter: int -- number of filters :param dropout_rate: float -- dropout rate :param weight_decay: float -- weight decay :returns: keras model with nb_layers of conv_factory appended :rtype: keras model """ if K.image_dim_ordering() == "th": concat_axis = 1 elif K.image_dim_ordering() == "tf": concat_axis = -1 model_input = Input(shape=img_dim) assert (depth - 4) % 3 == 0, "Depth must be 3 N + 4" # layers in each dense block nb_layers = int((depth - 4) / 3) # Initial convolution x = Conv2D(nb_filter, (3, 3), kernel_initializer="he_uniform", padding="same", name="initial_conv2D", use_bias=False, kernel_regularizer=l2(weight_decay))(model_input) # Add dense blocks for block_idx in range(nb_dense_block - 1): x, nb_filter = denseblock(x, concat_axis, nb_layers, nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay) # add transition x = transition(x, nb_filter=nb_filter, concat_axis=concat_axis, dropout_rate=dropout_rate, weight_decay=weight_decay) # The last denseblock does not have a transition x, nb_filter = denseblock(x, concat_axis, nb_layers, nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay) x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), beta_regularizer=l2(weight_decay))(x) x = Activation('relu')(x) x = GlobalAveragePooling2D(data_format=K.image_data_format())(x) x = Dense(nb_classes, activation='softmax', kernel_regularizer=l2(weight_decay), bias_regularizer=l2(weight_decay))(x) densenet = Model(inputs=[model_input], outputs=[x], name="DenseNet") return densenet
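# Hedged usage sketch for DenseNet() above, using the depth-40 / growth-rate-12
# configuration common for CIFAR-10. denseblock() and transition() are assumed
# to be defined alongside the builder; (32, 32, 3) assumes a channels_last
# ('tf') image ordering.
model = DenseNet(nb_classes=10, img_dim=(32, 32, 3), depth=40,
                 nb_dense_block=3, growth_rate=12, nb_filter=16,
                 dropout_rate=0.2, weight_decay=1e-4)
model.summary()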
def ResNet50(include_top=True, weights='imagenet', input_tensor=None,
             input_shape=None, pooling=None, classes=9):
    """Instantiates the ResNet50 architecture.

    Optionally loads weights pre-trained on ImageNet. Note that when using
    TensorFlow, for best performance you should set
    `image_data_format="channels_last"` in your Keras config at
    ~/.keras/keras.json.

    The model and the weights are compatible with both TensorFlow and Theano.
    The data format convention used by the model is the one specified in your
    Keras config file.

    # Arguments
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization)
            or "imagenet" (pre-training on ImageNet).
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 197.
            E.g. `(200, 200, 3)` would be one valid value.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
""" if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = _obtain_input_shape(input_shape, default_size=224, min_size=197, data_format=K.image_data_format(), include_top=include_top) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 x = ZeroPadding2D((3, 3))(img_input) x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x) x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) x = Activation('relu')(x) x = MaxPooling2D((3, 3), strides=(2, 2))(x) x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') x = AveragePooling2D((7, 7), name='avg_pool')(x) if include_top: x = Flatten()(x) x = Dense(classes, activation='softmax', name='fc1000')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = Model(inputs, x, name='resnet50') # load weights if weights == 'imagenet': if include_top: weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels.h5', WEIGHTS_PATH, cache_subdir='models', md5_hash='a7b3fe01876f51b976af0dea6bc144eb') else: weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', WEIGHTS_PATH_NO_TOP, cache_subdir='models', md5_hash='a268eb855778b3df3c7506639542a6af') model.load_weights(weights_path) if K.backend() == 'theano': layer_utils.convert_all_kernels_in_model(model) if K.image_data_format() == 'channels_first': if include_top: maxpool = model.get_layer(name='avg_pool') shape = maxpool.output_shape[1:] dense = model.get_layer(name='fc1000') layer_utils.convert_dense_weights_data_format(dense, shape, 'channels_first') if K.backend() == 'tensorflow': warnings.warn('You are using the TensorFlow backend, yet you ' 'are using the Theano ' 'image data format convention ' '(`image_data_format="channels_first"`). 
' 'For best performance, set ' '`image_data_format="channels_last"` in ' 'your Keras config ' 'at ~/.keras/keras.json.') return model
random.shuffle(all_imgs) num_imgs = len(all_imgs) train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] print('Num train samples {}'.format(len(train_imgs))) print('Num val samples {}'.format(len(val_imgs))) data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C, nn.get_img_output_length, K.image_data_format(), mode='train') data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn.get_img_output_length, K.image_data_format(), mode='val') input_shape_img = (None, None, 3) img_input = Input(shape=input_shape_img) roi_input = Input(shape=(None, 4)) # define the base network (resnet here, can be VGG, Inception, etc) shared_layers = nn.nn_base(img_input, trainable=True)
def _inverted_res_block(x, expansion, filters, kernel_size, stride, se_ratio, activation, block_id, layers, bottomright_stride=False): channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 shortcut = x prefix = 'expanded_conv/' infilters = backend.int_shape(x)[channel_axis] if block_id: # Expand prefix = 'expanded_conv_{}/'.format(block_id) x = layers.Conv2D( _depth(infilters * expansion), kernel_size=1, padding='same', use_bias=False, name=prefix + 'expand')( x) x = layers.BatchNormalization( axis=channel_axis, epsilon=1e-3, momentum=0.999, name=prefix + 'expand/BatchNorm')( x) x = activation(x) if stride == 2: shift = 1 if bottomright_stride else 0 x = layers.ZeroPadding2D( padding=correct_pad(kernel_size, shift), name=prefix + 'depthwise/pad')(x) x = layers.DepthwiseConv2D( kernel_size, strides=stride, padding='same' if stride == 1 else 'valid', use_bias=False, name=prefix + 'depthwise')( x) x = layers.BatchNormalization( axis=channel_axis, epsilon=1e-3, momentum=0.999, name=prefix + 'depthwise/BatchNorm')( x) x = activation(x) if se_ratio: x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix) x = layers.Conv2D( filters, kernel_size=1, padding='same', use_bias=False, name=prefix + 'project')( x) x = layers.BatchNormalization( axis=channel_axis, epsilon=1e-3, momentum=0.999, name=prefix + 'project/BatchNorm')( x) if stride == 1 and infilters == filters: x = layers.Add(name=prefix + 'Add')([shortcut, x]) return x
    out = base_model(input_img)
    return Model(input=input_img, output=out)


def get_img(name):
    img = image.load_img(name, target_size=(224, 224))
    x = image.img_to_array(img)
    return x


if __name__ == '__main__':
    # setup
    model_name = 'inception'
    base_model = create_base_network(model_name)
    if K.image_data_format() == 'channels_first':
        input_shape = (3, 224, 224)
    else:
        input_shape = (224, 224, 3)
    model = build_predict(base_model, input_shape=input_shape)
    model.summary()

    # load the weights (and the JSON file) produced by training
    model.load_weights(sys.argv[1])

    # run the image through the model
    feat = model.predict(np.expand_dims(get_img(sys.argv[2]), axis=0),
                         batch_size=1)
    print(feat)
    print(feat.shape)
def VGG16(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000): """Instantiates the VGG16 architecture. Optionally loads weights pre-trained on ImageNet. Note that when using TensorFlow, for best performance you should set `image_data_format='channels_last'` in your Keras config at ~/.keras/keras.json. The model and the weights are compatible with both TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. # Arguments include_top: whether to include the 3 fully-connected layers at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or `(3, 224, 224)` (with `channels_first` data format). It should have exactly 3 input channels, and width and height should be no smaller than 48. E.g. `(200, 200, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. # Returns A Keras model instance. # Raises ValueError: in case of invalid argument for `weights`, or invalid input shape. 
""" if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = _obtain_input_shape(input_shape, default_size=224, min_size=48, data_format=K.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor # Block 1 x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input) x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) # Block 2 x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x) x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) # Block 3 x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x) x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x) x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) # Block 4 x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x) x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x) x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) # Block 5 x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x) x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x) x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) if include_top: # Classification block x = Flatten(name='flatten')(x) x = Dense(4096, activation='relu', name='fc1')(x) x = Dense(4096, activation='relu', name='fc2')(x) x = Dense(classes, activation='softmax', name='predictions')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = Model(inputs, x, name='vgg16') # load weights if weights == 'imagenet': if include_top: weights_path = WEIGHTS_PATH else: weights_path = WEIGHTS_PATH_NO_TOP model.load_weights(weights_path) if K.backend() == 'theano': layer_utils.convert_all_kernels_in_model(model) if K.image_data_format() == 'channels_first': if include_top: maxpool = model.get_layer(name='block5_pool') shape = maxpool.output_shape[1:] dense = model.get_layer(name='fc1') layer_utils.convert_dense_weights_data_format( dense, shape, 'channels_first') if K.backend() == 'tensorflow': warnings.warn('You are using the TensorFlow backend, yet you ' 'are using the Theano ' 'image data format convention ' '(`image_data_format="channels_first"`). 
' 'For best performance, set ' '`image_data_format="channels_last"` in ' 'your Keras config ' 'at ~/.keras/keras.json.') elif weights is not None: model.load_weights(weights) return model
def train(run_name, start_epoch, stop_epoch, img_w): # Input Parameters img_h = 64 words_per_epoch = 16000 val_split = 0.2 val_words = int(words_per_epoch * (val_split)) # Network parameters conv_filters = 16 kernel_size = (3, 3) pool_size = 2 time_dense_size = 32 rnn_size = 512 if K.image_data_format() == 'channels_first': input_shape = (1, img_w, img_h) else: input_shape = (img_w, img_h, 1) fdir = os.path.dirname(get_file('wordlists.tgz', origin='http://www.mythic-ai.com/datasets/wordlists.tgz', untar=True)) img_gen = TextImageGenerator(monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'), bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'), minibatch_size=32, img_w=img_w, img_h=img_h, downsample_factor=(pool_size ** 2), val_split=words_per_epoch - val_words ) act = 'relu' input_data = Input(name='the_input', shape=input_shape, dtype='float32') inner = Conv2D(conv_filters, kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv1')(input_data) inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner) inner = Conv2D(conv_filters, kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv2')(inner) inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner) conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters) inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner) # cuts down input size going into RNN: inner = Dense(time_dense_size, activation=act, name='dense1')(inner) # Two layers of bidirecitonal GRUs # GRU seems to work as well, if not better than LSTM: gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner) gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner) gru1_merged = add([gru_1, gru_1b]) gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged) gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged) # transforms RNN output to character activations: inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal', name='dense2')(concatenate([gru_2, gru_2b])) y_pred = Activation('softmax', name='softmax')(inner) Model(inputs=input_data, outputs=y_pred).summary() labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') # Keras doesn't currently support loss funcs with extra parameters # so CTC loss is implemented in a lambda layer loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length]) # clipnorm seems to speeds up convergence sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out) # the loss calc occurs elsewhere, so use a dummy lambda func for the loss model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) if start_epoch > 0: weight_file = os.path.join(OUTPUT_DIR, os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1))) model.load_weights(weight_file) # captures output of softmax so we can decode the output during visualization test_func = K.function([input_data], [y_pred]) viz_cb = VizCallback(run_name, test_func, img_gen.next_val()) 
model.fit_generator(generator=img_gen.next_train(), steps_per_epoch=(words_per_epoch - val_words), epochs=stop_epoch, validation_data=img_gen.next_val(), validation_steps=val_words, callbacks=[viz_cb, img_gen], initial_epoch=start_epoch)
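The `ctc_lambda_func` wired into the Lambda layer above is not defined in this snippet; below is a minimal sketch of what it typically looks like, following the standard Keras OCR example (the 2-frame slice that drops the earliest RNN outputs is an assumption carried over from that example, not from this source).

from keras import backend as K

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the first couple of RNN time steps tend to be garbage, so drop them
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)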
def build(width, height, depth, classes, stages, filters, reg=0.0001, bnEps=2e-5, bnMom=0.9, dataset="cifar"): # initialize the input shape to be "channels last" and the # channels dimension itself inputShape = (height, width, depth) chanDim = -1 # if we are using "channels first", update the input shape # and channels dimension if K.image_data_format() == "channels_first": inputShape = (depth, height, width) chanDim = 1 # set the input and apply BN inputs = Input(shape=inputShape) x = BatchNormalization(axis=chanDim, epsilon=bnEps, momentum=bnMom)(inputs) # check if we are utilizing the CIFAR dataset if dataset == "cifar": # apply a single CONV layer x = Conv2D(filters[0], (3, 3), use_bias=False, padding="same", kernel_regularizer=l2(reg))(x) # check to see if we are using the Tiny ImageNet dataset elif dataset == "tiny_imagenet": # apply CONV => BN => ACT => POOL to reduce spatial size x = Conv2D(filters[0], (5, 5), use_bias=False, padding="same", kernel_regularizer=l2(reg))(x) x = BatchNormalization(axis=chanDim, epsilon=bnEps, momentum=bnMom)(x) x = Activation("relu")(x) x = ZeroPadding2D((1, 1))(x) x = MaxPooling2D((3, 3), strides=(2, 2))(x) # loop over the number of stages for i in range(0, len(stages)): # initialize the stride, then apply a residual module # used to reduce the spatial size of the input volume stride = (1, 1) if i == 0 else (2, 2) x = ResNet.residual_module(x, filters[i + 1], stride, chanDim, red=True, bnEps=bnEps, bnMom=bnMom) # loop over the number of layers in the stage for j in range(0, stages[i] - 1): # apply a ResNet module x = ResNet.residual_module(x, filters[i + 1], (1, 1), chanDim, bnEps=bnEps, bnMom=bnMom) # apply BN => ACT => POOL x = BatchNormalization(axis=chanDim, epsilon=bnEps, momentum=bnMom)(x) x = Activation("relu")(x) x = AveragePooling2D((8, 8))(x) # softmax classifier x = Flatten()(x) x = Dense(classes, kernel_regularizer=l2(reg))(x) x = Activation("softmax")(x) # create the model model = Model(inputs, x, name="resnet") # return the constructed network architecture return model
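A hedged usage sketch for the builder above, assuming it is exposed as a static method on a `ResNet` class (as the `ResNet.residual_module` calls suggest); the stage and filter counts below are illustrative CIFAR-10 values, not values taken from the source.

# `filters` has one more entry than `stages`: the leading value feeds the
# initial CONV layer, the remaining values feed the residual stages
model = ResNet.build(width=32, height=32, depth=3, classes=10,
                     stages=(9, 9, 9), filters=(64, 64, 128, 256),
                     reg=0.0005, dataset="cifar")
model.compile(loss="categorical_crossentropy", optimizer="sgd",
              metrics=["accuracy"])
model.summary()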
def getModelGivenModelOptionsAndWeightInits(args): #read in the arguments w0=args.w0 w1=args.w1 init_weights=args.init_weights seed=args.seed np.random.seed(seed) import keras; from keras.layers import ( Activation, AveragePooling1D, BatchNormalization, Conv1D, Conv2D, Dense, Dropout, Flatten, Input, MaxPooling1D, MaxPooling2D, Reshape, PReLU, Add ) from keras.models import Model from keras.optimizers import Adadelta, SGD, RMSprop; import keras.losses; from keras.constraints import maxnorm; from keras.layers.normalization import BatchNormalization from keras.regularizers import l1, l2 from keras import backend as K K.set_image_data_format('channels_last') print(K.image_data_format()) import collections model_inputs = ["data/genome_data_dir"] shapes = {'data/genome_data_dir': [1000, 4]} keras_inputs = collections.OrderedDict([(name, Input(shape=shapes[name], name=name)) for name in model_inputs]) inputs = keras_inputs num_tasks = ntasks seq_preds = inputs["data/genome_data_dir"] num_filters = (48, 64, 100, 150, 300, 200, 200, 200, 200) conv_width = (3, 3, 3, 7, 7, 7, 3, 3, 7) batch_norm = True pool_width=(3, 4, 4) pool_stride=(3, 4, 4) fc_layer_sizes=(1000, 1000) dropout=(0.3, 0.3) final_dropout=0.0, trainable=1 final_layer_name='tuned_i_score' j = 0 for i, (nb_filter, nb_col) in enumerate(zip(num_filters, conv_width)): seq_preds = Conv1D(nb_filter, nb_col, kernel_initializer='he_normal', trainable = bool(trainable))(seq_preds) if batch_norm: seq_preds = BatchNormalization(trainable = bool(trainable))(seq_preds) seq_preds = Activation('relu', trainable = bool(trainable))(seq_preds) if(i == 4 or i == 7 or i == 8): seq_preds = MaxPooling1D(pool_width[j], pool_stride[j], trainable = bool(trainable))(seq_preds) j = j+1 seq_preds = Flatten()(seq_preds) # fully connect, drop before fc layers for drop_rate, fc_layer_size in zip(dropout, fc_layer_sizes): seq_preds = Dense(fc_layer_size)(seq_preds) if batch_norm: seq_preds = BatchNormalization()(seq_preds) seq_preds = Activation('relu')(seq_preds) seq_preds = Dense(num_tasks, name=final_layer_name)(seq_preds) seq_preds = Activation('sigmoid')(seq_preds) random_weight_model = Model(inputs=list(keras_inputs.values()), outputs=seq_preds) model = random_weight_model if (init_weights!=None): #load the weight initializations model.load_weights(init_weights, by_name=True) adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08) print("compiling!") if w0!=None: loss=get_weighted_binary_crossentropy(w0_weights=w0,w1_weights=w1) else: loss=get_ambig_binary_crossentropy() model.compile(optimizer=adam, loss=loss, metrics=[recall, specificity, fpr, fnr, precision, f1]) return model
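The builder above reads `w0`, `w1`, `init_weights` and `seed` off `args`, and also references an `ntasks` name that is never defined in the snippet (it presumably comes from the command line or an enclosing module). A hypothetical argparse wiring under that assumption:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--w0", default=None)             # class-0 loss weights, if any
parser.add_argument("--w1", default=None)             # class-1 loss weights, if any
parser.add_argument("--init_weights", default=None)   # optional .h5 file to warm-start from
parser.add_argument("--seed", type=int, default=1234)
parser.add_argument("--ntasks", type=int, default=1)  # hypothetical source of `ntasks`
args = parser.parse_args()

model = getModelGivenModelOptionsAndWeightInits(args)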
def train(): import keras data = load_files('./dataset', shuffle=True, encoding=None, load_content=False) print(data.keys()) print(data['target_names']) print(data['target']) print(data.keys()) nb_data_samples = len(data['filenames']) print("data samples: ", nb_data_samples) with multiprocessing.Pool(processes=2) as pool: data_images = pool.map(load_dataset_item, data['filenames']) data_np = np.stack(data_images) x_train, x_test, y_train, y_test = train_test_split(data_np, data['target'], train_size=1900, test_size=2214 - 1900) from keras import backend as K if K.image_data_format() == 'channels_first': x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) input_shape = (1, img_rows, img_cols) else: x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) input_shape = (img_rows, img_cols, 1) x_train = np.array(x_train).astype('float32') y_train = np.array(y_train).astype('float32') x_test = x_test.astype('float32') x_train /= 255 x_test /= 255 print('x_train shape:', x_train.shape) print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) print(y_test[0]) from keras.models import Sequential from keras.layers import Dense, Dropout, Flatten, Activation from keras.layers import Conv2D, MaxPooling2D model = Sequential() model.add(Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:])) model.add(Activation('relu')) model.add(Conv2D(32, (3, 3))) model.add(Activation('relu')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Dropout(0.25)) model.add(Conv2D(64, (3, 3), padding='same')) model.add(Activation('relu')) model.add(Conv2D(64, (3, 3))) model.add(Activation('relu')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Dropout(0.25)) model.add(Flatten()) model.add(Dense(512)) model.add(Activation('relu')) model.add(Dropout(0.5)) model.add(Dense(num_classes, activation='softmax')) model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(), metrics=['accuracy']) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test)) score = model.evaluate(x_test, y_test, verbose=0) print('Test loss:', score[0]) print('Test accuracy:', score[1]) model.save('PC_RNN.h5') # creates a HDF5 file 'my_model.h5'
if label == 2: continue class_idx = np.where(self.labels == label)[0] Z = self.X_proj[class_idx, :] valid_vector_idx = find_valid_indices(Z) self.valid_idx.extend( class_idx[valid_vector_idx]) # map back to original indices self.valid_idx = np.array(self.valid_idx) if __name__ == "__main__": # Load some MNIST data. img_rows, img_cols = 28, 28 (x_train, y_train), (x_test, y_test) = mnist.load_data() if K.image_data_format() == "channels_first": x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) input_shape = (1, img_rows, img_cols) else: x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) input_shape = (img_rows, img_cols, 1) ones_idx = np.where(y_train == 1)[0] twos_idx = np.where(y_train == 2)[0] fours_idx = np.where(y_train == 4)[0] fives_idx = np.where(y_train == 5)[0] eights_idx = np.where(y_train == 8)[0] threes_idx = np.where(y_train == 3)[0]
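The reshape-by-data-format idiom above recurs throughout these snippets; here is a small hypothetical helper (`to_backend_layout` is not a name from the source) that captures it once.

import numpy as np
from keras import backend as K
from keras.datasets import mnist

def to_backend_layout(x, img_rows, img_cols):
    # Return the reshaped single-channel batch plus the matching
    # `input_shape` tuple for the configured image data format.
    if K.image_data_format() == 'channels_first':
        return x.reshape(x.shape[0], 1, img_rows, img_cols), (1, img_rows, img_cols)
    return x.reshape(x.shape[0], img_rows, img_cols, 1), (img_rows, img_cols, 1)

(x_raw, y_raw), _ = mnist.load_data()
x_4d, input_shape = to_backend_layout(x_raw, 28, 28)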
def SE_InceptionV3(input_shape=None, include_top=True, classes=1000, weights=None, input_tensor=None, pooling=None): """Instantiates the Squeeze and Excite Inception v3 architecture. # Arguments include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet). input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(299, 299, 3)` (with `channels_last` data1 format) or `(3, 299, 299)` (with `channels_first` data1 format). It should have exactly 3 inputs channels, and width and height should be no smaller than 139. E.g. `(150, 150, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. # Returns A Keras model instance. # Raises ValueError: in case of invalid argument for `weights`, or invalid input shape. """ if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = _obtain_input_shape(input_shape, default_size=299, min_size=139, data_format=K.image_data_format(), require_flatten=include_top) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor if K.image_data_format() == 'channels_first': channel_axis = 1 else: channel_axis = 3 x = _conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid') x = _conv2d_bn(x, 32, 3, 3, padding='valid') x = _conv2d_bn(x, 64, 3, 3) x = MaxPooling2D((3, 3), strides=(2, 2))(x) x = _conv2d_bn(x, 80, 1, 1, padding='valid') x = _conv2d_bn(x, 192, 3, 3, padding='valid') x = MaxPooling2D((3, 3), strides=(2, 2))(x) # mixed 0, 1, 2: 35 x 35 x 256 branch1x1 = _conv2d_bn(x, 64, 1, 1) branch5x5 = _conv2d_bn(x, 48, 1, 1) branch5x5 = _conv2d_bn(branch5x5, 64, 5, 5) branch3x3dbl = _conv2d_bn(x, 64, 1, 1) branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = _conv2d_bn(branch_pool, 32, 1, 1) x = layers.concatenate([branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed0') # squeeze and excite block x = squeeze_excite_block(x) # mixed 1: 35 x 35 x 256 branch1x1 = _conv2d_bn(x, 64, 1, 1) branch5x5 = _conv2d_bn(x, 48, 1, 1) branch5x5 = _conv2d_bn(branch5x5, 64, 5, 5) branch3x3dbl = _conv2d_bn(x, 64, 1, 1) branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3) branch_pool = AveragePooling2D((3, 3), strides=(1, 
1), padding='same')(x) branch_pool = _conv2d_bn(branch_pool, 64, 1, 1) x = layers.concatenate([branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed1') # squeeze and excite block x = squeeze_excite_block(x) # mixed 2: 35 x 35 x 256 branch1x1 = _conv2d_bn(x, 64, 1, 1) branch5x5 = _conv2d_bn(x, 48, 1, 1) branch5x5 = _conv2d_bn(branch5x5, 64, 5, 5) branch3x3dbl = _conv2d_bn(x, 64, 1, 1) branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = _conv2d_bn(branch_pool, 64, 1, 1) x = layers.concatenate([branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed2') # squeeze and excite block x = squeeze_excite_block(x) # mixed 3: 17 x 17 x 768 branch3x3 = _conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid') branch3x3dbl = _conv2d_bn(x, 64, 1, 1) branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid') branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) x = layers.concatenate([branch3x3, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed3') # squeeze and excite block x = squeeze_excite_block(x) # mixed 4: 17 x 17 x 768 branch1x1 = _conv2d_bn(x, 192, 1, 1) branch7x7 = _conv2d_bn(x, 128, 1, 1) branch7x7 = _conv2d_bn(branch7x7, 128, 1, 7) branch7x7 = _conv2d_bn(branch7x7, 192, 7, 1) branch7x7dbl = _conv2d_bn(x, 128, 1, 1) branch7x7dbl = _conv2d_bn(branch7x7dbl, 128, 7, 1) branch7x7dbl = _conv2d_bn(branch7x7dbl, 128, 1, 7) branch7x7dbl = _conv2d_bn(branch7x7dbl, 128, 7, 1) branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 1, 7) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = _conv2d_bn(branch_pool, 192, 1, 1) x = layers.concatenate([branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=channel_axis, name='mixed4') # squeeze and excite block x = squeeze_excite_block(x) # mixed 5, 6: 17 x 17 x 768 for i in range(2): branch1x1 = _conv2d_bn(x, 192, 1, 1) branch7x7 = _conv2d_bn(x, 160, 1, 1) branch7x7 = _conv2d_bn(branch7x7, 160, 1, 7) branch7x7 = _conv2d_bn(branch7x7, 192, 7, 1) branch7x7dbl = _conv2d_bn(x, 160, 1, 1) branch7x7dbl = _conv2d_bn(branch7x7dbl, 160, 7, 1) branch7x7dbl = _conv2d_bn(branch7x7dbl, 160, 1, 7) branch7x7dbl = _conv2d_bn(branch7x7dbl, 160, 7, 1) branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 1, 7) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = _conv2d_bn(branch_pool, 192, 1, 1) x = layers.concatenate( [branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=channel_axis, name='mixed' + str(5 + i)) # squeeze and excite block x = squeeze_excite_block(x) # mixed 7: 17 x 17 x 768 branch1x1 = _conv2d_bn(x, 192, 1, 1) branch7x7 = _conv2d_bn(x, 192, 1, 1) branch7x7 = _conv2d_bn(branch7x7, 192, 1, 7) branch7x7 = _conv2d_bn(branch7x7, 192, 7, 1) branch7x7dbl = _conv2d_bn(x, 192, 1, 1) branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 7, 1) branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 1, 7) branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 7, 1) branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 1, 7) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = _conv2d_bn(branch_pool, 192, 1, 1) x = layers.concatenate([branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=channel_axis, name='mixed7') # squeeze and excite block x = squeeze_excite_block(x) # mixed 8: 8 x 8 x 1280 branch3x3 = _conv2d_bn(x, 192, 1, 1) branch3x3 = 
_conv2d_bn(branch3x3, 320, 3, 3, strides=(2, 2), padding='valid') branch7x7x3 = _conv2d_bn(x, 192, 1, 1) branch7x7x3 = _conv2d_bn(branch7x7x3, 192, 1, 7) branch7x7x3 = _conv2d_bn(branch7x7x3, 192, 7, 1) branch7x7x3 = _conv2d_bn(branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid') branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) x = layers.concatenate([branch3x3, branch7x7x3, branch_pool], axis=channel_axis, name='mixed8') # squeeze and excite block x = squeeze_excite_block(x) # mixed 9: 8 x 8 x 2048 for i in range(2): branch1x1 = _conv2d_bn(x, 320, 1, 1) branch3x3 = _conv2d_bn(x, 384, 1, 1) branch3x3_1 = _conv2d_bn(branch3x3, 384, 1, 3) branch3x3_2 = _conv2d_bn(branch3x3, 384, 3, 1) branch3x3 = layers.concatenate([branch3x3_1, branch3x3_2], axis=channel_axis, name='mixed9_' + str(i)) branch3x3dbl = _conv2d_bn(x, 448, 1, 1) branch3x3dbl = _conv2d_bn(branch3x3dbl, 384, 3, 3) branch3x3dbl_1 = _conv2d_bn(branch3x3dbl, 384, 1, 3) branch3x3dbl_2 = _conv2d_bn(branch3x3dbl, 384, 3, 1) branch3x3dbl = layers.concatenate([branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = _conv2d_bn(branch_pool, 192, 1, 1) x = layers.concatenate( [branch1x1, branch3x3, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed' + str(9 + i)) # squeeze and excite block x = squeeze_excite_block(x) if include_top: # Classification block x = GlobalAveragePooling2D(name='avg_pool')(x) x = Dense(classes, activation='softmax', name='predictions')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = Model(inputs, x, name='inception_v3') return model
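A minimal instantiation sketch for the SE-Inception v3 builder above, with random weights and an illustrative 10-class head (the values are assumptions, not from the source).

model = SE_InceptionV3(input_shape=(299, 299, 3), include_top=True,
                       weights=None, classes=10)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()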
def _generate_filter_image(input_img, layer_output, filter_index): """Generates image for one particular filter. # Arguments input_img: The input-image Tensor. layer_output: The output-image Tensor. filter_index: The to be processed filter number. Assumed to be valid. #Returns Either None if no image could be generated. or a tuple of the image (array) itself and the last loss. """ s_time = time.time() # we build a loss function that maximizes the activation # of the nth filter of the layer considered if K.image_data_format() == 'channels_first': loss = K.mean(layer_output[:, filter_index, :, :]) else: loss = K.mean(layer_output[:, :, :, filter_index]) # we compute the gradient of the input picture wrt this loss grads = K.gradients(loss, input_img)[0] # normalization trick: we normalize the gradient grads = normalize(grads) # this function returns the loss and grads given the input picture iterate = K.function([input_img], [loss, grads]) # we start from a gray image with some random noise intermediate_dim = tuple( int(x / (upscaling_factor ** upscaling_steps)) for x in output_dim) if K.image_data_format() == 'channels_first': input_img_data = np.random.random( (1, 3, intermediate_dim[0], intermediate_dim[1])) else: input_img_data = np.random.random( (1, intermediate_dim[0], intermediate_dim[1], 3)) input_img_data = (input_img_data - 0.5) * 20 + 128 # Slowly upscaling towards the original size prevents # a dominating high-frequency of the to visualized structure # as it would occur if we directly compute the 412d-image. # Behaves as a better starting point for each following dimension # and therefore avoids poor local minima for up in reversed(range(upscaling_steps)): # we run gradient ascent for e.g. 20 steps for _ in range(epochs): loss_value, grads_value = iterate([input_img_data]) input_img_data += grads_value * step # some filters get stuck to 0, we can skip them if loss_value <= K.epsilon(): return None # Calculate upscaled dimension intermediate_dim = tuple( int(x / (upscaling_factor ** up)) for x in output_dim) # Upscale img = deprocess_image(input_img_data[0]) img = np.array(pil_image.fromarray(img).resize(intermediate_dim, pil_image.BICUBIC)) input_img_data = np.expand_dims( process_image(img, input_img_data[0]), 0) # decode the resulting input image img = deprocess_image(input_img_data[0]) e_time = time.time() print('Costs of filter {:3}: {:5.0f} ( {:4.2f}s )'.format(filter_index, loss_value, e_time - s_time)) return img, loss_value
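A hedged driver for the filter-visualization routine above. It assumes the module-level settings and helpers the function relies on (`output_dim`, `upscaling_steps`, `upscaling_factor`, `epochs`, `step`, plus `normalize`, `process_image` and `deprocess_image`) are defined as in the stock Keras example; the layer name and settings below are illustrative.

from keras.applications import vgg16

# illustrative module-level settings read inside _generate_filter_image
output_dim = (412, 412)
upscaling_steps, upscaling_factor = 9, 1.2
epochs, step = 15, 1.0

model = vgg16.VGG16(weights='imagenet', include_top=False)
input_img = model.input
layer_output = model.get_layer('block5_conv1').output
result = _generate_filter_image(input_img, layer_output, filter_index=0)
if result is not None:
    img, loss = result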
def train(): batch_size = 128 epochs = 10 learning_rate = 0.01 model_name = "cnn/models/hand_poses_wGarbage_" + str(epochs) + ".h5" # input image dimensions img_rows, img_cols = 28, 28 # the data, shuffled and split between train and test sets x_train, y_train, x_test, y_test = dataset.load_data(poses=["all"]) num_classes = len(np.unique(y_test)) if K.image_data_format() == 'channels_first': x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) input_shape = (1, img_rows, img_cols) else: x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) input_shape = (img_rows, img_cols, 1) print('x_train shape:', x_train.shape) print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') # convert class vectors to binary class matrices y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) ####### Model structure ####### #model building model = Sequential() #convolutional layer with rectified linear unit activation model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape)) # 32 convolution filters used each of size 3x3 # again model.add(Conv2D(64, (3, 3), activation='relu')) # 64 convolution filters used each of size 3x3 # choose the best features via pooling model.add(MaxPooling2D(pool_size=(2, 2))) # randomly turn neurons on and off to improve convergence model.add(Dropout(0.25)) # flatten since too many dimensions, we only want a classification output model.add(Flatten()) # fully connected to get all relevant data model.add(Dense(128, activation='relu')) # one more dropout for convergence' sake :) model.add(Dropout(0.5)) # output a softmax to squash the matrix into output probabilities model.add(Dense(num_classes, activation='softmax')) # categorical ce since we have multiple classes (10) model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=learning_rate), metrics=['accuracy']) ####### TRAINING ####### hist = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2, validation_data=(x_test, y_test)) # Evaluation score = model.evaluate(x_test, y_test, verbose=1) print('Test loss:', score[0]) print('Test accuracy:', score[1]) model.save(model_name) # plotting the metrics fig = plt.figure() plt.subplot(2,1,1) plt.plot(hist.history['acc']) plt.plot(hist.history['val_acc']) plt.title('model accuracy') plt.ylabel('accuracy') plt.xlabel('epoch') plt.legend(['train', 'test'], loc='lower right') plt.subplot(2,1,2) plt.plot(hist.history['loss']) plt.plot(hist.history['val_loss']) plt.title('model loss') plt.ylabel('loss') plt.xlabel('epoch') plt.legend(['train', 'test'], loc='upper right') plt.tight_layout() plt.show()
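Once training finishes, the saved model can be reloaded for single-image inference; a hedged sketch assuming the `hand_poses_wGarbage_10.h5` name produced by the settings above and a 28x28 grayscale crop.

import numpy as np
from keras import backend as K
from keras.models import load_model

model = load_model("cnn/models/hand_poses_wGarbage_10.h5")
crop = np.random.rand(28, 28).astype('float32')  # stand-in for a real preprocessed crop
if K.image_data_format() == 'channels_first':
    batch = crop.reshape(1, 1, 28, 28)
else:
    batch = crop.reshape(1, 28, 28, 1)
print(model.predict(batch).argmax())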
def _reduction_A(ip, p, filters, weight_decay=5e-5, id=None): '''Adds a Reduction cell for NASNet-A (Fig. 4 in the paper) # Arguments: ip: input tensor `x` p: input tensor `p` filters: number of output filters weight_decay: l2 regularization weight id: string id # Returns: a Keras tensor ''' """""" channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 with K.name_scope('reduction_A_block_%s' % id): p = _adjust_block(p, ip, filters, weight_decay, id) h = Activation('relu')(ip) h = Conv2D(filters, (1, 1), strides=(1, 1), padding='same', name='reduction_conv_1_%s' % id, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(h) h = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY, epsilon=_BN_EPSILON, name='reduction_bn_1_%s' % id)(h) with K.name_scope('block_1'): x1_1 = _separable_conv_block(h, filters, (5, 5), strides=(2, 2), weight_decay=weight_decay, id='reduction_left1_%s' % id) x1_2 = _separable_conv_block(p, filters, (7, 7), strides=(2, 2), weight_decay=weight_decay, id='reduction_1_%s' % id) x1 = add([x1_1, x1_2], name='reduction_add_1_%s' % id) with K.name_scope('block_2'): x2_1 = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='reduction_left2_%s' % id)(h) x2_2 = _separable_conv_block(p, filters, (7, 7), strides=(2, 2), weight_decay=weight_decay, id='reduction_right2_%s' % id) x2 = add([x2_1, x2_2], name='reduction_add_2_%s' % id) with K.name_scope('block_3'): x3_1 = AveragePooling2D((3, 3), strides=(2, 2), padding='same', name='reduction_left3_%s' % id)(h) x3_2 = _separable_conv_block(p, filters, (5, 5), strides=(2, 2), weight_decay=weight_decay, id='reduction_right3_%s' % id) x3 = add([x3_1, x3_2], name='reduction_add3_%s' % id) with K.name_scope('block_4'): x4 = AveragePooling2D((3, 3), strides=(1, 1), padding='same', name='reduction_left4_%s' % id)(x1) x4 = add([x2, x4]) with K.name_scope('block_5'): x5_1 = _separable_conv_block(x1, filters, (3, 3), weight_decay=weight_decay, id='reduction_left4_%s' % id) x5_2 = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='reduction_right5_%s' % id)(h) x5 = add([x5_1, x5_2], name='reduction_add4_%s' % id) x = concatenate([x2, x3, x4, x5], axis=channel_dim, name='reduction_concat_%s' % id) return x, ip
def InceptionV3(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000): """Instantiates the Inception v3 architecture. Optionally loads weights pre-trained on ImageNet. Note that when using TensorFlow, for best performance you should set `image_data_format="channels_last"` in your Keras config at ~/.keras/keras.json. The model and the weights are compatible with both TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. Note that the default input image size for this model is 299x299. # Arguments include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet). input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(299, 299, 3)` (with `channels_last` data format) or `(3, 299, 299)` (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 139. E.g. `(150, 150, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. # Returns A Keras model instance. # Raises ValueError: in case of invalid argument for `weights`, or invalid input shape. 
""" if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = _obtain_input_shape( input_shape, default_size=299, min_size=139, data_format=K.image_data_format(), include_top=include_top) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor if K.image_data_format() == 'channels_first': channel_axis = 1 else: channel_axis = 3 x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid') x = conv2d_bn(x, 32, 3, 3, padding='valid') x = conv2d_bn(x, 64, 3, 3) x = MaxPooling2D((3, 3), strides=(2, 2))(x) x = conv2d_bn(x, 80, 1, 1, padding='valid') x = conv2d_bn(x, 192, 3, 3, padding='valid') x = MaxPooling2D((3, 3), strides=(2, 2))(x) # mixed 0, 1, 2: 35 x 35 x 256 branch1x1 = conv2d_bn(x, 64, 1, 1) branch5x5 = conv2d_bn(x, 48, 1, 1) branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) branch3x3dbl = conv2d_bn(x, 64, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 32, 1, 1) x = layers.concatenate( [branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed0') # mixed 1: 35 x 35 x 256 branch1x1 = conv2d_bn(x, 64, 1, 1) branch5x5 = conv2d_bn(x, 48, 1, 1) branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) branch3x3dbl = conv2d_bn(x, 64, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 64, 1, 1) x = layers.concatenate( [branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed1') # mixed 2: 35 x 35 x 256 branch1x1 = conv2d_bn(x, 64, 1, 1) branch5x5 = conv2d_bn(x, 48, 1, 1) branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) branch3x3dbl = conv2d_bn(x, 64, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 64, 1, 1) x = layers.concatenate( [branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed2') # mixed 3: 17 x 17 x 768 branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid') branch3x3dbl = conv2d_bn(x, 64, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = conv2d_bn( branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid') branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) x = layers.concatenate( [branch3x3, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed3') # mixed 4: 17 x 17 x 768 branch1x1 = conv2d_bn(x, 192, 1, 1) branch7x7 = conv2d_bn(x, 128, 1, 1) branch7x7 = conv2d_bn(branch7x7, 128, 1, 7) branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) branch7x7dbl = conv2d_bn(x, 128, 1, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7) branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) 
branch_pool = conv2d_bn(branch_pool, 192, 1, 1) x = layers.concatenate( [branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=channel_axis, name='mixed4') # mixed 5, 6: 17 x 17 x 768 for i in range(2): branch1x1 = conv2d_bn(x, 192, 1, 1) branch7x7 = conv2d_bn(x, 160, 1, 1) branch7x7 = conv2d_bn(branch7x7, 160, 1, 7) branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) branch7x7dbl = conv2d_bn(x, 160, 1, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7) branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) branch_pool = AveragePooling2D( (3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) x = layers.concatenate( [branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=channel_axis, name='mixed' + str(5 + i)) # mixed 7: 17 x 17 x 768 branch1x1 = conv2d_bn(x, 192, 1, 1) branch7x7 = conv2d_bn(x, 192, 1, 1) branch7x7 = conv2d_bn(branch7x7, 192, 1, 7) branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) branch7x7dbl = conv2d_bn(x, 192, 1, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) x = layers.concatenate( [branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=channel_axis, name='mixed7') # mixed 8: 8 x 8 x 1280 branch3x3 = conv2d_bn(x, 192, 1, 1) branch3x3 = conv2d_bn(branch3x3, 320, 3, 3, strides=(2, 2), padding='valid') branch7x7x3 = conv2d_bn(x, 192, 1, 1) branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7) branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1) branch7x7x3 = conv2d_bn( branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid') branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) x = layers.concatenate( [branch3x3, branch7x7x3, branch_pool], axis=channel_axis, name='mixed8') # mixed 9: 8 x 8 x 2048 for i in range(2): branch1x1 = conv2d_bn(x, 320, 1, 1) branch3x3 = conv2d_bn(x, 384, 1, 1) branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3) branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1) branch3x3 = layers.concatenate( [branch3x3_1, branch3x3_2], axis=channel_axis, name='mixed9_' + str(i)) branch3x3dbl = conv2d_bn(x, 448, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3) branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3) branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1) branch3x3dbl = layers.concatenate( [branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis) branch_pool = AveragePooling2D( (3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) x = layers.concatenate( [branch1x1, branch3x3, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed' + str(9 + i)) if include_top: # Classification block x = GlobalAveragePooling2D(name='avg_pool')(x) x = Dense(classes, activation='softmax', name='predictions')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. 
model = Model(inputs, x, name='inception_v3') # load weights if weights == 'imagenet': if K.image_data_format() == 'channels_first': if K.backend() == 'tensorflow': warnings.warn('You are using the TensorFlow backend, yet you ' 'are using the Theano ' 'image data format convention ' '(`image_data_format="channels_first"`). ' 'For best performance, set ' '`image_data_format="channels_last"` in ' 'your Keras config ' 'at ~/.keras/keras.json.') if include_top: weights_path = get_file( 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5', WEIGHTS_PATH, cache_subdir='models', file_hash='9a0d58056eeedaa3f26cb7ebd46da564') else: weights_path = get_file( 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5', WEIGHTS_PATH_NO_TOP, cache_subdir='models') model.load_weights(weights_path) return model
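A short prediction sketch against the model returned above, reusing the stock Keras preprocessing helpers for Inception v3; the image path is hypothetical.

import numpy as np
from keras.preprocessing import image
from keras.applications.inception_v3 import preprocess_input, decode_predictions

model = InceptionV3(weights='imagenet', include_top=True)
img = image.load_img('elephant.jpg', target_size=(299, 299))  # hypothetical local image
x = image.img_to_array(img)
x = preprocess_input(np.expand_dims(x, axis=0))
print(decode_predictions(model.predict(x), top=3)[0])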
def _adjust_block(p, ip, filters, weight_decay=5e-5, id=None): ''' Adjusts the input `p` to match the shape of the `input` or situations where the output number of filters needs to be changed # Arguments: p: input tensor which needs to be modified ip: input tensor whose shape needs to be matched filters: number of output filters to be matched weight_decay: l2 regularization weight id: string id # Returns: an adjusted Keras tensor ''' channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 img_dim = 2 if K.image_data_format() == 'channels_first' else -2 with K.name_scope('adjust_block'): if p is None: p = ip elif p._keras_shape[img_dim] != ip._keras_shape[img_dim]: with K.name_scope('adjust_reduction_block_%s' % id): p = Activation('relu', name='adjust_relu_1_%s' % id)(p) p1 = AveragePooling2D((1, 1), strides=(2, 2), padding='valid', name='adjust_avg_pool_1_%s' % id)(p) p1 = Conv2D(filters // 2, (1, 1), padding='same', use_bias=False, kernel_regularizer=l2(weight_decay), name='adjust_conv_1_%s' % id, kernel_initializer='he_normal')(p1) p2 = ZeroPadding2D(padding=((0, 1), (0, 1)))(p) p2 = Cropping2D(cropping=((1, 0), (1, 0)))(p2) p2 = AveragePooling2D((1, 1), strides=(2, 2), padding='valid', name='adjust_avg_pool_2_%s' % id)(p2) p2 = Conv2D(filters // 2, (1, 1), padding='same', use_bias=False, kernel_regularizer=l2(weight_decay), name='adjust_conv_2_%s' % id, kernel_initializer='he_normal')(p2) p = concatenate([p1, p2], axis=channel_dim) p = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY, epsilon=_BN_EPSILON, name='adjust_bn_%s' % id)(p) elif p._keras_shape[channel_dim] != filters: with K.name_scope('adjust_projection_block_%s' % id): p = Activation('relu')(p) p = Conv2D(filters, (1, 1), strides=(1, 1), padding='same', name='adjust_conv_projection_%s' % id, use_bias=False, kernel_regularizer=l2(weight_decay), kernel_initializer='he_normal')(p) p = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY, epsilon=_BN_EPSILON, name='adjust_bn_%s' % id)(p) return p
def NASNet(input_shape=None, penultimate_filters=4032, nb_blocks=6, stem_filters=96, skip_reduction=True, use_auxiliary_branch=False, filters_multiplier=2, dropout=0.5, weight_decay=5e-5, include_top=True, weights=None, input_tensor=None, pooling=None, classes=1000, default_size=None): """Instantiates a NASNet architecture. Note that only TensorFlow is supported for now, therefore it only works with the data format `image_data_format='channels_last'` in your Keras config at `~/.keras/keras.json`. # Arguments input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(331, 331, 3)` for NASNetLarge or `(224, 224, 3)` for NASNetMobile It should have exactly 3 inputs channels, and width and height should be no smaller than 32. E.g. `(224, 224, 3)` would be one valid value. penultimate_filters: number of filters in the penultimate layer. NASNet models use the notation `NASNet (N @ P)`, where: - N is the number of blocks - P is the number of penultimate filters nb_blocks: number of repeated blocks of the NASNet model. NASNet models use the notation `NASNet (N @ P)`, where: - N is the number of blocks - P is the number of penultimate filters stem_filters: number of filters in the initial stem block skip_reduction: Whether to skip the reduction step at the tail end of the network. Set to `False` for CIFAR models. use_auxiliary_branch: Whether to use the auxiliary branch during training or evaluation. filters_multiplier: controls the width of the network. - If `filters_multiplier` < 1.0, proportionally decreases the number of filters in each layer. - If `filters_multiplier` > 1.0, proportionally increases the number of filters in each layer. - If `filters_multiplier` = 1, default number of filters from the paper are used at each layer. dropout: dropout rate weight_decay: l2 regularization weight include_top: whether to include the fully-connected layer at the top of the network. weights: `None` (random initialization) or `imagenet` (ImageNet weights) input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. default_size: specifies the default image size of the model # Returns A Keras model instance. # Raises ValueError: in case of invalid argument for `weights`, or invalid input shape. RuntimeError: If attempting to run this model with a backend that does not support separable convolutions. 
""" if K.backend() != 'tensorflow': raise RuntimeError('Only Tensorflow backend is currently supported, ' 'as other backends do not support ' 'separable convolution.') if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as ImageNet with `include_top` ' 'as true, `classes` should be 1000') if default_size is None: default_size = 331 # Determine proper input shape and default size. input_shape = _obtain_input_shape(input_shape, default_size=default_size, min_size=32, data_format=K.image_data_format(), require_flatten=include_top or weights) if K.image_data_format() != 'channels_last': warnings.warn('The NASNet family of models is only available ' 'for the input data format "channels_last" ' '(width, height, channels). ' 'However your settings specify the default ' 'data format "channels_first" (channels, width, height).' ' You should set `image_data_format="channels_last"` ' 'in your Keras config located at ~/.keras/keras.json. ' 'The model being returned right now will expect inputs ' 'to follow the "channels_last" data format.') K.set_image_data_format('channels_last') old_data_format = 'channels_first' else: old_data_format = None if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor assert penultimate_filters % 24 == 0, "`penultimate_filters` needs to be divisible " \ "by 24." channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 filters = penultimate_filters // 24 if not skip_reduction: x = Conv2D(stem_filters, (3, 3), strides=(2, 2), padding='valid', use_bias=False, name='stem_conv1', kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(img_input) else: x = Conv2D(stem_filters, (3, 3), strides=(1, 1), padding='same', use_bias=False, name='stem_conv1', kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(img_input) x = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY, epsilon=_BN_EPSILON, name='stem_bn1')(x) p = None if not skip_reduction: # imagenet / mobile mode x, p = _reduction_A(x, p, filters // (filters_multiplier**2), weight_decay, id='stem_1') x, p = _reduction_A(x, p, filters // filters_multiplier, weight_decay, id='stem_2') for i in range(nb_blocks): x, p = _normal_A(x, p, filters, weight_decay, id='%d' % (i)) x, p0 = _reduction_A(x, p, filters * filters_multiplier, weight_decay, id='reduce_%d' % (nb_blocks)) p = p0 if not skip_reduction else p for i in range(nb_blocks): x, p = _normal_A(x, p, filters * filters_multiplier, weight_decay, id='%d' % (nb_blocks + i + 1)) auxiliary_x = None if not skip_reduction: # imagenet / mobile mode if use_auxiliary_branch: auxiliary_x = _add_auxiliary_head(x, classes, weight_decay) x, p0 = _reduction_A(x, p, filters * filters_multiplier**2, weight_decay, id='reduce_%d' % (2 * nb_blocks)) if skip_reduction: # CIFAR mode if use_auxiliary_branch: auxiliary_x = _add_auxiliary_head(x, classes, weight_decay) p = p0 if not skip_reduction else p for i in range(nb_blocks): x, p = _normal_A(x, p, filters * filters_multiplier**2, weight_decay, id='%d' % (2 * nb_blocks + i + 1)) x = Activation('relu')(x) if include_top: x = GlobalAveragePooling2D()(x) x = Dropout(dropout)(x) x = Dense(classes, activation='softmax', 
kernel_regularizer=l2(weight_decay), name='predictions')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. if use_auxiliary_branch: model = Model(inputs, [x, auxiliary_x], name='NASNet_with_auxiliary') else: model = Model(inputs, x, name='NASNet') # load weights if weights == 'imagenet': if default_size == 224: # mobile version if include_top: if use_auxiliary_branch: weight_path = NASNET_MOBILE_WEIGHT_PATH_WITH_AUXULARY model_name = 'nasnet_mobile_with_aux.h5' else: weight_path = NASNET_MOBILE_WEIGHT_PATH model_name = 'nasnet_mobile.h5' else: if use_auxiliary_branch: weight_path = NASNET_MOBILE_WEIGHT_PATH_WITH_AUXULARY_NO_TOP model_name = 'nasnet_mobile_with_aux_no_top.h5' else: weight_path = NASNET_MOBILE_WEIGHT_PATH_NO_TOP model_name = 'nasnet_mobile_no_top.h5' weights_file = get_file(model_name, weight_path, cache_subdir='models') model.load_weights(weights_file, by_name=True) elif default_size == 331: # large version if include_top: if use_auxiliary_branch: weight_path = NASNET_LARGE_WEIGHT_PATH_WITH_auxiliary model_name = 'nasnet_large_with_aux.h5' else: weight_path = NASNET_LARGE_WEIGHT_PATH model_name = 'nasnet_large.h5' else: if use_auxiliary_branch: weight_path = NASNET_LARGE_WEIGHT_PATH_WITH_auxiliary_NO_TOP model_name = 'nasnet_large_with_aux_no_top.h5' else: weight_path = NASNET_LARGE_WEIGHT_PATH_NO_TOP model_name = 'nasnet_large_no_top.h5' weights_file = get_file(model_name, weight_path, cache_subdir='models') model.load_weights(weights_file, by_name=True) else: raise ValueError( 'ImageNet weights can only be loaded on NASNetLarge or NASNetMobile' ) if old_data_format: K.set_image_data_format(old_data_format) return model
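A hedged instantiation of the NASNet builder above in its mobile configuration with random weights; the parameter values mirror commonly used NASNetMobile settings and should be treated as assumptions here.

model = NASNet(input_shape=(224, 224, 3), penultimate_filters=1056,
               nb_blocks=4, stem_filters=32, skip_reduction=False,
               use_auxiliary_branch=False, include_top=True,
               weights=None, classes=1000, default_size=224)
model.summary()  # note: penultimate_filters must stay divisible by 24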
def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1), block_id=1, attention_module=None): """Adds a depthwise convolution block. A depthwise convolution block consists of a depthwise conv, batch normalization, relu6, pointwise convolution, batch normalization and relu6 activation. # Arguments inputs: Input tensor of shape `(rows, cols, channels)` (with `channels_last` data format) or (channels, rows, cols) (with `channels_first` data format). pointwise_conv_filters: Integer, the dimensionality of the output space (i.e. the number output of filters in the pointwise convolution). alpha: controls the width of the network. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. - If `alpha` > 1.0, proportionally increases the number of filters in each layer. - If `alpha` = 1, default number of filters from the paper are used at each layer. depth_multiplier: The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output channels will be equal to `filters_in * depth_multiplier`. strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the width and height. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. block_id: Integer, a unique identification designating the block number. # Input shape 4D tensor with shape: `(batch, channels, rows, cols)` if data_format='channels_first' or 4D tensor with shape: `(batch, rows, cols, channels)` if data_format='channels_last'. # Output shape 4D tensor with shape: `(batch, filters, new_rows, new_cols)` if data_format='channels_first' or 4D tensor with shape: `(batch, new_rows, new_cols, filters)` if data_format='channels_last'. `rows` and `cols` values might have changed due to stride. # Returns Output tensor of block. """ channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 pointwise_conv_filters = int(pointwise_conv_filters * alpha) x = DepthwiseConv2D((3, 3), padding='same', depth_multiplier=depth_multiplier, strides=strides, use_bias=False, name='conv_dw_%d' % block_id)(inputs) x = BatchNormalization(axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x) x = Activation(relu6, name='conv_dw_%d_relu' % block_id)(x) x = Conv2D(pointwise_conv_filters, (1, 1), padding='same', use_bias=False, strides=(1, 1), name='conv_pw_%d' % block_id)(x) x = BatchNormalization(axis=channel_axis, name='conv_pw_%d_bn' % block_id)(x) x = Activation(relu6, name='conv_pw_%d_relu' % block_id)(x) # attention_module if attention_module is not None: x = attach_attention_module(x, attention_module) return x
# for resizing import scipy # other from skimage.transform import resize import numpy as np from collections import deque import gym import random import datetime import os # to make sure the image data is in the correct order import keras.backend as backend assert backend.image_data_format() == "channels_last" # for frame testing purposes only #import matplotlib.pyplot as plt GAME = "BreakoutDeterministic-v4" COLAB = False USE_TARGET_NETWORK = False SAVE_PATH = os.path.join("colaboratory_models", "colab_models") if COLAB else "." SAVE_NAME = GAME + str(datetime.datetime.now()) NETWORK_UPDATE_FREQUENCY = 10000 # in parameter updates, not in steps taken! INITIAL_REPLAY_MEMORY_SIZE = 50000
def __init__(self, file_path, seg_data_generator, data_dir, data_suffix, label_dir, label_suffix, classes, ignore_label=255, crop_mode='none', label_cval=255, pad_size=None, target_size=None, color_mode='rgb', data_format='default', class_mode='sparse', batch_size=1, shuffle=True, seed=None, save_to_dir=None, save_prefix='', save_format='jpeg', loss_shape=None): if data_format == 'default': data_format = K.image_data_format() self.file_path = file_path self.data_dir = data_dir self.data_suffix = data_suffix self.label_suffix = label_suffix self.label_dir = label_dir self.classes = classes self.seg_data_generator = seg_data_generator self.target_size = tuple(target_size) self.ignore_label = ignore_label self.crop_mode = crop_mode self.label_cval = label_cval self.pad_size = pad_size if color_mode not in {'rgb', 'grayscale'}: raise ValueError('Invalid color mode:', color_mode, '; expected "rgb" or "grayscale".') self.color_mode = color_mode self.data_format = data_format self.nb_label_ch = 1 self.loss_shape = loss_shape if (self.label_suffix == '.npy') or (self.label_suffix == 'npy'): self.label_file_format = 'npy' else: self.label_file_format = 'img' if target_size: if self.color_mode == 'rgb': if self.data_format == 'channels_last': self.image_shape = self.target_size + (3,) else: self.image_shape = (3,) + self.target_size else: if self.data_format == 'channels_last': self.image_shape = self.target_size + (1,) else: self.image_shape = (1,) + self.target_size if self.data_format == 'channels_last': self.label_shape = self.target_size + (self.nb_label_ch,) else: self.label_shape = (self.nb_label_ch,) + self.target_size elif batch_size != 1: raise ValueError( 'Batch size must be 1 when target image size is undetermined') else: self.image_shape = None self.label_shape = None if class_mode not in {'sparse', None}: raise ValueError('Invalid class_mode:', class_mode, '; expected one of ' '"sparse", or None.') self.class_mode = class_mode if save_to_dir: self.palette = None self.save_to_dir = save_to_dir self.save_prefix = save_prefix self.save_format = save_format white_list_formats = {'png', 'jpg', 'jpeg', 'bmp', 'npy'} # build lists for data files and label files self.data_files = [] self.label_files = [] fp = open(file_path) lines = fp.readlines() fp.close() self.nb_sample = len(lines) for line in lines: line = line.strip('\n') self.data_files.append(line + data_suffix) self.label_files.append(line + label_suffix) super(SegDirectoryIterator, self).__init__( self.nb_sample, batch_size, shuffle, seed)
def MobileNet(input_shape=None, alpha=1.0, depth_multiplier=1, dropout=1e-3, include_top=True, weights=None, input_tensor=None, pooling=None, classes=1000, attention_module=None): """Instantiates the SE-MobileNet architecture. Note that only TensorFlow is supported for now, therefore it only works with the data format `image_data_format='channels_last'` in your Keras config at `~/.keras/keras.json`. To load a MobileNet model via `load_model`, import the custom objects `relu6` and `DepthwiseConv2D` and pass them to the `custom_objects` parameter. E.g. model = load_model('mobilenet.h5', custom_objects={ 'relu6': mobilenet.relu6, 'DepthwiseConv2D': mobilenet.DepthwiseConv2D}) # Arguments input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or (3, 224, 224) (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. alpha: controls the width of the network. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. - If `alpha` > 1.0, proportionally increases the number of filters in each layer. - If `alpha` = 1, default number of filters from the paper are used at each layer. depth_multiplier: depth multiplier for depthwise convolution (also called the resolution multiplier) dropout: dropout rate include_top: whether to include the fully-connected layer at the top of the network. weights: `None` (random initialization) or `imagenet` (ImageNet weights) input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. # Returns A Keras model instance. # Raises ValueError: in case of invalid argument for `weights`, or invalid input shape. RuntimeError: If attempting to run this model with a backend that does not support separable convolutions. """ if K.backend() != 'tensorflow': raise RuntimeError('Only TensorFlow backend is currently supported, ' 'as other backends do not support ' 'depthwise convolution.') if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as ImageNet with `include_top` ' 'as true, `classes` should be 1000') # Determine proper input shape and default size. 
if input_shape is None: default_size = 224 else: if K.image_data_format() == 'channels_first': rows = input_shape[1] cols = input_shape[2] else: rows = input_shape[0] cols = input_shape[1] if rows == cols and rows in [128, 160, 192, 224]: default_size = rows else: default_size = 224 input_shape = _obtain_input_shape(input_shape, default_size=default_size, min_size=32, data_format=K.image_data_format(), require_flatten=include_top, weights=weights) if K.image_data_format() == 'channels_last': row_axis, col_axis = (0, 1) else: row_axis, col_axis = (1, 2) rows = input_shape[row_axis] cols = input_shape[col_axis] if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor x = _conv_block(img_input, 32, alpha, strides=(2, 2)) x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1, attention_module=attention_module) x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, strides=(2, 2), block_id=2, attention_module=attention_module) x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3, attention_module=attention_module) x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, strides=(2, 2), block_id=4, attention_module=attention_module) x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5, attention_module=attention_module) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, strides=(2, 2), block_id=6, attention_module=attention_module) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7, attention_module=attention_module) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8, attention_module=attention_module) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9, attention_module=attention_module) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10, attention_module=attention_module) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11, attention_module=attention_module) x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, strides=(2, 2), block_id=12, attention_module=attention_module) x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13, attention_module=attention_module) if include_top: if K.image_data_format() == 'channels_first': shape = (int(1024 * alpha), 1, 1) else: shape = (1, 1, int(1024 * alpha)) x = GlobalAveragePooling2D()(x) x = Reshape(shape, name='reshape_n_1')(x) x = Dropout(dropout, name='dropout')(x) x = Conv2D(classes, (1, 1), padding='same', name='conv_preds')(x) x = Activation('softmax', name='act_softmax')(x) x = Reshape((classes, ), name='reshape_final')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = Model(inputs, x, name='se_mobilenet_%0.2f_%s' % (alpha, rows)) return model
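An illustrative call to the SE-MobileNet builder above with random weights; the `'se_block'` string is an assumption about what `attach_attention_module` accepts, not a value confirmed by this source.

model = MobileNet(input_shape=(224, 224, 3), alpha=1.0, depth_multiplier=1,
                  include_top=True, weights=None, classes=1000,
                  attention_module='se_block')
model.summary()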
def load_data(): """Loads the PCam dataset. # Returns Three tuples `(x_train, y_train, meta_train), (x_valid, y_valid, meta_valid), (x_test, y_test, meta_test)`, where the `x`/`y` entries are `HDF5Matrix` objects and the `meta` entries are pandas DataFrames. """ dirname = os.path.join('datasets', 'pcam') base = 'https://drive.google.com/uc?export=download&id=' try: y_train = HDF5Matrix( get_file('camelyonpatch_level_2_split_train_y.h5', origin=base + '1269yhu3pZDP8UYFQs-NYs3FPwuK-nGSG', cache_subdir=dirname, archive_format='gzip'), 'y') x_valid = HDF5Matrix( get_file('camelyonpatch_level_2_split_valid_x.h5', origin=base + '1hgshYGWK8V-eGRy8LToWJJgDU_rXWVJ3', cache_subdir=dirname, archive_format='gzip'), 'x') y_valid = HDF5Matrix( get_file('camelyonpatch_level_2_split_valid_y.h5', origin=base + '1bH8ZRbhSVAhScTS0p9-ZzGnX91cHT3uO', cache_subdir=dirname, archive_format='gzip'), 'y') x_test = HDF5Matrix( get_file('camelyonpatch_level_2_split_test_x.h5', origin=base + '1qV65ZqZvWzuIVthK8eVDhIwrbnsJdbg_', cache_subdir=dirname, archive_format='gzip'), 'x') y_test = HDF5Matrix( get_file('camelyonpatch_level_2_split_test_y.h5', origin=base + '17BHrSrwWKjYsOgTMmoqrIjDy6Fa2o_gP', cache_subdir=dirname, archive_format='gzip'), 'y') meta_train = pd.read_csv( get_file('camelyonpatch_level_2_split_train_meta.csv', origin=base + '1XoaGG3ek26YLFvGzmkKeOz54INW0fruR', cache_subdir=dirname)) meta_valid = pd.read_csv( get_file('camelyonpatch_level_2_split_valid_meta.csv', origin=base + '16hJfGFCZEcvR3lr38v3XCaD5iH1Bnclg', cache_subdir=dirname)) meta_test = pd.read_csv( get_file('camelyonpatch_level_2_split_test_meta.csv', origin=base + '19tj7fBlQQrd4DapCjhZrom_fA4QlHqN4', cache_subdir=dirname)) x_train = HDF5Matrix( get_file('camelyonpatch_level_2_split_train_x.h5', origin=base + '1Ka0XfEMiwgCYPdTI-vv6eUElOBnKFKQ2', cache_subdir=dirname, archive_format='gzip'), 'x') except OSError: raise NotImplementedError( 'Direct download is currently not working. Please go to https://drive.google.com/drive/folders/1gHou49cA1s5vua2V5L98Lt8TiWA3FrKB, press "Download all", and place the (ungzipped) files in ~/.keras/datasets/pcam.' ) if K.image_data_format() == 'channels_first': raise NotImplementedError() return (x_train, y_train, meta_train), (x_valid, y_valid, meta_valid), (x_test, y_test, meta_test)
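A sketch of how the splits returned above might be inspected and consumed. HDF5Matrix objects read lazily from disk and support slice indexing; the shapes in the comments are the ones documented for the PCam release (96x96 RGB patches) and should be treated as assumptions here.

(x_train, y_train, meta_train), (x_valid, y_valid, meta_valid), (x_test, y_test, meta_test) = load_data()

print(x_train.shape)        # e.g. (262144, 96, 96, 3) per the PCam release
print(y_train.shape)        # labels are stored as (N, 1, 1, 1) in the HDF5 files
patch = x_train[0]          # a single patch, read lazily from disk

# For training, the matrices can be passed straight to fit(); a compiled model
# is assumed to exist elsewhere, and shuffle='batch' is the setting recommended
# for HDF5-backed inputs:
# model.fit(x_train, y_train, batch_size=32, epochs=1, shuffle='batch',
#           validation_data=(x_valid, y_valid))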
random_state=42) if K.image_data_format() == 'channels_first': # channels_first layout (e.g. Theano) x_train = x_train.values.reshape(x_train.shape[0], 1, 28, 28) x_test = x_test.values.reshape(x_test.shape[0], 1, 28, 28) input_shape = (1, 28, 28) else: # channels_last layout (e.g. TensorFlow) x_train = x_train.values.reshape(x_train.shape[0], 28, 28, 1) x_test = x_test.values.reshape(x_test.shape[0], 28, 28, 1) input_shape = (28, 28, 1) x_train = x_train.astype('float32') x_test = x_test.astype('float32') input_size = x.shape[1] no_classes = len(labels)
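The reshaping logic above can be factored into a small helper that works for either data format. This is a minimal sketch with illustrative names (not part of the original script); pass `df.values` if the data lives in a DataFrame.

import numpy as np
from keras import backend as K

def to_image_batch(flat, rows=28, cols=28):
    """Turn (N, rows*cols) data into a 4D batch plus the matching input_shape."""
    if K.image_data_format() == 'channels_first':
        batch = flat.reshape(flat.shape[0], 1, rows, cols)
        input_shape = (1, rows, cols)
    else:
        batch = flat.reshape(flat.shape[0], rows, cols, 1)
        input_shape = (rows, cols, 1)
    return batch.astype('float32'), input_shape

x = np.random.randint(0, 256, size=(16, 784))   # fake flat 28x28 images
x_img, input_shape = to_image_batch(x)
print(x_img.shape, input_shape)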
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2), dilation_rate=1, multigrid=[1, 2, 1], use_se=True): # filters of the three convolutions in the bottleneck filters1, filters2, filters3 = filters # determine the batch-norm axis from the data format if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' # dilated convolutions: keep stride 1 and apply the multigrid rates if dilation_rate > 1: strides = (1, 1) else: multigrid = [1, 1, 1] # forward x = Conv2D(filters1, (1, 1), strides=strides, name=conv_name_base + '2a', dilation_rate=dilation_rate * multigrid[0])(input_tensor) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) x = Activation('relu')(x) x = Conv2D(filters2, kernel_size, padding='same', name=conv_name_base + '2b', dilation_rate=dilation_rate * multigrid[1])(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) x = Activation('relu')(x) x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c', dilation_rate=dilation_rate * multigrid[2])(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) shortcut = Conv2D(filters3, (1, 1), strides=strides, name=conv_name_base + '1')(input_tensor) shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut) # squeeze-and-excitation (applied only for stages before 5) if use_se and stage < 5: se = _squeeze_excite_block(x, filters3, k=1, name=conv_name_base + '_se') x = multiply([x, se]) x = add([x, shortcut]) x = Activation('relu')(x) return x
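The block above relies on a `_squeeze_excite_block` helper defined elsewhere in this code base. The sketch below only illustrates the usual squeeze-and-excitation pattern (global average pool, two dense layers, per-channel sigmoid gate) under the assumption of `channels_last` data; the real helper may differ in detail.

from keras.layers import GlobalAveragePooling2D, Dense, Reshape

def _squeeze_excite_block(tensor, filters, k=1, name=None, ratio=16):
    # Squeeze: one value per channel.
    se = GlobalAveragePooling2D(name=None if name is None else name + '_gap')(tensor)
    # Excite: bottleneck MLP producing a per-channel gate in (0, 1).
    se = Dense(max(filters * k // ratio, 1), activation='relu')(se)
    se = Dense(filters * k, activation='sigmoid')(se)
    # Reshape so the gate broadcasts over the spatial dimensions when multiplied.
    return Reshape((1, 1, filters * k))(se)

The returned tensor is what `multiply([x, se])` in conv_block expects: a (1, 1, channels) gate broadcast against the (H, W, channels) feature map.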
def PSPNet50(input_shape=(512, 512, 3), n_labels=2, output_stride=16, num_blocks=4, multigrid=[1, 1, 1], levels=[6, 3, 2, 1], use_se=True, output_mode="softmax", upsample_type='deconv'): # Input shape img_input = Input(shape=input_shape, name="input") # compute input shape if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', name='conv1')(img_input) x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) x = Activation('relu')(x) x = MaxPooling2D((3, 3), strides=(2, 2))(x) x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), use_se=use_se) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', use_se=use_se) x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', use_se=use_se) x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', use_se=use_se) x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', use_se=use_se) x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', use_se=use_se) x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', use_se=use_se) if output_stride == 8: rate_scale = 2 elif output_stride == 16: rate_scale = 1 x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', dilation_rate=1 * rate_scale, multigrid=multigrid, use_se=use_se) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b', dilation_rate=1 * rate_scale, multigrid=multigrid, use_se=use_se) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c', dilation_rate=1 * rate_scale, multigrid=multigrid, use_se=use_se) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d', dilation_rate=1 * rate_scale, multigrid=multigrid, use_se=use_se) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e', dilation_rate=1 * rate_scale, multigrid=multigrid, use_se=use_se) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f', dilation_rate=1 * rate_scale, multigrid=multigrid, use_se=use_se) init_rate = 2 for block in range(4, num_blocks + 1): if block == 4: block = '' x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a%s' % block, dilation_rate=init_rate * rate_scale, multigrid=multigrid, use_se=use_se) x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b%s' % block, dilation_rate=init_rate * rate_scale, multigrid=multigrid, use_se=use_se) x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c%s' % block, dilation_rate=init_rate * rate_scale, multigrid=multigrid, use_se=use_se) init_rate *= 2 # x1 = aspp_block(x,256,rate_scale=rate_scale,output_stride=output_stride,input_shape=input_shape) x = pyramid_pooling_module(x, num_filters=512, input_shape=input_shape, output_stride=output_stride, levels=levels) # x = merge([ # x1, # x2, # ], mode='concat', concat_axis=3) # upsample_type if upsample_type == 'duc': x = duc(x, factor=output_stride, output_shape=(input_shape[0], input_shape[1], n_labels)) out = _conv(filters=n_labels, kernel_size=(1, 1), padding='same', block='out_duc_%s' % output_stride)(x) elif upsample_type == 'bilinear': x = _conv(filters=n_labels, kernel_size=(1, 1), padding='same', block='out_bilinear_%s' % output_stride)(x) out = BilinearUpSampling2D((n_labels, input_shape[0], input_shape[1]), factor=output_stride)(x) elif upsample_type == 'deconv': out = Conv2DTranspose(filters=n_labels, kernel_size=(output_stride * 2, output_stride * 2), strides=(output_stride, output_stride), padding='same', kernel_initializer='he_normal', kernel_regularizer=None, use_bias=False, name='upscore_{}'.format('out'))(x) out = Reshape((input_shape[0], 
input_shape[1], n_labels), input_shape=(input_shape[0], input_shape[1], n_labels))(out) # output activation (default "softmax") out = Activation(output_mode, name="akhri")(out) model = Model(inputs=img_input, outputs=out) return model
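A minimal instantiation sketch for the PSPNet50 builder above; the optimizer and loss are illustrative assumptions for a per-pixel softmax segmentation setup, not choices taken from this code.

model = PSPNet50(input_shape=(512, 512, 3),
                 n_labels=2,
                 output_stride=16,
                 upsample_type='deconv',
                 output_mode='softmax')
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()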
def MobileNetV3(stack_fn, last_point_ch, input_shape=None, alpha=1.0, model_type='large', minimalistic=False, include_top=True, weights='imagenet', input_tensor=None, classes=1000, pooling=None, dropout_rate=0.2, classifier_activation='softmax', layers=None): if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' 'as true, `classes` should be 1000') # Determine proper input shape and default size. # If both input_shape and input_tensor are used, they should match if input_shape is not None and input_tensor is not None: try: is_input_t_tensor = backend.is_keras_tensor(input_tensor) except ValueError: try: is_input_t_tensor = backend.is_keras_tensor( layer_utils.get_source_inputs(input_tensor)) except ValueError: raise ValueError('input_tensor: ', input_tensor, 'is not a valid input_tensor') if is_input_t_tensor: if backend.image_data_format() == 'channels_first': if backend.int_shape(input_tensor)[1] != input_shape[1]: raise ValueError('input_shape: ', input_shape, 'and input_tensor: ', input_tensor, 'do not meet the same shape requirements') else: if backend.int_shape(input_tensor)[2] != input_shape[1]: raise ValueError('input_shape: ', input_shape, 'and input_tensor: ', input_tensor, 'do not meet the same shape requirements') else: raise ValueError('input_tensor specified: ', input_tensor, 'is not a keras tensor') # If input_shape is None, infer the shape from input_tensor if input_shape is None and input_tensor is not None: try: backend.is_keras_tensor(input_tensor) except ValueError: raise ValueError('input_tensor: ', input_tensor, 'is type: ', type(input_tensor), 'which is not a valid type') if backend.is_keras_tensor(input_tensor): if backend.image_data_format() == 'channels_first': rows = backend.int_shape(input_tensor)[2] cols = backend.int_shape(input_tensor)[3] input_shape = (3, cols, rows) else: rows = backend.int_shape(input_tensor)[1] cols = backend.int_shape(input_tensor)[2] input_shape = (cols, rows, 3) # If both input_shape and input_tensor are None, fall back to a standard shape if input_shape is None and input_tensor is None: input_shape = (None, None, 3) if backend.image_data_format() == 'channels_last': row_axis, col_axis = (0, 1) else: row_axis, col_axis = (1, 2) rows = input_shape[row_axis] cols = input_shape[col_axis] if rows and cols and (rows < 32 or cols < 32): raise ValueError('Input size must be at least 32x32; got `input_shape=' + str(input_shape) + '`') if weights == 'imagenet': if (not minimalistic and alpha not in [0.75, 1.0] or minimalistic and alpha != 1.0): raise ValueError('If imagenet weights are being loaded, ' 'alpha can be one of `0.75`, `1.0` for non-minimalistic' ' or `1.0` for minimalistic only.') if rows != cols or rows != 224: logging.warning('`input_shape` is undefined or non-square, ' 'or `rows` is not 224.' 
' Weights for input shape (224, 224) will be' ' loaded as the default.') if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 if minimalistic: kernel = 3 activation = relu se_ratio = None else: kernel = 5 activation = hard_swish se_ratio = 0.25 x = img_input x = layers.Rescaling(scale=1. / 127.5, offset=-1.)(x) x = layers.Conv2D( 16, kernel_size=3, strides=(2, 2), padding='same', use_bias=False, name='Conv')(x) x = layers.BatchNormalization( axis=channel_axis, epsilon=1e-3, momentum=0.999, name='Conv/BatchNorm')(x) x = activation(x) x = stack_fn(x, kernel, activation, se_ratio) last_conv_ch = _depth(backend.int_shape(x)[channel_axis] * 6) # if the width multiplier is greater than 1 we # increase the number of output channels if alpha > 1.0: last_point_ch = _depth(last_point_ch * alpha) x = layers.Conv2D( last_conv_ch, kernel_size=1, padding='same', use_bias=False, name='Conv_1')(x) x = layers.BatchNormalization( axis=channel_axis, epsilon=1e-3, momentum=0.999, name='Conv_1/BatchNorm')(x) x = activation(x) x = layers.Conv2D( last_point_ch, kernel_size=1, padding='same', use_bias=True, name='Conv_2')(x) x = activation(x) if include_top: x = layers.GlobalAveragePooling2D()(x) if channel_axis == 1: x = layers.Reshape((last_point_ch, 1, 1))(x) else: x = layers.Reshape((1, 1, last_point_ch))(x) if dropout_rate > 0: x = layers.Dropout(dropout_rate)(x) x = layers.Conv2D(classes, kernel_size=1, padding='same', name='Logits')(x) x = layers.Flatten()(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Activation(activation=classifier_activation, name='Predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D(name='avg_pool')(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D(name='max_pool')(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = models.Model(inputs, x, name='MobilenetV3' + model_type) # Load weights. if weights == 'imagenet': model_name = '{}{}_224_{}_float'.format( model_type, '_minimalistic' if minimalistic else '', str(alpha)) if include_top: file_name = 'weights_mobilenet_v3_' + model_name + '.h5' file_hash = WEIGHTS_HASHES[model_name][0] else: file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5' file_hash = WEIGHTS_HASHES[model_name][1] weights_path = data_utils.get_file( file_name, BASE_WEIGHT_PATH + file_name, cache_subdir='models', file_hash=file_hash) model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
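The builder above calls a `_depth` rounding helper that is not shown here. The sketch below follows the rounding rule used in the reference MobileNet implementations (round a channel count to a multiple of `divisor` without dropping more than 10% of the original value); the actual helper in this code base may differ in detail.

def _depth(v, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    # Round to the nearest multiple of `divisor`, but never below `min_value`.
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure the rounding does not reduce the channel count by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

print(_depth(16 * 0.75))   # 16: rounding up preserves at least 90% of 12 -> bumps to a multiple of 8
print(_depth(960 * 1.0))   # 960: already a multiple of 8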
def aspp_block(x, num_filters=256, rate_scale=1, output_stride=16, input_shape=(512, 512, 3)): # determine the batch-norm axis from the data format if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 # parallel dilated 3x3 branches plus a 1x1 branch conv3_3_1 = ZeroPadding2D(padding=(6 * rate_scale, 6 * rate_scale))(x) conv3_3_1 = _conv(filters=num_filters, kernel_size=(3, 3), dilation_rate=(6 * rate_scale, 6 * rate_scale), padding='valid', block='assp_3_3_1_%s' % output_stride)(conv3_3_1) conv3_3_1 = BatchNormalization(axis=bn_axis, name='bn_3_3_1_%s' % output_stride)(conv3_3_1) conv3_3_2 = ZeroPadding2D(padding=(12 * rate_scale, 12 * rate_scale))(x) conv3_3_2 = _conv(filters=num_filters, kernel_size=(3, 3), dilation_rate=(12 * rate_scale, 12 * rate_scale), padding='valid', block='assp_3_3_2_%s' % output_stride)(conv3_3_2) conv3_3_2 = BatchNormalization(axis=bn_axis, name='bn_3_3_2_%s' % output_stride)(conv3_3_2) conv3_3_3 = ZeroPadding2D(padding=(18 * rate_scale, 18 * rate_scale))(x) conv3_3_3 = _conv(filters=num_filters, kernel_size=(3, 3), dilation_rate=(18 * rate_scale, 18 * rate_scale), padding='valid', block='assp_3_3_3_%s' % output_stride)(conv3_3_3) conv3_3_3 = BatchNormalization(axis=bn_axis, name='bn_3_3_3_%s' % output_stride)(conv3_3_3) conv1_1 = _conv(filters=num_filters, kernel_size=(1, 1), padding='same', block='assp_1_1_%s' % output_stride)(x) conv1_1 = BatchNormalization(axis=bn_axis, name='bn_1_1_%s' % output_stride)(conv1_1) # global_feat = AveragePooling2D((input_shape[0]/output_stride,input_shape[1]/output_stride))(x) # global_feat = _conv(filters=num_filters, kernel_size=(1, 1),padding='same')(global_feat) # global_feat = BatchNormalization()(global_feat) # global_feat = BilinearUpSampling2D((256,input_shape[0]/output_stride,input_shape[1]/output_stride),factor=input_shape[1]/output_stride)(global_feat) # concatenate the parallel branches along the channel axis (Keras 2 functional API) y = concatenate([conv3_3_1, conv3_3_2, conv3_3_3, conv1_1], axis=3) # y = _conv_bn_relu(filters=1, kernel_size=(1, 1),padding='same')(y) y = _conv(filters=256, kernel_size=(1, 1), padding='same', block='assp_out_%s' % output_stride)(y) y = BatchNormalization(axis=bn_axis, name='bn_out_%s' % output_stride)(y) return y
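A sketch of wiring aspp_block into a feature extractor, assuming the `_conv` helper and layer imports used above are in scope. The strided Conv2D below is only a stand-in encoder; in the full model the block is applied to the stride-16 (or stride-8) feature map of the ResNet backbone.

from keras.layers import Input, Conv2D
from keras.models import Model

inp = Input(shape=(512, 512, 3))
feat = Conv2D(256, (3, 3), strides=(16, 16), padding='same')(inp)   # stand-in for the encoder
out = aspp_block(feat, num_filters=256, rate_scale=1, output_stride=16,
                 input_shape=(512, 512, 3))
Model(inp, out).summary()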