def figure_ground_a(*args, **kwargs):
    """
    Build the "Figure-Ground A" conx dataset.

    The ``figure_ground_a.npy`` file is fetched through ``get_file`` and each
    record is split into the full letter, its brim and its body. The letters
    become the single input bank; brims and bodies become the two target
    banks. Positional and keyword arguments are accepted but not used.
    """
    dataset = cx.Dataset()
    source = "https://raw.githubusercontent.com/Calysto/conx-data/master/gridfonts/figure_ground_a.npy"
    records = np.load(get_file("figure_ground_a.npy", origin=source))
    ## Record layout: [[[letter], [brim, body]], ...]
    whole_letters = []
    brim_parts = []
    body_parts = []
    for record in records:
        whole_letters.append(record[0])
        brim_parts.append(record[1][0])
        body_parts.append(record[1][1])
    letters = np.array(whole_letters)
    brims = np.array(brim_parts)
    bodies = np.array(body_parts)
    dataset.name = "Figure-Ground A"
    dataset.description = """ This dataset (the so-called a-tabase) originates from Douglas Hofstadter's research group: http://goosie.cogsci.indiana.edu/pub/gridfonts.data ![Gridfont Grid](https://github.com/Calysto/conx-data/raw/master/gridfonts/grid.png) These data (all the letter A) have been processed to make them neural network friendly: https://github.com/Calysto/conx-data/blob/master/gridfonts/gridfonts.py The brim and body parts have been idenified manually. The dataset is composed of letters on a 17 row x 9 column grid (4 lines not used on top and another 4 not used on the bottom of each letter were removed from the original 25x9 latter images). The inputs are composed of the full letter. The targets are composed of a picture of the body and the brim. You can read a thesis using part of this dataset here: https://repository.brynmawr.edu/compsci_pubs/78/ """
    dataset.load_direct([letters], [brims, bodies])
    return dataset
def dataset_downloader():
    """
    Fetch the QM9 archive and rename its members to the ``qm9`` naming scheme.

    ``get_file`` is asked to download and extract ``qm9.tar.gz`` with
    ``DATA_PATH`` as both cache directory and sub-directory. The two
    ``gdb9`` files are then renamed and the archive at
    ``DATA_PATH + 'qm9.tar.gz'`` is deleted.

    :return: the path that ``get_file`` returned for the archive
    """
    archive = get_file('qm9.tar.gz', DATASET_URL, extract=True,
                       cache_dir=DATA_PATH, cache_subdir=DATA_PATH)
    renames = (
        ('gdb9.sdf', 'qm9.sdf'),
        ('gdb9.sdf.csv', 'qm9.sdf.csv'),
    )
    for old_name, new_name in renames:
        os.rename(DATA_PATH + old_name, DATA_PATH + new_name)
    os.remove(DATA_PATH + 'qm9.tar.gz')
    return archive
def gridfonts(*args, **kwargs):
    """
    Build the "Gridfonts" conx dataset.

    ``gridfonts.npy`` is fetched through ``get_file``; its first element
    supplies the letter matrices (used as both inputs and targets) and its
    second element supplies the label strings. Positional and keyword
    arguments are accepted but not used.
    """
    dataset = cx.Dataset()
    source = "https://raw.githubusercontent.com/Calysto/conx-data/master/gridfonts/gridfonts.npy"
    contents = np.load(get_file("gridfonts.npy", origin=source))
    ## Layout: [letters, labels]
    matrices, names = contents[0], contents[1]
    # Inputs and targets are built as two separate arrays from the same data.
    letters = np.array(list(matrices))
    targets = np.array(list(matrices))
    labels = np.array(list(names), dtype=str)
    dataset.name = "Gridfonts"
    dataset.description = """ This dataset originates from Douglas Hofstadter's research group: http://goosie.cogsci.indiana.edu/pub/gridfonts.data ![Gridfont Grid](https://github.com/Calysto/conx-data/raw/master/gridfonts/grid.png) These data have been processed to make them neural network friendly: https://github.com/Calysto/conx-data/blob/master/gridfonts/gridfonts.py The dataset is composed of letters on a 25 row x 9 column grid. The inputs and targets are identical, and the labels contain a string identifying the letter. You can read a thesis using part of this dataset here: https://repository.brynmawr.edu/compsci_pubs/78/ """
    dataset.load_direct([letters], [targets], [labels])
    return dataset
def _download_(self):
    """
    Fetch ``CBTest.tar`` and parse the stories for the configured task.

    The member path template is selected by ``self.__task__``. The
    ``train`` and ``valid_2000ex`` splits are concatenated into the training
    stories; ``test_2500ex`` provides the test stories.

    :return: tuple ``(train_stories, test_stories)``
    """
    member_templates = {
        'CN': 'CBTest/data/cbtest_CN_cbt_{}.txt',
        'NE': 'CBTest/data/cbtest_N_{}.txt',
        'P': 'CBTest/data/cbtest_P_{}.txt',
        'V': 'CBTest/data/cbtest_V_{}.txt',
        # 'generic': 'CBTest/data/cbt_{}.txt',
    }
    archive_path = get_file('CBTest.tar', origin=self.__URL__)
    with tarfile.open(archive_path) as archive:
        template = member_templates[self.__task__]

        def stories_for(split):
            # Parse one split straight from the tar member, without extracting to disk.
            member = archive.extractfile(template.format(split))
            return self.__get_stories__(member, only_supporting=self.__only_supporting__)

        train_stories = stories_for('train')
        train_stories += stories_for('valid_2000ex')
        test_stories = stories_for('test_2500ex')
    return train_stories, test_stories
def load_cifar10(cache_dir='Z:\\', cache_subdir='datasets'):
    """
    Download (if necessary) and load the CIFAR-10 python batches.

    :param cache_dir: base cache directory passed to ``get_file``. The default
        is the drive root that used to be hard-coded, so existing callers
        behave exactly as before.
    :param cache_subdir: sub-directory of ``cache_dir`` passed to ``get_file``.
    :return: ``(x_train, y_train), (x_test, y_test)``. Labels are reshaped to
        a column; images are transposed to channels-last when the backend's
        image data format is ``'channels_last'``.
    """
    # download and extract data
    dirname = 'cifar-10-batches-py'
    origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
    path = get_file(dirname, origin, untar=True,
                    cache_dir=cache_dir, cache_subdir=cache_subdir)

    num_train_samples = 50000
    batch_len = 10000  # five training batches fill the 50000 slots
    x_train = np.zeros((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.zeros((num_train_samples, ), dtype='uint8')

    # load train data
    for i in range(1, 6):
        fpath = os.path.join(path, 'data_batch_' + str(i))
        data, labels = load_batch(fpath)
        start, stop = (i - 1) * batch_len, i * batch_len
        x_train[start:stop, :, :, :] = data
        y_train[start:stop] = labels

    # load test data
    fpath = os.path.join(path, 'test_batch')
    x_test, y_test = load_batch(fpath)

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if backend.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    return (x_train, y_train), (x_test, y_test)
def __init__(self):
    """
    Fetch the Deeplab checkpoint, build the network in its own graph and
    restore the weights into a fresh session.
    """
    logger.info('Loading Deeplab')
    checkpoint_name = os.path.abspath(
        os.path.join(config.WEIGHT_PATH, config.DEEPLAB_FILENAME))
    self.weights_path = get_file(checkpoint_name, config.DEEPLAB_URL,
                                 cache_subdir='models')
    self.graph = tf.Graph()
    with self.graph.as_default():
        self.image_placeholder = tf.placeholder(tf.float32,
                                                shape=(None, None, None, 3))
        self.net = DeepLabResNetModel({'data': self.image_placeholder},
                                      is_training=False,
                                      num_classes=self.NUM_CLASSES)
        # Snapshot the variables that exist right after the net is built.
        variables_to_restore = tf.global_variables()

        # Session that grows GPU memory on demand; variables are initialised first.
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=session_config)
        self.sess.run(tf.global_variables_initializer())

        # Overwrite the initial values with the downloaded checkpoint.
        saver = tf.train.Saver(var_list=variables_to_restore)
        saver.restore(self.sess, self.weights_path)
def download_imagenet(self):
    """ Download pre-trained weights for the specified backbone name.
    This name is in the format mobilenet{rows}_{alpha} where rows is the
    imagenet shape dimension and 'alpha' controls the width of the network.
    For more info check the explanation from the keras mobilenet script itself.

    :return: local path of the weights file, as returned by ``get_file``
    :raises ValueError: if the Keras image data format is ``channels_first``
    """
    name_parts = self.backbone.split('_')
    alpha = float(name_parts[1])
    rows = int(name_parts[0].replace('mobilenet', ''))

    # load weights
    if keras.backend.image_data_format() == 'channels_first':
        # The message used to say "channels_last", the opposite of what is rejected.
        raise ValueError('Weights for "channels_first" format '
                         'are not available.')

    # Any alpha other than 1.0 / 0.75 / 0.50 falls back to the 0.25 file name.
    alpha_text = {1.0: '1_0', 0.75: '7_5', 0.50: '5_0'}.get(alpha, '2_5')

    model_name = 'mobilenet_{}_{}_tf_no_top.h5'.format(alpha_text, rows)
    weights_url = mobilenet.BASE_WEIGHT_PATH + model_name
    weights_path = get_file(model_name, weights_url, cache_subdir='models')

    return weights_path
def get_embeddings_index(
    embedding_name: str,
    url: str = None,
    embeddings_filename: str = None,
    parent_dir: str = None,
    cache_dir: str = None,
    embedding_dimensions: int = 300,
) -> Dict[str, ndarray]:
    """
    High level function to get an embedding index, usually from a public url,
    downloading and caching locally.

    :param embedding_name: the name of the embeddings, used to look up metadata values
    :param url: the URL where the embeddings may be found; when given, it
        overrides the URL stored in the metadata
    :param embeddings_filename: name of the embeddings file inside the
        ``datasets`` cache folder; when omitted, it is derived from the
        metadata file template and ``embedding_dimensions``
    :param parent_dir: used only when ``cache_dir`` is not given; the cache
        directory then becomes ``<parent_dir>/data/<embedding_name>``
    :param cache_dir: where to store the files locally; if neither this nor
        ``parent_dir`` is given, ``~/.keras`` is used
    :param embedding_dimensions: integer: 300 or 100, 50 etc
    :return: a dictionary of strings to ndarray of embedding values
    """
    file_template = ""
    if embedding_name in EMBEDDINGS_METADATA:
        metadata_url, file_template = EMBEDDINGS_METADATA[embedding_name]
        # An explicit url wins over the metadata, as the parameter doc promises.
        url = url or metadata_url

    if not cache_dir:
        if parent_dir:
            cache_dir = os.path.join(parent_dir, "data", embedding_name)
        else:
            # This used to stay None and crash in os.path.exists(None).
            cache_dir = os.path.join(os.path.expanduser("~"), ".keras")
    os.makedirs(cache_dir, exist_ok=True)

    parts = urlparse(str(url))
    filename = parts.path.split("/")[-1]

    # get_file stores downloads under <cache_dir>/datasets by default.
    embeddings_dir = Path(str(cache_dir)) / "datasets"

    if embeddings_filename:
        embeddings_file = embeddings_filename
    elif "{" in file_template and "}" in file_template:
        embeddings_file = file_template.format(embedding_dimensions)
    else:
        embeddings_file = file_template

    embed_file = embeddings_dir / embeddings_file
    if not embed_file.exists():
        # if not exists, fetch
        LOG.info("initializing, please wait.")
        data_archive = get_file(fname=filename, origin=url,
                                cache_dir=cache_dir, untar=False, extract=True)
        LOG.info("Done initializing")
        if not data_archive:
            LOG.warning("Fail in fetch")

    embeddings_index = load_embeddings(
        str(embed_file),
        embedding_dimensions=embedding_dimensions)
    return embeddings_index
def load_model_weights(weights_collection, model, dataset, classes, include_top):
    """
    Find, download and load pretrained weights into ``model``.

    The first entry returned by ``find_weights`` for the model name, dataset
    and ``include_top`` flag is used.

    :raises ValueError: if no entry matches, or if ``include_top`` is set and
        the entry's class count differs from ``classes``
    """
    candidates = find_weights(weights_collection, model.name, dataset, include_top)

    if not candidates:
        raise ValueError(
            'There is no weights for such configuration: ' +
            'model = {}, dataset = {}, '.format(model.name, dataset) +
            'classes = {}, include_top = {}.'.format(classes, include_top))

    entry = candidates[0]

    if include_top and entry['classes'] != classes:
        raise ValueError('If using `weights` and `include_top`'
                         ' as true, `classes` should be {}'.format(
                             entry['classes']))

    local_file = get_file(entry['name'], entry['url'],
                          cache_subdir='models', md5_hash=entry['md5'])
    model.load_weights(local_file)
def __init__(self):
    """
    Fetch and unpack the SSD-Inception archive, import its frozen graph and
    load the label map.

    The archive is extracted only when the directory named after it (the
    archive path without ``.tar.gz``) does not exist yet.
    """
    logger.info('Loading Tensorflow Detection API')
    weights_path = get_file(config.SSD_INCEPTION_FILENAME, config.SSD_INCEPTION_URL,
                            cache_dir=os.path.abspath(config.WEIGHT_PATH),
                            cache_subdir='models')
    extract_path = weights_path.replace('.tar.gz', '')
    if not os.path.exists(extract_path):
        # Extract next to the archive so the files land where extract_path
        # is checked, and close the tar handle even if extraction fails.
        with tarfile.open(weights_path, "r:gz") as tar:
            tar.extractall(path=os.path.dirname(weights_path))

    pb_path = os.path.join(extract_path, self.PB_NAME)
    self.graph = tf.Graph()
    with self.graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(pb_path, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    self.label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
    self.categories = label_map_util.convert_label_map_to_categories(
        self.label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
    self.category_index = label_map_util.create_category_index(self.categories)
def load_data():
    """Download and extract the Flower Photos dataset, then return a training
    generator, a validation generator and the class names.

    Images are rescaled by 1/255 and 20% of them are reserved for the
    validation subset. Batch size and target size come from the module-level
    ``batch_size`` and ``IMAGE_SHAPE``.
    """
    # download the dataset and extract it
    archive_dir = get_file(
        origin='https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
        fname='flower_photos', untar=True)
    data_dir = pathlib.Path(archive_dir)

    # count how many images are there
    image_count = len(list(data_dir.glob('*/*.jpg')))
    print("Number of images:", image_count)

    # every top-level entry except the LICENSE file is a flower class
    class_names = np.array(
        [entry.name for entry in data_dir.glob('*') if entry.name != "LICENSE.txt"])

    # 20% validation set 80% training set
    image_generator = ImageDataGenerator(rescale=1/255, validation_split=0.2)

    def flow_for(subset):
        # Both subsets share every option except the subset name.
        return image_generator.flow_from_directory(
            directory=str(data_dir),
            batch_size=batch_size,
            classes=list(class_names),
            target_size=(IMAGE_SHAPE[0], IMAGE_SHAPE[1]),
            shuffle=True,
            subset=subset)

    train_data_gen = flow_for("training")
    test_data_gen = flow_for("validation")
    return train_data_gen, test_data_gen, class_names
def load_data(width=128, height=128):
    """
    Load CALTECH256 dataset, resize and cache for future load

    Cached ``.npy`` files in the current working directory are used when
    present. Otherwise the archive is downloaded, every ``.jpg`` is resized
    and the result is saved to those cache files.

    :param width: Width to resize
    :param height: Height to resize
    :return: Image data and labels, shuffled with ``random_state=0``
    """
    img_res = str(width) + 'x' + str(height)
    cache_prefix = 'caltech256_' + img_res

    try:
        image_data = np.load(file=cache_prefix + '.images.npy')
        image_label = np.load(file=cache_prefix + '.labels.npy')
        image_data, image_label = shuffle(image_data, image_label, random_state=0)
        return image_data, image_label
    except IOError:
        print('Cached images not found! Loading images...')

    dirname = '256_ObjectCategories'
    origin = 'http://www.vision.caltech.edu/Image_Datasets/Caltech256/256_ObjectCategories.tar'
    path = get_file(dirname, origin=origin, untar=True)

    num_samples = 30607
    image_data = np.empty((num_samples, width, height, 3), dtype='uint8')
    # Labels are the numeric directory prefixes; uint8 overflows for any
    # prefix above 255, so a wider unsigned type is used.
    image_label = np.empty((num_samples,), dtype='uint16')

    category_list = os.listdir(path)
    sample = 0
    for i, category in enumerate(category_list):
        # Progress bar
        print('\r', end='', flush=True)
        progress = round(i/len(category_list)*100)
        print("[%-100s] %3d%%" % ('#'*progress, progress), end='', flush=True)

        category_path = os.path.join(path, category)
        category_id = int(category.split('.')[0])
        for image in os.listdir(category_path):
            if image.lower().endswith('.jpg'):
                img = load_img(os.path.join(category_path, image))
                img = img_to_array(img)
                img = resize(img, (width, height))
                image_data[sample, :, :, :] = img
                image_label[sample] = category_id
                sample += 1
    print(flush=True)

    # Drop the unused tail of the pre-allocated buffers.
    image_data = image_data[:sample, :, :, :]
    image_label = image_label[:sample]

    # Save images and labels to cache file
    np.save(file=cache_prefix + '.images', arr=image_data)
    np.save(file=cache_prefix + '.labels', arr=image_label)

    image_data, image_label = shuffle(image_data, image_label, random_state=0)
    return image_data, image_label
def load_pretrained_model(self):
    """
    Fetch the MSRA NER BiLSTM-CNN-CRF preprocessor, architecture JSON and
    weights (in that order) and pass the three local paths to ``self.load``.
    """
    cache_subdir = 'pretrained_models'
    prefix = 'https://fancy-nlp-1253403094.cos.ap-shanghai.myqcloud.com/pretrained_models/'
    artifact_names = (
        'msra_ner_bilstm_cnn_crf_preprocessor.pkl',
        'msra_ner_bilstm_cnn_crf.json',
        'msra_ner_bilstm_cnn_crf.hdf5',
    )
    local_files = [
        get_file(fname=name, origin=prefix + name, cache_subdir=cache_subdir)
        for name in artifact_names
    ]
    preprocessor_file, json_file, weights_file = local_files
    self.load(preprocessor_file, json_file, weights_file)
def _local_model(self, remote_location, model_path):
    """
    Fetch the model and class files and load them into this instance.

    :param remote_location: base URL; the ``H5`` and ``CLASSES`` file names are appended to it
    :param model_path: cache directory handed to ``get_file``
    """
    logger.debug("Loading model file")
    model_url = "{}/{}".format(remote_location, H5)
    local_model = get_file(fname=H5, origin=model_url, cache_dir=model_path)
    self.model = load_model(local_model)

    classes_url = "{}/{}".format(remote_location, CLASSES)
    local_classes = get_file(fname=CLASSES, origin=classes_url, cache_dir=model_path)
    logger.debug("Loading class file")
    with open(local_classes, 'r') as handle:
        raw_classes = handle.read()
    self.classes = loads(raw_classes)
def load_pretrained_model(self):
    """
    Fetch the Toutiao text-classification CNN preprocessor, architecture JSON
    and weights (in that order) into ``CACHE_DIR`` and pass the three local
    paths to ``self.load``.
    """
    cache_subdir = 'pretrained_models'

    def fetch(name):
        # Every artifact shares the storage prefix and the cache location.
        return get_file(fname=name,
                        origin=MODEL_STORAGE_PREFIX + name,
                        cache_subdir=cache_subdir,
                        cache_dir=CACHE_DIR)

    preprocessor_file = fetch('toutiao_text_classification_cnn_preprocessor.pkl')
    json_file = fetch('toutiao_text_classification_cnn.json')
    weights_file = fetch('toutiao_text_classification_cnn.hdf5')
    self.load(preprocessor_file, json_file, weights_file)
def __init__(self):
    """
    Create the dlib frontal face detector and the 68-point shape predictor.

    The predictor model is fetched as
    ``shape_predictor_68_face_landmarks.dat.bz2`` from ``LANDMARKS_MODEL_URL``
    and passed through ``unpack_bz2`` before being handed to dlib.
    """
    compressed_model = get_file('shape_predictor_68_face_landmarks.dat.bz2',
                                LANDMARKS_MODEL_URL, cache_subdir='temp')
    predictor_model_path = unpack_bz2(compressed_model)
    # cnn_face_detection_model_v1 also can be used
    self.detector = dlib.get_frontal_face_detector()
    self.shape_predictor = dlib.shape_predictor(predictor_model_path)
def download_imagenet(backbone):
    """
    Validate ``backbone`` and fetch the no-top SqueezeNet ImageNet weights.

    :param backbone: backbone name, checked by ``validate_backbone``
    :return: local path of the weights file, as returned by ``get_file``
    """
    validate_backbone(backbone)
    weights_file = 'squeezenet_weights_tf_dim_ordering_tf_kernels_notop.h5'
    return get_file(weights_file, WEIGHTS_PATH_NO_TOP, cache_subdir='models')
def pic_cut(path1, path2):
    """
    Detect face landmarks in ``path1`` and hand them to ``image_align``.

    :param path1: passed to the landmark detector and to ``image_align`` as first argument
    :param path2: passed to ``image_align`` as second argument
    """
    compressed_model = get_file('shape_predictor_68_face_landmarks.dat.bz2',
                                LANDMARKS_MODEL_URL, cache_subdir='temp')
    detector = LandmarksDetector(unpack_bz2(compressed_model))
    landmarks = detector.get_landmarks(path1)
    image_align(path1, path2, landmarks)
def initModel(self, dataset_name):
    """
    Build and compile the segmentation model named 'DeepLab3+'.

    A ``mobilenetV2`` feature extractor feeds two branches (a pooled image
    branch and a 1x1 convolution branch). They are concatenated, projected,
    reduced to one channel, resized to the input height and width, and
    passed through a sigmoid. Pretrained weights are loaded by layer name
    before compiling with Adam at ``self.lr``.

    :param dataset_name: one of 'CDnet', 'SBI', 'UCSD'; 'UCSD' selects
        ``loss2``/``acc2``, the others ``loss``/``acc``
    :return: the compiled Keras model
    """
    # Divisor for the pooling/resize sizes below (presumably the output stride — confirm).
    OS = 8
    assert dataset_name in [
        'CDnet', 'SBI', 'UCSD'
    ], 'dataset_name must be either one in ["CDnet", "SBI", "UCSD"]]'
    assert len(self.img_shape) == 3
    h, w, d = self.img_shape

    img_input = Input(shape=(h, w, d), name='img_input')
    # skip1 is returned by the backbone but not used in this method.
    x, skip1 = mobilenetV2(img_input)

    # Image-pooling branch: pool with a (ceil(h/OS), ceil(w/OS)) window,
    # 1x1 conv + BN + ReLU, then resize to that same size.
    b4 = AveragePooling2D(pool_size=(int(np.ceil(h / OS)), int(np.ceil(w / OS))))(x)
    b4 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation('relu')(b4)
    b4 = Lambda(lambda x: tf.image.resize_bilinear(
        x, size=(int(np.ceil(h / OS)), int(np.ceil(w / OS)))))(b4)

    # simple 1x1
    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation('relu', name='aspp0_activation')(b0)

    # Merge both branches and project back to 256 channels.
    x = Concatenate()([b4, b0])
    x = Conv2D(256, (1, 1), padding='same', use_bias=False, name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = Activation('relu')(x)
    x = Dropout(0.1)(x)

    # Single-channel logits, resized to the input height/width, then sigmoid.
    x = Conv2D(1, (1, 1), padding='same', name='custom_logits_semantic')(x)
    x = Lambda(lambda x: tf.image.resize_bilinear(x, size=(h, w)))(x)
    x = Activation('sigmoid')(x)

    model = Model(img_input, x, name='DeepLab3+')
    # opt = keras.optimizers.RMSprop(lr=self.lr, rho=0.9, epsilon=1e-08, decay=0.)

    # Since UCSD has no void label, we do not need to filter out
    if dataset_name == 'UCSD':
        c_loss = loss2
        c_acc = acc2
    else:
        c_loss = loss
        c_acc = acc

    # by_name=True: only layers whose names match entries in the file receive weights.
    weights_path = get_file(
        'deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5',
        self.mobile_weights_path, cache_subdir='models')
    model.load_weights(weights_path, by_name=True)

    model.compile(loss=c_loss, optimizer=Adam(self.lr), metrics=[c_acc])
    return model
def download_files(files, target_dir, url):
    """Download files by URLS, function get_file use TensorFlow

    A file already present at ``target_dir + file`` is not downloaded again.

    :param files: iterable of file names
    :param target_dir: directory prefix, concatenated directly with each file name
    :param url: URL prefix, concatenated directly with each file name
    :return: list with one usable path per file: the existing local path, or
        the path ``get_file`` returned for a fresh download
    """
    data_paths = []
    for file in files:
        path = target_dir + file
        if os.path.exists(path):
            data_path = path
        else:
            data_path = get_file(file, url + file)
        # Record where the file really is; `path` used to be appended even
        # when the file had just been downloaded somewhere else.
        data_paths.append(data_path)
    print('All files downloaded')
    return data_paths
def load_PSPNet():
    """
    Fetch and load the PSP-Net model, and build a fixed palette of 5000 RGB tuples.

    Seeds the global ``random`` generator with 0 and stores the default graph
    in the module-level ``graph``.

    :return: tuple ``(model, class_colors)``
    """
    global graph
    random.seed(0)
    class_colors = [
        tuple(random.randint(0, 255) for _channel in range(3))
        for _ in range(5000)
    ]
    model_url = "https://getfile.dokpub.com/yandex/get/https://yadi.sk/d/BR1EAlZ-UQMzQQ"
    local_model = get_file('PSP-Net.h5', model_url)
    model = load_model(local_model, custom_objects={'Interp': Interp})
    graph = ktf.get_default_graph()
    return model, class_colors
def save(self, url):
    """
    save pretrained models

    Downloads ``<model_name>.gz`` from ``url`` and decompresses it with the
    external ``zcat`` program into ``<file_path>/<model_name>``.

    :param url: location of the gzip-compressed model
    :raises subprocess.CalledProcessError: if ``zcat`` exits with a non-zero status
    """
    unzipped = _os.path.join(self.file_path, self.model_name)
    path = _utils.get_file(self.model_name + ".gz", url)
    # Binary mode for compressed input; both handles are closed on every
    # path, and a failed decompression is reported instead of ignored.
    with open(path, "rb") as fin, open(unzipped, "wb") as fout:
        _subprocess.check_call(["zcat"], stdin=fin, stdout=fout)
def pspnet_50_ADE_20K_SUNRGB(height=473, width=473):
    """
    Build a ``pspnet_50_sunrgb`` model and load the downloaded
    ``pspnet50_ade20k.h5`` weights into it.

    :param height: forwarded as ``input_height``
    :param width: forwarded as ``input_width``
    :return: the model with weights loaded
    """
    weights_url = ("https://www.dropbox.com/s/"
                   "0uxn14y26jcui4v/pspnet50_ade20k.h5?dl=1")
    weights_file = get_file("pspnet50_ade20k.h5", weights_url)
    network = pspnet_50_sunrgb(input_height=height, input_width=width)
    network.load_weights(weights_file)
    return network
def _download_data():
    """
    Fetch and extract the QM9 archive into ``DATA_PATH``, rename the two
    ``gdb9`` files to ``qm9`` and delete the downloaded archive.
    """
    get_file('qm9.tar.gz', DATASET_URL, extract=True,
             cache_dir=DATA_PATH, cache_subdir=DATA_PATH)
    for suffix in ('.sdf', '.sdf.csv'):
        os.rename(DATA_PATH + 'gdb9' + suffix, DATA_PATH + 'qm9' + suffix)
    os.remove(DATA_PATH + 'qm9.tar.gz')
def __init__(self, args, batch_size=1, perc_model=None, sess=None):
    """
    Store the optimisation settings from ``args`` and prepare optional helpers.

    Each loss weight read from ``args`` is replaced by ``None`` when it is
    at or below ``self.epsilon``, so later code can test ``is not None``.

    :param args: namespace providing the attributes read below
    :param batch_size: stored on the instance and passed to ``create_stub``
    :param perc_model: kept only when the LPIPS loss is enabled
    :param sess: TensorFlow session; the default session is used when ``None``
    """
    self.sess = tf.get_default_session() if sess is None else sess
    K.set_session(self.sess)
    # Threshold at or below which a loss weight counts as disabled.
    self.epsilon = 0.00000001
    self.lr = args.lr
    self.decay_rate = args.decay_rate
    self.decay_steps = args.decay_steps
    self.img_size = args.image_size
    self.layer = args.use_vgg_layer
    self.vgg_loss = args.use_vgg_loss
    self.face_mask = args.face_mask
    self.use_grabcut = args.use_grabcut
    self.scale_mask = args.scale_mask
    self.mask_dir = args.mask_dir
    # The VGG loss also needs a positive layer index.
    if (self.layer <= 0 or self.vgg_loss <= self.epsilon):
        self.vgg_loss = None
    self.pixel_loss = args.use_pixel_loss
    if (self.pixel_loss <= self.epsilon):
        self.pixel_loss = None
    self.mssim_loss = args.use_mssim_loss
    if (self.mssim_loss <= self.epsilon):
        self.mssim_loss = None
    self.lpips_loss = args.use_lpips_loss
    if (self.lpips_loss <= self.epsilon):
        self.lpips_loss = None
    self.l1_penalty = args.use_l1_penalty
    if (self.l1_penalty <= self.epsilon):
        self.l1_penalty = None
    self.adaptive_loss = args.use_adaptive_loss
    self.sharpen_input = args.sharpen_input
    self.batch_size = batch_size
    # The supplied perceptual model is only kept when the LPIPS loss is active.
    if perc_model is not None and self.lpips_loss is not None:
        self.perc_model = perc_model
    else:
        self.perc_model = None
    # Placeholders; nothing in this method assigns them a real value.
    self.ref_img = None
    self.ref_weight = None
    self.perceptual_model = None
    self.ref_img_features = None
    self.features_weight = None
    self.loss = None
    self.discriminator_loss = args.use_discriminator_loss
    if (self.discriminator_loss <= self.epsilon):
        self.discriminator_loss = None
    # NOTE(review): the source layout does not show whether `self.stub` belongs
    # inside this branch; it is kept inside here — confirm against the original file.
    if self.discriminator_loss is not None:
        self.discriminator = None
        self.stub = create_stub(batch_size)

    if self.face_mask:
        # dlib is imported only when face masking is requested.
        import dlib
        self.detector = dlib.get_frontal_face_detector()
        LANDMARKS_MODEL_URL = 'http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2'
        landmarks_model_path = unpack_bz2(
            get_file('shape_predictor_68_face_landmarks.dat.bz2',
                     LANDMARKS_MODEL_URL, cache_subdir='temp'))
        self.predictor = dlib.shape_predictor(landmarks_model_path)
def coco_download(dataset_path, filenames, dataset_root, urls, md5s, annotation_paths):
    """
    Download every COCO archive and extract it into ``dataset_path``.

    ``urls``, ``filenames`` and ``md5s`` are consumed in lockstep. Each
    archive is fetched with ``get_file`` (hash-checked, ``extract=True``) and
    then unpacked again with ``zipfile`` into ``dataset_path``.
    """
    # Called as before; its return value was never used.
    coco_files(dataset_path, filenames, dataset_root, urls, md5s, annotation_paths)
    for url, filename, md5 in zip(urls, filenames, md5s):
        path = get_file(filename, url, md5_hash=md5, extract=True,
                        cache_subdir=dataset_path)
        # TODO(ahundt) check if it is already extracted, don't re-extract. see
        # https://github.com/fchollet/keras/issues/5861
        # The context manager closes the archive even if extraction raises.
        with zipfile.ZipFile(path, 'r') as zip_file:
            zip_file.extractall(path=dataset_path)
def test_get_file():
    """Check that the drusen model is downloaded (hash-checked) and exists on disk."""
    drusen = deepseenet.deepseenet_drusen
    drusen_model = get_file('drusen_model.h5', drusen.DRUSEN_PATH,
                            cache_dir='models', md5_hash=drusen.DRUSEN_MD5)
    print(drusen_model)
    assert os.path.exists(drusen_model)
def load_dataset_npz(path, url):
    """loads an .npz file of saved image data, and returns the images and their associated labels as numpy arrays

    :param path: file name handed to ``get_file``
    :param url: origin URL handed to ``get_file``
    :return: tuple ``(images, labels)`` read from the ``'data'`` and ``'labels'`` entries
    """
    from keras.utils import get_file
    path = get_file(path, origin=url)
    # NOTE: allow_pickle=True can run arbitrary code from the file; only use trusted URLs.
    # The context manager closes the archive's file handle, which used to stay open.
    with np.load(path, allow_pickle=True) as f:
        images, labels = f['data'], f['labels']
    return images, labels
def loadGenderModel():
    """
    Fetch the gender-detection model into ``<cwd>/pre-trained`` and load it.

    :return: the object returned by ``load_model``
    """
    # Source of the model
    model_url = "https://s3.ap-south-1.amazonaws.com/arunponnusamy/pre-trained-weights/gender_detection.model"
    local_model = get_file("gender_detection.model", model_url,
                           cache_subdir="pre-trained",
                           cache_dir=os.getcwd())
    # Load and return the model
    return load_model(local_model)
def __init__(self):
    """
    Fetch the dlib face-recognition and 5-point landmark models and create
    the encoder (``self.encode``) and shape predictor (``self.pose``).

    dlib is imported through ``importlib`` after the first download.
    """
    release_base = ("https://github.com/distant-viewing/dvt/"
                    "releases/download/0.0.1/")

    encoder_name = "dlib_face_recognition_resnet_model_v1.dat"
    encoder_path = get_file(encoder_name, origin=release_base + encoder_name)
    self.dlib = importlib.import_module("dlib")
    self.encode = self.dlib.face_recognition_model_v1(encoder_path)

    landmarks_name = "shape_predictor_5_face_landmarks.dat"
    landmarks_path = get_file(landmarks_name, origin=release_base + landmarks_name)
    self.pose = self.dlib.shape_predictor(landmarks_path)
def pascal_voc_download(dataset_path, filenames, dataset_root, urls, md5s):
    """
    Download and extract every Pascal VOC archive.

    ``urls``, ``filenames`` and ``md5s`` are consumed in lockstep; each
    archive is fetched with ``get_file`` (hash-checked, ``extract=True``)
    into the ``dataset_path`` cache sub-directory.
    """
    zip_paths = pascal_voc_files(dataset_path, filenames, dataset_root, urls, md5s)
    for archive_url, archive_name, archive_md5 in zip(urls, filenames, md5s):
        path = get_file(archive_name,
                        archive_url,
                        md5_hash=archive_md5,
                        extract=True,
                        cache_subdir=dataset_path)
def load_dataset_npz(path, url):
    """loads an .npz file of saved image data, and returns the images and their associated labels as numpy arrays

    :param path: file name handed to ``get_file``
    :param url: origin URL handed to ``get_file``
    :return: tuple ``(images, labels)`` read from the ``'data'`` and ``'labels'`` entries
    """
    from keras.utils import get_file
    path = get_file(path, origin=url)
    # The context manager closes the archive's file handle, which used to stay open.
    with np.load(path) as f:
        images, labels = f['data'], f['labels']
    return images, labels
def load_dataset_npz(path, url):
    """loads a normed face dataset file and returns a numpy array of shape (num, vector_size) with dtype float32, and an array of label strings

    :param path: file name handed to ``get_file``
    :param url: origin URL handed to ``get_file``
    :return: tuple ``(images, labels)`` read from the ``'data'`` and ``'labels'`` entries
    """
    from keras.utils import get_file
    path = get_file(path, origin=url)
    # The context manager closes the archive's file handle, which used to stay open.
    with np.load(path) as f:
        images, labels = f['data'], f['labels']
    return images, labels
def get_glove_embeddings(embeddings):
    """
    Make sure the GloVe 840B 300d text file is available in the Keras dataset folder.

    When the zip archive is not already in ``~/.keras/datasets/`` it is
    downloaded and ``glove.840B.300d.txt`` is extracted next to it.

    :param embeddings: not used by the code visible in this function
    """
    keras_dataset_dir = expanduser('~/.keras/datasets/')
    glove_zip_file_url = 'http://nlp.stanford.edu/data/glove.840B.300d.zip'
    glove_zip_file = 'glove.840B.300d.zip'
    glove_file = 'glove.840B.300d.txt'

    if not exists(keras_dataset_dir + glove_zip_file):
        # Close the archive after extracting; the handle used to be left open.
        with ZipFile(get_file(glove_zip_file, glove_zip_file_url)) as zf:
            zf.extract(glove_file, path=keras_dataset_dir)

    print("Processing", glove_file)
def colors(*args, path='colors.csv',
           url="https://raw.githubusercontent.com/Calysto/conx-data/master/colors/colors.csv",
           **kwargs):
    """
    Build the "Colors" conx dataset from a CSV of ``name, r, g, b`` rows.

    Inputs are the RGB values divided by 255 (float32), targets are the
    1-based row counter (uint16) and labels are the color names. The header
    row (first column equal to ``"name"``) is skipped.

    :param path: file name handed to ``get_file``
    :param url: origin URL handed to ``get_file``
    :return: the populated ``cx.Dataset``
    """
    dataset = cx.Dataset()
    from keras.utils import get_file
    path = get_file(path, origin=url)
    inputs = []
    labels = []
    targets = []
    count = 1
    # The file is closed when reading finishes; it used to stay open.
    with open(path, "r") as fp:
        reader = csv.reader(fp)
        for line in reader:
            name, r, g, b = line
            if name == "name":
                continue  # first line is header
            inputs.append([float(int(r)/255), float(int(g)/255), float(int(b)/255)])
            targets.append([count])
            labels.append(name)
            count += 1
    inputs = np.array(inputs, dtype='float32')
    targets = np.array(targets, dtype='uint16')
    dataset.name = "Colors"
    dataset.description = """ Original source: https://github.com/andrewortman/colorbot This dataset also includes some ignored in original data. Inspired by: * http://aiweirdness.com/tagged/paint-colors When initially loaded, this database has the following format: * labels: [color_name_string, ...] # order matches target * inputs: [[red, green, blue], ...] # scaled between 0 and 1 * targets: [[int], ...] # number of label For example: ``` >>> import conx as cx >>> ds = cx.Dataset.get("colors") >>> ds.labels[0], ds.inputs[0], ds.targets[0] ('tidewater', [0.7686274647712708, 0.843137264251709, 0.8352941274642944], [1]) ``` """
    dataset.load_direct([inputs], [targets], [labels])
    return dataset
def resnet_50(input_shape):
    """
    Build a ResNet-50 style feature extractor and load pretrained no-top weights.

    For inputs with more than 3 channels the first convolution is replaced
    by a layer named 'conv1_changed', which the by-name weight loading does
    not match. Its first 3 input channels are then filled from a reference
    ``ResNet50`` and the layer is renamed to 'conv1'.

    :param input_shape: shape passed to ``Input``; the last entry is read as the channel count
    :return: the Keras model
    """
    img_input = Input(input_shape)

    x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', name='conv1')(img_input)
    # With more than 3 channels the stem is rebuilt under a different name and
    # replaces the tensor produced by the 'conv1' layer above.
    if input_shape[-1] > 3:
        x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', name='conv1_changed')(img_input)
    x = BatchNormalization(name='bn_conv1')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x)

    # Stage 2
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

    # Stage 3
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

    # Stage 4
    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')

    # Stage 5
    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

    print("Loading pretrained weights for Resnet50...")
    weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
                            resnet50_padding.WEIGHTS_PATH_NO_TOP,
                            cache_subdir='models',
                            md5_hash='a268eb855778b3df3c7506639542a6af')

    model = Model(img_input, x)
    # Weights are matched to layers by name.
    model.load_weights(weights_path, by_name=True)

    if input_shape[-1] > 3:
        print("Loading weights for conv1 layer separately for the first 3 channels")
        conv1_weights = np.zeros((7, 7, input_shape[-1], 64), dtype="float32")
        resnet_ori = ResNet50(include_top=False, input_shape=(224, 224, 3))
        # First three input channels come from the reference network's conv1 kernel.
        conv1_weights[:, :, :3, :] = resnet_ori.get_layer("conv1").get_weights()[0][:, :, :, :]
        # random init
        conv1_weights[:, :, 3:, :] = model.get_layer('conv1_changed').get_weights()[0][:, :, 3:, :]
        bias = resnet_ori.get_layer("conv1").get_weights()[1]
        model.get_layer('conv1_changed').set_weights((conv1_weights, bias))
        model.get_layer('conv1_changed').name = 'conv1'

    return model
def download_imagenet(self):
    """ Fetch the pre-trained no-top weights for ``self.backbone``.

    The file is named {backbone}_weights_tf_dim_ordering_tf_kernels_notop.h5,
    where backbone is densenet plus the number of layers (e.g. densenet121).
    For more info check the explanation from the keras densenet script itself:
        https://github.com/keras-team/keras/blob/master/keras/applications/densenet.py

    :return: local path of the weights file, as returned by ``get_file``
    :raises ValueError: if the Keras image data format is ``channels_first``
    """
    # load weights
    if keras.backend.image_data_format() == 'channels_first':
        raise ValueError('Weights for "channels_first" format are not available.')

    weights_file = '{}_weights_tf_dim_ordering_tf_kernels_notop.h5'.format(self.backbone)
    release_base = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/'
    return get_file(weights_file, release_base + weights_file, cache_subdir='models')
def mnist_h5(*args, **kwargs):
    """
    Load the Keras MNIST dataset from an H5 file.

    The H5 file is opened read-only and kept open: the dataset's inputs,
    targets and labels point straight at the H5 groups, and the handle is
    stored on ``dataset.h5``. Arguments are accepted but not used.
    """
    import h5py
    source = "https://raw.githubusercontent.com/Calysto/conx-data/master/mnist/mnist.h5"
    local_file = get_file("mnist.h5", origin=source)
    h5 = h5py.File(local_file, "r")
    dataset = cx.Dataset()
    # Point each private bank attribute at the H5 group of the same name.
    for bank in ("inputs", "targets", "labels"):
        setattr(dataset, "_" + bank, h5[bank])
    dataset.h5 = h5
    dataset.name = "MNIST-H5"
    dataset.description = description
    dataset._cache_values()
    return dataset
def download_imagenet(self):
    """ Downloads ImageNet weights and returns path to weights file.
    Weights can be downloaded at https://github.com/fizyr/keras-models/releases .

    :raises ValueError: if ``self.backbone`` is neither 'vgg16' nor 'vgg19'
    """
    backbone = self.backbone
    if backbone not in ('vgg16', 'vgg19'):
        raise ValueError("Backbone '{}' not recognized.".format(backbone))

    if backbone == 'vgg16':
        resource = keras.applications.vgg16.WEIGHTS_PATH_NO_TOP
        checksum = '6d6bbae143d832006294945121d1f1fc'
    else:
        resource = keras.applications.vgg19.WEIGHTS_PATH_NO_TOP
        checksum = '253f8cb515780f3b799900260a226db6'

    weights_file = '{}_weights_tf_dim_ordering_tf_kernels_notop.h5'.format(backbone)
    return get_file(weights_file, resource, cache_subdir='models', file_hash=checksum)
def download_imagenet(self):
    """ Downloads ImageNet weights and returns path to weights file.

    The depth is parsed from ``self.backbone`` (e.g. ``'resnet50'`` -> 50).

    :return: local path of the weights file, as returned by ``get_file``
    :raises ValueError: if the depth is not one of 50, 101 or 152 (this used
        to surface as an ``UnboundLocalError`` on ``checksum``)
    """
    checksums = {
        50: '3e9f4e4f77bbe2c9bec13b53ee1c2319',
        101: '05dc86924389e5b401a9ea0348a3213c',
        152: '6ee11ef2b135592f8031058820bb9e71',
    }
    depth = int(self.backbone.replace('resnet', ''))
    if depth not in checksums:
        raise ValueError(
            "Backbone '{}' has no known ImageNet weights "
            "(supported depths: 50, 101, 152).".format(self.backbone))

    filename = 'ResNet-{}-model.keras.h5'.format(depth)
    resource = 'https://github.com/fizyr/keras-models/releases/download/v0.0.1/{}'.format(filename)

    return get_file(
        filename,
        resource,
        cache_subdir='models',
        md5_hash=checksums[depth]
    )
def SqueezeNet(input_tensor=None, input_shape=None, weights='imagenet', classes=1000):
    """
    Build the SqueezeNet classifier and optionally load ImageNet weights.

    :param input_tensor: optional tensor to use as model input; wrapped in an
        ``Input`` layer when it is not already a Keras tensor
    :param input_shape: optional input shape, resolved through ``_obtain_input_shape``
    :param weights: ``'imagenet'`` to download pretrained weights, or ``None``
    :param classes: number of output classes; must be 1000 with ImageNet weights
    :return: the Keras model named 'squeezenet'
    :raises ValueError: for an unsupported ``weights`` value, or for
        ``weights='imagenet'`` combined with ``classes != 1000``
    """
    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')

    if weights == 'imagenet' and classes != 1000:
        raise ValueError('If using `weights` as imagenet with `include_top`'
                         ' as true, `classes` should be 1000')

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=227,
                                      min_size=48,
                                      data_format=K.image_data_format(),
                                      include_top=True)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Stem convolution followed by fire modules with interleaved max-pooling.
    x = Convolution2D(64, (3, 3), strides=(2, 2), padding='valid', name='conv1')(img_input)
    x = Activation('relu', name='relu_conv1')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool1')(x)

    x = fire_module(x, fire_id=2, squeeze=16, expand=64)
    x = fire_module(x, fire_id=3, squeeze=16, expand=64)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool3')(x)

    x = fire_module(x, fire_id=4, squeeze=32, expand=128)
    x = fire_module(x, fire_id=5, squeeze=32, expand=128)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool5')(x)

    x = fire_module(x, fire_id=6, squeeze=48, expand=192)
    x = fire_module(x, fire_id=7, squeeze=48, expand=192)
    x = fire_module(x, fire_id=8, squeeze=64, expand=256)
    x = fire_module(x, fire_id=9, squeeze=64, expand=256)

    # Classifier head: dropout, 1x1 conv to `classes` channels, global average pool, softmax.
    x = Dropout(0.5, name='drop9')(x)
    x = Convolution2D(classes, (1, 1), padding='valid', name='conv10')(x)
    x = Activation('relu', name='relu_conv10')(x)
    x = GlobalAveragePooling2D()(x)
    out = Activation('softmax', name='loss')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs, out, name='squeezenet')

    # load weights
    if weights == 'imagenet':
        weights_path = get_file('squeezenet_weights_tf_dim_ordering_tf_kernels.h5',
                                WEIGHTS_PATH,
                                cache_subdir='models')
        model.load_weights(weights_path)

        # NOTE(review): the source layout does not show whether this check is
        # nested under the imagenet branch; it is kept nested here — confirm.
        if K.image_data_format() == 'channels_first':
            if K.backend() == 'tensorflow':
                warnings.warn('You are using the TensorFlow backend, yet you '
                              'are using the Theano '
                              'image data format convention '
                              '(`image_data_format="channels_first"`). '
                              'For best performance, set '
                              '`image_data_format="channels_last"` in '
                              'your Keras config '
                              'at ~/.keras/keras.json.')
    return model