def _download_weights_if_needed(self, download_region):
    """Checks for directories containing the ensemble weights, downloads them if necessary.

    The CnnEnsemble uses many GB of pretrained weights for prediction. When using the package
    for the first time, these weights (too heavy for pypi) need to be downloaded. We use the
    built-in keras utility get_file to handle the download and extraction. This function leaves
    some hidden files behind after extraction, which we manually remove.

    After download, the zamba/zamba/models/cnnensemble/ directory will have three new directories:

    1. input - contains training fold splits from the original data, and stores formatting
       information for output
    2. output - contains model weights for all models used in the ensemble
    3. data_fast - contains cached training image preprocessing results
    """
    download_regions = ['us', 'eu', 'asia']
    if download_region not in download_regions:
        raise ValueError(f"download_region must be one of:\t{download_regions}")
    else:
        region_urls = {'us': 'https://s3.amazonaws.com/drivendata-public-assets/',
                       'eu': 'https://s3.eu-central-1.amazonaws.com/drivendata-public-assets-eu/',
                       'asia': 'https://s3-ap-southeast-1.amazonaws.com/drivendata-public-assets-asia/'}
        region_url = region_urls[download_region]

    # file names, paths
    fnames = ["input.tar.gz", "output.tar.gz", "data_fast.zip"]
    cache_dir = Path(__file__).parent
    cache_subdir = Path("cnnensemble")
    paths_needed = [cache_dir / cache_subdir / "input",
                    cache_dir / cache_subdir / "output",
                    cache_dir / cache_subdir / "data_fast"]

    # download and extract if needed
    for path_needed, fname in zip(paths_needed, fnames):
        if not path_needed.exists():
            origin = region_url + fname
            get_file(fname=fname,
                     origin=origin,
                     cache_dir=cache_dir,
                     cache_subdir=cache_subdir,
                     extract=True)

            # remove the compressed file
            remove(cache_dir / cache_subdir / fname)

    # remove hidden files or dirs if present
    cnnpath = cache_dir / cache_subdir
    hidden_dirs = [pth for pth in cnnpath.glob("**/*")
                   if pth.parts[-1].startswith("._") and pth.is_dir()]
    if hidden_dirs:
        deque(map(rmtree, hidden_dirs))
    hidden_files = [pth for pth in cnnpath.glob("**/*")
                    if pth.parts[-1].startswith("._") and pth.is_file()]
    if hidden_files:
        deque(map(remove, hidden_files))
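# Hedged usage sketch (not part of the source): assuming the surrounding class is zamba's
# CnnEnsemble and that this method is invoked during model setup, a first-run download for a
# European user could look like the lines below; the constructor arguments and the call site
# are illustrative assumptions, not the package's documented API.
#
#     ensemble = CnnEnsemble(model_path=Path("."), download_region="eu")
#     ensemble._download_weights_if_needed("eu")  # no-op once input/, output/, data_fast/ exist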
def __init__(self, data_format=IMAGE_DATA_FORMAT, pre_trained=False, name=None,
             batch_norm=True):
    super(GSCSuperSparseCNN, self).__init__(
        linear_units=1500,
        linear_percent_on=0.067,
        linear_weight_sparsity=0.1,
        data_format=data_format,
        pre_trained=False,
        name=name,
        batch_norm=batch_norm,
    )
    if pre_trained:
        if not batch_norm:
            raise NotImplementedError(
                "Unable to load pre-trained models with no BatchNorm")
        model_url, model_hash = MODEL_URLS["gsc_super_sparse_cnn"]
        file_name = "gsc_super_sparse_cnn-{:.8}".format(model_hash)
        archive_path = get_file(
            fname="{}.tar.gz".format(file_name),
            origin=model_url,
            file_hash=model_hash,
            extract=True,
            cache_subdir="models",
        )
        cache_dir = os.path.dirname(archive_path)
        self.load_weights(
            os.path.join(cache_dir, "gsc_super_sparse_cnn.h5"))
def load_data(cls,
              subset_name: str = 'train',
              task_name: str = 'ner',
              shuffle: bool = True) -> Tuple[List[List[str]], List[List[str]]]:
    """
    Load dataset as sequence labeling format (CoNLL-style files).

    Args:
        subset_name: {train, test, valid}
        task_name: {pos, chunking, ner}
        shuffle: should shuffle or not, default True.

    Returns:
        dataset_features and dataset labels
    """
    corpus_path = get_file(cls.__corpus_name__,
                           cls.__zip_file__name,
                           cache_dir=k.DATA_PATH,
                           untar=True)

    if subset_name not in {'train', 'test', 'valid'}:
        raise ValueError("subset_name must be one of {'train', 'test', 'valid'}")

    file_path = os.path.join(corpus_path, f'{subset_name}.txt')

    if task_name not in {'pos', 'chunking', 'ner'}:
        raise ValueError("task_name must be one of {'pos', 'chunking', 'ner'}")

    data_index = ['pos', 'chunking', 'ner'].index(task_name) + 1

    x_data, y_data = DataReader.read_conll_format_file(file_path,
                                                       label_index=data_index)
    if shuffle:
        x_data, y_data = utils.unison_shuffled_copies(x_data, y_data)
    logging.debug(
        f"loaded {len(x_data)} samples from {file_path}. Sample:\n"
        f"x[0]: {x_data[0]}\n"
        f"y[0]: {y_data[0]}")
    return x_data, y_data
def load_data(): """Loads the Fashion-MNIST dataset. # Returns Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. """ dirname = os.path.join('datasets', 'fashion-mnist') base = 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/' files = [ 'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz' ] paths = [] for file in files: paths.append(get_file(file, origin=base + file, cache_subdir=dirname)) with gzip.open(paths[0], 'rb') as lbpath: y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8) with gzip.open(paths[1], 'rb') as imgpath: x_train = np.frombuffer(imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28) with gzip.open(paths[2], 'rb') as lbpath: y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8) with gzip.open(paths[3], 'rb') as imgpath: x_test = np.frombuffer(imgpath.read(), np.uint8, offset=16).reshape(len(y_test), 28, 28) return (x_train, y_train), (x_test, y_test)
def build_dataset():
    """Reads the 20 Newsgroups dataset and returns texts (list), labels (list)
    and a dict mapping label names to indices
    """
    print('Processing Dataset...')
    zipped = get_file('20news-18828.tar.gz', DATA_URL, extract=True)
    data_dir = zipped[:-7]  # remove .tar.gz

    # Text samples and their labels
    texts = []  # list of text samples
    label2idx = {}  # dictionary mapping label name to numeric id
    labels = []  # list of label ids

    for folder in sorted(os.listdir(data_dir)):
        path = os.path.join(data_dir, folder)
        if os.path.isdir(path):
            label = len(label2idx)
            label2idx[folder] = label
            for fname in sorted(os.listdir(path)):
                if fname.isdigit():
                    # A text file
                    fpath = os.path.join(path, fname)
                    with open(fpath, encoding='latin-1') as f:
                        t = f.read()
                        i = t.find('\n\n')  # skip header
                        if 0 < i:
                            t = t[i:]
                        texts.append(t)
                        labels.append(label)

    print('Found {} texts.'.format(len(texts)))
    return texts, labels, label2idx
def test_bert_embedding(self):
    text, label = ChineseDailyNerCorpus.load_data()
    is_bold = np.random.randint(1, 3, (len(text), 12))

    bert_path = get_file('bert_sample_model',
                         "http://s3.bmio.net/kashgari/bert_sample_model.tar.bz2",
                         cache_dir=DATA_PATH,
                         untar=True)

    text_embedding = BERTEmbedding(bert_path,
                                   task=kashgari.LABELING,
                                   sequence_length=12)
    num_feature_embedding = NumericFeaturesEmbedding(2,
                                                     'is_bold',
                                                     sequence_length=12)

    stack_embedding = StackedEmbedding([text_embedding, num_feature_embedding])
    stack_embedding.analyze_corpus((text, is_bold), label)

    tensor = stack_embedding.process_x_dataset((text[:3], is_bold[:3]))
    print(tensor[0][0].shape)
    print(tensor[0][1].shape)
    print(tensor[1].shape)
    print(stack_embedding.embed_model.input_shape)
    print(stack_embedding.embed_model.summary())

    r = stack_embedding.embed((text[:3], is_bold[:3]))
    assert r.shape == (3, 12, 24)
def load_weights(self):
    weights_path = get_file(
        'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
        WEIGHTS_PATH_NO_TOP,
        cache_subdir='models',
        file_hash='6d6bbae143d832006294945121d1f1fc')
    self.created_model.load_weights(weights_path, by_name=True)
def load_data():
    base = "file:///D:/fashionmnist/"
    files = [
        'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz',
        't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz'
    ]

    paths = []
    for fname in files:
        paths.append(get_file(fname, origin=base + fname))

    with gzip.open(paths[0], 'rb') as lbpath:
        y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8)

    with gzip.open(paths[1], 'rb') as imgpath:
        x_train = np.frombuffer(imgpath.read(), np.uint8,
                                offset=16).reshape(len(y_train), 28, 28)

    with gzip.open(paths[2], 'rb') as lbpath:
        y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8)

    with gzip.open(paths[3], 'rb') as imgpath:
        x_test = np.frombuffer(imgpath.read(), np.uint8,
                               offset=16).reshape(len(y_test), 28, 28)

    return (x_train, y_train), (x_test, y_test)
def create(self):
    """
    Creates the VGG16 network architecture and loads the pretrained weights.

    Args:   None
    Returns:   None
    """
    model = self.model = Sequential()
    # model.add(Lambda(vgg_preprocess, input_shape=(3,224,224), output_shape=(3,224,224)))
    model.add(Lambda(vgg_preprocess, input_shape=(224, 224, 3)))

    self.ConvBlock(2, 64)
    self.ConvBlock(2, 128)
    self.ConvBlock(3, 256)
    self.ConvBlock(3, 512)
    self.ConvBlock(3, 512)

    model.add(Flatten())
    self.FCBlock()
    self.FCBlock()
    model.add(Dense(1000, activation='softmax'))

    # fname = 'vgg16.h5'  # unused: superseded by the TF-ordering weight file below
    fname = 'vgg16_weights_tf_dim_ordering_tf_kernels.h5'
    model.load_weights(get_file(fname, self.FILE_PATH + fname, cache_subdir='models'))
def setUpClass(cls):
    cls.embedding_class = BERTEmbedding

    bert_path = get_file('bert_sample_model',
                         "http://s3.bmio.net/kashgari/bert_sample_model.tar.bz2",
                         cache_dir=DATA_PATH,
                         untar=True)
    cls.config = {'model_folder': bert_path}
def get_classes(self): """ Downloads the Imagenet classes index file and loads it to self.classes. The file is downloaded only if it not already in the cache. """ fname = 'imagenet_class_index.json' fpath = get_file(fname, self.FILE_PATH+fname, cache_subdir='models') with open(fpath) as f: class_dict = json.load(f) self.classes = [class_dict[str(i)][1] for i in range(len(class_dict))]
def load_data():
    '''
    load data from MovieLens 100K Dataset
    http://grouplens.org/datasets/movielens/

    Note that this method uses ua.base and ua.test in the dataset.

    :return: train_users, train_x, test_users, test_x
    :rtype: list of int, numpy.array, list of int, numpy.array
    '''
    path = get_file(
        'ml-100k.zip',
        origin='http://files.grouplens.org/datasets/movielens/ml-100k.zip')

    with ZipFile(path, 'r') as ml_zip:
        max_item_id = -1
        train_history = {}
        with ml_zip.open('ml-100k/ua.base', 'r') as file:
            for line in file:
                user_id, item_id, rating, timestamp = line.decode(
                    'utf-8').rstrip().split('\t')
                if int(user_id) not in train_history:
                    train_history[int(user_id)] = [int(item_id)]
                else:
                    train_history[int(user_id)].append(int(item_id))

                if max_item_id < int(item_id):
                    max_item_id = int(item_id)

        test_history = {}
        with ml_zip.open('ml-100k/ua.test', 'r') as file:
            for line in file:
                user_id, item_id, rating, timestamp = line.decode(
                    'utf-8').rstrip().split('\t')
                if int(user_id) not in test_history:
                    test_history[int(user_id)] = [int(item_id)]
                else:
                    test_history[int(user_id)].append(int(item_id))

    max_item_id += 1  # item_id starts from 1

    train_users = list(train_history.keys())
    train_x = numpy.zeros((len(train_users), max_item_id), dtype=numpy.int32)
    print(train_x.shape)
    for i, hist in enumerate(train_history.values()):
        # print(hist)
        mat = to_categorical(hist, max_item_id)
        # print(mat.shape)
        train_x[i] = numpy.sum(mat, axis=0)
        # print(len(train_x[i]))

    test_users = list(test_history.keys())
    test_x = numpy.zeros((len(test_users), max_item_id), dtype=numpy.int32)
    for i, hist in enumerate(test_history.values()):
        mat = to_categorical(hist, max_item_id)
        test_x[i] = numpy.sum(mat, axis=0)

    return train_users, train_x, test_users, test_x
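# Usage sketch (not from the original module), assuming it lives next to the MovieLens loader
# above: ml-100k contains 943 users, and each row of train_x is a multi-hot vector over item
# ids, so a quick consistency check could look like this. The helper name is introduced here
# for illustration only.
def _check_movielens_shapes():
    train_users, train_x, test_users, test_x = load_data()
    assert train_x.shape[0] == len(train_users)
    assert test_x.shape[1] == train_x.shape[1]  # same padded item dimension for both splits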
def setUpClass(cls):
    sample_w2v_path = get_file('sample_w2v.txt',
                               "http://s3.bmio.net/kashgari/sample_w2v.txt",
                               cache_dir=DATA_PATH)
    cls.embedding_class = WordEmbedding

    cls.config = {
        'w2v_path': sample_w2v_path
    }
    cls.embedding_size = 100
def load_data(cls,
              subset_name: str = 'train',
              shuffle: bool = True,
              cutter: str = 'char') -> Tuple[List[List[str]], List[str]]:
    """
    Load dataset as sequence classification format, char level tokenized

    features: ``[['听', '新', '闻', '。'], ['电', '视', '台', '在', '播', '什', '么'], ...]``

    labels: ``['news', 'epg', ...]``

    Samples::

        train_x, train_y = SMP2018ECDTCorpus.load_data('train')
        test_x, test_y = SMP2018ECDTCorpus.load_data('test')

    Args:
        subset_name: {train, test, valid}
        shuffle: should shuffle or not, default True.
        cutter: sentence cutter, {char, jieba}

    Returns:
        dataset_features and dataset labels
    """
    corpus_path = get_file(cls.__corpus_name__,
                           cls.__zip_file__name,
                           cache_dir=k.DATA_PATH,
                           untar=True)

    if cutter not in ['char', 'jieba', 'none']:
        raise ValueError('cutter error, please use one of the {char, jieba}')

    df_path = os.path.join(corpus_path, f'{subset_name}.csv')
    df = pd.read_csv(df_path)
    if cutter == 'jieba':
        try:
            import jieba
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "please install jieba, `$ pip install jieba`")
        x_data = [list(jieba.cut(item)) for item in df['query'].to_list()]
    else:
        # char-level cut (the original `elif 'char':` condition was always truthy)
        x_data = [list(item) for item in df['query'].to_list()]
    y_data = df['label'].to_list()

    if shuffle:
        x_data, y_data = utils.unison_shuffled_copies(x_data, y_data)
    logging.debug(f"loaded {len(x_data)} samples from {df_path}. Sample:\n"
                  f"x[0]: {x_data[0]}\n"
                  f"y[0]: {y_data[0]}")
    return x_data, y_data
def CrossModalityXception(num_classes, pre_trained, cross_modality_pre_training,
                          input_shape, include_feature_fields=False):
    cross_modality_pre_training = cross_modality_pre_training and pre_trained

    # create the model
    model = Xception(classes=num_classes,
                     weights=None,
                     input_shape=input_shape,
                     include_top=True)
    channels = input_shape[2]

    # load weight file >>> downloads some file from github
    weights_path = get_file(
        'xception_weights_tf_dim_ordering_tf_kernels_notop.h5',
        TF_WEIGHTS_PATH_NO_TOP,
        cache_subdir='models',
        file_hash='b0042744bf5b25fce3cb969f33bebb97')

    weight_values_ = get_named_layer_weights_from_h5py(weights_path)
    symbolic_weights_ = get_symbolic_filtered_layer_weights_from_model(
        model)[:len(weight_values_)]

    if cross_modality_pre_training:  # use a pretrained convolution weight
        # update it (name,[kernel,bias])
        # cross modality pre-training for kernel
        # leave bias as is of course
        weight_values_[0] = (
            "conv1_cross_modality",
            [
                cross_modality_init(
                    kernel=weight_values_[0][1][0],
                    in_channels=channels
                ),  # 0 = first layer , 1 = weight_value , 0 = kernel
                # Xception has no bias
            ])
    else:  # start the first convolution layer as random glorot
        symbolic_weights_ = symbolic_weights_[1:]
        weight_values_ = weight_values_[1:]

    if pre_trained:  # do weight loading
        load_layer_weights(weight_values=weight_values_,
                           symbolic_weights=symbolic_weights_)

    if include_feature_fields:
        return Model(model.inputs,
                     [layer.output for layer in model.layers[-2:]])
    else:
        return model
def resnet50_encoder(pretrained_weights='imagenet',
                     num_classes=20,
                     input_size=(256, 256, 1)):
    pretrained_url = "https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5"

    inp = Input(input_size)
    x = ZeroPadding2D((3, 3))(inp)
    x = Conv2D(64, (7, 7), strides=(2, 2))(x)
    feature_map_1 = x

    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    x = conv_block(x, [64, 64, 256], strides=(1, 1))
    x = identity_block(x, [64, 64, 256])
    x = identity_block(x, [64, 64, 256])
    feature_map_2 = ZeroPadding2D((1, 1))(x)
    feature_map_2 = Lambda(lambda x: x[:, :-1, :-1, :])(feature_map_2)

    x = conv_block(x, [128, 128, 512])
    x = identity_block(x, [128, 128, 512])
    x = identity_block(x, [128, 128, 512])
    x = identity_block(x, [128, 128, 512])
    feature_map_3 = x

    x = conv_block(x, [256, 256, 1024])
    x = identity_block(x, [256, 256, 1024])
    x = identity_block(x, [256, 256, 1024])
    x = identity_block(x, [256, 256, 1024])
    x = identity_block(x, [256, 256, 1024])
    x = identity_block(x, [256, 256, 1024])
    feature_map_4 = x

    x = conv_block(x, [512, 512, 2048])
    x = identity_block(x, [512, 512, 2048])
    x = identity_block(x, [512, 512, 2048])
    feature_map_5 = x

    model = Model(inputs=inp, outputs=feature_map_5)
    if pretrained_weights == 'imagenet':
        weights_path = utils.get_file(pretrained_url.split("/")[-1], pretrained_url)
        model.load_weights(weights_path)

    return inp, [
        feature_map_1, feature_map_2, feature_map_3, feature_map_4, feature_map_5
    ]
def load_data(cls, model_name):
    """
    Download pretrained GPT-2 models

    Args:
        model_name: {117M, 345M}

    Returns:
        GPT-2 model folder
    """
    model_folder: pathlib.Path = pathlib.Path(
        os.path.join(macros.DATA_PATH, 'datasets', f'gpt2-{model_name}'))
    model_folder.mkdir(exist_ok=True, parents=True)

    for filename in [
            'checkpoint', 'encoder.json', 'hparams.json',
            'model.ckpt.data-00000-of-00001', 'model.ckpt.index',
            'model.ckpt.meta', 'vocab.bpe'
    ]:
        url = "https://storage.googleapis.com/gpt-2/models/" + model_name + "/" + filename
        get_file(os.path.join(f'gpt2-{model_name}', filename),
                 url,
                 cache_dir=macros.DATA_PATH)
    return str(model_folder)
def CrossModalityResNet50(num_classes, pre_trained, cross_modality_pre_training,
                          input_shape):
    """Pretrained Resnet50 model from keras which uses cross modality pretraining
    to obtain a convolution weight that suits the 20 channels needed by the motion stream"""
    cross_modality_pre_training = cross_modality_pre_training and pre_trained

    # create the model
    model = ResNet50(classes=num_classes,
                     weights=None,
                     input_shape=input_shape,
                     include_top=True)
    channels = input_shape[2]

    # load weight file >>> downloads some file from github
    weights_path = get_file(
        'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
        WEIGHTS_PATH_NO_TOP,
        cache_subdir='models',
        md5_hash='a268eb855778b3df3c7506639542a6af')

    # get the named weights of each layer
    weight_values_ = get_named_layer_weights_from_h5py(weights_path)
    # get the symbolic weights of each layer
    symbolic_weights_ = get_symbolic_filtered_layer_weights_from_model(
        model)[:len(weight_values_)]

    if cross_modality_pre_training:  # use a pretrained convolution weight
        # update it (name,[kernel,bias])
        # cross modality pre-training for kernel
        # leave bias as is of course
        weight_values_[0] = (
            "conv1_cross_modality",
            [
                cross_modality_init(
                    kernel=weight_values_[0][1][0],
                    in_channels=channels
                ),  # 0 = first layer , 1 = weight_value , 0 = kernel
                weight_values_[0][1][1]  # 0 = first layer , 1 = weight_value , 1 = bias
            ]
        )
    else:  # start the first convolution layer as random glorot
        symbolic_weights_ = symbolic_weights_[1:]
        weight_values_ = weight_values_[1:]

    if pre_trained:  # do weight loading
        load_layer_weights(weight_values=weight_values_,
                           symbolic_weights=symbolic_weights_)

    return model
def _load_data(self):
    dirname = 'cifar-100-python'
    origin = 'http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
    path = get_file(dirname, origin=origin, untar=True)

    fpath = os.path.join(path, 'train')
    x_train, yf_train, yc_train = self._extract_data(fpath)

    fpath = os.path.join(path, 'test')
    x_test, yf_test, yc_test = self._extract_data(fpath)

    # Put channels last
    x_train = x_train.transpose(0, 2, 3, 1)
    x_test = x_test.transpose(0, 2, 3, 1)

    return (x_train, yc_train, yf_train), (x_test, yc_test, yf_test)
def load_imdb(seed=123):
    """
    Loads the IMDb movie reviews sentiment analysis dataset.

    Parameters
    ----------
    seed: int
        Seed for randomizer.

    Returns
    -------
    train_texts, train_labels, test_texts, test_labels:
        A tuple of training and validation data.
    """
    zipped = get_file('aclImdb_v1.tar.gz', IMDB_URL, extract=True)
    data_path = zipped[:-10]  # strip the trailing '_v1.tar.gz'

    # Load the training data
    train_texts = []
    train_labels = []
    for category in ['pos', 'neg']:
        train_path = os.path.join(data_path, 'train', category)
        for fname in sorted(os.listdir(train_path)):
            if fname.endswith('.txt'):
                with open(os.path.join(train_path, fname)) as f:
                    train_texts.append(f.read())
                train_labels.append(0 if category == 'neg' else 1)

    # Load the validation data.
    test_texts = []
    test_labels = []
    for category in ['pos', 'neg']:
        test_path = os.path.join(data_path, 'test', category)
        for fname in sorted(os.listdir(test_path)):
            if fname.endswith('.txt'):
                with open(os.path.join(test_path, fname)) as f:
                    test_texts.append(f.read())
                test_labels.append(0 if category == 'neg' else 1)

    # Shuffle the training texts and labels with the same seed so they stay aligned.
    random.seed(seed)
    random.shuffle(train_texts)
    random.seed(seed)
    random.shuffle(train_labels)

    return (train_texts, np.array(train_labels)), (test_texts, np.array(test_labels))
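# Usage sketch (not part of the source), assuming it runs alongside load_imdb above: the
# aclImdb archive has 25,000 labelled reviews in each split, and shuffling both lists with the
# same seed keeps texts and labels aligned, which this helper (a name introduced here) checks.
def _check_imdb_split_sizes(seed=123):
    (train_texts, train_labels), (test_texts, test_labels) = load_imdb(seed=seed)
    assert len(train_texts) == len(train_labels) == 25000
    assert len(test_texts) == len(test_labels) == 25000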
def _get_model(self, file_name="megadetector_v3.pb", model_url=None): if model_url is None: model_url = self.MODEL_URL cache_subdir = "megadetector" model_path = zamba.config.cache_dir / cache_subdir / file_name if not model_path.exists(): model_path = get_file( fname=file_name, origin=model_url, cache_dir=zamba.config.cache_dir, cache_subdir=cache_subdir, extract=True, ) return model_path
def load_data(cls,
              subset_name: str = 'train',
              shuffle: bool = True) -> Tuple[List[List[str]], List[List[str]]]:
    """
    Load dataset as sequence labeling format, char level tokenized

    features: ``[['海', '钓', '比', '赛', '地', '点', '在', '厦', '门', ...], ...]``

    labels: ``[['O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'I-LOC', ...], ...]``

    Sample::

        train_x, train_y = ChineseDailyNerCorpus.load_data('train')
        test_x, test_y = ChineseDailyNerCorpus.load_data('test')

    Args:
        subset_name: {train, test, valid}
        shuffle: should shuffle or not, default True.

    Returns:
        dataset_features and dataset labels
    """
    corpus_path = get_file(cls.__corpus_name__,
                           cls.__zip_file__name,
                           cache_dir=k.DATA_PATH,
                           untar=True)

    if subset_name == 'train':
        file_path = os.path.join(corpus_path, 'example.train')
    elif subset_name == 'test':
        file_path = os.path.join(corpus_path, 'example.test')
    else:
        file_path = os.path.join(corpus_path, 'example.dev')

    x_data, y_data = DataReader.read_conll_format_file(file_path)
    if shuffle:
        x_data, y_data = utils.unison_shuffled_copies(x_data, y_data)
    logging.debug(
        f"loaded {len(x_data)} samples from {file_path}. Sample:\n"
        f"x[0]: {x_data[0]}\n"
        f"y[0]: {y_data[0]}")
    return x_data, y_data
def _get_model(self, file_name="zamba-and-obj-rec-0.859.joblib", model_url=None): if model_url is None: model_url = self.MODEL_URL cache_subdir = "blanknonblank" model_path = zamba.config.cache_dir / cache_subdir / file_name if not model_path.exists(): model_path = get_file( fname=file_name, origin=model_url, cache_dir=zamba.config.cache_dir, cache_subdir=cache_subdir, extract=True, ) return model_path
def test_build_with_BERT_and_fit(self):
    from kashgari.embeddings import BERTEmbedding
    from tensorflow.python.keras.utils import get_file
    from kashgari.macros import DATA_PATH

    sample_bert_path = get_file('bert_sample_model',
                                "http://s3.bmio.net/kashgari/bert_sample_model.tar.bz2",
                                cache_dir=DATA_PATH,
                                untar=True)

    processor = MultiOutputProcessor()
    embedding = BERTEmbedding(model_folder=sample_bert_path, processor=processor)

    m = MultiOutputModel(embedding=embedding)
    m.build_model(train_x, (output_1, output_2))
    m.fit(train_x, (output_1, output_2), epochs=2)

    res = m.predict(train_x[:10])
    assert len(res) == 2
    assert res[0].shape == (10, 3)
def generate_data():
    path = get_file('babi-tasks-v1-2.tar.gz',
                    origin='https://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz')
    challenge = 'tasks_1-20_v1-2/en/qa2_two-supporting-facts_{}.txt'
    with tarfile.open(path) as tar:
        train = get_stories(tar.extractfile(challenge.format('train')))
        test = get_stories(tar.extractfile(challenge.format('test')))

    vocab = set()
    for story, q, answer in train + test:
        vocab |= set(story + q + [answer])
    vocab = sorted(vocab)  # 36 words

    vocab_size = len(vocab) + 1
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
    story_maxlen = max(map(len, (x for x, _, _ in train + test)))
    query_maxlen = max(map(len, (x for _, x, _ in train + test)))

    # xs: 1000 * 552   xqs: 1000 * 5   ys: 1000 * 36
    train_sqa_vec = vectorize_stories(train, word_idx, story_maxlen, query_maxlen)
    test_sqa_vec = vectorize_stories(test, word_idx, story_maxlen, query_maxlen)
    return train_sqa_vec, test_sqa_vec, vocab_size, story_maxlen, query_maxlen
def get_classes(self):
    '''
    Downloads the ImageNet class index file and loads it into self.classes,
    unless it is already cached.

    Args:   None
    Returns:   None
    '''
    # Get the ImageNet class indexes and cache them
    fname = 'imagenet_class_index.json'
    fpath = get_file(fname, self.FILE_PATH + fname, cache_subdir='models')

    # Open file and parse json
    with open(fpath) as f:
        class_dict = json.load(f)
    self.classes = [class_dict[str(i)][1] for i in range(len(class_dict))]
def main(args):
    global cold
    was_cold = cold
    cold = False
    try:
        input_bucket = args.get("input_bucket", 1)
        object_key = args.get("object_key", 1)
        model_object_key = args.get("model_object_key", 1)  # example : squeezenet_weights_tf_dim_ordering_tf_kernels.h5
        model_bucket = args.get("model_bucket", 1)

        # download_path = tmp + '{}{}'.format(uuid.uuid4(), object_key)
        download_path = get_file(
            '{}{}'.format(uuid.uuid4(), object_key),
            "https://github.com/kmu-bigdata/serverless-faas-workbench/raw/master/dataset/image/animal-dog.jpg",
            cache_dir='/tmp/')
        # s3_client.download_file(input_bucket, object_key, download_path)

        # `tmp` is presumably a module-level temp-dir path in the original benchmark
        model_path = tmp + '{}{}'.format(uuid.uuid4(), model_object_key)
        # s3_client.download_file(model_bucket, model_object_key, model_path)

        latency, result = predict(download_path)
        _tmp_dic = {x[1]: {'N': str(x[2])} for x in result[0]}
        # NOTE: the original snippet returned an undefined `msg`; here we assume the
        # prediction summary dict is the intended message payload.
        msg = _tmp_dic
        return {"body": {"latency": latency, "msg": msg, "cold": was_cold}}
    except Exception as e:
        err = "whelp"
        try:
            err = traceback.format_exc()
        except Exception as fug:
            err = str(fug)
        # NOTE: `msg` may be undefined if the failure happened before prediction,
        # so report the exception string instead (assumed fix).
        return {"body": {"cust_error": str(e), "thing": err, "cold": was_cold}}
def __init__(self,
             cnn_out_channels=(64, 64),
             cnn_percent_on=(0.095, 0.125),
             linear_units=1000,
             linear_percent_on=0.1,
             linear_weight_sparsity=0.4,
             boost_strength=1.5,
             boost_strength_factor=0.9,
             k_inference_factor=1.5,
             duty_cycle_period=1000,
             data_format=IMAGE_DATA_FORMAT,
             pre_trained=False,
             name=None,
             batch_norm=True,
             **kwargs):
    super(GSCSparseCNN, self).__init__(name=name, **kwargs)

    if data_format == "channels_first":
        axis = 1
        input_shape = (1, 32, 32)
    else:
        axis = -1
        input_shape = (32, 32, 1)

    self.add(keras.layers.Conv2D(
        name="cnn1",
        data_format=data_format,
        input_shape=input_shape,
        filters=cnn_out_channels[0],
        kernel_size=5))
    if batch_norm:
        self.add(keras.layers.BatchNormalization(
            name="cnn1_batchnorm",
            axis=axis,
            epsilon=1e-05,
            momentum=0.9,
            center=False,
            scale=False))
    self.add(keras.layers.MaxPool2D(
        name="cnn1_maxpool",
        pool_size=2,
        padding="same",
        data_format=data_format))
    self.add(KWinners2d(
        name="cnn1_kwinner",
        data_format=data_format,
        percent_on=cnn_percent_on[0],
        k_inference_factor=k_inference_factor,
        boost_strength=boost_strength,
        boost_strength_factor=boost_strength_factor,
        duty_cycle_period=duty_cycle_period))
    self.add(keras.layers.Conv2D(
        name="cnn2",
        data_format=data_format,
        filters=cnn_out_channels[1],
        kernel_size=5))
    if batch_norm:
        self.add(keras.layers.BatchNormalization(
            name="cnn2_batchnorm",
            axis=axis,
            epsilon=1e-05,
            momentum=0.9,
            center=False,
            scale=False))
    self.add(keras.layers.MaxPool2D(
        name="cnn2_maxpool",
        pool_size=2,
        padding="same",
        data_format=data_format))
    self.add(KWinners2d(
        name="cnn2_kwinner",
        data_format=data_format,
        percent_on=cnn_percent_on[1],
        k_inference_factor=k_inference_factor,
        boost_strength=boost_strength,
        boost_strength_factor=boost_strength_factor,
        duty_cycle_period=duty_cycle_period))
    self.add(keras.layers.Flatten(name="flatten", data_format=data_format))
    self.add(keras.layers.Dense(
        name="linear",
        units=linear_units,
        kernel_constraint=SparseWeights(linear_weight_sparsity)))
    if batch_norm:
        self.add(keras.layers.BatchNormalization(
            name="linear_bn",
            epsilon=1e-05,
            momentum=0.9,
            center=False,
            scale=False))
    self.add(KWinners(
        name="linear_kwinner",
        percent_on=linear_percent_on,
        k_inference_factor=k_inference_factor,
        boost_strength=boost_strength,
        boost_strength_factor=boost_strength_factor,
        duty_cycle_period=duty_cycle_period))
    self.add(keras.layers.Dense(name="output", units=12))
    self.add(keras.layers.Softmax(axis=1))

    if pre_trained:
        if not batch_norm:
            raise NotImplementedError(
                "Unable to load pre-trained models with no BatchNorm")
        model_url, model_hash = MODEL_URLS["gsc_sparse_cnn"]
        file_name = "gsc_sparse_cnn-{:.8}".format(model_hash)
        archive_path = get_file(
            fname="{}.tar.gz".format(file_name),
            origin=model_url,
            file_hash=model_hash,
            extract=True,
            cache_subdir="models")
        cache_dir = os.path.dirname(archive_path)
        self.load_weights(os.path.join(cache_dir, "gsc_sparse_cnn.h5"))
def download(self, data_dir=None, dataset='all'):
    '''Cornell Grasping Dataset - about 5GB total size

    http://pr.cs.cornell.edu/grasping/rect_data/data.php

    Downloads to `~/.keras/datasets/cornell_grasping` by default.
    Includes grasp_listing.txt with all files in all datasets;
    the feature csv files which specify the dataset size,
    the features (data channels), and the number of grasps;
    and the tfrecord files which actually contain all the data.

    If `grasp_listing_hash.txt` is present, an additional hashing step
    will be completed to verify dataset integrity.
    `grasp_listing_hash.txt` will be generated automatically when
    downloading with `dataset='all'`.

    # Arguments

        dataset: The name of the dataset to download, downloads all by default
            with the '' parameter, 102 will download the 102 feature dataset
            found in grasp_listing.txt.

    # Returns

        list of paths to the downloaded files
    '''
    dataset = self._update_dataset_param(dataset)
    if data_dir is None:
        if self.data_dir is None:
            data_dir = FLAGS.data_dir
        else:
            data_dir = self.data_dir
    hypertree_utilities.mkdir_p(data_dir)
    print('Downloading datasets to: ', data_dir)

    url_prefix = ''
    # If a hashed version of the listing is available,
    # download the dataset and verify hashes to prevent data corruption.
    listing_hash = os.path.join(data_dir, 'grasp_listing_hash.txt')
    if os.path.isfile(listing_hash):
        files_and_hashes = np.genfromtxt(listing_hash, dtype='str', delimiter=' ')
        files = [get_file(fpath.split('/')[-1], url_prefix + fpath,
                          cache_subdir=data_dir, file_hash=hash_str, extract=True)
                 for fpath, hash_str in tqdm(files_and_hashes)
                 if '_' + str(dataset) in fpath]
    else:
        # If a hashed version of the listing is not available,
        # simply download the dataset normally.
        listing_url = 'https://raw.githubusercontent.com/ahundt/robot-grasp-detection/master/grasp_listing.txt'
        grasp_listing_path = get_file('grasp_listing.txt', listing_url, cache_subdir=data_dir)
        grasp_files = np.genfromtxt(grasp_listing_path, dtype=str)
        files = [get_file(fpath.split('/')[-1], url_prefix + fpath,
                          cache_subdir=data_dir, extract=True)
                 for fpath in tqdm(grasp_files)
                 if '_' + dataset in fpath]

        # If all files are downloaded, generate a hashed listing.
        if dataset == 'all' or dataset == '':
            print('Hashing all dataset files to prevent corruption...')
            hashes = []
            for i, f in enumerate(tqdm(files)):
                hashes.append(_hash_file(f))
            file_hash_np = np.column_stack([grasp_files, hashes])
            with open(listing_hash, 'wb') as hash_file:
                np.savetxt(hash_file, file_hash_np, fmt='%s', delimiter=' ',
                           header='file_path sha256')
            print('Hashing complete, {} contains each url plus hash, and will be used to '
                  'verify the dataset during future calls to download().'.format(listing_hash))
    return files
def load(path):
    """Ensures that a file is downloaded locally, then unzips and reads it."""
    return GzipFile(get_file(Path(path).name, path)).read()
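# Usage sketch (assumption: `path` is a URL or local path to a gzip-compressed file that
# keras get_file can fetch); the helper caches the download under ~/.keras/datasets and
# returns the decompressed bytes, e.g. for a hypothetical corpus URL:
#
#     raw_bytes = load("https://example.com/corpus.txt.gz")  # hypothetical URL
#     text = raw_bytes.decode("utf-8")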