Esempio n. 1
0
 def test_load_openvino(self):
     """Fetch the OpenVINO 2018_R5 ResNet-50 IR (xml + bin) and run one predict."""
     tmp_dir = self.create_temp_dir()
     xml_url = data_url + "/analytics-zoo-models/openvino/2018_R5/resnet_v1_50.xml"
     bin_url = data_url + "/analytics-zoo-models/openvino/2018_R5/resnet_v1_50.bin"
     xml_path = maybe_download("resnet_v1_50.xml", tmp_dir, xml_url)
     bin_path = maybe_download("resnet_v1_50.bin", tmp_dir, bin_url)
     net = InferenceModel(1)
     net.load_openvino(xml_path, bin_path)
     net.predict(np.random.random([4, 1, 224, 224, 3]))
 def test_load_tf_openvino_ic(self):
     # Exercise the TF image-classification -> OpenVINO conversion path on
     # resnet_v1_50, then the calibrated conversion path on the same model.
     local_path = self.create_temp_dir()
     print(local_path)
     url = data_url + "/models/resnet_v1_50_2016_08_28.tar.gz"
     file_abs_path = maybe_download("resnet_v1_50_2016_08_28.tar.gz",
                                    local_path, url)
     tar = tarfile.open(file_abs_path, "r:gz")
     print("Extracting %s to %s" % (file_abs_path, local_path))
     tar.extractall(local_path)
     tar.close()
     model = InferenceModel(3)
     # Convert straight from the TF checkpoint (model_path=None: no frozen
     # .pb supplied); preprocessing values match TF-slim resnet_v1_50.
     model.load_tf_image_classification_as_openvino(
         model_path=None,
         image_classification_model_type="resnet_v1_50",
         checkpoint_path=local_path + "/resnet_v1_50.ckpt",
         input_shape=[4, 224, 224, 3],
         if_reverse_input_channels=True,
         mean_values=[123.68, 116.78, 103.94],
         scale=1)
     print(model)
     input_data = np.random.random([4, 1, 224, 224, 3])
     # Validation images and OpenCV libraries needed by the calibration run.
     s3url = "https://s3-ap-southeast-1.amazonaws.com/"
     var_url = s3url + "analytics-zoo-models/openvino/val_bmp_32.tar"
     lib_url = s3url + "analytics-zoo-models/openvino/opencv_4.0.0_ubuntu_lib.tar"
     var_file_abs_path = maybe_download("val_bmp_32.tar", local_path,
                                        var_url)
     lib_file_abs_path = maybe_download("opencv_4.0.0_ubuntu_lib.tar",
                                        local_path, lib_url)
     var_tar = tarfile.open(var_file_abs_path, "r")
     print("Extracting %s to %s" % (var_file_abs_path, local_path))
     var_tar.extractall(local_path)
     var_tar.close()
     lib_tar = tarfile.open(lib_file_abs_path, "r")
     print("Extracting %s to %s" % (lib_file_abs_path, local_path))
     lib_tar.extractall(local_path)
     lib_tar.close()
     validation_file_path = local_path + "/val_bmp_32/val.txt"
     opencv_lib_path = local_path + "/lib"
     model2 = InferenceModel(3)
     # Calibrated variant: network_type 'C' with a 32-image validation
     # subset — presumably int8 calibration; confirm against the API docs.
     model2.load_tf_as_calibrated_openvino(
         model_path=None,
         model_type="resnet_v1_50",
         checkpoint_path=local_path + "/resnet_v1_50.ckpt",
         input_shape=[4, 224, 224, 3],
         if_reverse_input_channels=True,
         mean_values=[123.68, 116.78, 103.94],
         scale=1,
         network_type='C',
         validation_file_path=validation_file_path,
         subset=32,
         opencv_lib_path=opencv_lib_path)
     print(model2)
     model2.predict(input_data)
Esempio n. 3
0
 def test_load_openvino(self):
     """Download a Faster R-CNN OpenVINO IR pair and run a single prediction."""
     tmp_dir = self.create_temp_dir()
     base_url = data_url + "/IR_faster_rcnn_resnet101_coco_2018_01_28"
     for graph_file in ("frozen_inference_graph.xml",
                        "frozen_inference_graph.bin"):
         maybe_download(graph_file, tmp_dir, base_url + "/" + graph_file)
     net = InferenceModel()
     net.load_openvino(tmp_dir + "/frozen_inference_graph.xml",
                       tmp_dir + "/frozen_inference_graph.bin")
     preds = net.predict(np.random.random([1, 1, 3, 600, 600]))
Esempio n. 4
0
    def test_openvino(self):
        """Build an OpenVINO Estimator from a downloaded ResNet-50 IR and
        predict on both a plain ndarray and a SparkXShards input."""
        with tempfile.TemporaryDirectory() as local_path:
            model_url = data_url + "/analytics-zoo-data/openvino2020_resnet50.tar"
            model_path = maybe_download("openvino2020_resnet50.tar",
                                        local_path, model_url)
            cmd = "tar -xvf " + model_path + " -C " + local_path
            # subprocess.run blocks until extraction finishes; the original
            # Popen call returned immediately, so from_openvino could race
            # the extraction and fail to find the model files.
            subprocess.run(cmd.split(), check=True)
            model_path = os.path.join(
                local_path, "openvino2020_resnet50/resnet_v1_50.xml")
            est = Estimator.from_openvino(model_path=model_path)

            # ndarray input: 20 samples of shape [4, 3, 224, 224]
            input_data = np.random.random([20, 4, 3, 224, 224])
            result = est.predict(input_data)
            print(result)

            # xshards input: two partitions of ndarrays wrapped as SparkXShards
            input_data_list = [
                np.random.random([1, 4, 3, 224, 224]),
                np.random.random([2, 4, 3, 224, 224])
            ]
            sc = init_nncontext()
            rdd = sc.parallelize(input_data_list, numSlices=2)
            shards = SparkXShards(rdd)

            def pre_processing(images):
                # predict expects the features under the key "x"
                return {"x": images}

            shards = shards.transform_shard(pre_processing)
            result = est.predict(shards)
            result_c = result.collect()
            print(result_c)
 def test_load_tf_openvino(self):
     """Convert a TF Faster R-CNN frozen graph to OpenVINO through both
     load_tf(backend="openvino") and load_tf_object_detection_as_openvino,
     then run a prediction with each model."""
     local_path = self.create_temp_dir()
     url = data_url + "/models/object_detection/faster_rcnn_resnet101_coco_2018_01_28.tar.gz"
     file_abs_path = maybe_download(
         "faster_rcnn_resnet101_coco_2018_01_28.tar.gz", local_path, url)
     extracted_to = os.path.join(local_path,
                                 "faster_rcnn_resnet101_coco_2018_01_28")
     if not os.path.exists(extracted_to):
         print("Extracting %s to %s" % (file_abs_path, extracted_to))
         # The with-block guarantees the archive handle is closed; the
         # original opened the tarfile unconditionally and leaked the open
         # handle whenever extracted_to already existed (close() was inside
         # the if-branch).
         with tarfile.open(file_abs_path, "r:gz") as tar:
             tar.extractall(local_path)
     model = InferenceModel(3)
     model.load_tf(model_path=extracted_to + "/frozen_inference_graph.pb",
                   backend="openvino",
                   model_type="faster_rcnn_resnet101_coco",
                   ov_pipeline_config_path=extracted_to +
                   "/pipeline.config",
                   ov_extensions_config_path=None)
     input_data = np.random.random([4, 1, 3, 600, 600])
     output_data = model.predict(input_data)
     model2 = InferenceModel(3)
     model2.load_tf_object_detection_as_openvino(
         model_path=extracted_to + "/frozen_inference_graph.pb",
         object_detection_model_type="faster_rcnn_resnet101_coco",
         pipeline_config_path=extracted_to + "/pipeline.config",
         extensions_config_path=None)
     model2.predict(input_data)
def load_data(path='boston_housing.npz',
              dest_dir='/tmp/.zoo/dataset',
              test_split=0.2):
    """Loads the Boston Housing dataset, the source url of download
       is copied from keras.datasets
    # Arguments
        path: file name under which the dataset is cached/downloaded.
        dest_dir: where to cache the data (relative to `~/.zoo/dataset`).
        test_split: the ratio to split part of dataset to test data,
                    the remained data would be train data

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """

    path = base.maybe_download(
        path, dest_dir,
        'https://s3.amazonaws.com/keras-datasets/boston_housing.npz')

    with np.load(path) as f:
        x = f['x']
        y = f['y']

    # shuffle features and labels together so (x, y) pairs stay aligned
    shuffle_by_seed([x, y])
    split_index = int(len(x) * (1 - test_split))

    x_train, y_train = x[:split_index], y[:split_index]

    x_test, y_test = x[split_index:], y[split_index:]

    return (x_train, y_train), (x_test, y_test)
Esempio n. 7
0
def read_data_sets(train_dir, data_type="train"):
    """
    Parse or download mnist data if train_dir is empty.

    :param: train_dir: The directory storing the mnist data

    :param: data_type: Reading training set or testing set. It can be either "train" or "test"

    :return:

    ```
    (ndarray, ndarray) representing (features, labels)
    features is a 4D uint8 numpy array [index, y, x, depth]
    representing each pixel valued from 0 to 255.
    labels is 1D uint8 numpy array representing the label valued from 0 to 9.
    ```

    """
    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

    if data_type == "train":
        local_file = base.maybe_download(TRAIN_IMAGES, train_dir,
                                         SOURCE_URL + TRAIN_IMAGES)
        with open(local_file, 'rb') as f:
            train_images = extract_images(f)

        local_file = base.maybe_download(TRAIN_LABELS, train_dir,
                                         SOURCE_URL + TRAIN_LABELS)
        with open(local_file, 'rb') as f:
            train_labels = extract_labels(f)
        return train_images, train_labels

    else:
        # any data_type other than "train" selects the test set
        local_file = base.maybe_download(TEST_IMAGES, train_dir,
                                         SOURCE_URL + TEST_IMAGES)
        with open(local_file, 'rb') as f:
            test_images = extract_images(f)

        local_file = base.maybe_download(TEST_LABELS, train_dir,
                                         SOURCE_URL + TEST_LABELS)
        with open(local_file, 'rb') as f:
            test_labels = extract_labels(f)
        return test_images, test_labels
Esempio n. 8
0
def download_news20(dest_dir):
    """Download the 20news-bydate tarball into *dest_dir* and extract it.

    NOTE(review): the guard tests dest_dir itself, which maybe_download has
    just populated, so extraction may never run on a second call — the check
    probably meant an extracted sub-directory; confirm against callers.
    """
    file_name = "20news-bydate.tar.gz"
    file_abs_path = base.maybe_download(file_name, dest_dir, NEWS20_URL)
    if not os.path.exists(dest_dir):
        print("Extracting %s" % (file_abs_path))
        # The with-block closes the archive even if extraction fails; the
        # original opened the tarfile unconditionally and leaked the handle
        # whenever the guard was false.
        with tarfile.open(file_abs_path, "r:gz") as tar:
            tar.extractall(dest_dir)
Esempio n. 9
0
def read_data_sets(train_dir, data_type="train"):
    """
    Parse or download mnist data if train_dir is empty.

    :param: train_dir: The directory storing the mnist data

    :param: data_type: Reading training set or testing set. It can be either
                       "train" or "test" (anything other than "train" selects
                       the test set, matching the original if/else behavior).

    :return:

    ```
    (ndarray, ndarray) representing (features, labels)
    features is a 4D uint8 numpy array [index, y, x, depth] representing each pixel valued from 0 to 255.
    labels is 1D uint8 numpy array representing the label valued from 0 to 9.
    ```

    """
    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

    # Pick the image/label file pair once instead of duplicating the whole
    # download-and-extract sequence in both branches.
    if data_type == "train":
        images_file, labels_file = TRAIN_IMAGES, TRAIN_LABELS
    else:
        images_file, labels_file = TEST_IMAGES, TEST_LABELS

    local_file = base.maybe_download(images_file, train_dir,
                                     SOURCE_URL + images_file)
    with open(local_file, 'rb') as f:
        images = extract_images(f)

    local_file = base.maybe_download(labels_file, train_dir,
                                     SOURCE_URL + labels_file)
    with open(local_file, 'rb') as f:
        labels = extract_labels(f)
    return images, labels
Esempio n. 10
0
def download_news20(dest_dir):
    """Download the 20news-18828 tarball and extract it under *dest_dir*.

    Extraction is skipped when dest_dir/20news-18828 already exists.
    """
    news20 = "20news-18828.tar.gz"
    news20_path = base.maybe_download(news20, dest_dir, NEWS20_URL)
    news20_dir = os.path.join(dest_dir, "20news-18828")
    if not os.path.exists(news20_dir):
        print("Extracting %s to %s" % (news20_path, news20_dir))
        # The with-block closes the archive even on failure; the original
        # opened the tarfile unconditionally and leaked the open handle
        # whenever news20_dir already existed.
        with tarfile.open(news20_path, "r:gz") as tar:
            tar.extractall(dest_dir)
Esempio n. 11
0
def download_glove(dest_dir):
    """Download the GloVe 6B embeddings zip and extract to dest_dir/glove.6B.

    Extraction is skipped when the target directory already exists.
    """
    glove = "glove.6B.zip"
    glove_path = base.maybe_download(glove, dest_dir, GLOVE_URL)
    glove_dir = os.path.join(dest_dir, "glove.6B")
    if not os.path.exists(glove_dir):
        print("Extracting %s to %s" % (glove_path, glove_dir))
        # The with-block closes the archive even on failure; the original
        # opened the ZipFile unconditionally and leaked the open handle
        # whenever glove_dir already existed.
        with zipfile.ZipFile(glove_path, 'r') as zip_ref:
            zip_ref.extractall(glove_dir)
Esempio n. 12
0
def download_news20(dest_dir):
    """Download the 20news-18828 tarball, extract it under *dest_dir*.

    :return: absolute path of the extracted directory.
    """
    file_name = "20news-18828.tar.gz"
    file_abs_path = base.maybe_download(file_name, dest_dir, NEWS20_URL)
    extracted_to = os.path.join(dest_dir, "20news-18828")
    if not os.path.exists(extracted_to):
        print("Extracting %s to %s" % (file_abs_path, extracted_to))
        # The with-block closes the archive even on failure; the original
        # opened the tarfile unconditionally and leaked the open handle
        # whenever extracted_to already existed.
        with tarfile.open(file_abs_path, "r:gz") as tar:
            tar.extractall(dest_dir)
    return extracted_to
Esempio n. 13
0
def download_news20(dest_dir):
    """Download the 20news-18828 tarball, extract it under *dest_dir*.

    :return: absolute path of the extracted directory.
    """
    file_name = "20news-18828.tar.gz"
    file_abs_path = base.maybe_download(file_name, dest_dir, NEWS20_URL)
    extracted_to = os.path.join(dest_dir, "20news-18828")
    if not os.path.exists(extracted_to):
        print("Extracting %s to %s" % (file_abs_path, extracted_to))
        # The with-block closes the archive even on failure; the original
        # opened the tarfile unconditionally and leaked the open handle
        # whenever extracted_to already existed.
        with tarfile.open(file_abs_path, "r:gz") as tar:
            tar.extractall(dest_dir)
    return extracted_to
Esempio n. 14
0
 def load_roberta(self):
     """Download the roberta OpenVINO archive, unpack it and build self.est."""
     os.makedirs(local_path, exist_ok=True)
     archive_url = data_url + "/analytics-zoo-data/roberta/roberta.tar"
     archive_path = maybe_download("roberta.tar",
                                   local_path, archive_url)
     with tarfile.open(archive_path) as archive:
         archive.extractall(path=local_path)
     xml_path = os.path.join(local_path, "roberta/model.xml")
     self.est = Estimator.from_openvino(model_path=xml_path)
Esempio n. 15
0
 def setUp(self):
     """Download and extract an OpenVINO ResNet-50 IR, then build self.est."""
     # NOTE(review): TemporaryDirectory deletes local_path as soon as this
     # with-block exits, i.e. before any test method runs; this only works
     # if from_openvino reads everything it needs into memory — confirm.
     with tempfile.TemporaryDirectory() as local_path:
         model_url = data_url + "/analytics-zoo-data/openvino2020_resnet50.tar"
         model_path = maybe_download("openvino2020_resnet50.tar",
                                     local_path, model_url)
         tar = tarfile.open(model_path)
         tar.extractall(path=local_path)
         tar.close()
         # path of the extracted IR definition (.xml)
         model_path = os.path.join(
             local_path, "openvino2020_resnet50/resnet_v1_50.xml")
         self.est = Estimator.from_openvino(model_path=model_path)
Esempio n. 16
0
def download_glove_w2v(dest_dir):
    """Download the GloVe 6B zip and extract it to dest_dir/glove.6B.

    :return: absolute path of the extraction directory.
    """
    file_name = "glove.6B.zip"
    file_abs_path = base.maybe_download(file_name, dest_dir, GLOVE_URL)
    import zipfile
    extracted_to = os.path.join(dest_dir, "glove.6B")
    if not os.path.exists(extracted_to):
        print("Extracting %s to %s" % (file_abs_path, extracted_to))
        # The with-block closes the archive even on failure; the original
        # opened the ZipFile unconditionally and leaked the open handle
        # whenever extracted_to already existed.
        with zipfile.ZipFile(file_abs_path, 'r') as zip_ref:
            zip_ref.extractall(extracted_to)
    return extracted_to
Esempio n. 17
0
def download_glove_w2v(dest_dir):
    """Download the GloVe 6B zip and extract it to dest_dir/glove.6B.

    :return: absolute path of the extraction directory.
    """
    file_name = "glove.6B.zip"
    file_abs_path = base.maybe_download(file_name, dest_dir, GLOVE_URL)
    import zipfile
    extracted_to = os.path.join(dest_dir, "glove.6B")
    if not os.path.exists(extracted_to):
        print("Extracting %s to %s" % (file_abs_path, extracted_to))
        # The with-block closes the archive even on failure; the original
        # opened the ZipFile unconditionally and leaked the open handle
        # whenever extracted_to already existed.
        with zipfile.ZipFile(file_abs_path, 'r') as zip_ref:
            zip_ref.extractall(extracted_to)
    return extracted_to
Esempio n. 18
0
def get_data_iters(config, kv):
    """Create per-worker MNIST train/val iterators.

    :param config: dict holding at least "batch_size".
    :param kv: mxnet kvstore; rank/num_workers select this worker's partition.
    :return: (train_iter, val_iter) pair of mx.io.MNISTIter.
    """
    import os
    import zipfile
    import mxnet as mx
    from bigdl.dataset.base import maybe_download

    # In order to avoid conflict where multiple workers on the same node download and
    # zip data under the same location, here we let each worker have its own folder.

    # Not using mxnet.test_utils.get_mnist_iterator directly because data path is
    # hard-coded in this function.

    # In practice, data is supposed to be stored on a file system accessible to workers on
    # all nodes, for example, on HDFS or S3.

    # Hoist the worker-specific paths: the original rebuilt the
    # "worker" + str(kv.rank) string at every use.
    worker_dir = "worker" + str(kv.rank)
    data_dir = worker_dir + "/data"
    maybe_download("mnist.zip", worker_dir,
                   "http://data.mxnet.io/mxnet/data/mnist.zip")
    if not os.path.isdir(data_dir):
        with zipfile.ZipFile(worker_dir + "/mnist.zip") as zf:
            zf.extractall(data_dir)

    train_iter = mx.io.MNISTIter(
        image=data_dir + "/train-images-idx3-ubyte",
        label=data_dir + "/train-labels-idx1-ubyte",
        input_shape=(1, 28, 28),
        batch_size=config["batch_size"],
        shuffle=True,
        flat=False,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    val_iter = mx.io.MNISTIter(
        image=data_dir + "/t10k-images-idx3-ubyte",
        label=data_dir + "/t10k-labels-idx1-ubyte",
        input_shape=(1, 28, 28),
        batch_size=config["batch_size"],
        flat=False,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    return train_iter, val_iter
Esempio n. 19
0
def download_reuters(dest_dir):
    """Download pre-processed reuters newswire data

    :argument
        dest_dir: destination directory to store the data

    :return
        The absolute path of the stored data
    """
    return base.maybe_download(
        'reuters.pkl', dest_dir,
        'https://s3.amazonaws.com/text-datasets/reuters.pkl')
Esempio n. 20
0
def download_imdb(dest_dir):
    """Download pre-processed IMDB movie review data

    :argument
        dest_dir: destination directory to store the data

    :return
        The absolute path of the stored data
    """
    return base.maybe_download(
        "imdb_full.pkl", dest_dir,
        'https://s3.amazonaws.com/text-datasets/imdb_full.pkl')
Esempio n. 21
0
    def load_resnet(self):
        """Load reference input/output fixtures and build an OpenVINO Estimator."""
        in_path = os.path.join(resource_path, "orca/learn/resnet_input")
        out_path = os.path.join(resource_path, "orca/learn/resnet_output")
        self.input = np.array(read_file_and_cast(in_path)).reshape([3, 224, 224])
        # keep only the first row of the 4x1000 reference output
        self.output = np.array(read_file_and_cast(out_path)).reshape([4, 1000])[:1]

        os.makedirs(local_path, exist_ok=True)
        archive_url = data_url + "/analytics-zoo-data/openvino2020_resnet50.tar"
        archive_path = maybe_download("openvino2020_resnet50.tar",
                                      local_path, archive_url)
        with tarfile.open(archive_path) as archive:
            archive.extractall(path=local_path)
        xml_path = os.path.join(local_path, "openvino2020_resnet50/resnet_v1_50.xml")
        self.est = Estimator.from_openvino(model_path=xml_path)
Esempio n. 22
0
def read_data_sets(data_dir):
    """
    Parse or download movielens 1m  data if train_dir is empty.

    :param data_dir: The directory storing the movielens data
    :return: a 2D numpy array with user index and item index in each row
    """
    WHOLE_DATA = 'ml-1m.zip'
    local_file = base.maybe_download(WHOLE_DATA, data_dir, SOURCE_URL + WHOLE_DATA)
    extracted_to = os.path.join(data_dir, "ml-1m")
    if not os.path.exists(extracted_to):
        print("Extracting %s to %s" % (local_file, data_dir))
        # The with-block closes the archive even on failure; the original
        # opened the ZipFile unconditionally and leaked the open handle
        # whenever the data was already extracted.
        with zipfile.ZipFile(local_file, 'r') as zip_ref:
            zip_ref.extractall(data_dir)
    rating_files = os.path.join(extracted_to, "ratings.dat")

    # ratings.dat fields are separated by "::"; the original opened the file
    # inline and never closed the handle.
    with open(rating_files, "r") as f:
        rating_list = [i.strip().split("::") for i in f.readlines()]
    movielens_data = np.array(rating_list).astype(int)
    return movielens_data
Esempio n. 23
0
def get_word_index(dest_dir='/tmp/.zoo/dataset', filename='reuters_word_index.pkl'):
    """Retrieves the dictionary mapping word indices back to words.

    # Arguments
        dest_dir: where to cache the data (relative to `~/.zoo/dataset`).
        filename: dataset file name

    # Returns
        The word index dictionary.
    """

    path = base.maybe_download(filename,
                               dest_dir,
                               'https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl')

    # The with-block guarantees the file is closed even if unpickling
    # raises; the original manual open/close pair leaked the handle on error.
    with open(path, 'rb') as f:
        data = cPickle.load(f, encoding='latin1')
    return data
Esempio n. 24
0
 def test_load_tf_openvino(self):
     """Load a TF Faster R-CNN graph via the OpenVINO backend two ways
     (explicit config paths vs. model_type) and predict with each."""
     tmp_dir = self.create_temp_dir()
     base_url = data_url + "/TF_faster_rcnn_resnet101_coco_2018_01_28"
     for name in ("frozen_inference_graph.pb", "pipeline.config",
                  "faster_rcnn_support.json"):
         maybe_download(name, tmp_dir, base_url + "/" + name)
     first = InferenceModel(3)
     first.load_tf(tmp_dir + "/frozen_inference_graph.pb",
                   backend="openvino",
                   ov_pipeline_config_path=tmp_dir + "/pipeline.config",
                   ov_extensions_config_path=tmp_dir +
                   "/faster_rcnn_support.json")
     input_data = np.random.random([4, 1, 3, 600, 600])
     first_preds = first.predict(input_data)
     second = InferenceModel(5)
     second.load_tf(tmp_dir + "/frozen_inference_graph.pb",
                    backend="openvino",
                    model_type="faster_rcnn_resnet101_coco")
     second_preds = second.predict(input_data)
Esempio n. 25
0
def load_data(data_dir):
    """Download and prepare the MovieLens 1M ratings as train/test XShards.

    :param data_dir: directory where the ml-1m archive is cached/extracted.
    :return: (train_data, test_data, max_user_id, max_item_id); the XShards
             hold {"x": (user_array, item_array), "y": label_array}.
    """
    WHOLE_DATA = 'ml-1m.zip'
    local_file = base.maybe_download(WHOLE_DATA, data_dir,
                                     SOURCE_URL + WHOLE_DATA)
    extracted_to = os.path.join(data_dir, "ml-1m")
    if not os.path.exists(extracted_to):
        print("Extracting %s to %s" % (local_file, data_dir))
        # The with-block closes the archive even on failure; the original
        # opened the ZipFile unconditionally and leaked the open handle
        # whenever the data was already extracted.
        with zipfile.ZipFile(local_file, 'r') as zip_ref:
            zip_ref.extractall(data_dir)
    rating_files = os.path.join(extracted_to, "ratings.dat")

    # replace :: to : for spark 2.4 support
    new_rating_files = os.path.join(extracted_to, "ratings_new.dat")
    if not os.path.exists(new_rating_files):
        # context managers close both handles even if a write raises; the
        # original manual open/close pairs leaked them on error
        with open(rating_files, "rt") as fin, \
                open(new_rating_files, "wt") as fout:
            # for each line in the input file, replace the separator and
            # write to the output file
            for line in fin:
                fout.write(line.replace('::', ':'))

    # read movielens csv to XShards of Pandas Dataframe
    full_data = zoo.orca.data.pandas.read_csv(new_rating_files,
                                              sep=':',
                                              header=None,
                                              names=COLUMN_NAMES,
                                              usecols=[0, 1, 2],
                                              dtype={
                                                  0: np.int32,
                                                  1: np.int32,
                                                  2: np.int32
                                              })

    user_set = set(full_data['user'].unique())
    item_set = set(full_data['item'].unique())

    min_user_id = min(user_set)
    max_user_id = max(user_set)
    min_item_id = min(item_set)
    max_item_id = max(item_set)
    print(min_user_id, max_user_id, min_item_id, max_item_id)

    # update label starting from 0
    def update_label(df):
        df['label'] = df['label'] - 1
        return df

    full_data = full_data.transform_shard(update_label)

    # split to train/test dataset
    def split_train_test(data):
        # splitting the full set into train and test sets.
        train, test = train_test_split(data, test_size=0.2, random_state=100)
        return train, test

    train_data, test_data = full_data.transform_shard(split_train_test).split()

    # repackage each shard into the {"x": ..., "y": ...} dict predict/fit expect
    def to_train_val_shard(df):
        result = {
            "x": (df['user'].to_numpy(), df['item'].to_numpy()),
            "y": df['label'].to_numpy()
        }
        return result

    train_data = train_data.transform_shard(to_train_val_shard)
    test_data = test_data.transform_shard(to_train_val_shard)
    return train_data, test_data, max_user_id, max_item_id
Esempio n. 26
0
def download_data(dest_dir):
    """Download the tiny-shakespeare corpus and return its local path."""
    TINYSHAKESPEARE_URL = 'https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt'  # noqa
    return base.maybe_download("input.txt", dest_dir, TINYSHAKESPEARE_URL)
Esempio n. 27
0
def download_data(dest_dir):
    """Fetch the tiny-shakespeare text file into *dest_dir*.

    :return: absolute path of the downloaded file.
    """
    TINYSHAKESPEARE_URL = 'https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt'  # noqa
    local_path = base.maybe_download("input.txt", dest_dir, TINYSHAKESPEARE_URL)
    return local_path