def test_load_openvino(self):
    local_path = self.create_temp_dir()
    model = InferenceModel(1)
    model_url = data_url + "/analytics-zoo-models/openvino/2018_R5/resnet_v1_50.xml"
    weight_url = data_url + "/analytics-zoo-models/openvino/2018_R5/resnet_v1_50.bin"
    model_path = maybe_download("resnet_v1_50.xml", local_path, model_url)
    weight_path = maybe_download("resnet_v1_50.bin", local_path, weight_url)
    model.load_openvino(model_path, weight_path)
    input_data = np.random.random([4, 1, 224, 224, 3])
    model.predict(input_data)
def test_load_tf_openvino_ic(self):
    local_path = self.create_temp_dir()
    print(local_path)
    url = data_url + "/models/resnet_v1_50_2016_08_28.tar.gz"
    file_abs_path = maybe_download("resnet_v1_50_2016_08_28.tar.gz", local_path, url)
    tar = tarfile.open(file_abs_path, "r:gz")
    print("Extracting %s to %s" % (file_abs_path, local_path))
    tar.extractall(local_path)
    tar.close()
    model = InferenceModel(3)
    model.load_tf_image_classification_as_openvino(
        model_path=None,
        image_classification_model_type="resnet_v1_50",
        checkpoint_path=local_path + "/resnet_v1_50.ckpt",
        input_shape=[4, 224, 224, 3],
        if_reverse_input_channels=True,
        mean_values=[123.68, 116.78, 103.94],
        scale=1)
    print(model)
    input_data = np.random.random([4, 1, 224, 224, 3])
    s3url = "https://s3-ap-southeast-1.amazonaws.com/"
    var_url = s3url + "analytics-zoo-models/openvino/val_bmp_32.tar"
    lib_url = s3url + "analytics-zoo-models/openvino/opencv_4.0.0_ubuntu_lib.tar"
    var_file_abs_path = maybe_download("val_bmp_32.tar", local_path, var_url)
    lib_file_abs_path = maybe_download("opencv_4.0.0_ubuntu_lib.tar", local_path, lib_url)
    var_tar = tarfile.open(var_file_abs_path, "r")
    print("Extracting %s to %s" % (var_file_abs_path, local_path))
    var_tar.extractall(local_path)
    var_tar.close()
    lib_tar = tarfile.open(lib_file_abs_path, "r")
    print("Extracting %s to %s" % (lib_file_abs_path, local_path))
    lib_tar.extractall(local_path)
    lib_tar.close()
    validation_file_path = local_path + "/val_bmp_32/val.txt"
    opencv_lib_path = local_path + "/lib"
    model2 = InferenceModel(3)
    model2.load_tf_as_calibrated_openvino(
        model_path=None,
        model_type="resnet_v1_50",
        checkpoint_path=local_path + "/resnet_v1_50.ckpt",
        input_shape=[4, 224, 224, 3],
        if_reverse_input_channels=True,
        mean_values=[123.68, 116.78, 103.94],
        scale=1,
        network_type='C',
        validation_file_path=validation_file_path,
        subset=32,
        opencv_lib_path=opencv_lib_path)
    print(model2)
    model2.predict(input_data)
def test_load_openvino(self):
    local_path = self.create_temp_dir()
    url = data_url + "/IR_faster_rcnn_resnet101_coco_2018_01_28"
    maybe_download("frozen_inference_graph.xml", local_path,
                   url + "/frozen_inference_graph.xml")
    maybe_download("frozen_inference_graph.bin", local_path,
                   url + "/frozen_inference_graph.bin")
    model = InferenceModel()
    model.load_openvino(local_path + "/frozen_inference_graph.xml",
                        local_path + "/frozen_inference_graph.bin")
    input_data = np.random.random([1, 1, 3, 600, 600])
    output_data = model.predict(input_data)
def test_openvino(self):
    with tempfile.TemporaryDirectory() as local_path:
        model_url = data_url + "/analytics-zoo-data/openvino2020_resnet50.tar"
        model_path = maybe_download("openvino2020_resnet50.tar", local_path, model_url)
        # Block until extraction finishes; a bare Popen would race with the
        # model load below.
        cmd = "tar -xvf " + model_path + " -C " + local_path
        subprocess.check_call(cmd.split())
        model_path = os.path.join(
            local_path, "openvino2020_resnet50/resnet_v1_50.xml")
        est = Estimator.from_openvino(model_path=model_path)

        # ndarray
        input_data = np.random.random([20, 4, 3, 224, 224])
        result = est.predict(input_data)
        print(result)

        # xshards
        input_data_list = [np.random.random([1, 4, 3, 224, 224]),
                           np.random.random([2, 4, 3, 224, 224])]
        sc = init_nncontext()
        rdd = sc.parallelize(input_data_list, numSlices=2)
        shards = SparkXShards(rdd)

        def pre_processing(images):
            return {"x": images}

        shards = shards.transform_shard(pre_processing)
        result = est.predict(shards)
        result_c = result.collect()
        print(result_c)
def test_load_tf_openvino(self):
    local_path = self.create_temp_dir()
    url = data_url + "/models/object_detection/faster_rcnn_resnet101_coco_2018_01_28.tar.gz"
    file_abs_path = maybe_download(
        "faster_rcnn_resnet101_coco_2018_01_28.tar.gz", local_path, url)
    tar = tarfile.open(file_abs_path, "r:gz")
    extracted_to = os.path.join(local_path, "faster_rcnn_resnet101_coco_2018_01_28")
    if not os.path.exists(extracted_to):
        print("Extracting %s to %s" % (file_abs_path, extracted_to))
        tar.extractall(local_path)
    tar.close()
    model = InferenceModel(3)
    model.load_tf(model_path=extracted_to + "/frozen_inference_graph.pb",
                  backend="openvino",
                  model_type="faster_rcnn_resnet101_coco",
                  ov_pipeline_config_path=extracted_to + "/pipeline.config",
                  ov_extensions_config_path=None)
    input_data = np.random.random([4, 1, 3, 600, 600])
    output_data = model.predict(input_data)
    model2 = InferenceModel(3)
    model2.load_tf_object_detection_as_openvino(
        model_path=extracted_to + "/frozen_inference_graph.pb",
        object_detection_model_type="faster_rcnn_resnet101_coco",
        pipeline_config_path=extracted_to + "/pipeline.config",
        extensions_config_path=None)
    model2.predict(input_data)
def load_data(path='boston_housing.npz', dest_dir='/tmp/.zoo/dataset', test_split=0.2):
    """Loads the Boston Housing dataset; the download URL is the same one
    used by keras.datasets.

    # Arguments
        path: file name to cache the dataset under within `dest_dir`.
        dest_dir: directory where to cache the data.
        test_split: fraction of the data to hold out as the test set;
            the remainder becomes the training set.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    path = base.maybe_download(
        path, dest_dir,
        'https://s3.amazonaws.com/keras-datasets/boston_housing.npz')
    with np.load(path) as f:
        x = f['x']
        y = f['y']
    shuffle_by_seed([x, y])
    split_index = int(len(x) * (1 - test_split))
    x_train, y_train = x[:split_index], y[:split_index]
    x_test, y_test = x[split_index:], y[split_index:]
    return (x_train, y_train), (x_test, y_test)
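# A minimal usage sketch (hypothetical demo helper, not part of the original
# module): downloads the data on first call and prints the split shapes.
def _demo_boston_housing():
    (x_train, y_train), (x_test, y_test) = load_data()
    # With the default test_split=0.2, about 80% of the 506 rows go to train.
    print(x_train.shape, y_train.shape)  # e.g. (404, 13) (404,)
    print(x_test.shape, y_test.shape)    # e.g. (102, 13) (102,)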
def read_data_sets(train_dir, data_type="train"):
    """
    Parse or download mnist data if train_dir is empty.

    :param train_dir: The directory storing the mnist data
    :param data_type: Reading training set or testing set. It can be either
           "train" or "test".
    :return:

    ```
    (ndarray, ndarray) representing (features, labels).
    features is a 4D uint8 numpy array [index, y, x, depth]
    representing each pixel valued from 0 to 255.
    labels is a 1D uint8 numpy array representing the label
    valued from 0 to 9.
    ```
    """
    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

    if data_type == "train":
        local_file = base.maybe_download(TRAIN_IMAGES, train_dir,
                                         SOURCE_URL + TRAIN_IMAGES)
        with open(local_file, 'rb') as f:
            train_images = extract_images(f)
        local_file = base.maybe_download(TRAIN_LABELS, train_dir,
                                         SOURCE_URL + TRAIN_LABELS)
        with open(local_file, 'rb') as f:
            train_labels = extract_labels(f)
        return train_images, train_labels
    else:
        local_file = base.maybe_download(TEST_IMAGES, train_dir,
                                         SOURCE_URL + TEST_IMAGES)
        with open(local_file, 'rb') as f:
            test_images = extract_images(f)
        local_file = base.maybe_download(TEST_LABELS, train_dir,
                                         SOURCE_URL + TEST_LABELS)
        with open(local_file, 'rb') as f:
            test_labels = extract_labels(f)
        return test_images, test_labels
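# A minimal usage sketch (hypothetical demo helper, not part of the original
# module): loads the MNIST training split and inspects it.
def _demo_mnist():
    images, labels = read_data_sets("/tmp/mnist", data_type="train")
    print(images.shape, labels.shape)  # e.g. (60000, 28, 28, 1) (60000,)
    print(images.dtype, labels.min(), labels.max())  # uint8 0 9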
def download_news20(dest_dir): file_name = "20news-bydate.tar.gz" file_abs_path = base.maybe_download(file_name, dest_dir, NEWS20_URL) tar = tarfile.open(file_abs_path, "r:gz") if not os.path.exists(dest_dir): print("Extracting %s" % (file_abs_path)) tar.extractall(dest_dir) tar.close()
def download_news20(dest_dir):
    news20 = "20news-18828.tar.gz"
    news20_path = base.maybe_download(news20, dest_dir, NEWS20_URL)
    tar = tarfile.open(news20_path, "r:gz")
    news20_dir = os.path.join(dest_dir, "20news-18828")
    if not os.path.exists(news20_dir):
        print("Extracting %s to %s" % (news20_path, news20_dir))
        tar.extractall(dest_dir)
    tar.close()
def download_glove(dest_dir):
    glove = "glove.6B.zip"
    glove_path = base.maybe_download(glove, dest_dir, GLOVE_URL)
    zip_ref = zipfile.ZipFile(glove_path, 'r')
    glove_dir = os.path.join(dest_dir, "glove.6B")
    if not os.path.exists(glove_dir):
        print("Extracting %s to %s" % (glove_path, glove_dir))
        zip_ref.extractall(glove_dir)
    zip_ref.close()
def download_news20(dest_dir): file_name = "20news-18828.tar.gz" file_abs_path = base.maybe_download(file_name, dest_dir, NEWS20_URL) tar = tarfile.open(file_abs_path, "r:gz") extracted_to = os.path.join(dest_dir, "20news-18828") if not os.path.exists(extracted_to): print("Extracting %s to %s" % (file_abs_path, extracted_to)) tar.extractall(dest_dir) tar.close() return extracted_to
def load_roberta(self):
    os.makedirs(local_path, exist_ok=True)
    model_url = data_url + "/analytics-zoo-data/roberta/roberta.tar"
    model_path = maybe_download("roberta.tar", local_path, model_url)
    tar = tarfile.open(model_path)
    tar.extractall(path=local_path)
    tar.close()
    model_path = os.path.join(local_path, "roberta/model.xml")
    self.est = Estimator.from_openvino(model_path=model_path)
def setUp(self):
    # Note: the temporary directory is deleted when the with-block exits,
    # so the estimator must have read the model files by then.
    with tempfile.TemporaryDirectory() as local_path:
        model_url = data_url + "/analytics-zoo-data/openvino2020_resnet50.tar"
        model_path = maybe_download("openvino2020_resnet50.tar", local_path, model_url)
        tar = tarfile.open(model_path)
        tar.extractall(path=local_path)
        tar.close()
        model_path = os.path.join(
            local_path, "openvino2020_resnet50/resnet_v1_50.xml")
        self.est = Estimator.from_openvino(model_path=model_path)
def download_glove_w2v(dest_dir):
    import zipfile
    file_name = "glove.6B.zip"
    file_abs_path = base.maybe_download(file_name, dest_dir, GLOVE_URL)
    zip_ref = zipfile.ZipFile(file_abs_path, 'r')
    extracted_to = os.path.join(dest_dir, "glove.6B")
    if not os.path.exists(extracted_to):
        print("Extracting %s to %s" % (file_abs_path, extracted_to))
        zip_ref.extractall(extracted_to)
    zip_ref.close()
    return extracted_to
def get_data_iters(config, kv):
    import os
    import zipfile
    import mxnet as mx
    from bigdl.dataset.base import maybe_download

    # To avoid conflicts where multiple workers on the same node download and
    # unzip data to the same location, each worker gets its own folder.
    # We don't use mxnet.test_utils.get_mnist_iterator directly because the
    # data path is hard-coded in that function.
    # In practice, data is supposed to be stored on a file system accessible
    # to workers on all nodes, for example HDFS or S3.
    worker_dir = "worker" + str(kv.rank)
    maybe_download("mnist.zip", worker_dir,
                   "http://data.mxnet.io/mxnet/data/mnist.zip")
    if not os.path.isdir(worker_dir + "/data"):
        with zipfile.ZipFile(worker_dir + "/mnist.zip") as zf:
            zf.extractall(worker_dir + "/data")
    train_iter = mx.io.MNISTIter(
        image=worker_dir + "/data/train-images-idx3-ubyte",
        label=worker_dir + "/data/train-labels-idx1-ubyte",
        input_shape=(1, 28, 28),
        batch_size=config["batch_size"],
        shuffle=True,
        flat=False,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    val_iter = mx.io.MNISTIter(
        image=worker_dir + "/data/t10k-images-idx3-ubyte",
        label=worker_dir + "/data/t10k-labels-idx1-ubyte",
        input_shape=(1, 28, 28),
        batch_size=config["batch_size"],
        flat=False,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    return train_iter, val_iter
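# A minimal usage sketch (hypothetical demo helper, not part of the original
# module): drives the function with a local kvstore, which has rank 0 and a
# single worker, so the iterators read the full dataset.
def _demo_mnist_iters():
    import mxnet as mx
    kv = mx.kvstore.create("local")
    train_iter, val_iter = get_data_iters({"batch_size": 32}, kv)
    batch = next(iter(train_iter))
    print(batch.data[0].shape)  # (32, 1, 28, 28)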
def download_reuters(dest_dir):
    """Download the pre-processed Reuters newswire data.

    :argument dest_dir: destination directory to store the data
    :return: The absolute path of the stored data
    """
    file_name = 'reuters.pkl'
    file_abs_path = base.maybe_download(
        file_name, dest_dir,
        'https://s3.amazonaws.com/text-datasets/reuters.pkl')
    return file_abs_path
def download_imdb(dest_dir):
    """Download the pre-processed IMDB movie review data.

    :argument dest_dir: destination directory to store the data
    :return: The absolute path of the stored data
    """
    file_name = "imdb_full.pkl"
    file_abs_path = base.maybe_download(
        file_name, dest_dir,
        'https://s3.amazonaws.com/text-datasets/imdb_full.pkl')
    return file_abs_path
def load_resnet(self):
    input_file_path = os.path.join(resource_path, "orca/learn/resnet_input")
    output_file_path = os.path.join(resource_path, "orca/learn/resnet_output")
    self.input = read_file_and_cast(input_file_path)
    self.output = read_file_and_cast(output_file_path)
    self.input = np.array(self.input).reshape([3, 224, 224])
    self.output = np.array(self.output).reshape([4, 1000])[:1]
    os.makedirs(local_path, exist_ok=True)
    model_url = data_url + "/analytics-zoo-data/openvino2020_resnet50.tar"
    model_path = maybe_download("openvino2020_resnet50.tar", local_path, model_url)
    tar = tarfile.open(model_path)
    tar.extractall(path=local_path)
    tar.close()
    model_path = os.path.join(local_path, "openvino2020_resnet50/resnet_v1_50.xml")
    self.est = Estimator.from_openvino(model_path=model_path)
def read_data_sets(data_dir):
    """
    Parse or download movielens 1m data if data_dir is empty.

    :param data_dir: The directory storing the movielens data
    :return: a 2D numpy array with user index and item index in each row
    """
    WHOLE_DATA = 'ml-1m.zip'
    local_file = base.maybe_download(WHOLE_DATA, data_dir, SOURCE_URL + WHOLE_DATA)
    zip_ref = zipfile.ZipFile(local_file, 'r')
    extracted_to = os.path.join(data_dir, "ml-1m")
    if not os.path.exists(extracted_to):
        print("Extracting %s to %s" % (local_file, data_dir))
        zip_ref.extractall(data_dir)
    zip_ref.close()
    rating_file = os.path.join(extracted_to, "ratings.dat")
    # Use a context manager so the file handle is closed after reading.
    with open(rating_file, "r") as f:
        rating_list = [line.strip().split("::") for line in f]
    movielens_data = np.array(rating_list).astype(int)
    return movielens_data
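# A minimal usage sketch (hypothetical demo helper, not part of the original
# module): loads the 1M ratings and prints the array shape.
def _demo_movielens():
    data = read_data_sets("/tmp/movielens")
    # Each row is [user_id, item_id, rating, timestamp].
    print(data.shape)  # e.g. (1000209, 4) for ml-1m
    print(data[0])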
def get_word_index(dest_dir='/tmp/.zoo/dataset', filename='reuters_word_index.pkl'):
    """Retrieves the dictionary mapping word indices back to words.

    # Arguments
        dest_dir: directory where to cache the data.
        filename: dataset file name

    # Returns
        The word index dictionary.
    """
    path = base.maybe_download(
        filename, dest_dir,
        'https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl')
    with open(path, 'rb') as f:
        data = cPickle.load(f, encoding='latin1')
    return data
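# A minimal usage sketch (hypothetical demo helper, not part of the original
# module): fetches the index and looks up a common word.
def _demo_word_index():
    word_index = get_word_index()
    print(len(word_index))
    print(word_index.get('the'))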
def test_load_tf_openvino(self):
    local_path = self.create_temp_dir()
    url = data_url + "/TF_faster_rcnn_resnet101_coco_2018_01_28"
    maybe_download("frozen_inference_graph.pb", local_path,
                   url + "/frozen_inference_graph.pb")
    maybe_download("pipeline.config", local_path, url + "/pipeline.config")
    maybe_download("faster_rcnn_support.json", local_path,
                   url + "/faster_rcnn_support.json")
    model = InferenceModel(3)
    model.load_tf(local_path + "/frozen_inference_graph.pb",
                  backend="openvino",
                  ov_pipeline_config_path=local_path + "/pipeline.config",
                  ov_extensions_config_path=local_path + "/faster_rcnn_support.json")
    input_data = np.random.random([4, 1, 3, 600, 600])
    output_data = model.predict(input_data)
    model2 = InferenceModel(5)
    model2.load_tf(local_path + "/frozen_inference_graph.pb",
                   backend="openvino",
                   model_type="faster_rcnn_resnet101_coco")
    output_data2 = model2.predict(input_data)
def load_data(data_dir):
    WHOLE_DATA = 'ml-1m.zip'
    local_file = base.maybe_download(WHOLE_DATA, data_dir, SOURCE_URL + WHOLE_DATA)
    zip_ref = zipfile.ZipFile(local_file, 'r')
    extracted_to = os.path.join(data_dir, "ml-1m")
    if not os.path.exists(extracted_to):
        print("Extracting %s to %s" % (local_file, data_dir))
        zip_ref.extractall(data_dir)
    zip_ref.close()
    rating_files = os.path.join(extracted_to, "ratings.dat")

    # Replace "::" with ":" for Spark 2.4 support.
    new_rating_files = os.path.join(extracted_to, "ratings_new.dat")
    if not os.path.exists(new_rating_files):
        with open(rating_files, "rt") as fin, open(new_rating_files, "wt") as fout:
            for line in fin:
                fout.write(line.replace('::', ':'))

    # Read the movielens csv into an XShards of Pandas DataFrame.
    full_data = zoo.orca.data.pandas.read_csv(
        new_rating_files, sep=':', header=None, names=COLUMN_NAMES,
        usecols=[0, 1, 2], dtype={0: np.int32, 1: np.int32, 2: np.int32})
    user_set = set(full_data['user'].unique())
    item_set = set(full_data['item'].unique())
    min_user_id = min(user_set)
    max_user_id = max(user_set)
    min_item_id = min(item_set)
    max_item_id = max(item_set)
    print(min_user_id, max_user_id, min_item_id, max_item_id)

    # Update labels so they start from 0.
    def update_label(df):
        df['label'] = df['label'] - 1
        return df

    full_data = full_data.transform_shard(update_label)

    # Split each shard of the full set into train and test parts.
    def split_train_test(data):
        train, test = train_test_split(data, test_size=0.2, random_state=100)
        return train, test

    train_data, test_data = full_data.transform_shard(split_train_test).split()

    def to_train_val_shard(df):
        result = {
            "x": (df['user'].to_numpy(), df['item'].to_numpy()),
            "y": df['label'].to_numpy()
        }
        return result

    train_data = train_data.transform_shard(to_train_val_shard)
    test_data = test_data.transform_shard(to_train_val_shard)
    return train_data, test_data, max_user_id, max_item_id
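# A minimal usage sketch (hypothetical demo helper, not part of the original
# module; assumes an Orca/Spark context is already initialized):
def _demo_movielens_shards():
    train_data, test_data, max_user_id, max_item_id = load_data("/tmp/movielens")
    print(max_user_id, max_item_id)  # e.g. 6040 3952 for ml-1m
    first_shard = train_data.collect()[0]
    print(first_shard["x"][0].shape, first_shard["y"].shape)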
def download_data(dest_dir):
    TINYSHAKESPEARE_URL = 'https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt'  # noqa
    file_name = "input.txt"
    file_abs_path = base.maybe_download(file_name, dest_dir, TINYSHAKESPEARE_URL)
    return file_abs_path
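# A minimal usage sketch (hypothetical demo helper, not part of the original
# module): downloads the tiny-shakespeare corpus and peeks at the first line.
def _demo_tinyshakespeare():
    path = download_data("/tmp/shakespeare")
    with open(path) as f:
        text = f.read()
    print(len(text))             # roughly 1.1M characters
    print(text.splitlines()[0])  # "First Citizen:"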