def get_model(latest_checkpoint, strategy, model_source, model_name,
              image_size, trainable_layer, fc_size, category_labels,
              loss_function, tfa_metrics):
    """Build, compile and optionally restore an image classifier.

    Args:
        latest_checkpoint: Path handed to ``load_weights``; ignored when falsy.
        strategy: Object whose ``scope()`` context wraps model creation.
        model_source: ``'keras'`` (``tf.keras.applications`` backbone) or
            ``'tfhub'`` (module loaded from a local hub folder).
        model_name: Backbone name; must be a key of the table for the
            selected source.
        image_size: Side length; the model input is
            ``(image_size, image_size, 3)``.
        trainable_layer: For ``'keras'``, the number of trailing backbone
            layers left trainable (``<= 0`` freezes every layer). For
            ``'tfhub'``, ``0`` freezes the hub layer, anything else makes
            it trainable.
        fc_size: Units of the optional hidden dense layer; skipped when
            ``<= 0``.
        category_labels: Sized collection; its length is the number of
            output units.
        loss_function: One of ``'fl'``, ``'ce'``, ``'wce'``, ``'ck'``.
        tfa_metrics: When not equal to ``0`` a Cohen-kappa metric is added.

    Returns:
        The compiled ``tf.keras.Model``.

    Raises:
        ValueError: If ``loss_function`` is not one of the supported names
            (previously this surfaced as an UnboundLocalError at compile
            time, after the whole model had been built).
        SystemExit: Via ``exit()`` for an unknown ``model_source`` or
            ``model_name`` (behaviour kept from the original).
    """
    supported_losses = ('fl', 'ce', 'wce', 'ck')
    if loss_function not in supported_losses:
        raise ValueError(
            'Unknown loss_function {!r}; expected one of {}'.format(
                loss_function, supported_losses))

    num_classes = len(category_labels)
    input_shape = (image_size, image_size, 3)

    with strategy.scope():
        if model_source == 'keras':
            # Lambdas keep attribute lookup lazy: only the requested
            # backbone is resolved.
            keras_builders = {
                'VGG16': lambda: tf.keras.applications.VGG16,
                'VGG19': lambda: tf.keras.applications.VGG19,
                'MobileNetV2': lambda: tf.keras.applications.MobileNetV2,
                'InceptionV3': lambda: tf.keras.applications.InceptionV3,
                'Xception': lambda: tf.keras.applications.Xception,
                'ResNet50': lambda: tf.keras.applications.ResNet50,
                'DenseNet201':
                    lambda: tf.keras.applications.densenet.DenseNet201,
                'NASNetLarge':
                    lambda: tf.keras.applications.nasnet.NASNetLarge,
            }
            if model_name not in keras_builders:
                print("Wrong model name")
                exit()
            base_model = keras_builders[model_name]()(
                input_shape=input_shape,
                include_top=False,
                weights='imagenet')
            print(model_name)

            # Freeze everything except the last `trainable_layer` layers.
            if trainable_layer > 0:
                frozen_layers = base_model.layers[:-trainable_layer]
            else:
                frozen_layers = base_model.layers
            for layer in frozen_layers:
                layer.trainable = False
        elif model_source == 'tfhub':
            hub_folders = {
                'MobileNetV2':
                    '/mnt/AI/tfhub/7d894117f08a295a627d24c65df048e34e7ac7d4/',
                'InceptionV3':
                    '/mnt/AI/tfhub/3f675e18714cfa891d083a31557195a0508e560d/',
                'ResNet50':
                    '/mnt/AI/tfhub/5e690529696a1ca5ff36a5e9c7f7255180ef2364/',
                'NASNetLarge':
                    '/mnt/AI/tfhub/c57f54b3f7d0ff4ab1eba180075fb0afe4101034/',
            }
            if model_name not in hub_folders:
                print("Wrong model name")
                exit()
            base_model_folder = hub_folders[model_name]
            print(model_name)

            base_model_trainable = trainable_layer != 0
            base_model = tf.keras.Sequential([
                hub.KerasLayer(hub.load(base_model_folder),
                               trainable=base_model_trainable,
                               input_shape=input_shape)
            ])
        else:
            print("Wrong model source")
            exit()

        # Classification head.
        x = base_model.output
        if model_source == 'keras':
            # Only the keras branch applies pooling here.
            x = tf.keras.layers.GlobalAveragePooling2D()(x)
        if fc_size > 0:
            x = tf.keras.layers.Dense(fc_size, activation='relu')(x)
            # NOTE(review): dropout is assumed to belong to the optional
            # dense layer; the source formatting was lost - confirm.
            x = tf.keras.layers.Dropout(0.25)(x)
        predictions = tf.keras.layers.Dense(
            num_classes, activation=tf.nn.softmax, name='predictions')(x)
        model_created = tf.keras.Model(inputs=base_model.input,
                                       outputs=predictions)

        # Loss selection.
        # NOTE(review): the two-class BinaryCrossentropy override is assumed
        # to apply only to 'ce'/'wce'; the source formatting was lost - confirm.
        if loss_function == 'fl':
            loss = tfa.losses.SigmoidFocalCrossEntropy()
        elif loss_function == 'ck':
            loss = CohenKappaLoss(num_classes)
        elif num_classes == 2:
            loss = tf.keras.losses.BinaryCrossentropy()
        else:
            loss = tf.keras.losses.CategoricalCrossentropy()

        # Metrics: accuracy flavour depends on the number of classes.
        if num_classes == 2:
            metrics = [tf.keras.metrics.BinaryAccuracy()]
        else:
            metrics = [tf.keras.metrics.CategoricalAccuracy()]
        metrics += [
            tf.keras.metrics.AUC(name='auc'),
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
            tfa.metrics.F1Score(num_classes=num_classes),
            tfa.metrics.FBetaScore(num_classes=num_classes),
        ]
        if tfa_metrics != 0:
            metrics += [tfa.metrics.CohenKappa(num_classes=num_classes)]

        model_created.compile(optimizer=tf.keras.optimizers.Adam(),
                              loss=loss,
                              metrics=metrics)

        if latest_checkpoint:
            model_created.load_weights(latest_checkpoint)

        return model_created
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
from PIL import Image

# Decode the test image into an array while the file is open; the context
# manager closes the file handle, which the bare Image.open() call leaked.
with Image.open('test.jpg') as img:
    np_array = np.array(img)

# Convert to a float32 tensor and prepend a batch axis.
tf_img = tf.convert_to_tensor(np_array, dtype=tf.float32)
tf_img = tf_img[tf.newaxis, ...]

# Load the localizer and print the output of its 'default' signature.
localizer = hub.load("https://tfhub.dev/google/object_detection/mobile_object_localizer_v1/1")
print(localizer.signatures['default'](tf_img))
"""More detailed information about installing Tensorflow can be found at [https://www.tensorflow.org/install/](https://www.tensorflow.org/install/).""" #@title Load the Universal Sentence Encoder's TF Hub module from absl import logging import tensorflow as tf import tensorflow_hub as hub import matplotlib.pyplot as plt import numpy as np import os import pandas as pd import re import seaborn as sns module_url = "https://tfhub.dev/google/universal-sentence-encoder/4" #@param ["https://tfhub.dev/google/universal-sentence-encoder/4", "https://tfhub.dev/google/universal-sentence-encoder-large/5"] model = hub.load(module_url) print ("module %s loaded" % module_url) def embed(input): return model(input) #@title Compute a representation for each message, showing various lengths supported. word = "Elephant" sentence = "I am a sentence for which I would like to get its embedding." paragraph = ( "Universal Sentence Encoder embeddings also support short paragraphs. " "There is no hard limit on how long the paragraph is. Roughly, the longer " "the more 'diluted' the embedding will be.") messages = [word, sentence, paragraph] # Reduce logging output. logging.set_verbosity(logging.ERROR)
import tensorflow_hub as hub import numpy as np import tensorflow_text import re # Load model from tensorflow hub embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/2") # Load model from local # embed = hub.KerasLayer('./model/') def text_preprocess(text,lang): if lang=='en': # TEXT CLENAING TEXT_CLEANING_RE = "[^A-Za-z0-9]" # Remove link,user and special characters ptext = re.sub(TEXT_CLEANING_RE, ' ', str(text).lower()) # Remove extra whitespace ptext = re.sub('[\s]{2,}',' ',ptext).strip() return ptext elif lang=='zh': # TEXT CLENAING TEXT_CLEANING_RE = "[^\u4E00-\u9FFF0-9]" # Remove link,user and special characters ptext = re.sub(TEXT_CLEANING_RE, ' ', text) # Remove extra whitespace ptext = re.sub('[\s]{2,}',' ',ptext).strip() return ptext else: return text def evaluate(sentences1,sentences2,trans_type):
print(lang) print(" train: " + str(len(train_dataset_by_lang[lang]))) print(" dev: " + str(len(dev_dataset_by_lang[lang]))) train_set = [ data for data_by_lang in train_dataset_by_lang.values() for data in data_by_lang ] dev_set = [ data for data_by_lang in dev_dataset_by_lang.values() for data in data_by_lang ] print(len(train_set), len(dev_set)) preprocessor = hub.load( "https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/2") tokenize = hub.KerasLayer(preprocessor.tokenize) bert_pack_inputs = hub.KerasLayer(preprocessor.bert_pack_inputs, arguments=dict(seq_length=128)) single_bert_input = hub.KerasLayer(preprocessor) def take_first(item): return { "input_mask": item["input_mask"][0], "input_type_ids": item["input_type_ids"][0], "input_word_ids": item["input_word_ids"][0], } @tf.function(input_signature=[
return filtered_dic def run_detector(detector, path): img = load_img(path) converted_img = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...] start_time = time.time() result = detector(converted_img) end_time = time.time() result = {key: value.numpy() for key, value in result.items()} filtered_dic = create_filtered_dic(filter_result(result, 0.3, 10), result) print("Found %d objects." % len(result["detection_scores"])) print("Inference time: ", end_time - start_time) image_with_boxes = draw_boxes( img.numpy(), filtered_dic["detection_boxes"], filtered_dic["detection_class_entities"], filtered_dic["detection_scores"]) display_image(image_with_boxes) crop_out_boxes(img.numpy(), filtered_dic["detection_boxes"]) module_handle = "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1" detector = hub.load(module_handle).signatures['default'] run_detector(detector, "img_19.jpg")
import tensorflow_hub as hub
import cv2
import numpy as np
import tensorflow as tf

detector = hub.load("/home/grandpadzb/tfhub_modules/ssd_mobilenet_v2_2")
print("Complete loading")

camera = cv2.VideoCapture(0)
try:
    # Loop until the key with code 113 ('q') is pressed.
    while cv2.waitKey(1) != 113:
        grabbed, src = camera.read()
        if not grabbed:
            # No frame delivered; resizing `None` would crash.
            break
        # src = cv2.imread("/home/grandpadzb/MathscriptsLib/selfDifineNetwork/party.jpg")
        src = cv2.resize(src, dsize=(320, 320))
        src = src[np.newaxis, :]
        img = tf.convert_to_tensor(src, dtype="uint8")
        # ==========================
        output = detector(img)

        # Convert each output tensor once per frame instead of once per
        # detection.
        num_detections = int(output["num_detections"].numpy()[0])
        classes = output["detection_classes"].numpy()[0][:num_detections]
        boxes = output["detection_boxes"].numpy()[0][:num_detections]

        figure_num = 0
        for class_index, raw_box in zip(classes, boxes):
            if class_index == 1.0:
                # Scale the box to the 320x320 frame; cv2.rectangle needs
                # plain integer pixel coordinates.
                box = [int(v) for v in np.fix(raw_box * 320)]
                cv2.rectangle(src[0, :], (box[1], box[0]), (box[3], box[2]),
                              (255, 255, 255), 2)
                figure_num += 1
            if figure_num >= 1:
                # Only the first class-1 detection is drawn.
                break
        cv2.imshow("result", src[0, :])
        src = src[0, :]
finally:
    # Always free the capture device and close the preview window.
    camera.release()
    cv2.destroyAllWindows()
def __init__(self, model: EmbeddingModel):
    """Load the hub module named by ``model.model_name`` onto ``self.model``."""
    hub_handle = model.model_name
    self.model = hub.load(hub_handle)
# %decor_header Fast Examples Of Neural Networks get_file('01_00_002.png', 'paintings/') content_image = load_img('01_00_002.png') x=tf.keras.applications.vgg19\ .preprocess_input(content_image*255) x = tf.image.resize(x, (224, 224)) vgg19=tf.keras.applications\ .VGG19(include_top=True,weights='imagenet') prediction_probabilities = vgg19(x) predicted_top5=tf.keras.applications.vgg19\ .decode_predictions(prediction_probabilities.numpy())[0] [print([class_name, prob]) for (number, class_name, prob) in predicted_top5] tensor_to_image(content_image) hub_module = th.load(tfhub_path) get_file('00_03_002.png', 'paintings/') get_file('00_00_001.png', 'patterns/') content_image = load_img('00_03_002.png') style_image = load_img('00_00_001.png') stylized_image = hub_module(tf.constant(content_image), tf.constant(style_image))[0] tensor_to_image(stylized_image) # Commented out IPython magic to ensure Python compatibility. # %decor_header Some Variants Of Keras Models def mlp_model(img_size, num_classes): model = tf.keras.models.Sequential([ tkl.Flatten(input_shape=(img_size, img_size, 3)),
class NumpyEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, np.integer): return int(obj) elif isinstance(obj, np.floating): return float(obj) elif isinstance(obj, np.ndarray): return obj.tolist() elif isinstance(obj, float): return str(obj) elif isinstance(obj, PIL.Image.Image): return str(obj.size) return json.JSONEncoder.default(self, obj) ssd = hub.load("https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_320x320/1") model_yn1 = tf.keras.models.load_model('yn1_model.h5') model_eye = tf.keras.models.load_model('eye_crop_model.h5') app = Flask(__name__) def sharpness(img): im = img.convert('L') # to grayscale array = np.asarray(im, dtype=np.int32) gy, gx = np.gradient(array) gnorm = np.sqrt(gx**2 + gy**2) return np.average(gnorm) def bird_eye(oc):
if category not in categories: categories[category] = [] categories[category].append(video) print("Found %d videos in %d categories." % (len(ucf_videos), len(categories))) for category, sequences in categories.items(): summary = ", ".join(sequences[:2]) print("%-20s %4d videos (%s, ...)" % (category, len(sequences), summary)) # Get a sample cricket video. video_path = fetch_ucf_video("v_CricketShot_g04_c02.avi") sample_video = load_video(video_path) sample_video.shape i3d = hub.load("https://tfhub.dev/deepmind/i3d-kinetics-400/1").signatures['default'] def predict(sample_video): # Add a batch axis to the to the sample video. model_input = tf.constant(sample_video, dtype=tf.float32)[tf.newaxis, ...] logits = i3d(model_input)['default'][0] probabilities = tf.nn.softmax(logits) print("Top 5 actions:") for i in np.argsort(probabilities)[::-1][:5]: print(f" {labels[i]:22}: {probabilities[i] * 100:5.2f}%") predict(sample_video) ## https://commons.wikimedia.org/wiki/Category
async def setup_model():
    """Load and return the hub module located at ``model_file_url``."""
    return hub.load(model_file_url)
def load_model():
    """Load and return the arbitrary-image-stylization hub module."""
    #model = tf.keras.applications.MobileNetV2(weights="imagenet")
    handle = 'https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2'
    return hub.load(handle)
import tensorflow as tf import tensorflow_hub as hub import numpy as np import pyautogui import win32api, win32con, win32gui import cv2 import math import time detector = hub.load( "https://tfhub.dev/tensorflow/centernet/resnet50v1_fpn_512x512/1") size_scale = 3 while True: # Get rect of Window hwnd = win32gui.FindWindow(None, 'Counter-Strike: Global Offensive') #hwnd = win32gui.FindWindow("UnrealWindow", None) # Fortnite rect = win32gui.GetWindowRect(hwnd) region = rect[0], rect[1], rect[2] - rect[0], rect[3] - rect[1] # Get image of screen ori_img = np.array(pyautogui.screenshot(region=region)) ori_img = cv2.resize( ori_img, (ori_img.shape[1] // size_scale, ori_img.shape[0] // size_scale)) image = np.expand_dims(ori_img, 0) img_w, img_h = image.shape[2], image.shape[1] # Detection result = detector(image) result = {key: value.numpy() for key, value in result.items()}
# Print Tensorflow version
log.info("TensorFlow version: %s", tf.__version__)

# Check available GPU devices.
log.info("The following GPU devices are available: %s",
         tf.test.gpu_device_name())

# Object detection module: the MODULE environment variable selects which
# local model directory is loaded.
module_env = os.environ.get("MODULE")
log.info("MODULE env: %s", module_env)
if module_env == "FASTER_RCNN":
    module_path = "/model_faster_rcnn"
else:
    module_path = "/model_ssd"
log.info("Loading module_env from: %s", module_path)

# Time the model load and keep only the 'default' signature.
start_time = time.time()
tf_hub_module = hub.load(module_path).signatures["default"]
end_time = time.time()
log.info("Loading module time: %.2f", end_time - start_time)

object_detector = ObjectDetectorFromBase64(tf_hub_module)


@app.get("/healthcheck")
def healthcheck():
    """Liveness probe; always answers "OK"."""
    return "OK"


@app.post("/predict")
def detect_objects_base64(results: dict = Depends(object_detector)):
    """Return the value produced by the ``object_detector`` dependency."""
    return results
senta = row[0].strip() sentb = row[1].strip() senta = re.sub(pattern, '', senta) sentb = re.sub(pattern, '', sentb) veca = embed(senta)["outputs"] vecb = embed(sentb)["outputs"] score = np.inner(veca, vecb)[0] if str(row[-1]) == '1': yes_thresholds.append(score) else: no_thresholds.append(score) if len(yes_thresholds) == 0 or len(no_thresholds) == 0: print("thresholds == []") return print('yes_thresholds = avg:{} max:{} min:{}'.format( sum(yes_thresholds) / len(yes_thresholds), max(yes_thresholds), min(yes_thresholds))) print('no_thresholds = avg:{} max:{} min:{}'.format( sum(no_thresholds) / len(no_thresholds), max(no_thresholds), min(no_thresholds))) if __name__ == "__main__": pattern = r'、|《|》|~|`|!|@|#|¥|%|…|&|(|)|;|;|×|—|-|=|\(|\)|>|<|\\|/|_|。|,|"|”|【|】|\[|\]|{|}|' pattern += r'正常|等|关于|设计|竞争性系统|项目|公告|.标|工标|单一|工程|合同|来源|失败|公告|流标|废标|终止|暂停|中止|更改|变更|更正|补遗|补充|澄清|延期|交易|结果|公示|成交|中选|中标|比选|比价|竞标|竞价|限制价|控制价|控价|限价|询价|询比|预公告|预公示|预审|抽签|选人|采购|邀请|需求|招标|磋商|谈判|竞争性磋商' embed = hub.load("./universal-sentence-encoder-multilingual_2") # get_new_annotated_data() main()
# -*- coding: utf-8 -*- """ https://towardsdatascience.com/use-cases-of-googles-universal-sentence-encoder-in-production-dd5aaab4fc15 Created on Sun Dec 1 14:26:45 2019 @author: Amaan """ import tensorflow_hub as hub import numpy as np import seaborn as sns import matplotlib.pyplot as plt %matplotlib inline #embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/3") embed = hub.load("model/tf2/universal-sentence-encoder-v3") # Sentence Similarity messages = [ "EPS estimates for APPL is expected to increase by 20 bps", "ROI estimates for APPL is expected to increase by 9%", "ROI estimates for GOOG is expected to increase by 9%", "ROI estimates for GOOG is expected to increase by 2%" ] encoding_matrix = embed(messages)["outputs"] corr = np.inner(encoding_matrix, encoding_matrix)
import io import os import numpy as np import tensorflow as tf import tensorflow_hub as hub app = FastAPI(root_path="/api/") app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_headers=["*"], allow_methods=["*"], ) # loading hub module hub_module = hub.load( "https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/1") def tensor_to_image(tensor): tensor = tensor * 255 tensor = np.array(tensor, dtype=np.uint8) if np.ndim(tensor) > 3: assert tensor.shape[0] == 1 tensor = tensor[0] return Image.fromarray(tensor) def load_img(path_to_img): max_dim = 512 img = tf.io.read_file(path_to_img) img = tf.image.decode_image(img, channels=3)
def get_use_vectors(source_texts: List, target_texts: List, model_path: str):
    """Encode two text lists with the hub module found at ``model_path``.

    Returns:
        ``(source_vecs, target_vecs)``: the ``'outputs'`` entry of the
        module's result for each list, converted with ``.numpy()``.
    """
    encoder = hub.load(model_path)
    # Source texts are encoded first, then target texts.
    source_vecs, target_vecs = [
        encoder(texts)['outputs'].numpy()
        for texts in (source_texts, target_texts)
    ]
    return source_vecs, target_vecs
# NOTE(review): `sklearn.externals.joblib` is deprecated in recent
# scikit-learn releases - confirm the pinned version still ships it.
from sklearn.externals import joblib
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import tensorflow_hub as hub
import numpy as np
import tensorflow_text

import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
from firebase_admin import storage

from linebot import LineBotApi

# Pre-fitted scikit-learn artefacts.
knn = joblib.load('/home/thanaphat_phetkrow/API/knn.pkl')
pca = joblib.load('/home/thanaphat_phetkrow/API/pca.pkl')
scaler = joblib.load('/home/thanaphat_phetkrow/API/scaler.pkl')

# Text pre-processing helper and the sentence-embedding module.
pre = pretext.pretextprocessing()
embed = hub.load("/home/thanaphat_phetkrow/API/model3")

# Firebase: credentials, Firestore client and storage bucket.
cred = credentials.Certificate(
    '/home/thanaphat_phetkrow/API/serviceAccountKey.json')
firebase_admin.initialize_app(cred, {'storageBucket': 'fir-c1ec0.appspot.com'})
db = firestore.client()
bucket = storage.bucket()
batch = db.batch()
increment = firestore.Increment(1)
# ### 6.2. USE Embeddings

# In[ ]:

# 1. Load the stored USE vector representation of all questions in the dataset
use_embeddings = np.load('use_embeddings.npy')

# In[ ]:

# 2. Load the pretrained USE model
use_model = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")

# In[ ]:

# 3. A function to create the vector representation of a query
def USE_sentence_vec(query):
    """Pre-process ``query`` and return the USE model output for it."""
    return use_model(
        text_preprocessor([query], stop_word = False, remove_digits = False))

# In[ ]:
from flask import Flask
import numpy as np
from tensorflow import keras
import tensorflow_hub as hub
import tensorflow as tf
from tensorflow.python.keras import backend as K

# Artefacts loaded once at import time.
model = keras.models.load_model('/app/next_word_predictor')
vocab_arr = np.load('/app/vocab_arr.npy')
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

app = Flask(__name__)


@app.route('/')
def hello_world():
    """Fixed greeting for the root route."""
    return 'hello!'


@app.route('/getNextWord/<sent>')
def getNextWord(sent):
    """Return the vocabulary entry with the highest predicted score for ``sent``."""
    sentence_vector = embed([sent]).numpy()
    # The last row of the prediction is the one that gets ranked.
    last_scores = model.predict(sentence_vector)[-1]
    best_index = np.argmax(last_scores)
    return vocab_arr[best_index]


# if __name__ == '__main__':
#     app.run(host='0.0.0.0', port=80, debug=False)
def create_data(se_path,
                excluded_ids_path,
                target_folder,
                n_train_queries,
                n_dev_queries,
                n_dev_queries_max_percentage,
                n_max_questions,
                pool_size,
                pooling,
                gpu=False):
    """Sample questions and write question/train/dev files to ``target_folder``.

    Writes three gzip TSV files into ``target_folder``:
    ``questions.tsv.gz`` (id, title, body, accepted answer, duplicates),
    ``train.tsv.gz`` (one question id per line) and ``dev.tsv.gz``
    (question id and a space-separated candidate pool).

    Args:
        se_path: Passed to ``read_questions``, ``read_answers`` and
            ``read_duplicates``.
        excluded_ids_path: Optional text file with one question id per line
            to exclude; ignored when empty or missing.
        target_folder: Output directory, created when absent.
        n_train_queries: Requested number of train queries.
        n_dev_queries: Requested number of dev queries; capped at
            ``n_dev_queries_max_percentage`` of the sampled questions.
        n_dev_queries_max_percentage: Fraction used for that cap.
        n_max_questions: Maximum number of questions written to
            ``questions.tsv.gz``.
        pool_size: Number of candidates per dev query.
        pooling: ``"none"`` for random pools, ``"use"`` for nearest
            neighbours of the question title under USE-QA embeddings.
        gpu: When true the FAISS index is moved to GPU 0.

    Raises:
        Exception: If ``pooling`` is neither ``"none"`` nor ``"use"``.
    """
    if excluded_ids_path and os.path.exists(excluded_ids_path):
        with open(excluded_ids_path, 'r') as f:
            excluded_q_ids = {line.strip() for line in f}
    else:
        logger.info(
            'Either no excluded ids path given, or path does not exist! {}'.
            format(excluded_ids_path))
        excluded_q_ids = set()

    questiondict, qids = read_questions(se_path, excluded_ids=excluded_q_ids)
    qids = set(qids)
    accepted_answer_ids = {
        q['ANSWER'] for q in questiondict.values() if q['ANSWER']
    }
    answerdict, aids = read_answers(se_path, accepted_answer_ids)
    duplicatesdict = read_duplicates(se_path, qids)

    # exist_ok makes a separate existence check unnecessary.
    os.makedirs(target_folder, exist_ok=True)

    logger.info('len(excluded_ids)={}'.format(len(excluded_q_ids)))
    logger.info('Now validating that no excluded ids were returned')
    for qid in qids:
        assert qid not in excluded_q_ids
    logger.info('[ok] did not include any excluded ids')

    # the number of included questions in the dataset. this can be more than
    # the number of queries so that we have more variation for random
    # sampling while not including too many queries (for large forums)
    #
    # we make sure to prefer ones with correct answers and duplicates before
    # adding ones without
    q_with_duplicates = set(duplicatesdict.keys())
    q_with_only_a = {k for (k, q) in questiondict.items() if q['ANSWER']
                    } - q_with_duplicates
    rest = list(qids - (q_with_duplicates | q_with_only_a))

    sampled_qids = list(q_with_duplicates)
    # NOTE(review): the `else` is assumed to pair with the outer `if`; the
    # source formatting was lost - confirm.
    if len(sampled_qids) < n_max_questions:
        q_with_only_a = list(q_with_only_a)
        random.shuffle(q_with_only_a)
        sampled_qids += q_with_only_a[:n_max_questions - len(sampled_qids)]
        if len(sampled_qids) < n_max_questions:
            random.shuffle(rest)
            sampled_qids += rest[:n_max_questions - len(sampled_qids)]
    else:
        sampled_qids = random.sample(sampled_qids, n_max_questions)
    logger.info('N sampled_qids = {}'.format(len(sampled_qids)))

    # adjust number of dev queries, if needed
    n_dev_queries = min(int(len(sampled_qids) * n_dev_queries_max_percentage),
                        n_dev_queries)
    logger.info(
        'n_dev_queries_max_percentage={}'.format(n_dev_queries_max_percentage))
    logger.info('n_questions(all)={}'.format(len(qids)))
    logger.info('n_questions(sampled)={}'.format(len(sampled_qids)))
    logger.info('n_train_queries={}'.format(n_train_queries))
    logger.info('n_dev_queries={}'.format(n_dev_queries))

    # Cap against the list actually sampled from; it can be shorter than
    # `qids` because of `n_max_questions`, and random.sample raises when
    # asked for more items than are available.
    n_sample_train_dev = n_train_queries + n_dev_queries
    if len(sampled_qids) < n_sample_train_dev:
        logger.info(
            'Number of questions in SE dump less than train+dev (={})'.format(
                n_sample_train_dev))
        n_sample_train_dev = len(sampled_qids)

    sampled_qids_train_dev = random.sample(sampled_qids, n_sample_train_dev)
    # Split with an explicit index: `[:-0]` would yield an empty train set
    # and `[-0:]` the whole list when n_dev_queries is 0.
    n_train = max(len(sampled_qids_train_dev) - n_dev_queries, 0)
    sampled_qids_train = sampled_qids_train_dev[:n_train]
    sampled_qids_dev = sampled_qids_train_dev[n_train:]
    logger.info('N sampled_qids_train = {}'.format(len(sampled_qids_train)))
    logger.info('N sampled_qids_dev = {}'.format(len(sampled_qids_dev)))

    with gzip.open(target_folder + "/questions.tsv.gz", 'wt',
                   encoding='utf-8') as f:
        for qid in sampled_qids:
            title = _clean_text(questiondict[qid]['TITLE'])
            body = _clean_text(questiondict[qid]['BODY'])

            answer = ''
            answer_id = questiondict[qid]['ANSWER']
            if answer_id and answer_id in answerdict:
                answer = _clean_text(answerdict[answer_id]['BODY'])

            # Questions without recorded duplicates get an empty field
            # instead of raising KeyError on a plain dict.
            duplicates = ','.join(duplicatesdict.get(qid, []))
            f.write('{}\t{}\t{}\t{}\t{}\n'.format(qid, title, body, answer,
                                                  duplicates))

    with gzip.open(target_folder + "/train.tsv.gz", 'wt',
                   encoding='utf-8') as f:
        for qid in sampled_qids_train:
            f.write('{}\n'.format(qid))

    with gzip.open(target_folder + "/dev.tsv.gz", 'wt', encoding='utf-8') as f:
        if pooling != "none":
            # pooling with USE sentence embeddings over question titles and
            # similarity search
            logger.info(
                'Building FAISS index with sentence embeddings of Q titles')
            if pooling != 'use':
                raise Exception('Unknown pooling method "{}"'.format(pooling))
            module = hub.load(
                'https://tfhub.dev/google/universal-sentence-encoder-qa/3')
            dim = 512

            titles = [questiondict[qid]['TITLE'].lower()
                      for qid in sampled_qids]

            logger.info('Computing all embeddings...')
            encode = module.signatures['question_encoder']
            # Collect batches and stack once; stacking inside the loop
            # copies the growing array on every iteration.
            batches = [
                encode(tf.constant(titles[i:i + 128]))['outputs'].numpy()
                for i in tqdm(range(0, len(titles), 128))
            ]
            if batches:
                embeddings = np.vstack(batches).astype('float32')
            else:
                embeddings = np.empty((0, dim), dtype='float32')

            logger.info('Normalizing embeddings...')
            # normalize embeddings so that IP-index does cosine similarity:
            # each ROW (one question) is scaled to unit length. The previous
            # axis=0 scaled each column instead.
            row_norms = LA.norm(embeddings, axis=1, keepdims=True)
            embeddings = np.ascontiguousarray(embeddings / row_norms,
                                              dtype='float32')

            logger.info('Adding embeddings to FAISS index...')
            logger.info('embeddings shape: {}'.format(embeddings.shape))
            index = faiss.IndexFlatIP(dim)
            index.add(embeddings)
            if gpu:
                res = faiss.StandardGpuResources()  # use a single GPU
                index = faiss.index_cpu_to_gpu(res, 0, index)

            # First position of every question id, built once instead of a
            # linear `list.index` scan per dev query.
            position_of = {}
            for pos, qid in enumerate(sampled_qids):
                position_of.setdefault(qid, pos)

            logger.info('Querying FAISS...')
            for i, qid in tqdm(enumerate(sampled_qids_dev)):
                # A flat index stores vectors verbatim, so the row that was
                # added is the query vector.
                embedding = embeddings[position_of[qid]]
                _, similar_items = index.search(
                    np.reshape(embedding, [1, -1]), pool_size)
                # FAISS pads with -1 when fewer than pool_size vectors exist;
                # -1 would silently pick the last question.
                similar_items_qids = [
                    sampled_qids[j] for j in similar_items[0] if j >= 0
                ]
                neg = ' '.join(similar_items_qids)
                f.write('{}\t{}\n'.format(qid, neg))

                # print some examples
                if i < 3:
                    logger.info('Query: {}'.format(
                        questiondict[qid]['TITLE']))
                    for qid_similar in similar_items_qids[:3]:
                        logger.info('=>: {}'.format(
                            questiondict[qid_similar]['TITLE']))
                    logger.info('-' * 10)
        else:
            # pooling with random sampling; never request more items than
            # the population holds.
            n_pool = min(pool_size, len(sampled_qids_train_dev))
            for qid in sampled_qids_dev:
                neg = ' '.join(
                    str(i)
                    for i in random.sample(sampled_qids_train_dev, n_pool))
                f.write('{}\t{}\n'.format(qid, neg))
def get_elmo():
    """Load and return the hub module at ``https://tfhub.dev/google/elmo/2``."""
    return hub.load("https://tfhub.dev/google/elmo/2")
import os import tensorflow as tf # Load compressed models from tensorflow_hub os.environ['TFHUB_MODEL_LOAD_FORMAT'] = 'COMPRESSED' import numpy as np import PIL.Image from uuid import uuid4 import time import functools import tensorflow_hub as hub from uuid import uuid4 import shutil import requests hub_model = hub.load('https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2') def tensor_to_image(tensor): tensor = tensor*255 tensor = np.array(tensor, dtype=np.uint8) if np.ndim(tensor)>3: assert tensor.shape[0] == 1 tensor = tensor[0] return PIL.Image.fromarray(tensor) def save_url_to_database(img_url): url = 'https://go-deployment.herokuapp.com/img_url' x = requests.post(url, json={"firebase_url": img_url}) if x.status_code == 200:
def __init__(self, model_url: str = 'https://tfhub.dev/google/yamnet/1'):
    """Load the hub module at ``model_url`` and derive a short model name.

    The name is the URL with the ``https://tfhub.dev/google/`` prefix
    removed and every ``/`` replaced by ``_``.
    """
    self.model_url = model_url
    self.model = hub.load(self.model_url)
    url_without_prefix = self.model_url.replace(
        'https://tfhub.dev/google/', '')
    self.model_name = url_without_prefix.replace('/', '_')
    self.vector_length = 1024
def main():
    """Encode the train/dev/test splits of a task with the Universal Sentence Encoder.

    For the ``mnli`` task the mismatched dev and test sets are encoded
    as well.
    """
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger = logging.getLogger(__name__)

    args = get_args()
    print_args(args)
    initialization.init_output_dir(args)
    initialization.save_args(args)
    task = get_task(args.task_name, args.data_dir)

    # load model
    print("loading Universal Sentence Encoder......")
    USE = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

    def encode_split(loader, mode):
        # Single "epoch" wrapped in trange, as in every encoding pass.
        for _ in trange(1, desc="Epoch"):
            run_encoding(loader=loader, model=USE, args=args, mode=mode)

    # prepare dataset
    label_list = task.get_labels()
    label_map = dict((label, idx) for idx, label in enumerate(label_list))

    print("loading raw data ... ")
    train_examples = task.get_train_examples()
    val_examples = task.get_dev_examples()
    test_examples = task.get_test_examples()

    print("converting to data loader ... ")
    train_loader = get_dataloader(train_examples, label_map,
                                  args.train_batch_size)
    val_loader = get_dataloader(val_examples, label_map,
                                args.eval_batch_size)
    test_loader = get_dataloader(test_examples, label_map,
                                 args.test_batch_size)

    # run embedding for each standard split
    for banner, loader, mode in (
            ("Run embedding for train set", train_loader, 'train'),
            ("Run embedding for dev set", val_loader, 'dev'),
            ("Run embedding for test set", test_loader, 'test'),
    ):
        print(banner)
        encode_split(loader, mode)

    # HACK FOR MNLI mis-matched
    if args.task_name != 'mnli':
        return

    print("Run Embedding for MNLI Mis-Matched Datasets")
    print("loading raw data ... ")
    mm_val_example = MnliMismatchedProcessor().get_dev_examples(
        args.data_dir)
    mm_test_examples = MnliMismatchedProcessor().get_test_examples(
        args.data_dir)

    print("converting to data loader ... ")
    mm_val_loader = get_dataloader(mm_val_example, label_map,
                                   args.eval_batch_size)
    mm_test_loader = get_dataloader(mm_test_examples, label_map,
                                    args.test_batch_size)

    print("Run embedding for mm_dev set")
    encode_split(mm_val_loader, 'mm_dev')
    print("Run embedding for test set")
    encode_split(mm_test_loader, 'mm_test')
def preprocess(path_interactions: str = "interactions.csv",
               path_items: str = "items.csv",
               path_serialised: str = ".",
               embeds_use_url: str = EMBEDS_USE_URL,
               embeds_mf_dim: int = 8):
    """
    Read data, fit the data, convert title to USE & MF embeddings
    and serialise these embeddings.

    Args:
        path_interactions (str, optional): Path to interactions file.
            Defaults to "interactions.csv".
        path_items (str, optional): Path to items file.
            Defaults to "items.csv".
        path_serialised (str, optional): Directory to save the serialised
            data; created (including parents) when it does not exist.
            Defaults to ".".
        embeds_use_url (str, optional): The URL of the USE encoder model to
            use from TF Hub. Defaults to EMBEDS_USE_URL.
        embeds_mf_dim (int, optional): The no. of dimensions of the MF
            embedding. Defaults to 8.

    Raises:
        FileNotFoundError: If "path_interactions" does not exist.
        FileNotFoundError: If "path_items" does not exist.
        ColumnNotFoundError: If any of the "user", "item" and "interaction"
            columns is missing from the interactions file.
        ColumnNotFoundError: If the "title" column is missing from the
            items file.
    """
    # Check file paths
    path_interactions = Path(path_interactions)
    path_items = Path(path_items)
    path_serialised = Path(path_serialised)

    if not path_interactions.exists():
        raise FileNotFoundError("Specify a file for interactions")
    if not path_items.exists():
        raise FileNotFoundError("Specify a file for items")
    # parents/exist_ok: nested output directories work and a concurrent
    # creation is not an error.
    path_serialised.mkdir(parents=True, exist_ok=True)

    # Read data
    df_intxn = pd.read_csv(path_interactions)
    df_items = pd.read_csv(path_items, index_col="id")

    # The check must look for REQUIRED columns that are absent. The previous
    # `columns - required` raised on harmless extra columns and let missing
    # ones through.
    required_columns = ["interaction", "item", "user"]
    if set(required_columns) - set(df_intxn.columns):
        raise ColumnNotFoundError(
            "These columns must be present in interactions: "
            f"{str(['interaction', 'item', 'user'])}")
    if "title" not in df_items.columns:
        raise ColumnNotFoundError("`title` must be present in items")

    # Aggregate interactions data; restrict to the required columns so any
    # extra column does not take part in the sum.
    df_intxn = (df_intxn[["user", "item", "interaction"]]
                .groupby(["user", "item"]).sum().reset_index())

    # Format to usable data
    # Titles are batched for the encoder
    # CSR matrix is a sparse matrix that is used in implicit
    titles = df_items["title"].tolist()
    batched_titles = batch(titles)
    mat = csr_matrix(
        (df_intxn["interaction"], (df_intxn["item"], df_intxn["user"])))

    # USE model
    model_use = hub.load(embeds_use_url)

    # MF Model
    model_mf = implicit.als.AlternatingLeastSquares(factors=embeds_mf_dim)
    model_mf.fit(mat)

    # Title embeddings encoded using USE & MF respectively
    embeds_use = np.vstack(
        [model_use(batched).numpy() for batched in batched_titles])
    embeds_mf = model_mf.item_factors.copy()

    # Serialise embeddings
    np.save(path_serialised / "embeds_use.npy", embeds_use)
    np.save(path_serialised / "embeds_mf.npy", embeds_mf)
def __init__(self):
    """Load the multilingual USE-QA hub module and derive a short model name.

    The name is the URL with the ``https://tfhub.dev/google/`` prefix
    removed and every ``/`` replaced by ``_``.
    """
    self.model_url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3"
    self.model = hub.load(self.model_url)
    url_without_prefix = self.model_url.replace(
        'https://tfhub.dev/google/', '')
    self.model_name = url_without_prefix.replace('/', '_')
    self.vector_length = 512
def eval_and_report():
    """Eval on voxceleb.

    Waits for checkpoints in ``FLAGS.logdir``. For each one, restores the
    model, runs it over the evaluation dataset and writes the mean squared
    and mean absolute error against the teacher targets as summaries to
    ``FLAGS.eval_dir``. Returns once the checkpoint iterator is exhausted
    (it is given ``FLAGS.timeout``).
    """
    # `logging.info` is used throughout; the former `tf.logging.info` is a
    # TF1-only API and inconsistent with the rest of this function.
    logging.info('samples_key: %s', FLAGS.samples_key)
    logging.info('Logdir: %s', FLAGS.logdir)
    logging.info('Batch size: %s', FLAGS.batch_size)

    writer = tf.summary.create_file_writer(FLAGS.eval_dir)
    model = models.get_keras_model(
        bottleneck_dimension=FLAGS.bottleneck_dimension,
        output_dimension=FLAGS.output_dimension,
        alpha=FLAGS.alpha,
        mobilenet_size=FLAGS.mobilenet_size,
        frontend=not FLAGS.precomputed_frontend_and_targets,
        avg_pool=FLAGS.average_pool)
    checkpoint = tf.train.Checkpoint(model=model)

    # The teacher model does not depend on the checkpoint, so load it once
    # instead of once per evaluated checkpoint.
    teacher_model = hub.load(FLAGS.teacher_model_hub)

    for ckpt in tf.train.checkpoints_iterator(FLAGS.logdir,
                                              timeout=FLAGS.timeout):
        assert 'ckpt-' in ckpt, ckpt
        # The text after the last 'ckpt-' is used as the step.
        step = ckpt.split('ckpt-')[-1]
        logging.info('Starting to evaluate step: %s.', step)

        checkpoint.restore(ckpt)
        logging.info('Loaded weights for eval step: %s.', step)

        reader = tf.data.TFRecordDataset
        ds = get_data.get_data(
            file_pattern=FLAGS.file_pattern,
            teacher_fn=get_data.savedmodel_to_func(teacher_model,
                                                   FLAGS.output_key),
            output_dimension=FLAGS.output_dimension,
            reader=reader,
            samples_key=FLAGS.samples_key,
            min_length=FLAGS.min_length,
            batch_size=FLAGS.batch_size,
            loop_forever=False,
            shuffle=False)
        logging.info('Got dataset for eval step: %s.', step)
        if FLAGS.take_fixed_data:
            ds = ds.take(FLAGS.take_fixed_data)

        mse_m = tf.keras.metrics.MeanSquaredError()
        mae_m = tf.keras.metrics.MeanAbsoluteError()

        logging.info('Starting the ds loop...')
        count, ex_count = 0, 0
        s = time.time()
        for wav_samples, targets in ds:
            wav_samples.shape.assert_is_compatible_with(
                [None, FLAGS.min_length])
            targets.shape.assert_is_compatible_with(
                [None, FLAGS.output_dimension])

            logits = model(wav_samples, training=False)
            logits.shape.assert_is_compatible_with(targets.shape)

            mse_m.update_state(y_true=targets, y_pred=logits)
            mae_m.update_state(y_true=targets, y_pred=logits)
            ex_count += logits.shape[0]
            count += 1
            logging.info('Saw %i examples after %i iterations as %.2f secs...',
                         ex_count, count, time.time() - s)

        with writer.as_default():
            tf.summary.scalar('mse', mse_m.result().numpy(), step=int(step))
            tf.summary.scalar('mae', mae_m.result().numpy(), step=int(step))
        logging.info('Done with eval step: %s in %.2f secs.', step,
                     time.time() - s)