def do_upload(table_name, img_path, model, milvus_client, mysql_cli):
    try:
        if not table_name:
            table_name = DEFAULT_TABLE
        # Extract a feature vector from the image, store it in Milvus, then record
        # the returned id together with the image path in MySQL.
        feat = model.resnet50_extract_feat(img_path)
        ids = milvus_client.insert(table_name, [feat])
        milvus_client.create_index(table_name)
        mysql_cli.create_mysql_table(table_name)
        mysql_cli.load_data_to_mysql(table_name,
                                     [(str(ids[0]), img_path.encode())])
        return ids[0]
    except Exception as e:
        LOGGER.error("Error with upload: {}".format(e))
        sys.exit(1)
def insert(self, collection_name, vectors, ids=None):
    try:
        self.create_collection(collection_name)
        # The collection schema is [id, embedding], so both columns are inserted together.
        self.collection.insert([ids, vectors])
        LOGGER.debug(
            "Insert vectors to Milvus in collection: {} with {} rows".format(
                collection_name, len(vectors)))
        return ids
    except Exception as e:
        LOGGER.error("Failed to insert data to Milvus: {}".format(e))
        sys.exit(1)
def count(self, collection_name):
    try:
        status, num = self.client.count_entities(
            collection_name=collection_name)
        if not status.code:
            LOGGER.debug(
                "Successfully get the num: {} of the collection: {}".format(
                    num, collection_name))
            return num
        else:
            raise Exception(status.message)
    except Exception as e:
        LOGGER.error("Failed to count vectors in Milvus: {}".format(e))
        sys.exit(1)
def search_by_milvus_ids(self, ids, table_name):
    self.test_connection()
    # Strip the list brackets so the ids can be spliced into the IN (...) clause;
    # ORDER BY FIELD keeps the rows in the same order Milvus returned the ids.
    str_ids = str(ids).replace('[', '').replace(']', '')
    sql = "select * from " + table_name + " where milvus_id in (" + str_ids + \
        ") order by field (milvus_id," + str_ids + ");"
    try:
        self.cursor.execute(sql)
        results = self.cursor.fetchall()
        results_id = [res[0] for res in results]
        results_class = [res[1] for res in results]
        LOGGER.debug("MYSQL search by milvus id.")
        return results_id, results_class
    except Exception as e:
        LOGGER.error("MYSQL ERROR: {} with sql: {}".format(e, sql))
        sys.exit(1)
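# Worked example of the statement built above (assumption: ids is the list of string
# ids that the search routines pass in). With ids = ['101', '102'] and
# table_name = 'test_table', sql expands to:
#   select * from test_table where milvus_id in ('101', '102')
#   order by field (milvus_id,'101', '102');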
async def do_search_api(table_name: str = None, query_sentence: str = None):
    try:
        ids, title, text, distances = search_in_milvus(table_name, query_sentence,
                                                       MILVUS_CLI, MYSQL_CLI)
        res = []
        for p, d in zip(title, text):
            res.append({'title': p, 'content': d})
        LOGGER.info("Successfully searched similar text!")
        return res
    except Exception as e:
        LOGGER.error(e)
        return {'status': False, 'msg': str(e)}, 400
def search_vectors(self, collection_name, vectors, top_k, search_params):
    try:
        self.set_collection(collection_name)
        # search_params example: {"metric_type": METRIC_TYPE, "params": {"nprobe": nprobe}}
        res = self.collection.search(vectors,
                                     anns_field="embedding",
                                     param=search_params,
                                     limit=top_k)
        LOGGER.debug("Successfully searched in collection: {}".format(res))
        return res
    except Exception as e:
        LOGGER.error("Failed to search vectors in Milvus: {}".format(e))
        sys.exit(1)
def do_count(table_name, milvus_cli, mysql_cli):
    if not table_name:
        table_name = DEFAULT_TABLE
    try:
        if not milvus_cli.has_collection(table_name):
            return None
        milvus_num = milvus_cli.count(table_name)
        mysql_num = mysql_cli.count_table(table_name)
        LOGGER.debug("The num of Milvus: {} and MySQL: {}".format(
            milvus_num, mysql_num))
        return milvus_num
    except Exception as e:
        LOGGER.error("Error with count table: {}".format(e))
        sys.exit(1)
def create_index(self, collection_name):
    try:
        # IVF_FLAT index with 16384 cluster buckets (nlist).
        index_param = {'nlist': 16384}
        status = self.client.create_index(collection_name, IndexType.IVF_FLAT,
                                          index_param)
        if not status.code:
            LOGGER.debug(
                "Successfully create index in collection: {} with param: {}".format(
                    collection_name, index_param))
            return status
        else:
            raise Exception(status.message)
    except Exception as e:
        LOGGER.error("Failed to create index: {}".format(e))
        sys.exit(1)
def insert(self, collection_name, vectors):
    try:
        self.create_collection(collection_name)
        status, ids = self.client.insert(collection_name=collection_name,
                                         records=vectors)
        if not status.code:
            LOGGER.debug(
                "Insert vectors to Milvus in collection: {} with {} rows".format(
                    collection_name, len(vectors)))
            return ids
        else:
            raise Exception(status.message)
    except Exception as e:
        LOGGER.error("Failed to insert data to Milvus: {}".format(e))
        sys.exit(1)
def do_drop(table_name, milvus_cli, mysql_cli):
    if not table_name:
        table_name = DEFAULT_TABLE
    try:
        if not milvus_cli.has_collection(table_name):
            msg = "Milvus doesn't have a collection named {}".format(table_name)
            return msg
        status = milvus_cli.delete_collection(table_name)
        mysql_cli.delete_table(table_name)
        return status
    except Exception as e:
        LOGGER.error("Error with drop table: {}".format(e))
        sys.exit(1)
def create_index(self, collection_name, index_params):
    try:
        self.set_collection(collection_name)
        status = self.collection.create_index(field_name="embedding",
                                              index_params=index_params)
        if not status.code:
            # Load the indexed collection into memory so it is ready for search.
            self.collection.load()
            LOGGER.debug(
                "Successfully create index in collection: {} with param: {}".format(
                    collection_name, index_params))
            return status
        else:
            raise Exception(status.message)
    except Exception as e:
        LOGGER.error("Failed to create index: {}".format(e))
        sys.exit(1)
def search_in_milvus(table_name, query_sentence, milvus_cli, mysql_cli):
    if not table_name:
        table_name = DEFAULT_TABLE
    try:
        # Encode the query sentence with bert-as-service and normalize the
        # embedding before searching Milvus.
        query_data = [query_sentence]
        vectors = bc.encode(query_data)
        query_list = normaliz_vec(vectors.tolist())
        LOGGER.info("Successfully encoded the query sentence")
        results = milvus_cli.search_vectors(table_name, query_list, TOP_K)
        vids = [str(x.id) for x in results[0]]
        LOGGER.debug("Milvus ids: {}".format(vids))
        ids, title, text = mysql_cli.search_by_milvus_ids(vids, table_name)
        distances = [x.distance for x in results[0]]
        return ids, title, text, distances
    except Exception as e:
        LOGGER.error("Error with search: {}".format(e))
        sys.exit(1)
def create_collection(self, collection_name):
    try:
        if not self.has_collection(collection_name):
            collection_param = {
                'collection_name': collection_name,
                'dimension': VECTOR_DIMENSION,
                'index_file_size': INDEX_FILE_SIZE,
                'metric_type': METRIC_TYPE
            }
            status = self.client.create_collection(collection_param)
            if status.code != 0:
                raise Exception(status.message)
            LOGGER.debug("Create Milvus collection: {}".format(collection_name))
    except Exception as e:
        LOGGER.error("Failed to create collection in Milvus: {}".format(e))
        sys.exit(1)
def search_vectors(self, collection_name, vectors, top_k):
    try:
        search_param = {'nprobe': 16}
        status, result = self.client.search(collection_name=collection_name,
                                            query_records=vectors,
                                            top_k=top_k,
                                            params=search_param)
        if not status.code:
            LOGGER.debug("Successfully searched in collection: {}".format(
                collection_name))
            return result
        else:
            raise Exception(status.message)
    except Exception as e:
        LOGGER.error("Failed to search vectors in Milvus: {}".format(e))
        sys.exit(1)
def do_search(host, table_name, img_path, model, milvus_client, mysql_cli):
    if not table_name:
        table_name = DEFAULT_TABLE
    try:
        feat = model.resnet50_extract_feat(img_path)
        vectors = milvus_client.search_vectors(table_name, [feat], TOP_K)
        vids = [str(x.id) for x in vectors[0]]
        paths = mysql_cli.search_by_milvus_ids(vids, table_name)
        distances = [x.distance for x in vectors[0]]
        # Turn the stored gif paths into URLs served by this host.
        for i in range(len(paths)):
            paths[i] = "http://" + str(host) + "/data?gif_path=" + str(paths[i])
        return paths, distances
    except Exception as e:
        LOGGER.error("Error with search: {}".format(e))
        sys.exit(1)
def get_index_params(index_type):
    # Build the index parameters for the requested index type; any type that is
    # not matched below falls back to an IVF-style index with NLIST buckets.
    if index_type == 'FLAT':
        index_param = {"index_type": index_type}
    elif index_type == 'RNSG':
        params = {
            "search_length": SEARCH_LENGTH,
            "out_degree": OUT_DEGREE,
            "candidate_pool_size": CANDIDATE_POOL,
            "knng": KNNG
        }
        index_param = {
            "index_type": index_type,
            "metric_type": METRIC_TYPE,
            "params": params
        }
    elif index_type == 'HNSW':
        params = {"M": HNSW_M, "efConstruction": EFCONSTRUCTION}
        index_param = {
            "index_type": index_type,
            "metric_type": METRIC_TYPE,
            "params": params
        }
    elif index_type == 'ANNOY':
        params = {"n_trees": N_TREE}
        index_param = {
            "index_type": index_type,
            "metric_type": METRIC_TYPE,
            "params": params
        }
    elif index_type == 'IVF_PQ':
        params = {"nlist": NLIST, "m": PQ_M}
        index_param = {
            "index_type": index_type,
            "metric_type": METRIC_TYPE,
            "params": params
        }
    else:
        params = {"nlist": NLIST}
        index_param = {
            "index_type": index_type,
            "metric_type": METRIC_TYPE,
            "params": params
        }
    LOGGER.info(index_param)
    return index_param
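# Minimal usage sketch (assumption: a pymilvus 2.x deployment reachable on localhost
# and an existing collection with an "embedding" field; the collection name and
# address below are placeholders, not part of the original module).
def _example_create_hnsw_index(collection_name="demo_collection"):
    from pymilvus import connections, Collection
    connections.connect(host="127.0.0.1", port="19530")
    collection = Collection(name=collection_name)
    # Plug the dict returned by get_index_params() straight into create_index().
    index_param = get_index_params('HNSW')
    collection.create_index(field_name="embedding", index_params=index_param)
    return index_param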
async def search_images(image: UploadFile = File(...), table_name: str = None):
    # Search the uploaded image in Milvus/MySQL
    try:
        # Save the uploaded image to the server before extracting its feature.
        content = await image.read()
        LOGGER.debug("Successfully read the uploaded image.")
        img_path = os.path.join(UPLOAD_PATH, image.filename)
        with open(img_path, "wb+") as f:
            f.write(content)
        paths, distances = do_search(table_name, img_path, MODEL, MILVUS_CLI,
                                     MYSQL_CLI)
        res = dict(zip(paths, distances))
        res = sorted(res.items(), key=lambda item: item[1])
        LOGGER.info("Successfully searched similar images!")
        return res
    except Exception as e:
        LOGGER.error(e)
        return {'status': False, 'msg': str(e)}, 400
def send_email(subject: str, message: str) -> None:
    msg = EmailMessage()
    msg["Subject"] = subject
    msg["From"] = SENDER_EMAIL
    msg["To"] = RECEIVER_EMAIL
    msg.set_content(message)
    try:
        server = smtplib.SMTP_SSL("smtp.gmail.com", 465)
        server.ehlo()
        server.login(SENDER_EMAIL, PASSWORD)
        server.send_message(msg)
        server.close()
        LOGGER.debug(
            f"Successfully sent email notification to {RECEIVER_EMAIL} from {SENDER_EMAIL}"
        )
    except smtplib.SMTPAuthenticationError as auth_error:
        LOGGER.exception(auth_error.smtp_error)
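# Usage sketch (assumption: SENDER_EMAIL, RECEIVER_EMAIL and PASSWORD come from the
# project's config; Gmail's SMTP server typically expects an app password here).
# send_email("New subreddit posts", "3 new posts matched your keywords")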
def extract_features(img_dir, model):
    try:
        # Cache the progress on disk so it can be polled while loading.
        cache = Cache('./tmp')
        feats = []
        names = []
        img_list = get_imgs(img_dir)
        total = len(img_list)
        cache['total'] = total
        for i, img_path in enumerate(img_list):
            norm_feat = model.resnet50_extract_feat(img_path)
            feats.append(norm_feat)
            names.append(img_path.encode())
            cache['current'] = i + 1
            print("Extracting feature from image No. %d , %d images in total" %
                  (i + 1, total))
        return feats, names
    except Exception as e:
        LOGGER.error("Error with extracting feature from image: {}".format(e))
        sys.exit(1)
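# Usage sketch (assumption: MODEL is the ResNet50 feature extractor created at module
# level and './data/images' is a placeholder directory). The calling load routine then
# inserts feats into Milvus and maps the returned ids to names in MySQL.
# feats, names = extract_features('./data/images', MODEL)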
async def do_search_api(table_name: str = None, query_sentence: str = None):
    try:
        ids, results_classes, seq_genes, distances = search_in_milvus(
            table_name, query_sentence, MILVUS_CLI, MYSQL_CLI)
        res = []
        for i, c, s, d in zip(ids, results_classes, seq_genes, distances):
            res.append({
                'milvus_id': i,
                'seq_class': c,
                'seq_gene': s,
                'IP distance': d
            })
        LOGGER.info("Successfully searched similar sequence!")
        return res
    except Exception as e:
        LOGGER.error(e)
        return {'status': False, 'msg': str(e)}, 400
def search_vectors(self, collection_name, vectors, top_k):
    try:
        self.set_collection(collection_name)
        search_params = {"metric_type": METRIC_TYPE, "params": {"nprobe": 16}}
        res = self.collection.search(vectors,
                                     anns_field="embedding",
                                     param=search_params,
                                     limit=top_k)
        LOGGER.debug("Successfully searched in collection: {}".format(res))
        return res
    except Exception as e:
        LOGGER.error("Failed to search vectors in Milvus: {}".format(e))
        sys.exit(1)
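# Usage sketch (assumption: MILVUS_CLI wraps this helper and "demo_collection" is a
# placeholder name). Each hit in the first result set exposes .id and .distance, which
# is how the search routines in this repo read the results back out.
# results = MILVUS_CLI.search_vectors("demo_collection", query_list, TOP_K)
# for hit in results[0]:
#     print(hit.id, hit.distance)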
async def load_text(file: UploadFile = File(...), table_name: str = None):
    try:
        # Save the uploaded file under ./data before importing it.
        text = await file.read()
        fname = file.filename
        dirs = "data"
        if not os.path.exists(dirs):
            os.makedirs(dirs)
        fname_path = os.path.join(os.getcwd(), dirs, fname)
        with open(fname_path, 'wb') as f:
            f.write(text)
    except Exception:
        return {'status': False, 'msg': 'Failed to load data.'}
    # Insert all the text under the file path into Milvus/MySQL
    try:
        total_num = import_data(table_name, fname_path, MILVUS_CLI, MYSQL_CLI)
        LOGGER.info("Successfully loaded data, total count: {}".format(total_num))
        return "Successfully loaded data!"
    except Exception as e:
        LOGGER.error(e)
        return {'status': False, 'msg': str(e)}, 400
def search_in_milvus(table_name, query_sentence, milvus_cli, mysql_cli):
    if not table_name:
        table_name = DEFAULT_TABLE
    try:
        # Split the query sequence into k-mers and encode them into one vector.
        kmers = build_kmers(query_sentence, KMER_K)
        query_data = [" ".join(kmers)]
        query_list = encode_seq(query_data)
        LOGGER.info("Searching...")
        results = milvus_cli.search_vectors(table_name, query_list, TOP_K)
        vids = [str(x.id) for x in results[0]]
        LOGGER.debug("Milvus ids: {}".format(vids))
        ids, results_classes = mysql_cli.search_by_milvus_ids(vids, table_name)
        distances = [x.distance for x in results[0]]
        # Map each class label to its gene family name.
        df_class = pd.read_table(SEQ_CLASS_PATH)
        class_dict = dict()
        for i in range(len(df_class)):
            class_dict[df_class['class'][i]] = df_class['gene_family'][i]
        seq_genes = [class_dict[int(x)] for x in results_classes]
        return ids, results_classes, seq_genes, distances
    except Exception as e:
        LOGGER.error("Error with search: {}".format(e))
        sys.exit(1)
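# Worked example of the k-mer step above (assumption: build_kmers yields overlapping
# substrings of length KMER_K). With query_sentence = "ATGCATGC" and KMER_K = 4, kmers
# would be ['ATGC', 'TGCA', 'GCAT', 'CATG', 'ATGC'], joined into the single
# space-separated string "ATGC TGCA GCAT CATG ATGC" before encode_seq() is called.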
def extract_features(video_dir, model, frame):
    try:
        cache = Cache('./tmp')
        feats = []
        names = []
        video_list = get_video(video_dir)
        total = len(video_list)
        cache['total'] = total
        for i, video_path in enumerate(video_list):
            # Extract frames from the video and one feature vector per frame.
            imgs = frame.extract_frame(video_path)
            for img_path in imgs:
                norm_feat = model.resnet50_extract_feat(img_path)
                feats.append(norm_feat)
                names.append(video_path.encode())
            cache['current'] = i + 1
            print(
                "%d videos in total, extracting feature from video No. %d, and the video has %d frames."
                % (total, i + 1, len(imgs)))
        return feats, names
    except Exception as e:
        LOGGER.error("Error with extracting feature from video: {}".format(e))
        sys.exit(1)
def create_index(self, collection_name):
    try:
        self.set_collection(collection_name)
        # Default to an IVF_SQ8 index with 16384 cluster buckets (nlist).
        default_index = {
            "index_type": "IVF_SQ8",
            "metric_type": METRIC_TYPE,
            "params": {
                "nlist": 16384
            }
        }
        status = self.collection.create_index(field_name="embedding",
                                              index_params=default_index)
        if not status.code:
            LOGGER.debug(
                "Successfully create index in collection: {} with param: {}".format(
                    collection_name, default_index))
            return status
        else:
            raise Exception(status.message)
    except Exception as e:
        LOGGER.error("Failed to create index: {}".format(e))
        sys.exit(1)
async def search_images(request: Request,
                        image: UploadFile = File(...),
                        table_name: str = None):
    # Search the uploaded image in Milvus/MySQL
    try:
        # Save the uploaded image to the server.
        content = await image.read()
        img_path = os.path.join(UPLOAD_PATH, image.filename)
        with open(img_path, "wb+") as f:
            f.write(content)
        host = request.headers['host']
        paths, distances = do_search(host, table_name, img_path, MODEL,
                                     MILVUS_CLI, MYSQL_CLI)
        # Keep only the smallest distance for each path.
        res = {}
        for p, d in zip(paths, distances):
            if p not in res or res[p] > d:
                res[p] = d
        res = sorted(res.items(), key=lambda item: item[1])
        LOGGER.info("Successfully searched similar images!")
        return res
    except Exception as e:
        LOGGER.error(e)
        return {'status': False, 'msg': str(e)}, 400
def create_collection(self, collection_name):
    try:
        if not self.has_collection(collection_name):
            # id: auto-generated INT64 primary key; embedding: float vector field.
            field1 = FieldSchema(name="id",
                                 dtype=DataType.INT64,
                                 description="int64",
                                 is_primary=True,
                                 auto_id=True)
            field2 = FieldSchema(name="embedding",
                                 dtype=DataType.FLOAT_VECTOR,
                                 description="float vector",
                                 dim=VECTOR_DIMENSION,
                                 is_primary=False)
            schema = CollectionSchema(fields=[field1, field2],
                                      description="collection description")
            self.collection = Collection(name=collection_name, schema=schema)
            LOGGER.debug("Create Milvus collection: {}".format(self.collection))
        return "OK"
    except Exception as e:
        LOGGER.error("Failed to create collection in Milvus: {}".format(e))
        sys.exit(1)
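# Usage sketch (assumption: this helper lives in a class that has already connected
# with pymilvus 2.x; the host/port and collection name below are placeholders).
# from pymilvus import connections
# connections.connect(host="127.0.0.1", port="19530")
# MILVUS_CLI.create_collection("demo_collection")  # creates the id + embedding schema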
async def do_load_api(file: UploadFile = File(...), table_name: str = None):
    try:
        # Save the uploaded QA file under ./QA_data before loading it.
        text = await file.read()
        fname = file.filename
        dirs = "QA_data"
        if not os.path.exists(dirs):
            os.makedirs(dirs)
        fname_path = os.path.join(os.getcwd(), dirs, fname)
        with open(fname_path, 'wb') as f:
            f.write(text)
    except Exception:
        return {'status': False, 'msg': 'Failed to load data.'}
    try:
        total_num = do_load(table_name, fname_path, MODEL, MILVUS_CLI, MYSQL_CLI)
        LOGGER.info("Successfully loaded data, total count: {}".format(total_num))
        return {
            'status': True,
            'msg': "Successfully loaded data: {}".format(total_num)
        }, 200
    except Exception as e:
        LOGGER.error(e)
        return {'status': False, 'msg': str(e)}, 400
def grab_latest() -> List:
    """
    Grab the latest [LIMIT] posts from subreddits inside [SUBREDDITS], filter them
    down to just a handful of relevant fields, and remove posts that have already
    been parsed in the past.
    """
    # TODO: make these requests async
    posts = {}
    for sub in SUBREDDITS:
        url = _get_subreddit_url(sub)
        LOGGER.debug(f"Querying: {url}")
        response = get(url, headers={"User-Agent": USER_AGENT})
        try:
            new_posts = parse_json_response(response.json())
        except Exception:
            LOGGER.exception(
                f"Request to {url} failed with code {response.status_code}: {response.reason}"
            )
            # Skip this subreddit so new_posts is never referenced while undefined.
            continue
        # We got some posts we haven't seen before. Let's filter through them.
        if new_posts:
            posts[sub] = filter_results(new_posts, sub)
    subject = format_subject(posts)
    message = format_response(posts)
    if not message:
        LOGGER_RESULTS.info("No new posts.")
        return
    LOGGER_RESULTS.info(f"\n{message}")
    if EMAIL_NOTIFICATIONS:
        send_email(subject, message)
def do_search(table_name, img_path, model, milvus_client, mysql_cli):
    try:
        if not table_name:
            table_name = DEFAULT_TABLE
        # Detect objects in the image, then extract one feature vector per object.
        detector = Detector()
        run(detector, img_path)
        vecs = get_object_vector(model, img_path + '/object')
        results = milvus_client.search_vectors(table_name, vecs, TOP_K)
        ids = []
        distances = []
        for result in results:
            for j in result:
                ids.append(j.id)
                distances.append(j.distance)
        paths = mysql_cli.search_by_milvus_ids(ids, table_name)
        # Clean up the temporary directory of cropped objects.
        shutil.rmtree(img_path)
        return paths, distances
    except Exception as e:
        LOGGER.error("Error with search: {}".format(e))
        sys.exit(1)