def main(config):
    # Read every .xls file under the DATA_FOR_CORPUS_PATH directory
    file_names = os.listdir(config["DATA_FOR_CORPUS_PATH"])
    chat = []
    for file in file_names:
        if ".xls" in file:
            print(file)
            data = pd.read_excel(config["DATA_FOR_CORPUS_PATH"] + "/" + file)
            chat += list(data['对话内容'])  # '对话内容' = dialogue-content column
    # Build the training corpus
    fout = open(config["corpus_file"], 'w', encoding='utf-8')
    for t in range(len(chat)):
        word_segment = []
        curr_chat = str(chat[t]).lower()
        sentence_segment = [i for i in curr_chat.split('\r\n') if i]
        if len(sentence_segment) > 3:
            for sentence in sentence_segment[:-1]:
                # Skip visitor/agent speaker prefixes and the UnionPay welcome banner
                if "访客" not in sentence and "客服人员说:" not in sentence:
                    if "欢迎使用中国银联网络客服" not in sentence:
                        word_segment += [
                            word for word in utils.segment(
                                utils.remove_url(sentence))
                            if word != ' '
                        ]
        if word_segment:
            fout.write(' '.join(word_segment) + '\n')
    fout.close()
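# A minimal sketch (not part of the original snippet) of how the corpus file written above
# could be turned into the Word2Vec model that the clustering step later loads from
# config["w2v_model_file"]. The function name train_w2v is hypothetical; parameter names
# follow gensim >= 4.0 (older versions use size/iter instead of vector_size/epochs).
import gensim
from gensim.models.word2vec import LineSentence

def train_w2v(config, dim=100):
    # corpus_file holds one space-separated, pre-segmented sentence per line
    sentences = LineSentence(config["corpus_file"])
    model = gensim.models.Word2Vec(sentences, vector_size=dim, min_count=2, sg=1)
    model.save(config["w2v_model_file"])
    return model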
def add_frame(self, image):
    # Segment skin, then remove the face region so only the hand remains in the mask
    mask = utils.segment(image)
    face, foundFace = utils.detect_face(image)
    mask = utils.eliminate_face(face, foundFace, mask)
    if self.__using_stabilization:
        mask = utils.stabilize(foundFace, self.__no_of_frames + 1, image, face, mask)
    hand, hand_contour = utils.get_my_hand(mask, True)
    hand_pose, direction = 'None', 'None'
    if hand_contour is not None:
        motion_detected = self.__motion.get_hand_motion(hand_contour)
    else:
        return
    if not motion_detected:
        # Static hand: classify the pose directly from the cropped hand region
        hand_pose = recognise_hand_pose(
            hand,
            directly_from_hand=True,
            model_path='Models/silatra_gesture_signs.sav')
    else:
        direction = motion_detected
    self.__observations.append((hand_pose, direction))
    self.__no_of_frames += 1
def bits(self, s):
    """Compute the bits of entropy in a string.

    If the string uses characters missing from the language model,
    __init__(default_bits) will be used for that element instead.

    Args:
        s: String to compute the entropy for
    """
    total_bits = 0
    for context, value in utils.all_but_the_last(utils.segment(s)):
        total_bits += self.get(context).bits(value, self.__default_bits)
    return total_bits
def compile(f, n=3):
    """Compile the raw password database.

    Builds a histogram of n-tuples provided by f.

    Args:
        f: File containing raw password data
        n: tuple length (defaults to 3)

    Returns:
        LanguageModel
    """
    language_model = LanguageModel()
    for line in f:
        language_model.extend(utils.segment(line.strip(), n))
    return language_model
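# A minimal usage sketch (assumed, not from the original source): build the language model
# from a password file and score a candidate password in bits of entropy. 'passwords.txt'
# is a placeholder path; LanguageModel and its bits() method come from the snippets above.
with open('passwords.txt', encoding='utf-8') as f:
    model = compile(f, n=3)

print(model.bits('correct horse battery staple'))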
def recognise_hand_pose(image,
                        directly_from_hand=False,
                        model_path='Models/silatra_digits_and_letters.sav',
                        using_stabilization=False,
                        no_of_frames=1):
    '''
    ### SiLaTra Hand Pose Recognition

    Provides classification for an input hand pose image.

    Inputs:
        (a) Mandatory parameter - image for which hand pose classification is to be performed.
        (b) Optional parameters (use them only if you understand them):
            (1) directly_from_hand - boolean - Set this to True if the `image` parameter
                already contains a cropped hand region.
            (2) model_path - String - If an alternate model is to be used, pass the path
                of its .sav file.
            (3) using_stabilization - boolean - Set this to True if you intend to use
                object stabilization. Only useful when classifying hand poses from a
                continuous feed.
            (4) no_of_frames - Integer - Only used when using_stabilization is True; pass
                the index of the frame within the continuous feed being processed.
    '''
    import pickle
    from sklearn.neighbors import KNeighborsClassifier

    if not directly_from_hand:
        mask = utils.segment(image)
        face, foundFace = utils.detect_face(image)
        mask = utils.eliminate_face(face, foundFace, mask)
        if using_stabilization:
            mask = utils.stabilize(foundFace, no_of_frames, image, face, mask)
        hand = utils.get_my_hand(mask)
        if hand is False:
            return 'No hand pose in image'
        features = utils.extract_features(hand)
    else:
        features = utils.extract_features(image)
    classifier = pickle.load(open(model_path, 'rb'))
    hand_pose = classifier.predict([features])[0]
    return hand_pose
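# A minimal usage sketch (assumed; 'sign.jpg' is a placeholder path): classify a single
# hand pose from a still image using the function above with its default model.
import cv2

image = cv2.imread('sign.jpg')
label = recognise_hand_pose(image)
print('Recognised hand pose:', label)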
def segment():
    # Assumes a Flask view (e.g. decorated with @app.route('/segment')) and
    # from flask import request, redirect, url_for, render_template, jsonify
    series_instance_uid = request.args.get('series_instance_uid')
    model_id = request.args.get('model_id')
    foo = request.args.get('foo', None)
    task_id = request.args.get('task_id', None)
    return_type = request.args.get('return_type', "html")

    if task_id is None:
        # No task yet: launch the segmentation task and redirect back here with its id
        task = utils.segment(series_instance_uid, model_id, foo=foo)
        print(series_instance_uid, task_id, task.ready())
        return redirect(url_for('segment',
                                series_instance_uid=series_instance_uid,
                                model_id=model_id,
                                foo=foo,
                                task_id=task.id))

    task = utils.celery.AsyncResult(task_id)
    print(series_instance_uid, task_id, task.ready())
    response = {'ready': task.ready(), 'task_id': task_id}  # for debugging

    if return_type == "html":
        if task.ready():
            return render_template("show_results.html", results=response)
        else:
            # Loading page keeps polling/redirecting until the task is ready
            return render_template("loading.html", task_id=task_id)
    else:
        if task.ready():
            try:
                response["result"] = task.get()
            except Exception:
                return jsonify(response)
            return jsonify(response)
        else:
            return jsonify(response)
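# A minimal client-side sketch (assumed, not from the original source) for driving the view
# above: the route path '/segment', host, and port are placeholders. The first request starts
# the Celery task; the redirect's Location header carries the task_id, which is then polled
# with return_type=json until the task reports ready.
import time
from urllib.parse import urlparse, parse_qs
import requests

BASE = 'http://localhost:5000/segment'  # assumed route
start = requests.get(BASE,
                     params={'series_instance_uid': 'example-uid', 'model_id': '1'},
                     allow_redirects=False)
task_id = parse_qs(urlparse(start.headers['Location']).query)['task_id'][0]

resp = {'ready': False}
while not resp['ready']:
    time.sleep(1)
    resp = requests.get(BASE, params={'task_id': task_id, 'return_type': 'json'}).json()
print(resp.get('result'))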
def recognise_hand_pose(image,
                        directly_from_hand=False,
                        model_path='Models/digits_and_letters.sav',
                        using_stabilization=False,
                        no_of_frames=1):
    import pickle
    from sklearn.neighbors import KNeighborsClassifier

    if not directly_from_hand:
        mask = utils.segment(image)
        face, foundFace = utils.detect_face(image)
        mask = utils.eliminate_face(face, foundFace, mask)
        if using_stabilization:
            mask = utils.stabilize(foundFace, no_of_frames, image, face, mask)
        hand = utils.get_my_hand(mask)
        if hand is False:
            return 'No hand pose in image'
        features = utils.extract_features(hand)
    else:
        features = utils.extract_features(image)
    classifier = pickle.load(open(model_path, 'rb'))
    hand_pose = classifier.predict([features])[0]
    return hand_pose
def main(config):
    print("\nRunning the first clustering pass...\n")
    # Load the word-vector model
    model = gensim.models.Word2Vec.load(config["w2v_model_file"])
    # Load the data
    data = pd.read_excel(config["cluster_excel_file"])
    # Compute the average word vector of each sentence
    w2v_dim = model.vector_size
    print("Word vector dimensionality:", w2v_dim)
    # Feature matrix: one row per sample
    feature = np.zeros([len(data), w2v_dim])
    split_word = []
    for i in range(len(data)):
        word_list = utils.segment(data.iat[i, 5])
        feature[i, :], select_word = get_mean_vec(word_list, model, w2v_dim)
        split_word.append(' '.join(select_word))
    # Keep only the rows whose segmentation is non-empty
    bool_idx = [bool(split_word[i]) for i in range(len(split_word))]
    num_idx = [i for i in range(len(split_word)) if split_word[i]]
    data = data[bool_idx]
    split_word = np.array(split_word)[num_idx]
    feature = feature[num_idx, :]
    # Cluster with scikit-learn's K-means
    y_pred = KMeans(n_clusters=config["n_cluster_first"]).fit_predict(feature)
    data["sublabel"] = y_pred
    data["cutwords"] = split_word
    data = data.sort_values(["sublabel"])
    # Save the data back to the same file
    data.to_excel(config["cluster_excel_file"], index=False)
    print("\nFirst clustering pass finished!\n")
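# get_mean_vec is called above but not shown in this snippet. A minimal sketch of a plausible
# implementation (assumed, not the original): average the vectors of the words that exist in
# the Word2Vec vocabulary and also return those words.
def get_mean_vec(word_list, model, w2v_dim):
    vectors, select_word = [], []
    for word in word_list:
        if word in model.wv:
            vectors.append(model.wv[word])
            select_word.append(word)
    if not vectors:
        # No known words: return a zero vector so the row can be filtered out later
        return np.zeros(w2v_dim), select_word
    return np.mean(vectors, axis=0), select_word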
        op1 = "QUIT\r\n"
        client.send(op1.encode('ascii'))
        break

    data = client.recv(size, socket.MSG_WAITALL)
    # Reference: https://www.binarytides.com/receive-full-data-with-the-recv-socket-function-in-python/
    # Instead of storing the image as mentioned in the 1st reference: https://stackoverflow.com/a/23312964/5370202
    # we can directly convert it to OpenCV Mat format
    # Reference: https://stackoverflow.com/a/17170855/5370202
    nparr = np.frombuffer(data, np.uint8)  # np.fromstring is deprecated
    img_np = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    img_np = imutils.rotate_bound(img_np, 90)
    img_np = cv2.resize(img_np, (0, 0), fx=0.7, fy=0.7)

    mask1 = utils.segment(img_np)

    gray = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)
    # Detect faces in the grayscale image
    haar_cascade_face = cv2.CascadeClassifier('/home/aarav/Desktop/MajorProject/Models/haarcascade_frontalface_default.xml')
    rects = haar_cascade_face.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)

    # Keep the largest detected face rectangle
    maxArea1 = 0
    faceRect = -1
    foundFace = False
    for (x, y, w, h) in rects:
        if w * h > maxArea1:
            maxArea1 = w * h
            faceRect = (x, y, w, h)
def HMM(sequence, Dic, ProDic):
    # Viterbi-style decoding: the forward pass keeps the best scores and back-pointers,
    # the backward pass recovers the tag sequence, and segment() applies the tags.
    lastmaxpro, maxpre = Forward(sequence, Dic=Dic, ProDic=ProDic)
    tag = Backward(lastmaxpro, maxpre)
    res = segment(sequence, tag)
    return tag, res
from utils import segment
import cv2
import numpy as np

print('Press q/Q to quit')

cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FPS, 30)

lower = np.array([0, 140, 60], np.uint8)
upper = np.array([255, 180, 127], np.uint8)

while True:
    _, frame = cap.read()
    mask = segment(frame)
    '''
    Normal segmentation using YCRCB
    mask = cv2.cvtColor(frame, cv2.COLOR_BGR2YCR_CB)
    mask = cv2.inRange(mask, lower, upper)
    '''
    cv2.imshow('Original', frame)
    cv2.imshow('Segmentation mask', mask)
    key = cv2.waitKey(1)
    if key == ord('q') or key == ord('Q'):
        break

cv2.destroyAllWindows()
cap.release()
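# A minimal standalone sketch (assumed) of what a YCrCb skin-segmentation function like
# utils.segment might look like, based on the commented-out "normal segmentation" block above.
# The function name ycrcb_skin_mask is hypothetical.
import cv2
import numpy as np

def ycrcb_skin_mask(frame,
                    lower=np.array([0, 140, 60], np.uint8),
                    upper=np.array([255, 180, 127], np.uint8)):
    # Convert to YCrCb and threshold the chroma channels to keep skin-coloured pixels
    ycrcb = cv2.cvtColor(frame, cv2.COLOR_BGR2YCR_CB)
    return cv2.inRange(ycrcb, lower, upper)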
contour_start = False
lower = np.array([0, 147, 60], np.uint8)
upper = np.array([255, 180, 127], np.uint8)
grid = (20, 20)
print('gaf0 gaf1')

while True:
    start_time = time.time()
    _, frame = cap.read()
    x, y, w, h = 100, 100, 300, 300
    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), thickness=2)
    roi = frame[y:y + h, x:x + w]
    mask = segment(roi, lower, upper)
    if not contour_start:
        cv2.imshow('Skin segmentation using YCrCb mask', mask)
        cv2.imshow('Original', frame)
    else:
        try:
            _, thresh = cv2.threshold(mask, 127, 255, 0)
            hand = get_my_hand(thresh)
            cv2.imshow('Your hand', hand)
            cv2.imshow('Original', frame)
            features = extract_features(hand, grid)
            predictions = classifier.predict_proba([features]).tolist()[0]
            for prob in predictions:
                print('%.2f' % (prob), end=' ')
classifier = KNeighborsClassifier(n_neighbors=3)
classifier.fit(X, Y)

lower = np.array([0, 147, 60], np.uint8)
upper = np.array([255, 180, 127], np.uint8)
cap = cv2.VideoCapture('good afternoon.avi')

obs = []
frame_no, grid = 1, (20, 20)
print('Frame: [gaf0, gaf1]')
while True:
    try:
        _, frame = cap.read()
        mask = segment(frame, lower, upper)
        _, thresh = cv2.threshold(mask, 127, 255, 0)
        hand = get_my_hand(thresh)
        features = extract_features(hand, grid)
        pred = classifier.predict([features])
        print('%5d' % (frame_no), end=': ')
        print(pred)
        pred = pred.tolist()
        obs.append(pred)
        frame_no += 1
    except Exception:
        # End of video (or a frame without a detectable hand): stop collecting observations
        break

cap.release()
print(obs)

states = ('gaf0', 'gaf1')
    payload = fread.readline()
    if payload in ('\r\n', '\n', '\r'):
        continue
    if not payload:
        break
    payload = payload.strip()
    payloads.append(payload)
    y.append(classes[name])

y = np.array(y)
np.save(y_train_dir, y)

for payload in payloads:
    tempseg = segment(payload)
    if not tempseg:
        print(payload)
    payloads_seged.append(tempseg)
    lens.append(len(tempseg))

with open(lens_dir, 'wb') as f:
    pickle.dump(lens, f)

model = Word2Vec(payloads_seged,
                 size=embedding_size,
                 iter=iter_num,
                 sg=1,
                 min_count=min_num,
                 max_vocab_size=max_voc)  # size/iter are gensim < 4.0 keyword names
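# A minimal follow-up sketch (assumed, not from the original source) showing how the trained
# embedding can vectorise one segmented payload: tokens outside the Word2Vec vocabulary are
# skipped, known ones are looked up in model.wv. The helper name payload_to_vectors is
# hypothetical; it relies only on the gensim model built above.
def payload_to_vectors(tokens, model):
    return [model.wv[token] for token in tokens if token in model.wv]

example = payload_to_vectors(payloads_seged[0], model)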
import cv2
import numpy as np
from utils import segment

lower = np.array([0, 147, 60], np.uint8)
upper = np.array([255, 180, 127], np.uint8)

cap = cv2.VideoCapture(0)
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
out = cv2.VideoWriter('test2.avi', fourcc, 10, (300, 300))
start_rec = False

while True:
    _, frame = cap.read()
    mask, _, _ = segment(frame, lower, upper)
    x, y, w, h = 100, 100, 300, 300
    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), thickness=2)
    roi = frame[y:y + h, x:x + w]
    cv2.imshow('You', frame)
    if not start_rec:
        cv2.imshow('Segmented', mask)
    else:
        out.write(roi)
    k = cv2.waitKey(50)
    if k == ord('q'):
        break
    elif k == ord('s'):
        start_rec = not start_rec

cv2.destroyAllWindows()
cap.release()
out.release()