def __init__(self, numGestures, minDescriptorsPerFrame, numWords, descType, numPredictions, parent):
    """Initialise the testing-side state of the gesture recogniser.

    Args:
        numGestures: stored as ``self.numGestures``.
        minDescriptorsPerFrame: stored as ``self.minDescriptorsPerFrame``.
        numWords: stored as ``self.numWords``.
        descType: forwarded to ``FeatureExtractor`` as its ``type`` argument.
        numPredictions: length of the prediction history; every slot starts
            at ``-1``.
        parent: owning object; must expose ``imHeight`` and ``imWidth``,
            which size the side-frame buffer.
    """
    self.numGestures = numGestures
    self.numWords = numWords
    self.minDescriptorsPerFrame = minDescriptorsPerFrame
    self.parent = parent
    self.classifier = None

    # Titles of the preview windows.
    self.windowName = "Testing preview"
    self.handWindowName = "Cropped hand"
    self.binaryWindowName = "Binary frames"

    # History of the most recent predictions, -1 meaning "no prediction yet".
    self.predictionList = [-1] * numPredictions

    self.handTracker = HandTracker(kernelSize=7, thresholdAngle=0.4, defectDistFromHull=30, parent=self)
    self.featureExtractor = FeatureExtractor(type=descType, parent=self)

    # Buffer of down-scaled frames: numSideFrames slots, each 1/numSideFrames
    # of the parent image in both dimensions, 3 channels.
    self.numSideFrames = 10
    # Array dimensions must be integers. Plain "/" yields a float under
    # Python 3 (and whenever the parent sizes are floats), which np.zeros
    # rejects, so floor-divide and coerce explicitly.
    sideFrameHeight = int(self.parent.imHeight // self.numSideFrames)
    sideFrameWidth = int(self.parent.imWidth // self.numSideFrames)
    self.prevFrameList = np.zeros(
        (self.numSideFrames, sideFrameHeight, sideFrameWidth, 3), "uint8")
    self.numPrevFrames = 0

    self.predictionScoreThreshold = 0.2
    self.learningRate = 0.01
    self.numReinforce = 1
def __set_fast_data(img_file_path, lbl):
    """Extract features for one image and store them as a single DB record.

    The record has three keys: ``file`` (a key built from the file's base
    name, its extension and the current time in milliseconds, with dots
    replaced by underscores), ``label`` (``lbl``) and ``feature`` (the
    vector returned by ``FeatureExtractor().get_features``).

    Args:
        img_file_path: path of the image to process.
        lbl: label stored alongside the feature vector.

    Failures while writing to the database are reported on stdout and
    otherwise ignored (best effort). The database handle is closed on every
    path, including when feature extraction itself raises.
    """
    db_handle = MongoDB()
    try:
        feature_vector = FeatureExtractor().get_features(img_file_path)

        key_p = os.path.splitext(os.path.basename(img_file_path))
        key = key_p[0] + '_' + key_p[1][1:] + '_' + str(
            int(time.time() * 1000.0))
        key = key.replace('.', '_')

        payload = [{
            'file': key,
            'label': lbl,
            'feature': feature_vector,
        }]

        try:
            db_handle.to_db(payload=payload,
                            key=None,
                            db=MONGO_HOPS_DB,
                            collection=MONGO_XRAY_COLLECTION)
            payload.clear()
        except Exception as e:
            # Deliberate best effort: report the offending file and carry on.
            print(img_file_path)
            print("Ignoring Exception : " + str(e))
    finally:
        # Single release point so the handle cannot leak on any exit path.
        db_handle.close()
def __init__(self, numGestures, numFramesPerGesture, minDescriptorsPerFrame, numWords, descType, kernel, numIter, parent):
    """Initialise the training-side state of the gesture recogniser.

    All arguments except ``descType`` are stored on the instance under the
    same name; ``descType`` is handed to ``FeatureExtractor`` as ``type``.
    Collections start empty and ``voc``, ``classifier`` and ``numDefects``
    start as ``None``.
    """
    # Sizing parameters supplied by the caller.
    self.numGestures, self.numFramesPerGesture = numGestures, numFramesPerGesture
    self.numWords, self.minDescriptorsPerFrame = numWords, minDescriptorsPerFrame
    self.parent = parent

    # Nothing collected or trained yet.
    self.desList = []
    self.voc = None
    self.classifier = None

    # Titles of the preview windows.
    (self.windowName,
     self.handWindowName,
     self.binaryWindowName) = ("Training preview", "Cropped hand", "Binary frames")

    # Helpers receive this object as their parent, so they are built only
    # after the attributes above exist and strictly one after the other.
    self.handTracker = HandTracker(
        kernelSize=7,
        thresholdAngle=0.4,
        defectDistFromHull=30,
        parent=self,
    )
    self.featureExtractor = FeatureExtractor(type=descType, parent=self)

    self.kernel, self.numIter = kernel, numIter
    self.numDefects = None
    self.firstFrameList, self.trainLabels = [], []
def get_column_names(self):
    """Return the extractor's column names with ``'label'`` appended.

    A ``FeatureExtractor`` is built from ``self.config_path`` on every call.
    """
    extractor = FeatureExtractor(self.config_path)
    return extractor.column_names + ['label']
featureExtractorModulePath = '/cab0/wammar/exp/feat-ext' if featureExtractorModulePath not in sys.path: sys.path.append(featureExtractorModulePath) from feature_extractor import FeatureExtractor logResModulePath = '/cab0/wammar/exp/log-reg/source' if logResModulePath not in sys.path: sys.path.append(logResModulePath) from log_reg import * ####################### # SUPERVISED LEARNING # ####################### # initialize the feature extractor extractor = FeatureExtractor('|') # for each labeled example in raw/docId-label, write a line in features/labeled.txt. Instead of the site-ID in the original file, write the non-zero feature IDs and their values. labeledFeaturesFilename = '{0}.labeled'.format(outputPrefix) WriteFeaturesFile('{0}/docId-label.txt'.format(rawDir), labeledFeaturesFilename) # initialize logistic regressin model logReg = LogisticRegression() # specify learning info learningInfo = LearningInfo( stoppingCriterion = StoppingCriterion.TRAIN_LOGLIKELIHOOD, stoppingCriterionThreshold = 0.00001, positiveDevSetSize = 0, negativeDevSetSize = 0, minTrainingIterationsCount = 3,
def draw_features_from_db(action, db, volt_collection, tag_collection, port=27017, host='localhost', ndevices=3, offset=0, action_num=0):
    """Extract features for one action from MongoDB and persist them per device.

    For every tag document whose ``tag`` equals ``action``, the voltage
    documents with ``time`` strictly between the tag's ``inittime`` and
    ``termtime`` are read, fed sample by sample through the registered
    feature modules, and the resulting rows are written to
    ``feature_matrixs/feature_matrix<i>.npy`` and
    ``feature_matrixs/label_matrix<i>.npy`` (created if missing, otherwise
    extended) for each device ``i`` in ``1..ndevices``.

    Args:
        action: tag value selecting the recordings to process; also used in
            the figure title.
        db: name of the database on the Mongo server.
        volt_collection: name of the collection holding voltage documents
            (keys read: ``device_no``, ``voltage``, ``time``).
        tag_collection: name of the collection holding tag documents
            (keys read: ``tag``, ``inittime``, ``termtime``).
        port: Mongo server port.
        host: Mongo server host.
        ndevices: number of devices; device numbers run from 1 to ndevices.
        offset: not used in this function.
        action_num: integer label written to the label matrix for each row.

    Relies on the module-level names ``config`` and ``feature_names``.

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source; confirm loop/branch membership against the
    original file, in particular the lines flagged with NOTE(review).
    NOTE(review): a figure is created and titled here, but no plotting call
    is visible in this function.
    """
    client = MongoClient(port=port, host=host)
    database = client[db]
    # From here on the two parameters refer to collection objects, not names.
    tag_collection = database[tag_collection]
    volt_collection = database[volt_collection]

    # The error is raised and caught right here, so an (almost) empty pair of
    # collections only prints a message and execution continues.
    try:
        if volt_collection.count_documents(
            {}) + tag_collection.count_documents({}) < 2:
            raise CollectionError('Collection not found!')
    except CollectionError as e:
        # NOTE(review): relies on CollectionError exposing `.message` - confirm.
        print(e.message)

    # Not used afterwards in this function.
    ntags = tag_collection.count_documents({'tag': action})

    title = config['volt_collection'][6:] + "" + action + "_features"
    fig = plt.figure(title, figsize=(6, 8))

    # Data is analysed over time windows, base unit seconds (e.g. 1s, 10s, 30s, 60s).
    # interval is the time span of each analysis; rate is how often an analysis is run.
    interval = 1
    rate = 1
    fig.suptitle(action + " (" + "interval:" + str(interval) + "s, " + "stepsize:" + str(rate) + "s)")

    # Create the feature extractor
    extractor = FeatureExtractor()

    for feature in feature_names:
        # Instantiate the feature-extraction module named by `feature`
        # NOTE(review): eval() on names from `feature_names`; safe only while
        # that list is not influenced by external input.
        module = eval(feature + "(" + str(interval) + "," + str(rate) + ")")
        # Register the feature-extraction module
        extractor.register(module)

    # Counter for the left/right canvas position: incremented per tag, i.e. per person
    tag_acc = 1

    # read the data that is of a certain action one by one
    for tag in tag_collection.find({'tag': action}):
        inittime, termtime = tag['inittime'], tag['termtime']

        # get the arrays according to which we will plot later
        times, volts = {}, {}
        for i in range(1, ndevices + 1):
            times[i] = []
            volts[i] = []

        sampling_counter = 0
        sampling_factor = 3  # intended meaning: keep only one sample out of every sampling_factor samples
        for volt in volt_collection.find(
            {'time': {
                '$gt': inittime,
                '$lt': termtime
            }}):
            if (sampling_counter % sampling_factor == 0):
                device_no = int(volt['device_no'])
                v = volt['voltage']
                # NOTE(review): `time` is a local here and would hide a
                # module-level `time` inside this function.
                time = volt['time']
                times[device_no].append(time)
                volts[device_no].append(v)
                # NOTE(review): as reconstructed, the counter is reset to 1 and
                # then incremented, so after the first document every 2nd one
                # is kept rather than every `sampling_factor`-th; confirm the
                # intended nesting and reset value.
                sampling_counter = 1
            sampling_counter += 1

        # Containers for the feature timestamps and feature values
        feature_times, feature_values = {}, {}
        for i in range(1, ndevices + 1):
            feature_times[i] = []
            from collections import defaultdict
            feature_values[i] = defaultdict(list)
            for feature in feature_names:
                # The key is the feature name without its last 6 characters.
                feature_values[i][feature[:-6]] = []

        # Range of devices whose features are extracted
        start = 1
        end = ndevices

        # Run feature extraction for each of the ndevices acquisition devices
        for i in range(start, end + 1):
            for j in range(len(volts[i])):
                value = {"time": times[i][j], "volt": volts[i][j]}
                output = extractor.process(value)
                # A falsy result produces no feature row for this sample.
                if (output):
                    features = {
                        "device_no": i,
                        "feature_time": times[i][j],
                        "feature_value": output,
                        "interval": interval,
                        "rate": rate
                    }
                    feature_times[i].append(features['feature_time'])
                    for feature_type in feature_values[i].keys():
                        feature_values[i][feature_type].append(
                            features['feature_value'][feature_type])

            # Clear all modules to avoid stale data
            # NOTE(review): placed inside the per-device loop in this
            # reconstruction - confirm.
            extractor.clear()

        # Number of features
        nfeatures = len(feature_values[1])
        # Feature names; keys() gives a view that cannot be indexed, hence the list()
        feature_type = list(
            feature_values[1].keys())
        for i in range(start, end + 1):
            # If the file already exists, extend the stored matrices
            if (os.path.exists("feature_matrixs/feature_matrix" + str(i) + ".npy")):
                feature_matrix = np.load("feature_matrixs/feature_matrix" + str(i) + ".npy")
                label_matrix = np.load("feature_matrixs/label_matrix" + str(i) + ".npy")
                temp_matrix = np.zeros((len(feature_times[i]), nfeatures), dtype=float)

                # The old files are removed before the extended ones are saved.
                os.remove("feature_matrixs/feature_matrix" + str(i) + ".npy")
                os.remove("feature_matrixs/label_matrix" + str(i) + ".npy")

                for j in range(len(feature_times[i])):
                    for k in range(nfeatures):
                        temp_matrix[j][k] = feature_values[i][
                            feature_type[k]][j]
                    # np.append without an axis returns a flattened array, so
                    # the stored labels become 1-D after the first extension.
                    label_matrix = np.append(label_matrix, [action_num])

                # np.append(feature_matrixs, [temp_matrix], axis=0)
                # Inserting at index shape[0] along axis 0 adds the new rows at the end.
                feature_matrix = np.insert(feature_matrix, feature_matrix.shape[0], values=temp_matrix, axis=0)

                np.save('feature_matrixs/feature_matrix' + str(i), feature_matrix)
                np.save('feature_matrixs/label_matrix' + str(i), label_matrix)

                print("feature_matrix" + str(i) + ":" + str(feature_matrix.shape))

            # If the file does not exist, create the feature and label matrices
            else:
                feature_matrix = np.zeros((len(feature_times[i]), nfeatures), dtype=float)
                label_matrix = np.zeros((len(feature_times[i]), 1), dtype=int)

                for j in range(len(feature_times[i])):
                    for k in range(nfeatures):
                        feature_matrix[j][k] = feature_values[i][
                            feature_type[k]][j]
                    label_matrix[j] = action_num

                # Original author's note: np.save stores 8 decimal places automatically.
                # NOTE(review): that statement is unverified here.
                np.save('feature_matrixs/feature_matrix' + str(i), feature_matrix)
                np.save('feature_matrixs/label_matrix' + str(i), label_matrix)

                print("feature_matrix" + str(i) + ":" + str(feature_matrix.shape))

        tag_acc += 1
def __init__(self):
    """Create the helper objects this instance works with.

    The helpers are constructed with no arguments, in this order:
    ``Utility``, ``FiveWExtractor``, ``FeatureExtractor``, ``NLPHelper``,
    ``ModelTrainer``.
    """
    helpers = (
        Utility(),
        FiveWExtractor(),
        FeatureExtractor(),
        NLPHelper(),
        ModelTrainer(),
    )
    self.ut, self.fwe, self.fex, self.nlp, self.tr = helpers
import numpy as np import pickle import math from data_reader import DataReader from feature_extractor import FeatureExtractor from sklearn.naive_bayes import GaussianNB data_reader = DataReader( ) # reads the images files and converts them into numpy 2D arrays feature_extractor = FeatureExtractor( ) # calculates the eigenfaces. Follows the fit->transform paradigm. clf = GaussianNB( ) # a naive bayes classifier where the individual variables are supposed to follow a gaussian distribution # since the number of images available is relatively low (400 images), # we'll use cross-validation to assess the performance of the face recognition system. data = data_reader.getAllData( shuffle=True) # we shuffle the data so we can do Cross-Validation num_folds = 10 fold_length = math.floor(len(data[0]) / num_folds) average_accuracy = 0.0 # the performance measure of the system for k in range(num_folds): # get train data and test data from data train_data, test_data = [None, None], [None, None] for i in range(2): if k == num_folds - 1: train_data[i] = data[i][:k * fold_length] test_data[i] = data[i][k * fold_length:] else:
def __init__(self, vocab, options):
    """Build the DyNet model, the feature extractor and the two scoring MLPs.

    Besides setting instance attributes, this publishes module-level
    globals: ``dy`` (the dynet module), ``LEFT_ARC, RIGHT_ARC, SHIFT, SWAP``
    (0..3) and ``NO_COMP, HARD_COMP, SOFT_COMP, GEN_COMP`` (0..3).

    Args:
        vocab: indexable; ``vocab[5]`` is used as the full relation list and
            the whole object is passed on to ``FeatureExtractor``.
        options: settings object; attributes read here are
            ``nucleus_composition``, ``learning_rate``, ``activation``,
            ``oracle``, ``headFlag``, ``rlMostFlag``, ``rlFlag``, ``k``,
            ``no_bilstms``, ``lstm_output_size``, ``mlp_hidden_dims`` and
            ``mlp_hidden2_dims``.
    """
    # A `global` declaration covers the whole function body, so all of them
    # are gathered up front.
    global dy
    global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
    global NO_COMP, SOFT_COMP, HARD_COMP, GEN_COMP

    # import here so we don't load Dynet if just running parser.py --help for example
    from multilayer_perceptron import MLP
    from feature_extractor import FeatureExtractor
    import dynet as dy

    LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = range(4)
    NO_COMP, HARD_COMP, SOFT_COMP, GEN_COMP = range(4)

    # Which relations are subject to composition depends on the chosen mode.
    self.composition = options.nucleus_composition
    all_rels = vocab[5]
    functional_rels = ['det', 'case', 'clf', 'cop', 'mark', 'aux', 'cc']
    if self.composition in (HARD_COMP, SOFT_COMP):
        self.compositional_relations = functional_rels
    elif self.composition in (GEN_COMP,):
        self.compositional_relations = all_rels
    else:
        self.compositional_relations = []

    # Relation -> position in the list chosen above.
    relation_index = {}
    for position, relation in enumerate(self.compositional_relations):
        relation_index[relation] = position
    self.compositional_relations_dict = relation_index

    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)

    def tanh_cubed(x):
        # tanh applied to the element-wise cube of x
        return dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x))

    self.activations = {
        'tanh': dy.tanh,
        'sigmoid': dy.logistic,
        'relu': dy.rectify,
        'tanh3': tanh_cubed,
    }
    self.activation = self.activations[options.activation]

    self.oracle = options.oracle
    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.k

    # dimensions depending on extended features
    head_vecs = 1 if self.headFlag else 0
    child_vecs = 2 if (self.rlFlag or self.rlMostFlag) else 0
    self.nnvecs = head_vecs + child_vecs

    self.feature_extractor = FeatureExtractor(self.model, options, vocab, self.nnvecs)
    self.irels = self.feature_extractor.irels

    # Per-vector width: doubled LSTM output when BiLSTMs are enabled,
    # otherwise the extractor's LSTM input size.
    if options.no_bilstms > 0:
        vec_dims = options.lstm_output_size * 2
    else:
        vec_dims = self.feature_extractor.lstm_input_size
    mlp_in_dims = vec_dims * self.nnvecs * (self.k + 1)
    print("The size of the MLP input layer is {0}".format(mlp_in_dims))

    if self.composition in (SOFT_COMP, GEN_COMP):
        # Extra parameters: a relation embedding table plus one combiner
        # layer (weights and bias). Creation order is kept as-is.
        rel_emb_sz = 10
        self.cmp_rel_lookup = self.model.add_lookup_parameters(
            (len(self.compositional_relations), rel_emb_sz))
        cmb_sz = 2 * 2 * options.lstm_output_size + rel_emb_sz
        out_sz = 2 * options.lstm_output_size
        self.combiner_W1 = self.model.add_parameters((out_sz, cmb_sz), name='cmbW1')
        self.combiner_b1 = self.model.add_parameters(out_sz, name='cmbb1')

    # The unlabeled MLP has 4 outputs; the labeled one has 2 per relation
    # in irels plus 2.
    hidden_dims = options.mlp_hidden_dims
    hidden2_dims = options.mlp_hidden2_dims
    self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims,
                             hidden_dims, hidden2_dims,
                             4, self.activation)
    self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims,
                           hidden_dims, hidden2_dims,
                           2 * len(self.irels) + 2, self.activation)
if __name__ == '__main__':
    # Command line: <raw sentences file> <annotations file>
    model_file = "model.pkl"
    raw_file, annotations_file = sys.argv[1], sys.argv[2]

    # A truthy check_file() result only triggers a message; the run continues.
    if check_file(raw_file):
        print("file is in wrong format expected raw and not proccessed file")

    # Run every sentence of the raw file through utils.nlp, keyed by sentence id.
    data = {}
    for sen_id, sen in utils.read_lines(raw_file):
        data[sen_id] = utils.nlp(sen)

    annotation_sentences = load_annotation_sentences(annotations_file)
    lexicon_helper = Lexicon_helper()
    feature_extractor = FeatureExtractor(lexicon_helper)

    # Rule-based predictions, ordered by utils.get_senid_int.
    extracted_ents_rules = sorted(
        rules_extractor.predict(data, lexicon_helper),
        key=utils.get_senid_int,
    )

    # Candidates with features -> filtered by the rule output -> tagged
    # against the annotations.
    sen_entities_with_x = get_x_data(feature_extractor, data)
    filtered_sen_entities_with_x = filter_ents(sen_entities_with_x,
                                               extracted_ents_rules)
    tagged_sen_entites = tag_entities(filtered_sen_entities_with_x,
                                      annotation_sentences)

    # Item 3 of each entry is converted to a dense row (features);
    # item 4 is used as the target.
    clf = LinearSVC(random_state=0, tol=1e-5)
    allx = np.array([x[3].toarray()[0] for x in tagged_sen_entites])
    yall = np.array([y[4] for y in tagged_sen_entites])
    clf.fit(allx, yall)
def feature_to_matrix_file(action, db, volt_collection, tag_collection, port=27017, host='localhost', ndevices=3, offset=0, action_num=0, interval=2, rate=1):
    """Extract filtered, normalised features for one action and save them per device.

    For up to the first 8 tag documents whose ``tag`` equals ``action``, the
    voltage documents with ``time`` strictly between the tag's ``inittime``
    and ``termtime`` are read, passed through ``cwt_filter`` and
    ``getNormalization``, and fed sample by sample to the registered feature
    modules. For each device ``i`` in ``1..ndevices`` the resulting rows are
    written to ``feature_matrixs/feature_matrix<i>.npy`` and
    ``feature_matrixs/label_matrix<i>.npy`` (created if missing, otherwise
    extended), and the feature matrix is also dumped as text to
    ``feature_matrixs/feature_matrix<i>.txt``.

    Args:
        action: tag value selecting the recordings to process.
        db: name of the database on the Mongo server.
        volt_collection: name of the collection holding voltage documents
            (keys read: ``device_no``, ``voltage``, ``time``).
        tag_collection: name of the collection holding tag documents
            (keys read: ``tag``, ``inittime``, ``termtime``).
        port: Mongo server port.
        host: Mongo server host.
        ndevices: number of devices; device numbers run from 1 to ndevices.
        offset: not used in this function.
        action_num: integer label written to the label matrix for each row.
        interval: time span covered by each analysis, passed to every
            feature module.
        rate: how often an analysis is run, passed to every feature module.

    Relies on the module-level name ``feature_names``.

    NOTE(review): the statement nesting was reconstructed from a
    whitespace-collapsed source; confirm loop membership against the
    original file.
    """
    from collections import defaultdict

    client = MongoClient(port=port, host=host)
    database = client[db]
    # From here on the two parameters refer to collection objects, not names.
    tag_collection = database[tag_collection]
    volt_collection = database[volt_collection]

    # The error is raised and caught right here, so an (almost) empty pair of
    # collections only prints a message and execution continues.
    try:
        if volt_collection.count_documents(
                {}) + tag_collection.count_documents({}) < 2:
            raise CollectionError('Collection not found!')
    except CollectionError as e:
        # NOTE(review): relies on CollectionError exposing `.message` - confirm.
        print(e.message)

    # Not used afterwards in this function.
    ntags = tag_collection.count_documents({'tag': action})

    # Range of devices whose features are extracted
    start = 1
    end = ndevices

    # Create the feature extractor and register one module per feature name
    extractor = FeatureExtractor()
    for feature in feature_names:
        # NOTE(review): eval() on names from `feature_names`; safe only while
        # that list is not influenced by external input.
        module = eval(feature + "(" + str(interval) + "," + str(rate) + ")")
        extractor.register(module)

    # Number of tags (people) processed so far
    tag_acc = 0

    # read the data that is of a certain action one by one
    for tag in tag_collection.find({'tag': action}):
        tag_acc += 1
        # Only the first 8 tags are processed.
        if (tag_acc > 8):
            break
        print("people_" + str(tag_acc))
        inittime, termtime = tag['inittime'], tag['termtime']

        # Per-device raw, filtered and normalised series
        times, volts, filter_volts, normalize_volts = {}, {}, {}, {}
        for i in range(start, ndevices + 1):
            times[i] = []
            volts[i] = []
            filter_volts[i] = []
            normalize_volts[i] = []

        for volt in volt_collection.find(
                {'time': {
                    '$gt': inittime,
                    '$lt': termtime
                }}):
            device_no = int(volt['device_no'])
            v = volt['voltage']
            t = volt['time']
            times[device_no].append(t)
            volts[device_no].append(v)

        for i in range(start, end + 1):
            # Wavelet-transform filtering
            filter_volts[i] = cwt_filter(volts[i], 0.08)
            # Alternatives kept from the original for reference:
            # Fourier-transform filtering
            # filter_volts[i] = fft_filter(filter_volts[i], 1 / 70, 15)
            # Low-pass filtering (8 is the filter order)
            # b, a = signal.butter(8, 3 / 7, 'lowpass')
            # filter_volts[i] = signal.filtfilt(b, a, filter_volts[i])
            # Moving-average filtering, mode one of: full, valid, same
            # filter_volts[i] = np_move_avg(filter_volts[i], 5, mode="same")

            # Normalise the filtered data
            normalize_volts[i] = getNormalization(filter_volts[i])

        # Containers for the feature timestamps and feature values
        feature_times, feature_values = {}, {}
        for i in range(start, end + 1):
            feature_times[i] = []
            feature_values[i] = defaultdict(list)
            for feature in feature_names:
                # The key is the feature name without its last 6 characters.
                feature_values[i][feature[:-6]] = []

        # Run feature extraction for each acquisition device
        for i in range(start, end + 1):
            for j in range(len(normalize_volts[i])):
                value = {"time": times[i][j], "volt": normalize_volts[i][j]}
                output = extractor.process(value)
                # A falsy result produces no feature row for this sample.
                if (output):
                    features = {
                        "device_no": i,
                        "feature_time": times[i][j],
                        "feature_value": output,
                        "interval": interval,
                        "rate": rate
                    }
                    feature_times[i].append(features['feature_time'])
                    for feature_type in feature_values[i].keys():
                        feature_values[i][feature_type].append(
                            features['feature_value'][feature_type])

            # Clear all modules so one device's data does not leak into the next
            extractor.clear()

        # Second clear kept from the original, once per tag.
        extractor.clear()

        # Number of features
        nfeatures = len(feature_values[1])
        # Feature names; keys() gives a view that cannot be indexed, hence the list()
        feature_type = list(feature_values[1].keys())

        for i in range(start, end + 1):
            matrix_base = 'feature_matrixs/feature_matrix' + str(i)
            label_base = 'feature_matrixs/label_matrix' + str(i)

            if (os.path.exists(matrix_base + ".npy")):
                # The file already exists: load it and append the new rows
                feature_matrix = np.load(matrix_base + ".npy")
                label_matrix = np.load(label_base + ".npy")
                temp_matrix = np.zeros((len(feature_times[i]), nfeatures),
                                       dtype=float)

                # The old files are removed before the extended ones are saved.
                os.remove(matrix_base + ".npy")
                os.remove(label_base + ".npy")

                for j in range(len(feature_times[i])):
                    for k in range(nfeatures):
                        temp_matrix[j][k] = feature_values[i][
                            feature_type[k]][j]
                    # np.append without an axis returns a flattened array, so
                    # the stored labels become 1-D after the first extension.
                    label_matrix = np.append(label_matrix, [action_num])

                # Inserting at index shape[0] along axis 0 adds the new rows at the end.
                feature_matrix = np.insert(feature_matrix,
                                           feature_matrix.shape[0],
                                           values=temp_matrix,
                                           axis=0)
            else:
                # No file yet: build the feature and label matrices from scratch
                feature_matrix = np.zeros((len(feature_times[i]), nfeatures),
                                          dtype=float)
                label_matrix = np.zeros((len(feature_times[i]), 1), dtype=int)

                for j in range(len(feature_times[i])):
                    for k in range(nfeatures):
                        feature_matrix[j][k] = feature_values[i][
                            feature_type[k]][j]
                    label_matrix[j] = action_num

            np.save(matrix_base, feature_matrix)
            np.save(label_base, label_matrix)

            np.set_printoptions(suppress=True)
            # The text dump is named after the device being saved (`i`), the
            # same index the .npy files use. `device_no` is only the device of
            # the last voltage row read and must not be used here.
            # NOTE(review): this fmt has two specifiers, so savetxt only
            # accepts it when there are exactly two feature columns.
            np.savetxt(matrix_base + '.txt',
                       feature_matrix,
                       fmt="%.18f,%.18f")

            print("feature_matrix" + str(i) + ":" + str(feature_matrix.shape))
'Loaded ' + str(len(train_images_filenames)) + ' training images filenames with classes ', set(train_labels)) print( 'Loaded ' + str(len(test_images_filenames)) + ' testing images filenames with classes ', set(test_labels)) # Load precomputed labels if avaliable precomp_label_filename = classifier + '_' + feature_method + '.npy' if os.path.isfile(precomp_label_filename) and not force_reload: print 'Loading previous predictions' predicted_classes = np.load(precomp_label_filename) else: start = time.time() print 'Extracting features' fe = FeatureExtractor(feature_method) (X, y) = fe.extract_features(train_images_filenames, train_labels, nimmax=30) print 'Training a classifier' c = Classifier(classifier) c.fit(X, y) print 'Predicting test set labels with the classifier' numtestimages = 0 predicted_classes = [] for i in range(len(test_images_filenames)): imfilename = test_images_filenames[i] des = fe.extract_single_image_features(imfilename) predictedclass = c.predict(des)