def recognition(self, clustering_model):
    """
    Select the segmentation with the smallest number of symbols that
    produces the highest geometric mean over the class probabilities
    produced by the random forest classifier from Project 1.

    Every candidate in ``clustering_model`` is scored with
    ``self.geometric_mean`` applied to the classifier's ``predict_proba``
    output for its non-empty clusters.  A candidate only replaces the current
    best when its score is strictly higher, so ties keep the earliest
    candidate (presumably the one with the fewest symbols -- this depends on
    how the caller orders ``clustering_model``).

    Side effects:
        * ``self.symbols`` is reset and filled with one ``Symbol`` per
          non-empty cluster of the selected candidate.
        * ``self.selected_k`` is set to the index of the selected candidate.
          It is left untouched when ``clustering_model`` is empty.

    :param clustering_model: sequence of objects exposing a ``clusters``
        mapping of cluster key -> iterable of strokes (each stroke provides
        ``coods`` and ``id``).
    """
    self.symbols = []
    if len(clustering_model) == 0:
        return

    # load random forest classifier; the context manager closes the file
    # handle, which the previous implementation leaked.
    # NOTE(review): unpickling runs arbitrary code -- 'rf.pkl' must be trusted.
    with open('rf.pkl', 'rb') as rf_model:
        classifier = pickle.load(rf_model)

    selected_k_index = 0
    highest_gmean = 0
    for i, model in enumerate(clustering_model):
        # extract features for each non-empty cluster
        features = []
        for cluster in model.clusters.keys():
            strokes = [stroke.coods for stroke in model.clusters[cluster]]
            if strokes:
                features.append(ExtractFeatures.generate_features(strokes))

        # calculate class probabilities for each cluster and fold them
        # into a single score for this candidate
        class_probabilities = classifier.predict_proba(features)
        g_mean = self.geometric_mean(class_probabilities)

        # select k with highest geometric mean (strict ">" keeps the
        # earliest candidate on ties)
        if g_mean > highest_gmean:
            selected_k_index = i
            highest_gmean = g_mean

    self.selected_k = selected_k_index

    # Store the clusters of strokes for the selected k along with the
    # symbol class obtained from the classifier
    selected_clusters = clustering_model[selected_k_index].clusters
    for cluster in selected_clusters.keys():
        strokes = []
        strokeid_list = []
        for stroke in selected_clusters[cluster]:
            strokes.append(stroke.coods)
            strokeid_list.append(stroke.id)
        if not strokes:
            continue

        # extract feature for cluster (single sample -> shape (1, n_features))
        features = ExtractFeatures.generate_features(strokes)
        features = np.array(features).reshape(1, -1)

        # predict class label for cluster
        sym_class = str(classifier.predict(features)[0])
        sym_id = sym_class + '_' + str(strokeid_list[0])
        self.symbols.append(Symbol(sym_id, sym_class, strokeid_list))
def get_features_str(prev_prev_tag, prev_tag, line_array, word_index):
    """Build the feature string for the word at ``word_index``.

    The word is described by itself, by its two predecessors (word and tag)
    and by its two successors (word only; their tag slot is always ``None``).
    Neighbours that fall outside ``line_array`` contribute ``None`` as the
    word.

    :param prev_prev_tag: tag of the word two positions to the left.
    :param prev_tag: tag of the word one position to the left.
    :param line_array: sequence of the words of the line.
    :param word_index: index of the current word in ``line_array``.
    :return: concatenation of the four ``ExtractFeatures`` partial strings.
    """
    word_count = len(line_array)

    # Left context: a word exists only if we are far enough from the start.
    left_word = line_array[word_index - 1] if word_index >= 1 else None
    left_left_word = line_array[word_index - 2] if word_index >= 2 else None
    prev_arr = [left_word, prev_tag]
    prev_prev_arr = [left_left_word, prev_prev_tag]

    # Right context: a word exists only if we are far enough from the end.
    right_word = None
    right_right_word = None
    if word_index <= word_count - 2:
        right_word = line_array[word_index + 1]
        if word_index <= word_count - 3:
            right_right_word = line_array[word_index + 2]
    next_arr = [right_word, None]
    next_next_arr = [right_right_word, None]

    current_word = line_array[word_index]
    is_rare = current_word not in popular_words

    pieces = [
        ExtractFeatures.get_features_by_word(current_word, is_rare),
        ExtractFeatures.get_features_by_2_prevs(prev_prev_arr, prev_arr),
        ExtractFeatures.get_features_by_next_word(next_arr),
        ExtractFeatures.get_features_by_next_next_word(next_next_arr),
    ]
    return "".join(pieces)
def onButtonClicked(self):
    """Export one CSV file per bag file into a user-selected directory.

    Steps:
        1. Validate the "window time" text field: it must parse as a float
           and be strictly smaller than ``self.duration``; otherwise an error
           box is shown and nothing else happens.
        2. Ask for an output directory, starting from the directory returned
           by ``self.get_current_opened_directory``.  Nothing is exported
           when the dialog returns an empty path.
        3. Remember the chosen directory in ``log/save_csv.log`` (next to
           this source file) and the current selection in
           ``get_path() + 'logger.log'``.
        4. Generate the features of every bag file and write them to the
           path produced by ``generate_csv_from_bag``.
    """
    import inspect
    import os

    filepath = os.path.dirname(
        os.path.abspath(inspect.getfile(
            inspect.currentframe()))) + "/log/save_csv.log"
    current_directory = self.get_current_opened_directory(filepath)

    window = self.window.text()
    try:
        val = float(window)
    except ValueError:
        QMessageBox.about(self, "Error in Window Time",
                          "That's not a number!")
        return
    if val >= self.duration:
        QMessageBox.about(
            self, "Error in Window Time",
            "time need to be smaller than: " + str(self.duration))
        return

    saved_dir = str(
        QFileDialog.getExistingDirectory(self, "Select Directory",
                                         current_directory))
    if saved_dir == '':
        # No directory selected: nothing to export.
        return

    # Remember the directory so the next export dialog starts there.
    with open(filepath, "w") as f:
        f.write(saved_dir)

    topics = self.selected_bag_topics
    specific_features_selection = self.selected_specific_features
    general_features_selection = self.selected_general_features

    # Persist the current selection, one name per line: topics first, then
    # the specific features, then the general features.
    with open(get_path() + 'logger.log', "w") as f:
        for group in (topics, specific_features_selection,
                      general_features_selection):
            for name in group:
                f.write(name + "\n")

    # `val` is the already validated window time; no need to parse it again.
    ef = E.ExtractFeatures(topics, val, specific_features_selection,
                           general_features_selection)
    for bag_file in self.bag_files:
        df = ef.generate_features(bag_file)
        csv_path = generate_csv_from_bag(saved_dir, bag_file)
        E.write_to_csv(csv_path, df)

    QMessageBox.about(self, "csv export", "csv was exported successfuly")
def extract_features(self, file_address):
    """Extract the feature dictionary of the XML document at ``file_address``.

    The document root is handed to ``ExtractFeatures.extract``; afterwards the
    ``'variable_meaning'`` entry is set to ten times the score returned by
    ``EvaluateNaming.evaluate`` for the extracted features.

    :param file_address: path (or file object) accepted by
        ``xml.etree.ElementTree.parse``.
    :return: the extractor's ``extracted_features`` mapping.
    """
    root = xml.etree.ElementTree.parse(file_address).getroot()

    extractor = ExtractFeatures()
    extractor.extract(root)

    naming_evaluator = EvaluateNaming()
    naming_score = naming_evaluator.evaluate(extractor.extracted_features)
    extractor.extracted_features['variable_meaning'] = naming_score * 10.00
    return extractor.extracted_features
def __call__(self):
    """Tag every sentence of ``self.input_file_name`` and write the result.

    The pickled auxiliary data (frequent words, feature index and feature
    names) and the pickled classifier are loaded, the input sentences are
    grouped by length so that all sentences of one length can be tagged
    position by position in a single batch, and the tagged sentences are
    written to ``self.output_file`` in their original order, one per line,
    as ``word/tag`` tokens separated by spaces.  The ``$EQ$`` placeholder is
    replaced by ``=`` in the output.

    Notes:
        * Tags are stored in a ``"U8"`` array, i.e. predicted tags longer
          than 8 characters are truncated.
        * NOTE(review): ``pickle.load`` executes arbitrary code; both pickle
          files must come from a trusted source.
    """
    # Context managers make sure the pickle files are closed again (the
    # handles used to be leaked).
    with open(self.feature_map_file, "rb") as aux_f:
        aux_data = pickle.load(aux_f)
    with open(self.model_file_name, "rb") as model_f:
        model: SGDClassifier = pickle.load(model_f)

    frequent_words = aux_data[TrainModel.FREQUENT_WORDS]
    vectorizer = DictVectorizer()
    vectorizer.vocabulary_ = aux_data[TrainModel.FEATURE_IDXS]
    vectorizer.feature_names_ = aux_data[TrainModel.FEATURE_NAMES]

    with open(self.input_file_name, 'r') as in_f:
        lines = [line.rstrip() for line in in_f]

    # Purely informational: does every token already carry a "/tag" part?
    already_tagged = all('/' in w for l in lines for w in l.split(' '))
    print('input already tagged:', already_tagged)

    # Keep only the word of every (word, tag) pair.
    sentences = [
        [token[0]
         for token in ExtractFeatures.split_by_whitespace_and_seperate_tags(l)]
        for l in lines
    ]

    # groupby needs its input sorted by the grouping key; the original
    # position travels along so the input order can be restored afterwards.
    by_length = sorted(enumerate(sentences), key=lambda pair: len(pair[1]))

    tagged_sentences = []
    idxs_processed = []
    for l, g in itertools.groupby(by_length, key=lambda pair: len(pair[1])):
        g = list(g)
        idxs_processed.extend(idx for idx, _ in g)

        # dtype=str keeps the array a string array even for the group of
        # empty sentences, where NumPy would otherwise infer float64 and
        # np.char.add below would raise.
        sents_of_len_l = np.asarray([sent for _, sent in g], dtype=str)
        tags_of_len_l = np.empty(sents_of_len_l.shape, dtype="U8")

        # Tag position i of all sentences of this length at once; the
        # features of position i may use the tags predicted for earlier
        # positions.
        for i in range(l):
            feats_for_ith_word = [
                ExtractFeatures.extract(sents_of_len_l[sent_i, :],
                                        tags_of_len_l[sent_i, :], i,
                                        (word not in frequent_words))
                for sent_i, word in enumerate(sents_of_len_l[:, i])
            ]
            X = vectorizer.transform(feats_for_ith_word)
            tags_of_len_l[:, i] = model.predict(X)

        tagged_sents_of_len_l = np.char.add(
            np.char.add(sents_of_len_l, '/'), tags_of_len_l)
        tagged_sentences.extend(
            ' '.join(row) for row in tagged_sents_of_len_l)

    # Restore the original sentence order and undo the '=' escaping.
    in_input_order = sorted(zip(tagged_sentences, idxs_processed),
                            key=operator.itemgetter(1))
    tagged_sentences = [s.replace('$EQ$', '=') for s, _ in in_input_order]

    with open(self.output_file, 'w+') as out_f:
        out_f.write('\n'.join(tagged_sentences) + '\n')
def getDataTrain_LogMeanFluxes():
    """Return the training means with columns 12..23 replaced by their log.

    The frame comes from ``ExFt.extract_DataTraining_Means()``; its column
    index is printed, then ``np.log`` is applied in place to the positional
    column range ``12:24``.
    """
    mean_fluxes = ExFt.extract_DataTraining_Means()
    print(mean_fluxes.columns)

    logged_block = mean_fluxes.iloc[:, 12:24].apply(np.log)
    mean_fluxes.iloc[:, 12:24] = logged_block
    return mean_fluxes
def FeatureExtraction():
    """Extract librosa features for the single hard-coded test recording.

    Prints the name of the wav file and returns whatever
    ``ef.ExtractFeaturesByLibrosa`` produces for it (label ``"test"``, empty
    folder prefix).
    """
    test_files = ["./Data/diyDataset/test/1"]
    labels = ["test"]
    folder_prefix = ""

    print(test_files[0] + ".wav")
    return ef.ExtractFeaturesByLibrosa(test_files, labels, folder_prefix)
def get_segmentations(self, directed_graph, data):
    """Group strokes into symbols and classify every group.

    Each edge of ``directed_graph`` whose merge prediction is ``'*'`` joins
    its two strokes: the edge is folded into the first existing segment that
    already contains one of its end points, otherwise it starts a new
    segment.  Strokes of ``data.strokeID`` that end up in no segment become
    single-stroke segments.  Every segment is then classified with
    ``self.symbol_classifier`` and stored in ``self.symbols`` as a
    ``Symbol``.

    When the feature extractor yields no samples the method returns early
    and leaves ``self.symbols`` untouched.

    NOTE(review): an edge is only added to the *first* matching segment, so
    two existing segments connected by a later edge are not merged.

    :param directed_graph: sequence of edges; ``edge[0]`` and ``edge[1]``
        are stroke ids.
    :param data: object exposing ``strokeID`` (sequence of stroke ids) and
        ``coordinates`` (indexable by stroke id).
    """
    X, y = SegmenterFeatureExtractor.getAllFeatures(directed_graph, data)
    if len(X) == 0:
        return

    merge_pred = self.merge_classifier.predict(X)

    segmentations = []
    for i in range(len(directed_graph)):
        if merge_pred[i] != '*':
            continue
        edge = directed_graph[i]
        first, second = edge[0], edge[1]
        for seg in segmentations:
            if first in seg:
                if second not in seg:
                    seg.append(second)
                break
            if second in seg:
                seg.append(first)
                break
        else:
            # Copy the edge: segments grow in place and must not modify
            # the caller's graph.
            segmentations.append(list(edge))

    # Strokes that were not merged with anything form their own segment.
    for stroke_id in data.strokeID:
        if not any(stroke_id in seg for seg in segmentations):
            segmentations.append([stroke_id])

    symbols = []
    for seg in segmentations:
        strokes = [data.coordinates[stroke_id] for stroke_id in seg]
        if not strokes:
            continue
        # extract feature for cluster (single sample -> shape (1, n_features))
        features = ExtractFeatures.generate_features(strokes)
        features = np.array(features).reshape(1, -1)
        # predict class label for cluster
        sym_class = str(self.symbol_classifier.predict(features)[0])
        sym_id = sym_class + '_' + str(seg[0])
        symbols.append(Symbol(sym_id, sym_class, seg))
    self.symbols = symbols
def processPatient(folder, patient, nSamples, overwrite=False): # CALLS: ExtractFeatures.extractFeatures(), writeToFile() # CALLED BY: __main__() # there is training and test data for three patients, stored in folders like 'training_1', 'test_3', etc. # training data: I_J_K.mat - the Jth training data segment corresponding to the Kth class for the Ith patient # test data: I_J.mat - the Jth testing data segment for the Ith patient # K=0 for interictal, K=1 for preictal # this function looks at the training samples for a single patient # for each sample, call extractFeatures() and write new features to a single output file for the patient # uses writeToFile() to write new features to the patient's file # interictal and preictal data are combined in the file, but labelled print "Processing patient ", patient subFolder = "training_%d" % patient inputFolder = os.path.join( folder, subFolder) # folder full of samples for the patient outputFileName = "patient_%d_training.mat" % patient outputFile = os.path.join( folder, outputFileName) # the file that features will be written to # find out which features have already been written, so we do not calculate them again if os.path.exists(outputFile) and not overwrite: matFile = loadmat(outputFile) existingFeatures = [ key for key in matFile.keys() if not key.startswith('_') ] # print matFile['nSamplesSegment'] del matFile else: existingFeatures = [] if not overwrite: print "Existing features: ", existingFeatures else: print "Overwriting existing features." 
# keep track of extracted features and collect all of them newFeatures = {} for EEG_class in range(2): # 0=interictal, 1=preictal for sampleNumber in range(1, nSamples[EEG_class] + 1): fileName = "%d_%d_%d.mat" % (patient, sampleNumber, EEG_class) inputSample = os.path.join(inputFolder, fileName) # single input sample print 'Extracting features from ', fileName # add extracted features to newFeatures dictionary matFile = loadmat(inputSample) sampleFeatures = ExtractFeatures.extractFeatures( matFile, existingFeatures) for key in sampleFeatures.keys(): newFeatures.setdefault(key, []).append(sampleFeatures[key]) newFeatures.setdefault('EEG_class', []).append(EEG_class) if not overwrite: print "Existing features: ", existingFeatures else: print "Overwriting existing features." print "New features: ", newFeatures.keys(), "\n" writeToFile(outputFile, newFeatures, overwrite) # add new features to output file
import matplotlib.pyplot as plt import numpy as np import librosa import ExtractFeatures as ef [trainX, trainY, testX, testY] = ef.LoadData() trainFolder = "./Data/diyDataset/train/" print trainX[0:3] print trainY[0:3] cmnFile = trainFolder + trainX[0] + ".wav" engFile = trainFolder + trainX[1] + ".wav" print cmnFile print engFile y, sr = librosa.load(cmnFile) print "end", y D = np.abs(librosa.stft(y))**2 S = librosa.feature.melspectrogram(S=D) # Passing through arguments to the Mel filters S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000) import matplotlib.pyplot as plt librosa.display.specshow(librosa.logamplitude(S, ref_power=np.max), y_axis='mel', fmax=8000, x_axis='time')
if len(sentenceData.depTree) == 0: usedSentenceLength += sum(1 for c in sentenceData.sentence if c.strip() != '') continue sp = ShortestPath(sentenceData.depTree) for mt in sentenceData.maths: #PM if sentenceData.sentence[mt[0]:mt[1]] not in ann._math: continue for np in sentenceData.nps: if (not(mt[0] == np[0] and mt[1] == np[1])) and ((mt[0] < np[0] and mt[1] <= np[0]) or (np[0] < mt[0] and np[1] <= mt[1])): #Extracting features #Put ann instead of None in 'ef' declaration for PM ef = ExtractFeatures(sentenceData.sentence, sentenceData.tagInfo, np, mt, sentenceData.depTree, ann) mtInNP = not (np[0] == ef._np[0] and np[1] == ef._np[1]) colon, comma, othermath = ef.FirstFeature() insidebracket = ef.SecondFeature() distance = ef.ThirdFeature() mathbefore = ef.FourthFeature() verb = ef.FifthFeature() nppresurf, npprepos, npnextsurf, npnextpos = ef.SixthFeature(3) mathpresurf, mathprepos, mathnextsurf, mathnextpos = ef.SeventhFeature(3) pattern1, pattern2, pattern3, pattern4, pattern5, pattern6, pattern7 = ef.EighthFeature(ptn1, ptn2, ptn3, ptn4, ptn5, ptn6) npstart, npend, mathstart = ef.PreTenthFeature() depdistance, rel_math, rel_np, math_out, np_out = sp.TenthFeature(npstart, npend, mathstart) #PM isDesc, annStartIdx, annEndIdx = ef.isDescription(mathbefore)
def __init__(self, bag_files, listtopics, duration):
    """Build the "Making csv file" dialog.

    The window shows three scrollable check-box lists (bag topics,
    statistics features, specific features), a "window time" input next to
    the bag duration, and the buttons to restore the last selection, clear
    the selection and export to CSV (disabled initially).

    bag_files  -- bag file path(s) to export
    listtopics -- list of topic names; it is sorted in place
    duration   -- duration shown in the window and stored in self.duration
    """
    super(BagParser, self).__init__()

    def centered_label(text):
        # Label owned by this window with centred text.
        label = QLabel(text, self)
        label.setAlignment(Qt.AlignCenter)
        return label

    def add_sorted_options(options, layout, selected):
        # Sort the options in place and add one check box per option.
        options.sort()
        for option in options:
            self.addCheckBox(option, layout, selected)

    # window title and size
    self.setWindowTitle("Making csv file")
    self.resize(960, 720)

    self.topics_items = {
        "0": listtopics,
        "1": E.get_general_features_options(),
        "2": E.get_specific_features_options(),
    }
    print(self.topics_items)

    # path to bag file
    self.bag_files = bag_files
    self.selected_bag_topics = []
    self.selected_specific_features = []
    self.selected_general_features = []
    self.items_list_topics = []

    # One scroll area (with its content widget) per check-box list.
    self.area = QScrollArea(self)
    self.areagen = QScrollArea(self)
    self.areaspec = QScrollArea(self)
    self.main_widget = QWidget(self.area)
    self.main_widget1 = QWidget(self.areagen)
    self.main_widget2 = QWidget(self.areaspec)

    # Buttons; exporting stays disabled for now.
    self.ok_button = QPushButton("Export To CSV", self)
    self.ok_button.clicked.connect(self.onButtonClicked)
    self.clear_button = QPushButton("Clear Selection", self)
    self.clear_button.clicked.connect(self.onClearClicked)
    self.choose_button = QPushButton("Get Last Export Choose", self)
    self.choose_button.clicked.connect(self.onButtonChooseCliked)
    self.ok_button.setEnabled(False)

    self.label1 = centered_label("Select topic from bag(s)")
    self.label2 = centered_label("Statistics Features")
    self.label3 = centered_label("Specific Features")
    self.duration = duration
    self.label5 = centered_label("Duration Time: " + str("%.1f" % duration))

    # Main column: each caption followed by its scroll area.
    self.main_vlayout = QVBoxLayout(self)
    for widget in (self.label1, self.area, self.label2, self.areagen,
                   self.label3, self.areaspec):
        self.main_vlayout.addWidget(widget)

    # Row with the window-time input and the duration.
    self.label4 = centered_label("Window time")
    self.window = QLineEdit(self)
    self.window.setText("1")
    self.windows_time_3 = QHBoxLayout(self)
    for widget in (self.label4, self.window, self.label5):
        self.windows_time_3.addWidget(widget)
    self.main_vlayout.addLayout(self.windows_time_3)

    # Row with the two selection buttons, export button underneath.
    self.two_buttons = QHBoxLayout(self)
    self.two_buttons.addWidget(self.choose_button)
    self.two_buttons.addWidget(self.clear_button)
    self.main_vlayout.addLayout(self.two_buttons)
    self.main_vlayout.addWidget(self.ok_button)
    self.setLayout(self.main_vlayout)

    # Bag topics list.
    self.selection_vlayout = QVBoxLayout(self)
    self.item_all = MyQCheckBox("All", self, self.selection_vlayout, None)
    self.item_all.stateChanged.connect(
        lambda state: self.updateList(state, self.item_all, None))
    self.selection_vlayout.addWidget(self.item_all)
    add_sorted_options(listtopics, self.selection_vlayout,
                       self.selected_bag_topics)

    # Statistics (general) features list.
    self.selection_vlayout1 = QVBoxLayout(self)
    self.item_all1 = MyQCheckBox("All", self, self.selection_vlayout1, None)
    self.item_all1.stateChanged.connect(
        lambda state: self.updateList(state, self.item_all1, None))
    self.selection_vlayout1.addWidget(self.item_all1)
    add_sorted_options(E.get_general_features_options(),
                       self.selection_vlayout1,
                       self.selected_general_features)

    # Specific features list.
    self.selection_vlayout2 = QVBoxLayout(self)
    self.item_all2 = MyQCheckBox("All", self, self.selection_vlayout2, None)
    self.item_all2.stateChanged.connect(
        lambda state: self.updateList(state, self.item_all2, None))
    self.selection_vlayout2.addWidget(self.item_all2)
    add_sorted_options(E.get_specific_features_options(),
                       self.selection_vlayout2,
                       self.selected_specific_features)

    # Put every list into its scroll area and show the window.
    self.main_widget.setLayout(self.selection_vlayout)
    self.main_widget1.setLayout(self.selection_vlayout1)
    self.main_widget2.setLayout(self.selection_vlayout2)
    self.area.setWidget(self.main_widget)
    self.areagen.setWidget(self.main_widget1)
    self.areaspec.setWidget(self.main_widget2)
    self.show()
def create_shapefile(state_code, county_code, year, avg=True, level='tract', zone=False, points=None, taz=True, clip=None, intersect=None, nocheck=False, outname='out'): all_population_columns = [ ] # Keep a list of all population columns for areal interpolation bg = (level == 'bg') base_shape = Names.SF_CENSUS_COL_BG if bg else Names.SF_CENSUS_COL_TRACT if level != 'bg' and level != 'tract': print "Invalid Census level" exit(1) if avg and not points: print "Cannot average incidents without counting incidents!" exit(1) county, state = Names.get_location(state_code, county_code) msg = 'You have selected to create a shapefile for {}, {} for {} with the following properties:\n'. \ format(county, state, year) msg += 'Demographics for {} in {}, {}\n'.format(level, county, state) if points: if avg: msg += "Averaged " else: msg += "Yearly " msg += "incident counts\n" if taz: msg += "Taz data\n" if clip: msg += "Clipped against {}'s response zone\n".format(clip) if intersect: msg += "Intersected against {}'s response zone\n".format(intersect) if zone: msg += "With zoning proportions of each shape\n" msg += "Saving as shapefile: {}".format(outname) r = '' print msg while not nocheck and r != 'y': r = raw_input("Is this correct? (y/n): ") if r == 'n': print "Exiting..." 
exit(1) gdf = GetDemographics.create_demographic_df(state_code=state_code, county_code=county_code, year=year, bg=bg) GetNeighborData.get_neighbor_data(census_gdf=gdf, target_columns=['bldavg', 'medinc']) all_population_columns.extend(CodeDicts.get_pop_labels()) if taz: taz_gdf = get_taz_gdf(state_code, county_code, year) gdf = GetTazData.merge_census_to_taz( census_gdf=gdf, taz_gdf=taz_gdf, census_level_lbl=base_shape, taz_level_lbl=Names.SF_TAZ_COL_TRACT) base_shape = Names.SF_TAZ_COL_TAZ all_population_columns.extend(TazCodeList.taz_to_sum) # Get employment and population density density_cols = ['TPE_TOTEMP', 'TPE_POP'] area_column = 'TPE_AREA_L' gdf = ExtractFeatures.get_densities(gdf=gdf, target_columns=density_cols, area_column=area_column, drop=False) # NOTE: population values are interpolated during clipping. Make sure all population features are added # before clipping! if clip: resp_area_fpath = Names.get_response_zone_shapefile(state_code, clip) gdf = GetTrimmedSF.clip_shapefile(gdf, resp_area_fpath, all_population_columns) if intersect: resp_area_fpath = Names.get_response_zone_shapefile( state_code, intersect) gdf = GetTrimmedSF.intersect_shapefile(gdf, resp_area_fpath) if points: gdf = GetIncidentCounts.get_count_gdf(geo_df=gdf, base_shape=base_shape, state_code=state_code, city=points, year=year, avg=False) if zone: zone_gdf = gpd.read_file( Names.get_zoning_shapefile(state_code=state_code, county_code=county_code, year=year)) gdf = GetZoningData.append_zone_proportions(base_gdf=gdf, zone_gdf=zone_gdf, base_shp_col=base_shape) zones_to_combine = \ { 'RESIDENTIAL': ['SINGLE FAMILY', 'MULTI-FAMILY'], 'COMMERCIAL': ['OFFICE', 'BUSINESS', 'MIXED USE'] } gdf = CombineFeatures.sum_features(gdf, zones_to_combine) print("Creating shapefile as {}".format(outname)) gdf.to_file(outname, driver='ESRI Shapefile') gdf.drop(['geometry'], axis=1, inplace=True) gdf.to_csv(outname + '.csv')
else: jishu[self.ytr[min_index]] = 1 distances[min_index] = distances[max_index] Ypred[i] = max(jishu.items(), key=lambda x: x[1])[0] #print(Ypred) return Ypred feature = np.loadtxt('feature3.txt') labels = np.loadtxt('labels3.txt') picnum = 567 #filename = '../gray/' + str(picnum) + '.jpg' filename = '../image/' + str(picnum) + '.jpg' test = ef.hist(filename) #nn = KNearestN()0 #nn.train(feature,labels) #t = nn.predict(test,k=50,dis=3) ''' def DisCos(a,b): zi = np.sum(a*b,axis=1) mu = np.sqrt(np.sum(a**2,axis=1))*np.sqrt(np.sum(b**2,axis=1)) return zi/mu ''' def L2(a, b): distances = np.sqrt(np.sum((a - b)**2, axis=1))
def predict_tags_for_line(line):
    """Tag the words of ``line`` with a trigram Viterbi search.

    For every position ``i`` and every admissible tag pair
    ``(prev_tag, possible_tag)`` the best score over all admissible
    ``prev_prev_tag`` is kept in ``v_dict[(i, prev_tag, possible_tag)]`` and
    the maximising ``prev_prev_tag`` in ``bp_dict``.  Scores of position
    ``i`` are divided by the largest score seen at position ``i - 1``
    (``max_values_for_normilazation``) to keep them in range.  The last two
    tags are the ones with the highest stored score at their position; the
    remaining tags are read from the back pointers.

    The classifier is evaluated in full only for the first tag context of
    every ``(i, possible_tag)``; later contexts reuse the previous scores
    through ``predictor.predict_with_trasitions_change``, which only swaps
    the two tag-transition features.

    Uses the module-level ``tag_to_id``, ``feature_to_id``, ``predictor``,
    ``is_tag_possible_for_word`` and ``get_features_vec``.

    Changes compared to the previous implementation:
        * an empty / whitespace-only line returns ``[]`` instead of raising
          ``IndexError``;
        * the previous score is looked up for the second word as well (for
          ``i == 1`` no lookup was performed, so the score of the first word
          never took part in the search);
        * ``dict.has_key`` is replaced by ``dict.get``.

    :param line: whitespace separated words.
    :return: list with one tag per word; a position keeps the initial ``0``
        if no positive score was stored for it.
    """
    line_array = line.split()
    if not line_array:
        return []
    n_words = len(line_array)

    v_dict = {}
    bp_dict = {}
    prediction_tags_array = [0] * n_words
    max_values_for_normilazation = [0] * n_words
    max_values_for_normilazation[0] = 1

    tag_set = set()
    for tag in tag_to_id.keys():
        tag_set.add(tag)
    tag_set.add('start')

    # State of the incremental scorer: last scores and the ids of the
    # transition features they were computed with.
    old_2prevs_id = None
    old_prev_id = None
    old_scores = None

    for i in range(n_words):
        for possible_tag in tag_to_id.keys():
            if not is_tag_possible_for_word(line_array[i], possible_tag):
                continue
            first_prediction_for_tag = True
            for prev_tag in tag_set:
                # Before the first word only 'start' is admissible,
                # afterwards only real tags the previous word can take.
                if i < 1:
                    if prev_tag != 'start':
                        continue
                else:
                    if not is_tag_possible_for_word(line_array[i - 1],
                                                    prev_tag):
                        continue
                    if prev_tag == 'start':
                        continue

                max_viterbi_val = 0
                for prev_prev_tag in tag_set:
                    if i == 0:
                        if not (prev_prev_tag == 'start'
                                and prev_tag == 'start'):
                            continue
                        v_dict_val = 1
                    else:
                        if i == 1:
                            if prev_prev_tag != 'start':
                                continue
                        else:
                            if not is_tag_possible_for_word(
                                    line_array[i - 2], prev_prev_tag):
                                continue
                            if prev_prev_tag == 'start':
                                continue
                        # Best score of the previous position for this
                        # context; 0 if nothing was stored.
                        v_dict_val = v_dict.get(
                            (i - 1, prev_prev_tag, prev_tag), 0)

                    cur_prev_id = feature_to_id.get(
                        ExtractFeatures.get_feat_str_by_prevtag(prev_tag),
                        None)
                    cur_2prevs_id = feature_to_id.get(
                        ExtractFeatures.get_feat_str_by_2prevs(
                            prev_prev_tag, prev_tag), None)

                    if first_prediction_for_tag:
                        features_vec = get_features_vec(
                            prev_prev_tag, prev_tag, line_array, i)
                        scored_tags_dict, old_scores = predictor.predict(
                            features_vec)
                        first_prediction_for_tag = False
                    else:
                        scored_tags_dict, old_scores = \
                            predictor.predict_with_trasitions_change(
                                old_scores, old_prev_id, old_2prevs_id,
                                cur_prev_id, cur_2prevs_id)
                    old_prev_id = cur_prev_id
                    old_2prevs_id = cur_2prevs_id

                    viterbi_val = (v_dict_val / float(max_values_for_normilazation[i])) * \
                        scored_tags_dict[tag_to_id[possible_tag]]
                    if viterbi_val > max_viterbi_val:
                        # Track the largest score as normaliser for the
                        # next position.
                        if (i < n_words - 1 and viterbi_val >
                                max_values_for_normilazation[i + 1]):
                            max_values_for_normilazation[i + 1] = viterbi_val
                        max_viterbi_val = viterbi_val
                        v_dict[(i, prev_tag, possible_tag)] = viterbi_val
                        bp_dict[(i, prev_tag, possible_tag)] = prev_prev_tag

    # The last two tags are the best scoring ones at their position.  For a
    # one-word line the second position is -1, for which nothing is stored,
    # so the loop leaves the array untouched.
    for position in (n_words - 1, n_words - 2):
        max_viterbi_val = 0
        for possible_tag in tag_to_id.keys():
            for prev_tag in tag_set:
                v_dict_val = v_dict.get((position, prev_tag, possible_tag), 0)
                if v_dict_val > max_viterbi_val:
                    max_viterbi_val = v_dict_val
                    prediction_tags_array[position] = possible_tag

    # Everything before is read from the back pointers.
    for i in range(n_words - 3, -1, -1):
        prediction_tags_array[i] = bp_dict[(i + 2,
                                            prediction_tags_array[i + 1],
                                            prediction_tags_array[i + 2])]
    return prediction_tags_array
def identifyPersonalityTraits(file_name_orig):
    """Predict six personality traits from a handwriting image.

    The image is resized, seven raw features are extracted and categorised
    (each categorisation comment is printed), pairs of categorised features
    are fed to the six stored classifiers, and a prediction of ``1`` is
    mapped to the positive label of the trait.

    :param file_name_orig: path of the handwriting image.
    :return: dict with the keys ``Emotional_Stability``, ``Mental_Power``,
        ``Modesty``, ``Discipline``, ``Concentration`` and
        ``Social_Isolation``.
    """
    def categorise(label, determine, raw_value):
        # Categorise one raw feature and report the accompanying comment.
        category, comment = determine(raw_value)
        print(label + ": " + comment)
        return category

    def verdict(prediction, positive, negative):
        # Only the first prediction is of interest; 1 means "positive".
        if prediction[0] == 1:
            return positive
        return negative

    stability_model = load('knowme_EmotionalSt.joblib')
    mental_power_model = load('knowme_MentalE_WlPower.joblib')
    modesty_model = load('knowme_Modesty.joblib')
    discipline_model = load('lackOfDiscipline.joblib')
    concentration_model = load('PoorConcentration.joblib')
    isolation_model = load('SocialIsolation.joblib')

    resized_file = resize(file_name_orig)
    raw_features = extract.start(resized_file)

    baseline_angle = categorise("Baseline Angle",
                                categorize.determine_baseline_angle,
                                raw_features[0])
    top_margin = categorise("Top Margin", categorize.determine_top_margin,
                            raw_features[1])
    letter_size = categorise("Letter Size", categorize.determine_letter_size,
                             raw_features[2])
    line_spacing = categorise("Line Spacing",
                              categorize.determine_line_spacing,
                              raw_features[3])
    word_spacing = categorise("Word Spacing",
                              categorize.determine_word_spacing,
                              raw_features[4])
    pen_pressure = categorise("Pen Pressure",
                              categorize.determine_pen_pressure,
                              raw_features[5])
    slant_angle = categorise("Slant", categorize.determine_slant_angle,
                             raw_features[6])

    # Each classifier looks at its own pair of categorised features.
    stability_pred = stability_model.predict([[baseline_angle, slant_angle]])
    mental_power_pred = mental_power_model.predict(
        [[letter_size, pen_pressure]])
    modesty_pred = modesty_model.predict([[letter_size, top_margin]])
    discipline_pred = discipline_model.predict([[slant_angle, top_margin]])
    concentration_pred = concentration_model.predict(
        [[letter_size, line_spacing]])
    isolation_pred = isolation_model.predict([[line_spacing, word_spacing]])

    personality_Trait_dict = {
        "Emotional_Stability": verdict(stability_pred, "Stable",
                                       "Not Stable"),
        "Mental_Power": verdict(mental_power_pred, "High or Average", "Low"),
        "Modesty": verdict(modesty_pred, "Observed", "Not Observed"),
        "Discipline": verdict(discipline_pred, "Observed", "Not Observed"),
        "Concentration": verdict(concentration_pred, "Observed",
                                 "Not Observed"),
        "Social_Isolation": verdict(isolation_pred, "Observed",
                                    "Not Observed")
    }
    print(personality_Trait_dict)
    return personality_Trait_dict
'', end=''), # print("\n") if (s.tag == "w"): # print("%s %s" % (s.text, s.attrib['ctag']), '', end=''), single_sentence.append(s.text) single_sentence.append( s.attrib['ctag']) test_sen.append(s.text) y_test.append(s.attrib['ctag']) print(single_sentence) for x in range(0, len(single_sentence), 2): # print(get_features(single_sentence,x)) X_test.append( ExtractFeatures.get_testfeatures( single_sentence, x)) # # print('\n') # print(len(single_sentence)) if (subdiv.tag == "p"): for sentence in subdiv.findall('s'): # print(sentence.attrib) single_sentence = [] for s in sentence: if (s.tag == "foreign"): for words in s.findall('w'): print("%s/%s" % (words.text, words.attrib['ctag']), '', end=''), # print("\n")
# Gold tags of the training tokens, one per token.
trainLabels = [
    'JX', 'NN', 'II', 'JX', 'NN', 'VE', 'IKM', 'NN', 'NN', 'IKM',
    'JX', 'NN', 'IKO', 'NN', 'IKM', 'NN', 'II', 'NN', 'NP', 'NP',
    'NP', 'IE', 'MM', 'JX', 'NN', 'NN', 'IKM', 'NN', 'VR', 'VI',
    'VE', 'II', 'NN', 'IKM', 'NN', 'JX', 'NN', 'IKM', 'NN', 'CC',
    'DDX', 'II', 'VN', 'NN', 'IKM', 'NN', 'II', 'JX', 'VI', 'VE',
    'IKM', 'VVYN1', 'YF'
]

# Untagged test sentence(s), one word per entry.
test = [
    'संसद', 'को', 'अधिवेशन', 'आषाढ', 'को', 'शुरु', 'मा',
    'हुने', 'राष्ट्रियसभा', 'को', 'गठन', 'यै', 'महिना', 'मा',
    'भईसक्ने', 'चीन-सोभियत', 'सीमा', 'मा', 'बढी', 'सबल',
    'सुरक्षा'
]
# Shape of one (tagged) feature dictionary, for reference:
# test = [{'pos': 'IKM', 'prev-prev-word': 'अधिवेशन', 'word': 'को', 'prev-word': 'आषाढ', 'prev-pos': 'NN', 'next-next-word': 'मा', 'prev-prev-pos': 'NN', 'next-pos': 'NN', 'next-word': 'शुरु', 'nextnextpos': 'II'}]

# One feature dictionary per test word.
test_feat = [
    ExtractFeatures.get_wordFeatures(test, position)
    for position in range(len(test))
]
print(test, test_feat)

import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction import DictVectorizer
from sklearn import svm

# Turn the training feature dictionaries into a dense matrix.
vec = DictVectorizer(sparse=False)
X_arr = vec.fit_transform(trainFeatures)
print("Converting word features into Numpy arrays")
# with open("train.txt", "wb") as f:
def main():
    """Record one trial, run voice activity detection and build the model.

    The command line arguments (without the program name) are parsed by
    ``check_args`` into duration, subject and trial; the recording returned
    by ``trial_record`` is passed to ``trial_vad`` and the model is generated
    from the subject's ``vad/`` directory.
    """
    cli_args = sys.argv[1:]
    duration, subject, trial = check_args(cli_args)

    recording = trial_record(duration, subject, trial)
    trial_vad(recording, subject)

    ExtractFeatures.generate_model("vad/" + subject)
def getDataTrain_Mean_PassbandFluxes():
    """Return the per-passband training means with a categorical target.

    The frame comes from ``ExFt.extract_DataTraining_Means_byPassband()``;
    its ``target`` column is converted with ``astype('category')``.
    """
    passband_means = ExFt.extract_DataTraining_Means_byPassband()
    categorical_target = passband_means.target.astype('category')
    passband_means.target = categorical_target
    return passband_means
def getDataTrain_MeanFluxes():
    """Return the training means exactly as produced by
    ``ExFt.extract_DataTraining_Means()``."""
    return ExFt.extract_DataTraining_Means()