Example 1
    def recognition(self, clustering_model):
        """
        Select the segmentation with the smallest number of symbols
        that produces the highest geometric mean over the class probabilities
        produced by the random forest classifier from Project 1
        """
        self.symbols = []
        if len(clustering_model) > 0:
            # load the random forest classifier trained in Project 1
            with open('rf.pkl', 'rb') as rf_model:
                classifier = pickle.load(rf_model)

            selected_k_index = 0
            highest_gmean = 0

            for i in range(len(clustering_model)):
                model = clustering_model[i]
                features = []

                # extract features for each cluster
                for cluster in model.clusters.keys():
                    strokes = []
                    for stroke in model.clusters[cluster]:
                        strokes.append(stroke.coods)
                    if len(strokes) > 0:
                        features.append(
                            ExtractFeatures.generate_features(strokes))

                # calculate class probabilities for each cluster
                class_probabilities = classifier.predict_proba(features)
                # get geometric mean
                g_mean = self.geometric_mean(class_probabilities)

                # select k with highest geometric mean
                if g_mean > highest_gmean:
                    selected_k_index = i
                    highest_gmean = g_mean

            self.selected_k = selected_k_index

            # Store the stroke clusters for the selected k, along with the
            # symbol class predicted by the classifier
            for cluster in clustering_model[selected_k_index].clusters.keys():
                strokes = []
                strokeid_list = []
                for stroke in clustering_model[selected_k_index].clusters[
                        cluster]:
                    strokes.append(stroke.coods)
                    strokeid_list.append(stroke.id)
                if len(strokes) > 0:
                    # extract feature for cluster
                    features = ExtractFeatures.generate_features(strokes)
                    features = np.array(features).reshape(1, -1)
                    # predict class label for cluster
                    y_pred = classifier.predict(features)

                    sym_class = str(y_pred[0])
                    sym_id = sym_class + '_' + str(strokeid_list[0])
                    self.symbols.append(
                        Symbol(sym_id, sym_class, strokeid_list))
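The geometric_mean helper called above is not shown in this example; a minimal sketch, assuming it combines each cluster's top class probability (np is numpy, as in the rest of the snippet):

    def geometric_mean(self, class_probabilities):
        # Hypothetical sketch: geometric mean of the best class probability
        # per cluster; averaging logs avoids underflow on long products.
        best = np.max(class_probabilities, axis=1)
        return float(np.exp(np.mean(np.log(best))))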
Example 2
def get_features_str(prev_prev_tag, prev_tag, line_array, word_index):
    if word_index < 1:
        prev_arr = [None, prev_tag]
        prev_prev_arr = [None, prev_prev_tag]
    elif word_index < 2:
        prev_arr = [line_array[word_index - 1], prev_tag]
        prev_prev_arr = [None, prev_prev_tag]
    else:
        prev_arr = [line_array[word_index - 1], prev_tag]
        prev_prev_arr = [line_array[word_index - 2], prev_prev_tag]

    if word_index > (len(line_array) - 2):
        next_arr = [None, None]
        next_next_arr = [None, None]
    elif word_index > (len(line_array) - 3):
        next_arr = [line_array[word_index + 1], None]
        next_next_arr = [None, None]
    else:
        next_arr = [line_array[word_index + 1], None]
        next_next_arr = [line_array[word_index + 2], None]
    is_rare = line_array[word_index] not in popular_words
    feature_str = ""
    feature_str += ExtractFeatures.get_features_by_word(
        line_array[word_index], is_rare)
    feature_str += ExtractFeatures.get_features_by_2_prevs(
        prev_prev_arr, prev_arr)
    feature_str += ExtractFeatures.get_features_by_next_word(next_arr)
    feature_str += ExtractFeatures.get_features_by_next_next_word(
        next_next_arr)
    return feature_str
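For intuition, the context windows produced above for a three-word line (illustrative values; popular_words is assumed to be a module-level set of frequent tokens):

# line_array = ['the', 'dog', 'barks'], word_index = 1:
#   prev_arr      = ['the', prev_tag]
#   prev_prev_arr = [None, prev_prev_tag]   # nothing two words back
#   next_arr      = ['barks', None]
#   next_next_arr = [None, None]            # the line ends after 'barks'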
Example 3
 def onButtonClicked(self):
     import inspect, os
     filepath = os.path.dirname(
         os.path.abspath(inspect.getfile(
             inspect.currentframe()))) + "/log/save_csv.log"
     current_directory = self.get_current_opened_directory(filepath)
     window = self.window.text()
     try:
         val = float(window)
     except ValueError:
         QMessageBox.about(self, "Error in Window Time",
                           "That's not a number!")
         return
     if val >= self.duration:
         QMessageBox.about(
             self, "Error in Window Time",
             "time need to be smaller than: " + str(self.duration))
         return
     # filename = QFileDialog.getSaveFileName(self, self.tr('csv File'), current_directory, self.tr('csv (*.csv)'))
     saved_dir = str(
         QFileDialog.getExistingDirectory(self, "Select Directory",
                                          current_directory))
     # if filename[0] != '':
     #     with open(filepath, "w") as f:
     #         f.write(filename[0])
     if saved_dir != '':
         with open(filepath, "w") as f:
             f.write(saved_dir)
         topics = self.selected_bag_topics
         specific_features_selection = self.selected_specific_features
         general_features_selection = self.selected_general_features
         with open(get_path() + 'logger.log', "w") as f:
             for topic in topics:
                 f.write(topic + "\n")
             for topic1 in specific_features_selection:
                 f.write(topic1 + "\n")
             for topic2 in general_features_selection:
                 f.write(topic2 + "\n")
         ef = E.ExtractFeatures(topics, float(window),
                                specific_features_selection,
                                general_features_selection)
         counter = 0
         for bag_file in self.bag_files:
             df = ef.generate_features(bag_file)
             if len(self.bag_files) == 1:
                 counter = -1
             # temp = filename + "/" +
             # temp = get_corrent_file_name(filename[0], ".csv", counter)
             csv_path = generate_csv_from_bag(saved_dir, bag_file)
             # temp = "%s_%s%s" % (filename[0],counter,".csv")
             E.write_to_csv(csv_path, df)
             counter = counter + 1
         QMessageBox.about(self, "csv export",
                           "csv was exported successfuly")
Example 4
    def extract_features(self, file_address):
        tree = xml.etree.ElementTree.parse(file_address)
        tree = tree.getroot()

        ef = ExtractFeatures()
        ef.extract(tree)

        en = EvaluateNaming()
        ef.extracted_features['variable_meaning'] = en.evaluate(
            ef.extracted_features) * 10.00

        return ef.extracted_features
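A hypothetical call, assuming the surrounding class instance and an XML dump of parsed source code:

# features = self.extract_features('parsed_code.xml')   # hypothetical file
# features['variable_meaning'] now holds the naming score scaled by 10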
Example 5
 def __call__(self):
     with open(self.feature_map_file, "rb") as f:
         aux_data = pickle.load(f)
     with open(self.model_file_name, "rb") as f:
         model: SGDClassifier = pickle.load(f)
     frequent_words = aux_data[TrainModel.FREQUENT_WORDS]
     vectorizer = DictVectorizer()
     vectorizer.vocabulary_ = aux_data[TrainModel.FEATURE_IDXS]
     vectorizer.feature_names_ = aux_data[TrainModel.FEATURE_NAMES]
     tagged_sentences = []
     with open(self.input_file_name, 'r') as in_f:
         lines = [line.rstrip() for line in in_f.readlines()]
     already_tagged = all(
         map(lambda l: all(map(lambda w: '/' in w, l.split(' '))), lines))
     print('input already tagged:', already_tagged)
     sentences = [
         ExtractFeatures.split_by_whitespace_and_seperate_tags(l)
         for l in lines
     ]
     sentences = list(map(lambda s: list(map(lambda t: t[0], s)),
                          sentences))
     sentences_with_idxs = [(s, i) for (i, s) in enumerate(sentences)]
     sentences = sorted(sentences_with_idxs, key=lambda t: len(t[0]))
     idxs_processed = []
     for l, g in itertools.groupby(sentences, key=lambda t: len(t[0])):
         g = list(g)
         sents_of_len_l = np.asarray(list(map(operator.itemgetter(0), g)))
         idxs_of_len_l = list(map(operator.itemgetter(1), g))
         idxs_processed.extend(idxs_of_len_l)
         tags_of_len_l = np.empty(sents_of_len_l.shape, dtype="U8")
         for i in range(l):
             feats_for_ith_word = []
             for sent_i, word in enumerate(sents_of_len_l[:, i]):
                 feats = ExtractFeatures.extract(
                     sents_of_len_l[sent_i, :], tags_of_len_l[sent_i, :], i,
                     (word not in frequent_words))
                 feats_for_ith_word.append(feats)
             X = vectorizer.transform(feats_for_ith_word)
             tags_pred = model.predict(X)
             tags_of_len_l[:, i] = tags_pred
         tagged_sents_of_len_l = np.char.add(
             np.char.add(sents_of_len_l, '/'), tags_of_len_l)
         tagged_sentences.extend(
             [' '.join(row) for row in tagged_sents_of_len_l])
     tagged_sentences = map(
         operator.itemgetter(0),
         sorted(zip(tagged_sentences, idxs_processed),
                key=operator.itemgetter(1)))
     tagged_sentences = [
         s.replace('$EQ$', '=') for s in tagged_sentences
     ]
     with open(self.output_file, 'w+') as out_f:
         out_f.write('\n'.join(tagged_sentences) + '\n')
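The groupby step above batches sentences of equal length so every word position can be vectorized and predicted for the whole batch at once; a standalone sketch of the same idea:

import itertools

sentences = [['a', 'b'], ['c'], ['d', 'e'], ['f']]
indexed = sorted(enumerate(sentences), key=lambda t: len(t[1]))
for length, group in itertools.groupby(indexed, key=lambda t: len(t[1])):
    batch = [s for _, s in group]
    print(length, batch)  # 1 [['c'], ['f']]  then  2 [['a', 'b'], ['d', 'e']]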
Example 6
def getDataTrain_LogMeanFluxes():
    dt_FluxMeanLog = ExFt.extract_DataTraining_Means()
    print(dt_FluxMeanLog.columns)

    dt_FluxMeanLog.iloc[:, 12:24] = dt_FluxMeanLog.iloc[:, 12:24].apply(np.log)

    return dt_FluxMeanLog
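A minimal illustration of the column-slice log transform used above:

import numpy as np
import pandas as pd

df = pd.DataFrame(np.exp(np.arange(6.0)).reshape(2, 3))
df.iloc[:, 1:3] = df.iloc[:, 1:3].apply(np.log)  # log only columns 1 and 2
print(df)  # column 0 is untouched; columns 1-2 now hold the exponents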
Example 7
def FeatureExtraction():
    #[trainX, trainY, testX, testY] = ef.LoadData()

    trainFolder = ""
    testFile = ["./Data/diyDataset/test/1"]
    trainY = ["test"]
    print testFile[0] + ".wav"
    testF = ef.ExtractFeaturesByLibrosa(testFile, trainY, trainFolder)

    return testF
Example 8
    def get_segmentations(self, directed_graph, data):
        X, y = SegmenterFeatureExtractor.getAllFeatures(directed_graph, data)

        if len(X) == 0:
            return

        y_pred = self.merge_classifier.predict(X)

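        # y_pred[i] == '*' marks graph edge i as a merge: both endpoint
        # strokes belong to the same symbol, so extend the first segment
        # that already contains either endpoint, or start a new one.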
        segmentations = []
        for i in range(len(directed_graph)):
            if y_pred[i] == '*':
                if len(segmentations) == 0:
                    segmentations.append(directed_graph[i])
                else:
                    found = False
                    for seg in segmentations:
                        if found:
                            break
                        if directed_graph[i][0] in seg:
                            if directed_graph[i][1] in seg:
                                found = True
                            else:
                                seg.append(directed_graph[i][1])
                                found = True
                        else:
                            if directed_graph[i][1] in seg:
                                seg.append(directed_graph[i][0])
                                found = True
                    if not found:
                        segmentations.append(directed_graph[i])

        for i in range(len(data.strokeID)):
            found = False
            for seg in segmentations:
                if data.strokeID[i] in seg:
                    found = True
            if not found:
                segmentations.append([data.strokeID[i]])

        symbols = []
        for seg in segmentations:
            strokes = []
            for stroke_id in seg:
                strokes.append(data.coordinates[stroke_id])
            if len(strokes) > 0:
                # extract feature for cluster
                features = ExtractFeatures.generate_features(strokes)
                features = np.array(features).reshape(1, -1)
                # predict class label for cluster
                y_pred = self.symbol_classifier.predict(features)

                sym_class = str(y_pred[0])
                sym_id = sym_class + '_' + str(seg[0])
                symbols.append(Symbol(sym_id, sym_class, seg))
        self.symbols = symbols
Example 9
def processPatient(folder, patient, nSamples, overwrite=False):

    # CALLS: ExtractFeatures.extractFeatures(), writeToFile()
    # CALLED BY: __main__()

    # there is training and test data for three patients, stored in folders like 'training_1', 'test_3', etc.
    # training data: I_J_K.mat - the Jth training data segment corresponding to the Kth class for the Ith patient
    # test data: I_J.mat - the Jth testing data segment for the Ith patient
    # K=0 for interictal, K=1 for preictal
    # this function looks at the training samples for a single patient
    # for each sample, call extractFeatures() and write new features to a single output file for the patient
    # uses writeToFile() to write new features to the patient's file
    # interictal and preictal data are combined in the file, but labelled

    print "Processing patient ", patient
    subFolder = "training_%d" % patient
    inputFolder = os.path.join(
        folder, subFolder)  # folder full of samples for the patient

    outputFileName = "patient_%d_training.mat" % patient
    outputFile = os.path.join(
        folder, outputFileName)  # the file that features will be written to

    # find out which features have already been written, so we do not calculate them again
    if os.path.exists(outputFile) and not overwrite:
        matFile = loadmat(outputFile)
        existingFeatures = [
            key for key in matFile.keys() if not key.startswith('_')
        ]
        # print matFile['nSamplesSegment']
        del matFile
    else:
        existingFeatures = []

    if not overwrite:
        print "Existing features: ", existingFeatures
    else:
        print "Overwriting existing features."

    # keep track of extracted features and collect all of them
    newFeatures = {}

    for EEG_class in range(2):  # 0=interictal, 1=preictal
        for sampleNumber in range(1, nSamples[EEG_class] + 1):
            fileName = "%d_%d_%d.mat" % (patient, sampleNumber, EEG_class)
            inputSample = os.path.join(inputFolder,
                                       fileName)  # single input sample
            print 'Extracting features from ', fileName
            # add extracted features to newFeatures dictionary
            matFile = loadmat(inputSample)
            sampleFeatures = ExtractFeatures.extractFeatures(
                matFile, existingFeatures)
            for key in sampleFeatures.keys():
                newFeatures.setdefault(key, []).append(sampleFeatures[key])
            newFeatures.setdefault('EEG_class', []).append(EEG_class)

    if not overwrite:
        print "Existing features: ", existingFeatures
    else:
        print "Overwriting existing features."
    print "New features: ", newFeatures.keys(), "\n"
    writeToFile(outputFile, newFeatures,
                overwrite)  # add new features to output file
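A hypothetical driver, assuming the folder layout described in the comments:

# processPatient('./data', patient=1, nSamples=[20, 18])
# reads ./data/training_1/1_*_0.mat and 1_*_1.mat, then appends the new
# features to ./data/patient_1_training.mat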
Example 10
import matplotlib.pyplot as plt
import numpy as np
import librosa

import ExtractFeatures as ef

[trainX, trainY, testX, testY] = ef.LoadData()
trainFolder = "./Data/diyDataset/train/"

print trainX[0:3]
print trainY[0:3]
cmnFile = trainFolder + trainX[0] + ".wav"
engFile = trainFolder + trainX[1] + ".wav"

print cmnFile
print engFile

y, sr = librosa.load(cmnFile)
print "end", y

D = np.abs(librosa.stft(y))**2
S = librosa.feature.melspectrogram(S=D)

# Passing through arguments to the Mel filters
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)

import librosa.display  # specshow lives in librosa.display; it must be imported explicitly
librosa.display.specshow(librosa.logamplitude(S, ref_power=np.max),
                         y_axis='mel',
                         fmax=8000,
                         x_axis='time')
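librosa.logamplitude was removed in librosa 0.6; on current versions the equivalent display call is:

librosa.display.specshow(librosa.power_to_db(S, ref=np.max),
                         y_axis='mel',
                         fmax=8000,
                         x_axis='time')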
Example 11
                if len(sentenceData.depTree) == 0:
                    usedSentenceLength += sum(1 for c in sentenceData.sentence if c.strip() != '')
                    continue

                sp = ShortestPath(sentenceData.depTree)
                for mt in sentenceData.maths:
                    #PM
                    if sentenceData.sentence[mt[0]:mt[1]] not in ann._math:
                        continue

                    for np in sentenceData.nps:
                        if (not (mt[0] == np[0] and mt[1] == np[1])) and \
                                ((mt[0] < np[0] and mt[1] <= np[0]) or
                                 (np[0] < mt[0] and np[1] <= mt[1])):
                            #Extracting features
                            #Put ann instead of None in 'ef' declaration for PM

                            ef = ExtractFeatures(sentenceData.sentence, sentenceData.tagInfo, np, mt, sentenceData.depTree, ann)
                            mtInNP = not (np[0] == ef._np[0] and np[1] == ef._np[1])
                            colon, comma, othermath = ef.FirstFeature()
                            insidebracket = ef.SecondFeature()
                            distance = ef.ThirdFeature()
                            mathbefore = ef.FourthFeature()
                            verb = ef.FifthFeature()
                            nppresurf, npprepos, npnextsurf, npnextpos = ef.SixthFeature(3)
                            mathpresurf, mathprepos, mathnextsurf, mathnextpos = ef.SeventhFeature(3)
                            pattern1, pattern2, pattern3, pattern4, pattern5, pattern6, pattern7 = ef.EighthFeature(ptn1, ptn2, ptn3, ptn4, ptn5, ptn6)
                            npstart, npend, mathstart = ef.PreTenthFeature()
                            depdistance, rel_math, rel_np, math_out, np_out = sp.TenthFeature(npstart, npend, mathstart)

                            #PM
                            isDesc, annStartIdx, annEndIdx = ef.isDescription(mathbefore)
Example 12
    def __init__(self, bag_files, listtopics, duration):
        super(BagParser, self).__init__()

        # window title
        self.setWindowTitle("Making csv file")
        # size of window
        self.resize(960, 720)
        #self.showFullScreen()
        #self.setWindowState(Qt.WindowMaximized)

        # print listtopics
        # print E.get_general_features_options()
        # print E.get_specific_features_options()

        self.topics_items = dict()
        self.topics_items["0"] = listtopics
        self.topics_items["1"] = E.get_general_features_options()
        self.topics_items["2"] = E.get_specific_features_options()

        print self.topics_items

        #path to bag file
        self.bag_files = bag_files

        self.selected_bag_topics = []
        self.selected_specific_features = []
        self.selected_general_features = []

        self.items_list_topics = []

        self.area = QScrollArea(self)
        self.areagen = QScrollArea(self)
        self.areaspec = QScrollArea(self)
        self.main_widget = QWidget(self.area)
        self.main_widget1 = QWidget(self.areagen)
        self.main_widget2 = QWidget(self.areaspec)
        self.ok_button = QPushButton("Export To CSV", self)
        #self.ok_button.setFixedSize(150, 30)
        self.ok_button.clicked.connect(self.onButtonClicked)

        self.clear_button = QPushButton("Clear Selection", self)
        # self.clear_button.resize(self.clear_button.sizeHint())
        self.clear_button.clicked.connect(self.onClearClicked)

        self.choose_button = QPushButton("Get Last Export Choices", self)
        self.choose_button.clicked.connect(self.onButtonChooseCliked)
        self.ok_button.setEnabled(False)

        self.label1 = QLabel("Select topic from bag(s)", self)
        self.label1.setAlignment(Qt.AlignCenter)

        self.label2 = QLabel("Statistics Features", self)
        self.label2.setAlignment(Qt.AlignCenter)

        self.label3 = QLabel("Specific Features", self)
        self.label3.setAlignment(Qt.AlignCenter)

        self.duration = duration

        self.label5 = QLabel("Duration Time: " + str("%.1f" % duration), self)
        self.label5.setAlignment(Qt.AlignCenter)

        self.main_vlayout = QVBoxLayout(self)
        # self.main_vlayout = QGridLayout(self)
        self.main_vlayout.addWidget(self.label1)
        self.main_vlayout.addWidget(self.area)
        self.main_vlayout.addWidget(self.label2)
        self.main_vlayout.addWidget(self.areagen)
        self.main_vlayout.addWidget(self.label3)
        self.main_vlayout.addWidget(self.areaspec)

        self.label4 = QLabel("Window time", self)
        self.label4.setAlignment(Qt.AlignCenter)

        # self.main_vlayout.addWidget(self.label4)

        self.window = QLineEdit(self)
        # self.main_vlayout.addWidget(self.window)
        self.window.setText("1")

        self.windows_time_3 = QHBoxLayout(self)

        self.windows_time_3.addWidget(self.label4)

        self.windows_time_3.addWidget(self.window)

        self.windows_time_3.addWidget(self.label5)

        self.main_vlayout.addLayout(self.windows_time_3)

        # self.window = QLineEdit(self)
        # self.window.setText("1")

        # self.box = QVBoxLayout()
        # self.box.addStretch(1)
        # self.box.addWidget(self.clear_button)
        # self.box.addWidget(self.choose_button)
        # self.box.addWidget(self.label4)
        # self.box.addWidget(self.window)
        # self.box.addWidget(self.label5)
        # self.box.addWidget(self.ok_button)

        #self.main_vlayout.addWidget(self.from_nodes_button)

        # self.main_vlayout.addLayout(self.box)

        self.two_buttons = QHBoxLayout(self)

        self.two_buttons.addWidget(self.choose_button)

        self.two_buttons.addWidget(self.clear_button)

        self.main_vlayout.addLayout(self.two_buttons)

        self.main_vlayout.addWidget(self.ok_button)

        self.setLayout(self.main_vlayout)

        self.selection_vlayout = QVBoxLayout(self)
        self.item_all = MyQCheckBox("All", self, self.selection_vlayout, None)
        self.item_all.stateChanged.connect(
            lambda x: self.updateList(x, self.item_all, None))
        self.selection_vlayout.addWidget(self.item_all)
        topic_data_list = listtopics
        topic_data_list.sort()
        for topic in topic_data_list:
            self.addCheckBox(topic, self.selection_vlayout,
                             self.selected_bag_topics)

        self.selection_vlayout1 = QVBoxLayout(self)
        self.item_all1 = MyQCheckBox("All", self, self.selection_vlayout1,
                                     None)
        self.item_all1.stateChanged.connect(
            lambda x: self.updateList(x, self.item_all1, None))
        self.selection_vlayout1.addWidget(self.item_all1)
        topic_data_list1 = E.get_general_features_options()
        topic_data_list1.sort()
        for topic in topic_data_list1:
            self.addCheckBox(topic, self.selection_vlayout1,
                             self.selected_general_features)

        self.selection_vlayout2 = QVBoxLayout(self)
        self.item_all2 = MyQCheckBox("All", self, self.selection_vlayout2,
                                     None)
        self.item_all2.stateChanged.connect(
            lambda x: self.updateList(x, self.item_all2, None))
        self.selection_vlayout2.addWidget(self.item_all2)
        topic_data_list2 = E.get_specific_features_options()
        topic_data_list2.sort()
        for topic in topic_data_list2:
            self.addCheckBox(topic, self.selection_vlayout2,
                             self.selected_specific_features)

        self.main_widget.setLayout(self.selection_vlayout)
        self.main_widget1.setLayout(self.selection_vlayout1)
        self.main_widget2.setLayout(self.selection_vlayout2)

        self.area.setWidget(self.main_widget)
        self.areagen.setWidget(self.main_widget1)
        self.areaspec.setWidget(self.main_widget2)
        self.show()
Example 13
def create_shapefile(state_code,
                     county_code,
                     year,
                     avg=True,
                     level='tract',
                     zone=False,
                     points=None,
                     taz=True,
                     clip=None,
                     intersect=None,
                     nocheck=False,
                     outname='out'):

    # Keep a list of all population columns for areal interpolation
    all_population_columns = []

    bg = (level == 'bg')
    base_shape = Names.SF_CENSUS_COL_BG if bg else Names.SF_CENSUS_COL_TRACT

    if level not in ('bg', 'tract'):
        print "Invalid Census level"
        exit(1)

    if avg and not points:
        print "Cannot average incidents without counting incidents!"
        exit(1)

    county, state = Names.get_location(state_code, county_code)
    msg = 'You have selected to create a shapefile for {}, {} for {} with the following properties:\n'. \
        format(county, state, year)
    msg += 'Demographics for {} in {}, {}\n'.format(level, county, state)
    if points:
        if avg:
            msg += "Averaged "
        else:
            msg += "Yearly "
        msg += "incident counts\n"
    if taz:
        msg += "Taz data\n"
    if clip:
        msg += "Clipped against {}'s response zone\n".format(clip)
    if intersect:
        msg += "Intersected against {}'s response zone\n".format(intersect)
    if zone:
        msg += "With zoning proportions of each shape\n"
    msg += "Saving as shapefile: {}".format(outname)
    r = ''
    print msg

    while not nocheck and r != 'y':
        r = raw_input("Is this correct? (y/n): ")
        if r == 'n':
            print "Exiting..."
            exit(1)

    gdf = GetDemographics.create_demographic_df(state_code=state_code,
                                                county_code=county_code,
                                                year=year,
                                                bg=bg)
    GetNeighborData.get_neighbor_data(census_gdf=gdf,
                                      target_columns=['bldavg', 'medinc'])
    all_population_columns.extend(CodeDicts.get_pop_labels())

    if taz:
        taz_gdf = get_taz_gdf(state_code, county_code, year)
        gdf = GetTazData.merge_census_to_taz(
            census_gdf=gdf,
            taz_gdf=taz_gdf,
            census_level_lbl=base_shape,
            taz_level_lbl=Names.SF_TAZ_COL_TRACT)

        base_shape = Names.SF_TAZ_COL_TAZ
        all_population_columns.extend(TazCodeList.taz_to_sum)

        # Get employment and population density
        density_cols = ['TPE_TOTEMP', 'TPE_POP']
        area_column = 'TPE_AREA_L'
        gdf = ExtractFeatures.get_densities(gdf=gdf,
                                            target_columns=density_cols,
                                            area_column=area_column,
                                            drop=False)

    # NOTE: population values are interpolated during clipping. Make sure all population features are added
    # before clipping!
    if clip:
        resp_area_fpath = Names.get_response_zone_shapefile(state_code, clip)
        gdf = GetTrimmedSF.clip_shapefile(gdf, resp_area_fpath,
                                          all_population_columns)

    if intersect:
        resp_area_fpath = Names.get_response_zone_shapefile(
            state_code, intersect)
        gdf = GetTrimmedSF.intersect_shapefile(gdf, resp_area_fpath)

    if points:
        gdf = GetIncidentCounts.get_count_gdf(geo_df=gdf,
                                              base_shape=base_shape,
                                              state_code=state_code,
                                              city=points,
                                              year=year,
                                              avg=False)

    if zone:
        zone_gdf = gpd.read_file(
            Names.get_zoning_shapefile(state_code=state_code,
                                       county_code=county_code,
                                       year=year))
        gdf = GetZoningData.append_zone_proportions(base_gdf=gdf,
                                                    zone_gdf=zone_gdf,
                                                    base_shp_col=base_shape)

        zones_to_combine = \
            {
                'RESIDENTIAL': ['SINGLE FAMILY', 'MULTI-FAMILY'],
                'COMMERCIAL': ['OFFICE', 'BUSINESS', 'MIXED USE']
            }

        gdf = CombineFeatures.sum_features(gdf, zones_to_combine)

    print("Creating shapefile as {}".format(outname))
    gdf.to_file(outname, driver='ESRI Shapefile')
    gdf.drop(['geometry'], axis=1, inplace=True)
    gdf.to_csv(outname + '.csv')
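A hypothetical invocation (all codes and names below are placeholders):

# create_shapefile('06', '075', 2015, points='san_francisco',
#                  clip='fire_dept', outname='sf_taz')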
Example 14
                else:
                    jishu[self.ytr[min_index]] = 1
                distances[min_index] = distances[max_index]
            Ypred[i] = max(jishu.items(), key=lambda x: x[1])[0]
        #print(Ypred)
        return Ypred


feature = np.loadtxt('feature3.txt')
labels = np.loadtxt('labels3.txt')

picnum = 567

#filename = '../gray/' + str(picnum) + '.jpg'
filename = '../image/' + str(picnum) + '.jpg'
test = ef.hist(filename)

#nn = KNearestN()
#nn.train(feature,labels)
#t = nn.predict(test,k=50,dis=3)
'''
def DisCos(a,b):
    zi = np.sum(a*b,axis=1)
    mu = np.sqrt(np.sum(a**2,axis=1))*np.sqrt(np.sum(b**2,axis=1))
    
    return zi/mu
'''


def L2(a, b):
    # Euclidean distance from each row of a to the broadcast row b
    distances = np.sqrt(np.sum((a - b)**2, axis=1))
    return distances
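A hedged usage note, assuming feature is an (N, D) matrix and test a length-D histogram:

# dists = L2(feature, test)        # test broadcasts against each row
# nearest = np.argsort(dists)[:5]  # indices of the 5 closest images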
Example 15
def predict_tags_for_line(line):
    v_dict = {}
    bp_dict = {}
    line_array = line.split()
    prediction_tags_array = [0] * len(line_array)
    max_values_for_normilazation = [0] * len(line_array)
    max_values_for_normilazation[0] = 1
    tag_set = set()
    old_2prevs_id = None
    old_prev_id = None
    old_scores = None
    for tag in tag_to_id.keys():
        tag_set.add(tag)
    tag_set.add('start')
    for i in range(len(line_array)):
        for possible_tag in tag_to_id.keys():
            if not is_tag_possible_for_word(line_array[i], possible_tag):
                continue
            first_prediction_for_tag = True
            for prev_tag in tag_set:
                if i < 1:
                    if prev_tag != 'start':
                        continue
                else:
                    if not is_tag_possible_for_word(line_array[i - 1],
                                                    prev_tag):
                        continue
                    if prev_tag == 'start':
                        continue
                # if prune_by_seq and (prev_tag, possible_tag) in impossible_seqs:
                #     continue
                max_viterbi_val = 0
                for prev_prev_tag in tag_set:
                    if i == 0:
                        if prev_prev_tag == 'start' and prev_tag == 'start':
                            v_dict_val = 1
                        else:
                            continue
                    elif i == 1:
                        if prev_prev_tag != 'start':
                            continue
                        # i == 1: look up the i == 0 entry, keyed on 'start'
                        v_dict_val = v_dict.get(
                            (i - 1, prev_prev_tag, prev_tag), 0)
                    else:
                        if not is_tag_possible_for_word(
                                line_array[i - 2], prev_prev_tag):
                            continue
                        if prev_prev_tag == 'start':
                            continue
                        v_dict_val = v_dict.get(
                            (i - 1, prev_prev_tag, prev_tag), 0)

                    if first_prediction_for_tag:
                        features_vec = get_features_vec(
                            prev_prev_tag, prev_tag, line_array, i)
                        scored_tags_dict, old_scores = predictor.predict(
                            features_vec)
                        first_prediction_for_tag = False
                    else:
                        new_prev_id = feature_to_id.get(
                            ExtractFeatures.get_feat_str_by_prevtag(prev_tag),
                            None)
                        new_2prevs_id = feature_to_id\
                            .get(ExtractFeatures.get_feat_str_by_2prevs(prev_prev_tag, prev_tag), None)
                        scored_tags_dict, old_scores = predictor.predict_with_trasitions_change(
                            old_scores, old_prev_id, old_2prevs_id,
                            new_prev_id, new_2prevs_id)

                    old_prev_id = feature_to_id.get(
                        ExtractFeatures.get_feat_str_by_prevtag(prev_tag),
                        None)
                    old_2prevs_id = feature_to_id.get(
                        ExtractFeatures.get_feat_str_by_2prevs(
                            prev_prev_tag, prev_tag), None)
                    # print ("scored_tags_dict" + repr(scored_tags_dict))
                    # print ("old_scores" + repr(old_scores))

                    # print("i= %d" % i)
                    # print(possible_tag)
                    # print (tag_to_id[possible_tag])
                    # print(scored_tags_dict[tag_to_id[possible_tag]])
                    # print("n=%f" % max_values_for_normilazation[i])
                    viterbi_val = (v_dict_val / float(max_values_for_normilazation[i])) * \
                              scored_tags_dict[tag_to_id[possible_tag]]

                    if viterbi_val > max_viterbi_val:
                        # print("v=%f max=%f" % (viterbi_val, max_viterbi_val))
                        if (i < len(line_array) - 1 and viterbi_val >
                                max_values_for_normilazation[i + 1]):
                            max_values_for_normilazation[i + 1] = viterbi_val
                        max_viterbi_val = viterbi_val
                        v_dict[(i, prev_tag, possible_tag)] = viterbi_val
                        bp_dict[(i, prev_tag, possible_tag)] = prev_prev_tag

    # choose the best tag for the last word
    max_viterbi_val = 0
    for possible_tag in tag_to_id.keys():
        for prev_tag in tag_set:
            v_dict_val = v_dict.get(
                (len(line_array) - 1, prev_tag, possible_tag), 0)
            if v_dict_val > max_viterbi_val:
                max_viterbi_val = v_dict_val
                prediction_tags_array[len(line_array) - 1] = possible_tag
    # choose the best tag for the second-to-last word
    max_viterbi_val = 0
    for possible_tag in tag_to_id.keys():
        for prev_tag in tag_set:
            v_dict_val = v_dict.get(
                (len(line_array) - 2, prev_tag, possible_tag), 0)
            if v_dict_val > max_viterbi_val:
                max_viterbi_val = v_dict_val
                prediction_tags_array[len(line_array) - 2] = possible_tag

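    # Backtrack: bp_dict[(i + 2, tag_at_i_plus_1, tag_at_i_plus_2)] holds the
    # best tag for position i, so with the last two tags fixed each earlier
    # tag follows from the pair after it.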
    for i in range(len(line_array) - 3, -1, -1):
        prediction_tags_array[i] = bp_dict[(i + 2,
                                            prediction_tags_array[i + 1],
                                            prediction_tags_array[i + 2])]

    return prediction_tags_array
Example 16
def identifyPersonalityTraits(file_name_orig):

    clf_emotional_stability = load('knowme_EmotionalSt.joblib')
    clf_knowme_MentalE_WlPower = load('knowme_MentalE_WlPower.joblib')
    clf_knowme_Modesty = load('knowme_Modesty.joblib')
    clf_lackOfDiscipline = load('lackOfDiscipline.joblib')
    clf_PoorConcentration = load('PoorConcentration.joblib')
    clf_SocialIsolation = load('SocialIsolation.joblib')

    # file_name_orig ="Michael_HW.png"
    # crop(file_name_orig)
    file_name = resize(file_name_orig)

    raw_features = extract.start(file_name)
    raw_baseline_angle = raw_features[0]
    baseline_angle, comment = categorize.determine_baseline_angle(
        raw_baseline_angle)
    print("Baseline Angle: " + comment)

    raw_top_margin = raw_features[1]
    top_margin, comment = categorize.determine_top_margin(raw_top_margin)
    print("Top Margin: " + comment)

    raw_letter_size = raw_features[2]
    letter_size, comment = categorize.determine_letter_size(raw_letter_size)
    print("Letter Size: " + comment)

    raw_line_spacing = raw_features[3]
    line_spacing, comment = categorize.determine_line_spacing(raw_line_spacing)
    print("Line Spacing: " + comment)

    raw_word_spacing = raw_features[4]
    word_spacing, comment = categorize.determine_word_spacing(raw_word_spacing)
    print("Word Spacing: " + comment)

    raw_pen_pressure = raw_features[5]
    pen_pressure, comment = categorize.determine_pen_pressure(raw_pen_pressure)
    print("Pen Pressure: " + comment)

    raw_slant_angle = raw_features[6]
    slant_angle, comment = categorize.determine_slant_angle(raw_slant_angle)
    print("Slant: " + comment)

    emotional_stability = clf_emotional_stability.predict(
        [[baseline_angle, slant_angle]])
    MentalE_WlPower = clf_knowme_MentalE_WlPower.predict(
        [[letter_size, pen_pressure]])
    Modesty = clf_knowme_Modesty.predict([[letter_size, top_margin]])
    Discipline = clf_lackOfDiscipline.predict([[slant_angle, top_margin]])
    Concentration = clf_PoorConcentration.predict([[letter_size,
                                                    line_spacing]])
    SocialIsolation = clf_SocialIsolation.predict(
        [[line_spacing, word_spacing]])

    if (emotional_stability[0] == 1):
        emotional_stability = "Stable"
    else:
        emotional_stability = "Not Stable"
    if (MentalE_WlPower[0] == 1):
        MentalE_WlPower = "High or Average"
    else:
        MentalE_WlPower = "Low"
    if (Modesty[0] == 1):
        modesty = "Observed"
    else:
        modesty = "Not Observed"
    if (Concentration[0] == 1):
        concentration = "Observed"
    else:
        concentration = "Not Observed"
    if (Discipline[0] == 1):
        discipline = "Observed"
    else:
        discipline = "Not Observed"
    if (SocialIsolation[0] == 1):
        SocialIsolation = "Observed"
    else:
        SocialIsolation = "Not Observed"

    personality_Trait_dict = {
        "Emotional_Stability": emotional_stability,
        "Mental_Power": MentalE_WlPower,
        "Modesty": modesty,
        "Discipline": discipline,
        "Concentration": concentration,
        "Social_Isolation": SocialIsolation
    }

    print(personality_Trait_dict)
    return personality_Trait_dict
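A hypothetical call, assuming a scanned handwriting image on disk:

# traits = identifyPersonalityTraits('sample_handwriting.png')
# traits -> {'Emotional_Stability': 'Stable', ...}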
Example 17
                                                  '',
                                                  end=''),
                                    # print("\n")
                                    if (s.tag == "w"):
                                        # print("%s %s" % (s.text, s.attrib['ctag']), '', end=''),
                                        single_sentence.append(s.text)
                                        single_sentence.append(
                                            s.attrib['ctag'])
                                        test_sen.append(s.text)
                                        y_test.append(s.attrib['ctag'])
                                print(single_sentence)

                                for x in range(0, len(single_sentence), 2):
                                    # print(get_features(single_sentence,x))
                                    X_test.append(
                                        ExtractFeatures.get_testfeatures(
                                            single_sentence, x))
                        #         # print('\n')
                        # print(len(single_sentence))
                        if (subdiv.tag == "p"):
                            for sentence in subdiv.findall('s'):
                                # print(sentence.attrib)
                                single_sentence = []
                                for s in sentence:
                                    if (s.tag == "foreign"):
                                        for words in s.findall('w'):
                                            print("%s/%s" %
                                                  (words.text,
                                                   words.attrib['ctag']),
                                                  '',
                                                  end=''),
                                    # print("\n")
Example 18
trainLabels = [
    'JX', 'NN', 'II', 'JX', 'NN', 'VE', 'IKM', 'NN', 'NN', 'IKM', 'JX', 'NN',
    'IKO', 'NN', 'IKM', 'NN', 'II', 'NN', 'NP', 'NP', 'NP', 'IE', 'MM', 'JX',
    'NN', 'NN', 'IKM', 'NN', 'VR', 'VI', 'VE', 'II', 'NN', 'IKM', 'NN', 'JX',
    'NN', 'IKM', 'NN', 'CC', 'DDX', 'II', 'VN', 'NN', 'IKM', 'NN', 'II', 'JX',
    'VI', 'VE', 'IKM', 'VVYN1', 'YF'
]
test_feat = []
test = [
    'संसद', 'को', 'अधिवेशन', 'आषाढ', 'को', 'शुरु', 'मा', 'हुने',
    'राष्ट्रियसभा', 'को', 'गठन', 'यै', 'महिना', 'मा', 'भईसक्ने', 'चीन-सोभियत',
    'सीमा', 'मा', 'बढी', 'सबल', 'सुरक्षा'
]
# test = [{'pos': 'IKM', 'prev-prev-word': 'अधिवेशन', 'word': 'को', 'prev-word': 'आषाढ', 'prev-pos': 'NN', 'next-next-word': 'मा', 'prev-prev-pos': 'NN', 'next-pos': 'NN', 'next-word': 'शुरु', 'nextnextpos': 'II'}]
for x in range(0, len(test)):
    test_feat.append(ExtractFeatures.get_wordFeatures(test, x))
print(test, test_feat)

# from sklearn.feature_extraction import DictVectorizer
# from sklearn.svm import LinearSVC
# from sklearn.svm import SVC
# from sklearn.pipeline import Pipeline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction import DictVectorizer
from sklearn import svm

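trainFeatures is used below but not built in this excerpt; a plausible reconstruction, assuming a tagged training word list train_words parallel to trainLabels:

# trainFeatures = [ExtractFeatures.get_wordFeatures(train_words, x)
#                  for x in range(len(train_words))]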
vec = DictVectorizer(sparse=False)
X_arr = vec.fit_transform(trainFeatures)
print("Converting word features into Numpy arrays")
# with open("train.txt", "wb") as f:
Example 19
def main():
    duration, subject, trial = check_args(sys.argv[1:])
    voice = trial_record(duration, subject, trial)
    trial_vad(voice, subject)
    vad_path = "vad/" + subject
    ExtractFeatures.generate_model(vad_path)
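A hypothetical invocation, matching the check_args signature above:

# python record_pipeline.py <duration> <subject> <trial>   # hypothetical script name
# records <duration> seconds for <subject>, runs VAD, then builds a model
# from the files under vad/<subject>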
Example 20
def getDataTrain_Mean_PassbandFluxes():
    datafeatures = ExFt.extract_DataTraining_Means_byPassband()
    datafeatures.target = datafeatures.target.astype('category')
    return datafeatures
def getDataTrain_MeanFluxes():
    datafeatures = ExFt.extract_DataTraining_Means()
    return datafeatures