Example #1
0
def grafic(params, etiqueta):
    """Evaluate rankings for one data split and return the averaged result.

    Stores ``etiqueta`` under ``params['split']`` (the caller's dict is
    modified in place), runs ``get_features`` and ``eval_rankings`` with
    those params, and returns ``np.mean`` of the second value that
    ``eval_rankings`` returns.
    """
    params['split'] = etiqueta
    get_features(params)
    # Only the second element of the evaluation result is averaged.
    _, scores = eval_rankings(params)
    return np.mean(scores)
Example #2
0
def models_combined():
    """Train all model variants and return each classifier with its test split.

    Three models are trained on the features of the full processed data set
    (gradient boosting with 4000 and 1000 estimators, one random forest).
    The data is then loaded and processed again, split with
    ``train_test_equal_weight``, and two more models are trained on that
    split.

    Returns a flat 15-tuple of ``(clf, X_test, y_test)`` triples in the
    order: gb4000, gb1000, gbEF, rf1, rf2.
    """
    full_df = process_df(load_data())
    X, y = get_features(full_df)

    print("running gradient boosted model with 4000 estimators...")
    gb4000_model, gb4000_X, gb4000_y = run_gradient_boosted(
        X, y, n_estimators=4000)

    print("running gradient boosted model with 1000 estimators...")
    gb1000_model, gb1000_X, gb1000_y = run_gradient_boosted(
        X, y, n_estimators=1000)

    print("running random forest model...")
    rf1_model, rf1_X, rf1_y = run_rf_churn(X, y)

    # The data is loaded and processed a second time before the split.
    balanced_df = process_df(load_data())
    train_part, test_part = train_test_equal_weight(balanced_df)
    X_train, y_train = get_features(train_part)
    X_test, y_test = get_features(test_part)

    print("running gradient boosted model with even fraud and non-fraud...")
    gbEF_model, gbEF_X, gbEF_y = run_gradient_boosted_evenFraud(
        X_train, y_train, X_test, y_test)

    print("running random forest model with even fraud and non-fraud...")
    rf2_model, rf2_X, rf2_y = run_rf_churn2(X_train, y_train, X_test, y_test)

    return (
        gb4000_model, gb4000_X, gb4000_y,
        gb1000_model, gb1000_X, gb1000_y,
        gbEF_model, gbEF_X, gbEF_y,
        rf1_model, rf1_X, rf1_y,
        rf2_model, rf2_X, rf2_y,
    )
Example #3
0
def _write_feature_files(files, truth_db, truth_function):
    """Write a feature file for use in WEKA.

    Looks up the entries of ``files`` keyed by the ``*_FILE_PATH`` constants
    and forwards them, followed by ``truth_db`` and ``truth_function``, to
    ``get_features.get_features``.
    """
    extract = get_features.get_features
    path_keys = (RELATION_FILE_PATH, PMI_FILE_PATH, COOCCURRENCE_FILE_PATH,
                 FEATURE_FILE_PATH)
    call_args = [files[key] for key in path_keys]
    call_args.extend((truth_db, truth_function))
    extract(*call_args)
Example #4
0
def main():
    """Compute features for every selected feature function and save them.

    For each function in ``ffs`` the features are extracted with
    ``get_features`` (from the test files or the full data files, depending
    on ``use_test_data``), copied row by row into a 2-D float array and
    stored under ``../data/`` with ``np.save``; the test variant gets a
    ``_test`` suffix.  The elapsed time of both phases is printed.
    """
    # Alternative feature-function selections, kept for reference:
    # ffs = [features_funcs.dists_2_refs_lips,
    #        features_funcs.dists_2_refs_lower_lip,
    #        features_funcs.dists_2_refs_contour,
    #        features_funcs.dists_2_refs_contour_top4,
    #        features_funcs.dists_2_refs_nose,
    #        features_funcs.dists_2_refs_nose_top4,
    #        features_funcs.dists_2_refs_eyebrows,
    #        features_funcs.dists_2_refs_eyebrows_corners,
    #        features_funcs.dists_2_refs_eyes]
    # ffs = [features_funcs.dists_lips,
    #        features_funcs.dists_lower_lip,
    #        features_funcs.dists_contour,
    #        features_funcs.dists_contour_top4,
    #        features_funcs.dists_nose,
    #        features_funcs.dists_nose_top4,
    #        features_funcs.dists_eyebrows,
    #        features_funcs.dists_eyebrows_corners,
    #        features_funcs.dists_eyes]
    # ffs = [features_funcs.butterfly_catastrophe]#, features_funcs.ellipse_picked,
        #    features_funcs.ellipse, features_funcs.polyfit6,
        #    features_funcs.face_contour]

    ffs = [features_funcs.fwhr]

    use_test_data = True

    for feature_func in ffs:
        print('get features (with gender)', flush=True)
        started = time()
        if use_test_data:
            features = get_features("../data/testdata.csv", feature_func,
                                    "../data/testdata_labels.txt",
                                    None, face_id_clean=True, limit_deg=180)
            suffix = '_test'
        else:
            features = get_features("../data/data.csv", feature_func,
                                    "../data/data_labels.txt",
                                    "../data/gender.csv", face_id_clean=False, limit_deg=1)
            suffix = ''
        print(time() - started)

        # Width of the array = length of the first feature vector (0 if none).
        feature_length = next((len(vec) for vec in features.values()), 0)

        print('save as np array')
        started = time()
        arr = np.zeros((len(features), feature_length))
        for row, vec in enumerate(features.values()):
            arr[row] = vec
        np.save('../data/' + feature_func.__name__ + suffix, arr)
        print(time() - started)
Example #5
0
    def fetch(self, entries):
        """Store the patient record entered in the form and show its key parameters.

        ``entries`` is indexed as ``entries[i][0]`` (field name) and
        ``entries[i][1]`` (an object whose ``.get()`` returns the typed
        text).  The first entry holds the record ID, which is converted with
        ``int``; an ID that already exists in ``self.patient_data`` only
        produces an error screen.  Otherwise every remaining field is stored
        as a float, the whole ``self.patient_data`` dict is pickled to
        ``patient_data.pkl`` and a confirmation screen is built that lists
        the values returned by ``get_features.get_features`` for the
        record's ``'ICUType'``.

        Raises:
            ValueError: if the record ID text is not an integer.
        """
        recordID = int(entries[0][1].get())
        if recordID in self.patient_data:
            for widget in self.body.winfo_children():
                widget.destroy()
            Label(self.body, text = "Patient with given record ID already exists ").grid(row = 0, sticky = 'news', pady = 5)
            Button(self.body, text = "Try Again", command=self.add_patient).grid(row = 1, pady = 5)
            return

        record = self.patient_data[recordID] = {}

        for entry in entries[1:]:
            field = entry[0]
            try:
                value = float(entry[1].get())
            except (TypeError, ValueError):
                # Blank or non-numeric input is stored as the sentinel -1.
                # Only conversion failures are caught, so real errors
                # (and KeyboardInterrupt) are no longer swallowed.
                value = -1

            record[field] = value

        with open('patient_data.pkl', 'wb') as f:
            pickle.dump(self.patient_data, f)

        for widget in self.body.winfo_children():
            widget.destroy()

        Label(self.body, text = "Patient details recorded successfully! ", font = ('Courier', 20)).grid(row = 0, sticky = 'news', pady = 5)
        Button(self.body, text = "Add more", command=self.add_patient).grid(row = 1, pady = 5)
        Label(self.body, text = "Most important parameters: ", font = ('Courier', 20)).grid(row = 2, sticky = 'news', pady = 5)
        features = get_features.get_features(record['ICUType'])
        #features = ['Albumin', 'Temp', 'Urine', 'HCT', 'NiMAP']
        # Rows 0-2 are taken by the labels/buttons above, so features start at row 3.
        for offset, feature in enumerate(features):
            Label(self.body, text = feature, font = ('Courier', 30)).grid(row = offset+3, sticky = 'news', pady = 5)
Example #6
0
def main():
    """Crawl all listing URLs in parallel threads and write them to a CSV file.

    The four URL lists returned by ``get_url_list.get_aparts`` are tagged
    with their listing type and merged, split into chunks of 1000 with
    ``get_chunks``, and each chunk is handed to ``get_inventory`` on its own
    thread; all threads append to one shared ``inventory`` list.  After the
    threads finish, the rows are written '|'-delimited to
    ``FundaInventoryLatest.csv``.

    The header comes from the features of ``sample_url`` (a name expected at
    module level).  Each row is written in header order, so columns stay
    aligned even if a row's dict orders its keys differently; keys missing
    from a row are written as empty cells.
    """
    inventory = []
    huur, verhuurd, koop, verkocht = get_url_list.get_aparts(60, 230, 390, 760)

    # Compile the four lists into one, remembering which list each URL came from.
    url_list = []
    tagged_sources = (
        (huur, "huur"),
        (verhuurd, "verhuurd"),  # was misspelled "verhurrd"
        (koop, "koop"),
        (verkocht, "verkocht"),
    )
    for urls, listing_type in tagged_sources:
        for item in urls:
            url_list.append([item, listing_type])
    print("Done getting all the urls!")

    threads = []
    for chunk in get_chunks(url_list, 1000):
        t = threading.Thread(target=get_inventory, args=(inventory, chunk))
        threads.append(t)
        t.start()
    for t in threads:
        t.join()

    print("Done getting all the features!")

    sample = get_features.get_features(sample_url)
    sample["type"] = "sample"
    header = list(sample.keys())

    # The context manager guarantees the file is flushed and closed.
    with open("FundaInventoryLatest.csv", "wb") as out_file:
        threading_output = unicodecsv.writer(out_file, encoding='utf-8', delimiter='|')
        threading_output.writerow(header)
        for row in inventory:
            threading_output.writerow([row.get(column) for column in header])
    print("Done writing output file!")
Example #7
0
def main():
    """Plot the mean landmark points of the two groups from ``split_genders``.

    Features are read from the test data files with ``pure_landmarks``.  For
    each group the column-wise mean is taken; even-indexed entries are used
    as x and odd-indexed entries as y, and both point sets are drawn as dots
    in the same figure.
    """
    print('getting faces')
    all_features = get_features("../data/testdata.csv",
                                pure_landmarks,
                                "../data/testdata_labels.txt",
                                None,
                                face_id_clean=True,
                                limit_deg=180)
    group_a, group_b = split_genders(all_features)

    # Full data set alternative:
    #   get_features("../data/data.csv", pure_landmarks,
    #                "../data/data_labels.txt",
    #                "../data/gender.csv", face_id_clean=False, limit_deg=1)
    # Optional subsampling: group_a[:1000], group_b[:1000]

    print('averaging')
    mean_a = np.mean(group_a, 0)
    points_a = np.array(list(zip(mean_a[::2], mean_a[1::2])))
    mean_b = np.mean(group_b, 0)
    points_b = np.array(list(zip(mean_b[::2], mean_b[1::2])))
    # np.save('g0_2deg', points_a)
    # np.save('g1_2deg', points_b)

    for points in (points_a, points_b):
        plt.plot(points[:, 0], points[:, 1], '.')
    # plt.savefig('gender0.png') / plt.savefig('gender1.png')
    plt.show()
Example #8
0
def predict_price(image_file, model, symbol_search='$'):
    """Run ``model`` on the features extracted from ``image_file``.

    The text frame from ``get_text.get_text`` is passed through
    ``get_features.get_features``; the columns listed below are fed to
    ``model.predict`` and the result is stored in a new ``'predict'``
    column.  The augmented frame is returned.
    """
    feature_columns = [
        'block_confidence',
        'paragraph_confidence',
        'word_confidence',
        'block_weigh',
        'paragraph_weigh',
        'word_weigh',
        'rel_word_block_area',
        'rel_word_parag_area',
        'rel_parag_block_area',
        'prev_symbol',
        'next_symbol',
        'text_type',
        'prev_text_type',
        'next_text_type',
        'text_len',
        'is_a_symbol',
        'number_type',
    ]

    text_frame = get_text.get_text(image_file, symbol_search)
    frame = get_features.get_features(text_frame)

    model_input = frame[feature_columns].copy()
    frame['predict'] = model.predict(model_input.values)
    return frame
Example #9
0
def get_training_data(dataframe):
    """Build a feature table (plus ``'label'`` column) from audio files.

    For every row, the file at ``row["path"]`` is loaded with librosa, each
    variant yielded by ``augment_data`` is turned into a feature vector with
    ``get_features``, and the row's ``"emotion"`` is recorded once per
    variant.  Returns a DataFrame with one row per feature vector.
    """
    print(dataframe.head())
    feature_rows = []
    labels = []

    for _, record in dataframe.iterrows():
        emotion = record["emotion"]
        path = record["path"]
        print("processing file", path)
        # duration/offset trim the silent start and end of each recording.
        data, sample_rate = librosa.load(path, duration=2.5, offset=0.6)

        # augment_data returns the original data as the first element.
        variants = [
            get_features(augmented, sample_rate)
            for augmented in augment_data(data, sample_rate)
        ]
        feature_rows.extend(variants)
        labels.extend([emotion] * len(variants))

    table = pd.DataFrame(feature_rows)
    table["label"] = labels
    return table
Example #10
0
def auto_feature_select(ds, obj):
    """Greedy forward selection over the columns of ``ds.X``.

    Starting from an empty ``obj.features``, each column index is appended
    in turn, ``cross_validation(ds)`` is run and the results are printed.
    The index is kept only if ``obj.best_score`` exceeds the best score seen
    so far; otherwise it is removed again.
    """
    obj.features = []  # start from an empty feature set
    top_score = 0
    column_count = np.size(ds.X, 1)
    for column in range(column_count):
        print('Current features: ')
        get_features(obj.features)
        print('Now trying feature: ')
        get_features([column])
        obj.features.append(column)
        cross_validation(ds)
        print_results([obj])
        if not obj.best_score > top_score:
            print('Did not keep feature')
            obj.features.pop()
            continue
        gain = obj.best_score - top_score
        print('Kept feature; score improved by %0.2f%%: ' % gain)
        top_score = obj.best_score
Example #11
0
def predict(df, model):
    """Return ``(predictions, probabilities)`` of ``model`` for ``df``.

    INPUT: Processed datapoint / dataframe
    OUTPUT: Array of Predictions, Array of Probabilities

    Features are obtained with ``get_features(df, point=True)``.
    """
    features = get_features(df, point=True)
    return model.predict(features), model.predict_proba(features)
Example #12
0
def start_prediction(model):
    """Predict on the CSV at ``TEST_PATH`` and write ``submission.csv``.

    The output has an ``id,target`` header followed by one line per
    prediction, pairing the prediction at position ``i`` with
    ``df.at[i, 'id']``.
    """
    test_df = pd.read_csv(TEST_PATH)
    print(test_df)
    test_features = get_features(test_df)
    print("Test features len")
    print(len(test_features))
    predictions = model.predict(test_features)

    with open("submission.csv", 'w') as out:
        out.write("id,target\n")
        for position, prediction in enumerate(predictions):
            cells = (str(test_df.at[position, 'id']), str(prediction))
            out.write(",".join(cells) + "\n")
Example #13
0
def objectTracking(filename):
    """Track the boxes stored in 'easy.npy' through a video and record the result.

    Reads frames from ``filename``, seeds feature points on the first frame
    with ``get_features``, then for every following frame updates the points
    and boxes via ``estimateAllTranslation`` and
    ``applyGeometricTransformation``.  Each annotated frame is written to
    'Easy.avi' and shown in a window; pressing 'q' stops early.
    """
    cap = cv2.VideoCapture(filename)
    img1 = None
    img2 = None
    writer = skvideo.io.FFmpegWriter('Easy.avi')
    bboxs = np.load('easy.npy')
    # Incremented per frame below but not otherwise used in this function.
    frame_num = 0
    while (cap.isOpened()):
        frame_num += 1
        ret, frame = cap.read()
        if not ret:
            break
        # First frame: blur it, detect features on the grayscale image, and
        # wait for the next frame before tracking.
        if img1 is None and img2 is None:
            img2 = frame
            img2 = cv2.GaussianBlur(img2, (7, 7), 0)
            gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
            # NOTE(review): the result is unpacked as (Ys, Xs) while every
            # later call passes (Xs, Ys) -- confirm get_features' return order.
            startYs, startXs = get_features(gray, bboxs)
            continue
        # Shift the previous blurred frame to img1 and blur the new one.
        img1 = img2
        img2 = frame
        img2 = cv2.GaussianBlur(img2, (7, 7), 0)
        newXs, newYs = estimateAllTranslation(startXs, startYs, img1, img2)
        startXs, startYs, bboxs = applyGeometricTransformation(
            startXs, startYs, newXs, newYs, bboxs)

        # bb_img aliases frame, so drawing below modifies frame in place
        # (unless draw_bounding_box returns a new image).
        bb_img = frame
        # True = keep the box; set to False when all its points are invalid.
        delete_mask = np.ones(bboxs.shape[0], dtype=bool)
        for idx, bbox in enumerate(bboxs):
            # Points at or beyond the frame width/height are marked -1.
            mask = np.logical_or(startXs[:, idx] >= frame.shape[1],
                                 startYs[:, idx] >= frame.shape[0])
            startXs[:, idx][mask] = -1
            startYs[:, idx][mask] = -1
            # A box whose points are all negative is dropped and not drawn.
            if (startXs[:, idx] < 0).all() and (startYs[:, idx] < 0).all():
                delete_mask[idx] = False
                continue
            bb_img = draw_bounding_box(bbox, bb_img)

        # Remove dropped boxes and their point columns.
        bboxs = bboxs[delete_mask, :, :]
        startXs = startXs[:, delete_mask]
        startYs = startYs[:, delete_mask]

        # Draw every remaining non-negative point as a filled circle.
        for idx, (x, y) in enumerate(zip(startXs, startYs)):
            for ind in range(bboxs.shape[0]):
                if x[ind] >= 0 and y[ind] >= 0:
                    cv2.circle(bb_img, (np.int32(x[ind]), np.int32(y[ind])), 3,
                               (0, 0, 255), -1)
        # Channel order is reversed for the writer (presumably BGR -> RGB).
        writer.writeFrame(bb_img[:, :, [2, 1, 0]])
        cv2.imshow('frame', bb_img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    writer.close()
    cap.release()
    cv2.destroyAllWindows()
Example #14
0
def start_training():
    """Fit a logistic regression on the CSV at ``TRAIN_PATH`` and return it.

    Features come from ``get_features``; labels are the ``'target'`` column.
    The data is split with ``train_test_split`` (default settings) and the
    score on the held-out part is printed.
    """
    train_df = pd.read_csv(TRAIN_PATH)
    print('Number of training sentences: ', len(train_df))
    # For a quick run the frame can be subsampled: train_df.sample(200)
    targets = train_df['target']
    print(targets)
    features = get_features(train_df)
    print(len(features))

    split = train_test_split(features, targets)
    train_features, test_features, train_labels, test_labels = split

    classifier = LogisticRegression()
    classifier.fit(train_features, train_labels)
    print(classifier.score(test_features, test_labels))
    return classifier
Example #15
0
    def __init__(self, articles):
        """Compute features for every article and sort them into train/test.

        After ``Corpus.__init__`` runs, each article's features are obtained
        with ``get_features`` and appended, together with the article, to
        the training or testing lists depending on ``article.train``.
        ``self.feat_names`` holds the keys of the last article's features,
        or an empty key view when ``articles`` is empty.
        """
        Corpus.__init__(self, articles)
        # Default so that feat_names is defined even when there are no
        # articles (previously an UnboundLocalError).
        features = {}
        for article in articles:
            # Get the feature values for the current article.
            features = get_features(article)
            if article.train:
                # Feature dict and article go into the training set.
                self.train_feats.append(features)
                self.train_articles.append(article)
            else:
                # Feature dict and article go into the testing set.
                self.test_feats.append(features)
                self.test_articles.append(article)

        self.feat_names = features.keys()
Example #16
0
def get_inventory(inventory, url_list):
    """Fetch the features of every URL in ``url_list`` into ``inventory``.

    Each element of ``url_list`` is indexed as ``row[0]`` (the URL) and
    ``row[1]`` (the listing type, stored under the ``"type"`` key of the
    fetched features).  Failures are reported on stdout and skipped.  A
    progress line is printed every 100 URLs and the loop sleeps two seconds
    after each URL.

    ``inventory`` is extended in place; nothing is returned.
    """
    # Loop through all urls; the counter includes failed ones.
    for count, row in enumerate(url_list, 1):
        try:
            apart = get_features.get_features(row[0])
            apart["type"] = row[1]
            inventory.append(apart)
        except Exception:
            # `except Exception` (rather than a bare except) keeps the crawl
            # best-effort but lets KeyboardInterrupt/SystemExit stop it.
            print("error url:" + row[0])
            print(sys.exc_info())
        if count % 100 == 0:
            print("We have gathered properties: " + str(count))
        time.sleep(2)
Example #17
0
def get_inventory(backfill = False):
    """Return the features of every property from ``get_property_list``.

    ``backfill`` is forwarded to ``get_property_list``.  For each property
    the features are fetched with ``get_features(prop['Id'])``; failures are
    reported on stdout and skipped.  A progress line is printed each time
    the number of successfully crawled properties reaches a multiple of 100.
    """
    inventory = []
    properties = get_property_list(backfill = backfill)
    total_count = len(properties)
    print("Total Properties:  %s" % (total_count,))
    count = 0
    for prop in properties:
        try:
            features = get_features(prop['Id'])
        except Exception:
            # `except Exception` keeps the crawl best-effort without
            # swallowing KeyboardInterrupt/SystemExit.
            print("Count:  %s" % (count,))
            print("error id:  %s" % (prop['Id'],))
            print(sys.exc_info())
            continue
        inventory.append(features)
        count += 1
        # Report only after a success, so a run of failures does not repeat
        # the same progress line.
        if count % 100 == 0:
            print("Crawled Properties:  %s" % (count,))
    return inventory
def update_loss(target, vgg, content_features, style_weights, style_grams,
                content_weight, style_weight, steps=2000, lr=0.004,
                show_every=400):
    """Optimise ``target`` in place against a content and a style loss.

    Args:
        target: image tensor being optimised (handed to Adam as the only
            parameter).
        vgg: model passed to ``get_features`` together with ``target``.
        content_features: mapping that must contain ``'conv4_2'``.
        style_weights: mapping of layer name to weight; its keys select the
            layers used for the style loss.
        style_grams: mapping of layer name to the style Gram matrix.
        content_weight: multiplier of the content loss.
        style_weight: multiplier of the style loss.
        steps: number of optimisation steps (default 2000, as before).
        lr: Adam learning rate (default 0.004, as before).
        show_every: print the loss and show the image every this many steps
            (default 400, as before); ``0`` or ``None`` disables the display.

    Nothing is returned.
    """
    optimizer = optim.Adam([target], lr=lr)

    for ii in range(1, steps + 1):
        target_features = get_features(target, vgg)

        # Content loss: mean squared difference at layer conv4_2.
        content_loss = torch.mean(
            (target_features['conv4_2'] - content_features['conv4_2'])**2)

        # Style loss: weighted Gram-matrix differences, summed over layers.
        style_loss = 0
        for layer in style_weights:
            target_feature = target_features[layer]
            target_gram = gram_matrix(target_feature)
            _, d, h, w = target_feature.shape
            style_gram = style_grams[layer]
            layer_style_loss = style_weights[layer] * torch.mean(
                (target_gram - style_gram)**2)
            # Normalise by the feature map size of the layer.
            style_loss += layer_style_loss / (d * h * w)

        total_loss = content_weight * content_loss + style_weight * style_loss

        # Update the target image.
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # Display intermediate images and print the loss.
        if show_every and ii % show_every == 0:
            print('Total loss: ', total_loss.item())
            plt.imshow(im_convert(target))
            plt.show()
def detect(img, x_start_stop, y_start_stop, window_sz, overlap):
    """Slide a window over ``img`` and collect the windows ``clf`` accepts.

    ``x_start_stop`` / ``y_start_stop`` give the search range, ``window_sz``
    is ``(width, height)`` and ``overlap`` the fractional overlap per axis.
    Every window is resized to 64x64, passed through ``get_features`` and
    the module-level ``clf``; accepted windows are returned as a list of
    ``np.array([x1, y1, x2, y2])``.
    """
    y_first = y_start_stop[0]
    y_limit = y_start_stop[1] - window_sz[1]
    x_first = x_start_stop[0]
    x_limit = x_start_stop[1] - window_sz[0]

    # Number of pixels per step in x/y.
    stride_x = int(window_sz[0] * (1-overlap[0]))
    stride_y = int(window_sz[1] * (1-overlap[1]))

    hits = []
    for top in np.arange(y_first, y_limit, stride_y):
        bottom = top + window_sz[1]
        for left in np.arange(x_first, x_limit, stride_x):
            right = left + window_sz[0]
            patch = cv2.resize(img[top:bottom, left:right], (64,64))
            if clf.predict(get_features(patch)):
                hits.append(np.array([left, top, right, bottom]))

    return hits
Example #20
0
    devfile = open(devfilename)
    devJson = json.load(devfile)
    devfile.close()

    testfile = open(testfilename)
    testJson = json.load(testfile)
    testfile.close()

    negativefilename = 'negative_train.json'
    negativefile = open(negativefilename)
    nJson = json.load(negativefile)
    negativefile.close()
    
    # acquire features for training
    train_tokens,train_tags,train_otherfeats = get_features(trainJson)
    neg_tokens,neg_tags,neg_otherfeats = get_features(nJson)

    labels = tokenizer.getlabels(trainJson) + tokenizer.getlabels(nJson)
    train_tokens += neg_tokens
    train_tags += neg_tags
    train_otherfeats += neg_otherfeats

    #vec = TfidfVectorizer(tokenizer=dummy,preprocessor=dummy)
    vec = CountVectorizer(tokenizer=dummy,preprocessor=dummy)
    train_vec = vec.fit_transform(train_tokens).toarray()
    train_tags = csr_matrix(train_tags).toarray()
    train_otherfeats = csr_matrix(train_otherfeats).toarray()
    train_X = np.concatenate([train_otherfeats,train_tags,train_vec],axis=1)
    #train_X = train_vec
    print(train_X.shape)
Example #21
0
# Select which algorithm objects take part in this run (only ann is disabled).
gnbObj.isUsed = True
annObj.isUsed = False
ensObj.isUsed = True
apriObj.isUsed = True
astaltObj.isUsed = True
fib4Obj.isUsed = True

cross_validation(toronto)

# Recorded results of an earlier run:
#sens: 63.53
#acc: 68.96
#auc: 0.75

find_misclassifications(toronto, algorithmArray)
plot_heat_map(toronto, algorithmArray)
get_features(gbcObj.features)

#######################  External Validation from Montreal

# Import the validation set
montreal = dataset_class()
montreal.description = 'McGill Liver Clinic Dataset. External test set for validation'
# NOTE(review): `parse_cols` was deprecated in pandas in favour of `usecols`
# -- confirm the pandas version this script runs on.
montreal.df = pd.read_excel(
    'C:/Users/Soren/Desktop/Thesis/Data Analysis/reformatted_mcgill_dataset.xlsx',
    parse_cols="A:BD")
# Keep rows with Fibrosis >= 0 that are neither 2 nor 3, and with NAFL == 1.
montreal.df = montreal.df.loc[(montreal.df['Fibrosis'] >= 0)\
                              & (montreal.df['Fibrosis'] != 2) & (montreal.df['Fibrosis'] != 3)\
                              & (montreal.df['NAFL'] == 1)]
# Shuffle the rows and renumber the index.
montreal.df = montreal.df.sample(frac=1).reset_index(drop=True)
# First 49 columns are the inputs; column 49 is thresholded at > 1 and
# scaled, so Y takes the values 0 or 4.
montreal.X = montreal.df.iloc[:, 0:49].values
montreal.Y = (montreal.df.iloc[:, 49].values > 1) * 4
Example #22
0
     dirmake(path+'TAMSD/plot/')
     n = 0
     for i in range(Q_TAMSD_plot):
         plot_TAMSD(list(tamsd_data.loc[i]), path+'TAMSD/plot/tamsd'+str(i)+'.pdf', {'figsize': (4,3)}, ['a','b','c','d','e','f'][i])
         n += 1
     print(' --- ZAKOŃCZONO')
     print(64 * '-')
     
 if Q_ML_features == 'Y':
     # extract the data
     if not traject_loaded:
         trajectories = read_trajectories(part, Model)
         traject_loaded = True
     if not expo_loaded:
         exps = read_real_expo(part, Model)
     get_features(trajectories, exps, part, Model)
     print(64 * '-')
     
 if Q_ML_linreg == 'Y':
     if not features_loaded:
         features = read_ML_features(part, Model)
     linear_regression(features, part, Model)
     print(64 * '-')
     
 if Q_ML_dectree == 'Y':
     if not features_loaded:
         features = read_ML_features(part, Model)
     decision_tree(features, part, Model)
     print(64 * '-')
     
 if Q_ML_randomforest == 'Y':
Example #23
0
# Convert both images to grayscale.
image1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
image2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)

scale_factor = 0.5  # make images smaller to speed up the algorithm
image1 = cv2.resize(image1, None, fx=scale_factor, fy=scale_factor)
image2 = cv2.resize(image2, None, fx=scale_factor, fy=scale_factor)

feature_width = 16  # width and height of each local feature, in pixels.

## Find distinctive points in each image. Szeliski 4.1.1
# !!! You will need to implement get_interest_points. !!!
x1, y1 = get_interest_points(image1, feature_width)
x2, y2 = get_interest_points(image2, feature_width)
show_image_point(image1, x1, y1)
show_image_point(image2, x2, y2)
# Build a feature description at every interest point of each image.
image1_features = get_features(image1, x1, y1, feature_width)
image2_features = get_features(image2, x2, y2, feature_width)

matches, confidences = match_features(image1_features, image2_features)

num_pts_to_visualize = len(matches)

# Keep only the matched points: match[0] indexes the image1 points and
# match[1] the image2 points.
x1 = [x1[match[0]] for match in matches]
y1 = [y1[match[0]] for match in matches]
x2 = [x2[match[1]] for match in matches]
y2 = [y2[match[1]] for match in matches]

show_correspondence(image1, image2, x1, y1, x2, y2)

evaluate_correspondence(
    np.array(x1) / scale_factor,
# Load the content image and move it to the compute device.
content = load_image('images/content/pic1.jpg').to(device)
# resize style to match content
style = load_image('images/style/pic2.jpg',
                   shape=content.shape[-2:]).to(device)

# display the images
from im_convert import im_convert
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
#content and style images shown side by side
ax1.imshow(im_convert(content))
ax2.imshow(im_convert(style))

# getting the content and style features before forming the target image
from get_features import get_features
content_features = get_features(content, vgg)
style_features = get_features(style, vgg)

# calculating the gram matrices for each layer of the style representation
from gram_matrix import gram_matrix
style_grams = {
    layer: gram_matrix(style_features[layer])
    for layer in style_features
}

# creating the target image and prepping it for change
# to start off, we use a copy of our content image as the initial target and then iteratively change its style
# (requires_grad_(True) makes the copy trainable)
target = content.clone().requires_grad_(True).to(device)

# setting the weights for each style layer and setting content and style weights
style_weights = {
Example #25
0
# Blog:         http://www.cnblogs.com/AdaminXie/
# Github:       https://github.com/coneypo/Smile_Detector

# use the saved model
from sklearn.externals import joblib

from get_features import get_features
import ML_ways_sklearn

import cv2

# path of test img
path_test_img = "data_imgs/test_imgs/test1.jpg"

# Extract the 40-dimensional feature vector of the single test image
pos_49to68_test = get_features(path_test_img)

# path of models
path_models = "data_models/"

print("The result of"+path_test_img+":")
print('\n')

# #########  LR  ###########
# Load the saved LR model; model_LR() returns the object used to transform
# the test features (presumably the scaler fitted during training -- TODO confirm).
LR = joblib.load(path_models+"model_LR.m")
ss_LR = ML_ways_sklearn.model_LR()
X_test_LR = ss_LR.transform([pos_49to68_test])
# Map the predicted label to text: 0 -> "no smile", 1 -> "with smile".
y_predict_LR = str(LR.predict(X_test_LR)[0]).replace('0', "no smile").replace('1', "with smile")
print("LR:", y_predict_LR)

# #########  LSVC  ###########
Example #26
0
#
# Copyright © 2018 weihao <*****@*****.**>
#
# Distributed under terms of the MIT license.

from get_mat import get_mat
from get_pqrst import get_pqrst
from get_features import get_features
from get_ground_truth import get_result_for_classifier
from utils import *
from train import train
import pandas as pd

# Script entry point: build one flattened feature row per recording and
# train on the resulting table.
if __name__ == '__main__':
    data_used = 1000
    data = []
    #for i in range(1, 8529):
    # Records are numbered from 1, so this reads data_used - 1 files.
    for i in range(1, data_used):
        print(i)
        # File names are zero-padded to five digits, e.g. A00001.mat.
        file_name = '../training2017/A%s.mat' % str(i).zfill(5)
        out = get_mat(file_name)
        ecg = out['filtered']
        P_index, Q_index, R_index, S_index, T_index = get_pqrst(out)
        features = get_features(ecg, P_index, Q_index, R_index, S_index,
                                T_index)
        data.append(flatten(features))

    df = pd.DataFrame(data=data)
    target = get_result_for_classifier('../training2017/REFERENCE.csv', 1)
    # Trim the targets to the number of rows that were actually read.
    train(df, target[:data_used - 1])
Example #27
0
def scan_text(text_string):
    """Tag the syllables of ``text_string`` and return the scansion.

    Steps:
      1. ``get_features`` yields per-line syllable feature tuples.
      2. ``only_four_stresses`` assigns one tag per syllable using the CRF
         model 'MHGMETRICS.crfsuite'.
      3. Syllables are grouped into words using the marker at index 4 of
         each syllable tuple ("WBYR" closes a word, "MONO" is a word of its
         own).
      4. Within a word, every stressed syllable after the first is demoted
         (MORA_HAUPT -> MORA_NEBEN, HALB_HAUPT -> HALB_NEBEN; DOPPEL counts
         as a stress but is left unchanged).

    The result of ``mhgscansion`` on the processed lines is returned.
    """
    import pycrfsuite
    from get_features import get_features
    from process_feats import syllable2features, line2features
    import pickle
    from only_four_stresses import only_four_stresses
    from yield_meter_tk import mhgscansion

    crf_tagger = pycrfsuite.Tagger()
    crf_tagger.open('MHGMETRICS.crfsuite')

    featured_lines, sylls = get_features(text_string)
    per_line_features = [line2features(line) for line in featured_lines]
    line_tags = only_four_stresses(per_line_features, crf_tagger, sylls)

    # Append the predicted tag to each syllable's feature tuple.
    tagged_lines = [
        [syll + (line_tags[row][col], ) for col, syll in enumerate(line)]
        for row, line in enumerate(featured_lines)
    ]

    # Group (syllable, tag) pairs into words.
    grouped_lines = []
    for line in tagged_lines:
        words = []
        pending = []
        for syll in line:
            pair = (syll[0], syll[-1])
            boundary = syll[4]
            if boundary == "MONO":
                words.append([pair])
                pending = []
            else:
                pending.append(pair)
                if boundary == "WBYR":
                    words.append(pending)
                    pending = []
        grouped_lines.append(words)

    # Change primary stresses to secondary where a word already has one.
    stressed = ("MORA_HAUPT", "DOPPEL", "HALB_HAUPT")
    demoted = {"MORA_HAUPT": "MORA_NEBEN", "HALB_HAUPT": "HALB_NEBEN"}
    processed_lines = []
    for words in grouped_lines:
        new_line = []
        for word in words:
            new_word = []
            stresses_seen = 0
            for syll in word:
                label = syll[-1]
                if label in stressed:
                    stresses_seen += 1
                    if stresses_seen > 1 and label in demoted:
                        syll = (syll[0], demoted[label])
                new_word.append(syll)
            new_line.append(new_word)
        processed_lines.append(new_line)

    return mhgscansion(processed_lines)
Example #28
0
import pickle
import argparse
import numpy as np
from get_features import pos_keys, one_hot_encoded_pos_features_for_dict, get_features

# Command line: a single positional argument naming the file to analyse.
parser = argparse.ArgumentParser(add_help=True)
parser.add_argument('file_for_analyzing', type=str, help='file_for_analyzing')
args = parser.parse_args()

# NOTE: pickle can execute arbitrary code while loading; only use a model
# file from a trusted source.  The context manager closes the file handle,
# which was previously left open.
with open('finalized_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
test_file = args.file_for_analyzing
print("predict with trained model")
object_features = get_features(test_file,
                               None,
                               pos_keys,
                               one_hot_encoded_pos_features_for_dict,
                               predict=True)

# The model expects a 2-D input: one row holding all features of the file.
object_features_array = np.asarray(object_features)
object_features_array = object_features_array.reshape(1, -1)
#print(object_features)
print(loaded_model.predict(object_features_array))
from build_database import build_database
from get_features import get_features
from rank import rank
from classify import classify
from evaluate_ranking import evaluate_ranking
from evaluate_classification import evaluate_classification

# Full pipeline: build the image databases, extract features, rank,
# classify and evaluate the ranking.  All paths are relative to the
# directory of this script; os.path.join gives the same strings as the
# former '\\' concatenation on Windows and also works on other platforms.
_base_dir = os.path.dirname(os.path.abspath(__file__))
_dataset_dir = os.path.join(_base_dir, 'TerrassaBuildings900')

ruta1 = os.path.join(_dataset_dir, 'val', 'images')
ruta2 = os.path.join(_dataset_dir, 'train', 'images')
savepath1 = os.path.join(_dataset_dir, 'val')
savepath2 = os.path.join(_dataset_dir, 'train')

build_database(ruta1, savepath1)
build_database(ruta2, savepath2)

get_features(ruta1, savepath1, savepath1)
get_features(ruta2, savepath2, savepath2)

savepath_principal = _base_dir
features_val = os.path.join(_dataset_dir, 'val')
features_train = os.path.join(_dataset_dir, 'train')
rank(features_val, features_train, savepath_principal)

feat = os.path.join(_dataset_dir, 'val', 'Features.txt')
path_out = _base_dir
labels = os.path.join(_base_dir, 'labels.txt')
classify(feat, path_out, labels)

path = _base_dir
Gt_val_test = os.path.join(_dataset_dir, 'val', 'annotation.txt')
evaluate_ranking(path, Gt_val_test)
Example #30
0
# Updated on:   2018-10-09

# Show the lip feature points
# Draw the positions of someone's lip

import dlib  # Dlib: face recognition library
import cv2  # OpenCV: image processing library
from get_features import get_features  # return the positions of feature points

path_test_img = "data/data_imgs/test_imgs/i064rc-mn.jpg"

# NOTE(review): detector and predictor are created here but not used in the
# rest of this script -- presumably get_features builds its own; confirm.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(
    'data/data_dlib_model/shape_predictor_68_face_landmarks.dat')

# Get lip's positions of features points
positions_lip = get_features(path_test_img)

img_rd = cv2.imread(path_test_img)

# Draw on the lip points
# positions_lip is read as a flat sequence of consecutive pairs
# (i, i + 1), each pair being the centre of one circle.
for i in range(0, len(positions_lip), 2):
    print(positions_lip[i], positions_lip[i + 1])
    cv2.circle(img_rd,
               tuple([positions_lip[i], positions_lip[i + 1]]),
               radius=1,
               color=(0, 255, 0))

# Show the annotated image until a key is pressed.
cv2.namedWindow("img_read", 2)
cv2.imshow("img_read", img_rd)
cv2.waitKey(0)
from evaluate_ranking import evaluate_ranking
from evaluate_classification import evaluate_classification

# Pipeline: build the image databases, extract features, rank and classify.
# All paths are relative to the directory of this script; os.path.join gives
# the same strings as the former '\\' concatenation on Windows and also
# works on other platforms.
_base_dir = os.path.dirname(os.path.abspath(__file__))
_dataset_dir = os.path.join(_base_dir, 'TerrassaBuildings900')

ruta1 = os.path.join(_dataset_dir, 'val', 'images')
ruta2 = os.path.join(_dataset_dir, 'train', 'images')
savepath1 = os.path.join(_dataset_dir, 'val')
savepath2 = os.path.join(_dataset_dir, 'train')

build_database(ruta1, savepath1)
build_database(ruta2, savepath2)

get_features(ruta1, savepath1, savepath1)
get_features(ruta2, savepath2, savepath2)

savepath_principal = _base_dir
features_val = os.path.join(_dataset_dir, 'val')
features_train = os.path.join(_dataset_dir, 'train')
rank(features_val, features_train, savepath_principal)

feat = os.path.join(_dataset_dir, 'val', 'Features.txt')
path_out = _base_dir
labels = os.path.join(_base_dir, 'labels.txt')
classify(feat, path_out, labels)
Example #32
0
import get_features as GF
# Make sure that we are using training images only!
params['split'] = 'train'

t = time.time()
X, pca, scaler = GF.stack_features(params)

# Every message is formatted into one string before printing, so the
# parenthesised print is valid on Python 3 and emits the same text as the
# former Python 2 print statements.
print("Done. Time elapsed: %s" % (time.time() - t))
print(np.shape(X))

t = time.time()
GF.train_codebook(params, X)

print("Done. Time elapsed: %s" % (time.time() - t))
t = time.time()
GF.get_features(params)

print("Done. Time elapsed for training set: %s" % (time.time() - t))
# Switch to validation set
params['split'] = 'val'

t = time.time()
# Run again
GF.get_features(params)

print("Done. Time elapsed for validation set: %s" % (time.time() - t))

# NOTE(review): star import kept as-is because later code may rely on other
# names it brings in; only `rank` is used in the lines visible here.
from rank import *

t = time.time()
rank(params)
Example #33
0
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn import preprocessing
from sklearn import utils

import numbers

# NOTE: "__file__" is a string literal here, not the module's __file__
# variable, so this resolves relative to the current working directory: the
# three ".." components cancel the literal segment and then climb two levels.
# NOTE(review): presumably deliberate for notebook use — confirm.
current_file_path = os.path.abspath(os.path.join("__file__" ,"../../.."))
nb_path = os.path.abspath(os.path.join(current_file_path, 'notebooks'))
# Make the notebooks directory the working directory before the import below.
os.chdir(nb_path)

# NOTE(review): imported after the chdir, presumably so the module is found
# in the notebooks directory — confirm.
import get_features

print(current_file_path)
features_path = os.path.abspath(os.path.join(nb_path,'features.csv'))
# The return value is discarded.
# NOTE(review): presumably this call writes the features.csv read below — confirm.
_ = get_features.get_features()

df = pd.read_csv(features_path)

df = df.set_index(['movie_id', 'title'])

# Every column except 'target' is a feature column.
feature_list = df.drop('target',axis=1).columns

features = np.array(df.drop('target',axis=1))
labels = df['target']

# Hold out 30% of the rows for testing; random_state fixes the split.
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state = 42)

# Create param_grid based on the best results from RandomSearchCV
# (presumably scikit-learn's RandomizedSearchCV — confirm).
res = pd.read_csv(os.path.abspath(os.path.join(nb_path,'random_rf_cvresults.csv')))
# Names of the result columns that contain 'param_'.
params = [i for i in res.columns if 'param_' in i]
# Fill the three engineered ratio features one row at a time. Every lookup is
# positional (.iloc): enron_df is indexed by labels such as 'TOTAL' (see the
# drops below), and plain series[i] with an integer on such an index relies
# on a positional fallback that pandas has deprecated.
for i in range(len(enron_df)):
    # A zero denominator yields 0.0 instead of performing the division.
    salary = enron_df['salary'].iloc[i]
    feature_1.iloc[i] = (
        enron_df['bonus'].iloc[i] / salary if salary != 0.0 else 0.0)

    to_messages = enron_df['to_messages'].iloc[i]
    feature_2.iloc[i] = (
        enron_df['from_poi_to_this_person'].iloc[i] / to_messages
        if to_messages != 0.0 else 0.0)

    from_messages = enron_df['from_messages'].iloc[i]
    feature_3.iloc[i] = (
        enron_df['from_this_person_to_poi'].iloc[i] / from_messages
        if from_messages != 0.0 else 0.0)

enron_df['bonus-to-salary_ratio'] = feature_1
enron_df['from_poi_ratio'] = feature_2
enron_df['to_poi_ratio'] = feature_3

# Define features list

# features_list = get_features(1)  # Include all original features
features_list = get_features(
    2)  # Include all original features plus 3 engineered features
# features_list = get_features(3) # Only use top features selected by Decision Tree algorithm

# Task 2: Remove outliers
# As explained in attached Jupyter Notebook, the following outliers will be removed from the data set
enron_df.drop('TOTAL', axis=0, inplace=True)
enron_df.drop('THE TRAVEL AGENCY IN THE PARK', axis=0, inplace=True)

# Convert data into numeric values; errors='coerce' turns non-numeric data into NaN
enron_df = enron_df.apply(lambda x: pd.to_numeric(x, errors='coerce'))

# Convert dataframe back to dictionary (transposed first, so the outer keys
# are the row labels)
my_dataset = enron_df.T.to_dict()

# Extract features and labels from dataset for local testing
data = featureFormat(my_dataset, features_list, sort_keys=True)
Example #35
0
def _write_feature_files(files, truth_db, truth_function):
    """Write the feature file (for use in WEKA) via get_features.get_features.

    ``files`` is indexed with the RELATION_FILE_PATH, PMI_FILE_PATH,
    COOCCURRENCE_FILE_PATH and FEATURE_FILE_PATH keys; the four looked-up
    values are forwarded in that order, followed by ``truth_db`` and
    ``truth_function``.
    """
    relation_path = files[RELATION_FILE_PATH]
    pmi_path = files[PMI_FILE_PATH]
    cooccurrence_path = files[COOCCURRENCE_FILE_PATH]
    feature_path = files[FEATURE_FILE_PATH]
    get_features.get_features(relation_path, pmi_path, cooccurrence_path,
                              feature_path, truth_db, truth_function)
Example #36
0
# Class names, indexed by the position of the matching model output score.
labels = [
    'neutral', 'calm', 'happy', 'sad',
    'angry', 'fear', 'disgust', 'surprise',
]
model = keras.models.load_model("./model.h5")
model.summary()


# Audio clips to run through the model.
validation_audio = [
    "./happy.mp3",
    "./happy.m4a",
    "./happy2.m4a",
    "./lucas_nienpedo.mp3",
    "./sol_maichu.m4a",
    "./fran_scared.m4a",
    "./fran_scared2.m4a",
    "./lucas_cursing.mp3",
    "./nacho_happy.mp3",
]

for audio_path in validation_audio:
    print("procesing", audio_path)
    # Load a 2.5 s excerpt starting 0.6 s into the clip.
    signal, sample_rate = librosa.load(audio_path, duration=2.5, offset=0.6)
    # Wrapping the features in a list adds a leading axis of length 1, and
    # expand_dims inserts one more axis at position 2.
    model_input = np.expand_dims([get_features(signal, sample_rate)], axis=2)

    # Only the first row of the prediction is reported.
    scores = model.predict(model_input)[0]
    print("prediction", labels[np.argmax(scores)])
    print("predictions")
    for label, score in zip(labels, scores):
        print(label, score)
Example #37
0
from train_classifier import train_classifier
from classify import classify
from eval_classification import eval_classification
from eval_classification import plot_confusion_matrix
import warnings
# Silence every warning for the rest of the run.
warnings.filterwarnings("ignore")

# Load the parameters
params = get_params()
# Build the database for the training and the validation split
params['split'] = 'train'
build_database(params)
params['split'] = 'val'
build_database(params)
# Extract the features
# NOTE(review): params['split'] is still 'val' at this point and get_features
# is called only once — confirm it also covers the training split.
get_features(params)
# Train a classification model
train_classifier(params)
# Classify
classify(params)
# Evaluate the classification
f1, precision, recall, accuracy, cm, labels = eval_classification(params)
# Each message is formatted into one string before printing, so the
# parenthesised print is valid on Python 3 and emits the same text as the
# former Python 2 print statements (which add no space after a trailing
# newline, hence "\n%s" for the confusion matrix).
print("Mesures:\n")
print(f1)
print("-F1: %s" % (np.mean(f1),))
print("-Precision: %s" % (np.mean(precision),))
print("-Recall: %s" % (np.mean(recall),))
print("-Accuracy: %s" % (accuracy,))
print("-Confusion matrix:\n%s" % (cm,))

plot_confusion_matrix(cm, labels, normalize=True)
Example #38
0
from train_classifier import train_classifier
from classify import classify
from eval_classification import eval_classification
from eval_classification import plot_confusion_matrix
import warnings
# Silence every warning for the rest of the run.
warnings.filterwarnings("ignore")

# Load the parameters
params = get_params()
# Build the database for the training and the validation split
params['split'] = 'train'
build_database(params)
params['split'] = 'val'
build_database(params)
# Extract the features
# NOTE(review): params['split'] is still 'val' at this point and get_features
# is called only once — confirm it also covers the training split.
get_features(params)
# Train a classification model
train_classifier(params)
# Classify
classify(params)
# Evaluate the classification
f1, precision, recall, accuracy, cm, labels = eval_classification(params)
# Each message is formatted into one string before printing, so the
# parenthesised print is valid on Python 3 and emits the same text as the
# former Python 2 print statements (which add no space after a trailing
# newline, hence "\n%s" for the confusion matrix).
print("Mesures:\n")
print(f1)
print("-F1: %s" % (np.mean(f1),))
print("-Precision: %s" % (np.mean(precision),))
print("-Recall: %s" % (np.mean(recall),))
print("-Accuracy: %s" % (accuracy,))
print("-Confusion matrix:\n%s" % (cm,))

plot_confusion_matrix(cm, labels, normalize=True)
import csv
import get_features
import expand_features
import unicodecsv

# Listing URL handed to get_features.get_features below.
sample_url = "http://www.funda.nl/koop/amsterdam/appartement-49453167-van-boetzelaerstraat-34-2/"


# Pipe-delimited, UTF-8 writer over expanded_headers.csv (opened in binary
# write mode).
# NOTE(review): the file object returned by open() is never bound to a name
# or closed in the lines visible here — consider a `with` block if nothing
# later needs the writer.
header_output = unicodecsv.writer(open("expanded_headers.csv", "wb"), encoding='utf-8', delimiter='|')
sample = get_features.get_features(sample_url)
# Set the "type" entry of the result to "sample".
sample["type"] = "sample"
# The return value is discarded.
# NOTE(review): presumably expand_features mutates `sample` in place — confirm.
expand_features.expand_features(sample)
# Write the keys of `sample` as one row.
header_output.writerow(sample.keys())