Example #1
def svoExecute(fpathL1, fpathR1, fpathL2, fpathR2):
    #initialize
    (imLprev, imRprev) = openImg.getImgs(fpathL1, fpathR1)
    (imLnext, imRnext) = openImg.getImgs(fpathL2, fpathR2)

    size = [2, 2]
    #get key points
    kpLprev, descLprev = features.getFeatures(imLprev, size)
    kpRprev, descRprev = features.getFeatures(imRprev, size)

    #correspondences
    corLprev_St, corRprev_St, matchKeptL_St, matchKeptR_St = features.getCorres(
        descLprev, descRprev, kpLprev, kpRprev)

    #triangulate must be of the form 2 x N
    [x3dprev, pErrPrev,
     matchKept_3d] = triangulation.triangulate(corLprev_St, corRprev_St)

    #Get temporal correspondences
    kpLnext, descLnext = features.getFeatures(imLnext, size)

    corLprev_T, corLnext_T, matchKeptPrev_T, matchKeptNext_T = features.getCorres(
        descLprev, descLnext, kpLprev, kpLnext)

    (x3dprev_final, x2dnext_final) = idxMerge(matchKeptL_St, matchKept_3d,
                                              x3dprev, corLnext_T,
                                              matchKeptPrev_T, matchKeptNext_T)

    #Get updated camera pose
    [rot, trans] = PnP.camPose(x3dprev_final, x2dnext_final, pErrPrev)

    return (rot, trans, x3dprev_final)
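
The triangulation.triangulate helper is not shown on this page. For orientation only, here is a minimal sketch of standard linear (DLT) triangulation, the usual technique behind such a routine, assuming hypothetical 3x4 projection matrices P1 and P2 for the stereo pair (this is not necessarily what the library does):

import numpy as np

def triangulate_dlt(p1, p2, P1, P2):
    # p1, p2: (x, y) pixel coordinates of one left/right correspondence.
    # P1, P2: assumed 3x4 camera projection matrices.
    A = np.vstack([
        p1[0] * P1[2] - P1[0],
        p1[1] * P1[2] - P1[1],
        p2[0] * P2[2] - P2[0],
        p2[1] * P2[2] - P2[1],
    ])
    # The homogeneous solution is the right singular vector with the
    # smallest singular value; dehomogenize to get the 3-D point.
    _, _, Vt = np.linalg.svd(A)
    X = Vt[-1]
    return X[:3] / X[3]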
Example #2
def imageToImage(images, paths, keypoint_type, descriptor_type, score_fun = lambda i,s,u : numpy.mean(s)) :
	""" Compare every image with every other image, generating a few different scores
	    input: images [List of nparrays] all the images
	           labels [List of Strings] labels of all the images
		       keypoint_type [String] e.g "SURF" or "ORB" etc
		       keypoint_descriptor [String] e.g "SURF" or "ORB" etc
		       score_fun [(list(int), list(int/float), list(float)) -> float]
			     The score function take a list of indices, a list of scores (distance
				 between two descriptors), a list of scores (uniqueness of best/second
				 best score) and returns a floating point number.
	    output: [list of (boolean, score)] a list where the boolean is true if the images
		                                   where of the same person and false if not
	"""

	# Get keypoints
	#keypoints = map(lambda i : f.getKeypoints(keypoint_type, i), images)

	# Get descriptors
	#data = map(lambda i,k : f.getDescriptors(descriptor_type, i, k), images, keypoints)
	data = [f.getFeatures([p],keypoint_type, descriptor_type) for p in paths]
	
	indices, keypoints, descriptors = zip(*data)

	# Return the scores labeled with a boolean to indicate if they are of same set
	return matchDescriptors(descriptors, paths, descriptor_type, score_fun)
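
A hedged usage sketch of the score_fun hook described in the docstring; the file paths are placeholders and only the call shape is illustrated. Since the body above derives keypoints from paths, the unused images argument can be an empty list:

import numpy

# Score a pair by mean uniqueness ratio instead of the default mean distance.
ratio_score = lambda indices, scores, uniqueness : numpy.mean(uniqueness)
results = imageToImage([], ["person1_a.jpg", "person1_b.jpg"],
                       "SURF", "SURF", score_fun = ratio_score)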
Example #3
def main():
    video_src = -1
    cam = cv2.VideoCapture(video_src)
    cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    # get train features
    img = cv2.imread('logo_train.png')
    train_features = features.getFeatures(img)
    cur_time = timeit.default_timer()
    frame_number = 0
    scan_fps = 0
    while True:
        frame_got, frame = cam.read()
        if not frame_got:
            break

        frame_number += 1
        if not frame_number % 100:
            scan_fps = 1 / ((timeit.default_timer() - cur_time) / 100)
            cur_time = timeit.default_timer()

        region = features.detectFeatures(frame, train_features)

        cv2.putText(frame, f'FPS {scan_fps:.3f}', org=(0, 50),
                    fontFace=cv2.FONT_HERSHEY_COMPLEX_SMALL,
                    fontScale=1, color=(0, 0, 255))

        if region is not None:
            box = cv2.boxPoints(region)
            box = np.int0(box)
            cv2.drawContours(frame, [box], 0, (0, 255, 0), 2)

        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break
Example #4
def match(paths, options = {}) :

    # Get parameters
    prune_fun = options.get("prune_fun", weightMatrix.pruneThreshold)
    prune_limit = options.get("prune_limit", 2.5)
    keypoint_type = options.get("keypoint_type", "SIFT")
    descriptor_type = options.get("descriptor_type", "SIFT")
    verbose = options.get("verbose", False)
    split_limit = options.get("split_limit", 50)
    cluster_prune_limit = options.get("cluster_prune_limit", 1.5)

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths, options)

    # Calculate weight matrix
    weights = weightMatrix.init(ds, descriptor_type)

    # Get cluster weights
    cluster_weights = prune_fun(weights, prune_limit)

    # Cluster graph
    partitions = cluster(cluster_weights, indices, split_limit = split_limit, prune_limit = cluster_prune_limit, verbose=verbose)
    if verbose : print("%i partitions" % len(set(partitions)))

    def match_fun(threshold) :
        match_data = list(getPartitionMatches(partitions, cluster_weights, weights, indices, threshold))
        if len(match_data) == 0 : return [], [], []
        match_ind, ratios, scores = zip(*match_data)

        # Get positions
        matches = [getMatchPosition(m_i, m_j, ks) for (m_i, m_j) in match_ind]

        return matches, ratios, scores

    return lambda t : match_fun(t)
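
match returns a closure over the precomputed weights, so several thresholds can be tried without re-extracting features. A minimal usage sketch (the paths are hypothetical); note the final line above could equally be written as return match_fun:

get_matches = match(["img_a.jpg", "img_b.jpg"], {"keypoint_type": "SIFT"})
for t in (0.5, 1.0, 1.5):
    matches, ratios, scores = get_matches(t)
    print("threshold %.1f: %i matches" % (t, len(matches)))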
Example #5
def match(paths, options = {}) :

    keypoint_type		= options.get("keypoint_type", "SIFT")
    descriptor_type		= options.get("descriptor_type", "SIFT")
    use_ball_tree       = options.get("use_ball_tree", False)

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths, options)

    # Descriptor distances between the two images (unused below)
    distances = features.angleDist(ds[indices == 0], ds[indices == 1])
    
    if use_ball_tree :
        ii, ss, uu = features.ballMatch(descriptor_type, ds[indices == 0], ds[indices == 1])
    else :
        ii, ss, uu = features.bfMatch(descriptor_type, ds[indices == 0], ds[indices == 1])

    # Get all positions
    (pos_im1, pos_im2) = (features.getPositions(ks[indices == 0]), features.getPositions(ks[indices == 1]))

    # Define a function that given a threshold returns a set of matches
    def match_fun(threshold) :
        match_data = [(numpy.array((pos_im1[i], pos_im2[j])), uu[i], ss[i]) for i,j in enumerate(ii) if uu[i] < threshold]
        if len(match_data) == 0 : return [], [], []
        matches, ratios, scores = zip(*match_data)

        return matches, ratios, scores

    return lambda t : match_fun(t)
Example #6
def match(paths, options = {}) : 
	
	# Get parameters
	prune_fun = options.get("prune_fun", weightMatrix.pruneThreshold)
	prune_limit = options.get("prune_limit", 3)
	min_edges = options.get("min_edges", 1)
	min_coherence = options.get("min_coherence", -1.0)
	keypoint_type = options.get("keypoint_type", "ORB")
	descriptor_type = options.get("descriptor_type", "BRIEF")
	verbose = options.get("verbose", False)
	split_limit = options.get("split_limit", 999999)
	cluster_prune_limit = options.get("cluster_prune_limit", 1.5)

	# Get all feature points
	indices, ks, ds = features.getFeatures(paths, keypoint_type = keypoint_type, descriptor_type = descriptor_type)

	# Calculate weight matrix (hamming distances)
	weights = weightMatrix.init(ds, descriptor_type)

	# Get cluster weights
	cluster_weights = prune_fun(weights, prune_limit)

	# Cluster graph
	partitions = cluster(cluster_weights, indices, split_limit = split_limit, prune_limit = cluster_prune_limit, verbose=verbose)
	if verbose : print("%i partitions" % len(set(partitions)))

	# Get matches
	matches = getPartitionMatches(partitions, cluster_weights, indices, min_edges, min_coherence)

	# Find their positions
	matchPos = [getMatchPosition(m_i, m_j, ks) for (m_i,m_j) in matches]

	return matchPos
Example #7
def predict(doc2vec, data, output, mlp=None):
    """ Answer Reranking with rank ~ cosine(q_i, a_i)^(-1) """
    # data : zip(questions, commentsL) ... see 'constructData'
    out = open(output, 'w')
    for q, cl in data:
        scores = []
        q_w = preprocessor(q[1])
        q_v = doc2vec.infer_vector(q_w)
        ac_v = getAverageCV(doc2vec, cl)
        for j, c in enumerate(cl):
            c_w = preprocessor(c[1])
            c_v = doc2vec.infer_vector(c_w)
            f_v = getFeatures(doc2vec, q_w, c_w, \
                { 'qid' : q[0], 'cid' : c[0], 'rank' : j })
            f_v.extend(
                [cosine(q_v, c_v),
                 cosine(q_v, ac_v),
                 cosine(c_v, ac_v)])
            score, pred = predictAux(q_v, c_v, ac_v, f_v, mlp)
            scores.append([score, j, 0, pred])
        scores = sorted(scores, key=lambda score: score[0], reverse=True)
        for i in range(len(scores)):
            scores[i][2] = i + 1
        scores = sorted(scores, key=lambda score: score[1])
        for score in scores:
            out.write('\t'.join([
                q[0], cl[score[1]][0],
                str(score[2]),
                str(score[0]), score[3]
            ]))
            out.write('\n')
    out.close()
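
The two sorts implement the rank assignment: first order by score (descending) to number the comments, then restore the original comment order for output. The same pattern in isolation, on toy data:

rows = [[0.9, 0, 0], [0.4, 1, 0], [0.7, 2, 0]]   # [score, original_index, rank]
rows = sorted(rows, key=lambda r: r[0], reverse=True)
for i in range(len(rows)):
    rows[i][2] = i + 1                            # 1-based rank by score
rows = sorted(rows, key=lambda r: r[1])           # back to original order
# rows == [[0.9, 0, 1], [0.4, 1, 3], [0.7, 2, 2]]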
Example #8
def match_speed(paths, options = {}) :
    keypoint_type		= options.get("keypoint_type", "SIFT")
    descriptor_type		= options.get("descriptor_type", "SIFT")
    leaf_size		    = options.get("leaf_size", 2)
    radius_size         = options.get("radius_size", 300)
    dist_threshold      = options.get("dist_threshold", 100)
    shuffle_keypoints   = options.get("shuffle_keypoints", False)

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths, keypoint_type, descriptor_type, shuffle_keypoints)

    # Construct ball tree
    bt = BallTree(ds, leaf_size=leaf_size)

    # Filter nodes
    ns = filterNodes(bt, radius_size)

    # Get matches
    match_data = list(getMatches(bt, ns, indices, ks, ds, dist_threshold))

    def match_fun(ratio_threshold) :
        matches = [(pos, s, u) for pos, s, u in match_data if u < ratio_threshold]
        if len(matches) == 0 :
            return [], [], []
        else :
            return zip(*matches)

    return lambda t : match_fun(t)
Example #9
def getFeatures(paths, filter_features, options) :
    """ Retrieves features and filters them """
    feature_points = features.getFeatures(paths, options)
    if not filter_features :
        return feature_points
    else :
        ff = numpy.ones(len(feature_points[0]), dtype = bool)
        ff[filter_features] = False
        indices = feature_points[0][ff]
        ks = feature_points[1][ff]
        ds = feature_points[2][ff]
        return indices, ks, ds
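
The filter builds an all-True mask and switches off the rejected feature indices; a toy demonstration of the masking step:

import numpy

indices = numpy.array([0, 0, 1, 1, 1])
ff = numpy.ones(len(indices), dtype = bool)
ff[[1, 3]] = False          # reject features 1 and 3
print(indices[ff])          # [0 1 1]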
Example #10
def getCorrespondences(paths, homography, keypoint, descriptor, distance_threshold) :

    print("%s " % features.getLabel(paths[0])),

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths, { "keypoint_type" : keypoint, "descriptor_type" : descriptor })

    # Get all positions
    (pos_im1, pos_im2) = (features.getPositions(ks[indices == 0]), features.getPositions(ks[indices == 1]))

    # For all possible combinations, check if the match is acceptable
    correspondences = sum([1 for (p1, p2) in itertools.product(pos_im1, pos_im2) if matchDistance(p1, p2, homography) <= distance_threshold])

    return correspondences
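
matchDistance is not shown here. Under the planar-scene assumption implied by the homography, it presumably projects the first point into the second image and measures the pixel error; a hypothetical reconstruction, for illustration only:

import numpy

def matchDistance(p1, p2, homography):
    # Map p1 through the 3x3 homography (hypothetical helper; the
    # page does not show the original implementation).
    q = homography.dot(numpy.array([p1[0], p1[1], 1.0]))
    q = q[:2] / q[2]
    return numpy.linalg.norm(q - numpy.array(p2))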
Example #11
def scoreImages(paths, 
		cluster_edges 	= 3, 
		score_edges 	= 40, 
		size 			= 36, 
		withGeometry 	= True, 
		withCertainty 	= True, 
		cluster_prune 	= weightMatrix.pruneHighest,
		score_prune 	= weightMatrix.pruneThreshold,
		normalize		= True,
		score_type		= scoreWeights) :
	""" Given paths to two images, the images are scored based on how 
	    well their traits match
	"""

	default_val = 0.0

	# Get features
	indices, keypoints, descriptors = features.getFeatures(paths, size=size)
	if descriptors is None :
		print(paths)
		print("No descriptors found. Returning score %.1f" % default_val)
		return default_val

	# Get weights
	full_weights = weightMatrix.init(descriptors)
	score_weights = score_prune(full_weights, score_edges)
	cluster_weights = cluster_prune(score_weights, cluster_edges)

	# Cluster graph
	partitions = louvain.cluster(cluster_weights)

	# Match the traits
	scores, partition_indices = score_type(score_weights, partitions, indices, scoring=lambda m,c : m + c if c > 0 else 0.0, normalize=normalize)

	# Get the geometric multiplier
	geom_multiplier = geometryMultiplier(partitions, partition_indices, numpy.array(indices), keypoints)

	# Get the certainty factor
	certainty_factor = certaintyFactor(len(partition_indices)) if withCertainty else 1.0
	score_sum = sum([s*m for s,m in zip(geom_multiplier, scores)]) if withGeometry else sum(scores)

	# Get final score by multiplying certainty
	final_score = score_sum * certainty_factor

	# Get labels and print
	[l1, l2] = [features.getLabel(p) for p in paths]
	print("Score: %0.4f for %s and %s (clusters: %i)" % (final_score,l1,l2, len(partition_indices)))

	return final_score
Example #12
def match(paths, options = {}) :

    use_ball_tree       = options.get("use_ball_tree", False)

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths, options)

    # Use cv2's matcher to get matching feature points
    match_data = features.bfMatch(ds[indices == 0], ds[indices == 1])

    # Get all positions
    (pos_im1, pos_im2) = (features.getPositions(ks[indices == 0]), features.getPositions(ks[indices == 1]))

    # Define a function that given a threshold returns a set of matches
    def match_fun(match_data, threshold) :
        match_data = [(numpy.array((pos_im1[i], pos_im2[j])), s, u) for (i, j), s, u in match_data if u < threshold]
        if len(match_data) == 0 : return [], [], []
        matches, ratios, scores = zip(*match_data)

        return matches, ratios, scores

    return lambda t : match_fun(match_data, t)
Example #13
def match_radius(paths, options = {}) :
    leaf_size		    = options.get("leaf_size", 10)
    radius_size         = options.get("radius_size", 300)
    ratio_boost         = options.get("ratio_boost", 1.0)
    group_limit         = options.get("group_limit", 5)

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths, options)

    # Construct ball tree
    bt = BallTree(ds, leaf_size=leaf_size)

    # Query function for ball tree
    def query_all() :
        max_index = indices.max()
        for i, descriptor in enumerate(ds) :
            if indices[i] < max_index :
                # query_radius expects a 2-D array of query points
                idxs = numpy.array(bt.query_radius(descriptor.reshape(1, -1), r=radius_size)[0])
                group_size = len(idxs)

                # Get unique match (indices renamed to avoid shadowing the outer i)
                for (qi, qj), m, s, u in query_unique(bt, i, descriptor, indices, ks) :

                    if group_size >= group_limit :
                        yield m, s, u*ratio_boost, group_size
                    else :
                        yield m, s, u, group_size

    # Get matches
    match_data = list(query_all())

    def match_fun(ratio_threshold) :
        matches = [(pos, s, u, g) for pos, s, u, g in match_data if u < ratio_threshold]
        if len(matches) == 0 :
            return [], [], [], []
        else :
            return zip(*matches)

    return lambda t : match_fun(t)
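
sklearn's BallTree.query_radius expects a 2-D array of query points, which is why the descriptor is reshaped above; a minimal standalone check:

import numpy
from sklearn.neighbors import BallTree

ds = numpy.random.rand(100, 32)                 # 100 descriptors, 32-D
bt = BallTree(ds, leaf_size=10)
idxs = bt.query_radius(ds[0].reshape(1, -1), r=0.5)[0]
print(len(idxs), "descriptors within radius 0.5")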
Example #14
def trainNN(doc2vec, data):
    """ Train MLP """
    mlp = MLPClassifier(solver=param['solver'],
                        hidden_layer_sizes=param['hidden'],
                        activation=param['activation'],
                        learning_rate='adaptive',
                        early_stopping=False,
                        random_state=1,
                        max_iter=1000,
                        verbose=True)
    X = []
    Y = []
    if data is not None:
        for q, cl in data:
            q_w = preprocessor(q[1])
            q_v = doc2vec.infer_vector(q_w)
            q_v /= norm(q_v)
            ac_v = getAverageCV(doc2vec, cl)
            for j, c in enumerate(cl):
                c_w = preprocessor(c[1])
                c_v = doc2vec.infer_vector(c_w)
                c_v /= norm(c_v)
                f_v = getFeatures(doc2vec, q_w, c_w, \
                    { 'qid' : q[0], 'cid' : c[0], 'rank' : j })
                f_v.extend(
                    [cosine(q_v, c_v),
                     cosine(q_v, ac_v),
                     cosine(c_v, ac_v)])
                X.append(np.append(np.append(q_v, c_v), np.append(ac_v, f_v)))
                Y.append(transformLabel(c[2]))
        np.savez('out/trainNN.npz', x=X, y=Y)
    else:
        npzfile = np.load('out/trainNN.npz')
        X = npzfile['x']
        Y = npzfile['y']
    mlp.fit(X, Y)
    return mlp
Example #15
    def cnn(self, algo, max_words=3000, feats=False, chi2=False):
        print('type == ', algo, feats, 'chi2=', str(chi2), max_words)
        thedata, emb_size = self.splitData(self.input_data, algo)

        if self.clas in [5, 8, 9]:
            testset = rc.set_input_data(None, (self.clas * 10 + 1),
                                        clas=self.clas)
            testset, emb_size = self.splitData(testset, algo)

        print('input len:', len(thedata))
        tokenizer = Tokenizer(num_words=max_words)
        tokenizer.fit_on_texts(thedata)
        self.dictionary = tokenizer.word_index
        vocab_size = len(tokenizer.word_index) + 1

        allWordIndices = []
        for text in thedata:
            wordIndices = self.convert_text_to_index_array(text)
            allWordIndices.append(wordIndices)

        allWordIndices = np.asarray(allWordIndices)

        mode = ["binary"]
        for m in mode:
            print('mode', m)
            train_x = tokenizer.sequences_to_matrix(allWordIndices, mode=m)

            if feats:
                if chi2:
                    featus = getFeatureschi2(self.corpus, clas=self.clas)
                else:
                    featus = getFeatures(self.corpus, clas=self.clas)

                print('Stats::', featus.shape)
                print('Stats::', train_x.shape)
                train_x = np.hstack((train_x, featus))
                print('Stats::', featus.shape)
                print('Stats::', train_x.shape)

            train_y = list(map(lambda x: self.c[x], self.output_data))
            train_y = keras.utils.to_categorical(train_y, self.nb_classes)

            X_train, X_test, Y_train, Y_test = train_test_split(train_x,
                                                                train_y,
                                                                test_size=0.2,
                                                                shuffle=True)
            input_size = len(train_x[0])
            # one test
            validation_split = [0.1]
            batch = [200]
            nb_neurone = [50]  # number of filters
            nb_epoch = [5]
            activation = ['relu']
            optimizer = ['adam']
            loss = ['mse']

            # Unpack the single-configuration grid into the short names
            # used below (these were previously undefined).
            vs, b, nb, epoch = validation_split[0], batch[0], nb_neurone[0], nb_epoch[0]
            a, o, l = activation[0], optimizer[0], loss[0]

            model = Sequential()
            print('emb_size', emb_size)
            model.add(Embedding(vocab_size, emb_size, input_length=input_size))
            model.add(
                Conv1D(nb,
                       activation=a,
                       kernel_size=self.nb_classes,
                       input_shape=(input_size, 1)))

            model.add(MaxPooling1D(self.nb_classes))
            model.add(Flatten())

            model.add(Dense(self.nb_classes, activation='sigmoid'))
            model.compile(loss=l, optimizer=o, metrics=['accuracy'])

            model.fit(X_train,
                      Y_train,
                      batch_size=b,
                      epochs=epoch,
                      verbose=1,
                      validation_split=vs)
            print('evaluation')

            score = model.evaluate(X_test, Y_test, verbose=0)
            print('acc', score[1])
            Yt_test = np.argmax(Y_test, axis=1)  # Convert one-hot to index
            y_pred = model.predict_classes(X_test)
            print(classification_report(Yt_test, y_pred, digits=4))
            print('done')
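
Tokenizer.sequences_to_matrix with mode='binary' yields a fixed-width bag-of-words matrix (one column per vocabulary slot), which is what lets the hstack with the extra feature columns line up row for row; a tiny standalone check:

from keras.preprocessing.text import Tokenizer

tok = Tokenizer(num_words=10)
tok.fit_on_texts(["a b c", "b c d"])
seqs = tok.texts_to_sequences(["a b c"])
m = tok.sequences_to_matrix(seqs, mode="binary")
print(m.shape)    # (1, 10): one row per document, one column per word slot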
Example #16
df_train = preprocessing.prep_trainset(df_train) # Trainset preprocessing
df_descr = preprocessing.prep_descr(df_descr, df_train) # Product descriptions preprocessing
df_attr = preprocessing.prep_attr(df_attr, df_train) # Product attributes preprocessing

# df_train.to_pickle('df_train_prep.pkl')
# df_descr.to_pickle('df_descr_prep.pkl')
# df_attr.to_pickle('df_attr_prep_new1.pkl')

# df_train = pd.read_pickle('df_train_prep.pkl')
# df_descr = pd.read_pickle('df_descr_prep.pkl')
# df_attr = pd.read_pickle('df_attr_prep.pkl')


# Phase 2 Feature engineering
# df_train, df_similarities, df_fuzzy = features.feature_engineering(df_train, df_descr, df_attr)

df_train, df_train2 = features.getFeatures(df_train, df_descr, df_attr) # df_train2 stored for modelling phase
df_train, df_similarities = features.similarityMetrics(df_train)
df_fuzzy = features.fuzzy(df_train)

# df_train.to_pickle('df_train_feat.pkl')
# df_similarities.to_pickle('df_similarities_feat.pkl')
# df_fuzzy.to_pickle('df_fuzzy_feat.pkl')
# df_train = pd.read_pickle('df_train_feat.pkl')
# df_similarities = pd.read_pickle('df_similarities_feat.pkl')
# df_fuzzy = pd.read_pickle('df_fuzzy_feat.pkl')

# Phase 3 Modelling

modeling.run(df_train2, df_similarities, df_fuzzy)
Example #17
import features
import numpy as np
import openImg
import triangulation

leftpaths, rightpaths = openImg.getFilenames()


for m in range(1):

    # get features
    imgL, imgR = openImg.getImgs(leftpaths[m], rightpaths[m])
    kp1, desc1 = features.getFeatures(imgL, [2, 2])
    kp2, desc2 = features.getFeatures(imgR, [2, 2])

    leftCorres, rightCorres, leftCorresidx, rightCorresidx = features.getCorres(desc1, desc2, kp1,kp2)

    x3dSave, perrFin, idxSave = triangulation.triangulate(leftCorres,rightCorres)

    if m==0:
        x3d = x3dSave
    else:
        x3d = np.concatenate((x3d, x3dSave), axis=1)

    print(m)
import pdb; pdb.set_trace()

x3d = x3d.transpose()
Example #18

import pandas as pd

import features

rawdata = pd.read_csv('../kaggle_datasets/flight-delays/flights.csv',
                      encoding='latin-1',
                      error_bad_lines=False)
name = "flight-delays"
rawdata.info()
rawdata.isnull().sum()

# In[86]:

drop = "AIRLINE_DELAY"
encoded = rawdata.copy(deep=True)
#encoded = encoded.fillna(0)
#encoded.dropna(axis=0, inplace=True)
for x in encoded:
    if encoded[x].dtype == "object":
        encoded[x] = encoded[x].astype('category').cat.codes
if encoded.shape[0] > 25000:
    encoded = encoded.sample(n=25000, axis=0)
encoded.info()

# In[87]:

Y = encoded[drop]
X = encoded.drop([drop], axis=1)

Y = Y.to_numpy()
X = X.to_numpy()
vec = features.getFeatures(X, Y, name)
features.serialize(name, vec)
vec
Example #19
        (x_train, y_train), (x_test, y_test), preproc = text.texts_from_array(
            x_train=x_train,
            y_train=y_train,
            x_test=x_test,
            y_test=y_test,
            class_names=listclasses,
            preprocess_mode='bert',
            maxlen=200,
            max_features=15000)

        if feat:
            if chi2:
                featus = getFeatureschi2(corpus, clas=clas)
            else:
                featus = getFeatures(corpus, clas=clas)

            featus = featus.tolist()
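            # NOTE: each comprehension below calls extend() on a fresh
            # tolist() copy, so its result is discarded and x_train /
            # x_test are not actually modified.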
            [
                x_train[0][x].tolist().extend(featus[x])
                for x in range(0, split - 1)
            ]
            [
                x_train[1][x].tolist().extend(featus[x])
                for x in range(0, split - 1)
            ]
            [
                x_test[0][x - split].tolist().extend(featus[x])
                for x in range(split, len(featus))
            ]
            [
Example #20
import features
import cv2
import params

img1 = cv2.imread(params.IMGLOCATION + '/image_02/data/0000000000.png', 0)
img2 = cv2.imread(params.IMGLOCATION + '/image_03/data/0000000000.png', 0)

imgout = cv2.imread(params.IMGLOCATION + '/image_02/data/0000000000.png')

size = [3, 3]

kp1, desc1 = features.getFeatures(img1, size)
kp2, desc2 = features.getFeatures(img2, size)

leftCorres, rightCorres, leftCorresidx, rightCorresidx = features.getCorres(
    desc1, desc2, kp1, kp2)

#img3 = cv2.drawMatchesKnn(img1,kp1,img2,kp2,corres,imgout,flags=2)
#cv2.imshow("Display",img3)
#cv2.waitKey(0)

# View features
import pdb
pdb.set_trace()
#kpimg = cv2.drawKeypoints(img1,kp1,imgout)
#cv2.imshow("Display Window",kpimg)
#cv2.waitKey(0)
Example #21
'''
Label the blobs using a previously trained model

Gary Bishop July 2018
'''

import pandas as pd
import Args
import pickle
from features import getFeatures

args = Args.Parse(inblobs='output.blobs.bz2',
                  outblobs='output.labeled.bz2',
                  model='models/LR1.pkl')

data = pd.read_pickle(args.inblobs)

model = pickle.load(open(args.model, 'rb'))

features = getFeatures(data)

labels = model.predict(features)
data['isdot'] = labels

data.to_pickle(args.outblobs)
Example #22
from pydriller import RepositoryMining, GitRepository
import datetime
from splclassifier import SPLClassifier
#from manualcommits import getManualResultsKconfig, getMakeFileResultsManual, getAMFileResultsManual
from features import getLinuxF, getFeatures
from getCommitsLinux import getLinuxCommits
from getCommits import getListCommits
import re

dt1 = datetime.datetime(2017, 3, 8, 0, 0, 0)
dt2 = datetime.datetime(2017, 12, 31, 0, 0, 0)



#listaCommitsLinux = getLinuxCommits()
features = getFeatures()
arq = open('saida_rc_errados.csv','w')
listaCommits = getListCommits()

'''
fileKind = sys.argv[1]
if(fileKind == 'makefile'):
    print("SOU MAKEFILE")
    listaCommits = getListCommits()
    features = getFeatures()
    arq = open('automated-rc-soletta-retest.csv','w')
        
elif(fileKind == 'kconfig'):
    print("SOU KCONFIG")
    arq = open('automated-results-kconfig-uclibc.csv','w')
else:
Example #23
def match(paths, options = {}) :

    # Get matches in usual format
    def matchFromIndex(i,j) :
        return (features.getPosition(ks[indices == 0][i]), features.getPosition(ks[indices == 1][j]))

    # Get options
    k_init				= options.get("k_init", 50)
    max_iterations		= options.get("max_iterations", 20)
    min_partition_size	= options.get("min_partition_size", 10)
    max_sd				= options.get("max_sd", 40)
    min_distance		= options.get("min_distance", 25)
    verbose				= options.get("verbose", False)
    keypoint_type		= options.get("keypoint_type", "SIFT")
    descriptor_type		= options.get("descriptor_type", "SIFT")
    ratio_threshold		= options.get("ratio_threshold", 1.0)

    # Get images
    images = map(features.loadImage, paths)

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths, options)

    # Get positions
    positions = numpy.array(features.getPositions(ks))

    # Get matches
    match_points = getMatchPoints(indices, ks, ds, descriptor_type = descriptor_type)

    if len(match_points) == 0 : return lambda t : ([], [], [])

    # Partition with isodata
    part_1 = isodata.cluster(positions[indices==0], k_init=k_init, max_iterations=max_iterations, min_partition_size=min_partition_size, max_sd=max_sd, min_distance=min_distance)
    part_2 = isodata.cluster(positions[indices==1], k_init=k_init, max_iterations=max_iterations, min_partition_size=min_partition_size, max_sd=max_sd, min_distance=min_distance)

    # Show the clusters
    if verbose : display.showTwoPartitions(part_1, part_2, indices, images, positions)

    # Get a matrix of the matches so that part_corr_{i,j} is equal to the
    # amount of matches between partition i and j
    part_corr = getLinkMat(part_1, part_2, match_points)

    # For each partition figure out which partitions correspond
    partition_links = [getPartitionLinks(row) for row in part_corr]

    # Get all keypoint matches from the matching clusters
    match_set = []
    for i,ms in enumerate(partition_links) :
        for (j,s) in ms :
            match_set.extend(getPartitionMatches(match_points, part_1 == i, part_2 == j))
    
    # def match_fun(threshold) :
    #     # For each partition figure out which partitions correspond
    #     partition_links = [getPartitionLinks(row, threshold) for row in part_corr]

    #     # Get all keypoint matches from the matching clusters
    #     match_set = []
    #     for i, ms in enumerate(partition_links) :
    #         for (j, s) in ms :
    #             match_set.extend(getPartitionMatches(match_points, part_1 == i, part_2 == j))
    #     match_data = [(matchFromIndex(i, j), u, 0) for((i,j),u) in match_set if u < ratio_threshold]
    #     if len(match_data) == 0 : return [], [], []
    #     matches, ratios, scores = zip(*match_data)
    #     return matches, ratios, scores

    # Define a function that given a threshold returns a set of matches
    def match_fun(threshold) :
        match_data = [(matchFromIndex(i,j), u, 0) for ((i,j),u) in match_set if u < threshold]
        if len(match_data) == 0 : return [], [], []
        matches, ratios, scores = zip(*match_data)

        return matches, ratios, scores

    return match_fun
Example #24
def hello():
    trending = getFeatures()
    return render_template("index.html", features=trending)
Example #25
def detection(tweet):
    resp = None

    subj = dict()
    polr = dict()
   
    polr['raw_text'] = tweet
    subj['raw_text'] = tweet
    
    
    subj['postext'] = clean.cleanTextPos(tweet)
    subj['text'] = clean.cleanText(tweet)

    polr['text'] = clean.cleanText(tweet)       
    

    # Terms as PosTags in Subjectivity
    subj['posterms'] = freeling.getPOS(subj['postext'])
    subj['terms'] = tokens.getTokens(subj['text']) # may be tokens.getTokens(text, stopwords)
            
    # Terms as word Tokens in polarity
    polr['terms'] = tokens.getTokens(polr['text']) # may be tokens.getTokens(text, stopwords)
    
   
    ## SELECT TERMS... BY DEFAULT WORDS!
    ###### complete with words or postags
    
    subj['features'] = features.getFeatures(subj['posterms'] + subj['terms'], 'unigrams')
    polr['features'] = features.getFeatures(polr['terms'], 'uni+bigrams')
    
    
#        print subj['features']
#        print polr['features']
    
    subj['vectormodel'] = vector_model.getModel(subTerms, subj['features'], 'tf')
    
    polr['vectormodel'] = vector_model.getModel(polTerms, polr['features'], 'tf')
    polr['dictvectormodel'] = dict( zip(polTerms, polr['vectormodel']) )
    
#        print subj['vectormodel']
#        print polr['vectormodel']
    
    subjectPrediction = subjectSVMmodel.predict(subj['vectormodel'])
    
    if showDetails:
        print("subjectPrediction: ", subjectPrediction)
    
    if subjectPrediction == 1:
        polarityPrediction = polarityBayesModel.classify(polr['dictvectormodel']) 
        
        if polarityPrediction == 1:
            resp = "pos"
        else:
            resp = "neg"
        
        if showDetails:
            print("polarityPrediction: ", polarityPrediction)
    else:
        resp = "not"
    
    return resp
Example #26
run_features_100_15 = timeit.timeit(
    'getFeatures(splitAll(selectData(100, 15)))',
    setup='from __main__ import getFeatures, splitAll, selectData',
    number=1)
run_features_100_20 = timeit.timeit(
    'getFeatures(splitAll(selectData(100, 20)))',
    setup='from __main__ import getFeatures, splitAll, selectData',
    number=1)
run_features_100_25 = timeit.timeit(
    'getFeatures(splitAll(selectData(100, 25)))',
    setup='from __main__ import getFeatures, splitAll, selectData',
    number=1)
# =========================================================================================

# Memory for feature extraction a)
mem_features_20_20 = memUsage(getFeatures(splitAll(selectData(20, 20))))
mem_features_50_20 = memUsage(getFeatures(splitAll(selectData(50, 20))))
mem_features_70_20 = memUsage(getFeatures(splitAll(selectData(70, 20))))

# Memory for feature extraction b)
mem_features_100_10 = memUsage(getFeatures(splitAll(selectData(100, 10))))
mem_features_100_15 = memUsage(getFeatures(splitAll(selectData(100, 15))))
mem_features_100_20 = memUsage(getFeatures(splitAll(selectData(100, 20))))
mem_features_100_25 = memUsage(getFeatures(splitAll(selectData(100, 25))))
# =========================================================================================
run_features_a = [
    run_features_20_20, run_features_50_20, run_features_70_20,
    run_features_100_20
]
run_features_b = [
    run_features_100_10, run_features_100_15, run_features_100_20,