  def testL1Distance(self):
   n1 = numpy.array([[1., 2., 3., 4.], [1., 1., 1., 1.]], dtype=numpy.float32)
   n2 = numpy.array([[5., 6., 7., -8.], [1., 1., 1., 1.]], dtype=numpy.float32)
   out = self.Run(functions.l1_distance(n1, n2))
   testing.assert_allclose(
       out[0],
       numpy.array(
           [distance.cityblock(n1[0], n2[0]), distance.cityblock(n1[1], n2[1])
           ]),
       rtol=TOLERANCE)
 def testL1DistanceWithBroadcast(self):
   n1 = numpy.array([[[1., 2., 3., 4.], [1., 1., 1., 1.]], [[5., 6., 7., 8.],
                                                            [1., 1., 1., 2.]]],
                    dtype=numpy.float32)
   n2 = numpy.array([[5., 6., 7., -8.], [1., 1., 1., 1.]], dtype=numpy.float32)
   out = self.Run(functions.l1_distance(n1, n2))
   expected = numpy.array(
       [[distance.cityblock(n1[0, 0], n2[0]), distance.cityblock(
           n1[0, 1], n2[1])], [distance.cityblock(n1[1, 0], n2[0]),
                               distance.cityblock(n1[1, 1], n2[1])]])
   testing.assert_allclose(expected, out[0], atol=TOLERANCE)
Example #3
def question_6():
    a = (0, 0)
    b = (100, 40)
    pairs = [(55, 5), (59, 10), (56, 15), (50, 18)]

    for p in pairs:
        print('({}, {}):'.format(p[0], p[1]))
        print('L1norm(0, 0): {0}'.format(round(distance.cityblock(p, a), 5)))
        print('L1norm(100, 40): {0}'.format(round(distance.cityblock(p, b), 5)))
        print('L2norm(0, 0): {0}'.format(round(distance.euclidean(p, a), 5)))
        print('L2norm(100, 40): {0}'.format(round(distance.euclidean(p, b), 5)))
        print('')
Example #4
    def tmpAssignPoints(self, centroids):
        print("SELECTED METRIC: " + str(self.metric))
        # default metric is Euclidean; chebyshev = L-infinity, cityblock = L1
        distCount = lambda a, b: euclidean(a, b)
        if self.metric == "chebyshev":
            distCount = lambda a, b: chebyshev(a, b)
        if self.metric == "cityblock":
            distCount = lambda a, b: cityblock(a, b)

        labels_centroids = []
        for i in self.df.index:
            distances = []
            c_dist = []
            for c in centroids:
                x = distCount(self.df.loc[i].values, c)
                distances.append(x)
                c_dist.append(c)
            # pick the centroid with the minimum distance to point i
            m = min(distances)
            dm = distances.index(m)
            tmp_nearest_centr = c_dist[dm]
            labels_centroids.append(tmp_nearest_centr)

        return (labels_centroids, centroids)
def Distance(i,j,type):
	if type == 1:
		return distance.euclidean(i,j)
	elif type == 2:
		return distance.cityblock(i,j)
	elif type == 3:
		return distance.cosine(i,j)
Example #6
 def Classify (self, sample, verbose = True):
   length = len (sample)
   features = MFCC.extract (numpy.frombuffer (sample, numpy.int16))
   gestures = {}
   for gesture in self.params:
     d = []
     for tsample in self.params[gesture]:
       total_distance = 0
       smpl_length = len(tsample)
       
       if(numpy.abs(length - smpl_length) <= 0):
          continue
       
       for i in range (min (len (features), len (tsample))):
         total_distance += dist.cityblock(features[i], tsample[i])
       
       # average the per-frame L1 distances over the number of compared frames
       d.append(total_distance / float(min(len(features), len(tsample))))
     score = numpy.min(d)
     gestures[gesture] = score
     if verbose:
         print("Gesture %s: %f" % (gesture, score))
     try:
       if (score < minimum):
         minimum = score
         lowest = gesture
     except NameError:
       minimum = score
       lowest = gesture
   if verbose:
      print(lowest, minimum)
   if(minimum < THRESHOLD):
     return lowest
   else:
     return None
def getDistLambda(metric):
    if (metric == "manhattan"):
        return lambda x,y : distance.cityblock(x,y)
    elif (metric == "cosine"):
        return lambda x,y : distance.cosine(x,y)
    else:
        return lambda x,y : distance.euclidean(x,y)
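A minimal usage sketch for getDistLambda; the sample vectors below are illustrative assumptions, not from the original source.

from scipy.spatial import distance

dist_fn = getDistLambda("manhattan")
print(dist_fn([0, 0], [3, 4]))  # 7, i.e. |3-0| + |4-0|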
Example #8
def calc_dist(e1,e2,mode=1):
    if mode == 1:
        return ssd.euclidean(e1,e2)
    elif mode == 2:
        return ssd.cityblock(e1,e2)
    elif mode == 3:
        return ssd.cosine(e1,e2)
Example #9
def k_means(data, k=2, distance='e'):
    centers = np.array(random.sample(list(data), k))
    
    centers_steps = [centers.tolist()]
    
    changed = True
    while changed:
        prev_centers = np.copy(centers)
        data_nr = data.shape[0]
        clusters = np.empty((data_nr, k))
        for i in range(data_nr):
            if distance == 'e':
                clusters[i] = np.array([euclidean(data[i], centers[j]) for j in range(k)])
            elif distance == 'm':
                clusters[i] = np.array([cityblock(data[i], centers[j]) for j in range(k)])
            elif distance == 'h':
                clusters[i] = np.array([hamming(data[i], centers[j]) for j in range(k)])
            else:
                raise ValueError('Unrecognized distance')
        clusters = np.argmin(clusters, axis=1)
        
        for i in range(k):
            centers[i] = np.mean(data[np.where(clusters == i)], axis=0)
        
        changed = not np.intersect1d(prev_centers, centers).size == centers.size
        centers_steps.append(centers.tolist())
        
    return centers, centers_steps
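A hedged usage sketch for k_means; the toy points and seed are illustrative assumptions, not from the original source.

import random
import numpy as np
from scipy.spatial.distance import euclidean, cityblock, hamming

random.seed(0)
data = np.array([[0.0, 0.0], [0.1, 0.2], [5.0, 5.1], [5.2, 4.9]])
centers, steps = k_means(data, k=2, distance='m')  # 'm' selects the cityblock branch
print(centers)    # two centroids, one near each group of points
print(len(steps)) # number of recorded centroid updates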
Example #10
def iter_kill_spots(cx, cz):
    c = w.getChunk(cx, cz)
    bedrock_blocks = (c.Blocks == w.materials.Bedrock.ID)
    for bx, bz, y in mcbuddy_util.get_block_pos_from_mask(bedrock_blocks):
        if is_kill_spot(c, bx, bz, y):
            x = bx + (cx << 4)
            z = bz + (cz << 4)
            dist = cityblock((100, 100), (x, z))
            yield x, z, dist
Example #11
 def __init__(self, rad):
     super().__init__(rad)
     self.mask_ = np.zeros((2*rad+1, 2*rad+1, 2*rad+1), dtype=bool)
     for r1 in range(2*self.rad+1):
         for r2 in range(2*self.rad+1):
             for r3 in range(2*self.rad+1):
                 if(cityblock((r1, r2, r3),
                              (self.rad, self.rad, self.rad)) <= self.rad):
                     self.mask_[r1, r2, r3] = True
def calc_dist(di,dj,i=1):
    """ Distance calculation for every
        distance functions in use"""
    if i == 1:
        return ssd.euclidean(di,dj) # built-in Euclidean fn
    elif i == 2:
        return ssd.cityblock(di,dj) # built-in Manhattan fn
    elif i == 3:
        return ssd.cosine(di,dj)    # built-in Cosine fn
Example #13
def sliced_wasserstein(PD1, PD2, M=50):
    """ Implementation of Sliced Wasserstein distance as described in 
        Sliced Wasserstein Kernel for Persistence Diagrams by Mathieu Carriere, Marco Cuturi, Steve Oudot (https://arxiv.org/abs/1706.03358)


        Parameters
        -----------
        
        PD1: np.array size (m,2)
            Persistence diagram
        PD2: np.array size (n,2)
            Persistence diagram
        M: int, default is 50
            Iterations to run approximation.

        Returns
        --------
        sw: float
            Sliced Wasserstein distance between PD1 and PD2
    """

    diag_theta = np.array(
        [np.cos(0.25 * np.pi), np.sin(0.25 * np.pi)], dtype=np.float32)

    l_theta1 = [np.dot(diag_theta, x) for x in PD1]
    l_theta2 = [np.dot(diag_theta, x) for x in PD2]

    if (len(l_theta1) != PD1.shape[0]) or (len(l_theta2) != PD2.shape[0]):
        raise ValueError("The projected points and origin do not match")

    PD_delta1 = [[np.sqrt(x**2 / 2.0)] * 2 for x in l_theta1]
    PD_delta2 = [[np.sqrt(x**2 / 2.0)] * 2 for x in l_theta2]

    # we now have the inputs to compute the sliced Wasserstein distance
    sw = 0
    theta = 0.5
    step = 1.0 / M
    for i in range(M):
        l_theta = np.array([np.cos(theta * np.pi),
                            np.sin(theta * np.pi)],
                           dtype=np.float32)

        V1 = [np.dot(l_theta, x)
              for x in PD1] + [np.dot(l_theta, x) for x in PD_delta2]

        V2 = [np.dot(l_theta, x)
              for x in PD2] + [np.dot(l_theta, x) for x in PD_delta1]

        sw += step * cityblock(sorted(V1), sorted(V2))
        theta += step

    return sw
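A hedged usage sketch for sliced_wasserstein; the two toy persistence diagrams below are illustrative assumptions.

import numpy as np
from scipy.spatial.distance import cityblock

PD1 = np.array([[0.0, 1.0], [0.5, 2.0]])
PD2 = np.array([[0.1, 1.1], [0.4, 2.1], [0.2, 0.3]])
print(sliced_wasserstein(PD1, PD2, M=50))  # small positive float; 0.0 for identical diagrams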
Example #14
    def run(self, video_path=None):
        if video_path is not None:
            self.video_path = video_path    
        assert (self.video_path is not None), "you must set the video path!"

        self.shots = []
        cap = cv2.VideoCapture(self.video_path)
        hists = []
        frames = []
        while True:
            success, frame = cap.read()
            if not success:
                break
            if self.output_dir is not None:
                frames.append(frame)
            # compute a normalized RGB histogram for each frame
            color_histograms = [cv2.calcHist([frame], [c], None, [self.hist_size], [0,256]) \
                          for c in range(3)]

            color_histograms = np.array([chist/float(sum(chist)) for chist in color_histograms])

            hists.append(color_histograms.flatten())

        # manhattan distance of two consecutive histograms
        scores = [cityblock(*h12) for h12 in zip(hists[:-1], hists[1:])]
        
        print("max diff:", max(scores), "min diff:", min(scores))
        
        # score statistics; the shot threshold itself is the configured absolute value
        mean_score = np.mean(scores)
        std_score = np.std(scores)
        threshold = self.absolute_threshold

        # decide shot boundaries
        prev_i = 0
        prev_score = scores[0]
        for i, score in enumerate(scores[1:]):
            if (score >= threshold) and (abs(score - prev_score) >= threshold/2.0):
                self.shots.append((prev_i, i+2))
                prev_i = i + 2
            prev_score = score
        video_length = len(hists)
        self.shots.append((prev_i, video_length))
        assert video_length>=self.min_duration, "duration error"

        self.merge_short_shots()
        
        # save key frames
        if self.output_dir is not None:
            for shot in self.shots:
                cv2.imwrite("%s/frame-%d.jpg" % (self.output_dir,shot[0]), frames[shot[0]])
            print("key frames written to %s" % self.output_dir)
Example #15
 def calculate(self, row):
     q1 = str(row['question1'])
     q2 = str(row['question2'])
     q1_keywords = self.calculate_keyword(q1)
     q2_keywords = self.calculate_keyword(q2)
     if not len(q1_keywords) or not len(q2_keywords):
         return [0.0, 0.0, 0.0]
     q1_vector = self.calculate_vector(q1_keywords)
     q2_vector = self.calculate_vector(q2_keywords)
     cos_sim = 1 - cosine(q1_vector, q2_vector)
     euclidean_sim = 1 - euclidean(q1_vector, q2_vector)
     manhattan_sim = 1 - cityblock(q1_vector, q2_vector)
     return [cos_sim, euclidean_sim, manhattan_sim]
def get_distances(X):
    nfeatures = len(X[0])
    man_dist = [cityblock(features, np.zeros(nfeatures)) for features in X]
    cosine_dist = [cosine(features, np.ones(nfeatures)) for features in X]
    euclid_dist = [
        sqeuclidean(features, np.zeros(nfeatures)) for features in X
    ]
    minkowski_dist = [
        minkowski(features, np.zeros(nfeatures), 2) for features in X
    ]
    all_dist = np.column_stack((man_dist, cosine_dist, euclid_dist, minkowski_dist))
    return all_dist
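A hedged usage sketch: each row of X is reduced to four distances against fixed reference vectors.

import numpy as np
from scipy.spatial.distance import cityblock, cosine, sqeuclidean, minkowski

X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 0.0]])
print(get_distances(X).shape)  # (2, 4): one column per distance measure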
Example #17
    def calc_puddle_penalization(self, state: tuple) -> float:
        """
        Return a float penalization: the negative of the lowest Manhattan
        distance between the current state and the nearest free space.
        :param state:
        :return:
        """
        # Min distance found!
        min_distance = min(
            cityblock(self.current_state, free_space)
            for free_space in self.free_spaces)

        # Set penalization per distance
        return -min_distance
Example #18
def get_min_dist(incoming_coord):
    dist = {}
    for coordinate in coordinates.iterrows():
        coord = coordinate[0]
        x = coordinate[1][0]
        y = coordinate[1][1]
        dist[coord] = distance.cityblock(incoming_coord, (x, y))
    min_value = min(dist.values())
    min_keys = [k for k in dist if dist[k] == min_value]
    if len(min_keys) > 1:
        return -1  # Location is equally far from two or more coordinates
    else:
        return min_keys[0]
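A hedged usage sketch; `coordinates` is assumed to be a module-level pandas DataFrame of (x, y) rows, as implied by the iterrows() call above.

import pandas as pd
from scipy.spatial import distance

coordinates = pd.DataFrame([[1, 1], [4, 4]], index=["a", "b"])
print(get_min_dist((0, 0)))  # "a": uniquely closest coordinate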
def feature_construct(city,
                      model_name,
                      friends,
                      walk_len=100,
                      walk_times=20,
                      num_features=128):
    '''construct the feature matrix
    Args:
        city: city
        model_name: 20_locid
        friends: friends list (asymmetric) [u1, u2]
        walk_len: walk length
        walk_times: walk times
        num_features: dimension for vector
    Returns:
    '''

    if os.path.exists('dataset/'+city+'/feature/'+city+'_'+model_name+'_'+\
                      str(int(walk_len))+'_'+str(int(walk_times))+'_'+str(int(num_features))+'.feature'):
        os.remove('dataset/'+city+'/feature/'+city+'_'+model_name+'_'+\
                  str(int(walk_len))+'_'+str(int(walk_times))+'_'+str(int(num_features))+'.feature')

    emb = pd.read_csv('dataset/'+city+'/emb/'+city+'_'+model_name+'_'+\
                      str(int(walk_len))+'_'+str(int(walk_times))+'_'+str(int(num_features))+'.emb',\
                      header=None, skiprows=1, sep=' ')
    emb = emb.rename(columns={0: 'uid'})  # last column is user id
    emb = emb.loc[emb.uid > 0]  # only take users, no loc_type, not necessary

    pair = pair_construct(emb.uid.unique(), friends)

    for i in range(len(pair)):
        u1 = pair.loc[i, 'u1']
        u2 = pair.loc[i, 'u2']
        label = pair.loc[i, 'label']

        u1_vector = emb.loc[emb.uid == u1, range(1, emb.shape[1])]
        u2_vector = emb.loc[emb.uid == u2, range(1, emb.shape[1])]

        i_feature = pd.DataFrame([[u1, u2, label,\
                                   cosine(u1_vector, u2_vector),\
                                   euclidean(u1_vector, u2_vector),\
                                   correlation(u1_vector, u2_vector),\
                                   chebyshev(u1_vector, u2_vector),\
                                   braycurtis(u1_vector, u2_vector),\
                                   canberra(u1_vector, u2_vector),\
                                   cityblock(u1_vector, u2_vector),\
                                   sqeuclidean(u1_vector, u2_vector)]])

        i_feature.to_csv('dataset/'+city+'/feature/'+city+'_'+model_name+'_'+\
                         str(int(walk_len))+'_'+str(int(walk_times))+'_'+str(int(num_features))+'.feature',\
                         index = False, header = None, mode = 'a')
Example #20
    def fit(self,data):
        
        # Will do the normalization, if necessary
        if self.l2norm:
            transformer = Normalizer().fit(data)
            data = transformer.transform(data)
        
        # Initialize the Centroids
        self.ctrds = {}
        
        # Set the default centroids as the first k points of the data
        for i in range(self.k):
            self.ctrds[i] = data[i]
        
        # Set the default running iteration
        for i in range(self.max_iter):
            self.classes = {}
            
            # Set the default classification for k classes
            for j in range(self.k):
                self.classes[j] = []
            
            # Count the distances between the data and the centroids. Methods : Euclidean, Cityblock, Cosine
            for data_rows in data:
                if (self.method == "euclidean"):
                    dst = [distance.euclidean(data_rows, self.ctrds[ctrd]) for ctrd in self.ctrds]
                elif (self.method == "cityblock"):
                    dst = [distance.cityblock(data_rows, self.ctrds[ctrd]) for ctrd in self.ctrds]
                elif (self.method == "cosine"):
                    dst = [distance.cosine(data_rows, self.ctrds[ctrd]) for ctrd in self.ctrds]
                clf = dst.index(min(dst))
                self.classes[clf].append(data_rows)
            
            # Enter the centroids to a dictionary
            prev_ctrds = dict(self.ctrds)
            
            # Determine the average of the distances to determine a new centroid
            for clf in self.classes:
                self.ctrds[clf] = np.average(self.classes[clf],axis=0)

            optimized = True
            
            # Determine the loop if it has passed the minimum tolerance or not
            for c in self.ctrds:
                default_ctrd = prev_ctrds[c]
                current_ctrd = self.ctrds[c]
                if np.sum((current_ctrd - default_ctrd)/default_ctrd * 100.0) > self.tol:
                    optimized = False

            if optimized:
                break
def get_features(question1, question2):
    """
    Get all the features to input into the XGBoost model.

    Pre: Both questions cannot be None.
    Args:
        question1: The first question to match.
        question2: The second question to match.

    Returns: A dictionary with all the features for the XGBoost model.

    """
    w2v = word2vec_features(question1, question2, W2VModel)
    output_dict = {
        # length based features
        "len_q1": [len(question1)],
        "len_q2": [len(question2)],
        "diff_len": [len(question1) - len(question2)],
        "len_char_q1": [len(question1.replace(" ", ""))],
        "len_char_q2": [len(question2.replace(" ", ""))],
        "len_word_q1": [len(question1.split())],
        "len_word_q2": [len(question2.split())],
        "common_words": [
            len(
                set(question1.lower().split()).intersection(
                    set(question2.lower().split())))
        ],
        # distance based features
        #   (fuzzywuzzy library tutorial: https://www.datacamp.com/community/tutorials/fuzzy-string-python)
        "fuzz_Qratio": [fuzz.QRatio(question1, question2)],
        "fuzz_Wratio": [fuzz.WRatio(question1, question2)],
        "fuzz_partial_ratio": [fuzz.partial_ratio(question1, question2)],
        "fuzz_partial_token_set_ratio":
        [fuzz.partial_token_set_ratio(question1, question2)],
        "fuzz_partial_token_sort_ratio":
        [fuzz.partial_token_sort_ratio(question1, question2)],
        "fuzz_token_set_ratio": [fuzz.token_set_ratio(question1, question2)],
        "fuzz_token_sort_ratio": [fuzz.token_sort_ratio(question1, question2)],

        # word2vec based features
        "cosine_distance": [cosine(w2v[0], w2v[1])],
        "cityblock_distance": [cityblock(w2v[0], w2v[1])],
        "jaccard_distance": [jaccard(w2v[0], w2v[1])],
        "canberra_distance": [canberra(w2v[0], w2v[1])],
        "euclidean_distance": [euclidean(w2v[0], w2v[1])],
        "minkowski_distance": [minkowski(w2v[0], w2v[1])],
        "braycurtis_distance": [braycurtis(w2v[0], w2v[1])],
        "wmd": [w2v[2]],
        "norm_wmd": [w2v[3]]
    }
    return output_dict
Example #22
def find_distance(x,y,option):
    if option == "1":
        # return math.sqrt(sum([(a - b) ** 2 for a, b in zip(x, y)]))
        return np.linalg.norm(x-y)
    elif option == "2":
        # return sum([(a - b) for a,b in zip(x, y)])
        return scp.cityblock(x,y)
    elif option == "3":
        # numer = sum([a*b for a,b in zip(x,y)])
        # denom_1 = math.sqrt(sum([a ** 2 for a in x]))
        # denom_2 = math.sqrt(sum([b ** 2 for b in y]))
        # return numer/(denom_1*denom_2)
        # return sum([sum(a*b)/ (math.sqrt(a**2)*math.sqrt(b**2)) for a, b in zip(x,y)])
        return scp.cosine(x,y)
Example #23
def add_lateral_connections_topology(layer, distance_to_weight):
    proj = sim.Projection(layer.population, layer.population,
                          sim.AllToAllConnector(), sim.StaticSynapse())

    weights = np.zeros((layer.size(), layer.size()))

    # for all combinations of neurons
    for x1, y1, x2, y2 in itertools.product(np.arange(layer.shape[0]),
                                            np.arange(layer.shape[1]),
                                            repeat=2):
        w = distance_to_weight(distance.cityblock([x1, y1], [x2, y2]))
        weights[layer.get_idx(x1, y1)][layer.get_idx(x2, y2)] = w

    proj.set(weight=weights)
    def get_longest_shortest_routes(self):
        """ Return manhattan distance for all pairs of batteries and houses"""
        import math

        manhattan_distance = []
        for house_key, house_value in self.houses.items():
            house_position = house_value.x, house_value.y
            for battery_key, battery_value in self.batteries.items():
                battery_position = battery_value.x, battery_value.y
                manhattan_distance.append([
                    house_key, battery_key,
                    distance.cityblock(battery_position, house_position)
                ])

        # shortest routes
        shortest_distance_per_house = []
        for house in self.houses:
            shortest_distance = math.inf
            for i in manhattan_distance:
                if i[0] == house:
                    if i[2] < shortest_distance:
                        shortest_distance = i[2]
            shortest_distance_per_house.append(shortest_distance)

        smallest_object_value = 0
        for i in shortest_distance_per_house:
            smallest_object_value += i

        print(smallest_object_value)

        # largest routes
        largest_distance_per_house = []
        for house in self.houses:
            largest_distance = 0
            for i in manhattan_distance:
                if i[0] == house:
                    if i[2] > largest_distance:
                        largest_distance = i[2]
            largest_distance_per_house.append(largest_distance)

        biggest_object_value = 0
        for i in largest_distance_per_house:
            biggest_object_value += i

        print(biggest_object_value)

        return manhattan_distance
def similarity(svd1, svd2):
    scores = [0, 0, 0]
    array_depths = [32, number_svd_vectors, number_svd_vectors]

    importance_factors = [
        475, 1, 310
    ]  #these are weighting numbers (chosen by trial and error) which resulted in the highest accuracy.
    #this accounts for not all of the things SVD returns being of equal scale/importance.

    for i in range(3):
        for j in range(array_depths[i]):
            scores[i] += distance.cityblock(svd1[0][j],
                                            svd2[0][j])  #(manhattan distance)
    return np.array(scores).dot(np.array(importance_factors))
 def calculate_similarity(self,
                          question_ebd,
                          relation_ebd,
                          metric='cosine'):
     if metric == 'braycurtis':
         return distance.braycurtis(question_ebd, relation_ebd)
     elif metric == 'canberra':
         return distance.canberra(question_ebd, relation_ebd)
     elif metric == 'cityblock':
         return distance.cityblock(question_ebd, relation_ebd)
     elif metric == 'cosine':
         return distance.cosine(question_ebd, relation_ebd)
     elif metric == 'euclidean':
         return distance.euclidean(question_ebd, relation_ebd)
Example #27
    def manhattan(self, x=None, y=None, w=None):
        """
        曼哈顿距离(Manhattan Distance)是由十九世纪的赫尔曼·闵可夫斯基所创词汇,是种使用在几何度量空间的几何学用语,
        用以标明两个点在标准坐标系上的绝对轴距总和。曼哈顿距离的命名原因是从规划为方型建筑区块的城市(如曼哈顿)
        间,最短的行车路径而来(忽略曼哈顿的单向车道以及只存在于3、14大道的斜向车道)。
        任何往东三区块、往北六区块的的路径一定最少要走九区块,没有其他捷径。

        x = [1, 2, 0]
        y = [0, 1, 0]
        """
        x = x or self.x
        y = y or self.y
        w = w or self.w
        return distance.cityblock(x, y, w)
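A worked example of the weighted City Block distance used above, assuming a SciPy version whose cityblock accepts the w argument (SciPy >= 1.0): with x = [1, 2, 0], y = [0, 1, 0], w = [1, 1, 1], d = 1*|1-0| + 1*|2-1| + 1*|0-0| = 2.

from scipy.spatial import distance
print(distance.cityblock([1, 2, 0], [0, 1, 0], w=[1, 1, 1]))  # 2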
Example #28
    def score_routing(self, routing, usage_matrix):
        """
        For the given layout, and the routing, produce the score of the
        routing.

        The score is composed of its constituent nets' scores, and the
        score of each net is based on the number of violations it has,
        the number of vias and pins and the ratio of its actual length
        and the lower bound on its length.

        layout is the 3D matrix produced by the placer.
        """
        alpha = 3
        beta = 0.1
        gamma = 1

        net_scores = {}
        net_num_violations = {}

        # Score each net segment in the entire net
        for net_name, d in routing.items():
            net_scores[net_name] = []
            net_num_violations[net_name] = []

            for i, segment in enumerate(d["segments"]):
                routed_net = segment["net"]

                # Violations
                violation_matrix = segment["violation"]
                violations = self.compute_net_violations(violation_matrix, usage_matrix)
                net_num_violations[net_name].append(violations)

                # Number of vias and pins
                vias = 0
                num_pins = 2
                pins_vias = vias - num_pins

                # Lower length bound
                coord_a = segment["pins"][0]["route_coord"]
                coord_b = segment["pins"][1]["route_coord"]
                lower_length_bound = max(1, cityblock(coord_a, coord_b))
                length_ratio = len(routed_net) / lower_length_bound

                score = (alpha * violations) + (beta * pins_vias) + (gamma * length_ratio)

                net_scores[net_name].append(score)

        # print(routing)
        # print(net_scores)
        return net_scores, net_num_violations
Example #29
    def __init__(self,
                 height: int,
                 width: int,
                 seed: int = None,
                 levelname: str = ""):
        super().__init__(height, width, seed=seed, levelname=levelname)

        # Key variables
        room_count = max(1, round(self.size / Voronoi.ROOM_SIZE))
        rooms = self.random_points(room_count, margin=1)
        areas = collections.defaultdict(list)

        # Generating Voronoi regions
        for c1 in range(self.height):
            for c2 in range(self.width):
                areas[tuple(
                    min(rooms,
                        key=lambda room: dist.cityblock(room,
                                                        (c1, c2))))].append(
                                                            (c1, c2))

        # Placing walls
        for area in areas:
            for row in set(point[1] for point in areas[area]):
                points = [point for point in areas[area] if point[1] == row]
                minimum = min(points, key=lambda p: p[0])
                maximum = max(points, key=lambda p: p[0])

                self.map[minimum] = 1
                self.map[maximum] = 1

            for column in set(point[0] for point in areas[area]):
                points = [point for point in areas[area] if point[0] == column]
                minimum = min(points, key=lambda p: p[1])
                maximum = max(points, key=lambda p: p[1])

                self.map[minimum] = 1
                self.map[maximum] = 1

        # Removing borders
        for area in [area for area in areas]:
            for point in areas[area][:]:
                if self.map[point]:
                    areas[area].remove(point)

            if not areas[area]:
                areas.pop(area)

        # Parameters
        self.areas = areas
def manhattan_distance(series1, series2):
    """
    Compute the City Block (Manhattan) distance between two series

    Quantifies the absolute magnitude of the difference between time series.

    Args:
        series1 (numpy.ndarray): First series
        series2 (numpy.ndarray): Second series

    Returns:
        City Block (Manhattan) distance coefficient between the two series
    """
    return sc.cityblock(series1, series2)
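A hedged usage sketch: the coefficient equals the sum of absolute element-wise differences between the two series.

import numpy as np
from scipy.spatial import distance as sc

s1 = np.array([1.0, 2.0, 3.0])
s2 = np.array([2.0, 2.0, 5.0])
print(manhattan_distance(s1, s2))  # 3.0
print(np.sum(np.abs(s1 - s2)))     # 3.0, the same value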
Example #31
def KNNsearch(query, k, metric):
    result = []
    for i in range(len(Collection["names"])):
        dist = math.inf
        if(metric=="eucledian"):
            ndist = distance.euclidean(query, Collection["encodings"][i])
        elif(metric=="manhattan"):
            ndist = distance.cityblock(query, Collection["encodings"][i])
        dist = min(dist,ndist)
        if(len(result)<k):
            heapq.heappush(result,(-dist,Collection["names"][i]))
        elif(heapq.nsmallest(1,result)[0][0]<-dist):
            heapq.heapreplace(result,(-dist,Collection["names"][i]))
    return [heapq.heappop(result) for i in range(len(result))]
Example #32
 def __init__(self, screen_width, screen_height, state_representation):
     self.wn = turtle.Screen()
     self.screen_width = screen_width
     self.screen_height = screen_height
     self.max_distance = distance.cityblock(
         np.array([0, 0]), np.array([screen_width, screen_height]))
     self.current_food = None
     self.snake = None
     self.actions = {0: "up", 1: "right", 2: "down", 3: "left"}
     self.points = 0
     self.n_moves = 0
     self.max_moves_without_food = 1000
     self.theta = 45
     self.state_representation = state_representation(self)
Example #33
def manhattan_distance(a, b):
    """Compute the manhattan (L1) distance between two numpy arrays.

    Parameters
    ----------
    a: numpy array
    b: numpy array

    Returns
    -------
    distance: float
    """
    dist = distance.cityblock(a, b)
    return dist
Example #34
def compute_distance(X, centroid, type="euclidian", weight=1):
    """Computes the distance using the type passed as parameter. Can compute weighted distance only for minkowski."""
    # Initialize the weight to all ones if not specified for weighted minkowski.
    if type == "wminkowski" and weight == 1:
        weight = np.ones(len(X))

    # Compute only the requested distance so unused metrics are never evaluated.
    if type == "euclidian":
        distance = sp.euclidean(X, centroid)
    elif type == "manhattan":
        distance = sp.cityblock(X, centroid)
    elif type == "wminkowski":
        distance = sp.wminkowski(X, centroid, 2, weight)
    else:
        raise ValueError("Unknown distance type: %s" % type)

    return distance
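A hedged usage sketch; note that wminkowski was removed in SciPy 1.8, so the wminkowski branch assumes an older SciPy.

import numpy as np
from scipy.spatial import distance as sp

X = np.array([1.0, 2.0, 3.0])
centroid = np.zeros(3)
print(compute_distance(X, centroid, type="manhattan"))  # 6.0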
Example #35
def chamfer_with_normalize(test, target):
    distances = []

    for i in range(0, len(test)):
        point_dst = []
        for j in range(0, len(target)):
            point_dst.append(distance.cityblock(test[i], target[j]))
        distances.append(min(point_dst))

    distances = np.array(distances)

    distances = (distances - np.min(distances)) / np.ptp(distances)

    return distances
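A hedged usage sketch: each test point gets its nearest-neighbour L1 distance to the target set, min-max scaled to [0, 1]; the points are illustrative assumptions.

import numpy as np
from scipy.spatial import distance

test = [(0, 0), (1, 0), (5, 5)]
target = [(0, 0), (4, 4)]
print(chamfer_with_normalize(test, target))  # [0.  0.5 1. ]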
    def __call__(self):
        '''
		returns vector features. They are:
		1. cosine similarity
		2. L2 normed distance
		3. L1 normed distance
		4. Bray-Curtis distance
		5. Correlation distance
		6. absolute distance vector between q1 and q2 (ndim features)
		'''
        from scipy.spatial import distance
        self.q1s = np.array_split(self.q1.values, self.n_batches)
        self.q2s = np.array_split(self.q2.values, self.n_batches)
        indices = np.array_split(self.feat.index, self.n_batches)
        dfs = []
        for index, (q1, q2) in zip(indices, zip(self.q1s, self.q2s)):
            df = pd.DataFrame(index=index.values)
            pre = self.prefix
            vec = self.vectorizer
            df[pre + 'cos_dist'] = [
                distance.cosine(vec(x), vec(y)) for x, y in zip(q1, q2)
            ]
            df[pre + 'euc_dist'] = [
                distance.euclidean(vec(x), vec(y)) for x, y in zip(q1, q2)
            ]
            df[pre + 'manhattan_dist'] = [
                distance.cityblock(vec(x), vec(y)) for x, y in zip(q1, q2)
            ]
            df[pre + 'braycurt_dist'] = [
                distance.braycurtis(vec(x), vec(y)) for x, y in zip(q1, q2)
            ]
            df[pre + 'correlation_dist'] = [
                distance.correlation(vec(x), vec(y)) for x, y in zip(q1, q2)
            ]
            for i in range(self.size):
                if self.transformer is not None:
                    df[pre + 'vec_{}'.format(i)] = abs(
                        self.transformer.transform(
                            np.array([vec(x) for x in q1]))[:, i] -
                        self.transformer.transform(
                            np.array([vec(x) for x in q2]))[:, i])
                else:
                    df[pre + 'vec_{}'.format(i)] = abs(
                        (np.array([vec(x) for x in q1])) -
                        (np.array([vec(x) for x in q2])))[:, i]

            dfs += [df]
        df = pd.concat(dfs)
        return self.feat.join(df)
def cross_channel_distance_features(image):
    """calculates the cross channel distance features 
    
    Calculates the distances across channels 

    Parameters
    ----------
    image : 3D array, shape (M, N, C)
        The input image with multiple channels. 

    Returns
    -------
    features :  dict  
        dictionary including different distances across channels

    """
    features = dict()
    for ch1 in range(image.shape[2]):
        for ch2 in range(ch1 + 1, image.shape[2]):
            # reshaping the channels to 1D
            channel1 = image[:, :, ch1].ravel()
            channel2 = image[:, :, ch2].ravel()

            # creating the suffix name for better readability
            suffix = "_Ch" + str(ch1 + 1) + "_Ch" + str(ch2 + 1)

            # storing the distance values
            features["braycurtis_distance" + suffix] = dist.braycurtis(
                channel1, channel2)
            features["canberra_distance" + suffix] = dist.canberra(
                channel1, channel2)
            features["chebyshev_distance" + suffix] = dist.chebyshev(
                channel1, channel2)
            features["cityblock_distance" + suffix] = dist.cityblock(
                channel1, channel2)
            features["correlation_distance" + suffix] = dist.correlation(
                channel1, channel2)
            features["cosine_distance" + suffix] = dist.cosine(
                channel1, channel2)
            features["euclidean_distance" + suffix] = dist.euclidean(
                channel1, channel2)
            features["jensenshannon_distance" + suffix] = dist.jensenshannon(
                channel1, channel2)
            features["minkowski_distance" + suffix] = dist.minkowski(
                channel1, channel2)
            features["sqeuclidean_distance" + suffix] = dist.sqeuclidean(
                channel1, channel2)

    return features
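A hedged usage sketch on a tiny random 3-channel image; jensenshannon requires SciPy >= 1.2.

import numpy as np
from scipy.spatial import distance as dist

rng = np.random.default_rng(0)
image = rng.random((8, 8, 3))
features = cross_channel_distance_features(image)
print(features["cityblock_distance_Ch1_Ch2"])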
def trainingAccuracy(K, type):
    minVal = []
    minVal1 = []
    true = 0
    false = 0
    correct = 0
    wrong = 0
    for i in range(0, len(trainMatrix)):
        # reset the distance lists for this held-out row
        result3 = []
        result4 = []
        initial = np.array(trainMatrix[i])
        a = initial[1:len(initial) - 1]
        b = np.delete(trainMatrix, (i), axis=0)
        for j in range(0, len(b)):
            c = np.array(b[j][1:len(b[j]) - 1])
            Edist = distance.euclidean(a, c)
            Mdist = distance.cityblock(a, c)
            result3.append([Edist, b[j][len(b[j]) - 1]])
            result4.append([Mdist, b[j][len(b[j]) - 1]])
            # print result3
            # print result4
        result3.sort(key=lambda row: row[0:])
        result4.sort(key=lambda row: row[0:])
        # print result3
        # print result4
        for k in range(0, K):
            minVal.append(result3[k][1])
            minVal1.append(result4[k][1])
        # print "top k elements" , minVal
        count = Counter(minVal)
        count1 = Counter(minVal1)
        # print " test ans =" , count.most_common()[0]
        # print 'type = ' , count.most_common()
        # print 'type1 = ', count1.most_common()
        # print " test ans =", count.most_common()[0][0]
        # print "real ans = " , testrow[len(testrow)-1]
        if (count.most_common()[0][0] == initial[len(initial) - 1]):
            true += 1.0
        else:
            false += 1.0
        if (count1.most_common()[0][0] == initial[len(initial) - 1]):
            correct += 1.0
        else:
            wrong += 1.0
        minVal = []
        minVal1 = []
    if (type == 2):
        print('accuracy for K =', K, "=", (true / (true + false)) * 100, "%")
    else:
        print('accuracy for K =', K, "=", (correct / (correct + wrong)) * 100, "%")
def testingAccuracy(K):
    minVal = []
    minVal1 = []
    i = 0
    j = 0
    true = 0
    false = 0
    correct = 0
    wrong = 0
    for testrow in testMatrix:
        # reset the distance lists for this test row
        result = []
        result1 = []
        j += 1
        i = 0
        for trainrow in trainMatrix:
            a = np.array(trainrow[1:len(trainrow) - 1])
            b = np.array(testrow[1:len(testrow) - 1])
            # dist = distance.euclidean(a, b)
            dist1 = distance.cityblock(a, b)
            dist = np.linalg.norm(a - b)
            # print "distance for " , i , "= " ,dist
            result.append([dist, trainrow[len(trainrow) - 1]])
            result1.append([dist1, trainrow[len(trainrow) - 1]])
            i += 1
        result.sort(key=lambda row: row[0:])
        result1.sort(key=lambda row: row[0:])
        # print "after sorting = " , result
        for k in range(0, K):
            minVal.append(result[k][1])
            minVal1.append(result1[k][1])
        # print "top k elements" , minVal
        count = Counter(minVal)
        count1 = Counter(minVal1)
        # print " test ans =" , count.most_common()[0]
        # print 'type = ' , count.most_common()
        # print " test ans =", count.most_common()[0][0]
        # print "real ans = " , testrow[len(testrow)-1]
        if (count.most_common()[0][0] == testrow[len(testrow) - 1]):
            true += 1.0
        else:
            false += 1.0
        if (count1.most_common()[0][0] == testrow[len(testrow) - 1]):
            correct += 1.0
        else:
            wrong += 1.0
        minVal = []
        minVal1 = []
    # print 'correct = ' , true
    # print 'wrong = ' , false
    print('accuracy for K =', K, "=", (true / (true + false)) * 100, "%")
Example #40
    def determine_longest_at_turn_n(board_state: GameState):
        """
        Determine which snake is longest in current state. Tie break using 'closest head to a fruit'
        :return:
        """
        longest_snake_idx = None
        for snake_i, snake in enumerate(board_state.snakes):
            if snake.alive:
                if longest_snake_idx is None or snake.length > board_state.snakes[
                        longest_snake_idx].length:
                    longest_snake_idx = snake_i
                    continue
                if snake.length == board_state.snakes[
                        longest_snake_idx].length:
                    # Tie Break
                    snake_manhattan_dists = sorted([
                        cityblock(snake.head, trophy_i)
                        for trophy_i in board_state.fruits_locations
                    ])
                    longest_manhattan_dists = sorted([
                        cityblock(board_state.snakes[longest_snake_idx].head,
                                  trophy_i)
                        for trophy_i in board_state.fruits_locations
                    ])

                    for d1, d2 in zip(snake_manhattan_dists,
                                      longest_manhattan_dists):
                        if d1 < d2:
                            longest_snake_idx = snake_i
                            break
                        elif d1 > d2:
                            break
                        else:
                            # equal distance, tie break with later trophy..
                            pass
        return longest_snake_idx
Example #41
    def calculate_manhattan_score(self):
        # calculate the key hold manhattan scores
        # key hold genuine score
        for i in range(self.kh_test_genuine.shape[0]):
            current_score = cityblock(self.kh_test_genuine.iloc[i].values,
                                      self.kh_mean_vector)
            self.kh_genuine_score.append(current_score)
        # key hold impostor score
        for i in range(self.kh_test_impostor.shape[0]):
            current_score = cityblock(self.kh_test_impostor.iloc[i].values,
                                      self.kh_mean_vector)
            self.kh_impostor_score.append(current_score)

        # calculate the key interval manhattan scores
        # key interval genuine score
        for i in range(self.ki_test_genuine.shape[0]):
            current_score = cityblock(self.ki_test_genuine.iloc[i].values,
                                      self.ki_mean_vector)
            self.ki_genuine_score.append(current_score)
        # key interval impostor score
        for i in range(self.ki_test_impostor.shape[0]):
            current_score = cityblock(self.ki_test_impostor.iloc[i].values,
                                      self.ki_mean_vector)
            self.ki_impostor_score.append(current_score)
Example #42
 def testReplicaHistAdaptation(self):
   """Verify that adaptation leads to linear replica directions."""
   chain = self.GetAdaptiveIsingChain(
       self.nsteps_per_sweep, self.nswaps_per_sweep, self.burn_roundtrips)
   ntemps = len(self.temps)
   linear_hist = [(ntemps/(ntemps-1))*(x/10) for x in reversed(range(ntemps))]
   # Run chain with linearly spaced temperatures.
   self.RunForRoundtrips(chain, self.adaptation_roundtrips)
   pre_ntransitions = chain.statistics.transitions
   pre_hist = chain.statistics.replica.hist
   # Verify that the histogram of replica directions is far from linear.
   self.assertGreater(distance.cityblock(linear_hist, pre_hist), 1.0)
   # Adapt temperatures.
   chain.AdaptTemperatures()
   chain.Reset()
   # Run chain with new temperatures.
   self.RunForRoundtrips(chain, self.adaptation_roundtrips)
   post_ntransitions = chain.statistics.transitions
   post_hist = chain.statistics.replica.hist
   # Verify that the histogram of replica directions is close to linear.
   self.assertLess(distance.cityblock(linear_hist, post_hist), 1.0)
   # Verify that the number of transitions necessary to reach the
   # same number of roundtrips is shorter after adaptation.
   self.assertLess(post_ntransitions, pre_ntransitions)
Example #43
 def calculate(self, row):
     seq1 = str(row['question1']).split()
     seq2 = str(row['question2']).split()
     seq1 = [word for word in seq1 if word in self.model]
     seq2 = [word for word in seq2 if word in self.model]
     if len(seq1) == 0 or len(seq2) == 0:
         return [0.0, 0.0, 0.0]
     vec_seq1 = [self.model[x] for x in seq1]
     vec_seq2 = [self.model[x] for x in seq2]
     vec_seq1 = np.array(vec_seq1).mean(axis=0)
     vec_seq2 = np.array(vec_seq2).mean(axis=0)
     cos_sim = 1 - cosine(vec_seq1, vec_seq2)
     euclidean_sim = 1 - euclidean(vec_seq1, vec_seq2)
     manhattan_sim = 1 - cityblock(vec_seq1, vec_seq2)
     return [cos_sim, euclidean_sim, manhattan_sim]
Example #44
def ae_to_distance(mat1, mat2, metric='euclidean', method='avg', cov=None):
    # Methods: min, max, avg, centroid
    # Metrics: euclidean, cityblock (Manhattan), hamming, correlation,
    # sqeuclidean, mahalanobis (requires a covariance matrix)
    if not method == 'centroid':
        vec_dist = cdist(mat1, mat2, metric=metric)
        if method == 'min':
            calc_dist = np.min(vec_dist)
        elif method == 'max':
            calc_dist = np.max(vec_dist)
        elif method == 'avg':
            calc_dist = np.nanmean(vec_dist)
        else:
            raise ValueError("Wrong name of method")
    else:
        cent_1 = np.mean(mat1, axis=0).reshape((1, -1))
        cent_2 = np.mean(mat2, axis=0).reshape((1, -1))
        if metric == 'euclidean':
            calc_dist = euclidean(cent_1, cent_2)
        elif metric == 'cityblock':
            calc_dist = cityblock(cent_1, cent_2)
        elif metric == 'hamming':
            calc_dist = hamming(cent_1, cent_2)
        elif metric == 'correlation':
            calc_dist = correlation(cent_1, cent_2)
        elif metric == 'sqeuclidean':
            calc_dist = sqeuclidean(cent_1, cent_2)
        elif metric == 'mahalanobis':
            if cov is None:
                raise ValueError("Insert covariance matrix")
            calc_dist = mahalanobis(cent_1, cent_2, VI=np.linalg.inv(cov))
        else:
            raise ValueError("Wrong name of metric")

    return calc_dist
def calculate_shap_distance(alert, counterfactuals):
    alert_counterfactuals = pd.concat([alert, counterfactuals], axis=0)
    alert_counterfactuals_shap = get_shap(alert_counterfactuals)

    dist = []

    for i in range(1, len(alert_counterfactuals.index)):
        dist.append(cityblock(alert_counterfactuals_shap[0], alert_counterfactuals_shap[i]))
        # sum = 0
        # for j in range(0, len(alert_counterfactuals_shap[i])):
        #     sum = sum + alert_counterfactuals_shap[0][j] - alert_counterfactuals_shap[i][j]
        #
        # dist.append(sum)

    return dist
Example #46
    def __init__(self, rad):
        """Constructor

        Parameters
        ----------

        rad: radius, in voxels, of the sphere inscribed in the
             searchlight cube, not counting the center voxel

        """
        super().__init__(rad)
        self.mask_ = np.zeros((2*rad+1, 2*rad+1, 2*rad+1), dtype=bool)
        for r1 in range(2*self.rad+1):
            for r2 in range(2*self.rad+1):
                for r3 in range(2*self.rad+1):
                    if(cityblock((r1, r2, r3),
                                 (self.rad, self.rad, self.rad)) <= self.rad):
                        self.mask_[r1, r2, r3] = True
Example #47
    def __calculateStatistics(self, result):
        """
        Create the RefDB and some statistics
        :param result:
        :return:
        """

        """
        Set the center sequence for each cluster
        """
        for cluster in result.cluster_set.all():

            # store the cluster representative
            min_dist = None
            representative = None
            statistics = {
                'dists-variance' : 0,
                'dists-amax' : 0,
                'dists-average' : 0,
                'size' : cluster.sequences.count(),
                'radius' : 0
            }

            if cluster.sequences:
                distances = []
                for sequence in cluster.sequences.all():

                    dist = cityblock(sequence.dna, cluster.centerMean)
                    if dist > statistics['radius']:
                        statistics['radius'] = dist

                    # check for None first so the comparison never touches None
                    if (min_dist is None) or (dist < min_dist):
                        min_dist = dist
                        representative = sequence

                    distances.append(dist)

                statistics['dists-variance'] = np.var(distances)
                statistics['dists-amax'] = np.amax(distances)
                statistics['dists-average'] = np.average(distances)
                cluster.statistics = statistics
                cluster.representative = representative

            cluster.save()
def compare_segments_image(seg1, seg2, slen, cwl):
	# slide a 50-column window across seg1 and keep the lowest Manhattan
	# distance to the flattened template seg2
	dist=[]
	for i in range(10):
		dist.append(cityblock(seg1[:,i:50+i].flatten(),seg2.flatten()))
	return min(dist)
def compHist(hist1, hist2, method, formula):
    """Compare two histograms with given method and formula.

    Parameters
    ----------
    hist1 : 1D array
        The first histogram
    hist2 : 1D array
        The second histogram
    method : str(cv integer)
        Options for method ('cv_comp', 'scipy_comp', 'kl_div')
    formula: str(cv integer)
        Options for formula.
        For method == 'cv_comp' (cv.CV_COMP_CORREL, cv.CV_COMP_CHISQR, cv.CV_COMP_INTERSECT, cv.CV_COMP_BHATTACHARYYA)
        For method == 'scipy_comp' ("Euclidean", "Manhattan", "Chebyshev")
        
    """

    ## using opencv
    if method == 'cv_comp':
        dis = cv2.compareHist(np.float32(hist1), np.float32(hist2), formula)
        if formula == cv.CV_COMP_CORREL:
            dis = -dis + 1

    ## using Scipy distance metrics
    if method == 'scipy_comp':
        if formula == 'Euclidean':
            dis = dist.euclidean(hist1, hist2)
        if formula == 'Manhattan':
            dis = dist.cityblock(hist1, hist2)
        if formula == 'Chebyshev':
            dis = dist.chebyshev(hist1, hist2)

    ## using symmetrized KL divergence; add-one smoothing avoids log(0)
    if method == 'kl_div':
        hist1 = np.float32(hist1) + 1
        hist2 = np.float32(hist2) + 1
        kbp = np.sum(hist1 * np.log(hist1 / hist2), 0)
        kbq = np.sum(hist2 * np.log(hist2 / hist1), 0)

        dis = np.double(kbp + kbq)/2

    return dis
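A hedged usage sketch exercising only the SciPy branch of compHist; the OpenCV branch would additionally need the legacy cv constants.

import numpy as np
from scipy.spatial import distance as dist

h1 = np.array([1.0, 2.0, 3.0])
h2 = np.array([2.0, 2.0, 2.0])
print(compHist(h1, h2, 'scipy_comp', 'Manhattan'))  # 2.0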
Example #50
def determineDistance(point1, point2, set_type=distance_type):

	'''
	This function determines the distance between two points in any number of dimensions.
	As such, it can also be used to determine the radius of a preference circle (by inputting
	a point and the status quo).
	'''
	if preference_shape == 'ellipse':
		point2 = (point2[0], point2[1])

	if set_type == 'pyth':
		# determines distance between two points using Pythagorean theorem
		distance = dist.euclidean(point1, point2)

	elif set_type == 'city-block':
		# determines the city-block or Manhattan distance between two points
		distance = dist.cityblock(point1, point2)

	return distance
Example #51
def find_block_types(world, type_, player='Player', sort=True,
                     chunks_to_search=9, limit=0):
    px, py, pz = world.getPlayerPosition(player=player)
    type_id = world.__getattribute__('materials').__getattribute__(type_).ID
    blocks = []
    try:
        for chunk in get_surrounding_chunks(world, num=chunks_to_search):
            cx, cz = chunk.chunkPosition
            mask = (chunk.Blocks == type_id)
            for bx, bz, y in get_block_pos_from_mask(mask):
                x = bx + (cx << 4)
                z = bz + (cz << 4)
                distance = cityblock((px, pz, py), (x, z, y))
                blocks.append((x, z, y, int(distance)))
    except ChunkNotPresent:
        pass
    if not blocks:
        return []
    if sort:
        blocks.sort(key=lambda tup: tup[-1])
    return blocks[0:min(len(blocks) - 1, limit - 1)]
	def get(self):
		args = self.parser.parse_args()
		node1, node2 = args['node1'], args['node2']
		nodeType1, nodeType2 = nodes[node1], nodes[node2]
		nodeKey1, nodeKey2 = args[nodeType1 + '1'], args[nodeType2 + '2']
		result1 = self.getVector(node1, nodeType1, nodeKey1)
		if not result1:
			raise ValueError("%s does not exist in database" % node1)
		result2 = self.getVector(node2, nodeType2, nodeKey2)
		if not result2:
			raise ValueError("%s does not exist in database" % node2)
		result1 = np.array([float(v) for v in result1[0][self.getVecName()].split(' ')])
		result2 = np.array([float(v) for v in result2[0][self.getVecName()].split(' ')])
		diff = ','.join(map(str, result1 - result2))
		inner = np.inner(result1, result2)
		l1, l2, cos = cityblock(result1, result2), euclidean(result1, result2), cosine(result1, result2)
		return json.dumps({'Difference': diff, 'Manhattan Distance': l1, 'Euclidean Distance': l2, 'Cosine Distance': cos, 'Inner Product': inner})
Example #53
    def assignPoints(self, centroids):
        # centroids
        # l = len(centroids)
        # for i in range(0,l):
        #     centroids[i]+=random.random()

        #set to True when there is a change in assigning points to clusters(centroids)
        changed = False

        assignedCentroids = pd.DataFrame()
        for i in self.df.index:
            distances = {}
            for c in centroids.index:
                if self.metric == "euclidean":
                    x = self.myEuclidean(self.df.loc[c], self.df.loc[i])
                if self.metric == "chebyshev":
                    x = chebyshev(self.df.loc[c], self.df.loc[i])
                if self.metric == "cityblock":
                    x = cityblock(self.df.loc[c], self.df.loc[i])
                # print"i: "+str(i)+" c: "+str(c)
                # print "self.df.loc[i]: "+ str(self.df.loc[i])
                # print "self.df.loc[c]: "+ str(self.df.loc[c])
                # print "x: "+str(x)
                #dictionary that stores centroid as a key and distance between point and centroid as a value
                distances[c] = x

            # find the minimum by comparing the second element of each tuple (values)
            m=min(distances.items(), key=lambda x: x[1])
            #m[0] is a key of a min value in a dictionary, so m[0] is centroid
            # point i 'belongs' to centroid m[0]
            # if not assignedCentroids.at[i,'centroids']==m[0]: #if centroid is changed
            #     changed=True
            # changed=True
            # assignedCentroids.at[i,'centroids']=m[0]
            assignedCentroids.at[i, 'centroids'] = m[0]
        return (assignedCentroids, changed)
Example #54
 def metrykaManhattan(self, array1, array2):
     """
     Computes the Manhattan distance between two n-vectors u and v,
     which is defined as
 
     .. math::
 
        \\sum_i {\\left| u_i - v_i \\right|}.
 
     Parameters
     ----------
     array1 : ndarray
         An :math:`n`-dimensional vector.
     array2 : ndarray
         An :math:`n`-dimensional vector.
 
     Returns
     -------
     d : double
         The City Block distance between vectors ``u`` and ``v``.
 
     """
     # dist = abs(xa-xb) + abs(ya-yb)
     return cityblock(array1,array2)
Example #55
def main():
    print "# KNN Classifier"
    parser = ld.parse_arguments()

    # printing args
    print('\t-k = ' + str(parser.k))
    print('\t-d = ' + parser.distance)

    stopwords = None
    if parser.stopwords_path:
        stopwords = ld.load_stopwords(parser.stopwords_path)

    voc = load_vocabulary(parser.train_path, stopwords)
    answers = load_answers(parser.train_path)

    train = transform(voc, parser.train_path)
    test = transform(voc, parser.test_path)

    # output file
    out_path = '../results/' + parser.distance + '_' + str(parser.k)
    out_path += '.txt'
    out_file = open(out_path, 'w')

    for point in test:
        neighbors = []
        for i in range(len(train)):
            neigh = train[i]
            distance = 0.0

            if parser.distance == 'cosine':
                distance = spd.cosine(neigh, point)
            elif parser.distance == 'jaccard':
                distance = spd.jaccard(neigh, point)
            elif parser.distance == 'euclidean':
                distance = spd.euclidean(neigh, point)
            elif parser.distance == 'dice':
                distance = spd.dice(neigh, point)
            elif parser.distance == 'correlation':
                distance = spd.correlation(neigh, point)
            elif parser.distance == 'manhattan':
                distance = spd.cityblock(neigh, point)
            else:
                print("ERROR! Invalid distance specified.", file=stderr)
                exit()

            tup = (distance, i)
            heapq.heappush(neighbors, tup)

        # return the highest k similar points
        top_k = heapq.nsmallest(parser.k, neighbors)

        # classifing
        classification = np.zeros(2)
        for (_, idi) in top_k:
            classe = answers[idi]
            classification[int(classe)] += 1

        # outputing classification
        if(classification[0] >= classification[1]):
            print('0', file=out_file)
            print('0')
        else:
            print('1', file=out_file)
            print('1')

    # outputting the results
    print()
    print("# Results saved to file: " + out_path)
    out_file.close()
    result.result("../data/imdb_test", out_path)
    def _build_phase_data(self):
        
        profile = pyscarphase.proto.meta.load_profile(self.args.profile)

        thread = profile.threads[self.args.thread]

        reader = pyscarphase.proto.data.DataReader(
            thread.profile.filename, 
            uuid=thread.profile.uuid
            )
        
        phases = {}
        for w in reader:
    
            #
            pid = w.phase_info.phase
               
            #
            if not pid in phases:
                phases[pid] = self.Phase(pid)
                phases[pid].centroid = \
                    np.zeros(len(w.phase_info.signature.fv_values))
    
            #
            phases[pid].centroid = \
                np.add(
                    phases[pid].centroid, 
                    w.phase_info.signature.fv_values[:]
                    )

        # Normalize centroid (L1)
        for p in phases.values():
            p.centroid = p.centroid / np.linalg.norm(p.centroid, 1)
            
        # Do second pass
        reader.seek(0)
        offset = 0
        for w in reader:
    
            #
            pid = w.phase_info.phase
            
            # Calc distance
            d = spd.cityblock(
                phases[pid].centroid, 
                w.phase_info.signature.fv_values[:]
                )

            #
            phases[pid].windows.append((offset, w.size, d))
            
            #
            offset += w.size
            
        # Order phases in descending length
        phases = sorted(
            phases.values(),
            key=lambda p: len(p.windows),
            reverse=True
            )
        
        #
        return phases
Example #57
	#print "[+] Matrix in use is: \n", a
	#print "==================================================================="
	normA = normalize(a, norm='l1')
	
	temp_max = np.zeros(k)
	temp_min = np.zeros(k)
	min_array = np.zeros(k)
	max_array = np.zeros(k)
	r = np.zeros(k)
	for i in range(k):
		temp_min[i] = float("inf")
		temp_max[i] = -1
	for i in range(k):
		for j in range(k):
			if i != j:
				# L1 distance between the normalized rows i and j
				d = dist.cityblock(normA[i], normA[j])
				if d < temp_min[i]:
					min_array[i] = d
					temp_min[i] = min_array[i]
				if d > temp_max[i]:
					max_array[i] = d
					temp_max[i] = max_array[i]

	for i in range(k):
		r[i] = min_array[i]/max_array[i]

	#print "[+] Min distances are: \n", min_array
	#print "==================================================================="
	#print "[+] Max distances are: \n", max_array
	#print "==================================================================="
	#print "[+] Ratios are: \n", r
	print "*******************************************************************"
Qcosines=cosine_similarity(QuestionTVectorArray[0:1],QuestionTVectorArray)
Acosines=cosine_similarity(AnswerTVectorArray[0:1],AnswerTVectorArray)

Qbray=[dist.braycurtis(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray]
Abray=[dist.braycurtis(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray]

Qcanberra=[dist.canberra(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray]
Acanberra=[dist.canberra(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray]

Qhamming=[dist.hamming(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray]
Ahamming=[dist.hamming(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray]

Qcorrelation=[dist.correlation(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray]
Acorrelation=[dist.correlation(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray]

Qcityblock=[dist.cityblock(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray]
Acityblock=[dist.cityblock(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray]

Qdice=[dist.dice(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray]
Adice=[dist.dice(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray]

Qyule=[dist.yule(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray]
Ayule=[dist.yule(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray]

#C_Q=np.histogram2d(QuestionTVectorArray[1],QuestionTVectorArray[1])[0]

#print "question mutual info-->",mutual_info_score(None,None,contigency=C_Q)#QuestionTVectorArray[0:1],QuestionTVectorArray)
#QuestionVectorArray=Qvectorizer.fit_transform(all_questions).toarray()
#AnswerVectorArray=Avectorizer.fit_transform(all_answers).toarray()

#QUserinputVectorArray=Qvectorizer.transform(userinput).toarray()
def cityblock(pair):
    # unpack the (x, y) pair explicitly (tuple parameters are not valid Python 3)
    x, y = pair
    return distance.cityblock(x, y)
Example #60
def wvCity(a):
	return [distance.cityblock(x[0], x[1]) for x in a]