Beispiel #1
0
    def _calc_dist_2(words, w2v_model, distance_type, t):
        l1_dist = 0
        l2_dist = 0
        cos_dist = 0
        coord_dist = 0
        t = float(t)

        for word_id1 in range(len(words)):
            for word_id2 in range(word_id1 + 1, len(words)):
                # Calcular L1 w2v metric
                l1_dist += (sci_dist.euclidean(w2v_model[words[word_id1]],
                                               w2v_model[words[word_id2]]))

                # Calcular L2 w2v metric
                l2_dist += (sci_dist.sqeuclidean(w2v_model[words[word_id1]],
                                                 w2v_model[words[word_id2]]))

                # Calcular cos w2v metric
                cos_dist += (sci_dist.cosine(w2v_model[words[word_id1]],
                                             w2v_model[words[word_id2]]))

                # Calcular coordinate w2v metric
                coord_dist += (sci_dist.sqeuclidean(
                    w2v_model[words[word_id1]], w2v_model[words[word_id2]]))

        if distance_type == 'l1_dist':
            return l1_dist / (t * (t - 1.0))
        elif distance_type == 'l2_dist':
            return l2_dist / (t * (t - 1.0))
        elif distance_type == 'cos_dist':
            return cos_dist / (t * (t - 1.0))
        elif distance_type == 'coord_dist':
            return coord_dist / (t * (t - 1.0))

        return .0
Beispiel #2
0
def naive_set_right_view(comparable_views, current_view_list, key_list,
                         val_list, no_of_zoom, x, y):
    """Select a candidate "right" view from ``comparable_views``.

    Each key of ``comparable_views`` is a string of two Python literals
    separated by ``"; "`` (parsed with ``ast.literal_eval``); each value is a
    feature list ("scags") that is compared against ``current_view_list`` with
    the squared Euclidean distance.

    The function reads ``left_view_x`` and ``diff1_scags``, which are not
    defined here (presumably module-level globals -- verify against the rest
    of the file).

    NOTE(review): nothing is returned; ``right_view_x`` / ``right_view_y``
    are plain locals, so the result is discarded unless the caller obtains it
    some other way. ``y`` and ``window_y`` are never used.
    """
    # Zoom-dependent margin. NOTE(review): this value is unconditionally
    # overwritten by ``space = 0`` below, so the zoom level currently has no
    # effect on the overlap test.
    if no_of_zoom == 1:
        space = 1.0
    elif no_of_zoom == 2:
        space = 0.75
    else:
        space = 0.5

    diff2 = 0
    diff_1_2 = 0
    heap = []
    # Ten slots each (despite the "three_best" name); 0 marks an unused slot.
    three_best_diff2 = [0 for i in range(10)]
    view_list = [0 for i in range(10)]
    space = 0

    for view, scags in comparable_views.items():

        window = view.split("; ")
        window_x = ast.literal_eval(window[0])
        window_y = ast.literal_eval(window[1])

        # Skip windows whose x-range lies entirely inside the (margin-padded)
        # x-range of the left view or of the current view ``x``.
        if ((((left_view_x[0] - space) <= window_x[0] <=
              (left_view_x[1] + space)) and
             ((left_view_x[0] - space) <= window_x[1] <=
              (left_view_x[1] + space)))) or (((x[0] - space) <= window_x[0] <=
                                               (x[1] + space)) and
                                              ((x[0] - space) <= window_x[1] <=
                                               (x[1] + space))):
            continue

        view_diff = distance.sqeuclidean(np.asarray(scags),
                                         np.asarray(current_view_list))

        # NOTE(review): this ``continue`` only advances the inner loop, so the
        # near-duplicate check has no effect on whether the view is kept.
        for ele in heap:
            if (ele - 0.05) <= view_diff <= (ele + 0.05):
                continue

        # Min-heap capped at ten entries: once full, heappushpop discards the
        # smallest value, so the heap retains the ten largest differences.
        if len(heap) < 10:
            heappush(heap, view_diff)
        else:
            heappushpop(heap, view_diff)
        # Record the view in the slot matching its current heap position.
        # NOTE(review): heap positions shift on later pushes, so slots can
        # become stale or be overwritten -- confirm this is intended.
        if view_diff in heap:
            three_best_diff2[heap.index(view_diff)] = scags
            view_list[heap.index(view_diff)] = view

    # Among the recorded candidates keep the one farthest from diff1_scags.
    # NOTE(review): the comparison uses the Euclidean distance but the stored
    # threshold is the squared Euclidean distance.
    for ele in three_best_diff2:
        if ele == 0:
            continue
        if distance.euclidean(np.asarray(ele),
                              np.asarray(diff1_scags)) > diff_1_2:
            diff_1_2 = distance.sqeuclidean(np.asarray(ele),
                                            np.asarray(diff1_scags))
            diff2 = ele

    # Map the chosen feature list back to its view key.
    # NOTE(review): if no candidate was recorded, diff2 is still 0 and
    # val_list.index(0) raises ValueError unless 0 happens to be in val_list.
    view = key_list[val_list.index(diff2)]
    right = view.split("; ")
    right_view_x = ast.literal_eval(right[0])
    right_view_y = ast.literal_eval(right[1])
Beispiel #3
0
def calcDisPosMin2(puntoEnX, centroides, data):
    """Locate the centroid nearest to ``puntoEnX``.

    Args:
        puntoEnX: the query point.
        centroides: non-empty indexable collection of centroids.
        data: unused; kept for interface compatibility.

    Returns:
        ``(distance, position)``: the smallest squared Euclidean distance and
        the index of the centroid that produced it. On ties the lowest index
        wins.
    """
    best_dist = distance.sqeuclidean(puntoEnX, centroides[0])
    best_pos = 0
    for idx in range(1, len(centroides)):
        candidate = distance.sqeuclidean(puntoEnX, centroides[idx])
        if candidate < best_dist:
            best_dist, best_pos = candidate, idx
    return best_dist, best_pos
Beispiel #4
0
def topic_w2v(topic_words, word_embedding):
    """Word Embedding topic quality metrics for a single topic.

    Sums the pairwise distances between the embedding vectors of the topic
    words and divides each sum by ``n * (n - 1)``, where ``n`` is the number of
    topic words (including words that have no embedding).

    Args:
        topic_words: list
            Words that compose one individual topic.
        word_embedding: gensim.KeyedVectors
            Mapping between words and vectors; a missing word must raise
            ``KeyError``. Pairs involving a missing word are skipped.

    Returns:
        cosine_distance: float
            Resultant Cosine Distance for the topic.
        l1_distance: float
            Resultant "L1" metric (computed with the Euclidean distance).
        l2_distance: float
            Resultant "L2" metric (computed with the squared Euclidean
            distance).
        coordinate_distance: float
            Resultant Coordinate Distance metric (also the squared Euclidean
            distance, hence always equal to ``l2_distance``).

        All four values are ``0.0`` when the topic has fewer than two words,
        since no pair exists (this used to raise ``ZeroDivisionError``).
    """
    n_top = len(topic_words)
    if n_top < 2:
        return 0.0, 0.0, 0.0, 0.0

    t = float(n_top)
    t = t * (t - 1.0)

    # Look every word up once instead of once per pair; words without an
    # embedding are dropped, which skips exactly the pairs the pairwise
    # try/except used to skip.
    vectors = []
    for word in topic_words:
        try:
            vectors.append(word_embedding[word])
        except KeyError:
            continue

    cosine_distance = 0.0
    l1_distance = 0.0
    l2_distance = 0.0

    for idx, word_i in enumerate(vectors):
        for word_j in vectors[idx + 1:]:
            cosine_distance += sci_dist.cosine(word_i, word_j)
            l1_distance += sci_dist.euclidean(word_i, word_j)
            l2_distance += sci_dist.sqeuclidean(word_i, word_j)

    cosine_distance = cosine_distance / t
    l1_distance = l1_distance / t
    l2_distance = l2_distance / t
    # Same metric as l2_distance, so reuse the sum rather than recompute it.
    coordinate_distance = l2_distance

    return cosine_distance, l1_distance, l2_distance, coordinate_distance
Beispiel #5
0
    def getSigmaDerivKernelMat(self, FMat1, FMat2):
        """
        Returns the derivative of the kernel matrix w.r.t. sigma.

        Entry ``[j][i]`` of the result is
        ``1 / sigma**3 * ||FMat1[i] - FMat2[j]||^2 * single_comparison(FMat1[i], FMat2[j], sigma)``,
        i.e. rows follow ``FMat2`` and columns follow ``FMat1``.

        The full kernel matrix and distance matrix that were previously
        computed here were never used and have been dropped.
        """
        sigma = self.sigma
        SDKernelMat = np.array([[
            1 / sigma**3 * sqeuclidean(f1, f2) *
            self.single_comparison(f1, f2, sigma)
            for f1 in FMat1
        ] for f2 in FMat2])

        return SDKernelMat
Beispiel #6
0
def getLshNN_op1(dataset, nnModel, thred_radius_dist, trained_num_probes,
                 thred_sameTweetDist):
    """Greedily cluster the rows of ``dataset`` using near-neighbour queries.

    Rows are visited in order. A row that already belongs to a cluster is
    skipped; otherwise it opens a new cluster, ``nnModel`` is queried for its
    neighbours within ``thred_radius_dist``, and every neighbour whose index is
    within 130000 positions of the row and whose squared Euclidean distance to
    the row is below ``thred_sameTweetDist`` is assigned to the new cluster
    (overwriting any earlier assignment).

    Args:
        dataset: 2-D array, one item per row.
        nnModel: object providing ``set_num_probes(n)`` and
            ``find_near_neighbors(vector, radius)`` returning row indices.
        thred_radius_dist: radius passed to the neighbour query.
        trained_num_probes: probe count set before every query.
        thred_sameTweetDist: squared-distance threshold for joining a cluster.

    Returns:
        ``(ngIdxList, indexedInCluster, clusters)``: an array with the raw
        neighbour list per row (``None`` for rows that were skipped), a dict
        mapping row index to cluster index, and a list holding the seed row of
        each cluster.
    """
    ngIdxList = []
    indexedInCluster = {}
    clusters = []
    for dataidx in range(dataset.shape[0]):
        if dataidx in indexedInCluster:
            nn_keys = None
        else:
            clusterIdx = len(clusters)
            indexedInCluster[dataidx] = clusterIdx
            clusters.append([dataidx])

            nnModel.set_num_probes(trained_num_probes)
            # nn_keys: (id1, id2, ...)
            nn_keys = nnModel.find_near_neighbors(dataset[dataidx, :],
                                                  thred_radius_dist)

            for key in nn_keys:
                if (dataidx - 130000 < key < dataidx + 130000
                        and sqeuclidean(dataset[dataidx, :],
                                        dataset[key, :]) < thred_sameTweetDist):
                    indexedInCluster[key] = clusterIdx

        ngIdxList.append(nn_keys)
        if (dataidx + 1) % 10000 == 0:
            # Single pre-formatted argument: prints the same text under both
            # Python 2 and Python 3.
            print("## completed %s %s %s" % (dataidx + 1, len(clusters),
                                            time.asctime()))
    try:
        ngIdxList = np.asarray(ngIdxList)
    except ValueError:
        # Recent NumPy refuses to build an array from ragged input (lists of
        # different lengths mixed with None); fall back to the 1-D object
        # array that older NumPy produced implicitly.
        ragged = np.empty(len(ngIdxList), dtype=object)
        for pos, item in enumerate(ngIdxList):
            ragged[pos] = item
        ngIdxList = ragged
    return ngIdxList, indexedInCluster, clusters
def Dist(array1, array2, dist, **kwargs):
    """Compute the distance or correlation statistic named by ``dist``.

    Args:
        array1: first 1-D input vector.
        array2: second 1-D input vector.
        dist: one of ``'braycurtis'``, ``'correlation'``, ``'mahalanobis'``,
            ``'minkowski'``, ``'seuclidean'``, ``'sqeuclidean'`` (forwarded to
            the function of the same name in ``distance``), or
            ``'pearsonr'`` / ``'pearsonp'`` / ``'spearmanr'`` / ``'spearmanp'``
            (the coefficient or the p-value of the respective test).
        **kwargs: extra keyword arguments forwarded to the ``distance``
            function. This is how the arguments some metrics require are
            supplied, e.g. ``VI=`` for ``'mahalanobis'`` and ``V=`` for
            ``'seuclidean'``; without them those two metrics raise
            ``TypeError``, exactly as before. Ignored for the correlation
            tests.

    Returns:
        The requested value, or ``None`` when ``dist`` is not recognised.
    """
    spatial_metrics = ('braycurtis', 'correlation', 'mahalanobis',
                       'minkowski', 'seuclidean', 'sqeuclidean')
    if dist in spatial_metrics:
        return getattr(distance, dist)(array1, array2, **kwargs)

    if dist in ('pearsonp', 'pearsonr'):
        r, p = pearsonr(array1, array2)
        return r if dist == 'pearsonr' else p

    if dist in ('spearmanp', 'spearmanr'):
        r, p = spearmanr(array1, array2)
        return r if dist == 'spearmanr' else p

    return None
Beispiel #8
0
def random_walk_matrix(matrix, startVector, R, maxIterations, normThreshold):
    """
    Runs Random Walk with Restart using a matrix implementation

    Each step computes ``(1 - R) * matrix @ previous + R * startVector`` and
    the walk stops once the squared Euclidean distance between two consecutive
    vectors is no longer greater than ``normThreshold`` or ``maxIterations``
    steps have been performed.

    @param matrix: numpy array, normalized adjacency matrix of entire PPI network
    @param startVector: numpy array, contains weighted start probabilities
    @param R: float, probability of restart parameter
    @param maxIterations: integer, maximum number of iterations to run
    @param normThreshold: float, threshold at which the algorithm stops running if the difference between two steps is not greater than it

    @returns numpy array, final vector containing ranked proteins; a copy of
             startVector when no step is performed (e.g. maxIterations <= 0),
             which previously raised UnboundLocalError
    """
    print("STARTING RANDOM WALK")

    previousVector = np.copy(startVector)
    # Defined up front so the function still returns a vector when the loop
    # body never executes.
    newVector = previousVector
    iterations = 0
    diff = float('inf')

    while diff > normThreshold and iterations < maxIterations:
        print("iteration:", iterations)

        # Perform one step of the walk
        newVector = (1 - R) * np.matmul(matrix, previousVector)
        newVector = np.add(newVector, R * startVector)

        diff = distance.sqeuclidean(newVector, previousVector)
        previousVector = newVector
        iterations += 1

    return newVector
Beispiel #9
0
def rbfKernel_Inst(x1, x2, gamma):
    """
    RBF kernel value for a single pair of instances:
    ``exp(-gamma * ||x1 - x2||^2)``.
    """
    return np.exp(-gamma * sqeuclidean(x1, x2))
Beispiel #10
0
def recommend(time, user_features, choices):
    """Return the article to recommend for ``user_features``.

    Stores ``user_features`` in the global ``last_user_features``. When the
    global ``hits`` counter is positive, each of the six entries of ``models``
    predicts one article feature and the choice whose entry in
    ``articles_all`` is closest (squared Euclidean distance) becomes the
    global ``recommended``. If ``recommended`` is still ``None`` afterwards, a
    random choice is drawn.

    ``time`` is not used. Note that ``recommended`` is module state, so a
    value left over from an earlier call is returned when neither branch
    updates it.
    """
    global recommended, last_user_features, hits, articles_all
    last_user_features = user_features

    if hits > 0:
        # Exploit: predict the article features from the user features ...
        features = [models[i].predict([user_features])[0] for i in range(6)]

        # ... and pick the nearest available article.
        smallest = 1e99
        for candidate in choices:
            gap = sqeuclidean(features, articles_all[candidate])
            if gap < smallest:
                smallest = gap
                recommended = candidate

    if recommended is None:
        # Explore
        recommended = np.random.choice(choices)

    # An article is always returned.
    return recommended
Beispiel #11
0
def distance(S,T):
  """
  Slide the shorter sequence along the longer one and return the smallest
  alignment cost, where the cost of an alignment is the sum of the squared
  Euclidean distances between aligned elements (no square root is taken).

  An alignment is abandoned as soon as its partial sum reaches the best cost
  found so far, because further terms can only increase it.
  """
  shorter, longer = (S, T) if len(S) <= len(T) else (T, S)
  offsets = len(longer) - len(shorter) + 1

  best = np.inf

  for shift in range(offsets):
    running = 0.0

    for pos, item in enumerate(shorter):
      running += sqeuclidean(item, longer[shift + pos])

      if running >= best:
        # Early abandon: this alignment cannot beat the current best.
        break
    else:
      # The whole alignment stayed below the previous best.
      best = running

  return best
Beispiel #12
0
def euclidian_distance(dataset, sim_dataset):
    """Sum the squared Euclidean distances between matching columns.

    Column ``i`` of ``sim_dataset`` is compared with column ``i`` of
    ``dataset`` for every column of ``dataset``; the result is the total
    squared error (``0`` when ``dataset`` has no columns).
    """
    from scipy.spatial.distance import sqeuclidean

    return sum(
        sqeuclidean(sim_dataset[:, col], dataset[:, col])
        for col in range(dataset.shape[1])
    )
    def rrt(self, max_iterations=1000000):
        """Grow a buffer of visited states with a random-tree style search.

        Every iteration samples a random 2-D target, resets the first
        environment's underlying state to the buffered state closest to that
        target (squared Euclidean distance), applies one random action and
        appends the resulting observation to ``self.state_buffer``. The
        process prints ``"DAMN!"`` and exits as soon as the first component of
        an observation exceeds 0.5.

        ``self.snapshot_buffer`` is reset to an empty list; nothing is
        returned.
        """
        def sample_target():
            # The first coordinate is drawn before the second one.
            first = random.uniform(-1.2, 0.6)
            second = random.uniform(-0.07, 0.076)
            return (first, second)

        env = self.envs[0]
        self.snapshot_buffer = []
        observation = env.reset()
        self.state_buffer = [observation]

        for iteration in range(max_iterations):
            target = sample_target()

            def gap_to_target(state):
                return distance.sqeuclidean(state, target)

            env.unwrapped.state = min(self.state_buffer, key=gap_to_target)
            action = env.action_space.sample()
            print(iteration)

            # Exactly one environment step per iteration.
            observation, reward, done, info = env.step(action)
            if observation[0] > 0.5:
                print("DAMN!")
                exit()

            self.state_buffer.append(observation)
            if done:
                observation = env.reset()
Beispiel #14
0
def compute_distance(query_channel, channel, mean_vec, distance_type='eucos'):
    """ Compute the specified distance type between channels of mean vector and query image.
    In caffe library, FC8 layer consists of 10 channels. Here, we compute the
    distance of each channel (from query image) with the respective channel of
    the Mean Activation Vector. In the paper, we considered a hybrid distance eucos which
    combines euclidean and cosine distance for bounding open space. Alternatively,
    other distances such as euclidean or cosine can also be used.

    Input:
    --------
    query_channel: Particular FC8 channel of query image
    channel: channel number under consideration (row index into mean_vec)
    mean_vec: mean activation vector (2-D array, one row per channel)
    distance_type: 'eucos' (euclidean / 200 + cosine), 'euclidean'
                   (euclidean / 200), 'cosine', or the value returned by
                   DistanceType.squeuclidean() (squared euclidean)

    Output:
    --------
    query_distance : Distance between respective channels

    Raises:
    --------
    ValueError : if distance_type is not one of the supported values
                 (previously this printed a message and then failed with
                 UnboundLocalError)
    """
    channel_mean = mean_vec[channel, :]

    if distance_type == 'eucos':
        return (spd.euclidean(channel_mean, query_channel) / 200. +
                spd.cosine(channel_mean, query_channel))
    if distance_type == 'euclidean':
        return spd.euclidean(channel_mean, query_channel) / 200.
    if distance_type == 'cosine':
        return spd.cosine(channel_mean, query_channel)
    if distance_type == DistanceType.squeuclidean():
        return spd.sqeuclidean(channel_mean, query_channel)

    raise ValueError(
        "distance type not known: enter either of eucos, euclidean or cosine"
        " (got %r)" % (distance_type,))
Beispiel #15
0
def distance_between_rows(X):
    """Compute the Euclidean distance between every pair of rows of X.

    Parameters
    ----------
    X : array_like or ndarray.
        Input data; validated with ``check_array``.

    Returns
    -------
    dist : ndarray (2D).
        Symmetric matrix of the distances between rows, with zeros on the
        diagonal.

    Examples
    --------

    >>> x = np.array(([2, 4, 3], [1, 5, 7], [8, 6, 9])).T
    >>> distance_between_rows(x)
        [[ 0.          4.89897949  6.164414  ]
         [ 4.89897949  0.          3.74165739]
         [ 6.164414    3.74165739  0.        ]]
    """
    X = check_array(X)
    n_rows = X.shape[0]
    dist = np.zeros((n_rows, n_rows))
    # Visit each unordered pair once and mirror the value across the diagonal.
    for upper in range(1, n_rows):
        for lower in range(upper):
            gap = np.sqrt(sqeuclidean(X[lower], X[upper]))
            dist[lower, upper] = gap
            dist[upper, lower] = gap
    return dist
Beispiel #16
0
    def getDistMat(self, FMat1, FMat2):
        """
        Returns the matrix of squared Euclidean distances between the feature
        vectors of FMat1 and FMat2. Rows follow FMat2 and columns follow
        FMat1: (distmat)_ji = ||FMat1[i] - FMat2[j]||^2
        """
        rows = []
        for f2 in FMat2:
            rows.append([sqeuclidean(f1, f2) for f1 in FMat1])
        return np.array(rows)
Beispiel #17
0
 def c_kmeanspp(self):
     """Choose ``self.k`` initial centroids from ``self.X`` (k-means++ seeding).

     The first centroid is a uniformly random row of ``self.X``. Every further
     centroid is drawn with probability proportional to the squared Euclidean
     distance between a row and its nearest already-chosen centroid.

     Changes over the previous implementation:
       * the nearest-centroid distances are updated incrementally instead of
         being recomputed against every centroid in every round;
       * if all rows coincide with already-chosen centroids (total distance
         0) a row is drawn uniformly, instead of dividing 0 by 0 and leaving
         the centroid as a row of zeros;
       * if floating-point rounding leaves the last cumulative probability
         below the random draw, the last row with a non-zero distance is
         used instead of leaving the centroid as a row of zeros.

     Returns:
         ndarray of shape ``(self.k, n_features)`` holding the centroids.
     """
     n_puntos = self.X.shape[0]
     centroides = np.zeros((self.k, self.X[0].shape[0]))
     centroides[0] = self.X[np.random.randint(0, high=n_puntos)]

     # Squared distance from each row to its nearest centroid chosen so far.
     distancias_cuadradas = np.full(n_puntos, np.inf)
     for i in range(1, self.k):
         ultimo = centroides[i - 1]
         for j in range(n_puntos):
             d = distance.sqeuclidean(self.X[j], ultimo)
             if d < distancias_cuadradas[j]:
                 distancias_cuadradas[j] = d

         total = np.sum(distancias_cuadradas)
         if total > 0:
             acumulada = np.cumsum(distancias_cuadradas / total)
             ran = np.random.rand()
             # First index whose cumulative probability exceeds the draw.
             elegido = int(np.searchsorted(acumulada, ran, side='right'))
             if elegido >= n_puntos:
                 elegido = int(np.flatnonzero(distancias_cuadradas)[-1])
         else:
             elegido = np.random.randint(0, high=n_puntos)
         centroides[i] = self.X[elegido]
     return centroides
Beispiel #18
0
def match_person(path, person, threshold=0.0):
    """
    Finds the match for the person among people in the DB

    :param path: path of the CSV file holding the known people; its
        'Unnamed: 0' column becomes the index (the person name)
    :param person: DataFrame describing the person to match; it is
        re-indexed by the module-level ``ID`` column
    :param threshold: distance threshold. NOTE(review): the code returns
        ``(None, None)`` when the best distance is *below* the threshold,
        whereas this parameter was originally described as a maximum distance
        ("if min distance > threshold, None is returned"). The comparison is
        left unchanged because flipping it would alter results for the
        default ``threshold=0.0`` -- confirm the intended direction.
    :return: ``(name, feature)`` where ``name`` is the closest entry (squared
        Euclidean distance) and ``feature`` is the column in which the person
        differs least from that entry, or ``(None, None)`` when the DB is
        empty or the threshold check rejects the match
    """
    df = pd.read_csv(path)

    df.set_index(['Unnamed: 0'], inplace=True)
    if 'Unnamed: 0.1' in df.columns:
        del df['Unnamed: 0.1']
    person = person.set_index([ID])
    if df.empty:
        return None, None

    distances = {}
    rows = {}
    for name, value in df.iterrows():
        distances[name] = sqeuclidean(person, value)
        rows[name] = value

    # min() returns the first minimal key, like sorted(...)[0] did.
    best = min(distances, key=distances.get)
    if distances[best] < threshold:
        return None, None

    # Compare against the best match's own row; the previous code used
    # whichever row the loop happened to visit last.
    diff = abs(person - rows[best])
    feature = diff.idxmin(axis=1)[0]
    return best, feature
Beispiel #19
0
def get_belief_given_observation(image_path, encoder, aspect_nodes_path):
    """Turn an observed image into three belief vectors over the aspect nodes.

    The image at ``image_path`` is encoded with ``encoder`` and the flattened
    feature vector is compared with every entry of the ``'aspect_nodes'``
    array stored in ``aspect_nodes_path``.

    Returns:
        ``(belief_inverse_dist, belief_squared_dist, belief_negative_dist)``:
        * inverse Euclidean distance, normalised to sum to 1;
        * inverse squared Euclidean distance, normalised to sum to 1;
        * negated Euclidean distance, shifted by ``min + max`` and then
          divided by its sum.
    """
    started = time.time()

    aspect_nodes = np.load(aspect_nodes_path)['aspect_nodes']
    node_count = len(aspect_nodes)

    tensor = to_var(image_to_tensor(Image.open(image_path)))
    encoder_feat = encoder(tensor).view(-1).data.numpy()

    belief_inverse_dist = np.zeros(node_count)
    belief_negative_dist = np.zeros(node_count)
    belief_squared_dist = np.zeros(node_count)

    for i, aspect_node in enumerate(aspect_nodes):
        gap = distance.euclidean(encoder_feat, aspect_node)
        squared_gap = distance.sqeuclidean(encoder_feat, aspect_node)
        # The small constant keeps the inverse finite for an exact match.
        belief_inverse_dist[i] = 1./(1e-8 + gap)
        belief_negative_dist[i] = -gap
        belief_squared_dist[i] = 1./(1e-8 + squared_gap)

    belief_inverse_dist /= belief_inverse_dist.sum()
    belief_squared_dist /= belief_squared_dist.sum()

    shift = belief_negative_dist.min() + belief_negative_dist.max()
    belief_negative_dist -= shift
    belief_negative_dist /= belief_negative_dist.sum()

    print('getting belief took %.2f secs'%(time.time()-started))
    return belief_inverse_dist, belief_squared_dist, belief_negative_dist
Beispiel #20
0
    def intersects(self, src, dest=None):
        r'''
        If dest is None then it returns true if src is inside the region,
        otherwise it returns true if the line segment from src to dest
        intersects the region.

        For a segment, the point of the segment closest to the center is
        found by projecting the center onto the segment and clamping the
        projection parameter to [0, 1]:

        .. math ::

            w = x_{center} - x_0

            u = x_1 - x_0

            \lambda = clamp((w \cdot u) / (u \cdot u), 0, 1)

            d = norm(w - \lambda u)

            return \ d <= radius

        A zero-length segment (src equal to dest) is treated as the single
        point src instead of dividing by zero.
        '''
        if isinstance(src, Point):
            src = src.coords

        # Explicit None test: truth-testing a coordinate array is ambiguous
        # and raises for arrays with more than one element.
        if dest is None:
            return euclidean(self.center, src) <= self.radius

        if isinstance(dest, Point):
            dest = dest.coords

        w = self.center - src
        u = (dest - src)
        # dot(u, u) is the squared length of the segment; the distance
        # helpers are avoided here because recent SciPy versions reject the
        # scalar 0 that used to stand in for the origin.
        seg_len_sq = dot(u, u)
        if seg_len_sq == 0:
            lambd = 0
        else:
            lambd = min(max(dot(w, u) / seg_len_sq, 0), 1)
        closest = w - lambd * u
        dist = dot(closest, closest) ** 0.5
        return dist <= self.radius
Beispiel #21
0
def _transformcellrbf(packet):
    """Worker routine: run one RBF-matched self-organising-map update per
    entry of ``irr_list`` on this process's share of the cell map.

    ``packet`` is an indexable bundle of nine items (see the unpacking
    below). For every ``irr`` the worker finds its best-matching cell for
    ``samplevec_list[irr]``, reports it through ``minpipeend``, waits for the
    reply, and then pulls every cell within ``radius_list[irr]`` of the
    replied position towards the sample.

    The function has no return statement; results leave through the pipes
    (and through in-place changes to ``group_df``).

    NOTE(review): written for Python 2 (``except Exception, msg`` and print
    statements); it does not parse under Python 3.
    """

    group_df = packet[0]        # cell table; needs x_position, y_position, pick_count and the paramlist columns
    minpipeend = packet[1]      # pipe end used to report the local minimum and receive the reply
    samplevec_list = packet[2]  # sample vectors, indexed by irr
    radius_list = packet[3]     # neighbourhood radius, indexed by irr
    learconst_list = packet[4]  # learning constant, indexed by irr
    irr_list = packet[5]        # iteration indices to process
    paramlist = packet[6]       # names of the feature columns
    plotpipe = packet[7]        # pipe used to send the table for plotting
    shouldplot = packet[8]      # whether to send the table after each iteration
    selfpid = os.getpid()
    # NOTE(review): under Python 2 this is integer division, so gamma is 0
    # whenever paramlist has more than one entry -- confirm intended.
    gamma = 1/len(paramlist)

    for irr in irr_list:
        samplevec = samplevec_list[irr]
        try:
            # Find this worker's best-matching cell.
            # NOTE(review): the exponent is positive and the squared distance
            # is squared again, unlike the usual exp(-gamma * d^2) -- confirm.
            func = lambda row: abs(1 - np.exp(gamma *sqeuclidean(row.as_matrix(columns=paramlist), samplevec)**2))
            rbf_df = group_df.apply(func, axis=1)
            mindiff = rbf_df.min(skipna=True)
            minindex = rbf_df.argmin(skipna=True)
            minposition = group_df.loc[minindex, ['x_position', 'y_position']].as_matrix()
        except Exception, msg:
            # Diagnostics only: execution continues after this handler, and
            # the names printed here (and used below) may be unbound or left
            # over from the previous iteration.
            print 'Msg: ', msg
            print 'samplevec', samplevec
            print 'irr: ', irr
            print 'rfb_df: ', rbf_df
            print 'mindiff', mindiff
            print 'argmin: ', rbf_df.argmin(skipna=True)

        # Give the result to the main process and wait for its response.
        minpipeend.send({'job': 'min', 'mindiff': mindiff, 'position': minposition, 'pid': selfpid})
        # NOTE(review): if poll(600) reports nothing, ``letter`` keeps its
        # value from the previous iteration (or is unbound on the first one).
        if minpipeend.poll(600):
            letter = minpipeend.recv()

        # Perturb the cell map around the position contained in the reply.
        minposition, minpid = letter
        if minpid == selfpid:
            # The replied pid is this worker's: count the pick.
            group_df.loc[minindex, 'pick_count'] += 1

        # Updates are collected first and written back afterwards, so every
        # cell is updated from the state the table had before this pass.
        output_df = pnd.DataFrame(columns=paramlist)
        for index in group_df.index.values:
            distance = np.linalg.norm(group_df.loc[index, ['x_position', 'y_position']].as_matrix() - minposition)
            if distance < radius_list[irr]:
                # Weight falls linearly from 1 at the centre to 0 at the radius.
                smoothkernel = (radius_list[irr]-distance)/radius_list[irr]
                cellvec = group_df.loc[index].as_matrix(columns=paramlist)
                learnconstant = learconst_list[irr]

                # SOM update: move the cell vector towards the sample.
                output = cellvec + learnconstant * smoothkernel * (samplevec - cellvec)

                output_df.loc[index] = output
        for index in output_df.index.values:
            group_df.loc[index, paramlist] = output_df.loc[index, paramlist]

        # Send group_df back to the main process to be plotted.
        if shouldplot:
            plotpipe.send({'job' :'plot', 'groupdf': group_df, 'pid':selfpid})
Beispiel #22
0
 def euclidean(self, x, y):
     """
     Euclidean distance between two 1D arrays, obtained as the square root
     of the squared Euclidean distance.

     :param x: 1D detector or training/test array (a normal python array)
     :param y: 1D training/test or detector array (a normal python array)
     :return: the Euclidean distance between x and y
     """
     return math.sqrt(dist.sqeuclidean(x, y))
    def weight(self, angOrg, angTarg, pOrg, pTarg, b):
        """Exponentially decaying weight between two points.

        Computes ``(1 / (2 * b)) * exp(-d / b)`` where ``d`` is the squared
        Euclidean distance between ``pOrg`` and ``pTarg``. The angle
        arguments ``angOrg`` and ``angTarg`` are accepted but not used.
        """
        squared_gap = distance.sqeuclidean(pOrg, pTarg)
        scale = (1 / (2 * b))
        decay = np.exp(-1 * (squared_gap / b))
        return scale * decay
Beispiel #24
0
def _signals_distance(real_signal, model_signal):
    """
    Computes distance as 1 - R_squared statistic
    """
    residuals = distance.sqeuclidean(real_signal, model_signal)
    mean = float(sum(real_signal)) / len(real_signal)
    variance = sum((x - mean) ** 2 for x in real_signal)
    return residuals / variance
def squared_exp_cov(x_p, x_q):
    """Squared exponential covariance between the outputs f(x_p) and f(x_q)
    for the input vectors x_p and x_q, as per Eq. 2.16 of R&W:
    ``exp(-0.5 * ||x_p - x_q||^2)``.

    NOTE: sqeuclidean handles a single pair of vectors, whereas the sq_dist
    function from the code accompanying the book calculates ALL pairwise
    distances between column vectors of two matrices."""
    squared_gap = sqeuclidean(x_p, x_q)
    return np.exp(-0.5 * squared_gap)
 def rgb(self, value=PAD()):
     """
     RGB values for discrete features: the colour of the feature vector in
     self.f_vec that scores as the strongest match for the mood.

     :param value: mood value wrapped in a PAD object
     :return: the entry of self.rgb_vec belonging to the selected feature
     """
     similarities = np.array(
         [1-sqeuclidean(feature, value.state) for feature in self.f_vec])
     scores = 1-similarities/np.linalg.norm(similarities, 1)
     # Lowest score wins; ties resolve to the earliest feature.
     best_index, _ = min(enumerate(scores), key=lambda pair: pair[1])
     return self.rgb_vec[best_index]
Beispiel #27
0
Datei: AIS.py Projekt: igbe/AIS
 def euclidean(self, x, y):
     """
     Euclidean distance between two 1D arrays; also prints a trace line.

     :param x: is the 1D detector or training/test array (i.e normal python array) to be compared to y
     :param y: is the 1D training/test or detector array (i.e normal python array) to be compared to x
     :return: the Euclidean distance between x and y
     """
     # A single pre-formatted argument prints identically under Python 2 and
     # Python 3; the former print statement was a SyntaxError on Python 3.
     print("getting euclidean distance between {0} and {1}".format(x, y))
     output = math.sqrt(dist.sqeuclidean(x, y))
     return output
 def rgb_alt(self, value=PAD()):
     """
     Averaged RGB values between all measured features, weighted per feature
     by a score derived from its squared distance to the mood.

     :param value: mood value wrapped in a PAD object
     :return: weighted average of the rows of self.rgb_vec
     """
     scores = []
     for feature in self.f_vec:
         scores.append(1-sqeuclidean(feature, value.state))
     similarities = np.array(scores)
     l1_norm = np.linalg.norm(similarities, 1)
     return np.average(self.rgb_vec, axis=0, weights=1-similarities/l1_norm)
def squared_exp_cov(x_p, x_q):
    """Evaluate the squared exponential covariance function (Eq. 2.16 of
    R&W) between the outputs f(x_p) and f(x_q) for the input vectors x_p and
    x_q, i.e. ``exp(-||x_p - x_q||^2 / 2)`` written as
    ``exp(-0.5 * ||x_p - x_q||^2)``.

    NOTE: unlike the sq_dist function shipped with the book's code, which
    computes ALL pairwise distances between the column vectors of two
    matrices, sqeuclidean works on one pair of vectors."""
    exponent = -0.5 * sqeuclidean(x_p, x_q)
    return np.exp(exponent)
Beispiel #30
0
def riot(M, num_anchor=30, num_random=1000):
    """
    Evaluate a small set of anchor configurations, guess the objectives of
    many random configurations from their two reference anchors, and return
    the non-dominated configurations after re-evaluating them.
    (check out IEEE CLOUD18 paper)

    :param M: model object; this function uses M.name, M.decNum, M.decs,
        M.objs, M.init_random_pop and M.eval_pd_df
    :param num_anchor: number of random anchor configurations (further
        "diagonal" anchors are added, one per decision)
    :param num_random: number of random configurations whose objectives are
        guessed instead of evaluated
    :return: the rows of the re-evaluated candidate set that survive the
        final cull
    """
    logging.debug("Working on model " + M.name + " @ div_conv.py::riot")

    anchors = M.init_random_pop(num_anchor)
    # add some diagonals also
    # NOTE(review): i / (d - 1) divides by zero when M.decNum is 1, and is an
    # integer division under Python 2 -- confirm the intended interpreter.
    d = M.decNum
    for i in range(d):
        diag = M.init_random_pop(1, default_value=i / (d - 1))
        anchors = pd.merge(anchors, diag, how='outer')

    M.eval_pd_df(anchors)
    logging.debug("Evaluating %d anchors done" % anchors.shape[0])

    randoms = M.init_random_pop(num_random)

    # Squared Euclidean distance in decision space between every anchor (row)
    # and every random configuration (column).
    DIST_MTX = pd.DataFrame(index=anchors.index, columns=randoms.index)
    for a in anchors.index:
        for r in randoms.index:
            DIST_MTX.loc[a, r] = distance.sqeuclidean(randoms.loc[r, M.decs],
                                                      anchors.loc[a, M.decs])
    logging.debug("Distance in configuration spaces calc done.")

    # guessing the objectives. see JC oral slides final.pdf P44 at jianfeng.us
    for r in randoms.index:
        # n / f: positions of the nearest and the farthest anchor.
        # NOTE(review): argmin/argmax return positions, which are then used
        # as labels in anchors.loc -- this assumes anchors carries a default
        # 0..n-1 index; verify.
        n, f = np.argmin(DIST_MTX[r].tolist()), np.argmax(DIST_MTX[r].tolist())

        nf = (anchors.loc[n] - anchors.loc[f])[M.decs].values
        nr = (anchors.loc[n] - randoms.loc[r])[M.decs].values
        rf = (randoms.loc[r] - anchors.loc[f])[M.decs].values

        # Ratio of the projections onto the near->far direction, used to
        # interpolate the objectives between the two anchors.
        # NOTE(review): np.dot(nf, rf) == 0 or pQ == -1 leads to a division
        # by zero below.
        pQ = np.dot(nf, nr) / np.dot(nf, rf)
        randoms.loc[r, M.objs] = anchors.loc[n, M.objs] + (pQ / (pQ + 1)) * (
            anchors.loc[f, M.objs] - anchors.loc[n, M.objs])

    # Hypothesis showing
    # randoms_cp = randoms.copy(deep=True)
    # randoms_cp[M.objs] = -1
    # M.eval_pd_df(randoms_cp)
    # logging.debug('The avg absolute guessing error is:')
    # error = np.average(np.abs(randoms[M.objs] - randoms_cp[M.objs]), axis=0)
    # logging.debug(error)

    # collecting and returning
    all_configs = pd.merge(anchors, randoms, how='outer')
    # First cull works on the (partly guessed) objectives only.
    cleared, dominated = cull(all_configs[M.objs])

    # Re-evaluate the survivors for real, then cull once more.
    # NOTE(review): the second cull receives the whole frame, not just the
    # M.objs columns as the first one does -- confirm this is intended.
    res = all_configs.loc[cleared]
    M.eval_pd_df(res, force_eval_all=True)
    cleared, dominated = cull(res)
    return res.loc[cleared]
def f(x_t_p, x_t, sigma):
    """Isotropic 2-D Gaussian density of ``x_t_p`` around ``x_t``, memoised
    in the module-level ``f_cache``.

    Returns ``0.`` as soon as any coordinate differs by ``2 * sigma`` or
    more. Otherwise the value is looked up in ``f_cache`` under the two
    absolute coordinate differences (``-1`` marks an empty entry) and is
    computed and stored on a miss.

    NOTE(review): the cache key does not include ``sigma``, so cached values
    are only valid while ``sigma`` stays the same between calls.
    """
    diff = np.abs(np.subtract(x_t_p, x_t))
    if (diff >= sigma * 2).any():
        return 0.

    key = (diff[0], diff[1])
    if f_cache[key] != -1:
        return f_cache[key]

    normaliser = (1/(2*np.pi*(sigma ** 2)))
    density = normaliser * np.exp((-sd.sqeuclidean(x_t_p, x_t))/(2 * (sigma ** 2)))
    f_cache[key] = density
    return density
Beispiel #32
0
def label_im(im):
    """Assigns each pixel to a region label.

    Every pixel of ``im`` (indexed as ``im[row, col, :]``) is compared with
    the reference colours in ``RGB_LST`` using the squared Euclidean
    distance; the label stored for the pixel is the entry of ``RGB_LAB`` at
    the index of the closest colour (the earliest one on ties).
    """
    n_rows, n_cols = im.shape[0], im.shape[1]
    labels = np.empty([n_rows, n_cols], dtype=int)

    for row, col in np.ndindex(n_rows, n_cols):
        pixel = im[row, col, :]
        best_ix = 0
        best_dist2 = distance.sqeuclidean(pixel, RGB_LST[0])
        for candidate_ix in range(1, len(RGB_LST)):
            candidate_dist2 = distance.sqeuclidean(pixel, RGB_LST[candidate_ix])
            if candidate_dist2 < best_dist2:
                best_ix, best_dist2 = candidate_ix, candidate_dist2
        labels[row, col] = RGB_LAB[best_ix]

    return labels
Beispiel #33
0
def _holt__(x, xi, p, y, l, b, s, m, n, max_seen):
    """
    Simple Exponential Smoothing
    Minimization Function
    (,)

    Fills the level array ``l`` in place using the values returned by
    ``_holt_init`` and returns the squared Euclidean distance between ``l``
    and ``y``.
    """
    _, _, _, alphac, _, y_alpha = _holt_init(x, xi, p, y, l, b)
    for step in range(1, n):
        previous = step - 1
        l[step] = (y_alpha[previous]) + (alphac * (l[previous]))
    return sqeuclidean(l, y)
Beispiel #34
0
 def getSigmaDerivSigmaVecKernelMat(self, FMat1, FMat2, sigmaVec):
     """
     Returns the derivative of the kernel matrix w.r.t. sigma, using an
     individual sigma per vector of FMat1 (sigmaVec[i] belongs to FMat1[i]).
     Rows of the result follow FMat2 and columns follow FMat1.
     """
     rows = []
     for f2 in FMat2:
         row = []
         for i, f1 in enumerate(FMat1):
             row.append(
                 1 / (sigmaVec[i]**3) * sqeuclidean(f1, f2) *
                 self.single_comparison(f1, f2, sigmaVec[i]))
         rows.append(row)
     return np.array(rows)
Beispiel #35
0
def cls_ctd(point, ctd):
    """Return the index of the centroid in ``ctd`` closest to ``point``.

    Closeness is the squared Euclidean distance; the earliest centroid wins
    ties. ``None`` is returned when ``ctd`` is empty.
    """
    nearest = None
    nearest_dist = float('inf')
    for index, centroid in enumerate(ctd):
        current = distance.sqeuclidean(point, centroid)
        if not current < nearest_dist:
            continue
        nearest, nearest_dist = index, current

    return nearest
def closest_centroid(point, centroids):
    """Return the position of the centroid nearest to ``point``.

    Uses the squared Euclidean distance. On ties the first centroid is
    chosen; an empty ``centroids`` yields ``None``.
    """
    gaps = [distance.sqeuclidean(point, centroid) for centroid in centroids]

    winner = None
    smallest = float('inf')
    for position, gap in enumerate(gaps):
        if gap < smallest:
            smallest = gap
            winner = position

    return winner
Beispiel #37
0
def _holt__(x, xi, p, y, l, b, s, m, n, max_seen):
    """
    Simple Exponential Smoothing
    Minimization Function
    (,)

    Updates the level array ``l`` in place from the quantities produced by
    ``_holt_init`` and returns the squared Euclidean distance between the
    resulting levels and ``y``.
    """
    init = _holt_init(x, xi, p, y, l, b)
    alphac, y_alpha = init[3], init[5]
    i = 1
    while i < n:
        l[i] = (y_alpha[i - 1]) + (alphac * (l[i - 1]))
        i += 1
    return sqeuclidean(l, y)
Beispiel #38
0
def closest_centroid(point, centroids, k):
    """Return the index, among the first ``k`` centroids, of the one closest
    to ``point`` (squared Euclidean distance).

    The lowest index wins ties; ``None`` is returned when ``k`` is not
    positive.
    """
    best = (float('inf'), None)
    for idx in range(k):
        gap = distance.sqeuclidean(point, centroids[idx])
        if gap < best[0]:
            best = (gap, idx)

    return best[1]
Beispiel #39
0
def multiquadric(x, y, c=0):
    """Evaluate the multiquadric kernel for one pair of vectors.

    The Multiquadric kernel is applicable in the same situations as the
    Rational Quadratic kernel and, like the Sigmoid kernel, is an example of
    a non-positive definite kernel:
                    K(x, y) = sqrt(||x - y||^2 + c^2)
    Here `x` and `y` are vectors in the input space (i.e., vectors of
    features computed from training or test samples), ||x - y||^2 is the
    squared Euclidean norm, and `c` ≥ 0 is a free parameter (default=0).
    """
    squared_norm = dist.sqeuclidean(x, y)
    return sqrt(squared_norm + c**2)
Beispiel #40
0
def get_pert_sigma(prev_population, sim_theta):
    """Covariance matrix of the population members closest to ``sim_theta``.

    The nearest 20% of ``prev_population`` (rounded down, ranked by squared
    Euclidean distance to ``sim_theta``) are stacked as rows and passed,
    transposed, to ``np.cov``, so every parameter is treated as one variable.
    """
    from scipy.spatial.distance import sqeuclidean

    keep = int(len(prev_population) * 0.2)
    distances = [sqeuclidean(member, sim_theta) for member in prev_population]

    # Stable sort of the positions by distance: equal distances keep their
    # original order.
    ranking = sorted(range(len(distances)), key=distances.__getitem__)
    nearest = [prev_population[position] for position in ranking[:keep]]
    return np.cov(np.vstack(nearest).T)
Beispiel #41
0
def rational_quadratic(x, y, c=0):
    """Compute a rational quadratic kernel.

    The Rational Quadratic kernel is less computationally intensive than the
    Gaussian kernel and can be used as an alternative when using the Gaussian
    becomes too expensive:
                    K(x, y) = 1 - (||x - y||^2 / (||x - y||^2 + c))
    where `x` and `y` are vectors in the input space (i.e., vectors of
    features computed from training or test samples), ||x - y||^2 is the
    squared Euclidean norm, and `c` >= 0 is a free parameter (default=0).

    When `x` and `y` coincide the kernel value is 1.
    """
    d = dist.sqeuclidean(x, y)
    if d == 0:
        # Avoid 0/0 (NaN) with the default c=0: the ratio term is zero for
        # identical inputs, so the kernel equals 1.
        return 1.0
    return 1 - d / (d + c)
Beispiel #42
0
    def cal_matrix_euc(list_word, dict_word_vec):
        """
        Compute the distance between every pair of words using the squared
        Euclidean distance.
        list_word: list of words
        dict_word_vec: dict whose keys are words and whose values are the
            corresponding distributed representations (vectors)
        return: euc_matrix: square matrix holding the distance computed for
            each pair of words, in the order of list_word
        """
        size = len(list_word)
        vectors = [dict_word_vec[word] for word in list_word]
        euc_matrix = np.zeros((size, size))
        for row, vec_a in enumerate(vectors):
            for col, vec_b in enumerate(vectors):
                euc_matrix[row, col] = sqeuclidean(vec_a, vec_b)

        return euc_matrix
Beispiel #43
0
def _holt_add_dam(x, xi, p, y, l, b, s, m, n, max_seen):
    """
    Additive and Additive Damped
    Minimization Function
    (A,) & (Ad,)

    Updates the level array `l` and the trend array `b` in place and returns
    the squared Euclidean distance between `l + phi * b` and `y`.  Returns
    `max_seen` without running the recursion when alpha is zero or beta
    exceeds alpha.
    """
    alpha, beta, phi, alphac, betac, y_alpha = _holt_init(x, xi, p, y, l, b)
    if alpha == 0.0 or beta > alpha:
        return max_seen
    for step in range(1, n):
        before = step - 1
        prev_level, prev_trend = l[before], b[before]
        l[step] = y_alpha[before] + alphac * (prev_level + phi * prev_trend)
        b[step] = beta * (l[step] - prev_level) + betac * phi * prev_trend
    return sqeuclidean(l + phi * b, y)
Beispiel #44
0
def _holt_mul_dam(x, xi, p, y, l, b, s, m, n, max_seen):
    """
    Multiplicative and Multiplicative Damped
    Minimization Function
    (M,) & (Md,)

    Updates the level array `l` and the trend array `b` in place and returns
    the squared Euclidean distance between `l * b**phi` and `y`.  Returns
    `max_seen` without running the recursion when alpha is zero or beta
    exceeds alpha.
    """
    alpha, beta, phi, alphac, betac, y_alpha = _holt_init(x, xi, p, y, l, b)
    if alpha == 0.0 or beta > alpha:
        return max_seen
    for step in range(1, n):
        before = step - 1
        prev_level = l[before]
        damped_trend = b[before]**phi
        l[step] = y_alpha[before] + alphac * (prev_level * damped_trend)
        b[step] = beta * (l[step] / prev_level) + betac * damped_trend
    return sqeuclidean(l * b**phi, y)
Beispiel #45
0
def _holt_win__add(x, xi, p, y, l, b, s, m, n, max_seen):
    """
    Additive Seasonal
    Minimization Function
    (,A)

    Updates the level array `l` and the seasonal array `s` in place and
    returns the squared Euclidean distance between `l + s[:-(m - 1)]` and
    `y`.  Returns `max_seen` without running the recursion when alpha is zero
    or gamma exceeds `1 - alpha`.
    """
    (alpha, beta, gamma, phi, alphac, betac, gammac,
     y_alpha, y_gamma) = _holt_win_init(x, xi, p, y, l, b, s, m)
    if alpha == 0.0 or gamma > 1 - alpha:
        return max_seen
    for step in range(1, n):
        before = step - 1
        prev_level, prev_season = l[before], s[before]
        l[step] = y_alpha[before] - alpha * prev_season + alphac * prev_level
        s[step + m - 1] = (y_gamma[before] - gamma * prev_level
                           + gammac * prev_season)
    return sqeuclidean(l + s[:-(m - 1)], y)
Beispiel #46
0
def gaussian(x, y, sigma=1):
    """Compute a Gaussian kernel.

    The Gaussian kernel is an example of a radial basis function kernel. It
    can be defined as:
                    K(x, y) = exp(-||x - y||^2 / (2 * sigma^2))
    where `x` and `y` are vectors in the input space (i.e., vectors of
    features computed from training or test samples), `||x - y||^2` is the
    squared Euclidean norm, and the adjustable parameter `sigma`
    plays a major role in the performance of the kernel, and should be
    carefully tuned to the problem at hand. If overestimated, the exponential
    will behave almost linearly and the higher-dimensional projection will
    start to lose its non-linear power. On the other hand, if underestimated,
    the function will lack regularization and the decision boundary will be
    highly sensitive to noise in training data.
    """
    # Parenthesise the denominator: `d / 2 * sigma**2` would multiply by
    # sigma^2 instead of dividing by it.
    return exp(-dist.sqeuclidean(x, y) / (2 * sigma**2))
Beispiel #47
0
def _holt_win_add_add_dam(x, xi, p, y, l, b, s, m, n, max_seen):
    """
    Additive and Additive Damped with Additive Seasonal
    Minimization Function
    (A,A) & (Ad,A)

    Updates the level (`l`), trend (`b`) and seasonal (`s`) arrays in place
    and returns the squared Euclidean distance between
    `(l + phi * b) + s[:-(m - 1)]` and `y`.  Returns `max_seen` without
    running the recursion when `alpha * beta` is zero, beta exceeds alpha, or
    gamma exceeds `1 - alpha`.
    """
    (alpha, beta, gamma, phi, alphac, betac, gammac,
     y_alpha, y_gamma) = _holt_win_init(x, xi, p, y, l, b, s, m)
    if alpha * beta == 0.0:
        return max_seen
    if beta > alpha or gamma > 1 - alpha:
        return max_seen
    for step in range(1, n):
        before = step - 1
        prev_level, prev_trend, prev_season = l[before], b[before], s[before]
        # Previous level plus damped previous trend, shared by the level and
        # seasonal updates.
        projected = prev_level + phi * prev_trend
        l[step] = y_alpha[before] - alpha * prev_season + alphac * projected
        b[step] = beta * (l[step] - prev_level) + betac * phi * prev_trend
        s[step + m - 1] = (y_gamma[before] - gamma * projected
                           + gammac * prev_season)
    return sqeuclidean((l + phi * b) + s[:-(m - 1)], y)
Beispiel #48
0
def _holt_win_mul_mul_dam(x, xi, p, y, l, b, s, m, n, max_seen):
    """
    Multiplicative and Multiplicative Damped with Multiplicative Seasonal
    Minimization Function
    (M,M) & (Md,M)

    Updates the level (`l`), trend (`b`) and seasonal (`s`) arrays in place
    and returns the squared Euclidean distance between
    `(l * b**phi) * s[:-(m - 1)]` and `y`.  Returns `max_seen` without
    running the recursion when `alpha * beta` is zero, beta exceeds alpha, or
    gamma exceeds `1 - alpha`.
    """
    (alpha, beta, gamma, phi, alphac, betac, gammac,
     y_alpha, y_gamma) = _holt_win_init(x, xi, p, y, l, b, s, m)
    if alpha * beta == 0.0:
        return max_seen
    if beta > alpha or gamma > 1 - alpha:
        return max_seen
    for step in range(1, n):
        before = step - 1
        prev_level, prev_season = l[before], s[before]
        damped_trend = b[before]**phi
        # Previous level times damped previous trend, shared by the level and
        # seasonal updates.
        projected = prev_level * damped_trend
        l[step] = y_alpha[before] / prev_season + alphac * projected
        b[step] = beta * (l[step] / prev_level) + betac * damped_trend
        s[step + m - 1] = y_gamma[before] / projected + gammac * prev_season
    return sqeuclidean((l * b**phi) * s[:-(m - 1)], y)
Beispiel #49
0
def _maptosmllestrbf(jobpacket):
        cellmap_df = jobpacket[0]
        paramlist = jobpacket[1]
        mainsource_df = jobpacket[2]
        gamma = 1/len(paramlist)
        defvec = array([1.0 for i in xrange(len(paramlist))])
        resultlist = []

        for sourceindex in mainsource_df.index.values:

            sourceid = mainsource_df.ix[sourceindex]['id']
            sourcelabel = mainsource_df.ix[sourceindex]['label']
            sourcevec = mainsource_df.ix[sourceindex].as_matrix(columns=paramlist)
            func =lambda row: abs(1 - exp(gamma *sqeuclidean(row.as_matrix(columns=paramlist), sourcevec)**2))

            result_df = cellmap_df.apply(func, axis=1)
            minindex = result_df.argmin()
            resultlist.append([minindex, sourceid, sourcelabel])

        return resultlist
def euclidean_metric(w, X, y):
    """Return the squared Euclidean distance between `X.dot(w)` and `y`."""
    projected = X.dot(w)
    return sqeuclidean(projected, y)
Beispiel #51
0
def get_distance_scipy(a, b):
    """Return the squared Euclidean distance between two points via scipy.

    Both arguments must expose their coordinate vector as a `coords`
    attribute.
    """
    first, second = a.coords, b.coords
    return sqeuclidean(first, second)
Beispiel #52
0
    def _predict(self, h=None, smoothing_level=None, smoothing_slope=None,
                 smoothing_seasonal=None, initial_level=None, initial_slope=None,
                 damping_slope=None, initial_seasons=None, use_boxcox=None, lamda=None, remove_bias=None):
        """
        Helper prediction function

        Runs the level/slope/season recursions over ``self.endog`` with the
        supplied parameters, extends the arrays past the sample for the
        forecast, computes SSE, AIC, AICc and BIC, stores the parameters in
        ``self.params`` and returns the wrapped results object.

        Parameters
        ----------
        h : int, optional
            The number of time steps to forecast ahead.
            NOTE(review): the work arrays are sized with ``self.nobs + h + 1``,
            so the ``None`` default cannot be used as-is -- confirm callers
            always pass an int.
        smoothing_level, smoothing_slope, smoothing_seasonal : float, optional
            Used as alpha, beta and gamma in the recursions.
        initial_level, initial_slope, initial_seasons : optional
            Written to ``l[0]``, ``b[0]`` and ``s[:m]`` before the recursion.
        damping_slope : float, optional
            Used as phi when ``self.damped`` is true; otherwise 1.0 is used.
        use_boxcox : {True, False, 'log', float}, optional
            Selects the Box-Cox transform applied to the data before fitting.
        lamda : float, optional
            Overwritten inside the function according to ``use_boxcox``.
        remove_bias : bool, optional
            When true, the mean residual is added to the fitted values.

        Returns
        -------
        HoltWintersResultsWrapper
        """
        # Variable renames to alpha,beta, etc as this helps with following the
        # mathematical notation in general
        alpha = smoothing_level
        beta = smoothing_slope
        gamma = smoothing_seasonal
        phi = damping_slope

        # Start in sample and out of sample predictions
        data = self.endog
        damped = self.damped
        seasoning = self.seasoning
        trending = self.trending
        trend = self.trend
        seasonal = self.seasonal
        m = self.seasonal_periods
        phi = phi if damped else 1.0
        # Pick the (optional) Box-Cox transform of the observations.
        if use_boxcox == 'log':
            lamda = 0.0
            y = boxcox(data, 0.0)
        elif isinstance(use_boxcox, float):
            lamda = use_boxcox
            y = boxcox(data, lamda)
        elif use_boxcox:
            y, lamda = boxcox(data)
        else:
            lamda = None
            y = data.squeeze()
            if np.ndim(y) != 1:
                raise NotImplementedError('Only 1 dimensional data supported')
        # Pre-weight the observations once instead of inside the loops.
        y_alpha = np.zeros((self.nobs,))
        y_gamma = np.zeros((self.nobs,))
        alphac = 1 - alpha
        y_alpha[:] = alpha * y
        if trending:
            betac = 1 - beta
        if seasoning:
            gammac = 1 - gamma
            y_gamma[:] = gamma * y
        # Level, slope and season arrays cover the sample plus h + 1 extra
        # steps (the season array additionally holds the m initial seasons).
        l = np.zeros((self.nobs + h + 1,))
        b = np.zeros((self.nobs + h + 1,))
        s = np.zeros((self.nobs + h + m + 1,))
        l[0] = initial_level
        b[0] = initial_slope
        s[:m] = initial_seasons
        # Multipliers applied to the last slope for the out-of-sample steps.
        phi_h = np.cumsum(np.repeat(phi, h + 1)**np.arange(1, h + 1 + 1)
                          ) if damped else np.arange(1, h + 1 + 1)
        # Dispatch tables keyed by trend type so the recursions below are
        # written once for multiplicative, additive and no trend.
        trended = {'mul': np.multiply,
                   'add': np.add,
                   None: lambda l, b: l
                   }[trend]
        detrend = {'mul': np.divide,
                   'add': np.subtract,
                   None: lambda l, b: 0
                   }[trend]
        dampen = {'mul': np.power,
                  'add': np.multiply,
                  None: lambda b, phi: 0
                  }[trend]
        # NOTE: in every branch below `i` keeps its final loop value
        # (self.nobs) and is reused after the loop for slicing.
        if seasonal == 'mul':
            for i in range(1, self.nobs + 1):
                l[i] = y_alpha[i - 1] / s[i - 1] + \
                    (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
                if trending:
                    b[i] = (beta * detrend(l[i], l[i - 1])) + \
                        (betac * dampen(b[i - 1], phi))
                s[i + m - 1] = y_gamma[i - 1] / \
                    trended(l[i - 1], dampen(b[i - 1], phi)) + \
                    (gammac * s[i - 1])
            slope = b[1:i + 1].copy()
            season = s[m:i + m].copy()
            # Hold the last level constant over the forecast horizon.
            l[i:] = l[i]
            if trending:
                b[:i] = dampen(b[:i], phi)
                b[i:] = dampen(b[i], phi_h)
            trend = trended(l, b)
            # Fill the out-of-sample seasons by cycling with period m.
            s[i + m - 1:] = [s[(i - 1) + j % m] for j in range(h + 1 + 1)]
            fitted = trend * s[:-m]
        elif seasonal == 'add':
            for i in range(1, self.nobs + 1):
                l[i] = y_alpha[i - 1] - (alpha * s[i - 1]) + \
                    (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
                if trending:
                    b[i] = (beta * detrend(l[i], l[i - 1])) + \
                        (betac * dampen(b[i - 1], phi))
                s[i + m - 1] = y_gamma[i - 1] - \
                    (gamma * trended(l[i - 1],
                                     dampen(b[i - 1], phi))) + (gammac * s[i - 1])
            slope = b[1:i + 1].copy()
            season = s[m:i + m].copy()
            # Hold the last level constant over the forecast horizon.
            l[i:] = l[i]
            if trending:
                b[:i] = dampen(b[:i], phi)
                b[i:] = dampen(b[i], phi_h)
            trend = trended(l, b)
            # Fill the out-of-sample seasons by cycling with period m.
            s[i + m - 1:] = [s[(i - 1) + j % m] for j in range(h + 1 + 1)]
            fitted = trend + s[:-m]
        else:
            for i in range(1, self.nobs + 1):
                l[i] = y_alpha[i - 1] + \
                    (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
                if trending:
                    b[i] = (beta * detrend(l[i], l[i - 1])) + \
                        (betac * dampen(b[i - 1], phi))
            slope = b[1:i + 1].copy()
            season = s[m:i + m].copy()
            # Hold the last level constant over the forecast horizon.
            l[i:] = l[i]
            if trending:
                b[:i] = dampen(b[:i], phi)
                b[i:] = dampen(b[i], phi_h)
            trend = trended(l, b)
            fitted = trend
        level = l[1:i + 1].copy()
        # Undo the Box-Cox transform on everything that is reported.
        if use_boxcox or use_boxcox == 'log' or isinstance(use_boxcox, float):
            fitted = inv_boxcox(fitted, lamda)
            level = inv_boxcox(level, lamda)
            slope = detrend(trend[:i], level)
            if seasonal == 'add':
                season = (fitted - inv_boxcox(trend, lamda))[:i]
            elif seasonal == 'mul':
                season = (fitted / inv_boxcox(trend, lamda))[:i]
            else:
                pass
        # The last h + 1 entries of `fitted` are out of sample.
        sse = sqeuclidean(fitted[:-h - 1], data)
        # (s0 + gamma) + (b0 + beta) + (l0 + alpha) + phi
        k = m * seasoning + 2 * trending + 2 + 1 * damped
        aic = self.nobs * np.log(sse / self.nobs) + (k) * 2
        # The small-sample correction is only defined for nobs > k + 3; use
        # an infinite penalty otherwise instead of dividing by zero or
        # producing a negative penalty.
        if self.nobs - k - 3 > 0:
            aicc_penalty = (2 * (k + 2) * (k + 3)) / (self.nobs - k - 3)
        else:
            aicc_penalty = np.inf
        aicc = aic + aicc_penalty
        bic = self.nobs * np.log(sse / self.nobs) + (k) * np.log(self.nobs)
        resid = data - fitted[:-h - 1]
        if remove_bias:
            fitted += resid.mean()
        if not damped:
            # np.nan is the spelling available in every NumPy release.
            phi = np.nan
        self.params = {'smoothing_level': alpha,
                       'smoothing_slope': beta,
                       'smoothing_seasonal': gamma,
                       'damping_slope': phi,
                       'initial_level': l[0],
                       'initial_slope': b[0],
                       'initial_seasons': s[:m],
                       'use_boxcox': use_boxcox,
                       'lamda': lamda,
                       'remove_bias': remove_bias}
        hwfit = HoltWintersResults(self, self.params, fittedfcast=fitted, fittedvalues=fitted[:-h - 1],
                                   fcastvalues=fitted[-h - 1:], sse=sse, level=level,
                                   slope=slope, season=season, aic=aic, bic=bic,
                                   aicc=aicc, resid=resid, k=k)
        return HoltWintersResultsWrapper(hwfit)
Beispiel #53
0
from scipy.sparse import *
from scipy.sparse.linalg import *
import numpy
import matplotlib.pyplot as plt

# Print the artists whose feature vectors are closest (squared Euclidean
# distance) to a given artist.
# Usage: <script> ARTIST_NAME NUM_NEIGHBORS

# Context managers make sure both input files are closed after reading.
with open('data\\reduced-data.txt') as data_file:
    text_data = [line.strip() for line in data_file]
data = [[float(value) for value in line.rstrip(',').split(',')] for line in text_data]

with open('data\\artistList.txt') as label_file:
    labels = [line.strip() for line in label_file]

artist_name = sys.argv[1]
num_neighbors = int(sys.argv[2])

artist_vec = []

# Row i of the data file belongs to label i; if a name occurs more than once
# the last occurrence wins.
for i in range(0, len(labels)):
    if (artist_name == labels[i]):
        artist_vec = data[i]

if (len(artist_vec) == 0):
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("Artist not found")
    sys.exit()

# Convert the query vector once instead of once per compared row.
target_vec = numpy.array(artist_vec)
distances = [(labels[i], sqeuclidean(numpy.array(data[i]), target_vec)) for i in range(0, len(labels))]

similar = sorted(distances, key=lambda pair: pair[1])

# Slicing (rather than indexing) avoids an IndexError when more neighbours are
# requested than there are artists; a negative request prints nothing.
for neighbor in similar[:max(num_neighbors, 0)]:
    print(neighbor[0])

Beispiel #54
0
	# Score the current candidate transformation `trafo`.
	# NOTE(review): this is the body of an enclosing loop whose header is not
	# visible here; `nearest_idxs`, `kd_osm`, `osm_corners`, `transformed_img`,
	# `src`, `trafo` and `prev_distance` are all defined outside this fragment.
	# unique_idx holds the position of the first occurrence of every distinct
	# value in nearest_idxs.
	u, unique_idx = np.unique(nearest_idxs, return_index=True)

	# calculate error of transformation
	# and select if better than previous
	sum_dist = 0
	for i in range(0, len(nearest_idxs)):
		if nearest_idxs[i] == kd_osm.n:
			# distance higher than threshold (ie no next neighbor found)
			# break
			# Setting the score just above the best one seen so far makes the
			# comparison below reject this candidate.
			sum_dist = prev_distance + 1
			break
		if i not in unique_idx:
			# same point matches twice, -> punish
			# (fixed penalty of 400 instead of the actual squared distance)
			sum_dist += 400
		else:
			# Squared distance between the matched pair, using the first two
			# columns only.
			sum_dist += sqeuclidean(osm_corners[nearest_idxs[i], 0:2], transformed_img[i, 0:2])

	# Keep the candidate if it beats the best score so far and plot it.
	if abs(sum_dist) < prev_distance:
		print(sum_dist)
		best_trafo = trafo
		prev_distance = abs(sum_dist)
		# NOTE(review): presumably plt.hold(False)/plt.hold(True) clears the
		# previous plot and then overlays the next scatters -- confirm against
		# the matplotlib version in use.
		plt.hold(False)
		plt.scatter(osm_corners[:, 0], osm_corners[:, 1], color=(1,0,0))
		plt.hold(True)
		plt.scatter(transformed_img[:, 0], transformed_img[:, 1], color=(0,1,0))
		plt.scatter(src[:, 0], src[:, 1], color=(0,0,1), s=30)
		plt.show()

# Report the best transformation found and its score.
print("\n\n\nBest Result: ")
print(best_trafo)
print("DISTANCE: ", prev_distance)
def rbf_kernel(x_i, x_t, gamma):
    """Return exp(-gamma * ||x_i - x_t||^2), the RBF kernel of two vectors."""
    squared_gap = distance.sqeuclidean(x_i, x_t)
    return math.exp(-gamma * squared_gap)
Beispiel #56
0
def getLshNN_op1(dataset, nnModel, thred_radius_dist, trained_num_probes, thred_sameTweetDist):
    """Greedily group the rows of `dataset` using near-neighbour queries.

    Rows are visited in order.  A row that has not been assigned yet opens a
    new cluster and is queried against `nnModel`; every returned neighbour
    whose index lies within 130000 positions of the row and whose squared
    Euclidean distance to it is below `thred_sameTweetDist` is assigned to
    that cluster (overwriting any earlier assignment).  Rows that are already
    assigned are not queried.

    Parameters
    ----------
    dataset : 2-D array
        One item per row.
    nnModel : object
        Must provide `set_num_probes(n)` and
        `find_near_neighbors(vector, radius)` returning row indices.
    thred_radius_dist : float
        Radius passed to `find_near_neighbors`.
    trained_num_probes : int
        Value passed to `set_num_probes` before each query.
    thred_sameTweetDist : float
        Upper bound (exclusive) on the squared distance for a neighbour to
        join the cluster.

    Returns
    -------
    tuple
        `(ngIdxList, indexedInCluster, clusters)`: the per-row query results
        (None for rows that were not queried) converted with `np.asarray`,
        the row -> cluster index dict, and the list of clusters, each holding
        only the row that opened it.
    """
    ngIdxList = []
    indexedInCluster = {}
    clusters = []
    for dataidx in range(dataset.shape[0]):
        if dataidx in indexedInCluster:
            nn_keys = None
        else:
            clusterIdx = len(clusters)
            indexedInCluster[dataidx] = clusterIdx
            clusters.append([dataidx])

            nnModel.set_num_probes(trained_num_probes)
            # nn_keys: (id1, id2, ...)
            nn_keys = nnModel.find_near_neighbors(dataset[dataidx, :], thred_radius_dist)

            for key in nn_keys:
                if (dataidx - 130000 < key < dataidx + 130000
                        and sqeuclidean(dataset[dataidx, :], dataset[key, :]) < thred_sameTweetDist):
                    indexedInCluster[key] = clusterIdx

        ngIdxList.append(nn_keys)
        if (dataidx + 1) % 10000 == 0:
            # Single pre-formatted argument: prints the same text on Python 2
            # and Python 3.
            print("## completed %s %s %s" % (dataidx + 1, len(clusters), time.asctime()))
    # NOTE(review): mixing None entries with neighbour lists makes this a
    # ragged conversion, which recent NumPy versions may reject -- confirm
    # against the NumPy version in use.
    ngIdxList = np.asarray(ngIdxList)
    return ngIdxList, indexedInCluster, clusters
Beispiel #57
0
    def _predict(self, h=None, smoothing_level=None, smoothing_slope=None,
                 smoothing_seasonal=None, initial_level=None, initial_slope=None,
                 damping_slope=None, initial_seasons=None, use_boxcox=None, lamda=None,
                 remove_bias=None, is_optimized=None):
        """
        Helper prediction function

        Runs the level/slope/season recursions over ``self.endog`` with the
        supplied parameters, extends the arrays past the sample for the
        forecast, computes SSE, AIC, AICc and BIC, stores the parameters in
        ``self.params``, builds a formatted parameter table and returns the
        wrapped results object.

        Parameters
        ----------
        h : int, optional
            The number of time steps to forecast ahead.
            NOTE(review): the work arrays are sized with ``self.nobs + h + 1``,
            so the ``None`` default cannot be used as-is -- confirm callers
            always pass an int.
        smoothing_level, smoothing_slope, smoothing_seasonal : float, optional
            Used as alpha, beta and gamma in the recursions.
        initial_level, initial_slope, initial_seasons : optional
            Written to ``l[0]``, ``b[0]`` and ``s[:m]`` before the recursion.
        damping_slope : float, optional
            Used as phi when ``self.damped`` is true; otherwise 1.0 is used.
        use_boxcox : {True, False, 'log', float}, optional
            Selects the Box-Cox transform applied to the data before fitting.
        lamda : float, optional
            Overwritten inside the function according to ``use_boxcox``.
        remove_bias : bool, optional
            When true, the mean residual is added to the fitted values.
        is_optimized : array, optional
            Flags, one per entry of the parameter table, converted to bool;
            when None every parameter is reported as not optimized.

        Returns
        -------
        HoltWintersResultsWrapper
        """
        # Variable renames to alpha, beta, etc as this helps with following the
        # mathematical notation in general
        alpha = smoothing_level
        beta = smoothing_slope
        gamma = smoothing_seasonal
        phi = damping_slope

        # Start in sample and out of sample predictions
        data = self.endog
        damped = self.damped
        seasoning = self.seasoning
        trending = self.trending
        trend = self.trend
        seasonal = self.seasonal
        m = self.seasonal_periods
        phi = phi if damped else 1.0
        # Pick the (optional) Box-Cox transform of the observations.
        if use_boxcox == 'log':
            lamda = 0.0
            y = boxcox(data, 0.0)
        elif isinstance(use_boxcox, float):
            lamda = use_boxcox
            y = boxcox(data, lamda)
        elif use_boxcox:
            y, lamda = boxcox(data)
        else:
            lamda = None
            y = data.squeeze()
            if np.ndim(y) != 1:
                raise NotImplementedError('Only 1 dimensional data supported')
        # Pre-weight the observations once instead of inside the loops.
        y_alpha = np.zeros((self.nobs,))
        y_gamma = np.zeros((self.nobs,))
        alphac = 1 - alpha
        y_alpha[:] = alpha * y
        if trending:
            betac = 1 - beta
        if seasoning:
            gammac = 1 - gamma
            y_gamma[:] = gamma * y
        # Level, slope and season arrays cover the sample plus h + 1 extra
        # steps (the season array additionally holds the m initial seasons).
        l = np.zeros((self.nobs + h + 1,))
        b = np.zeros((self.nobs + h + 1,))
        s = np.zeros((self.nobs + h + m + 1,))
        l[0] = initial_level
        b[0] = initial_slope
        s[:m] = initial_seasons
        # Multipliers applied to the last slope for the out-of-sample steps.
        phi_h = np.cumsum(np.repeat(phi, h + 1)**np.arange(1, h + 1 + 1)
                          ) if damped else np.arange(1, h + 1 + 1)
        # Dispatch tables keyed by trend type so the recursions below are
        # written once for multiplicative, additive and no trend.
        trended = {'mul': np.multiply,
                   'add': np.add,
                   None: lambda l, b: l
                   }[trend]
        detrend = {'mul': np.divide,
                   'add': np.subtract,
                   None: lambda l, b: 0
                   }[trend]
        dampen = {'mul': np.power,
                  'add': np.multiply,
                  None: lambda b, phi: 0
                  }[trend]
        nobs = self.nobs
        if seasonal == 'mul':
            for i in range(1, nobs + 1):
                l[i] = y_alpha[i - 1] / s[i - 1] + \
                       (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
                if trending:
                    b[i] = (beta * detrend(l[i], l[i - 1])) + \
                           (betac * dampen(b[i - 1], phi))
                s[i + m - 1] = y_gamma[i - 1] / trended(l[i - 1], dampen(b[i - 1], phi)) + \
                    (gammac * s[i - 1])
            slope = b[1:nobs + 1].copy()
            season = s[m:nobs + m].copy()
            # Hold the last level constant over the forecast horizon.
            l[nobs:] = l[nobs]
            if trending:
                b[:nobs] = dampen(b[:nobs], phi)
                b[nobs:] = dampen(b[nobs], phi_h)
            trend = trended(l, b)
            # Fill the out-of-sample seasons by cycling with period m.
            s[nobs + m - 1:] = [s[(nobs - 1) + j % m] for j in range(h + 1 + 1)]
            fitted = trend * s[:-m]
        elif seasonal == 'add':
            for i in range(1, nobs + 1):
                l[i] = y_alpha[i - 1] - (alpha * s[i - 1]) + \
                       (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
                if trending:
                    b[i] = (beta * detrend(l[i], l[i - 1])) + \
                           (betac * dampen(b[i - 1], phi))
                s[i + m - 1] = y_gamma[i - 1] - \
                    (gamma * trended(l[i - 1], dampen(b[i - 1], phi))) + \
                    (gammac * s[i - 1])
            slope = b[1:nobs + 1].copy()
            season = s[m:nobs + m].copy()
            # Hold the last level constant over the forecast horizon.
            l[nobs:] = l[nobs]
            if trending:
                b[:nobs] = dampen(b[:nobs], phi)
                b[nobs:] = dampen(b[nobs], phi_h)
            trend = trended(l, b)
            # Fill the out-of-sample seasons by cycling with period m.
            s[nobs + m - 1:] = [s[(nobs - 1) + j % m] for j in range(h + 1 + 1)]
            fitted = trend + s[:-m]
        else:
            for i in range(1, nobs + 1):
                l[i] = y_alpha[i - 1] + \
                       (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
                if trending:
                    b[i] = (beta * detrend(l[i], l[i - 1])) + \
                           (betac * dampen(b[i - 1], phi))
            slope = b[1:nobs + 1].copy()
            season = s[m:nobs + m].copy()
            # Hold the last level constant over the forecast horizon.
            l[nobs:] = l[nobs]
            if trending:
                b[:nobs] = dampen(b[:nobs], phi)
                b[nobs:] = dampen(b[nobs], phi_h)
            trend = trended(l, b)
            fitted = trend
        level = l[1:nobs + 1].copy()
        # Undo the Box-Cox transform on everything that is reported.
        if use_boxcox or use_boxcox == 'log' or isinstance(use_boxcox, float):
            fitted = inv_boxcox(fitted, lamda)
            level = inv_boxcox(level, lamda)
            slope = detrend(trend[:nobs], level)
            if seasonal == 'add':
                season = (fitted - inv_boxcox(trend, lamda))[:nobs]
            else:  # seasonal == 'mul':
                season = (fitted / inv_boxcox(trend, lamda))[:nobs]
        # The last h + 1 entries of `fitted` are out of sample.
        sse = sqeuclidean(fitted[:-h - 1], data)
        # (s0 + gamma) + (b0 + beta) + (l0 + alpha) + phi
        k = m * seasoning + 2 * trending + 2 + 1 * damped
        aic = self.nobs * np.log(sse / self.nobs) + k * 2
        # The small-sample correction is only defined for nobs > k + 3.
        if self.nobs - k - 3 > 0:
            aicc_penalty = (2 * (k + 2) * (k + 3)) / (self.nobs - k - 3)
        else:
            aicc_penalty = np.inf
        aicc = aic + aicc_penalty
        bic = self.nobs * np.log(sse / self.nobs) + k * np.log(self.nobs)
        resid = data - fitted[:-h - 1]
        if remove_bias:
            fitted += resid.mean()
        if not damped:
            # np.nan is the spelling available in every NumPy release.
            phi = np.nan
        self.params = {'smoothing_level': alpha,
                       'smoothing_slope': beta,
                       'smoothing_seasonal': gamma,
                       'damping_slope': phi,
                       'initial_level': l[0],
                       'initial_slope': b[0],
                       'initial_seasons': s[:m],
                       'use_boxcox': use_boxcox,
                       'lamda': lamda,
                       'remove_bias': remove_bias}

        # Format parameters into a DataFrame
        codes = ['alpha', 'beta', 'gamma', 'l.0', 'b.0', 'phi']
        codes += ['s.{0}'.format(i) for i in range(m)]
        idx = ['smoothing_level', 'smoothing_slope', 'smoothing_seasonal',
               'initial_level', 'initial_slope', 'damping_slope']
        idx += ['initial_seasons.{0}'.format(i) for i in range(m)]

        formatted = [alpha, beta, gamma, l[0], b[0], phi]
        formatted += s[:m].tolist()
        formatted = list(map(lambda v: np.nan if v is None else v, formatted))
        formatted = np.array(formatted)
        # Use the builtin `bool` as dtype: the `np.bool` alias is not
        # available in current NumPy releases.
        if is_optimized is None:
            optimized = np.zeros(len(codes), dtype=bool)
        else:
            optimized = is_optimized.astype(bool)
        # Row mask: keep only the parameters that exist for this model.
        included = [True, trending, seasoning, True, trending, damped]
        included += [True] * m
        formatted = pd.DataFrame([[c, f, o] for c, f, o in zip(codes, formatted, optimized)],
                                 columns=['name', 'param', 'optimized'],
                                 index=idx)
        formatted = formatted.loc[included]

        hwfit = HoltWintersResults(self, self.params, fittedfcast=fitted,
                                   fittedvalues=fitted[:-h - 1], fcastvalues=fitted[-h - 1:],
                                   sse=sse, level=level, slope=slope, season=season, aic=aic,
                                   bic=bic, aicc=aicc, resid=resid, k=k,
                                   params_formatted=formatted, optimized=optimized)
        return HoltWintersResultsWrapper(hwfit)
Beispiel #58
0
def sqeuclidean(pair):
    """Return the squared Euclidean distance between the two vectors in `pair`.

    `pair` is a single two-element sequence ``(x, y)``, which lets the
    function be used directly with `map` over a sequence of pairs.
    """
    # Unpack in the body: tuple parameters in the signature are Python 2-only
    # syntax.
    x, y = pair
    return distance.sqeuclidean(x, y)
Beispiel #59
0
 def probability(self, x):
     """Return exp(-0.5 * precision * ||x - mean||^2) as a real number.

     Reads `self.precision` and `self.mean`; only the real part of the
     complex exponential is returned.
     """
     squared_gap = distance.sqeuclidean(x, self.mean)
     exponent = -0.5 * self.precision * squared_gap
     return cmath.exp(exponent).real