Esempio n. 1
0
def get_phonetic_distances():
    """Compute several distance measures between every pair of phoneme vectors.

    Returns a dict mapping each (char1, char2) pair to a tuple of
    (hamming, euclidean, per-component mismatch count, sum of absolute
    component differences).
    """
    distances = {}
    for char1, char2 in combinations(phonemes.keys(), 2):
        vec1 = phonemes[char1]
        vec2 = phonemes[char2]

        hamming = EditDistance.get_distance(vec1, vec2)
        euclidean = EuclideanDistance.get_distance(vec1, vec2)

        # Manual per-component comparison of the two feature vectors.
        # NOTE(review): indexes by len(vec1) — assumes vec2 is at least as
        # long; confirm the phoneme vectors share a fixed length.
        mismatches = 0
        abs_total = 0
        for idx in range(len(vec1)):
            a, b = vec1[idx], vec2[idx]
            if a != b:
                mismatches += 1
            abs_total += abs(a - b)

        distances[(char1, char2)] = (hamming, euclidean, mismatches, abs_total)

    return distances
Esempio n. 2
0
 def __init__(self, X=None, y=None, dist_metric=EuclideanDistance(), num_components=0):
     """Initialize the model and, if training data is given, compute it.

     X/y are optional training samples and labels; dist_metric is the
     distance used for matching; num_components is the number of components
     to keep (0 presumably means "all" — TODO confirm against compute()).
     If both X and y are provided, compute(X, y) runs immediately.

     NOTE(review): the EuclideanDistance() default is evaluated once at
     definition time and shared across all instances.
     """
     self.dist_metric = dist_metric
     # BUG FIX: was hard-coded `self.num_components = 0`, silently
     # discarding the caller-supplied num_components argument.
     self.num_components = num_components
     self.projections = []
     self.W = []
     self.mu = []
     if (X is not None) and (y is not None):
         self.compute(X, y)
Esempio n. 3
0
 def __init__(self,
              X=None,
              y=None,
              dist_metric=EuclideanDistance(),
              num_components=0):
     """Construct an EigenfacesModel by delegating entirely to the parent."""
     super(EigenfacesModel, self).__init__(
         X=X,
         y=y,
         dist_metric=dist_metric,
         num_components=num_components,
     )
Esempio n. 4
0
def run_rec():
    """Train a Fisherfaces + 1-NN face recognizer, cross-validate it, and
    predict the label of the query image 'search.png'.

    Reads training images from the 'images' directory, pickles the trained
    model to 'model.pkl', reloads it, runs a 10-fold cross validation, and
    returns the predicted label of the query image.
    """
    # Now read in the image data. This must be a valid path!
    [X, y] = read_images('images')
    # Route facerec's internal logging to stdout so we see what's going on:
    handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger = logging.getLogger("facerec")
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)
    # Fisherfaces feature extraction combined with a 1-NN classifier
    # using Euclidean distance:
    feature = Fisherfaces()
    classifier = NearestNeighbor(dist_metric=EuclideanDistance(), k=1)
    my_model = PredictableModel(feature=feature, classifier=classifier)
    # Compute the Fisherfaces on the given data (in X) and labels (in y):
    my_model.compute(X, y)
    # Round-trip the model through pickle to verify persistence works:
    save_model('model.pkl', my_model)
    model = load_model('model.pkl')
    # Perform a 10-fold cross validation and print the result:
    cv = KFoldCrossValidation(model, k=10)
    cv.validate(X, y)
    cv.print_results()
    # Predict the label for the query image, converted to 8-bit grayscale:
    im = Image.open('search.png')
    im = im.convert("L")
    predicted_label = model.predict(im)[0]

    print(predicted_label)
    return predicted_label
Esempio n. 5
0
            distance = self.distance.compute(distance_context)
            distances.append((distance, t))

        distances.sort(key=operator.itemgetter(1))
        return distances[:k]

    def __average_neighbors(self, neighbors):
        """Return the component-wise mean of the neighbor data points.

        Each neighbor is a (distance, point) pair; only the point part is
        averaged.
        """
        count = len(neighbors)
        points = [pair[1] for pair in neighbors]
        return [sum(component) / count for component in zip(*points)]

    def predict(self, data_point, k):
        """Return the KNN regression estimate for data_point using k neighbors.

        Raises TrainingIncompleteException if the model has not been trained
        and NegativeOrZeroKException if k is not positive.
        """
        # Guard clauses: the model must be trained and k must be positive.
        if not self.__is_trained:
            raise TrainingIncompleteException()
        if k <= 0:
            raise NegativeOrZeroKException(k)

        neighbors = self.__get_neighbors(data_point, k)
        return self.__average_neighbors(neighbors)


if __name__ == '__main__':
    # TODO: Move over to Unit tests..
    regressor = KNNRegression()
    training_points = [[0, 0, 2], [3, 4, 5], [5, 5, 5]]
    regressor.train(training_points, EuclideanDistance())
    print(regressor.predict([2, 3, 4], 3))
Esempio n. 6
0
 def __init__(self, dist_metric=EuclideanDistance(), k=1):
     """Set up a k-NN classifier: distance metric, k, and empty training data."""
     AbstractClassifier.__init__(self)
     self.dist_metric = dist_metric
     self.k = k
     # Training data starts empty; labels are stored as int32.
     self.X = []
     self.y = np.array([], dtype=np.int32)
    def __init__(
        self,
        vocab_size: int,
        distance: str = "euclidean",
        embedding_dim: Optional[int] = None,
        embedding_dropout: float = 0,
        pretrained_embeddings: Optional[Tensor] = None,
        freeze_pretrained_embeddings: bool = True,
        padding_idx: int = 0,
        hidden_dim: int = 128,
        **kwargs,
    ) -> None:
        """Initialize the TextClassifier model.

        Parameters
        ----------
        vocab_size: int
            The number of tokens in the vocabulary
        distance: str, optional
            One of: ['euclidean', 'hyperbolic']
        embedding_dim: int, optional
            The token embedding dimension. Should be provided if no
            pretrained embeddings are used.
        embedding_dropout: float, optional
            Dropout probability applied after the embedding layer.
            Default ``0``.
        pretrained_embeddings: Tensor, optional
            A pretrained embedding matrix
        freeze_pretrained_embeddings: bool, optional
            Only used if a pretrained embedding matrix is provided.
            Freezes the embedding layer during training.
        padding_idx: int, optional
            The padding index. Default ``0``.
        hidden_dim: int, optional
            The hidden dimension of the encoder. Default ``128``.

        Extra keyword arguments are passed to the RNNEncoder.

        Raises
        ------
        ValueError
            If ``distance`` is not one of ['euclidean', 'hyperbolic'], or
            if neither ``embedding_dim`` nor ``pretrained_embeddings`` is
            given.

        """
        super().__init__()

        # Select the matching distance/mean module pair for the chosen
        # embedding geometry.
        if distance == "euclidean":
            dist = EuclideanDistance()
            mean = EuclideanMean()
        elif distance == "hyperbolic":
            dist = HyperbolicDistance()
            mean = HyperbolicMean()
        else:
            raise ValueError(
                f"Distance should be one of: ['euclidean', 'hyperbolic'], but got {distance}"
            )

        self.padding_idx = padding_idx
        self.distance_module = dist
        self.mean_module = mean
        self.embedding_dropout = nn.Dropout(embedding_dropout)

        # An embedding size must come from somewhere: either given
        # explicitly or inferred from the pretrained matrix.
        if embedding_dim is None and pretrained_embeddings is None:
            raise ValueError(
                "At least one of: `embedding_dim` and `pretrained_embeddings` must be provided"
            )

        if pretrained_embeddings is not None:
            # Load the pretrained weights; optionally freeze them so they
            # are not updated during training.
            self.embedding = nn.Embedding.from_pretrained(
                pretrained_embeddings,
                freeze=freeze_pretrained_embeddings,
                padding_idx=padding_idx,
            )
        else:
            self.embedding = nn.Embedding(vocab_size, embedding_dim)

        # Forward hidden_dim to the encoder under the keyword it expects.
        kwargs["hidden_size"] = hidden_dim
        self.encoder = RNNEncoder(input_size=self.embedding.embedding_dim,
                                  **kwargs)