def get_phonetic_distances():
    """Compute pairwise distance measures between all phoneme feature vectors.

    For every unordered pair of characters in the module-level ``phonemes``
    mapping, four measures are computed over their feature vectors:
    the library EditDistance and EuclideanDistance, plus a hand-rolled
    position-mismatch count and an element-wise absolute-difference sum.

    Returns a dict mapping ``(char1, char2)`` tuples to a
    ``(hamming, euclidean, edit, sum)`` 4-tuple of distances.
    """
    distances = {}
    for char1, char2 in combinations(phonemes.keys(), 2):
        vec1 = phonemes[char1]
        vec2 = phonemes[char2]
        library_hamming = EditDistance.get_distance(vec1, vec2)
        library_euclidean = EuclideanDistance.get_distance(vec1, vec2)
        # assumes both feature vectors have equal length — TODO confirm
        mismatch_count = sum(1 for a, b in zip(vec1, vec2) if a != b)
        abs_difference = sum(abs(a - b) for a, b in zip(vec1, vec2))
        distances[(char1, char2)] = (
            library_hamming,
            library_euclidean,
            mismatch_count,
            abs_difference,
        )
    return distances
def __init__(self, X=None, y=None, dist_metric=EuclideanDistance(), num_components=0):
    """Initialize the model.

    Parameters:
        X: training samples (optional); if given together with y,
           the model is computed immediately.
        y: training labels (optional).
        dist_metric: distance metric used for matching
                     (default: a shared EuclideanDistance instance).
        num_components: number of components to keep (0 = all).
    """
    self.dist_metric = dist_metric
    # BUG FIX: was hard-coded to 0, silently discarding the caller-supplied
    # num_components argument.
    self.num_components = num_components
    self.projections = []
    self.W = []
    self.mu = []
    # Train immediately when both data and labels are supplied.
    if (X is not None) and (y is not None):
        self.compute(X, y)
def __init__(self, X=None, y=None, dist_metric=EuclideanDistance(), num_components=0):
    """Construct an EigenfacesModel; all initialization is delegated to the base class."""
    super().__init__(
        X=X,
        y=y,
        dist_metric=dist_metric,
        num_components=num_components,
    )
def run_rec():
    """Train a Fisherfaces recognition model on ./images and classify ./search.png.

    Side effects: attaches a stdout handler to the "facerec" logger, writes
    the trained model to model.pkl, prints 10-fold cross-validation results
    and the predicted label.

    Returns the predicted label of search.png.
    """
    # Read in the image data; 'images' must be a valid path.
    X, y = read_images('images')
    # Route the facerec library's internal logging to stdout so we can see
    # what's going on inside.
    handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger = logging.getLogger("facerec")
    # NOTE(review): addHandler here duplicates handlers if run_rec is called
    # more than once in the same process.
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)
    # Fisherfaces feature extraction combined with a 1-NN classifier using
    # Euclidean distance.
    feature = Fisherfaces()
    classifier = NearestNeighbor(dist_metric=EuclideanDistance(), k=1)
    my_model = PredictableModel(feature=feature, classifier=classifier)
    # Compute the Fisherfaces on the given data (in X) and labels (in y).
    my_model.compute(X, y)
    # Persist via pickle, then reload to exercise the round trip.
    save_model('model.pkl', my_model)
    model = load_model('model.pkl')
    # Perform a 10-fold cross validation and print the result.
    cv = KFoldCrossValidation(model, k=10)
    cv.validate(X, y)
    cv.print_results()
    # Predict the label of the query image (converted to 8-bit grayscale).
    im = Image.open('search.png')
    im = im.convert("L")
    predicted_label = model.predict(im)[0]
    print(predicted_label)
    return predicted_label
# NOTE(review): this chunk begins mid-method — the enclosing `def` (apparently
# a private neighbor-search method of KNNRegression) is outside this view, so
# the indentation below is reconstructed and must be confirmed against the
# full file.
        distance = self.distance.compute(distance_context)
        distances.append((distance, t))
        # NOTE(review): itemgetter(1) sorts by the stored training point `t`,
        # not by the computed distance in slot 0 — itemgetter(0) looks
        # intended for a nearest-neighbor search; confirm before changing.
        distances.sort(key=operator.itemgetter(1))
        # Keep only the k closest entries.
        return distances[:k]

    def __average_neighbors(self, neighbors):
        # Component-wise mean of the neighbors' data points: each neighbor is
        # a (distance, point) pair, so x[1] extracts the point.
        length = len(neighbors)
        neighbor_distance = list(map(lambda x: x[1], neighbors))
        return [sum(x) / length for x in zip(*neighbor_distance)]

    def predict(self, data_point, k):
        '''
        Prediction method for KNN
        '''
        # Precondition checks: the model must be trained and k positive.
        if not self.__is_trained:
            raise TrainingIncompleteException()
        if k <= 0:
            raise NegativeOrZeroKException(k)
        # Average the k nearest neighbors to produce the regression output.
        nearest_neighbors = self.__get_neighbors(data_point, k)
        return self.__average_neighbors(nearest_neighbors)

if __name__ == '__main__':
    # TODO: Move over to Unit tests..
    knn = KNNRegression()
    knn.train([[0, 0, 2], [3, 4, 5], [5, 5, 5]], EuclideanDistance())
    print(knn.predict([2, 3, 4], 3))
def __init__(self, dist_metric=EuclideanDistance(), k=1):
    """Set up a k-NN classifier with the given distance metric and k,
    starting from an empty training set (X empty, y an empty int32 array)."""
    AbstractClassifier.__init__(self)
    # Classifier hyperparameters.
    self.dist_metric = dist_metric
    self.k = k
    # Training data, filled in later by compute()/train-style calls.
    self.X = []
    self.y = np.array([], dtype=np.int32)
def __init__(
    self,
    vocab_size: int,
    distance: str = "euclidean",
    embedding_dim: Optional[int] = None,
    embedding_dropout: float = 0,
    pretrained_embeddings: Optional[Tensor] = None,
    freeze_pretrained_embeddings: bool = True,
    padding_idx: int = 0,
    hidden_dim: int = 128,
    **kwargs,
) -> None:
    """Initialize the TextClassifier model.

    Parameters
    ----------
    vocab_size: int
        The number of tokens in the vocabulary
    distance: str, optional
        One of: ['euclidean', 'hyperbolic']. Default ``'euclidean'``.
    embedding_dim: int, optional
        The token embedding dimension. Should be provided
        if no pretrained embeddings are used.
    embedding_dropout: float, optional
        Dropout probability applied to the token embeddings.
        Default ``0`` (no dropout).
    pretrained_embeddings: Tensor, optional
        A pretrained embedding matrix
    freeze_pretrained_embeddings: bool, optional
        Only used if a pretrained embedding matrix is provided.
        Freezes the embedding layer during training.
    padding_idx: int, optional
        The padding index. Default ``0``.
    hidden_dim: int, optional
        The hidden dimension of the encoder. Default ``128``.

    Raises
    ------
    ValueError
        If ``distance`` is not one of ['euclidean', 'hyperbolic'], or if
        both ``embedding_dim`` and ``pretrained_embeddings`` are ``None``.

    Extra keyword arguments are passed to the RNNEncoder.
    """
    super().__init__()
    # Select the distance / mean modules that define the metric space the
    # classifier operates in.
    if distance == "euclidean":
        dist = EuclideanDistance()
        mean = EuclideanMean()
    elif distance == "hyperbolic":
        dist = HyperbolicDistance()
        mean = HyperbolicMean()
    else:
        raise ValueError(
            f"Distance should be one of: ['euclidean', 'hyperbolic'], but got {distance}"
        )
    self.padding_idx = padding_idx
    self.distance_module = dist
    self.mean_module = mean
    self.embedding_dropout = nn.Dropout(embedding_dropout)
    # An embedding source is mandatory: either a dimension to train from
    # scratch, or a pretrained matrix.
    if embedding_dim is None and pretrained_embeddings is None:
        raise ValueError(
            "At least one of: `embedding_dim` and `pretrained_embeddings` must be provided"
        )
    if pretrained_embeddings is not None:
        self.embedding = nn.Embedding.from_pretrained(
            pretrained_embeddings,
            freeze=freeze_pretrained_embeddings,
            padding_idx=padding_idx,
        )
    else:
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
    # The encoder's input size follows the (possibly pretrained) embedding
    # dimension rather than the embedding_dim argument.
    kwargs["hidden_size"] = hidden_dim
    self.encoder = RNNEncoder(input_size=self.embedding.embedding_dim, **kwargs)