# Example no. 1
0
def learn_distance_metric(distances, pairs_per_prototype=100,
                                     test_size=0.5,
                                     return_features=False,
                                     return_pairs=False):
    """Fit an MMC metric on pairs built from ``distances`` and score it.

    The data is run through a date/MMC feature pipeline, labeled pairs are
    generated with ``create_mmc_pairs``, and the pairs are split into a
    stratified train/test partition. An ``MMC`` model is fitted on the
    training pairs and evaluated on the held-out pairs.

    Args:
        distances: Input data handed both to the feature pipeline and to
            ``create_mmc_pairs`` (its schema is not visible from here).
        pairs_per_prototype: Forwarded to ``create_mmc_pairs``.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        return_features: If true, include the transformed feature matrix in
            the result; otherwise ``features`` is ``None``.
        return_pairs: If true, include the generated pairs in the result;
            otherwise ``pairs`` is ``None``.

    Returns:
        A ``SimpleNamespace`` with:
            score: weighted F1 score of the fitted model on the test pairs.
            metric_components: transpose of the fitted ``mmc.components_``.
            features: the feature matrix, or ``None``.
            pairs: the generated pairs array, or ``None``.
    """
    feature_pipeline = Pipeline([
        ('dates', DateFeatureTransformer()),
        ('features', MMCFeatureTransformer()),
    ])

    features = feature_pipeline.fit_transform(distances)
    pairs = create_mmc_pairs(distances, pairs_per_prototype=pairs_per_prototype)

    # The first two columns of `pairs` are the pair members and the last
    # column is the label; stratifying on the label keeps the class balance
    # the same in both splits.
    # NOTE(review): the pair members are presumably row indices into
    # `features`, since `features` is passed as the MMC preprocessor - confirm.
    labels = pairs[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(
        pairs[:, :2], labels,
        shuffle=True, stratify=labels, test_size=test_size
    )

    # `np.float` was deprecated in NumPy 1.20 and removed in 1.24; it was an
    # alias of the builtin `float`, so `np.float64` yields the same dtype.
    mmc = MMC(preprocessor=np.array(features, dtype=np.float64))
    mmc = mmc.fit(X_train, y_train)
    score = f1_score(y_test, mmc.predict(X_test), average='weighted')
    return SimpleNamespace(
        score=score,
        metric_components=mmc.components_.transpose(),
        features=features if return_features else None,
        pairs=pairs if return_pairs else None,
    )
# In this task, points whose first features are close should end up closer to
# each other, regardless of how close their second features are.

# Labels passed to MMC.fit alongside `pairs` (one label per pair).
# NOTE(review): `pairs` is not defined in the visible part of this file; it
# must already be bound before this point for the script to run.
y = [1, 1, -1, -1]

# Learn the MMC (Mahalanobis Metrics for Clustering) model.
mmc = MMC()
mmc.fit(pairs, y)

# Decision function used to classify the pairs.
decision_values = mmc.decision_function(pairs)
print("debug 1: ", decision_values)

# Copy of the Mahalanobis matrix learned by the metric learner.
mahalanobis_matrix = mmc.get_mahalanobis_matrix()
print("debug 2: ", mahalanobis_matrix)

# Function that takes two 1D arrays and returns the learned metric score
# between those two points.
f = mmc.get_metric()
print("debug 3: ", f)

# Predict with the learned metric on new input pairs. The input has to be a
# list of pairs: per the original note, passing a bare point such as
# [1.2, 7.5] fails with
# "ValueError: 3D array of formed tuples expected by MMC."
example_pairs = [[[1.2, 7.5], [1.3, 8.5]]]
predictions = mmc.predict(example_pairs)
print("debug 4 : ", predictions)