Example no. 1
    def classify(test_X):
        test_X = to_ndarray(test_X)

        response_Y = []
        # majority vote over the k nearest training neighbors of each query point
        for neighbor_indices in nn.kneighbors(test_X, return_distance=False):
            neighbor_labels = [train_Y[index] for index in neighbor_indices]
            best_label = max(set(neighbor_labels), key=neighbor_labels.count)
            response_Y.append(best_label)
        return to_ndarray(response_Y, dtype=int)
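
For context, a minimal usage sketch of the classifier above. Here nn, train_Y, and to_ndarray are assumed stand-ins (a fitted sklearn.neighbors.NearestNeighbors, the training labels, and a thin np.asarray wrapper); none of them come from this snippet itself.

import numpy as np
from sklearn.neighbors import NearestNeighbors

def to_ndarray(x, dtype=None):
    # assumed helper: plain conversion to ndarray
    return np.asarray(x, dtype=dtype)

# toy training set: two well-separated clusters
train_X = np.array([[0.0, 0.0], [0.1, 0.2], [5.0, 5.0], [5.1, 4.9]])
train_Y = np.array([0, 0, 1, 1])
nn = NearestNeighbors(n_neighbors=3).fit(train_X)

# with classify() defined as above:
# classify([[0.05, 0.1], [5.0, 5.0]])  ->  array([0, 1])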
Example no. 2
def compute_relevance_for_most_popular(actuals: pd.DataFrame, k: int):
    """
    Computes the binary relevance vector for each user,
    meaning 0 if bad recommendation, 1 if good recommendation.

    :param actuals: actuals: n_ratings x [user, item, rating, timestamp] matrix with actual ratings
    :param k: number of items to recommend
    :return: a list of lists, where each list is a binary relevance vector for a user
    """
    np_actuals = utils.to_ndarray(actuals)
    mp_items = baselines.most_popular(np_actuals)

    # list of users
    users = actuals['user'].unique()

    # result
    res = []

    # iterating over users to make binary relevance vectors
    for user in users:
        data = actuals[actuals['user'] == user]  # data for this user
        items_rated = list(
            data['item']
        )  # the items the user has rated, as only those can be evaluated

        # TODO: I think we should just take the first k elements of the recommended
        # list and then call "not-rated" items false positives
        # take the k most popular items that the user has rated
        k_mp_items = [i for i in mp_items if i in items_rated][:k]

        if len(k_mp_items) > 0:
            relevance = compute_single_relevance_for_index_predictions(
                data, k_mp_items)
            res.append(relevance)

    return res
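
A hedged sketch of how the returned relevance vectors might be consumed, e.g. for precision@k; precision_at_k is a hypothetical helper, not part of the source.

import numpy as np

def precision_at_k(relevance_vectors, k):
    # each vector holds 0/1 relevance flags for up to k recommended items;
    # precision@k averages the fraction of relevant items across users
    return float(np.mean([np.sum(vec[:k]) / k for vec in relevance_vectors]))

print(precision_at_k([[1, 0, 1], [0, 0, 1]], k=3))  # -> 0.5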
Example no. 3
def ced(img_file, sigma, t, T, all=False):
    img = to_ndarray(img_file)
    if not all:
        # avoid copies; run every stage in place and return only the final image
        img = gs_filter(img, sigma)        # Gaussian smoothing
        img, D = gradient_intensity(img)   # gradient magnitude and direction
        img = suppression(img, D)          # non-maximum suppression
        img, weak = threshold(img, t, T)   # double thresholding
        img = tracking(img, weak)          # edge tracking by hysteresis
        return [img]
    else:
        # make copies so every intermediate stage can be returned
        img1 = gs_filter(img, sigma)
        img2, D = gradient_intensity(img1)
        img3 = suppression(copy(img2), D)
        img4, weak = threshold(copy(img3), t, T)
        img5 = tracking(copy(img4), weak)
        return [to_ndarray(img_file), img1, img2, img3, img4, img5]
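
A hypothetical invocation of the pipeline above, assuming the helper stages (gs_filter, gradient_intensity, suppression, threshold, tracking) implement the usual Canny steps; the file name and parameter values are illustrative.

# sigma: Gaussian blur width; t/T: low and high hysteresis thresholds
stages = ced('input.png', sigma=1.4, t=20, T=40, all=True)
# stages = [original, blurred, gradient, suppressed, thresholded, tracked]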
Example no. 4
def most_popular(data: pd.DataFrame) -> np.ndarray:
    """
    Computes the popularity of each item as its average rating and
    returns the list of per-item average ratings.

    :param data: data in form of (user, item, rating, timestamp) tuples
    :return: list of average ratings for each item
    """
    R = utils.to_ndarray(data)

    # R is a users x items matrix, so the mean of each column is
    # the average rating of one item
    return R.mean(axis=0)
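
A toy check of the column-mean step, assuming utils.to_ndarray yields a users x items rating matrix (as Example no. 7 suggests, where R.shape unpacks to (n_users, n_items)); the matrix below is made up.

import numpy as np

R = np.array([[5., 3., 0., 1.],   # hypothetical 3-users x 4-items ratings
              [4., 0., 0., 1.],
              [1., 1., 0., 5.]])
print(R.mean(axis=0))  # per-item averages: [3.33... 1.33... 0. 2.33...]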
Example no. 5
    def embed(self, param, unknown_embedding=None):
        # an optional fallback vector must match the embedding dimensions
        if unknown_embedding is not None:
            assert unknown_embedding.shape == self.values.shape[1:]

        if type(param) is str:
            # single word: look it up, falling back to unknown_embedding
            # (if given) for out-of-vocabulary words
            try:
                return self.values[self.word_indices[param]]
            except KeyError:
                if unknown_embedding is not None:
                    return unknown_embedding
                else:
                    raise
        else:
            # iterable (possibly nested): embed each element recursively
            # and concatenate the results into one flat array
            rec = partial(self.embed, unknown_embedding=unknown_embedding)
            return to_ndarray(chain.from_iterable(map(rec, param)))
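
A standalone analogue of embed above, with a tiny made-up vocabulary, to show how nested inputs flatten into one concatenated vector; every name here is illustrative rather than taken from the source class.

from functools import partial
from itertools import chain
import numpy as np

word_indices = {"hello": 0, "world": 1}   # hypothetical vocabulary
values = np.array([[1.0, 2.0, 3.0],       # one 3-d vector per word
                   [4.0, 5.0, 6.0]])

def embed(param, unknown_embedding=None):
    if isinstance(param, str):
        try:
            return values[word_indices[param]]
        except KeyError:
            if unknown_embedding is not None:
                return unknown_embedding
            raise
    rec = partial(embed, unknown_embedding=unknown_embedding)
    # flatten the per-element vectors into one concatenated array
    return np.fromiter(chain.from_iterable(map(rec, param)), dtype=float)

print(embed(["hello", "world"]))  # -> [1. 2. 3. 4. 5. 6.]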
Example no. 6
    def __setitem__(self, key, value):
        """
        x.__setitem__(key, value) <==> x[key] = value

        Sets values based on `key`.  All the functionality of
        ``ndarray.__setitem__()`` is supported (including fancy
        indexing), plus special support for expressions:

        Parameters
        ----------
        key : string
            The corresponding ctable column name will be set to `value`.  If
            not a column name, it will be interpreted as a boolean expression
            (computed via `ctable.eval`) and the rows where these values are
            true will be set to `value`.

        See Also
        --------
        ctable.eval

        """

        # First, convert value into a structured array
        value = utils.to_ndarray(value, self.dtype)
        # Check whether the key is actually a condition
        if type(key) is bytes:
            # Convert the key into a boolean array
            #key = self.eval(key)
            # The method below is faster (especially for large ctables)
            rowval = 0
            for nrow in self.where(key, outcols=["nrow__"]):
                nrow = nrow[0]
                if len(value) == 1:
                    for name in self.names:
                        self.cols[name][nrow] = value[name]
                else:
                    for name in self.names:
                        self.cols[name][nrow] = value[name][rowval]
                    rowval += 1
            return
        # Then, modify the rows
        for name in self.names:
            self.cols[name][key] = value[name]
        return
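
A hedged usage sketch of the two assignment paths, assuming a bcolz-style ctable; the constructor call follows the bcolz tutorial, but exact details may differ from the real library.

import numpy as np
import bcolz  # assumed available

ct = bcolz.ctable((np.arange(5), np.linspace(0.0, 1.0, 5)), names=['f0', 'f1'])

# ordinary key: ndarray-style assignment of one row
ct[0] = (100, 0.5)

# bytes condition key: every row where the expression holds is overwritten
ct[b'f0 > 2'] = (0, 0.0)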
Example no. 7
    def __init__(self,
                 data: pd.DataFrame,
                 K: int,
                 epochs=100,
                 alpha=0.002,
                 beta=0.02):
        """
        Perform matrix factorization to predict empty
        entries in a matrix.

        Arguments
        - data (dataframe)  : user-item interactions
        - K (int)           : number of latent dimensions
        - epochs (int)      : number of iterations
        - alpha (float)     : learning rate
        - beta (float)      : regularization parameter
        """
        self.R = utils.to_ndarray(data)
        self.n_users, self.n_items = self.R.shape
        self.global_avg = data['rating'].mean()
        self.K = K
        self.epochs = epochs
        self.alpha = alpha
        self.beta = beta
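
For reference, a hedged sketch of the per-epoch SGD update such a factorization typically performs; P and Q (user and item factor matrices) are hypothetical names, not attributes of the class above.

import numpy as np

def sgd_epoch(R, P, Q, alpha, beta):
    # one pass over the observed (non-zero) entries of the rating matrix R,
    # nudging user factors P (n_users x K) and item factors Q (n_items x K)
    # down the gradient of the regularized squared error
    for u, i in zip(*R.nonzero()):
        err = R[u, i] - P[u] @ Q[i]
        P[u] += alpha * (err * Q[i] - beta * P[u])
        Q[i] += alpha * (err * P[u] - beta * Q[i])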
Example no. 8
 def classify(test_X):
     test_X = to_ndarray(test_X)
     # delegate to the fitted logistic regression model
     return lgr.predict(test_X)
Example no. 9
 def classify(test_X):
     test_X = to_ndarray(test_X)
     # delegate to the fitted random forest model
     return forest.predict(test_X)
Example no. 10
def load_json(filepath):
    # use a context manager so the file handle is closed promptly
    with open(filepath, 'r') as f:
        path_feature_map = json.load(f)
    return to_ndarray(path_feature_map)
Example no. 11
import glob
import json
import sys
import os

import numpy as np

import utils
from similarity import search_k_nearest
from mds import calculate_positions
from plot import plot_with_labels


dicts = []
for path in glob.glob('./jsonfiles/*.json'):
    with open(path, 'r') as f:
        dicts.append(json.load(f))
path_feature_map = utils.merge_multiple_dicts(dicts)
path_feature_map = utils.to_ndarray(path_feature_map)

#TODO Show error messages
key = sys.argv[1]
keys = path_feature_map.keys()
print(keys)
query = path_feature_map[key]
k_nearest = search_k_nearest(path_feature_map, query)
for filepath, distance in k_nearest:
    print("{}\n{:>3e}\n".format(filepath.encode('utf-8'), distance))