コード例 #1
0
ファイル: __init__.py プロジェクト: konstantint/FCorr
  def __init__(self, data):
    '''
      Builds the index: the input is standardized and stored in a k-d tree.

      Args:
         data (array): Two-dimensional collection holding one vector per row.
            A numpy array is preferred; other iterables (such as a list of
            equal-length lists) may work as well.
      Raises:
         Whatever standardize() or KDTree() raise for an invalid dataset
         (the exact exception types are not fixed here).
    '''
    # Index the standardized rows
    self._tree = KDTree(standardize(data))
コード例 #2
0
 def _setup_tree(self):
     '''
     Builds one kD-tree per integer time stamp and records the earliest stamp.

     Observations are grouped by ``int(obs[2])``. Each group is indexed by
     its own ``KDTree`` stored in ``self._trees`` (keyed by time stamp), and
     the smallest key is stored in ``self._min_time_stamp``.

     Raises:
         Exception: if ``self._observations`` is None.
         ValueError: if ``self._observations`` yields no observations.
     '''
     if self._observations is None:
         raise Exception("observations not set.")

     # Bucket the observations by their third field, converted with int().
     timed_obs = defaultdict(list)
     for obs in self._observations:
         timed_obs[int(obs[2])].append(obs)

     # Fail before touching any state: otherwise min() below would raise an
     # opaque "empty sequence" ValueError after self._trees had already
     # been replaced by an empty dict.
     if not timed_obs:
         raise ValueError("observations are empty.")

     self._trees = {stamp: KDTree(group) for stamp, group in timed_obs.items()}
     self._min_time_stamp = min(timed_obs)
コード例 #3
0
ファイル: __init__.py プロジェクト: konstantint/FCorr
class CorrelationIndex:
  '''
    Data structure that indexes a set of vectors for fast retrieval of the
    elements most correlated with the query point.

    NOTE: retrieval of more than one match is not implemented yet; query()
    currently returns at most one element. The examples below describe the
    intended behaviour and use a `>>` prompt, so doctest does not run them.

    >> fci = CorrelationIndex([(1,3,5), (4,3,2)])
    >> fci.query((1,2,3))
    [0]
    >> fci.query((3,2,1))
    [1]
    >> fci.query((1,2,3), 0)
    []
    >> fci.query((1,2,3), 2)
    [0, 1]
    >> fci.query((3,2,1), 3)
    [1, 0]

    >> fci = CorrelationIndex([])
    >> fci.query((1,2,3))
    []

    >> fci = CorrelationIndex("random string")
    Traceback (most recent call last):
        ...
    ValueError: object is not iterable

    >> fci = CorrelationIndex([(1,3,5), (4,3)])
    Traceback (most recent call last):
        ...
    ValueError: iterable should contain tuples (or vectors or lists) of numbers of equal length
  '''

  def __init__(self, data):
    '''
      Reads the data, standardizes it and creates an index based on the data.

      Args:
         data (array): A two-dimensional array, with vectors to be indexed in the rows.
            Preferably a numpy array, but other iterables (e.g. list of lists of the same length) could work too.
      Raises:
         Whatever standardize() or KDTree() raise if the dataset is invalid
         (presumably ValueError or TypeError -- the exact types are not fixed here).
    '''
    standardized_data = standardize(data)
    self._tree = KDTree(standardized_data) # Index the data

  def query(self, x, matches=1):
    '''
      Returns the indexed elements most correlated with the query point.

      Args:
         x: The query vector. It is handed to the tree unchanged.
            NOTE(review): the indexed data is standardized but x is not --
            confirm whether x should be standardized as well.
         matches (int): Maximum number of elements to return.
      Returns:
         An empty list if matches <= 0. Otherwise a one-element list holding
         the tree's approximate nearest neighbour of x (presumably the row
         index of the matching vector, as in the class examples).
    '''
    # Asking for no matches never needs to consult the tree.
    if matches <= 0:
      return []
    #TODO: Implement retrieval of more than one match (matches > 1)
    return [self._tree.find_approximate_nearest_neigbor(x)]