Exemple #1
0
def nnmainloop(data, l):
    """Run a single nearest-neighbor query described by one input line.

    The line is whitespace-separated and laid out as:

        <metric> <search type> [<search params>...] <feature values>...

    where the search params depend on the search type:

        'k'        -> <k>
        'radius'   -> <r>
        'radius-k' -> <r> <k>
        'unsorted' -> (none; results are requested unsorted)

    Any other search type accepted by SEARCH_TYPES takes no params.

    Args:
        data: the dataset to search. Only `data.shape[1]` is read here (it
            must equal the number of feature values on the line); the object
            is otherwise handed straight to `simplenn`.
        l: the query line.

    Prints the value returned by `filternnresults` on success. On any error
    (malformed line, unknown metric/search type, wrong dimensionality, or a
    failure inside the search) the error is logged and `0` is printed.
    """
    from nkutils import simplenn
    try:
        # parse line
        els = l.strip().split()
        metric = els.pop(0)
        # Explicit checks rather than `assert`, so that validation still
        # happens when Python runs with -O.
        if metric not in METRICS:
            raise ValueError('Unknown metric: %r' % (metric,))
        stype = els.pop(0)
        if stype not in SEARCH_TYPES:
            raise ValueError('Unknown search type: %r' % (stype,))
        # set params
        # -1 is passed through to filternnresults unchanged; presumably it
        # means "no limit" there -- TODO confirm against filternnresults.
        k = -1
        r = -1
        sort = 1
        if stype == 'k':
            k = int(els.pop(0))
        elif stype == 'radius':
            r = float(els.pop(0))
        elif stype == 'radius-k':
            r = float(els.pop(0))
            k = int(els.pop(0))
        elif stype == 'unsorted':
            sort = 0
        # parse data: everything left on the line is the feature vector.
        # A list comprehension (not map) so this also builds a proper numeric
        # array where map() returns an iterator.
        fvec = np.array([float(el) for el in els])
        if len(fvec) != data.shape[1]:
            raise ValueError('Feature vector has %d dims, expected %d' %
                             (len(fvec), data.shape[1]))
        # run the actual search
        dists = simplenn(data, fvec, metric=metric, normalize=None)
        ret = filternnresults(dists, k=k, r=r, sort=sort)
        print(ret)
    except Exception as e:
        # Top-level boundary for one query: report and emit a sentinel result
        # so the caller always gets exactly one output line per input line.
        log('Exception of type %s: %s' % (type(e), e))
        print(0)
Exemple #2
0
    def search(self, fvecs):
        """Runs a nn search for the given feature vectors.

        Uses the existing parameters for metric, stype, k, and r.
        Any of those can be changed at any time.

        Side effect: sets `self.sort` to 0 when `self.stype` is 'unsorted'
        and to 1 otherwise.

        Args:
            fvecs: iterable of query feature vectors.

        Returns:
            A list with one `filternnresults` output per query, in the same
            order as `fvecs`.

        Raises:
            Whatever `simplenn` / `filternnresults` raise. In the threaded
            path (`self.nprocs > 1`) an exception raised inside a worker is
            re-raised here instead of leaving the caller blocked forever.
        """
        from nkpylib.nkutils import simplenn, filternnresults
        from nkpylib.nkthreadutils import spawnWorkers
        try:
            from Queue import Queue  # Python 2
        except ImportError:
            from queue import Queue  # Python 3

        self.sort = 0 if self.stype == 'unsorted' else 1

        def runquery(fvec):
            """Runs one query: distance computation followed by filtering."""
            dists = simplenn(self.data,
                             fvec,
                             metric=self.metric,
                             normalize=None)
            return filternnresults(dists, k=self.k, r=self.r, sort=self.sort)

        if self.nprocs <= 1:
            # Single-worker case: just run the queries inline, in order.
            return [runquery(fvec) for fvec in fvecs]

        start = time.time()
        nworkers = self.nprocs
        inq, outq = Queue(), Queue()

        def inproc():
            """Worker loop: pulls (idx, fvec) jobs until it sees a None."""
            while True:
                job = inq.get()
                if job is None:
                    # Shutdown sentinel: lets the worker exit rather than
                    # block on the queue for the rest of the process lifetime.
                    return
                idx, fvec = job
                try:
                    outq.put((idx, True, runquery(fvec)))
                except Exception as e:
                    # Hand the failure to the consumer; otherwise it would
                    # wait forever for a result that never arrives.
                    outq.put((idx, False, e))

        # spawn procs
        spawnWorkers(nworkers, inproc, interval=0)
        try:
            # add to inq, counting as we go so fvecs need not support len()
            nqueries = 0
            for idx, fvec in enumerate(fvecs):
                inq.put((idx, fvec))
                nqueries = idx + 1
            # read from outq; results arrive in arbitrary order, so each one
            # is slotted back in by its original index
            outputs = [0] * nqueries
            todo = set(range(nqueries))
            while todo:
                if len(todo) % 10 == 0:
                    log('%d left in todo, %0.3fs elapsed' %
                        (len(todo), time.time() - start))
                idx, ok, out = outq.get()
                if not ok:
                    raise out
                todo.remove(idx)
                outputs[idx] = out
        finally:
            # One sentinel per worker, queued behind any remaining jobs, so
            # every worker spawned by this call terminates.
            for _ in range(nworkers):
                inq.put(None)
        return outputs
Exemple #3
0
def main():
    """Command-line entry point: stream nearest-neighbor queries from stdin.

    Reads the dataset named by the first positional argument, then for each
    line on stdin parses a feature vector, runs `simplenn` against the
    dataset with the chosen metric, and prints the formatted result to stdout
    (flushed after every line). Stops quietly if stdout is closed by the
    reader.

    Exits via `parser.error` when the data filename is missing or the data
    file contains no rows. Raises ValueError when an input vector does not
    have the same number of dimensions as the data.
    """
    # setup command parser
    usage = 'Usage: python %s [opts] <data filename>' % (sys.argv[0])
    parser = OptionParser(usage=usage, version=VERSION, description=DESCRIPTION)
    parser.add_option('-m', '--metric', dest='metric', choices=METRICS,
                      default=DEFAULT_METRIC,
                      help='the distance metric to use [default %s]' % (DEFAULT_METRIC))
    parser.add_option('-d', '--datafmt', dest='datafmt', choices=FMTS,
                      default=DEFAULT_DATAFMT,
                      help='the format of the data file [default %s]' % (DEFAULT_DATAFMT))
    parser.add_option('-i', '--inputfmt', dest='inputfmt', choices=FMTS,
                      default=DEFAULT_INPUTFMT,
                      help='the format of the input data from stdin [default %s]' % (DEFAULT_INPUTFMT))
    parser.add_option('-o', '--outputfmt', dest='outputfmt', choices=FMTS,
                      default=DEFAULT_OUTPUTFMT,
                      help='the format of the output to stdout [default %s]' % (DEFAULT_OUTPUTFMT))
    opts, args = parser.parse_args()
    if len(args) < 1:
        parser.print_help()
        parser.error('Need to specify the data filename.')
    datafname = args[0]
    data = readdata(datafname, opts.datafmt)
    # len() rather than truthiness: `data` may be an array type whose truth
    # value is ambiguous.
    if len(data) == 0:
        parser.error('No data found in %s' % (datafname,))
    ndims = len(data[0])
    for line in sys.stdin:
        fvec = readfmt(line, fmt=opts.inputfmt)
        # Explicit check rather than `assert`, so it survives python -O.
        if len(fvec) != ndims:
            raise ValueError('Input vector has %d dims, expected %d' %
                             (len(fvec), ndims))
        dists = simplenn(data, fvec, metric=opts.metric, normalize=None)
        out = writefmt(dists, fmt=opts.outputfmt)
        try:
            # Both the write and the flush can fail once the consumer has
            # closed the pipe; treat either as a signal to stop.
            print(out)
            sys.stdout.flush()
        except IOError:
            break
Exemple #4
0
def nn1(m, n, ret):
    """Fill `ret` row by row, calling simplenn once per row of `n`.

    For the i-th row of `n`, the result of
    `simplenn(m, row, metric='l2', normalize=None, withsum=1)` is written
    into `ret[i, :]`. `ret` is modified in place and also returned.

    NOTE(review): the original carried a commented-out cross-check against
    `numpy.sum((m - row) ** 2, 1)`, which suggests that is what simplenn
    computes with these arguments -- confirm against simplenn itself.
    """
    for idx, query in enumerate(n):
        dists = simplenn(m, query, metric='l2', normalize=None, withsum=1)
        ret[idx, :] = dists
    return ret
Exemple #5
0
 def inproc():
     """Worker loop: answer queries taken from `inq` and post them to `outq`.

     Runs forever. Each job is an `(idx, fvec)` pair; the worker computes
     distances with `simplenn`, filters them with `filternnresults`, and puts
     `(idx, out)` on `outq`, so the consumer can match each result to its
     query via `idx`.

     `inq`, `outq` and `self` come from the enclosing scope, which is not
     visible in this fragment.
     """
     while True:
         idx, fvec = inq.get()
         dists = simplenn(self.data,
                          fvec,
                          metric=self.metric,
                          normalize=None)
         out = filternnresults(dists,
                               k=self.k,
                               r=self.r,
                               sort=self.sort)
         outq.put((idx, out))