def nnmainloop(data, l):
    """Runs a single query with given input line.

    The line is split on whitespace and consumed left to right as:

        <metric> <stype> [stype params] <v1> <v2> ... <vN>

    where <metric> must be in METRICS, <stype> must be in SEARCH_TYPES, and
    the stype-specific params are:

        'k'        -> <k>        (int)
        'radius'   -> <r>        (float)
        'radius-k' -> <r> <k>    (float, then int)
        'unsorted' -> no params; results are requested unsorted (sort=0)

    Any other search type takes no params and runs with k=-1, r=-1, sort=1.
    The remaining elements are parsed as floats to form the query vector,
    whose length must equal data.shape[1].

    The filtered result is printed to stdout. On any failure (bad input or
    an error during the search) the exception is logged and 0 is printed
    instead; nothing is raised to the caller.
    """
    from nkutils import simplenn
    # NOTE(review): filternnresults is not imported here, so it has to be
    # available at module level; if it is not, the resulting NameError is
    # swallowed by the handler below and every query prints 0 -- confirm.
    try:
        # parse line
        els = l.strip().split()
        metric = els.pop(0)
        # Explicit checks rather than asserts, so that validation still
        # happens when Python runs with -O.
        if metric not in METRICS:
            raise ValueError('Unknown metric %r' % (metric,))
        stype = els.pop(0)
        if stype not in SEARCH_TYPES:
            raise ValueError('Unknown search type %r' % (stype,))
        # set params
        k, r, sort = -1, -1, 1
        if stype == 'k':
            k = int(els.pop(0))
        elif stype == 'radius':
            r = float(els.pop(0))
        elif stype == 'radius-k':
            r = float(els.pop(0))
            k = int(els.pop(0))
        elif stype == 'unsorted':
            sort = 0
        # parse data (a list, not map(), so this also works on Python 3
        # where map() returns a lazy iterator)
        fvec = np.array([float(el) for el in els])
        if len(fvec) != data.shape[1]:
            raise ValueError('Query has %d dims, but data has %d'
                             % (len(fvec), data.shape[1]))
        # run the actual search
        dists = simplenn(data, fvec, metric=metric, normalize=None)
        ret = filternnresults(dists, k=k, r=r, sort=sort)
        print(ret)
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and emit a
        # placeholder result so the caller always gets one output per query.
        log('Exception of type %s: %s' % (type(e), e))
        print(0)
def search(self, fvecs):
    """Runs a nn search for the given feature vectors.

    Uses the existing parameters for metric, stype, k, and r.
    Any of those can be changed at any time.

    Side effect: sets self.sort to 0 if self.stype is 'unsorted', else 1.

    If self.nprocs > 1, the vectors are distributed over self.nprocs workers
    via a pair of queues (fvecs must then support len()); otherwise they are
    processed sequentially in the calling thread.

    Returns a list with one filternnresults() output per input vector, in
    the same order as fvecs. An exception raised while processing a vector
    propagates to the caller in both modes.
    """
    from nkpylib.nkutils import simplenn, filternnresults
    from nkpylib.nkthreadutils import spawnWorkers
    try:
        from Queue import Queue  # Python 2
    except ImportError:
        from queue import Queue  # Python 3

    self.sort = 0 if self.stype == 'unsorted' else 1
    start = time.time()

    def runone(fvec):
        """Searches for a single vector and filters the distances."""
        dists = simplenn(self.data, fvec, metric=self.metric, normalize=None)
        return filternnresults(dists, k=self.k, r=self.r, sort=self.sort)

    if self.nprocs > 1:
        inq, outq = Queue(), Queue()
        stop = object()  # unique sentinel telling a worker to exit

        def inproc():
            """Worker loop: pulls (idx, fvec) jobs until told to stop."""
            while True:
                job = inq.get()
                if job is stop:
                    break
                idx, fvec = job
                try:
                    outq.put((idx, runone(fvec), None))
                except Exception as e:
                    # Hand the error back instead of dying silently, which
                    # would leave the consumer below blocked forever.
                    outq.put((idx, None, e))

        # spawn workers
        # NOTE(review): assumes spawnWorkers starts plain threads that run
        # inproc once and end when it returns -- confirm.
        spawnWorkers(self.nprocs, inproc, interval=0)
        try:
            # add to inq
            for i, fvec in enumerate(fvecs):
                inq.put((i, fvec))
            # read from outq; results can arrive in any order
            outputs = [0] * len(fvecs)
            todo = set(range(len(fvecs)))
            while todo:
                if len(todo) % 10 == 0:
                    log('%d left in todo, %0.3fs elapsed' % (len(todo), time.time() - start))
                idx, out, err = outq.get()
                if err is not None:
                    raise err
                todo.remove(idx)
                outputs[idx] = out
        finally:
            # One sentinel per worker, so that workers do not pile up
            # (blocked on inq.get()) across repeated search() calls.
            for _ in range(self.nprocs):
                inq.put(stop)
    else:
        outputs = [runone(fvec) for fvec in fvecs]
    return outputs
def main():
    """Main method.

    Parses the command line, loads the data file named by the first
    positional argument, and then answers one query per line of stdin:
    each line is parsed with readfmt(), searched against the data with
    simplenn() using the chosen metric, and the distances are written to
    stdout with writefmt().

    Exits through parser.error() if no data filename is given or the data
    is empty. Raises ValueError if a query's length does not match the
    data's dimensionality. Stops quietly if stdout is closed by the reader.
    """
    # setup command parser
    usage = 'Usage: python %s [opts] <data filename>' % (sys.argv[0])
    parser = OptionParser(usage=usage, version=VERSION, description=DESCRIPTION)
    parser.add_option('-m', '--metric', dest='metric', choices=METRICS, default=DEFAULT_METRIC,
                      help='the distance metric to use [default %s]' % (DEFAULT_METRIC))
    parser.add_option('-d', '--datafmt', dest='datafmt', choices=FMTS, default=DEFAULT_DATAFMT,
                      help='the format of the data file [default %s]' % (DEFAULT_DATAFMT))
    parser.add_option('-i', '--inputfmt', dest='inputfmt', choices=FMTS, default=DEFAULT_INPUTFMT,
                      help='the format of the input data from stdin [default %s]' % (DEFAULT_INPUTFMT))
    parser.add_option('-o', '--outputfmt', dest='outputfmt', choices=FMTS, default=DEFAULT_OUTPUTFMT,
                      help='the format of the output to stdout [default %s]' % (DEFAULT_OUTPUTFMT))
    opts, args = parser.parse_args()
    if len(args) < 1:
        parser.print_help()
        parser.error('Need to specify the data filename.')

    datafname = args[0]
    data = readdata(datafname, opts.datafmt)
    # Report empty data as a usage error rather than an IndexError on data[0].
    if len(data) == 0:
        parser.error('Data file %s contains no data.' % (datafname,))
    ndims = len(data[0])

    for line in sys.stdin:
        fvec = readfmt(line, fmt=opts.inputfmt)
        # Explicit check rather than assert, so it survives python -O.
        if len(fvec) != ndims:
            raise ValueError('Input has %d dims, but data has %d' % (len(fvec), ndims))
        dists = simplenn(data, fvec, metric=opts.metric, normalize=None)
        out = writefmt(dists, fmt=opts.outputfmt)
        try:
            # Both the write and the flush can fail once the reader has
            # closed the pipe; treat either as the signal to stop.
            print(out)
            sys.stdout.flush()
        except IOError:
            break
def nn1(m, n, ret):
    """Fills ret one row at a time by running simplenn on each row of n.

    For every row of n, in order, simplenn is called against m with the
    'l2' metric, no normalization and withsum=1, and its result is stored
    in ret[i, :], where i is the position of that row within n.

    Returns ret, the same object that was passed in.
    """
    for rownum, query in enumerate(n):
        rowdists = simplenn(m, query, metric='l2', normalize=None, withsum=1)
        ret[rownum, :] = rowdists
    return ret
def inproc():
    """Worker loop: serves search jobs from inq and posts results to outq.

    Runs forever. Each job taken from inq is an (index, feature vector)
    pair; the vector is searched against self.data with simplenn using
    self.metric, the distances are filtered with filternnresults using
    self.k, self.r and self.sort, and (index, filtered result) is put on
    outq.
    """
    while True:
        job = inq.get()
        jobidx, query = job
        # Timestamps around each stage; only used by the optional debug log.
        t_begin = time.time()
        distances = simplenn(self.data, query, metric=self.metric, normalize=None)
        t_searched = time.time()
        result = filternnresults(distances, k=self.k, r=self.r, sort=self.sort)
        t_filtered = time.time()
        #log('Got times: %s' % (getTimeDiffs([t_begin, t_searched, t_filtered])))
        #log('Got outs: %s' % (result,))
        outq.put((jobidx, result))