def __init__(self, spec):
    """Set up the client's spec, Discodex client and docset.

    `spec` is used as-is when it is already a `Spec` instance; any other
    value is passed to the `Spec` constructor.
    """
    self.spec = spec if isinstance(spec, Spec) else Spec(spec)
    self.discodex_client = DiscodexClient()
    # The docset is looked up by the name carried on the resolved spec.
    self.docset = Docset(self.spec.docset_name)
class FreequeryClient(object):
    """Entry point for querying, indexing, link-parsing and ranking a docset.

    An instance holds the resolved `Spec`, a `DiscodexClient` and the
    `Docset` named by `spec.docset_name`.
    """

    def __init__(self, spec):
        """Set up the client's spec, Discodex client and docset.

        `spec` is used as-is when it is already a `Spec` instance; any other
        value is passed to the `Spec` constructor.
        """
        self.spec = spec if isinstance(spec, Spec) else Spec(spec)
        self.discodex_client = DiscodexClient()
        self.docset = Docset(self.spec.docset_name)

    def query(self, q, ranked=True):
        """Return the documents matching `q`, annotated with their scores.

        `q` is parsed with `Query.parse` and run against the inverted index
        named by the spec.  Each hit is fetched from the docset, given a
        `Score` built from the hit's scores and an excerpt for the query.
        When `ranked` is true, a 'pagerank' entry from the score database is
        added to each document's score.

        Documents are returned in the order the index produced the hits; no
        sorting is performed here.  An empty list is returned when nothing
        matches.

        Raises `Exception` when `ranked` is true but the score database
        yields no ranks at all.
        """
        qq = Query.parse(q)
        raw_hits = self.discodex_client.query(self.spec.invindex_name, qq)
        # Build a real list: a lazy `map` object is always truthy and can be
        # consumed only once, which would break both the emptiness check and
        # the second pass over the hits below.
        hits = [TfIdf.undemux(hit) for hit in raw_hits]
        if not hits:
            return []

        pageranks = None
        if ranked:
            scoredb = ScoreDB(self.spec.scoredb_path)
            uris = [hit[0] for hit in hits]
            pageranks = dict(scoredb.rank(uris))
            if not pageranks:
                raise Exception("no ranks available")

        docs = []
        for uri, scores in hits:
            doc = self.docset.get(uri)
            doc.score = Score(**scores)
            if pageranks:
                doc.score['pagerank'] = pageranks[uri]
            # NOTE: replaces the instance's `excerpt` callable with the
            # excerpt it produced for this query.
            doc.excerpt = doc.excerpt(qq)
            docs.append(doc)
        return docs

    def index(self, **kwargs):
        """Start an `IndexJob` for the spec's docset.

        Prints an error and exits the process with status 1 when the docset
        does not exist.  `kwargs` are forwarded to `IndexJob`.
        """
        if not self.docset.exists():
            # Single-argument parenthesised form prints the same text under
            # both the print statement and the print function.
            print("fq: cannot index `%s': no such docset"
                  % self.spec.docset_name)
            # Raise directly instead of calling the `exit` helper, which is
            # only present when the `site` module has been loaded.
            raise SystemExit(1)
        job = IndexJob(self.spec, self.discodex_client, **kwargs)
        job.start()

    def linkparse(self, **kwargs):
        """Start a `LinkParseJob` for the spec, forwarding `kwargs`."""
        job = LinkParseJob(self.spec, **kwargs)
        job.start()

    def rank(self, **kwargs):
        """Start a `PagerankJob` for the spec, forwarding `kwargs`."""
        job = PagerankJob(self.spec, **kwargs)
        job.start()
def client(self):
    """Return a `DiscodexClient` for the host and port in `self.options`."""
    # Imported at call time, not at module load.
    from discodex.client import DiscodexClient

    host = self.options.host
    port = self.options.port
    return DiscodexClient(host, port)