Esempio n. 1
0
 def __init__(self, spec):
     """Set up the spec, the Discodex client and the docset for this object.

     `spec` is stored as-is when it is already a `Spec` instance; any other
     value is passed to the `Spec` constructor first.
     """
     # Accept either a ready-made Spec or whatever Spec() can be built from.
     self.spec = spec if isinstance(spec, Spec) else Spec(spec)
     # The client is created with no arguments.
     self.discodex_client = DiscodexClient()
     # The docset is looked up by the name recorded in the spec.
     self.docset = Docset(self.spec.docset_name)
Esempio n. 2
0
class FreequeryClient(object):
    """Client exposing query, index, linkparse and rank operations for a spec.

    The constructor sets three attributes:
      * `spec` -- the `Spec` the client operates on,
      * `discodex_client` -- a `DiscodexClient` created with no arguments,
      * `docset` -- the `Docset` named by `spec.docset_name`.
    """

    def __init__(self, spec):
        """Build a client from `spec`.

        `spec` is used as-is when it is already a `Spec` instance; any other
        value is passed to the `Spec` constructor first.
        """
        self.spec = spec if isinstance(spec, Spec) else Spec(spec)
        self.discodex_client = DiscodexClient()
        self.docset = Docset(self.spec.docset_name)

    def query(self, q, ranked=True):
        """Return a list of `Document` instances matching the query `q`.

        `q` is parsed with `Query.parse` and run against the index named by
        `self.spec.invindex_name`. Every matching document gets a `score`
        attribute built from the values produced by `TfIdf.undemux`, and its
        `excerpt` attribute is replaced by the result of `doc.excerpt(qq)`.
        Documents are returned in the order the index reported them; no
        sorting is done here.

        When `ranked` is true, a 'pagerank' entry taken from the score
        database at `self.spec.scoredb_path` is added to each score.

        Returns an empty list when nothing matches.
        Raises `Exception` when `ranked` is true and the score database
        yields no ranks for the matched URIs.
        """
        qq = Query.parse(q)
        raw_hits = self.discodex_client.query(self.spec.invindex_name, qq)
        # Materialise the hits as a list: they are tested for emptiness and
        # then iterated more than once, which a lazy `map` object (as
        # returned on Python 3) cannot support.
        hits = [TfIdf.undemux(hit) for hit in raw_hits]
        if not hits:
            return []

        pageranks = None
        if ranked:
            scoredb = ScoreDB(self.spec.scoredb_path)
            uris = [hit[0] for hit in hits]
            pageranks = dict(scoredb.rank(uris))
            if not pageranks:
                raise Exception("no ranks available")

        docs = []
        for uri, scores in hits:
            doc = self.docset.get(uri)
            doc.score = Score(**scores)
            # `pageranks` is either None (unranked) or a non-empty dict.
            if pageranks is not None:
                doc.score['pagerank'] = pageranks[uri]
            # Replaces the bound `excerpt` method on this instance with the
            # excerpt computed for the query.
            doc.excerpt = doc.excerpt(qq)
            docs.append(doc)
        return docs

    def index(self, **kwargs):
        """Start an `IndexJob` for this client's spec.

        Keyword arguments are forwarded to `IndexJob`. If the docset does not
        exist, a message is printed and the process exits with status 1.
        """
        # Imported locally so the method does not depend on module imports.
        import sys

        if not self.docset.exists():
            # Single parenthesised argument: same output on Python 2 and 3.
            print("fq: cannot index `%s': no such docset" % self.spec.docset_name)
            # sys.exit rather than the `exit` helper injected by `site`.
            sys.exit(1)
        job = IndexJob(self.spec, self.discodex_client, **kwargs)
        job.start()

    def linkparse(self, **kwargs):
        """Start a `LinkParseJob` for this client's spec, forwarding `kwargs`."""
        job = LinkParseJob(self.spec, **kwargs)
        job.start()

    def rank(self, **kwargs):
        """Start a `PagerankJob` for this client's spec, forwarding `kwargs`."""
        job = PagerankJob(self.spec, **kwargs)
        job.start()
Esempio n. 3
0
 def client(self):
     """Return a new `DiscodexClient` for the configured host and port.

     The host and port are read from `self.options`; `DiscodexClient` is
     imported inside the method rather than at module level.
     """
     from discodex.client import DiscodexClient

     host = self.options.host
     port = self.options.port
     return DiscodexClient(host, port)