def receiveQuery(self, query):
    """Process a P2P query handed to this node.

    A query whose qid is already present in ``self._receivedQueries`` or
    ``self._sentQueries`` is ignored. Any other query is stored in
    ``self._receivedQueries``, ``query.hop()`` is called, and the query is
    passed to ``self.sendQuery`` while ``query.ttl`` is still positive.
    Local matches, if any, have their text replaced by an abstract and are
    sent back through ``self.relayAnswer``.

    :type query: `maay.p2pquerier.P2pQuery`
    """
    qid = query.qid
    if qid in self._receivedQueries or qid in self._sentQueries:
        return

    # Past the guard above the qid is in neither table, so the query is
    # necessarily new: no second membership test on _sentQueries is needed.
    print("P2pQuerier receiveQuery : %s from %s:%s "
          % (query.getWords(), query.client_host, query.client_port))
    self._receivedQueries[qid] = query

    query.hop()
    if query.ttl > 0:
        self.sendQuery(query)

    documents = self.querier.findDocuments(query.query)
    if not documents:
        print(" ... no document matching the query, won't answer.")
        return

    # only the abstract of each document travels back, never the full text
    for doc in documents:
        abstract = makeAbstract(doc.text, query.getWords())
        doc.text = untagText(removeSpace(abstract))

    # provider is a 4-uple (login, node_id, IP, xmlrpc-port)
    provider = (NODE_LOGIN, NODE_CONFIG.get_node_id(),
                NODE_HOST, NODE_CONFIG.rpcserver_port)
    self.relayAnswer(P2pAnswer(query.qid, provider, documents))
def __init__(self, context, querier, p2pquerier):
    """Initialise the page from the query carried by `context`.

    The query is rebuilt with ``Query.fromContext``. When
    ``inevow.IRemainingSegments(context)`` holds fewer than two entries,
    the local matches are reduced to abstracts, a ``P2pQuery`` is created
    for the query, old results are purged and the local matches are pushed
    to the querier under the new query id.
    """
    athena.LivePage.__init__(self)
    # NOTE: At the time this comment was written, athena/LivePages were
    #       handled differently in nevow SVN. It is now possible to
    #       instantiate LivePage instances directly (which is great !), so
    #       the implementation will have to change for the next nevow
    #       release.
    self.querier = querier
    self.p2pquerier = p2pquerier
    self.query = Query.fromContext(context)
    self.offset = self.query.offset
    self.onlyLocal = False
    self.onlyDistant = False

    # push local results once for all
    remainingSegments = inevow.IRemainingSegments(context)
    if len(remainingSegments) < 2:
        # only abstracts are stored in the results table
        localMatches = []
        for match in querier.findDocuments(self.query):
            match.text = makeAbstract(match.text, self.query.words)
            localMatches.append(match)

        nodeConfig = INodeConfiguration(context)
        distributedQuery = P2pQuery(sender=nodeConfig.get_node_id(),
                                    query=self.query)
        self.qid = distributedQuery.qid
        self.p2pQuery = distributedQuery

        # purge old results
        self.querier.purgeOldResults()
        # local provider: same 4-uple layout as remote ones, with
        # 'localhost' as host and 0 as port
        localProvider = (NODE_LOGIN, NODE_CONFIG.get_node_id(),
                         'localhost', 0)
        self.querier.pushDocuments(self.qid, localMatches, localProvider)
        self.results = self.querier.getQueryResults(self.query)
def relayAnswer(self, answer, local=False): # local still unused
    """Pass the documents of `answer` on to whoever asked for them.

    The query is looked up by ``answer.queryId``, first among the received
    queries, then among the sent ones; when neither table knows it the
    answer is dropped. Documents the query does not know yet get their
    text replaced by an abstract and are registered on the query. They are
    then posted over XML-RPC to ``query.host:query.port`` when the query
    sender is another node, or handed to the local answer callbacks
    otherwise.

    `local` is accepted but not used by this implementation.
    """
    print("P2pQuerier relayAnswer : %s documents" % len(answer.documents))

    queryId = answer.queryId
    query = self._receivedQueries.get(queryId)
    if not query:
        query = self._sentQueries.get(queryId)
        if not query:
            print(" ... bailing out (bug?) : we had no query for this answer")
            return
        print(" ... originator : we got mail :) ... ")
    else:
        print(" ... relaying Answer to originator ...")

    outgoing = []
    for entry in answer.documents:
        if not isinstance(entry, dict):
            entry = entry.__dict__
        # TODO: record answer in database if local is False
        # auc : to cache them ?
        if query.isKnown(entry):
            continue
        summary = makeAbstract(entry['text'], query.getWords())
        entry['text'] = untagText(removeSpace(summary))
        query.addMatch(entry)
        # a plain dict is xmlrpc-serializable (auc)
        outgoing.append(entry)

    if query.sender != self.nodeId:
        try:
            # getNodeUrl seems not to exist yet, so the address carried by
            # the query itself is used
            host, port = query.host, query.port
            print(" ... will send answer to %s:%s" % (host, port))
            targetUrl = 'http://%s:%s' % (host, port)
            remote = Proxy(targetUrl)
            deferred = remote.callRemote('distributedQueryAnswer',
                                         query.qid,
                                         self.nodeId,
                                         outgoing)
            deferred.addCallback(self.querier.registerNodeActivity)
            deferred.addErrback(P2pErrbacks.answerQueryProblem)
            P2pErrbacks.setAnswerTarget(targetUrl)
        except ValueError:
            print("unknown node %s" % query.sender)
    else: # local would be true ? don't waste the answers ...
        self._notifyAnswerCallbacks(queryId, outgoing)
def render_row(self, context, data): document = data words = self.query.split() context.fillSlots('mime_type', re.sub("/", "_", document.mime_type)) context.fillSlots('doctitle', tags.xml(boldifyText(document.title, words))) # XXX abstract attribute should be a unicode string try: abstract = makeAbstract(document.text, words) abstract = normalize_text(unicode(abstract)) except Exception, exc: import traceback traceback.print_exc() print exc abstract = u'No abstract available for this document [%s]' % exc
def relayAnswer(self, answer):
    """Forward the documents of `answer` to the node that asked for them.

    The query is looked up by ``answer.qid`` among the received queries,
    then among the sent ones; an answer matching neither is dropped. Every
    document has its 'url' reduced to its base name and its text replaced
    by an abstract before being registered on the query. The documents are
    then posted over XML-RPC to ``query.client_host:query.client_port``
    when the query sender is another node, or handed to the local answer
    callbacks otherwise.
    """
    provider = answer.provider
    print("P2pQuerier relayAnswer : %s document(s) from %s:%s"
          % (len(answer.documents), provider[2], provider[3]))

    query = (self._receivedQueries.get(answer.qid)
             or self._sentQueries.get(answer.qid))
    if not query:
        print(" ... bug or dos : we had no query for this answer")
        return

    outgoing = []
    for entry in answer.documents:
        if not isinstance(entry, dict):
            entry = entry.__dict__
        if 'url' in entry:
            entry['url'] = os.path.basename(entry['url'])
        # TODO: record answer in database if local is False
        # auc : to have them in Document with state == KNOWN
        # FIXME: the query.isKnown() filter is disabled, so duplicates are
        #        relayed too; open question: shouldn't all documents be
        #        added regardless, so as to add a new provider entry ?
        summary = makeAbstract(entry['text'], query.getWords())
        entry['text'] = untagText(removeSpace(summary))
        query.addMatch(entry)
        outgoing.append(entry)

    if query.sender != NODE_CONFIG.get_node_id():
        self.querier.registerNodeActivity(provider[1])
        host, port = query.client_host, query.client_port
        print(" ... relaying Answer to %s:%s ..." % (host, port))
        targetUrl = 'http://%s:%s' % (host, port)
        remote = Proxy(targetUrl)
        deferred = remote.callRemote('distributedQueryAnswer',
                                     query.qid,
                                     NODE_CONFIG.get_node_id(),
                                     provider,
                                     outgoing)
        deferred.addErrback(answerQueryErrback(query))
    else:
        print(" ... originator : we got an answer !")
        self._notifyAnswerCallbacks(answer.qid, provider, outgoing)
def relayAnswer(self, answer):
    """Forward the documents of `answer` to the node that asked for them.

    The query is looked up by ``answer.qid`` among the received queries,
    then among the sent ones; an answer matching neither is dropped. Every
    document has its base64-encoded 'url' decoded, reduced to its base
    name and encoded again, and its text replaced by an abstract, before
    being registered on the query. The documents are then posted over
    XML-RPC to ``query.client_host:query.client_port`` when the query
    sender is another node, or handed to the local answer callbacks
    otherwise.
    """
    provider = answer.provider
    print("P2pQuerier relayAnswer : %s document(s) from %s:%s"
          % (len(answer.documents), provider[2], provider[3]))

    query = (self._receivedQueries.get(answer.qid)
             or self._sentQueries.get(answer.qid))
    if not query:
        print(" ... bug or dos : we had no query for this answer")
        return

    outgoing = []
    for entry in answer.documents:
        if not isinstance(entry, dict):
            entry = entry.__dict__
        # only node-local docs will exhibit their full pathname
        if 'url' in entry:
            fullPath = base64.decodestring(entry['url'])
            entry['url'] = base64.encodestring(os.path.basename(fullPath))
        # TODO: record answer in database if local is False
        # auc : to have them in Document with state == KNOWN
        summary = makeAbstract(entry['text'], query.getWords())
        entry['text'] = untagText(removeSpace(summary))
        query.addMatch(entry)
        outgoing.append(entry)

    if query.sender != NODE_CONFIG.get_node_id():
        self.querier.registerNodeActivity(provider[1])
        host, port = query.client_host, query.client_port
        print(" ... relaying Answer to %s:%s ..." % (host, port))
        targetUrl = 'http://%s:%s' % (host, port)
        remote = Proxy(targetUrl)
        deferred = remote.callRemote('distributedQueryAnswer',
                                     query.qid,
                                     NODE_CONFIG.get_node_id(),
                                     provider,
                                     outgoing)
        deferred.addErrback(answerQueryErrback(query))
    else:
        print(" ... originator : we got an answer !")
        self._notifyAnswerCallbacks(answer.qid, provider, outgoing)
def receiveQuery(self, query):
    """Process a P2P query handed to this node.

    A query whose qid is already present in ``self._receivedQueries`` or
    ``self._sentQueries`` stops here. Any other query is stored in
    ``self._receivedQueries``, ``query.hop()`` is called, and the query is
    passed to ``self.sendQuery`` while ``query.ttl`` is still positive.
    The local matches (possibly none) have their text replaced by an
    abstract and are sent back through ``self.relayAnswer``.

    :type query: `maay.p2pquerier.P2pQuery`
    """
    print("P2pQuerier receiveQuery : %s" % query)

    qid = query.qid
    if qid in self._receivedQueries or qid in self._sentQueries:
        print(" ... we already know query %s, this ends the trip" % qid)
        return

    # Past the guard above the qid is in neither table, so the query is
    # necessarily new: no second membership test on _sentQueries is needed.
    print(" ... %s is a new query, let's work ..." % qid)
    self._receivedQueries[qid] = query

    query.hop()
    if query.ttl > 0:
        self.sendQuery(query)

    documents = self.querier.findDocuments(query.query)
    # only the abstract of each document travels back, never the full text
    for doc in documents:
        abstract = makeAbstract(doc.text, query.getWords())
        doc.text = untagText(removeSpace(abstract))

    self.relayAnswer(P2pAnswer(query.qid, documents))
def testSimple(self):
    # Check excerpt at the beginning of the text: every occurrence of the
    # searched word, whatever its case, is expected in bold.
    expected = (u'This program is <b>free</b> software; you can redistribute '
                u'it and/or modify it under the terms of <b>...</b> Public '
                u'License as published by the <b>Free</b> Software Foundation; '
                u'either version 2 of the License, or (at your <b>...</b> this '
                u'program; if not, write to the <b>Free</b> Software '
                u'Foundation, Inc., 51 <b>...</b>')
    abstract = makeAbstract(self.text, [u"free"])
    self.assertEqual(expected, abstract)
def testWordAtEnd(self):
    # The expected abstract opens with an ellipsis marker and runs through
    # the searched word up to the last characters of the excerpt.
    expected = (' <b>...</b> Inc., 51 Franklin St, Fifth Floor, '
                '<b>Boston</b>, MA 02110-1301 USA')
    abstract = makeAbstract(self.text, [u"Boston"])
    self.assertEqual(expected, abstract)
def testUnknownWord(self):
    # With a word that the expected excerpt never highlights, the abstract
    # is plain text followed by a single ellipsis marker.
    expected = ('This program is free software; you can redistribute it '
                'and/or modify it under the terms of the GNU General Public '
                'License as published by the Free Software Foundation; either '
                'version 2 of the License, <b>...</b>')
    abstract = makeAbstract(self.text, [u"FOOBAR"])
    self.assertEqual(expected, abstract)
def testMixedCase(self):
    # The searched word is given in mixed case; the expected abstract
    # highlights the upper-case occurrence found in the text.
    expected = (' <b>...</b> or FITNESS FOR A PARTICULAR <b>PURPOSE</b>. '
                'See the GNU General Public <b>...</b>')
    abstract = makeAbstract(self.text, [u"pUrPoSe"])
    self.assertEqual(expected, abstract)
def testLength200AndUnknownWord(self):
    # Regression check for bug #5648: building an abstract from the first
    # 200 characters of the text with a word used as a non-match must not
    # raise IndexError. Only the absence of the exception matters, so the
    # returned abstract is not kept.
    try:
        makeAbstract(self.text[:200], [u'tralala'])
    except IndexError:
        self.fail('bug #5648 is present')