def receiveQuery(self, query):
    """Handle a query coming from another node.

    A query whose id is already present in ``self._receivedQueries`` or
    ``self._sentQueries`` is ignored.  Otherwise the query is stored in
    ``self._receivedQueries``, ``query.hop()`` is called, the query is
    forwarded with ``self.sendQuery`` while ``query.ttl`` stays positive,
    and the documents returned by ``self.querier.findDocuments`` are
    handed to ``self.relayAnswer``.  No answer is relayed when no
    document matches.

    :type query: `maay.p2pquerier.P2pQuery`
    """
    if query.qid in self._receivedQueries or query.qid in self._sentQueries:
        return

    # The guard above already proved the id is absent from
    # self._sentQueries, so the query is unconditionally new here.
    print("P2pQuerier receiveQuery : %s from %s:%s "
          % (query.getWords(), query.client_host, query.client_port))
    self._receivedQueries[query.qid] = query

    query.hop()
    if query.ttl > 0:
        self.sendQuery(query)

    documents = self.querier.findDocuments(query.query)
    if not documents:
        print(" ... no document matching the query, won't answer.")
        return

    # Replace each document's text by a tag-free abstract before answering.
    for doc in documents:
        abstract = makeAbstract(doc.text, query.getWords())
        doc.text = untagText(removeSpace(abstract))

    # provider is a 4-uple (login, node_id, IP, xmlrpc-port)
    provider = (NODE_LOGIN,
                NODE_CONFIG.get_node_id(),
                NODE_HOST,
                NODE_CONFIG.rpcserver_port)
    self.relayAnswer(P2pAnswer(query.qid, provider, documents))
def relayAnswer(self, answer, local=False): # local still unused
    """Record the answers to a query and pass them on.

    ``local`` is meant to flag answers that come from a local query and
    therefore must not be recorded in the database; it is not used yet.
    The query is looked up by ``answer.queryId`` among the received
    queries first, then among the sent ones; without a match the answer
    is dropped.
    """
    print("P2pQuerier relayAnswer : %s documents" % len(answer.documents))

    query = self._receivedQueries.get(answer.queryId)
    if query:
        print(" ... relaying Answer to originator ...")
    else:
        query = self._sentQueries.get(answer.queryId)
        if not query:
            print(" ... bailing out (bug?) : we had no query for this answer")
            return
        print(" ... originator : we got mail :) ... ")

    outgoing = []
    for item in answer.documents:
        # work on a plain dict, whatever the document came in as
        document = item if isinstance(item, dict) else item.__dict__
        # TODO: record answer in database if local is False
        # auc : to cache them ?
        if query.isKnown(document):
            continue
        abstract = makeAbstract(document['text'], query.getWords())
        document['text'] = untagText(removeSpace(abstract))
        query.addMatch(document)
        # the dict itself is what gets sent: xmlrpc-serializable (auc)
        outgoing.append(document)

    if query.sender != self.nodeId:
        try:
            # getNodeUrl seems not to exist yet, so the address stored
            # on the query is used instead of
            # self.querier.getNodeUrl(query.sender)
            host, port = query.host, query.port
            print(" ... will send answer to %s:%s" % (host, port))
            target_url = 'http://%s:%s' % (host, port)
            deferred = Proxy(target_url).callRemote('distributedQueryAnswer',
                                                    query.qid,
                                                    self.nodeId,
                                                    outgoing)
            deferred.addCallback(self.querier.registerNodeActivity)
            deferred.addErrback(P2pErrbacks.answerQueryProblem)
            P2pErrbacks.setAnswerTarget(target_url)
        except ValueError:
            print("unknown node %s" % query.sender)
    else:
        # local would be true ? don't waste the answers ...
        self._notifyAnswerCallbacks(answer.queryId, outgoing)
def relayAnswer(self, answer):
    """Record the documents of an answer and pass them on.

    The query is looked up by ``answer.qid`` among the received queries
    first, then among the sent ones; the answer is dropped when neither
    holds it.  Every document is handled as a dict: its 'url' (when
    present) is reduced to its base name, its 'text' is replaced by the
    output of makeAbstract / removeSpace / untagText, and it is added to
    the query with ``addMatch``.  When ``query.sender`` is not this node
    the documents are sent to ``query.client_host:query.client_port``
    with a remote 'distributedQueryAnswer' call, otherwise they go to
    ``self._notifyAnswerCallbacks``.
    """
    print("P2pQuerier relayAnswer : %s document(s) from %s:%s"
          % (len(answer.documents), answer.provider[2], answer.provider[3]))

    query = (self._receivedQueries.get(answer.qid)
             or self._sentQueries.get(answer.qid))
    if not query:
        print(" ... bug or dos : we had no query for this answer")
        return

    outgoing = []
    for item in answer.documents:
        document = item if isinstance(item, dict) else item.__dict__
        if 'url' in document:
            document['url'] = os.path.basename(document['url'])
        # TODO: record answer in database if local is False
        # auc : to have them in Document with state == KNOWN
        # NOTE: the query.isKnown(document) filter is disabled, so
        # duplicates are matched and sent too.
        # FIXME: shouldn't we add all documents regardless of
        #        duplicates, so as to add a new provider entry ?
        abstract = makeAbstract(document['text'], query.getWords())
        document['text'] = untagText(removeSpace(abstract))
        query.addMatch(document)
        outgoing.append(document)

    if query.sender != NODE_CONFIG.get_node_id():
        self.querier.registerNodeActivity(answer.provider[1])
        host = query.client_host
        port = query.client_port
        print(" ... relaying Answer to %s:%s ..." % (host, port))
        target_url = 'http://%s:%s' % (host, port)
        deferred = Proxy(target_url).callRemote('distributedQueryAnswer',
                                                query.qid,
                                                NODE_CONFIG.get_node_id(),
                                                answer.provider,
                                                outgoing)
        deferred.addErrback(answerQueryErrback(query))
    else:
        print(" ... originator : we got an answer !")
        self._notifyAnswerCallbacks(answer.qid, answer.provider, outgoing)
def relayAnswer(self, answer):
    """Record the documents of an answer and pass them on.

    The query is looked up by ``answer.qid`` among the received queries
    first, then among the sent ones; the answer is dropped when neither
    holds it.  Every document is handled as a dict: its base64 'url'
    (when present) is decoded, reduced to its base name and encoded
    again, its 'text' is replaced by the output of makeAbstract /
    removeSpace / untagText, and it is added to the query with
    ``addMatch``.  When ``query.sender`` is not this node the documents
    are sent to ``query.client_host:query.client_port`` with a remote
    'distributedQueryAnswer' call, otherwise they go to
    ``self._notifyAnswerCallbacks``.
    """
    print("P2pQuerier relayAnswer : %s document(s) from %s:%s"
          % (len(answer.documents), answer.provider[2], answer.provider[3]))

    query = (self._receivedQueries.get(answer.qid)
             or self._sentQueries.get(answer.qid))
    if not query:
        print(" ... bug or dos : we had no query for this answer")
        return

    outgoing = []
    for item in answer.documents:
        document = item if isinstance(item, dict) else item.__dict__
        # only node-local docs will exhibit their full pathname
        if 'url' in document:
            full_url = base64.decodestring(document['url'])
            short_url = os.path.basename(full_url)
            document['url'] = base64.encodestring(short_url)
        # TODO: record answer in database if local is False
        # auc : to have them in Document with state == KNOWN
        abstract = makeAbstract(document['text'], query.getWords())
        document['text'] = untagText(removeSpace(abstract))
        query.addMatch(document)
        outgoing.append(document)

    if query.sender != NODE_CONFIG.get_node_id():
        self.querier.registerNodeActivity(answer.provider[1])
        host = query.client_host
        port = query.client_port
        print(" ... relaying Answer to %s:%s ..." % (host, port))
        target_url = 'http://%s:%s' % (host, port)
        deferred = Proxy(target_url).callRemote('distributedQueryAnswer',
                                                query.qid,
                                                NODE_CONFIG.get_node_id(),
                                                answer.provider,
                                                outgoing)
        deferred.addErrback(answerQueryErrback(query))
    else:
        print(" ... originator : we got an answer !")
        self._notifyAnswerCallbacks(answer.qid, answer.provider, outgoing)
def receiveQuery(self, query):
    """Handle a query coming from another node.

    A query whose id is already present in ``self._receivedQueries`` or
    ``self._sentQueries`` is ignored.  Otherwise the query is stored in
    ``self._receivedQueries``, ``query.hop()`` is called, the query is
    forwarded with ``self.sendQuery`` while ``query.ttl`` stays positive,
    and the documents returned by ``self.querier.findDocuments`` are
    handed to ``self.relayAnswer``.

    :type query: `maay.p2pquerier.P2pQuery`
    """
    # One-element tuple keeps the format valid whatever `query` is.
    print("P2pQuerier receiveQuery : %s" % (query,))
    if query.qid in self._receivedQueries or query.qid in self._sentQueries:
        print(" ... we already know query %s, this ends the trip" % query.qid)
        return

    # The guard above already proved the id is absent from
    # self._sentQueries, so the query is unconditionally new here.
    print(" ... %s is a new query, let's work ..." % query.qid)
    self._receivedQueries[query.qid] = query

    query.hop()
    if query.ttl > 0:
        self.sendQuery(query)

    documents = self.querier.findDocuments(query.query)
    # Replace each document's text by a tag-free abstract before answering.
    for doc in documents:
        abstract = makeAbstract(doc.text, query.getWords())
        doc.text = untagText(removeSpace(abstract))
    self.relayAnswer(P2pAnswer(query.qid, documents))
def testUntag(self):
    # untagText is expected to drop every tag, including the
    # self-closing <img />, and keep only the text between them.
    text = 'Hello <a href="foo.bar.com">world <b>!</b></a><img alt="" />'
    # assertEqual is the supported spelling; assertEquals is only a
    # deprecated alias of it.
    self.assertEqual(untagText(text), 'Hello world !')