def __send_search_request_to(self, query_id, TTL, ip, port, search_query,
                             min_score, forwarding_node_count, result_count):
    # signal.alarm(10)
    try:
        connection = httplib.HTTPConnection(ip, port)
        connection.putrequest("POST", "/message")
        connection.endheaders()
        print "send message"
        protocol.sendHeader(connection, constants.SEARCH_REQUEST_COMMAND,
                            query_id, TTL)
        protocol.sendSearchRequest(connection, min_score,
                                   forwarding_node_count, result_count,
                                   search_query)
        print "get response"
        http_response = connection.getresponse()
        (protocol_version, vendor, node_id, ip, port, bandwidth, counter,
         command_type, queryID, TTL) = response.readHeader(http_response)
        print "recv pong id = %s %s %s" % (node_id, ip, port)
        last_seen_time = int(time.time())
        nodeInfo = globalvars.maay_core.updateNodeInfo(node_id, ip, port,
                                                       bandwidth, counter,
                                                       last_seen_time)
        connection.close()
    # except TimeoutError:
    #     signal.alarm(0)
    #     return communication.Communication.TIMEOUT_ERROR
    except socket.error, (code, message):
        print "Connection problem on node [%s:%s]: %s" % (ip, port, message)
        return communication.Communication.CONNECTION_ERROR
def recv_download_request(self, httpRequestHandler, query_id, TTL,
                          sender_nodeID, sender_nodeIP, sender_nodePort,
                          document_id, search_query):
    # check in the indexer if we have it
    documentInfo = globalvars.database.getDocumentInfo(document_id=document_id)
    if not documentInfo:
        print "never heard about document %s" % document_id
        # todo: forward the request to a node which has a document with a
        # close document id?
        has_content = 0
        has_description = 0
    else:
        if documentInfo.state == maay.datastructure.documentinfo.KNOWN_STATE:
            print "I do not have the file on my disk, why do you ask me?"
            # todo: but I can give you some other pointers
            has_content = 0
        else:
            has_content = 1
        has_description = 1
    flags = (has_content * constants.HAS_DOCUMENT_CONTENT_FLAG) | \
            (has_description * constants.HAS_DOCUMENT_DESCRIPTION_FLAG)
    # update documentScore with the download request received and the
    # documentscore received
    # dp = documentProviders[0]
    # nodeInfo = globalvars.database.getNodeInfo(dp.node_id)
    httpRequestHandler.send_response(200)
    httpRequestHandler.end_headers()
    output = tools.file2stream(httpRequestHandler.wfile)
    protocol.sendHeader(output, constants.DOWNLOAD_RESPONSE_COMMAND,
                        self.__generateQueryID(), constants.INIT_TTL)
    protocol.sendDownloadResponse(output, document_id, flags)
    if has_description:
        documentProviders = globalvars.database.getDocumentProviders(
            documentInfo.db_document_id)
        if has_content and not documentInfo.url:
            fileInfos = globalvars.database.getFileInfos(
                db_document_id=documentInfo.db_document_id,
                state=maay.datastructure.documentinfo.PUBLISHED_STATE)
            for fileInfo in fileInfos:
                pos = fileInfo.file_name.find(
                    globalvars.config.getValue("PublishedDocumentRoot"))
                print "send url pos = %s" % pos
                if pos != -1:
                    documentInfo.url = 'http://%s:%s/pub/%s' % (
                        globalvars.ip, globalvars.port,
                        fileInfo.file_name[pos + len(globalvars.config.getValue("PublishedDocumentRoot")) + 1:])
                    documentInfo.url = documentInfo.url.replace("\\", "/")
                    break
        protocol.sendDownloadResponseDocumentDescription(
            output, documentInfo.title, documentInfo.publication_time,
            documentInfo.mime_type, documentInfo.size,
            documentInfo.url or "", len(documentProviders))
        for dp in documentProviders:
            nodeInfo = globalvars.database.getNodeInfo(dp.node_id)
            protocol.sendDownloadResponseProvider(
                output, dp.node_id, nodeInfo.ip, nodeInfo.port,
                dp.last_providing_time, nodeInfo.last_seen_time,
                nodeInfo.bandwidth, nodeInfo.counter)
    if has_content:
        fileInfo = globalvars.database.getFileInfos(
            db_document_id=documentInfo.db_document_id)[0]
        protocol.sendDownloadResponseDocument(output, fileInfo.file_name,
                                              documentInfo.size)
    self.hasDownloaded(sender_nodeID, document_id, search_query,
                       weight=DOWNLOAD_SCORE_WEIGHT)
def handleMessage(httpRequestHandler):
    # read message content
    (protocol_version, vendor, node_id, ip, port, bandwidth, counter,
     command_type, queryID, TTL) = response.readHeader(httpRequestHandler.rfile)
    last_seen_time = int(time.time())
    nodeInfo = globalvars.maay_core.updateNodeInfo(node_id, ip, port,
                                                   bandwidth, counter,
                                                   last_seen_time)
    # update information on the node
    # we also have to forward them back, and buffer them before
    # forwarding them back
    # check error before
    if command_type == constants.SEARCH_REQUEST_COMMAND:
        (min_score, forwarding_node_count, result_count,
         search_query) = response.readSearchRequest(httpRequestHandler.rfile)
        globalvars.maay_core.recv_search_request(queryID, TTL, node_id, ip,
                                                 port, search_query, min_score,
                                                 forwarding_node_count,
                                                 result_count,
                                                 constants.MAAY_SEARCH_RANGE)
        globalvars.maay_core.manifest_interest(node_id, search_query)
    elif command_type == constants.SEARCH_RESPONSE_COMMAND:
        # if I receive answers to an unknown query, do nothing
        resultSpool = globalvars.maay_core.getResultSpoolManager().getResultSpool(queryID)
        if not resultSpool:
            return
        search_query = resultSpool.getQuery()
        globalvars.maay_core.manifest_interest(node_id, search_query)
        hit_count = response.readSearchResponseInfo(httpRequestHandler.rfile)
        for i in range(0, hit_count):
            (document_id, mime_type, url, publication_time, file_size, title,
             score_count, provider_count) = response.readSearchResponseHitInfo(
                httpRequestHandler.rfile)
            # update information on the document
            documentInfo = globalvars.maay_core.updateDocumentInfo(
                document_id, mime_type, title, file_size, publication_time, url)
            # todo: 0 should be the score/rank of the document
            # rank = 0.0
            for j in range(0, score_count):
                (word, relevance, popularity, excerpt, excerpt_position,
                 word_position) = response.readSearchResponseHitScore(
                    httpRequestHandler.rfile)
                # update information on the word in the document
                print "process document scores"
                ds = globalvars.maay_core.processDocumentScore(
                    documentInfo.db_document_id, word, relevance, popularity,
                    excerpt, excerpt_position, word_position, nodeInfo)
                # if word in search_query:
                #     rank += (float(ds.relevance) + 0.0001) * (float(ds.popularity) + 0.0001)
            ranking_score = globalvars.maay_core.compute_ranking_score(
                document_id, search_query)
            globalvars.maay_core.updateDocumentMatching(document_id=document_id)
            result = resultspool.MaayResult(document_id, ranking_score, 0, 0,
                                            int(publication_time),
                                            documentInfo.state)
            resultSpool.addResult(result)
            for j in xrange(provider_count):
                (node_id, ip, port, last_storing_time, last_seen_time,
                 bandwidth, counter) = response.readSearchResponseHitProvider(
                    httpRequestHandler.rfile)
                globalvars.maay_core.updateNodeInfo(node_id, ip, port,
                                                    bandwidth, counter,
                                                    last_seen_time)
                globalvars.maay_core.manifest_interest(node_id, search_query)
                # update information on the document provider (node)
                globalvars.maay_core.updateDocumentProvider(
                    documentInfo.db_document_id, node_id, last_storing_time)
    elif command_type == constants.DOWNLOAD_REQUEST_COMMAND:
        (document_id, search_query) = response.readDownloadRequest(
            httpRequestHandler.rfile)
        globalvars.maay_core.manifest_interest(node_id, search_query)
        globalvars.maay_core.recv_download_request(httpRequestHandler, queryID,
                                                   TTL, node_id, ip, port,
                                                   document_id, search_query)
    if command_type != constants.DOWNLOAD_REQUEST_COMMAND:
        httpRequestHandler.send_response(200)
        httpRequestHandler.end_headers()
        protocol.sendHeader(tools.file2stream(httpRequestHandler.wfile),
                            constants.PONG_COMMAND, queryID, 0)
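# --- Hypothetical wiring sketch (not part of the original module) ---
# handleMessage() only relies on the BaseHTTPRequestHandler interface
# (rfile, wfile, send_response, end_headers).  A minimal server routing
# POST /message to it could look like the sketch below; the handler class
# name and the port value are illustrative assumptions, not project code.
import BaseHTTPServer

class MessageRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    def do_POST(self):
        if self.path == "/message":
            handleMessage(self)
        else:
            self.send_error(404)

# httpd = BaseHTTPServer.HTTPServer(("", 6464), MessageRequestHandler)
# httpd.serve_forever()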
class Download:

    FINISHED_STATE = 2

    def __init__(self, document_id, search_query=[]):
        self.__active = 0
        self.__providers = []
        self.__providersHT = {}
        self.__document_id = document_id
        self.__search_query = search_query
        self.__transferred = 0
        self.__state = download.NOT_STARTED_STATE
        self.__last_search_time = 0
        # TODO: for the moment unisource is ok
        # divide the file into chunks of 256 KB
        # several states: in download, downloaded, not downloaded

    def isActive(self):
        return self.__active

    def setActive(self, active):
        self.__active = active

    def fetch(self):
        if self.__state == download.FINISHED_STATE:
            return 1
        if self.__state == download.NOT_STARTED_STATE:
            self.__state = download.INIT_STATE
        while 1:
            documentInfo = globalvars.database.getDocumentInfo(self.__document_id)
            documentProviders = globalvars.database.getDocumentProviders(
                documentInfo.db_document_id)
            # refresh the provider list from the database
            for dp in documentProviders:
                p = self.__providersHT.get(dp.node_id)
                if p:
                    n = globalvars.database.getNodeInfo(dp.node_id)
                    if p.ip != n.ip or p.port != n.port:
                        p.port = n.port
                        p.ip = n.ip
                        p.state = download.Provider.UNTRIED_STATE
                        p.last_try = 0
                else:
                    n = globalvars.database.getNodeInfo(dp.node_id)
                    p = download.Provider(n.node_id, n.ip, n.port, n.bandwidth,
                                          dp.last_providing_time)
                    self.__providers.append(p)
                    self.__providersHT[dp.node_id] = p
            # pick the first usable provider
            provider = None
            for p in self.__providers:
                if p.state in (download.Provider.UNREACHABLE_STATE,
                               download.Provider.NOT_PROVIDING_STATE):
                    continue
                if p.last_try + download.NEXT_RETRY_PERIOD > time.time():
                    continue
                provider = p
                break
            if not provider:
                print "no provider"
                if self.__last_search_time + download.NEXT_SEARCH_PERIOD > time.time() \
                       and self.__state == download.SEARCHING_SOURCES_STATE:
                    time.sleep(1)
                    return 0
                globalvars.maay_core.send_search_request(
                    ["#%s" % self.__document_id], constants.INIT_TTL,
                    constants.MAAY_SEARCH_RANGE, constants.MIN_SCORE,
                    constants.INIT_FNC, constants.INIT_EHC,
                    query_id=self.__document_id)
                print "search for providers"
                self.__state = download.SEARCHING_SOURCES_STATE
                self.__last_search_time = time.time()
                time.sleep(1)
                return 0
            print "PROVIDER IP = %s [%s]" % (provider.ip, provider.state)
            self.__state = download.CONNECTING_STATE
            provider.state = download.Provider.CONNECTED_STATE
            connection = None
            try:
                # signal.alarm(5)
                print "trying %s %s" % (provider.ip, provider.port)
                connection = httplib.HTTPConnection(provider.ip, provider.port)
                connection.putrequest("POST", "/message")
                connection.endheaders()
                # signal.alarm(0)
            except Exception, e:
                # signal.alarm(0)
                print "Exception: %s" % e
                provider.state = download.Provider.UNREACHABLE_STATE
                continue
            except TimeoutError:
                # signal.alarm(0)
                provider.state = download.Provider.BUSY_STATE
                provider.last_try = time.time()
                continue
            try:
                provider.state = download.Provider.DOWNLOADING_STATE
                protocol.sendHeader(connection,
                                    constants.DOWNLOAD_REQUEST_COMMAND,
                                    "12345678901234567890", constants.INIT_TTL)
                protocol.sendDownloadRequest(connection, self.__document_id,
                                             self.__search_query)
                print "request sent, waiting for response %s" % connection
                r = connection.getresponse()
                print "resp"
                (protocol_version, vendor, node_id, ip, port, bandwidth,
                 counter, command_type, queryID, TTL) = response.readHeader(r)
                print "resp 2"
                document_id, flags = response.readDownloadResponse(r)
                if not (flags & constants.HAS_DOCUMENT_DESCRIPTION_FLAG):
                    print "the provider does not have the file %s" % len(self.__providers)
                    provider.state = download.Provider.NOT_PROVIDING_STATE
                    globalvars.database.deleteDocumentProvider(
                        db_document_id=documentInfo.db_document_id,
                        node_id=provider.node_id)
                    continue
                (title, publication_time, mime_type, size, url,
                 provider_count) = response.readDownloadResponseDocumentDescription(r)
                print "url received = %s" % url
                globalvars.maay_core.updateDocumentInfo(document_id, mime_type,
                                                        title, size,
                                                        publication_time, url)
                for j in xrange(provider_count):
                    (node_id, ip, port, last_storing_time, last_seen_time,
                     bandwidth, counter) = response.readSearchResponseHitProvider(r)
                    print "provider in resp = %s" % str((node_id, ip, port, bandwidth, counter, last_seen_time))
                    globalvars.maay_core.updateNodeInfo(node_id, ip, port,
                                                        bandwidth, counter,
                                                        last_seen_time)
                    globalvars.maay_core.updateDocumentProvider(
                        documentInfo.db_document_id, node_id, last_storing_time)
                    globalvars.maay_core.manifest_interest(node_id,
                                                           self.__search_query)
                if not (flags & constants.HAS_DOCUMENT_CONTENT_FLAG):
                    print "the provider does not have the file %s" % len(self.__providers)
                    provider.state = download.Provider.NOT_PROVIDING_STATE
                    globalvars.database.deleteDocumentProvider(
                        db_document_id=documentInfo.db_document_id,
                        node_id=provider.node_id)
                    continue
                content_input = response.readDownloadResponseInput(r)
                self.__state = download.DOWNLOADING_STATE
                print "waiting document content"
                file_name = globalvars.config.getValue("TemporaryDocumentRoot") \
                            + os.path.sep + document_id \
                            + (mimetypes.guess_extension(mime_type) or ".txt")
                fd = file(file_name, "wb")
                idle = 0
                self.__transferred = 0
                while self.__transferred < size and idle < 20:
                    idle += 1
                    buf = content_input.read(1024)
                    if not buf:
                        continue
                    idle = 0
                    fd.write(buf)
                    self.__transferred += len(buf)
                fd.close()
                if idle >= 20:
                    raise Exception("idle")
                print "document received completely"
                if self.__transferred != size:
                    print "Error: file length does not match %s %s" % (self.__transferred, size)
                    connection.close()
                    os.remove(file_name)
                    continue
                new_file_name = document_id + (mimetypes.guess_extension(mime_type) or ".txt")
                absolute_new_file_name = "%s%s%s" % (
                    globalvars.config.getValue("CachedDocumentRoot"),
                    os.path.sep, new_file_name)
                if os.path.exists(absolute_new_file_name):
                    os.remove(absolute_new_file_name)
                print "rename %s => %s" % (file_name, absolute_new_file_name)
                os.rename(file_name, absolute_new_file_name)
                print "done => %s" % absolute_new_file_name
                # file_time = int(os.stat(absolute_new_file_name)[stat.ST_MTIME])
                file_time = 0
                fileInfo = maay.datastructure.fileinfo.FileInfo(
                    absolute_new_file_name, file_time,
                    documentInfo.db_document_id,
                    maay.datastructure.documentinfo.CACHED_STATE,
                    maay.datastructure.fileinfo.CREATED_FILE_STATE)
                print "1 documentInfo.db_document_id = %s" % fileInfo.db_document_id
                db_fileInfos = globalvars.database.getFileInfos(
                    file_name=absolute_new_file_name)
                if not db_fileInfos:
                    globalvars.database.insertFileInfo(fileInfo)
                else:
                    globalvars.database.updateFileInfo(fileInfo)
                globalvars.indexer.addNewDocumentToIndex(absolute_new_file_name)
                self.__state = download.FINISHED_STATE
                provider.state = download.Provider.FINISHED_STATE
                return 1
            except Exception, e:
                time.sleep(2)
                # else:
                # except TimeoutError, e:
                print "Error ex: %s" % e
                provider.state = download.Provider.BUSY_STATE
                provider.last_try = time.time()
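# --- Hypothetical usage sketch (not part of the original module) ---
# fetch() returns 1 once the document has been cached and indexed, and 0
# while it is still searching for providers, so a caller can simply poll it.
# The helper name and the polling loop below are illustrative assumptions.
def fetch_document(document_id, search_query):
    d = Download(document_id, search_query)
    d.setActive(1)
    while not d.fetch():
        # no provider yet, or a search for sources is still in flight
        time.sleep(1)
    d.setActive(0)
    return d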
def flushResults(self):
    t = int(time.time())
    for rs in self.resultSpoolManager.getResultSpools():
        if rs.getNodeID() == self.__nodeID:
            continue
        if t - rs.getQueryTime() > constants.result_spool_lifetime:
            self.resultSpoolManager.removeResultSpool(rs)
            continue
        if rs.getSentResultCount() >= rs.getExpectedResultCount():
            continue
        # keep this resultspool
        documentIDs = rs.getBestUnsentResults()
        if len(documentIDs) == 0:
            continue
        nodeInfo = globalvars.database.getNodeInfo(rs.getNodeID())
        print "flush results to %s" % rs.getNodeID()
        # todo: if the connection is local, make a shortcut
        # c = protocol.Protocol(self, nodeInfo.ip, nodeInfo.port)
        connection = httplib.HTTPConnection(nodeInfo.ip, nodeInfo.port)
        connection.putrequest("POST", "/message")
        connection.endheaders()
        protocol.sendHeader(connection, constants.SEARCH_RESPONSE_COMMAND,
                            rs.getQueryID(), 0)
        protocol.sendSearchResponseInfo(connection, len(documentIDs))
        for document_id in documentIDs:
            documentInfo = globalvars.database.getDocumentInfos(
                document_id=document_id, get_text=1)[0]
            if not documentInfo.url:
                fileInfos = globalvars.database.getFileInfos(
                    db_document_id=documentInfo.db_document_id,
                    state=maay.datastructure.documentinfo.PUBLISHED_STATE)
                for fileInfo in fileInfos:
                    pos = fileInfo.file_name.find(
                        globalvars.config.getValue("PublishedDocumentRoot"))
                    if pos != -1:
                        documentInfo.url = 'http://%s:%s/pub/%s' % (
                            globalvars.ip, globalvars.port,
                            fileInfo.file_name[pos + len(globalvars.config.getValue("PublishedDocumentRoot")) + 1:])
                        documentInfo.url = documentInfo.url.replace("\\", "/")
                        break
            queryDocumentScores = globalvars.database.getDocumentScores(
                documentInfo.db_document_id, rs.getQuery())
            relevantDocumentScores = globalvars.database.getBestRelevantDocumentScores(
                documentInfo.db_document_id,
                constants.relevant_document_score_count + len(rs.getQuery()))
            documentProviders = globalvars.database.getDocumentProviders(
                documentInfo.db_document_id)
            documentScores = queryDocumentScores[:]
            for ds in relevantDocumentScores:
                add = 1
                for word in rs.getQuery():
                    if ds.word == word:
                        add = 0
                        break
                if add:
                    documentScores.append(ds)
            url = documentInfo.url
            if not url:
                url = ""
            # if url:
            #     if url.find('/') == 0:
            #         url = "http://%s:%s%s/pub" % (globalvars.hostname, globalvars.port, url)
            # else:
            #     url = ""
            protocol.sendSearchResponseHitInfo(
                connection, document_id, documentInfo.mime_type, url,
                documentInfo.publication_time, documentInfo.size,
                documentInfo.title, len(documentScores),
                len(documentProviders))
            for ds in documentScores:
                pos = ds.position
                text = documentInfo.text
                if pos >= constants.MAX_TEXT_CONTENT_STORED_SIZE:
                    pos = 0
                start = max(0, pos - constants.EXCERPT_HALF_SIZE)
                if start > 0:
                    while start < pos and text[start] != ' ':
                        start += 1
                    start += 1
                end = min(len(text) - 1, start + constants.EXCERPT_SIZE)
                if end < len(text) - 1:
                    while end > pos and text[end] != ' ':
                        end -= 1
                excerpt = documentInfo.text[start:end]
                print "excerpt = %s (%s,%s)" % (excerpt, start, end)
                protocol.sendSearchResponseHitScore(
                    connection, ds.word, ds.relevance, ds.popularity,
                    excerpt, start, ds.position)
            for dp in documentProviders:
                ni = globalvars.database.getNodeInfo(dp.node_id)
                if ni.node_id == self.__nodeID:
                    ni.last_seen_time = int(time.time())
                protocol.sendSearchResponseHitProvider(
                    connection, dp.node_id, ni.ip, ni.port,
                    dp.last_providing_time, ni.last_seen_time,
                    ni.bandwidth, ni.counter)
        connection.close()
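# --- Hypothetical illustration (not part of the original module) ---
# The excerpt code above widens a slice of the stored text around the
# matched word position, snapping both ends to spaces.  A standalone
# version with the constants passed in explicitly (the function name and
# parameter names are assumptions) would look like this:
def make_excerpt(text, pos, half_size, size):
    start = max(0, pos - half_size)
    if start > 0:
        # move forward to the next space so the excerpt starts on a word
        while start < pos and text[start] != ' ':
            start += 1
        start += 1
    end = min(len(text) - 1, start + size)
    if end < len(text) - 1:
        # move backward to the previous space so the excerpt ends on a word
        while end > pos and text[end] != ' ':
            end -= 1
    return text[start:end]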