def __send_search_request_to(self, query_id, TTL, ip, port, search_query, min_score, forwarding_node_count, result_count):
    """Send one search request to the node at ip:port and record its reply.

    Opens an HTTP connection, POSTs to "/message", writes the protocol
    header (SEARCH_REQUEST_COMMAND, query_id, TTL) followed by the search
    request payload, then reads the header of the HTTP response and hands
    the node description it carries to maay_core.updateNodeInfo.

    Returns communication.Communication.CONNECTION_ERROR when a
    socket.error is raised; otherwise nothing is returned explicitly
    (None).  The connection is closed on every path.
    """
    # NOTE: the signal.alarm() based timeout (and its TIMEOUT_ERROR return)
    # is disabled in this version; only socket errors are handled here.
    connection = None
    try:
        connection = httplib.HTTPConnection(ip, port)
        connection.putrequest("POST", "/message")
        connection.endheaders()

        print("send message")
        protocol.sendHeader(connection, constants.SEARCH_REQUEST_COMMAND, query_id, TTL)
        protocol.sendSearchRequest(connection, min_score, forwarding_node_count, result_count, search_query)

        print("get response")
        http_response = connection.getresponse()
        # The reply header carries the peer's own view of its address; keep
        # it in separate names so the dialled ip/port stay available for
        # the error message below.
        (protocol_version, vendor, node_id, peer_ip, peer_port, bandwidth,
         counter, command_type, queryID, reply_ttl) = response.readHeader(http_response)
        print("recv pong id = %s %s %s" % (node_id, peer_ip, peer_port))

        last_seen_time = int(time.time())
        globalvars.maay_core.updateNodeInfo(node_id, peer_ip, peer_port, bandwidth, counter, last_seen_time)
    except socket.error as exc:
        # socket.error does not always carry (errno, message): some errors
        # have a single argument, so do not unpack two values blindly.
        if exc.args:
            message = exc.args[-1]
        else:
            message = exc
        print("Connection problem on node [%s:%s]: %s" % (ip, port, message))
        return communication.Communication.CONNECTION_ERROR
    finally:
        # Release the socket whether the exchange succeeded or failed.
        if connection is not None:
            connection.close()
def handleMessage(httpRequestHandler):
    """Process one protocol message read from an HTTP request handler.

    Reads the message header from httpRequestHandler.rfile, records the
    sending node through maay_core.updateNodeInfo, then dispatches on the
    command type:

    * SEARCH_REQUEST_COMMAND: reads the request and passes it to
      maay_core.recv_search_request.
    * SEARCH_RESPONSE_COMMAND: reads every hit (document description,
      per-word scores, providers), updates the core with them and adds a
      MaayResult to the result spool registered for the query id.
    * DOWNLOAD_REQUEST_COMMAND: reads the request and passes it, together
      with the handler, to maay_core.recv_download_request.

    For every command other than a download request, a 200 response and a
    PONG_COMMAND header are written back.
    """
    stream = httpRequestHandler.rfile

    # read message content
    (protocol_version, vendor, sender_id, sender_ip, sender_port,
     sender_bandwidth, sender_counter, command_type, queryID, TTL) = response.readHeader(stream)

    # update information on the node
    now = int(time.time())
    core = globalvars.maay_core
    sender_info = core.updateNodeInfo(sender_id, sender_ip, sender_port, sender_bandwidth, sender_counter, now)

    # we have to forward them back also, and bufferize them before
    # forwarding them back
    # check error before
    if command_type == constants.SEARCH_REQUEST_COMMAND:
        request = response.readSearchRequest(stream)
        (min_score, forwarding_node_count, result_count, search_query) = request
        core.recv_search_request(
            queryID, TTL, sender_id, sender_ip, sender_port, search_query,
            min_score, forwarding_node_count, result_count,
            constants.MAAY_SEARCH_RANGE)
        core.manifest_interest(sender_id, search_query)

    elif command_type == constants.SEARCH_RESPONSE_COMMAND:
        # if I receive answers of a unknown query, do nothing
        # NOTE(review): this path returns without writing any HTTP reply;
        # confirm that the sending side tolerates that.
        spool = core.getResultSpoolManager().getResultSpool(queryID)
        if not spool:
            return

        search_query = spool.getQuery()
        core.manifest_interest(sender_id, search_query)

        hit_total = response.readSearchResponseInfo(stream)
        for hit_index in range(hit_total):
            (document_id, mime_type, url, publication_time, file_size,
             title, score_count, provider_count) = response.readSearchResponseHitInfo(stream)

            # update information on the document
            documentInfo = core.updateDocumentInfo(
                document_id, mime_type, title, file_size, publication_time, url)

            # todo 0 should be the score/rank of the document
            for score_index in range(score_count):
                (word, relevance, popularity, excerpt,
                 excerpt_position, word_position) = response.readSearchResponseHitScore(stream)

                # update information on the word in the document
                print("process document scores")
                core.processDocumentScore(
                    documentInfo.db_document_id, word, relevance, popularity,
                    excerpt, excerpt_position, word_position, sender_info)

            ranking_score = core.compute_ranking_score(document_id, search_query)
            core.updateDocumentMatching(document_id=document_id)

            spool.addResult(
                resultspool.MaayResult(
                    document_id, ranking_score, 0, 0,
                    int(publication_time), documentInfo.state))

            for provider_index in xrange(provider_count):
                (provider_id, provider_ip, provider_port, last_storing_time,
                 provider_seen_time, provider_bandwidth,
                 provider_counter) = response.readSearchResponseHitProvider(stream)

                core.updateNodeInfo(
                    provider_id, provider_ip, provider_port,
                    provider_bandwidth, provider_counter, provider_seen_time)
                core.manifest_interest(provider_id, search_query)

                # update information on the document provider (node)
                core.updateDocumentProvider(
                    documentInfo.db_document_id, provider_id, last_storing_time)

    elif command_type == constants.DOWNLOAD_REQUEST_COMMAND:
        (document_id, search_query) = response.readDownloadRequest(stream)
        core.manifest_interest(sender_id, search_query)
        core.recv_download_request(
            httpRequestHandler, queryID, TTL, sender_id, sender_ip,
            sender_port, document_id, search_query)

    # A download request gets no PONG from this function.
    if command_type == constants.DOWNLOAD_REQUEST_COMMAND:
        return

    httpRequestHandler.send_response(200)
    httpRequestHandler.end_headers()
    protocol.sendHeader(tools.file2stream(httpRequestHandler.wfile), constants.PONG_COMMAND, queryID, 0)
class Download:
    """Download of a single document, identified by its document id.

    fetch() looks up the providers the database lists for the document,
    picks one that may be tried, requests the document from it over HTTP
    and, once the content has been received in full, moves it into the
    cached-document directory and registers it with the database and the
    indexer.
    """

    FINISHED_STATE = 2

    def __init__(self, document_id, search_query=None):
        """Create a download for document_id.

        search_query is sent with the download request and reported through
        manifest_interest; it defaults to a new empty list.
        """
        self.__active = 0
        self.__providers = []
        self.__providersHT = {}
        self.__document_id = document_id
        # Build the default per instance: a `[]` default in the signature
        # would be one list object shared by every Download.
        if search_query is None:
            search_query = []
        self.__search_query = search_query
        self.__transferred = 0
        self.__state = download.NOT_STARTED_STATE
        self.__last_search_time = 0
        # TODO: for the moment unisource is ok
        # divide the file into chunks of 256ko
        # several states : in download, downloaded, not downloaded

    def isActive(self):
        """Return the value last stored by setActive (0 initially)."""
        return self.__active

    def setActive(self, active):
        """Store the active flag."""
        self.__active = active

    def __refreshProviders(self, documentInfo):
        """Bring the provider list in line with the database's providers."""
        documentProviders = globalvars.database.getDocumentProviders(documentInfo.db_document_id)
        for dp in documentProviders:
            n = globalvars.database.getNodeInfo(dp.node_id)
            p = self.__providersHT.get(dp.node_id)
            if p:
                if p.ip != n.ip or p.port != n.port:
                    # The node moved: take the address stored in the
                    # database and allow the provider to be tried again.
                    p.port = n.port
                    p.ip = n.ip
                    p.state = download.Provider.UNTRIED_STATE
                    p.last_try = 0
            else:
                p = download.Provider(n.node_id, n.ip, n.port, n.bandwidth, dp.last_providing_time)
                self.__providers.append(p)
                self.__providersHT[dp.node_id] = p

    def __selectProvider(self):
        """Return the first provider that may be tried now, or None."""
        for p in self.__providers:
            if p.state in (download.Provider.UNREACHABLE_STATE, download.Provider.NOT_PROVIDING_STATE):
                continue
            if p.last_try + download.NEXT_RETRY_PERIOD > time.time():
                # tried too recently
                continue
            return p
        return None

    def __searchForProviders(self):
        """Issue a search for providers unless one was sent recently; return 0."""
        print("no provider")
        recently_searched = self.__last_search_time + download.NEXT_SEARCH_PERIOD > time.time()
        if recently_searched and self.__state == download.SEARCHING_SOURCES_STATE:
            time.sleep(1)
            return 0

        globalvars.maay_core.send_search_request(
            ["#%s" % self.__document_id], constants.INIT_TTL,
            constants.MAAY_SEARCH_RANGE, constants.MIN_SCORE,
            constants.INIT_FNC, constants.INIT_EHC,
            query_id=self.__document_id)
        print("search for providers")
        self.__state = download.SEARCHING_SOURCES_STATE
        self.__last_search_time = time.time()
        time.sleep(1)
        return 0

    def fetch(self):
        """Try to download the document.

        Returns 1 when the download is (or already was) finished, and 0
        when no provider can be tried at the moment (a provider search may
        have been sent in that case).
        """
        if self.__state == download.FINISHED_STATE:
            return 1

        if self.__state == download.NOT_STARTED_STATE:
            self.__state = download.INIT_STATE

        while 1:
            documentInfo = globalvars.database.getDocumentInfo(self.__document_id)
            self.__refreshProviders(documentInfo)

            provider = self.__selectProvider()
            if not provider:
                return self.__searchForProviders()

            print("PROVIDER IP =%s [%s]" % (provider.ip, provider.state))
            self.__state = download.CONNECTING_STATE
            provider.state = download.Provider.CONNECTED_STATE

            connection = None
            try:
                # signal.alarm(5)
                print("essaie sur %s %s" % (provider.ip, provider.port))
                connection = httplib.HTTPConnection(provider.ip, provider.port)
                connection.putrequest("POST", "/message")
                connection.endheaders()
                # signal.alarm(0)
            except Exception as e:
                # signal.alarm(0)
                print("Exception: %s" % e)
                provider.state = download.Provider.UNREACHABLE_STATE
                if connection is not None:
                    connection.close()
                continue
            # NOTE(review): if TimeoutError derives from Exception this
            # handler can never run because the one above catches it first;
            # the original order is kept - confirm before reordering.
            except TimeoutError:
                # signal.alarm(0)
                provider.state = download.Provider.BUSY_STATE
                provider.last_try = time.time()
                if connection is not None:
                    connection.close()
                continue

            try:
                provider.state = download.Provider.DOWNLOADING_STATE
                protocol.sendHeader(connection, constants.DOWNLOAD_REQUEST_COMMAND, "12345678901234567890", constants.INIT_TTL)
                protocol.sendDownloadRequest(connection, self.__document_id, self.__search_query)
                print("requete envoye, attente reponse %s" % connection)
                r = connection.getresponse()
                print("resp")
                (protocol_version, vendor, node_id, ip, port, bandwidth,
                 counter, command_type, queryID, TTL) = response.readHeader(r)
                print("resp 2")
                document_id, flags = response.readDownloadResponse(r)

                if not (flags & constants.HAS_DOCUMENT_DESCRIPTION_FLAG):
                    print("the provider do not have the file %s" % len(self.__providers))
                    provider.state = download.Provider.NOT_PROVIDING_STATE
                    globalvars.database.deleteDocumentProvider(db_document_id=documentInfo.db_document_id, node_id=provider.node_id)
                    continue

                (title, publication_time, mime_type, size, url,
                 provider_count) = response.readDownloadResponseDocumentDescription(r)
                print("url received = %s" % url)
                globalvars.maay_core.updateDocumentInfo(document_id, mime_type, title, size, publication_time, url)

                # Other providers advertised by the answering node.
                for j in xrange(provider_count):
                    (other_id, other_ip, other_port, last_storing_time,
                     other_seen_time, other_bandwidth,
                     other_counter) = response.readSearchResponseHitProvider(r)
                    print("provider in resp = %s" % str((other_id, other_ip, other_port, other_bandwidth, other_counter, other_seen_time)))
                    globalvars.maay_core.updateNodeInfo(other_id, other_ip, other_port, other_bandwidth, other_counter, other_seen_time)
                    globalvars.maay_core.updateDocumentProvider(documentInfo.db_document_id, other_id, last_storing_time)
                    globalvars.maay_core.manifest_interest(other_id, self.__search_query)

                if not (flags & constants.HAS_DOCUMENT_CONTENT_FLAG):
                    print("the provider do not have the file %s" % len(self.__providers))
                    provider.state = download.Provider.NOT_PROVIDING_STATE
                    globalvars.database.deleteDocumentProvider(db_document_id=documentInfo.db_document_id, node_id=provider.node_id)
                    continue

                content_input = response.readDownloadResponseInput(r)
                self.__state = download.DOWNLOADING_STATE
                print("waiting document content")

                # NOTE(review): document_id is taken from the remote reply
                # and becomes part of a file path - confirm it is validated
                # upstream.
                extension = mimetypes.guess_extension(mime_type) or ".txt"
                file_name = globalvars.config.getValue("TemporaryDocumentRoot") + os.path.sep + document_id + extension

                fd = open(file_name, "wb")
                try:
                    idle = 0
                    self.__transferred = 0
                    # Give up after 20 consecutive empty reads.
                    while self.__transferred < size and idle < 20:
                        idle += 1
                        buf = content_input.read(1024)
                        if not buf:
                            continue
                        idle = 0
                        fd.write(buf)
                        self.__transferred += len(buf)
                finally:
                    # Close the temp file even when a read or write fails.
                    fd.close()

                if idle >= 20:
                    # The transfer stalled: drop the partial file and let
                    # the handler below put the provider on hold.
                    os.remove(file_name)
                    raise IOError("idle")

                print("document received completeley")
                if self.__transferred != size:
                    print("Error: file length not match %s %s" % (self.__transferred, size))
                    os.remove(file_name)
                    # NOTE(review): the provider's state and last_try are
                    # left untouched here, so it is eligible again at once.
                    continue

                new_file_name = document_id + extension
                absolute_new_file_name = "%s%s%s" % (globalvars.config.getValue("CachedDocumentRoot"), os.path.sep, new_file_name)
                if os.path.exists(absolute_new_file_name):
                    os.remove(absolute_new_file_name)

                print("rename %s => %s" % (file_name, absolute_new_file_name))
                os.rename(file_name, absolute_new_file_name)
                print("done => %s" % absolute_new_file_name)

                # file_time = int(os.stat(absolute_new_file_name)[stat.ST_MTIME])
                file_time = 0
                fileInfo = maay.datastructure.fileinfo.FileInfo(
                    absolute_new_file_name, file_time,
                    documentInfo.db_document_id,
                    maay.datastructure.documentinfo.CACHED_STATE,
                    maay.datastructure.fileinfo.CREATED_FILE_STATE)
                print("1 documentInfo.db_document_id = %s" % fileInfo.db_document_id)

                db_fileInfos = globalvars.database.getFileInfos(file_name=absolute_new_file_name)
                if not db_fileInfos:
                    globalvars.database.insertFileInfo(fileInfo)
                else:
                    globalvars.database.updateFileInfo(fileInfo)

                globalvars.indexer.addNewDocumentToIndex(absolute_new_file_name)
                self.__state = download.FINISHED_STATE
                provider.state = download.Provider.FINISHED_STATE
                return 1
            except Exception as e:
                time.sleep(2)
                print("Error ex: %s" % e)
                provider.state = download.Provider.BUSY_STATE
                provider.last_try = time.time()
            finally:
                # Runs on return, continue and error alike, so the socket
                # never outlives this attempt.
                connection.close()
def __send_search_request_to(self, query_id, TTL, ip, port, search_query, min_score, forwarding_node_count, result_count):
    """Send one search request to the node at ip:port and record its reply.

    Opens an HTTP connection, POSTs to "/message", writes the protocol
    header (SEARCH_REQUEST_COMMAND, query_id, TTL) followed by the search
    request payload, then reads the header of the HTTP response and hands
    the node description it carries to maay_core.updateNodeInfo.

    Returns communication.Communication.CONNECTION_ERROR when a
    socket.error is raised; otherwise nothing is returned explicitly
    (None).  The connection is closed on every path.
    """
    # NOTE: the signal.alarm() based timeout (and its TIMEOUT_ERROR return)
    # is disabled in this version; only socket errors are handled here.
    connection = None
    try:
        connection = httplib.HTTPConnection(ip, port)
        connection.putrequest("POST", "/message")
        connection.endheaders()

        print("send message")
        protocol.sendHeader(connection, constants.SEARCH_REQUEST_COMMAND, query_id, TTL)
        protocol.sendSearchRequest(connection, min_score, forwarding_node_count, result_count, search_query)

        print("get response")
        http_response = connection.getresponse()
        # The reply header carries the peer's own view of its address; keep
        # it in separate names so the dialled ip/port stay available for
        # the error message below.
        (protocol_version, vendor, node_id, peer_ip, peer_port, bandwidth,
         counter, command_type, queryID, reply_ttl) = response.readHeader(http_response)
        print("recv pong id = %s %s %s" % (node_id, peer_ip, peer_port))

        last_seen_time = int(time.time())
        globalvars.maay_core.updateNodeInfo(node_id, peer_ip, peer_port, bandwidth, counter, last_seen_time)
    except socket.error as exc:
        # socket.error does not always carry (errno, message): some errors
        # have a single argument, so do not unpack two values blindly.
        if exc.args:
            message = exc.args[-1]
        else:
            message = exc
        print("Connection problem on node [%s:%s]: %s" % (ip, port, message))
        return communication.Communication.CONNECTION_ERROR
    finally:
        # Release the socket whether the exchange succeeded or failed.
        if connection is not None:
            connection.close()
def handleMessage(httpRequestHandler):
    """Process one protocol message read from an HTTP request handler.

    Reads the message header from httpRequestHandler.rfile, records the
    sending node through maay_core.updateNodeInfo, then dispatches on the
    command type:

    * SEARCH_REQUEST_COMMAND: reads the request and passes it to
      maay_core.recv_search_request.
    * SEARCH_RESPONSE_COMMAND: reads every hit (document description,
      per-word scores, providers), updates the core with them and adds a
      MaayResult to the result spool registered for the query id.
    * DOWNLOAD_REQUEST_COMMAND: reads the request and passes it, together
      with the handler, to maay_core.recv_download_request.

    For every command other than a download request, a 200 response and a
    PONG_COMMAND header are written back.
    """
    stream = httpRequestHandler.rfile

    # read message content
    (protocol_version, vendor, sender_id, sender_ip, sender_port,
     sender_bandwidth, sender_counter, command_type, queryID, TTL) = response.readHeader(stream)

    # update information on the node
    now = int(time.time())
    core = globalvars.maay_core
    sender_info = core.updateNodeInfo(sender_id, sender_ip, sender_port, sender_bandwidth, sender_counter, now)

    # we have to forward them back also, and bufferize them before
    # forwarding them back
    # check error before
    if command_type == constants.SEARCH_REQUEST_COMMAND:
        request = response.readSearchRequest(stream)
        (min_score, forwarding_node_count, result_count, search_query) = request
        core.recv_search_request(
            queryID, TTL, sender_id, sender_ip, sender_port, search_query,
            min_score, forwarding_node_count, result_count,
            constants.MAAY_SEARCH_RANGE)
        core.manifest_interest(sender_id, search_query)

    elif command_type == constants.SEARCH_RESPONSE_COMMAND:
        # if I receive answers of a unknown query, do nothing
        # NOTE(review): this path returns without writing any HTTP reply;
        # confirm that the sending side tolerates that.
        spool = core.getResultSpoolManager().getResultSpool(queryID)
        if not spool:
            return

        search_query = spool.getQuery()
        core.manifest_interest(sender_id, search_query)

        hit_total = response.readSearchResponseInfo(stream)
        for hit_index in range(hit_total):
            (document_id, mime_type, url, publication_time, file_size,
             title, score_count, provider_count) = response.readSearchResponseHitInfo(stream)

            # update information on the document
            documentInfo = core.updateDocumentInfo(
                document_id, mime_type, title, file_size, publication_time, url)

            # todo 0 should be the score/rank of the document
            for score_index in range(score_count):
                (word, relevance, popularity, excerpt,
                 excerpt_position, word_position) = response.readSearchResponseHitScore(stream)

                # update information on the word in the document
                print("process document scores")
                core.processDocumentScore(
                    documentInfo.db_document_id, word, relevance, popularity,
                    excerpt, excerpt_position, word_position, sender_info)

            ranking_score = core.compute_ranking_score(document_id, search_query)
            core.updateDocumentMatching(document_id=document_id)

            spool.addResult(
                resultspool.MaayResult(
                    document_id, ranking_score, 0, 0,
                    int(publication_time), documentInfo.state))

            for provider_index in xrange(provider_count):
                (provider_id, provider_ip, provider_port, last_storing_time,
                 provider_seen_time, provider_bandwidth,
                 provider_counter) = response.readSearchResponseHitProvider(stream)

                core.updateNodeInfo(
                    provider_id, provider_ip, provider_port,
                    provider_bandwidth, provider_counter, provider_seen_time)
                core.manifest_interest(provider_id, search_query)

                # update information on the document provider (node)
                core.updateDocumentProvider(
                    documentInfo.db_document_id, provider_id, last_storing_time)

    elif command_type == constants.DOWNLOAD_REQUEST_COMMAND:
        (document_id, search_query) = response.readDownloadRequest(stream)
        core.manifest_interest(sender_id, search_query)
        core.recv_download_request(
            httpRequestHandler, queryID, TTL, sender_id, sender_ip,
            sender_port, document_id, search_query)

    # A download request gets no PONG from this function.
    if command_type == constants.DOWNLOAD_REQUEST_COMMAND:
        return

    httpRequestHandler.send_response(200)
    httpRequestHandler.end_headers()
    protocol.sendHeader(tools.file2stream(httpRequestHandler.wfile), constants.PONG_COMMAND, queryID, 0)