def cache_get_file( file_path, max_age, connect_args, job_attrs, squid_port, http_port ):
    """
    Get the given file (file_path) from the cache and return a file handle to it
    (via urllib2) if it is younger than max_age (where max_age is in seconds).
    """
    # get the file from squid
    global tmp_connect_args
    global tmp_job_attrs

    try:
        if tmp_job_attrs == None:
            tmp_job_attrs = {}
        if tmp_connect_args == None:
            tmp_connect_args = {}

        tmp_connect_args[os.path.basename(file_path)] = copy.deepcopy(connect_args)
        tmp_job_attrs[os.path.basename(file_path)] = iftfile.iftjob.get_attrs_copy(job_attrs)

        proxy_handler = urllib2.ProxyHandler( {'http': 'http://127.0.0.1:' + str(squid_port)} )
        opener = urllib2.build_opener( proxy_handler )

        if max_age > 0:
            opener.addheaders = [("Cache-Control", "max-age=" + str(max_age))]

        cached_file_fd = opener.open( "http://127.0.0.1:" + str(http_port) + os.path.abspath( file_path ) )
        return cached_file_fd

    except urllib2.HTTPError, inst:
        iftlog.log(1, "iftcache.cache_get_file: file not in cache")
        tmp_connect_args[os.path.basename(file_path)] = None
        tmp_job_attrs[os.path.basename(file_path)] = None
        return None
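# Illustrative usage sketch (not part of the module): fetch a file through the
# cache and fall back to a direct transfer on a miss.  The port numbers, the
# 300-second max age, and the fetch_directly() helper are all hypothetical.
def example_cache_lookup( file_path, job_attrs ):
    fd = cache_get_file( file_path, 300, {}, job_attrs, 3128, 8080 )
    if fd == None:
        return fetch_directly( file_path, job_attrs )   # hypothetical fallback
    data = fd.read()
    fd.close()
    return data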
def cache_shutdown():
    """
    Shut down the caching system if no one is using it.
    @return 0 on success; negative on failure
    """
    global cache_ref
    global cache_sem
    global cache_server
    global cache_dir

    try:
        cache_sem.acquire()
        cache_ref -= 1

        if cache_ref == 0:
            # last user is gone; remove the cache directory and stop our cache server
            # TODO: less kludgy way than shelling out?
            os.popen("rm -rf " + cache_dir).close()

            if cache_server != None:
                cache_server.socket.close()
                del cache_server
                cache_server = None

        cache_sem.release()
        return 0
    except Exception, inst:
        iftlog.exception("iftcache: could not shut down cache", inst)
        return E_UNHANDLED_EXCEPTION
def recv_files( self, remote_files, local_dir ):
    # shell out and scp each file
    cmd = "/usr/bin/scp -P " + str(self.port)
    if self.identity_file != "":
        cmd += " -i " + self.identity_file      # leading space, or the flag fuses with the port number

    max_rc = 0
    for file_list in remote_files:
        filename = file_list[1]
        this_cmd = copy.copy( cmd )
        this_cmd += " " + self.remote_user + "@" + self.remote_host + ":" + os.path.abspath(filename) + " " + os.path.abspath(local_dir) + "/"

        iftlog.log(1, self.name + ": '" + this_cmd + "'")
        pipe = os.popen( this_cmd )
        rc = pipe.close()       # None on success; exit status otherwise

        if rc != None:
            iftlog.log(5, self.name + ": scp returned " + str(rc) + " for file " + str(filename))
            max_rc = E_NO_CONNECT
            continue
        else:
            self.add_file( file_list[0], file_list[1] )

    return max_rc
def recv_files( self, remote_file_paths, local_file_dir ):
    global cache_dir

    # get the file from the cache and write it to disk, if possible
    file_fd = cache_get_file( cache_path(self.file_to_recv), self.max_age, self.connect_args, self.job_attrs, self.squid_port, self.http_port )
    if file_fd == None:
        iftlog.log(3, self.name + ": could not receive " + self.file_to_recv)
        self.recv_finished( TRANSMIT_STATE_FAILURE )
        return E_NO_DATA    # not in cache ==> protocol failure

    data = file_fd.read()
    file_fd.close()

    tmp_file_name = ""
    try:
        tmp_file_name = local_file_dir + "/" + os.path.basename( self.job_attrs.get( iftfile.JOB_ATTR_DEST_NAME ) )
        fd = open( tmp_file_name, "wb" )
        fd.write( data )
        fd.close()
        return 0
    except Exception, inst:
        iftlog.exception(self.name + ".recv_files: failed to save " + self.file_to_recv + " to " + tmp_file_name, inst)
        return E_IOERROR
def refine_classifier():
    """
    Train the classifier using the data collected.
    """
    global PENDING_FEATURES
    global CLASSIFIER

    if RETRAIN_FREQ <= 0:
        return 0    # don't bother if the classifier will never exist anyway

    iftlog.log(1, "Retraining classifier...")
    if CLASSIFIER == None:
        # haven't begun training yet, so make it happen
        if CLASSIFIER_TYPE == "NaiveBayes":
            CLASSIFIER = iftNaiveBayesClassifier.train( PENDING_FEATURES )
        elif CLASSIFIER_TYPE == "DecisionTree":
            CLASSIFIER = DecisionTreeClassifier.train( PENDING_FEATURES )
    else:
        if CLASSIFIER_TYPE == "NaiveBayes":
            CLASSIFIER.refine( PENDING_FEATURES )
        elif CLASSIFIER_TYPE == "DecisionTree":
            CLASSIFIER.refine( PENDING_FEATURES, 0.05, 100, 10 )    # use default train() values

    PENDING_FEATURES = []
    return 0
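# Assumption for illustration: iftNaiveBayesClassifier appears to follow the
# NLTK convention of training on (featureset, label) pairs, which suggests
# PENDING_FEATURES accumulates entries shaped like the following.  The feature
# names and labels here are hypothetical, not taken from this codebase.
#
# PENDING_FEATURES = [
#     ({"proto": "http", "file_size": "large"}, "success"),
#     ({"proto": "scp",  "file_size": "large"}, "failure"),
# ]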
def __next_files(self):
    """
    What files do we want to receive next?
    """
    unreceived = None
    if self.ift_job.get_attr( iftfile.JOB_ATTR_REMOTE_IFTD ) or self.get_chunking_mode() != PROTO_NO_CHUNKING:
        unreceived = self.unreceived_chunk_ids()

    if unreceived == None:
        return []

    tld = self.ift_job.get_attr( iftfile.JOB_ATTR_SRC_CHUNK_DIR )

    if self.get_chunking_mode() != PROTO_NONDETERMINISTIC_CHUNKING:
        # reserve these chunks in advance
        urc = []
        for i in unreceived:
            rc = self.iftfile_ref.reserve_chunk( self, i, self.ift_job.get_attr( iftfile.JOB_ATTR_CHUNK_TIMEOUT ) )
            if rc == E_COMPLETE:
                # we're done!
                self.recv_finished( TRANSMIT_STATE_SUCCESS )
                return []
            elif rc != 0:
                iftlog.log(1, self.name + ": WARNING: could not reserve chunk " + str(i) + " for writing in " + str(self.iftfile_ref))
            else:
                iftlog.log(1, self.name + ": reserved chunk " + str(i) + " for writing in " + str(self.iftfile_ref))
                urc.append( [i, tld + "/" + str(i)] )

        unreceived = urc
    else:
        unreceived = [[-1, os.path.abspath( self.ift_job.get_attr( iftfile.JOB_ATTR_SRC_NAME ) )]]

    return unreceived
def setup( self, connect_attrs ):
    iftproto.iftsender.setup( self, connect_attrs )

    # start up the libtorrent session
    self.bt_session = lt.session()
    self.bt_session.listen_on( self.setup_attrs[ IFTBITTORRENT_PORTRANGE_LOW ], self.setup_attrs[ IFTBITTORRENT_PORTRANGE_HIGH ] )

    iftlog.log(1, "libtorrent session created!")
    return 0    # we're ready for torrent files
def __send_file_chunks( self ):
    """
    Call repeatedly in the ifttransmit main loop to send chunks.
    This will be called once prepare_transmit and possibly send_job have been called.
    Return an event to be handled by ifttransmit.
    """
    if self.ready_to_send == False or self.transmit_state != TRANSMIT_STATE_CHUNKS:
        return (0, E_BAD_STATE)     # can't do anything

    if self.ift_job == None and self.ready_to_send == True:
        iftlog.log(5, self.name + ": No job to process! Use my assign_job() method and resume me")
        self.ready_to_send = False
        return (0, E_BAD_STATE)

    chunk = None
    chunk_id = -1
    rc = 0

    chunk, chunk_id, chunk_path, remote_chunk_path = self.__next_chunk()

    try:
        rc = self.send_one_chunk( chunk, chunk_id, chunk_path, remote_chunk_path )
    except Exception, inst:
        iftlog.exception( self.name + ": could not send data", inst )
        self.close_connection( TRANSMIT_STATE_FAILURE )
        t = time.time()
        iftstats.log_chunk( self.ift_job, self.name, False, t, t, 0 )
        return (PROTO_MSG_ERROR_FATAL, E_NO_DATA)
def await_sender( self, connection_attrs, timeout ):
    self.soc = None
    self.connected = False
    self.timeout = 1

    if connection_attrs != None:
        p = connection_attrs.get( PROTO_PORTNUM )
        if p != None:
            try:
                self.port = int(p)
            except:
                pass

        # only look up the timeout if we were actually given attributes
        if connection_attrs.get( IFTSOCKET_TIMEOUT ) != None:
            self.timeout = connection_attrs.get( IFTSOCKET_TIMEOUT )

    # set up a server socket to listen for the remote host
    if self.port != None:
        try:
            self.soc = socket.socket( socket.AF_INET, socket.SOCK_STREAM )
            self.soc.setsockopt( socket.SOL_SOCKET, socket.SO_REUSEADDR, 1 )
            self.soc.settimeout( self.timeout )     # use the given timeout
            self.soc.bind( ("localhost", self.port) )
            self.soc.listen(1)      # only one remote host should talk to me
            iftlog.log(1, "iftsocket_receiver: Listening on localhost:" + str(self.port))
        except Exception, inst:
            iftlog.exception( "iftsocket_receiver: could not set up server socket", inst )
            return E_NO_CONNECT

    return 0
def test_iftd( file, filehash, filesize, remote_host, tmpfs_dir ):
    ts = 0
    te = 0

    http_connect_attrs = {
        iftproto.PROTO_PORTNUM: 8000,
        iftfile.JOB_ATTR_SRC_NAME: file,
        iftfile.JOB_ATTR_DEST_NAME: file,
        iftfile.JOB_ATTR_SRC_HOST: remote_host
    }

    job_attrs = {
        iftfile.JOB_ATTR_SRC_HOST: remote_host,
        iftfile.JOB_ATTR_SRC_NAME: file,
        iftfile.JOB_ATTR_FILE_SIZE: int(filesize),
        iftfile.JOB_ATTR_CHUNKSIZE: int(filesize),      # get the whole file at once
        # iftfile.JOB_ATTR_FILE_HASH: filehash,
        iftfile.JOB_ATTR_DEST_NAME: file,
        iftfile.JOB_ATTR_DEST_HOST: "localhost",
        protocols.http.HTTP_SERVER_VERSION: 11
    }

    client = iftapi.make_XMLRPC_client()
    connects = {
        "http_receiver": http_connect_attrs,
        "http_sender": http_connect_attrs
    }

    for i in xrange(0, 10):
        # transfer the file with iftd 10 times
        ts = time.time()
        client.begin_ift( job_attrs, connects, False, True, 4001, "/RPC2", True, False, 60 )
        te = time.time()
        iftlog.log(5, "iftd " + remote_host + file + " " + str(te - ts))
def lookup_proto_instance(proto_handle):
    try:
        inst = __loaded_proto_instances[proto_handle]
        return inst
    except:
        iftlog.log(5, "iftloader: Invalid handle " + str(proto_handle))
        return None
def send_chunk( self, chunk, chunk_id, chunk_path, remote_chunk_path ):
    # if chunk paths are given, then we are supposed to send just the chunks, not the whole file
    local = self.file_to_send
    remote = self.remote_path
    if chunk_path:
        local = chunk_path
        remote = remote_chunk_path

    # shell out and scp
    cmd = "scp -P " + str(self.port)
    if self.identity_file != "":
        cmd += " -i " + self.identity_file      # leading space, or the flag fuses with the port number

    cmd += " " + local + " " + self.remote_user + "@" + self.remote_host + ":" + remote

    iftlog.log(1, self.name + ": " + cmd)
    pipe = os.popen( cmd )
    rc = pipe.close()       # None on success; exit status otherwise
    if rc != None:
        iftlog.log(5, "iftscp_sender: scp returned " + str(rc))
        return -rc

    return 0
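# Design note (a suggestion, not the original implementation): the scp paths
# above are spliced into a shell command unquoted, so filenames containing
# spaces or shell metacharacters would break the command.  A minimal sketch of
# a safer construction using the Python 2 standard library:
import pipes

def build_scp_command( port, identity_file, local, remote_spec ):
    # quote every externally-supplied component before handing it to the shell
    cmd = "scp -P " + str(int(port))
    if identity_file != "":
        cmd += " -i " + pipes.quote(identity_file)
    cmd += " " + pipes.quote(local) + " " + pipes.quote(remote_spec)
    return cmd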
def make_chunks_dir( filename, filehash ):
    """
    Make a directory from the filename and filehash to store incoming chunks into.
    """
    if filename[0] == "/":
        filename = filename[1:]

    file_dir = os.path.basename( filename ) + "." + str(filehash)
    if os.path.exists( os.path.join(__file_chunks_dir, file_dir) ):
        iftlog.log(3, "WARNING: " + os.path.join(__file_chunks_dir, file_dir) + " exists!")
        try:
            os.popen("rm -rf " + os.path.join(__file_chunks_dir, file_dir) + "/*").close()
        except:
            pass
        return 0    # already done!

    chunk_dir = get_chunks_dir( filename, filehash )
    try:
        rc = os.popen("mkdir -p " + chunk_dir).close()
        if rc != None:
            iftlog.log(5, "iftfile: could not make chunk directory " + chunk_dir)
            return E_IOERROR
        return 0
    except Exception, inst:
        iftlog.exception( "iftfile: could not make chunk directory " + chunk_dir, inst )
        return E_IOERROR
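# Suggestion (a sketch, not the original implementation): the shell-outs to
# "rm -rf" and "mkdir -p" above could be replaced with standard-library calls,
# which sidesteps quoting problems with unusual filenames entirely.
import os
import shutil

def make_dir_native( chunk_dir ):
    # clear any stale contents, then recreate the directory tree
    if os.path.exists( chunk_dir ):
        shutil.rmtree( chunk_dir, ignore_errors=True )
    try:
        os.makedirs( chunk_dir )
        return 0
    except OSError:
        return E_IOERROR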
def validate_attrs(self, given_attrs, needed_args):
    """
    Determine whether or not the given attributes are valid and sufficient.

    @arg given_attrs
        Dictionary of setup attributes to be passed to ifttransmit.setup()
    @arg needed_args
        List of required argument keys

    @return 0 if valid, nonzero if not
    """
    rc = 0
    if given_attrs != None and needed_args != None:
        for arg in needed_args:
            if given_attrs.has_key(arg) == False:
                iftlog.log(5, self.name + ".validate_attrs: argument " + arg + " not supplied")
                rc = E_NO_DATA

    elif needed_args != None:
        # no attributes were given at all; anything beyond the optional keys is missing
        if iftfile.JOB_ATTR_OPTIONAL in needed_args:
            needed_args.remove( iftfile.JOB_ATTR_OPTIONAL )
        if PROTO_USE_DEPRICATED in needed_args:
            needed_args.remove( PROTO_USE_DEPRICATED )

        if len(needed_args) != 0:
            iftlog.log(5, self.name + ".validate_attrs: need " + str(needed_args) + ", but None given")
            rc = E_NO_DATA

    return rc
def unlock_chunk( self, owner, chunk_id ):
    """
    Unlock a chunk.  Only valid for MODE_WRITE.
    Return 0 on success; negative on failure.
    """
    try:
        # sanity check
        if self.__mode != MODE_WRITE:
            self.__error = E_BAD_MODE
            return E_BAD_MODE

        if chunk_id < 0 or chunk_id >= self.__num_chunks:
            self.__error = E_INVAL
            return E_INVAL

        if self.__chunk_owners[ chunk_id ] == owner:
            self.__chunk_reservations[ chunk_id ] = 0
            self.__chunk_owners[ chunk_id ] = None
            self.__chunk_locks[ chunk_id ].release()
            return 0

        return E_INVAL
    except Exception, inst:
        if self.__open == False:
            iftlog.log(5, "iftfile.unlock_chunk: file is no longer open")
        iftlog.exception("iftfile.unlock_chunk: could not unlock " + str(chunk_id), inst)
        return E_BAD_STATE
def shutdown():
    """
    Shut down the file writer.
    """
    os.popen("rm -rf " + __file_chunks_dir).close()
    iftlog.log(3, "iftfile: shutdown complete")
def __grow_metadata( self, chunk_id ):
    """
    Atomically allocate more chunk masks, locks, and reservations.
    """
    self.__expand_lock.acquire()

    # if we were closed, do nothing
    if self.__open == False:
        iftlog.log(5, "iftfile.__grow_metadata(): file is no longer open, so doing nothing")
        self.__expand_lock.release()
        return

    num_mutexes = len(self.__chunk_locks)
    if num_mutexes < chunk_id + 1:
        # NOTE: use a comprehension here; multiplying [BoundedSemaphore(1)] by N
        # would replicate N references to one semaphore instead of N distinct locks
        self.__chunk_locks = self.__chunk_locks + [threading.BoundedSemaphore(1) for i in xrange(chunk_id + 1 - num_mutexes)]

    num_reservations = len(self.__chunk_reservations)
    if num_reservations < chunk_id + 1:
        self.__chunk_reservations = self.__chunk_reservations + [0] * (chunk_id + 1 - num_reservations)

    num_owners = len(self.__chunk_owners)
    if num_owners < chunk_id + 1:
        self.__chunk_owners = self.__chunk_owners + [None] * (chunk_id + 1 - num_owners)

    self.__expand_lock.release()
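# Standalone demonstration of the pitfall fixed above: list multiplication
# copies references, so every slot would have shared a single lock.
import threading

shared = [threading.BoundedSemaphore(1)] * 3
distinct = [threading.BoundedSemaphore(1) for i in xrange(3)]

assert shared[0] is shared[1]           # the same semaphore object, three times
assert distinct[0] is not distinct[1]   # three independent semaphores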
def test_arizonatransfer( file, filehash, filesize, remote_host, tmpfs_dir ):
    ts = 0
    te = 0
    file_url = remote_host + file

    file_data = {
        "filename": file[1:],
        "hash": filehash,
        "size": int(filesize),
        "hashfuncs": [arizonatransfer.default_hashfunc]
    }

    for i in xrange(0, 10):
        # transfer the file with arizonatransfer 10 times, excluding getting the metafile
        ts = time.time()
        rc, downloaded_files = arizonatransfer.getfiles1(remote_host, [file_data], tmpfs_dir, None, True, prioritylist=["http"])
        te = time.time()

        if not rc:
            print "arizonatransfer failed!"
            print "downloaded files: " + str(downloaded_files)

        iftlog.log(5, "arizonatransfer " + file_url + " " + str(te - ts))
def get_chunks( filename, chunksize ):
    """
    Determine the chunks and SHA-1 hashes of the would-be chunks of a file.
    Return (0, chunks, chunk_hashes) on success; (E_IOERROR, None) on failure.
    """
    # sanity check
    if not os.path.exists(filename):
        iftlog.log(3, "Skipping " + filename + " since it cannot be found")
        return (E_IOERROR, None)

    if not (stat.S_IRUSR & os.stat( filename ).st_mode):
        iftlog.log(3, "Skipping " + filename + " since I do not have read permission")
        return (E_IOERROR, None)

    chunks = []
    chunk_hashes = []

    fd = open( filename, "rb" )
    while True:
        chunk = fd.read( chunksize )

        m = hashlib.sha1()
        m.update( chunk )
        chunk_hashes.append( m.hexdigest() )
        chunks.append( chunk )

        if len(chunk) != chunksize:
            break   # last chunk; EOF reached

    fd.close()
    return (0, chunks, chunk_hashes)
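# Illustrative usage (not part of the module): chunk a file and verify the
# first chunk against its recorded hash.  The 64 KiB chunk size is arbitrary.
def example_verify_first_chunk( path ):
    result = get_chunks( path, 65536 )
    if result[0] != 0:
        return False    # could not read the file
    rc, chunks, chunk_hashes = result
    m = hashlib.sha1()
    m.update( chunks[0] )
    return m.hexdigest() == chunk_hashes[0]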
def log_transfer(function, pid, timestamp, timestampend):
    try:
        iftlog.log(3, "Retrieved file")
        import storklog
        storklog.log_transfer(function, pid, timestamp, timestampend)
    except:
        pass
def recv_file( self, remote_chunk_dir, desired_chunks ):
    chunk_dict = {}

    # get each chunk from the remote host
    try:
        for chunk in desired_chunks:
            connection = urllib2.Request( "http://" + self.remote_host + ":" + str(self.connect_args[iftproto.PROTO_PORTNUM]) + os.path.join( remote_chunk_dir, str(chunk) ) )
            response = urllib2.urlopen( connection )

            if response.code == 200:
                # got this chunk
                chunk_dict[chunk] = response.read()
            else:
                iftlog.log(3, self.name + ": WARNING: could not get chunk " + str(chunk) + ", status = " + str(response.code))

            response.close()

        if chunk_dict == {}:
            return (E_NO_DATA, None)

        return (0, chunk_dict)
    except Exception, inst:
        iftlog.exception(self.name + ": ERROR: could not get all chunks from " + str(self.remote_host) + " in " + str(remote_chunk_dir), inst)
        return (E_NO_CONNECT, None)
def get_proto_rankings( job_attrs, success=True ):
    """
    If the classifier is a Naive Bayes classifier, then get back the probabilities
    of each protocol's success given a dictionary of job attributes.

    @arg job_attrs
        Job attribute dictionary to feed into the classifier
    @arg success
        Set to True to measure the probabilities of successful transmission.
        Set to False to measure the probabilities of failed transmissions.

    @return A list of protocol rankings as (protocol, probability) tuples.
    """
    if not CLASSIFIER:
        iftlog.log(1, "get_proto_rankings: no classifier available")
        return []

    if CLASSIFIER_TYPE == "NaiveBayes":
        ret = []
        features = extract_features( job_attrs, success )
        distrib = CLASSIFIER.prob_classify( features )
        for sample in distrib.samples():
            ret.append( (sample, distrib.prob(sample)) )
        return ret
    else:
        iftlog.log(1, "get_proto_rankings: cannot yet handle " + str(CLASSIFIER_TYPE))
        return []
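# Illustrative usage (hypothetical job attributes, mirroring the test driver
# at the end of this section): print the classifier's ranking of each protocol
# for a prospective transfer.
def example_print_rankings( job_attrs ):
    for proto, prob in get_proto_rankings( job_attrs, True ):
        print str(proto) + ": " + str(prob)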
def import_package(name):
    try:
        __import__(name)
        iftlog.log(0, "iftloader: Loaded " + name)
        return 0
    except Exception, inst:
        iftlog.exception("iftloader: could not load " + name, inst)
        return -1
def lock_chunk( self, owner, chunk_id, override=False, t=1.0 ):
    """
    Lock a chunk for writing, so no other threads can access it.
    Only valid for MODE_WRITE.  Blocks until the lock is acquired,
    and returns 0 once this thread holds it.
    """
    try:
        # sanity check
        if self.marked_complete:
            self.__error = E_COMPLETE
            return E_COMPLETE

        if self.__mode != MODE_WRITE:
            self.__error = E_BAD_MODE
            return E_BAD_MODE

        # if we know how many chunks there are, then lock it as usual
        if self.known_size == True:
            if chunk_id < 0 or chunk_id >= self.__num_chunks:
                self.__error = E_INVAL
                return E_INVAL
        else:
            # make a new entry if we need to, and lock it
            if chunk_id < 0:
                self.__error = E_INVAL
                return E_INVAL
            self.__grow_metadata( chunk_id )

        # can't lock the chunk if it already has data
        if self.__chunk_mask[ chunk_id ]:
            return E_DUPLICATE

        self.__chunk_locks[ chunk_id ].acquire()

        # we're takin' over
        if override:
            self.__chunk_reservations[ chunk_id ] = time.time() + t
        self.__chunk_owners[ chunk_id ] = owner

        # sanity check again (in case this was called after fclose())
        if self.__mode != MODE_WRITE:
            self.__error = E_BAD_MODE
            self.__chunk_locks[ chunk_id ].release()
            return E_BAD_MODE

        # when we return, this thread holds the lock
        return 0
    except Exception, inst:
        if self.__open == False:
            # should only happen if the file gets closed by another thread
            iftlog.log(5, "iftfile.lock_chunk: file is no longer open")
        iftlog.exception("iftfile.lock_chunk: could not lock chunk " + str(chunk_id), inst)
        return E_BAD_STATE
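# Illustrative lock/unlock usage pattern (a sketch): lock a chunk, write it,
# then unlock it in a finally block so failures cannot leak the lock.
# write_chunk() is a hypothetical stand-in for whatever stores the data.
def example_write_chunk( iftfile_ref, owner, chunk_id, data ):
    rc = iftfile_ref.lock_chunk( owner, chunk_id )
    if rc != 0:
        return rc   # E_DUPLICATE, E_BAD_MODE, E_INVAL, etc.
    try:
        write_chunk( iftfile_ref, chunk_id, data )   # hypothetical helper
    finally:
        iftfile_ref.unlock_chunk( owner, chunk_id )
    return 0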
def proto_clean( self ):
    if self.torrent_handle != None:
        iftlog.log(3, self.name + ": purging " + self.torrent_handle.save_path())
        os.popen("rm -rf " + self.torrent_handle.save_path()).close()
        self.bt_session.remove_torrent( self.torrent_handle )

    self.torrent_handle = None
    self.torrent_info = None
    self.chunksize = None
def recv_chunks( self, remote_chunk_dir, desired_chunks ):
    # determine what has been received since the last time this was called
    chunk_list = self.get_chunk_list()
    active_set = set( chunk_list )

    # wait until we actually receive something
    while len(active_set) - len(self.recv_prev) == 0:
        time.sleep(0.5)
        active_set = active_set | set( self.get_chunk_list() )

        if self.torrent_handle.is_seed():
            # we have all chunks
            active_set = set([i for i in xrange(0, self.torrent_handle.status().num_pieces)])
            break

        s = self.torrent_handle.status()
        state_str = ['queued', 'checking', 'downloading metadata', \
                     'downloading', 'finished', 'seeding', 'allocating']
        print '%.2f%% complete (down: %.1f kB/s up: %.1f kB/s peers: %d) %s' % \
              (s.progress * 100, s.download_rate / 1000, s.upload_rate / 1000, \
               s.num_peers, state_str[s.state])

        if not iftapi.is_alive():
            return E_FAILURE

    # indicate what we have received
    new_chunks = active_set - self.recv_prev
    iftlog.log(3, self.name + ": received " + str(len(new_chunks)) + " more chunks")

    self.recv_prev = active_set
    print "have now received " + str(len(self.recv_prev)) + " chunks"

    # copy the data of each newly-received chunk out of the torrent's save path
    bt_dir = self.torrent_handle.save_path()
    rc = 0
    for chunk_id in new_chunks:
        # which file(s) did this chunk correspond to?
        file_slices = self.torrent_info.map_block( chunk_id, 0, self.chunksize )
        for fs in file_slices:
            recv_file = self.torrent_info.file_at( fs.file_index )
            try:
                chunk_fd = open( bt_dir + "/" + recv_file.path, "r" )
                chunk_fd.seek( fs.offset )
                chunk_data = chunk_fd.read( self.chunksize )
                chunk_fd.close()

                if chunk_data:
                    self.add_chunk( chunk_id, chunk_data )
                    print "copy chunk " + str(chunk_id) + " from " + str(recv_file.path) + " at offset " + str(fs.offset) + ", length " + str(len(chunk_data)) + " (chunksize is " + str(self.chunksize) + ")"
            except Exception, inst:
                iftlog.exception( self.name + ": could not get chunk " + str(chunk_id) + " from " + str(recv_file) + " at offset " + str(fs.offset), inst )
                rc = E_IOERROR
                continue

    return rc
def setup( self, setup_attrs ):
    try:
        self.port = setup_attrs[PROTO_PORTNUM]
        iftlog.log(1, "iftsocket_receiver.setup: will receive on port " + str(self.port))
    except:
        return E_NO_VALUE

    # nothing else to do...
    return 0
def close_connection( self, final_state=TRANSMIT_STATE_SUCCESS ):
    """
    Close up a connection.  Invalidate any locks or references we have acquired.
    """
    self.__end_transmit()
    self.transmit_state = final_state
    self.ready_to_send = False
    self.end_time = time.time()
    iftlog.log(5, self.name + ": Transmission took " + str(self.end_time - self.start_time) + " seconds")
def setup( self, setup_attrs ):
    try:
        if setup_attrs.has_key( PROTO_PORTNUM ):
            self.port = setup_attrs[PROTO_PORTNUM]
        if setup_attrs.has_key( IFTSCP_IDENTITY_FILE ):
            self.identity_file = setup_attrs[ IFTSCP_IDENTITY_FILE ]

        iftlog.log(1, "iftscp_sender.setup: will send on port " + str(self.port))
        return 0
    except Exception, inst:
        return E_NO_VALUE
def test_iftd( file, filehash, filesize, remote_host, tmpfs_dir ):
    ts = 0
    te = 0

    http3_connect_attrs = {
        iftproto.PROTO_PORTNUM: 8000,
        iftfile.JOB_ATTR_SRC_NAME: file,
        iftfile.JOB_ATTR_DEST_NAME: file,
        iftfile.JOB_ATTR_SRC_HOST: remote_host
    }

    http4_connect_attrs = {
        iftproto.PROTO_PORTNUM: 8000,
        iftfile.JOB_ATTR_SRC_NAME: file,
        iftfile.JOB_ATTR_DEST_NAME: file,
        iftfile.JOB_ATTR_SRC_HOST: remote_host
    }

    job_attrs = {
        iftfile.JOB_ATTR_SRC_HOST: remote_host,
        iftfile.JOB_ATTR_SRC_NAME: file,
        iftfile.JOB_ATTR_FILE_SIZE: int(filesize),
        iftfile.JOB_ATTR_FILE_HASH: filehash,
        iftfile.JOB_ATTR_DEST_NAME: file,
        iftfile.JOB_ATTR_CHUNKSIZE: int(filesize) / 20,
        iftfile.JOB_ATTR_DEST_HOST: "localhost"
    }

    client = iftapi.make_XMLRPC_client()
    connects = {
        "http3_receiver": http3_connect_attrs,
        "http3_sender": http3_connect_attrs,
        "http4_receiver": http4_connect_attrs,
        "http4_sender": http4_connect_attrs
    }

    client.clear_classifier()

    for i in xrange(0, 10):
        # transfer the file with iftd 10 times
        ts = time.time()
        client.begin_ift( job_attrs, connects, False, True, 4001, "/RPC2", True, False, 60 )
        te = time.time()
        iftlog.log(5, "iftd " + remote_host + file + " " + str(te - ts))

    time.sleep(15)
    cls_data = client.get_proto_rankings( job_attrs, True )

    iftlog.log(5, "")
    iftlog.log(5, "Protocol rankings")
    for (proto, prob) in cls_data:
        iftlog.log(5, str(proto) + ": " + str(prob))
    iftlog.log(5, "")