def recv_chunks( self, remote_chunk_dir, desired_chunks ):
    # shell out and scp each desired chunk from the remote chunk directory
    cmd = "/usr/bin/scp -P " + str(self.port) + " "
    if self.identity_file != "":
        cmd += "-i " + self.identity_file + " "

    if remote_chunk_dir[-1] != '/':
        remote_chunk_dir += '/'

    received_chunks = {}
    max_rc = 0
    for chunk in desired_chunks:
        self.num_sent += 1
        if self.num_sent > self.max_sent:
            max_rc = E_NO_CONNECT
            continue

        # build the scp command line for this chunk
        this_cmd = copy.copy( cmd )
        this_cmd += " " + self.remote_user + "@" + self.remote_host + ":" + remote_chunk_dir + str(chunk) + " " + iftfile.get_chunks_dir( self.file_name, self.file_hash ) + "/" + str(chunk)

        iftlog.log(1, self.name + ": '" + this_cmd + "'")

        pipe = os.popen( this_cmd )
        rc = pipe.close()
        if rc != None:
            iftlog.log(5, self.name + ": scp returned " + str(rc) + " for chunk " + str(chunk))
            max_rc = E_NO_CONNECT
            continue

        # read back the chunk we just copied
        chunk_dir = iftfile.get_chunks_dir( self.file_name, self.file_hash )
        fd = open( chunk_dir + "/" + str(chunk) )
        chunk_data = fd.read()
        fd.close()

        received_chunks[ chunk ] = chunk_data

    return (max_rc, received_chunks)
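# --- Illustrative sketch, not part of the original module ---
# The helper below only shows what one assembled scp command line from the
# recv_chunks() above looks like for a single chunk.  The port, key path,
# user, host, and directories are made-up example values.
def _example_scp_command( chunk=7 ):
    cmd = "/usr/bin/scp -P " + str(2222) + " "
    cmd += "-i /home/iftd/.ssh/id_rsa "                    # hypothetical identity file
    cmd += "iftd@sender.example.com:/tmp/iftd-chunks/" + str(chunk)
    cmd += " /tmp/iftd-local-chunks/" + str(chunk)
    return cmd
    # -> "/usr/bin/scp -P 2222 -i /home/iftd/.ssh/id_rsa iftd@sender.example.com:/tmp/iftd-chunks/7 /tmp/iftd-local-chunks/7"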
def recv_chunks( self, remote_chunk_dir, desired_chunks ):
    # receive chunks in fragments, since there is no remote IFTD and the server is HTTP/1.1
    try:
        byte_ranges = []

        # determine the byte ranges corresponding to the chunks
        if len(desired_chunks) > 1:
            desired_chunks.sort()
            for chunk in desired_chunks:
                byte_ranges.append( [ self.job_attrs.get( iftfile.JOB_ATTR_CHUNKSIZE ) * chunk, min( self.job_attrs.get( iftfile.JOB_ATTR_CHUNKSIZE ) * (chunk+1) - 1, self.file_size - 1 ) ] )
        else:
            byte_ranges = [[ self.job_attrs.get( iftfile.JOB_ATTR_CHUNKSIZE ) * desired_chunks[0], min( self.job_attrs.get( iftfile.JOB_ATTR_CHUNKSIZE ) * (desired_chunks[0]+1) - 1, self.file_size - 1 ) ]]

        # translate the byte ranges into a Range header value
        byte_range_str = "bytes="
        for brange in byte_ranges:
            byte_range_str += str(brange[0]) + "-" + str(brange[1]) + ","

        byte_range_str = byte_range_str[:-1]    # remove trailing ,

        remote_file = self.job_attrs.get( iftfile.JOB_ATTR_SRC_NAME )
        if remote_file[0] != '/':
            remote_file = '/' + remote_file

        iftlog.log(3, self.name + ": request " + byte_range_str + " of " + remote_file + " from " + self.job_attrs.get( iftfile.JOB_ATTR_SRC_HOST ) )

        req = urllib2.Request( "http://" + self.job_attrs.get( iftfile.JOB_ATTR_SRC_HOST ) + ":" + str(self.portnum) + remote_file )
        req.add_header( "range", byte_range_str )

        resp = urllib2.urlopen( req )
        if resp.code < 200 or resp.code >= 400:
            # error!
            return E_NO_CONNECT

        # stream the data directly from the response
        data = resp

        num_chunks = 0
        content_type = resp.headers.getheader('content-type')
        content_range = resp.headers.getheader('content-range')

        if "multipart/byteranges" in content_type:
            # multipart response; get xxxxx from boundary=xxxxx
            boundary = content_type[ content_type.find("boundary=") + 9 : ].strip()
            CRLF = "\r\n"

            # read the multipart body, one part per requested range
            while True:
                line = data.readline()
                if len(line) == 0:
                    break

                if line == CRLF:
                    continue

                line = line[ : len(line) - 2 ]      # strip trailing CRLF

                start_byte = -1
                stop_byte = -1

                if line == "--" + boundary + "--":
                    break       # end of multipart body

                elif line == "--" + boundary:
                    # read this part's headers to find its byte range
                    while True:
                        line = data.readline()
                        if line.lower().startswith("content-range"):
                            content_range = line[ line.find("bytes ") + 6 : line.find("/") ]
                            start_byte = int( content_range[ : content_range.find("-") ] )
                            stop_byte = int( content_range[ content_range.find("-") + 1 : ] ) + 1

                        if line == CRLF:
                            break

                    # read the part's data and store it as a chunk
                    chunk_id = start_byte / self.job_attrs.get( iftfile.JOB_ATTR_CHUNKSIZE )
                    chunk = data.read( stop_byte - start_byte )

                    num_chunks += 1
                    self.add_chunk( chunk_id, chunk )

            if num_chunks > 0:
                resp.close()
                return 0
            else:
                resp.close()
                return E_NO_DATA

        elif content_range != None:
            # only one range given, and it's only one chunk
            start_byte = int( content_range[ 6 : content_range.find("-") ] )
            chunk_id = start_byte / self.job_attrs.get( iftfile.JOB_ATTR_CHUNKSIZE )
            self.add_chunk( chunk_id, resp.read() )
            resp.close()
            return 0

        # not a multipart response, even though we expected one!
        else:
            # the byterange request either encompassed the whole file, or this was an HTTP/1.0 server and we got the whole file back.
            local_path = iftfile.get_chunks_dir( self.job_attrs.get( iftfile.JOB_ATTR_DEST_NAME ), self.job_attrs.get( iftfile.JOB_ATTR_FILE_HASH ) ) + "/" + os.path.basename( self.job_attrs.get( iftfile.JOB_ATTR_DEST_NAME ) )
            rc = self.__write_file( local_path, resp.read() )
            if rc == 0:
                self.whole_file( local_path )
            else:
                iftlog.log(5, self.name + ": could not save " + local_path + ", rc = " + str(rc) )
                self.recv_finished( TRANSMIT_STATE_FAILURE )

            return rc

    except Exception, inst:
        iftlog.exception( self.name + ": could not receive chunks " + str(desired_chunks), inst)
        return E_UNHANDLED_EXCEPTION
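# --- Illustrative sketch, not part of the original module ---
# A standalone version of the byte-range computation used by recv_chunks()
# above: given chunk indices, a chunk size, and a file size, build the
# "bytes=..." value sent in the HTTP Range header.  The default sizes are
# example values only.
def _example_range_header( desired_chunks, chunk_size=65536, file_size=1048576 ):
    ranges = []
    for chunk in sorted( desired_chunks ):
        start = chunk_size * chunk
        stop = min( chunk_size * (chunk + 1) - 1, file_size - 1 )      # inclusive last byte
        ranges.append( str(start) + "-" + str(stop) )

    return "bytes=" + ",".join( ranges )

# _example_range_header( [0, 2, 3] ) -> "bytes=0-65535,131072-196607,196608-262143"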
def iftsend( job, available_protocols, connect_dict=None, iftd_remote_port = USER_PORT + 1, iftd_xmlrpc_path = "/RPC2", user_timeout = 60 ):
    """
    Intelligently send a file to a remote host.

    @arg job                    This is an iftjob instance containing the job data
    @arg available_protocols    This is a list of strings of each available (usable) protocol to use
    @arg connect_dict           This is a dictionary of dictionaries mapping protocol names in available_protocols to connection attribute dictionaries.
    @arg iftd_remote_port       Remote IFTD XML-RPC port (default is USER_PORT+1)
    @arg iftd_xmlrpc_path       Remote IFTD XML-RPC path (default is /RPC2)
    @arg user_timeout           Timeout for XML-RPC calls
    """
    from iftdata import SEND_FILES_DIR

    job_attrs = job.attrs

    # get the remote host
    recv_host = job.get_attr( iftfile.JOB_ATTR_DEST_HOST )
    if recv_host == None:
        iftlog.log(5, "iftsend: remote host not defined")
        return E_INVAL

    # can we even proceed to read this?
    filename = job.get_attr( iftfile.JOB_ATTR_SRC_NAME )

    # sanity check
    if filename == None:
        iftlog.log(5, "iftsend: filename is not specified!")
        return E_INVAL

    if not os.path.exists( filename ):
        iftlog.log(5, "iftsend: file " + filename + " does not exist!")
        return E_FILE_NOT_FOUND

    if not (stat.S_IRUSR & os.stat( filename ).st_mode):
        iftlog.log(5, "iftsend: cannot read file " + filename)
        return E_IOERROR

    if SEND_FILES_DIR[-1] != "/":
        SEND_FILES_DIR = SEND_FILES_DIR + "/"

    if not filename.startswith( SEND_FILES_DIR ):
        iftlog.log(5, "iftsend: cannot send file " + filename + ", it is not in " + SEND_FILES_DIR )
        return E_FILE_NOT_FOUND

    # prepare to send
    rc, file_hash, chunk_hashes, chunk_data = prepare_sender( job.get_attr( iftfile.JOB_ATTR_SRC_NAME ), job.get_attr( iftfile.JOB_ATTR_CHUNKSIZE ) )
    if rc != 0:
        iftlog.log(5, "iftsend: could not prepare to send")
        return rc

    job.set_attr( iftfile.JOB_ATTR_GIVEN_CHUNKS, True )             # we will be given chunks out of band
    job.set_meta( iftfile.JOB_ATTR_GIVEN_CHUNKS, Queue.Queue(0) )   # use a blocking queue to receive chunks through

    # make sure to pass along the file hash
    if job.get_attr( iftfile.JOB_ATTR_FILE_HASH ) == None:
        job.set_attr( iftfile.JOB_ATTR_FILE_HASH, file_hash )

    # make sure to pass along the file size
    if job.get_attr( iftfile.JOB_ATTR_FILE_SIZE ) == None:
        job.set_attr( iftfile.JOB_ATTR_FILE_SIZE, os.stat( filename ).st_size )

    # make sure to pass along the file type
    if job.get_attr( iftfile.JOB_ATTR_FILE_TYPE ) == None:
        job.set_attr( iftfile.JOB_ATTR_FILE_TYPE, iftstats.fset_filetype( job.attrs ) )

    # pass the receiver the chunk hashes
    if job.get_attr( iftfile.JOB_ATTR_CHUNK_HASHES ) == None:
        job.set_attr( iftfile.JOB_ATTR_CHUNK_HASHES, chunk_hashes )

    job.set_attr( iftfile.JOB_ATTR_SRC_CHUNK_DIR, iftfile.get_chunks_dir( filename, file_hash, True ) )

    # the transmission id is the SHA-1 of the job attributes
    m = hashlib.sha1()
    m.update( cPickle.dumps( job.attrs ) )
    xmit_id = m.hexdigest()

    iftlog.log(3, "iftsend(id: " + xmit_id + "): will send " + str(job.get_attr( iftfile.JOB_ATTR_SRC_NAME )) + " to " + str(job.get_attr( iftfile.JOB_ATTR_DEST_HOST )))

    # make clones of each of the protocols
    proto_list = []
    for proto_name in PROTOCOLS.keys():
        if proto_name.find("_sender") < 0:
            continue

        proto = None
        try:
            # clone the vanilla protocol so we can run more than one concurrently
            proto = copy.deepcopy( PROTOCOLS[proto_name] )
            proto.assign_job( job )
        except Exception, inst:
            iftlog.exception("iftsend: cannot clone " + proto_name + ", skipping...", inst)
            continue

        proto_list.append( proto )
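# --- Illustrative sketch, not part of the original module ---
# A hedged example of how a caller might drive iftsend().  The file paths,
# host name, chunk size, and protocol names ("scp_sender", "http_sender") are
# assumptions made for illustration; the iftjob constructor that takes an
# attribute dictionary is the one used by get_iftd_sender_data() below.
def _example_iftsend():
    job_attrs = {
        iftfile.JOB_ATTR_SRC_NAME:  "/var/iftd/send/example.tar.gz",   # hypothetical path under SEND_FILES_DIR
        iftfile.JOB_ATTR_DEST_NAME: "/tmp/example.tar.gz",
        iftfile.JOB_ATTR_DEST_HOST: "receiver.example.com",
        iftfile.JOB_ATTR_CHUNKSIZE: 65536
    }

    job = iftfile.iftjob( job_attrs )
    rc = iftsend( job, ["scp_sender", "http_sender"] )      # protocol names are assumed, not taken from the source
    if rc != 0:
        iftlog.log(5, "example: iftsend failed with rc = " + str(rc))

    return rc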
def get_iftd_sender_data( xmit_id, job_attrs, available_protos, connect_dict ):
    """
    Called by the receiver (remote) on the sender (local) to get the sender's
    capabilities--specifically, which protocols it has senders for, and where
    the chunks will be located.

    Return the list of protocols usable by both sender and receiver.
    """
    global TransferCore

    error_rc = (xmit_id, None, None, None, None, None, None, None)

    file_name = job_attrs.get( iftfile.JOB_ATTR_SRC_NAME )
    user_job = iftfile.iftjob( job_attrs )

    # does the file exist?
    if not os.path.exists( file_name ):
        iftlog.log(5, "get_iftd_sender_data: file " + str(file_name) + " does not exist")
        return error_rc     # don't even bother

    # is the file readable?
    if not (stat.S_IRUSR & os.stat( file_name ).st_mode):
        iftlog.log(5, "get_iftd_sender_data: file " + str(file_name) + " is not readable")
        return error_rc     # don't bother--can't read

    # is the file accessible?
    from iftdata import SEND_FILES_DIR
    if SEND_FILES_DIR[-1] != '/':
        SEND_FILES_DIR = SEND_FILES_DIR + "/"

    if not os.path.abspath( file_name ).startswith( SEND_FILES_DIR ):
        iftlog.log(5, "get_iftd_sender_data: will not send " + str(file_name) + ", it is not in " + SEND_FILES_DIR )
        return error_rc     # access control violation

    # get our available protocols
    my_protos = proto_names( senders( list_protocols() ) )
    other_protos = []
    if available_protos:
        other_protos = proto_names( available_protos )

    # calculate the intersection of the two sets of available protocols
    my_protos_set = set( my_protos )
    other_protos_set = set( other_protos )
    usable_protos_set = my_protos_set.intersection( other_protos_set )

    file_size = iftfile.get_filesize( file_name )
    iftlog.log(1, "get_iftd_sender_data: file " + str(file_name) + ", size " + str(file_size))

    # start my passive senders
    sender_names = senders( list_protocols() )
    proto_insts = []
    for proto in sender_names:
        if PROTOCOLS.get(proto) != None and not PROTOCOLS.get(proto).isactive():
            p = None
            # start this passive sender
            try:
                p = copy.deepcopy( PROTOCOLS.get(proto) )
            except:
                iftlog.log(5, "get_iftd_sender_data: could not start passive sender " + proto)
                continue

            proto_insts.append(p)

    expected_fsize = user_job.get_attr( iftfile.JOB_ATTR_FILE_SIZE )
    min_fsize = user_job.get_attr( iftfile.JOB_ATTR_FILE_MIN_SIZE )
    max_fsize = user_job.get_attr( iftfile.JOB_ATTR_FILE_MAX_SIZE )

    # do some sanity checking...
    if min_fsize != None and max_fsize != None:
        if file_size < min_fsize or file_size > max_fsize:
            return error_rc     # wrong size expectation

    if expected_fsize != None and expected_fsize != file_size:
        return error_rc         # wrong size expectation

    # set up
    rc, file_hash, chunk_hashes, chunk_data = prepare_sender( file_name, user_job.get_attr( iftfile.JOB_ATTR_CHUNKSIZE ) )
    if rc != 0:
        iftlog.log(5, "get_iftd_sender_data: could not prepare to send")
        return error_rc

    user_job.supply_attr( iftfile.JOB_ATTR_FILE_SIZE, file_size )
    user_job.supply_attr( iftfile.JOB_ATTR_FILE_HASH, file_hash )
    user_job.supply_attr( iftfile.JOB_ATTR_FILE_TYPE, iftstats.filetype( file_name ) )

    passive_protos = start_passive_protos( connect_dict, user_job, proto_insts, 1.0 )

    # start the passive protocol handling thread
    TransferCore.begin_ift_send( xmit_id, user_job, chunk_data, user_job.get_attr( iftfile.JOB_ATTR_CHUNK_TIMEOUT ), connect_dict )
    TransferCore.run_ift_send_passive( xmit_id, user_job, passive_protos, user_job.get_attr( iftfile.JOB_ATTR_CHUNK_TIMEOUT ) )

    # record which senders are currently active
    proto_mask = [False] * len(sender_names)
    for i in xrange(0, len(sender_names)):
        p = sender_names[i]
        if PROTOCOLS[p].isactive():
            proto_mask[i] = True
        else:
            proto_mask[i] = False

    return (xmit_id, iftfile.get_chunks_dir( file_name, file_hash, True ), file_size, file_hash, iftstats.filetype( file_name ), sender_names, proto_mask, chunk_hashes)
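# --- Illustrative sketch, not part of the original module ---
# How a receiver might unpack the 8-tuple returned by get_iftd_sender_data().
# The assumption that the call is made through the sender's XML-RPC interface
# (USER_PORT + 1, "/RPC2"), as described in iftsend()'s docstring, is mine;
# the server-side method registration is not shown in this excerpt.
def _example_query_sender( sender_host, xmit_id, job_attrs, available_protos, connect_dict ):
    import xmlrpclib

    server = xmlrpclib.ServerProxy( "http://" + sender_host + ":" + str(USER_PORT + 1) + "/RPC2" )
    (xmit_id, remote_chunk_dir, file_size, file_hash,
     file_type, sender_names, proto_mask, chunk_hashes) = server.get_iftd_sender_data( xmit_id, job_attrs, available_protos, connect_dict )

    if remote_chunk_dir == None:
        return None     # the sender refused the request or could not prepare the file

    # keep only the protocols whose senders are currently active
    active_senders = [ sender_names[i] for i in xrange(0, len(sender_names)) if proto_mask[i] ]
    return (remote_chunk_dir, file_size, file_hash, file_type, active_senders, chunk_hashes)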
    # now we have the protocols available to both of us.
    # get the best protocol
    features = iftstats.extract_features( job.attrs )
    best_proto = iftstats.best_protocol( features )
    if best_proto != None:
        usable_protos = proto_names( [best_proto] + sender_available_protos )

    # make the chunk directory in preparation for receiving pieces (i.e. from an active sender)
    rc = iftfile.make_chunks_dir( job.get_attr( iftfile.JOB_ATTR_DEST_NAME ), job.get_attr( iftfile.JOB_ATTR_FILE_HASH ) )
    if rc != 0:
        iftlog.log(5, "iftreceive: could not make chunks directory")
        return rc

    # record the local chunk directory
    job.set_attr( iftfile.JOB_ATTR_DEST_CHUNK_DIR, iftfile.get_chunks_dir( job.get_attr( iftfile.JOB_ATTR_DEST_NAME ), job.get_attr( iftfile.JOB_ATTR_FILE_HASH ), remote_iftd ) )

    # start up a transfer processor with the available protocols
    proto_instances = []

    iftlog.log(1, "iftreceive: available receivers are " + str([proto + "_receiver" for proto in usable_protos]))
    iftlog.log(1, "iftreceive: file chunksize is " + str(job.get_attr( iftfile.JOB_ATTR_CHUNKSIZE )))

    for proto in usable_protos:
        proto = proto + "_receiver"     # if the protocol is usable, then there's a receiver available for it

        p = None
        try:
            p = copy.deepcopy( PROTOCOLS[proto] )
        except Exception, inst:
            iftlog.log(5, "iftreceive: ERROR: could not clone protocol " + proto)
            continue