def copyToDFS(address, fname, path):
    """Copy the local file at `path` into the DFS under the name `fname`.

    A put request (file name and size) is sent to the metadata server at
    `address`. The reply is either 'DUP' (nothing is copied) or a list of
    data-node addresses. The file is split into one block per data node,
    each block is sent to its node inside a put packet, and the metadata
    server is finally told which node returned which block id.

    Args:
        address: (host, port) of the metadata server.
        fname: name under which the file is registered in the DFS.
        path: local path of the file to copy.

    Raises:
        RuntimeError: if the metadata server returns an empty node list.
    """
    import ast  # local import: only needed to parse the server reply safely

    fsize = os.path.getsize(path)
    p = Packet()
    p.BuildPutPacket(fname, fsize)

    # Ask the metadata server where the file may be stored.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.connect(address)
        sock.sendall(p.getEncodedPacket().encode())
        raw = sock.recv(1024).decode('utf-8')
    finally:
        sock.close()

    # A bare, unquoted DUP is not a Python literal; handle it before parsing.
    if raw.strip() == 'DUP':
        return
    # literal_eval accepts only literals, so a hostile or buggy server cannot
    # make the client execute code (the previous eval() could).
    msg = ast.literal_eval(raw)
    if msg == 'DUP':
        return
    if not msg:
        raise RuntimeError('metadata server returned no data nodes')

    nodes = [tuple(node) for node in msg]
    chunk_size = fsize // len(nodes)
    last = len(nodes) - 1
    block_list = []

    with open(path, 'r') as f:
        for index, node in enumerate(nodes):
            # read() instead of readline(): readline stops at the first
            # newline and silently dropped the rest of every chunk. The last
            # node also takes whatever the integer division left over.
            chunk = f.read() if index == last else f.read(chunk_size)

            dsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                dsock.connect(node)
                # The put packet carries the block contents and their length.
                p.BuildPutPacket(chunk, len(chunk))
                dsock.sendall(p.getEncodedPacket().encode())
                block_id = dsock.recv(1024).decode()
            finally:
                dsock.close()

            # NOTE(review): the list is flat (address, id, address, id, ...);
            # confirm this is the shape BuildDataBlockPacket expects.
            block_list.append(node)
            block_list.append(block_id)

    # Notify the metadata server where the blocks are saved.
    meta = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        meta.connect(address)
        p.BuildDataBlockPacket(fname, block_list)
        meta.sendall(p.getEncodedPacket().encode())
    finally:
        meta.close()
def copyToDFS(address, from_path, to_path): """ Copy file from local machine in 'from_path' to the remote DFS in 'to_path' by dividing it to blocks and sending them to the available data nodes (managed by MDS) """ # Create a connection to the data server print "Connecting to MDS..." sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(address) print "Connected!" # Create `put` packet with the server path and fsize, and send it to MDS fsize = os.path.getsize(from_path) p1 = Packet() p1.BuildPutPacket(to_path, fsize) sock.send(p1.getEncodedPacket()) print "Sent `put` request to MDS for local file '%s'!" % from_path # Get the list of data nodes (if no error) msg = sock.recv(DNODE_BUFFER) sock.close() if msg == "DUP": print "Tried inserting a file that already exists! Exiting..." sys.exit(0) p2 = Packet() p2.DecodePacket(msg) nodelist = p2.getDataNodes() print "Received list of %d data nodes! Closing connection to MDS..." % len(nodelist) # Divide the file in blocks and send them to data nodes chunk, total = 1, 0 cuota = int(ceil(float(fsize) / len(nodelist))) block_list = [] fd = open(from_path, "rb") for ip, prt in nodelist: # Connect to data node and send data block `put` request sock2 = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock2.connect((ip, prt)) p3 = Packet() p3.BuildCommand("put") sock2.send(p3.getEncodedPacket()) print "\n\t- Sent `put` request to data node at %s:%s!" % (ip, prt) # Blocking dummy variable so that messages don't corrupt themselves OK = sock2.recv(2) # 2 because it's only "OK" if OK != "OK": print "\nReply from %s:%s is corrupted! Exiting..." 
% (ip, prt) sock2.close() fd.close() sys.exit(0) # Send cuota little by little count = 0 while count < cuota and total < fsize: data = fd.read(SUBCHUNK_BUFFER) sent = sock2.send(data) count += sent total += sent # Blocking dummy variable so that messages don't corrupt themselves MORE = sock2.recv(4) # 4 because it's only "MORE" if MORE != "MORE": print "\nReply from %s:%s is corrupted! Exiting..." % (ip, prt) sock2.close() fd.close() sys.exit(0) # Notify data node that chunk was sent print "\t- Sent chunk #%d!" % chunk chunk += 1 sock2.send("DONE") # Receive block id from data node bid = sock2.recv(36) # 36 because uuids are 36 in length block_list.append((ip, prt, bid)) print "\t- Received '%s'. Saving for later..." % bid # Disconnect from data node sock2.close() print "\t- Disconnecting from data node..." # Close local file fd.close() # Check if whole file was sent if total == fsize: print "\nWhole file sent!!!" else: print "The file wasn't sent completely! Exiting..." sys.exit(0) # Notify the MDS where the blocks are saved sock3 = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock3.connect(address) print "Reconnecting to MDS..." p4 = Packet() p4.BuildDataBlockPacket(to_path, block_list) sock3.send(p4.getEncodedPacket()) print "Sent block list! Disconnecting from MDS..." # Disconnect from MDS sock3.close() print "Done!"
def copyToDFS(address, fname, path):
    """Copy the local file './' + `path` into the DFS under the name `fname`.

    A put packet (file name and size) is sent to the metadata server at
    `address`. Unless it answers b'DUP', the reply carries the registered
    data nodes under the "servers" key. The file is split into one block
    per running data node, each non-empty block is sent to its node, and
    the resulting (host, port, chunk id) list is reported back to the
    metadata server.

    Args:
        address: (host, port) of the metadata server.
        fname: name under which the file is registered in the DFS.
        path: file name, relative to the current directory.

    Returns:
        None. Returns early when the file already exists or when no data
        node is running.
    """
    print('hola')

    # Read the file once; the blocks are sliced from this buffer.
    local_path = './' + path
    with open(local_path, "rb") as fd:
        contents = fd.read()

    # Create a Put packet with the fname and the length of the data,
    # and send it to the metadata server.
    p = Packet()
    size = os.path.getsize(local_path)
    p.BuildPutPacket(fname, size)
    file_attributes = p.packet  # the put request, kept for the log line below

    sock = socket.socket()
    try:
        sock.connect(address)
        sock.sendall(p.getEncodedPacket())
        response = sock.recv(1024)
    finally:
        sock.close()

    if response == b'DUP':
        print('DUP')
        return

    p.DecodePacket(response)
    data_nodes = p.packet["servers"]
    print(data_nodes, 'response of data nodes')
    print(file_attributes, 'file_attributes')

    # Keep only the data nodes that are running.
    running_nodes = []
    for host, port in data_nodes:
        if isPortRunning(host, port):
            print(host, port, 'RUNNING')
            running_nodes.append([host, port])
    data_nodes = running_nodes
    print(data_nodes, 'data nodes running')

    if not data_nodes:
        print('no data nodes running')
        return

    # Split over the nodes that are actually running. The chunk size used to
    # be computed from the unfiltered list, so a dead node meant lost data.
    # The last block takes the whole tail: the old remainder (len % parts)
    # dropped bytes, e.g. 11 bytes over 4 nodes sent only 9.
    node_count = len(data_nodes)
    parts = len(contents) // node_count
    block_list = [contents[i * parts:(i + 1) * parts]
                  for i in range(node_count - 1)]
    block_list.append(contents[(node_count - 1) * parts:])
    print(block_list, 'blocks of data')

    # Send the blocks to the data servers.
    data = []
    for (host, port), block in zip(data_nodes, block_list):
        if not block:
            # File smaller than the node count: nothing to store here, and an
            # empty send would leave the node waiting for data.
            continue
        sock = socket.socket()
        try:
            sock.connect((host, port))
            p.BuildPutPacket(path, size)
            sock.sendall(p.getEncodedPacket())
            response = sock.recv(1024)
            print(response, 'line 155')
            print(type(block), 'line 116')
            # The file is read in binary mode, so the block is always bytes.
            sock.sendall(block)
            chunk_id = sock.recv(1024).decode()  # chunk id / uuid
        finally:
            sock.close()
        data.append((host, port, chunk_id))

    # Notify the metadata server where the blocks are saved.
    sock = socket.socket()
    try:
        sock.connect(address)
        p.BuildDataBlockPacket(fname, data)
        sock.sendall(p.getEncodedPacket())
    finally:
        sock.close()
def copyToDFS(address, fname, path):
    """Copy the local file at `path` into the DFS under the name `fname`.

    A put packet is sent to the metadata server at `address`, which answers
    with a 3-byte status followed by the list of data nodes. The file is
    read in blocks of `file_size // node_count + 1` bytes, one block per
    data node, and the collected (host, port, block id) tuples are reported
    back to the metadata server.

    Args:
        address: (host, port) of the metadata server.
        fname: name under which the file is registered in the DFS.
        path: local path of the file to copy.

    Returns:
        None. Returns early, without contacting any data node, when the
        metadata server answers "NAK" or returns no data nodes.
    """
    file_size = os.path.getsize(path)

    # Create a Put packet with the fname and the length of the data,
    # and send it to the metadata server.
    sockt = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sockt.connect(address)
        pack = Packet()
        # The third argument is a dummy block size; the metadata server
        # does not need it.
        pack.BuildPutPacket(fname, file_size, 0)
        sockt.sendall(pack.getEncodedPacket())
        status = sockt.recv(3)
        print(status)
        # Check the status before waiting for a node list; previously the
        # reply was received and decoded before the status was looked at.
        if status == "NAK":
            print("Error copying files.")
            return
        dat = sockt.recv(4096)
    finally:
        sockt.close()

    data = Packet()
    data.DecodePacket(dat)
    message = data.getDataNodes()
    print(message)

    if not message:
        # No data node to write to (this used to divide by zero).
        print("Error copying files.")
        return

    block_ids = []
    # Floor division keeps the block size an integer on every interpreter.
    bk_size = (file_size // len(message)) + 1
    remaining = file_size

    # Plain 'rb': the file is only read, so write permission is not needed.
    with open(path, 'rb') as read:
        for node in message:
            if remaining <= 0:
                # Everything has been sent; an empty block would leave the
                # data node waiting for bytes that never arrive.
                break
            address1 = node[0]
            port1 = node[1]
            sock_to_dnode = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                sock_to_dnode.connect((address1, port1))
                dnode_pack = Packet()
                dnode_pack.BuildPutPacket(fname, file_size, bk_size)
                sock_to_dnode.sendall(dnode_pack.getEncodedPacket())
                print("Put Packet sent.")
                # NOTE(review): a node that does not answer "OK" is skipped
                # silently and its block goes to the next node; if nodes run
                # out, the tail of the file is never stored.
                if sock_to_dnode.recv(1024) == "OK":
                    block = read.read(bk_size)
                    sock_to_dnode.sendall(block)
                    remaining -= len(block)
                    block_ids.append(
                        (address1, port1, sock_to_dnode.recv(1024)))
                    print(block_ids)
            finally:
                sock_to_dnode.close()

    # Notify the metadata server where the blocks are saved.
    socket_blks = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        socket_blks.connect(address)
        bk_ids_to_meta = Packet()
        bk_ids_to_meta.BuildDataBlockPacket(fname, block_ids)
        socket_blks.sendall(bk_ids_to_meta.getEncodedPacket())
    finally:
        socket_blks.close()
def copyToDFS(address, fname, path):
    """Copy the local file at `path` into the DFS under the name `fname`.

    A put packet (file name and size) is sent to the metadata server at
    `address`, which answers with the list of data nodes. The file content
    is cut into slices of `fsize // num_nodes + 1` bytes. Slice i is sent
    to data node i after that node acknowledges the put request with "OK".
    The collected (host, port, block id) tuples are then reported back to
    the metadata server.

    Args:
        address: (host, port) of the metadata server.
        fname: name under which the file is registered in the DFS.
        path: local path of the file to copy.
    """
    # Create a Put packet with the fname and the length of the data,
    # and send it to the metadata server.
    fsize = os.path.getsize(path)
    packet_put_request = Packet()
    packet_put_request.BuildPutPacket(fname, fsize)
    request = packet_put_request.getEncodedPacket()

    so = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        so.connect(address)
        so.sendall(request)
        # Receive the data-node list from the metadata server.
        response = so.recv(4096)
        print("message recieved")
    finally:
        so.close()

    # NOTE(review): no duplicate-file reply is handled here; confirm what the
    # metadata server sends when `fname` already exists.
    packet_put_response = Packet()
    packet_put_response.DecodePacket(response)
    DataNodes = packet_put_response.getDataNodes()
    num_nodes = len(DataNodes)

    if num_nodes == 0:
        # The block-size computation below would divide by zero.
        print("ERROR In Data Node")
        return

    # Block size: the +1 makes num_nodes blocks cover the whole file even
    # when the size is not evenly divisible.
    bsize = (fsize // num_nodes) + 1

    # Plain 'rb': the file is only read.
    with open(path, 'rb') as file:
        msg = file.read()

    # Block ids that make up the file, in order.
    blocid = []

    # Connect to each data node in turn and send it its slice of the file.
    for i, Dnode in enumerate(DataNodes):
        chunk = msg[i * bsize:(i + 1) * bsize]
        if not chunk:
            # The file is already fully covered; an empty slice would leave
            # the data node waiting for bytes that never arrive.
            break

        print("second socket opening")
        so2 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            # Tell the data node which file this block belongs to.
            dblocpac = Packet()
            dblocpac.BuildPutPacket(fname, fsize)
            dblocksend = dblocpac.getEncodedPacket()

            so2.connect(tuple(Dnode))  # (address, port) of the data node
            so2.sendall(dblocksend)

            block_response = so2.recv(4096)
            if block_response == "OK":
                so2.sendall(chunk)
                # Receive the block id and remember where the block lives.
                this_block = so2.recv(4096)
                blocid.append((Dnode[0], Dnode[1], this_block))
            else:
                print("ERROR In Data Node")
        finally:
            so2.close()

    # Notify the metadata server where the blocks are saved.
    blocpac = Packet()
    blocpac.BuildDataBlockPacket(fname, blocid)
    packsend = blocpac.getEncodedPacket()

    so = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        so.connect(address)
        so.sendall(packsend)
    finally:
        so.close()
if receive_data: the_socket.sendall(block_info) else: print "node error" block_list.append((node[0], str(node[1]), receive_data)) the_socket.close() # Notify the metadata server where the blocks are saved. # Fill code create_packet.BuildDataBlockPacket(fname, block_list) worked, the_socket = connection( create_packet.getEncodedPacket(), address[0], address[1]) #if it occured then close the_socket.close() worked_2 = int(worked) if worked_2: pass else: print("Error occured")
def copyToDFS(address, fname, path): """ Contact the metadata server and ask to copy file fname(check if file is available/exists) ; get a list of data nodes from meta data server if file doesn't already exist ; Open the file in path to read ; Divide file in blocks ; Send blocks distributedly to the data nodes ; Recieves: address: fname: path: """ # Create a connection to the data server print "address ", address print "fname ", fname print "path full ", path print "path cut", path[:len(path) - 4] fsize = os.path.getsize(path) print "fsize", fsize p = Packet() sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((address[0], address[1])) # Read file # file = "" # f = open(path, 'rb') with open(path, 'rb') as f: # for line in f: file = f.read() # file = f.readlines() f.close() # Create a Put packet with the fname and the length of the data, # and sends it to the metadata server # print os.path.getsize(path) p.BuildPutPacket(fname, fsize) sock.sendall(p.getEncodedPacket() ) # package must be encoded before sending throught the net data = sock.recv(1024) # recieve data sent from meta-data server # print "data ", data # list files sock.close() # If no error or duplicate file exists # Get the list of data nodes. if data == "DUP": print "File exists aready. Exiting copy client. " # Would you like to keep or replace? 
return 0 # exit else: # recieve data node server list # if dup, DecodePacket returns an error saying it cant decode the JSON object ; p.DecodePacket(data) servers = p.getDataNodes() print "servers ", servers print "server count ", len(servers) # Divide the file in blocks fblocks = [] block_size = fsize / len(servers) print "block size ", block_size for i in range(0, fsize, block_size): print "i ", i # print "len of block of file to copy ", /len(file[i:i+block_size]) # if i/block_size + 1 = # print "TRY" print "JJJ ", len(file[i:i + block_size]) fblocks.append(file[i:i + block_size]) # if len(file[i:i+block_size]) < block_size: # print "se acabo el file se supone ", file[i:] # print "len del file q se acabo. ", len(file[i:]) # fblocks.append(file[i:]) # else: # print "parte del fokin file ", file[i:i+block_size] # print "len parte del fokin file ", len(file[i:i+block_size]) # fblocks.append(file[i:i+block_size]) # break # Send the blocks to the data servers. for dserver in servers: print "dserver ", dserver pack = Packet() s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect((dserver[0], dserver[1])) pack.BuildPutPacket(fname, fsize) s.sendall(pack.getEncodedPacket()) ack = s.recv(1024) # print ss if ack == "OK": # if response ok, send block size, wait aknowledge, send dblocks, wait acknowledge, # # send block id's to meta print "RESPONSE OK WOOO" dblock = fblocks.pop( 0 ) # get first dblock, remove from block list and send to data node servers s.sendall(str(len(dblock))) ack = s.recv(1024) print "Ack el data node recibio el size del bloque ", ack sent = False while not sent: #len(dblock) > 0: # """ # if block vigger than 300 megs # first chunk of data block to be sent # datablock without the sent chunk else: dblock_chunk = dblock[0:1024] # first chunk of data block to be sent dblock = dblock[1024:] # datablock without the sent chunk if len(dblock) <= 0: sent = True s.sendall(dblock_chunk) ack = s.recv(1024) print ("acknowledge ", ac """ if len(dblock) > 
100000000: dblock_chunk = dblock[0:100000000] print "len big ", len(dblock_chunk) dblock = dblock[100000000:] else: dblock_chunk = dblock[ 0:1024] # first chunk of data block to be sent print "len 1024 ", len(dblock_chunk) dblock = dblock[1024:] # datablock without the sent chunk if len(dblock) <= 0: sent = True s.sendall(dblock_chunk) ack = s.recv(1024) print "acknowledge ", ack s.sendall("OK") block_id = s.recv(1024) s.close() dserver.append(block_id) # ugh this took me a while to realize. print "id of block sent to dnode ", block_id # Notify the metadata server where the blocks are saved. metasock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) metasock.connect((address[0], address[1])) p.BuildDataBlockPacket(fname, servers) metasock.sendall(p.getEncodedPacket()) metasock.close()