Example #1
 def test_initialize_simple(self):
     logging.debug('Testing initializing a simple file blob')
     reset_storage()
     fb = file_blob()
     fb.my_hash = '1'
     expected_string = '\n=========Printing details of file blob object=========\nmy_hash: 1\nparent_hash: \nblob_type: file\n------------------------------------------------------'
     self.assertEqual(expected_string, str(fb))
Example #2
    def restore_directory(self, key, working_directory, storage_directory,
                          commit_hash):
        '''
        load a whole directory as an initial commit
        '''
        logging.info(
            'working_directory: %s, storage_directory: %s, commit_hash: %s',
            working_directory, storage_directory, commit_hash)
        #@TODO: Write to a temp directory first and then move to the working directory?  This would ensure the user has very little chance of seeing partial files.

        cb = commit_blob()
        cb.load(key, storage_directory, commit_hash)

        #restore tree folder structure
        tb = tree_blob()
        tb.load(key, storage_directory, cb.tree_hash)
        tb.build_tree(key, storage_directory)
        logging.debug('tree to restore:\n%s' % (str(tb)))
        tb.write_folders(working_directory)

        for path, node in tb.walk():
            if node.node_type != 'file':
                continue
            fb = file_blob()
            fb.load(key, storage_directory, node.hash_hex)
            full_file_path = os.path.join(working_directory, path)
            #full_file_path = working_directory + path + '/' + file_name
            #write the restored file contents; the with block closes the handle
            with open(full_file_path, 'wb') as f:
                f.write(fb.apply_delta(key, storage_directory))
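A minimal usage sketch of how the method above might be driven. The helper name, paths, and the restore_manager object are illustrative assumptions, not values from the project:

def restore_one_commit(restore_manager, key, commit_hash):
    # Hypothetical driver: rebuild a share's working copy from its encrypted blob storage.
    working_directory = '/tmp/restored_share'        # where plaintext files are written
    storage_directory = '/tmp/storage/test_share'    # where the encrypted blobs live
    restore_manager.restore_directory(key, working_directory, storage_directory, commit_hash)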
Example #3
File: newtest.py Project: stevenleigh/sib
	def test_initialize_simple(self):
		logging.debug('Testing initializing a simple file blob')
		reset_storage()
		fb = file_blob()
		fb.my_hash = '1'
		expected_string = '\n=========Printing details of file blob object=========\nmy_hash: 1\nparent_hash: \nblob_type: file\n------------------------------------------------------'
		self.assertEqual(expected_string, str(fb))
Example #4
	def restore_directory(self, key, working_directory, storage_directory, commit_hash):
		'''
		load a whole directory as an initial commit
		'''
		logging.info('working_directory: %s, storage_directory: %s, commit_hash: %s', working_directory, storage_directory, commit_hash)
		#@TODO: Write to a temp directory first and then move to the working directory?  This would ensure the user has very little chance of seeing partial files.
		
		cb=commit_blob()
		cb.load(key, storage_directory, commit_hash)
		
		#restore tree folder structure
		tb = tree_blob()
		tb.load(key, storage_directory, cb.tree_hash)
		tb.build_tree(key, storage_directory)
		logging.debug('tree to restore:\n%s'%(str(tb)))
		tb.write_folders(working_directory)
		
		for path, node in tb.walk():
			if node.node_type != 'file':
				continue
			fb = file_blob()
			fb.load(key, storage_directory, node.hash_hex)
			full_file_path = os.path.join(working_directory, path)
			#full_file_path = working_directory + path + '/' + file_name
			#write the restored file contents; the with block closes the handle
			with open(full_file_path, 'wb') as f:
				f.write(fb.apply_delta(key, storage_directory))
Example #5
    def restore_directory(self, key, working_directory, storage_directory,
                          commit_hash):
        '''
        load a whole directory as an initial commit
        '''
        logging.info(
            'working_directory: %s, storage_directory: %s, commit_hash: %s',
            working_directory, storage_directory, commit_hash)

        cb = commit_blob()
        cb.load(key, storage_directory, commit_hash)

        #restore tree folder structure
        tb = tree_blob()
        tb.load(key, storage_directory, cb.tree_hash)
        tb.display()
        file_listing = tb.write_directory_structure(key, storage_directory,
                                                    working_directory)

        #restore files
        for (path, file_name, hash_hex, file_size) in file_listing:
            fb = file_blob()
            fb.load(key, storage_directory, hash_hex)
            #full_file_path = os.path.join(working_directory_path, path, file_name)
            full_file_path = working_directory + path + '/' + file_name
            #write the restored file contents; the with block closes the handle
            with open(full_file_path, 'wb') as f:
                f.write(fb.apply_delta(key, storage_directory))
Example #6
    def blobs_to_restore_blob(self,
                              key,
                              storage_directory,
                              file_name,
                              parent_tree=True):
        """
		Returns a list file names of any blobs needed to restore a blob.  
		If an empty list is returned then it should be possible to restore the given blob from local files.
		Will likely need to be called repeatedly on a commit because needed blobs 
		also have dependent blobs which wont be known until the needed blobs are obtained. 
		"""
        if (not os.path.exists(os.path.join(storage_directory, file_name))):
            return [file_name]

        #Check if this is a commit blob
        if file_name[0] == '_':
            #Get the tree.  Parent commits are not needed to restore a commit, or its files.
            cb = commit_blob()
            cb.load(key, storage_directory, file_name[1:])
            return self.blobs_to_restore_blob(key, storage_directory,
                                              cb.tree_hash)

        fb = file_blob()
        fb.load(key, storage_directory, file_name)

        #check if this is a tree blob
        if fb.blob_type == 'tree':
            tb = tree_blob()
            tb.load(key, storage_directory, file_name)  #TODO: can this be cast?
            if tb.parent_hash != '':
                #check for all parents of tree
                needed_tree_parent_hash = self.blobs_to_restore_blob(
                    key, storage_directory, tb.parent_hash, False)
                if needed_tree_parent_hash is not None:
                    return needed_tree_parent_hash

            if not parent_tree:  #only traverse tree structure of parent tree
                return None
            #check for all files in tree structure
            file_hashes = tb.file_hashes(key, storage_directory)
            #(unused_path, unused_file_name, hash_hex, unused_file_size) = tb.write_directory_structure(key, storage_directory, None, False)
            needed_files = []
            for h in file_hashes:
                temp_hash = self.blobs_to_restore_blob(key, storage_directory,
                                                       h)
                if temp_hash is not None:
                    needed_files.extend(temp_hash)
            if not needed_files:
                return None
            return needed_files

        #fb is a file blob
        if fb.parent_hash == '':
            return None
        else:
            return self.blobs_to_restore_blob(key, storage_directory,
                                              fb.parent_hash)
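The docstring above implies a fetch-and-retry loop around this method; a hedged sketch of that loop follows. The fetch_blob_from_peer callable and the '_'-prefixed commit file name are assumptions based on the code above, not confirmed project API:

def fetch_until_restorable(manager, key, storage_directory, commit_hash, fetch_blob_from_peer):
    # Repeatedly ask which blobs are still missing and fetch them, since newly obtained
    # blobs can reveal further dependencies (parent blobs, tree entries).
    missing = manager.blobs_to_restore_blob(key, storage_directory, '_' + commit_hash)
    while missing:
        for blob_file_name in missing:
            fetch_blob_from_peer(blob_file_name, storage_directory)
        missing = manager.blobs_to_restore_blob(key, storage_directory, '_' + commit_hash)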
Example #7
File: newtest.py Project: stevenleigh/sib
	def test_delta_simple(self):
		logging.debug('Testing a delta on a file blob')
		reset_storage()
		f = open('../resource/sample_text_1.txt', 'rb')
		fb = file_blob()
		fb.my_hash = '1'
		fb.compute_delta(key,f.read())
		
		expected_string = '\n=========Printing details of file blob object=========\nmy_hash: a1f7f4060ff3f7c0b7c28b4a37990a851a225e475d47da7a0d82dec5\nparent_hash: \nblob_type: file\naggregate a[0:0] b[0:0] first line\nabcdefghijklmnopqrstuvwxyz\nthird line\nfourth\nlast line------------------------------------------------------'
		self.assertEqual(expected_string, str(fb))
Example #8
    def test_delta_simple(self):
        logging.debug('Testing a delta on a file blob')
        reset_storage()
        f = open('../resource/sample_text_1.txt', 'rb')
        fb = file_blob()
        fb.my_hash = '1'
        fb.compute_delta(key, f.read())

        expected_string = '\n=========Printing details of file blob object=========\nmy_hash: a1f7f4060ff3f7c0b7c28b4a37990a851a225e475d47da7a0d82dec5\nparent_hash: \nblob_type: file\naggregate a[0:0] b[0:0] first line\nabcdefghijklmnopqrstuvwxyz\nthird line\nfourth\nlast line------------------------------------------------------'
        self.assertEqual(expected_string, str(fb))
Example #9
	def blobs_to_restore_blob(self, key, storage_directory, file_name, parent_tree = True):
		"""
		Returns a list of the file names of any blobs needed to restore a blob.
		If an empty list is returned it should be possible to restore the given blob from local files.
		This will likely need to be called repeatedly on a commit, because needed blobs
		also have dependent blobs which won't be known until the needed blobs are obtained.
		"""
		if (not os.path.exists(os.path.join(storage_directory,file_name))):
			return [file_name]
		
		#Check if this is a commit blob
		if file_name[0]=='_':
			#Get the tree.  Parent commits are not needed to restore a commit, or its files.
			cb = commit_blob()
			cb.load(key, storage_directory, file_name[1:])
			return self.blobs_to_restore_blob(key, storage_directory, cb.tree_hash)
		
		fb = file_blob()
		fb.load(key, storage_directory, file_name)
		
		#check if this is a tree blob
		if fb.blob_type == 'tree':
			tb = tree_blob()
			tb.load(key, storage_directory, file_name)  #TODO: can this be cast?
			if tb.parent_hash != '':
				#check for all parents of tree
				needed_tree_parent_hash = self.blobs_to_restore_blob(key, storage_directory, tb.parent_hash, False)
				if needed_tree_parent_hash is not None:
					return needed_tree_parent_hash
			
			if not parent_tree:  #only traverse tree structure of parent tree
				return None
			#check for all files in tree structure
			file_hashes = tb.file_hashes(key, storage_directory)
			#(unused_path, unused_file_name, hash_hex, unused_file_size) = tb.write_directory_structure(key, storage_directory, None, False)
			needed_files = []
			for h in file_hashes:
				temp_hash = self.blobs_to_restore_blob(key, storage_directory, h)
				if temp_hash is not None:
					needed_files.extend(temp_hash)
			if not needed_files:
				return None
			return needed_files
		
		#fb is a file blob
		if fb.parent_hash=='':
			return None
		else:
			return self.blobs_to_restore_blob(key, storage_directory, fb.parent_hash)
Example #10
	def restore_directory(self, key, working_directory, storage_directory, commit_hash):
		'''
		load a whole directory as an initial commit
		'''
		logging.info('working_directory: %s, storage_directory: %s, commit_hash: %s', working_directory, storage_directory, commit_hash)
		
		cb=commit_blob()
		cb.load(key, storage_directory, commit_hash)
		
		#restore tree folder structure
		tb = tree_blob()
		tb.load(key, storage_directory, cb.tree_hash)
		tb.display()
		file_listing = tb.write_directory_structure(key, storage_directory, working_directory)
		
		#restore files
		for (path, file_name, hash_hex, file_size) in file_listing:
			fb = file_blob()
			fb.load(key, storage_directory, hash_hex)
			#full_file_path = os.path.join(working_directory_path, path, file_name)
			full_file_path = working_directory + path + '/' + file_name
			#write the restored file contents; the with block closes the handle
			with open(full_file_path, 'wb') as f:
				f.write(fb.apply_delta(key, storage_directory))
Example #11
File: test.py Project: stevenleigh/sib
for root, dirs, files in os.walk(peer_C_storage, topdown=False):
    for name in files:
        os.remove(os.path.join(root, name))
    for name in dirs:
        os.rmdir(os.path.join(root, name))

os.mkdir(os.path.join(peer_A_storage, 'test_share'))

print '\n\n'
print '************************************************************************'
print '***Testing initializing a file blob'
logging.debug('Testing initializing a file blob')
print '************************************************************************'
#open a text file and import into file blob
f = open('../resource/sample_text_1.txt', 'rb')
fb = file_blob()
fb.my_hash = '1'
fb.display()
fb.compute_delta(key, f.read())
fb.display()

print '\n\n'
print '************************************************************************'
print '***Testing a simple file delta'
logging.debug('Testing a simple file delta')
print '************************************************************************'
#open a 2nd version of text file and compute delta from first version
f2 = open('../resource/sample_text_2.txt', 'rb')
fb2 = file_blob()
fb2.my_hash = '2'
fb2.display()
Example #12
    def store_file_blobs(self, key, commit_hash, parent_commit_hash,
                         storage_directory, working_directory):
        logging.info(
            'commit_hash: %s, parent_commit_hash: %s, storage_directory: %s, working_directory: %s',
            commit_hash, parent_commit_hash, storage_directory,
            working_directory)

        #chop the root folder off working_directory
        working_directory, tail = os.path.split(working_directory)

        #load current commit, tree, and file info
        cb = commit_blob()
        cb.load(key, storage_directory, commit_hash)
        #it is okay to modify this tree blob.  The one stored for the commit is already saved.
        tb = tree_blob()
        tb.load(key, storage_directory, cb.tree_hash)
        tb.build_tree(key, storage_directory)
        logging.debug('tree to store: %s' % (tb))

        #remove files with duplicate hashes in the working directory or storage directory
        hash_set = set()
        for root, dirs, files in os.walk(storage_directory):
            for f in files:
                hash_set.add(f)
        logging.debug('storage directory hashes: %s' % (hash_set))
        for path, node in tb.walk():
            logging.debug('checking: %s' % (path))
            if node.node_type != 'file':
                continue
            if node.hash_hex in hash_set:
                logging.debug('found hash match: %s' % (node.hash_hex))
                tb.rm_node(path, 'file')
            else:
                hash_set.add(node.hash_hex)

        if parent_commit_hash is None:  #this is an initial commit
            for path, node in tb.walk():
                if node.node_type != 'file':
                    continue
                full_path = os.path.join(working_directory, path)
                new_file = open(full_path, 'r')
                fb = file_blob()
                fb.compute_delta(key, new_file.read())
                fb.store(key, storage_directory)
            return

        #load parent commit, tree, and file info
        pcb = commit_blob()
        pcb.load(key, storage_directory, parent_commit_hash)
        ptb = tree_blob()
        ptb.load(key, storage_directory, pcb.tree_hash)
        ptb.build_tree(key, storage_directory)

        logging.debug('performing differential commit using following trees')
        logging.debug('parent tree: %s' % (str(ptb)))
        logging.debug('child tree: %s' % (str(tb)))

        #find files with the same name in the same path, compute deltas, and store as file blob diffs
        for path, node in tb.walk():
            if node.node_type != 'file':
                continue
            if not ptb.has_node(path, 'file'):
                continue
            p_node = ptb.get_node(path, 'file')
            logging.debug(
                'Found files with matching paths and names.  working: %s, parent: %s',
                node.hash_hex, p_node.hash_hex)
            full_file_path = os.path.join(working_directory, path)
            new_file = open(full_file_path, 'rb')
            pfb = file_blob()
            pfb.load(key, storage_directory, p_node.hash_hex)
            fb = file_blob()
            fb.compute_delta(key, new_file.read(), pfb, storage_directory)
            fb.store(key, storage_directory)
            tb.rm_node(path, 'file')

        #TODO: re-implement code commented below
        """
		#Look for similar files between working and parent and compute diffs on those
		index=-1
		while (index+1<len(file_hashes)):  #cycle through all file records in working directory
			index+=1
			parent_index=-1
			while (parent_index+1<len(parent_file_hashes)):  #cycle through all files records in parent commit
				parent_index+=1
				#if file_names[index]!= parent_file_names[parent_index]:
				#	break
				
				#must have similar file sizes
				percent_size_change = abs(file_sizes[index]-parent_file_sizes[index]) / file_sizes[index] 
				if  percent_size_change > 0.10:
					continue
				
				#must have similar byte sequences
				full_file_path = working_directory + file_folders[index] + '/' + file_names[index]
				new_file = open(full_file_path,'rb')
				new_file_text = new_file.read()
				pfb = file_blob()
				pfb.load(key, storage_directory, parent_file_hashes[parent_index])
				pfb_text = pfb.apply_delta(key, storage_directory)
				s=difflib.SequenceMatcher(None,new_file_text,pfb_text)
				if s.real_quick_ratio() < 0.75:
					continue
				if s.quick_ratio() < 0.75:
					continue
				
				#If this line is reached the files are similar enough.  Compute the diff and store.
				logging.debug('Found files with similar content. working: %s, parent: %s', file_hashes[index], parent_file_hashes[parent_index])
				fb = file_blob()
				fb.compute_delta(key, new_file_text, pfb, storage_directory)
				fb.store(key, storage_directory)
				
				file_hashes.pop(index)
				file_names.pop(index)
				file_folders.pop(index)
				file_sizes.pop(index)
				index-=1
				break
		"""

        #store all remaining files as initial versions
        for path, node in tb.walk():
            if node.node_type != 'file':
                continue
            full_file_path = os.path.join(working_directory, path)
            new_file = open(full_file_path, 'rb')
            fb = file_blob()
            fb.compute_delta(key, new_file.read())
            fb.store(key, storage_directory)
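The commented-out pass above filters candidate file pairs with difflib's staged similarity ratios before computing a delta. A small self-contained sketch of that filtering idea; the 0.75 threshold mirrors the commented code, and the helper name is illustrative:

import difflib

def looks_similar(new_text, old_text, threshold=0.75):
    #cheap upper bounds first; only accept a pair if both quick checks pass,
    #matching the staging used in the commented-out block above
    s = difflib.SequenceMatcher(None, new_text, old_text)
    if s.real_quick_ratio() < threshold:
        return False
    if s.quick_ratio() < threshold:
        return False
    return True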
Example #13
File: test.py Project: stevenleigh/sib
        os.remove(os.path.join(root, name))
    for name in dirs:
        os.rmdir(os.path.join(root, name))


os.mkdir(os.path.join(peer_A_storage, "test_share"))


print "\n\n"
print "************************************************************************"
print "***Testing initializing a file blob"
logging.debug("Testing initializing a file blob")
print "************************************************************************"
# open a text file and import into file blob
f = open("../resource/sample_text_1.txt", "rb")
fb = file_blob()
fb.my_hash = "1"
fb.display()
fb.compute_delta(key, f.read())
fb.display()


print "\n\n"
print "************************************************************************"
print "***Testing a simple file delta"
logging.debug("Testing a simple file delta")
print "************************************************************************"
# open a 2nd version of text file and compute delta from first version
f2 = open("../resource/sample_text_2.txt", "rb")
fb2 = file_blob()
fb2.my_hash = "2"
Example #14
	def merge(tb_A, tb_B, tb_C, merge_method = 'dupe_on_conflict'):
		"""Merges two trees. C is common ancestor
		merge_method determines how conflicts are handled.
		dupe_on_conflict: default.  try merging.  if a conflict can't be resolved automatically save both file versions.
		dupe_always: if two folders/files need to be merged always save both copies
		force_merge: try merging.  in case of conflict resolve automatically even if result contains unified diff sections.
		"""
		
		"""
		Algorithm
		--------------
		A: update commit
		B: old commit
		C: common ancestor commit
		N: new tree
		-store trees from A, B, C, and N in easily traversable data structure
		-iterate through folders and files from A (from top down.  these iterations only add to wd)
		    -if B has it
		        -no change necessary
		        -add to N as is
		    -if C has it, but B doesn't 
		        -means B deleted file/folder
		        -don't add to N
		    -if neither C nor B have it
		        -means A added it
		        -push change to wd
		        -add to N
		    -if B has same file name and path, but different hash as A
		        -attempt merge
		        -add merged file(s) to N
		
		-iterate through folders and files from B (from bottom up.  these iterations only delete from wd)
		    -if neither C nor A have it
		        -means B added it
		        -add to N
		        -no change necessary
		    -if C has it, but A doesn't 
		        -means A deleted file/folder
		        -delete file/folder from wd
		
		-commit wd as merge
		
		-issues with algorithm
		    -B moves file, A edits file
    """
		
		
		merge_tb = tree_blob()
		merge_tb.root_node = tree_blob.TreeNode()
		merge_tb.root_node.name = tb_A.root_node.name
		merge_tb.root_node.node_type = 'folder'
		merge_tb.root_node.size=0
		print 'walking tree'
		for path, node in tb_A.walk():
			print path
			if path == merge_tb.root_node.name:
				continue
			if (tb_B.has_node(path, node.node_type) and
					tb_B.get_node(path, node.node_type).hash_hex == node.hash_hex):  #not modified
				merge_tb.add_node(path, node.node_type, node.hash_hex, int(node.size))
				continue
			if (tb_B.has_node(path, node.node_type) and
					tb_B.get_node(path, node.node_type).hash_hex != node.hash_hex):  #merge files
				hash_hex_B = (tb_B.get_node(path, node.node_type)).hash_hex
				fb_B = file_blob()
				fb_B.load(key, storage, hash_hex_B)
				fb_A = file_blob()
				fb_A.load(key, storage, node.hash_hex)
				if tb_C.has_node(path, node.node_type):
					hash_hex_C = (tb_C.get_node(path, node.node_type)).hash_hex
					fb_C = file_blob()
					fb_C.load(key, storage, hash_hex_C)
				else:
					fb_C = fb_B
				merge_text = file_blob.merge(fb_A.apply_delta(key, storage), fb_B.apply_delta(key, storage), fb_C.apply_delta(key, storage))
				fb_merge = file_blob()
				fb_merge.compute_delta(key, merge_text, fb_A, storage)
				merge_tb.add_node(path, node.node_type, node.hash_hex, int(node.size))
				continue	
			if tb_C.has_node(path, node.node_type):  #B deleted
				continue
			
			merge_tb.add_node(path, node.node_type, node.hash_hex, int(node.size))  #A added
			
		for path, node in tb_B.walk():
			print path
			if path == merge_tb.root_node.name:
				continue
			if (not tb_C.has_node(path, node.node_type)) and (not tb_A.has_node(path, node.node_type)):  #B added
				merge_tb.add_node(path, node.node_type, node.hash_hex, int(node.size))
				
		return merge_tb
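A hedged sketch of calling merge with three loaded trees (tb_A: incoming update, tb_B: old local tree, tb_C: common ancestor), assuming they were built with load() and build_tree() as in the restore/store code above. Note that the body also reads key and storage, which are not parameters, so they would have to exist in the enclosing scope for the file-merge branch to run as written:

#illustrative call only; tb_A, tb_B and tb_C are assumed to be tree_blob instances
merged_tree = tree_blob.merge(tb_A, tb_B, tb_C, merge_method='dupe_on_conflict')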
Example #15
	def merge(tb_A, tb_B, tb_C, merge_method = 'dupe_on_conflict'):
		"""Merges two trees. C is common ancestor
		merge_method determines how conflicts are handled.
		dupe_on_conflict: default.  try merging.  if a conflict can't be resolved automatically save both file versions.
		dupe_always: if two folders/files need to be merged always save both copies
		force_merge: try merging.  in case of conflict resolve automatically even if result contains unified diff sections.
		"""
		
		"""
		Algorithm
		--------------
		A: update commit
		B: old commit
		C: common ancestor commit
		N: new tree
		-store trees from A, B, C, and N in easily traversable data structure
		-iterate through folders and files from A (from top down.  these iterations only add to wd)
		    -if B has it
		        -no change necessary
		        -add to N as is
		    -if C has it, but B doesn't 
		        -means B deleted file/folder
		        -don't add to N
		    -if neither C nor B have it
		        -means A added it
		        -push change to wd
		        -add to N
		    -if B has same file name and path, but different hash as A
		        -attempt merge
		        -add merged file(s) to N
		
		-iterate through folders and files from B (from bottom up.  these iterations only delete from wd)
		    -if neither C nor A have it
		        -means B added it
		        -add to N
		        -no change necessary
		    -if C has it, but A doesn't 
		        -means A deleted file/folder
		        -delete file/folder from wd
		
		-commit wd as merge
		
		-issues with algorithm
		    -B moves file, A edits file
    """
		
		
		merge_tb = tree_blob()
		merge_tb.root_node = tree_blob.TreeNode()
		merge_tb.root_node.name = tb_A.root_node.name
		merge_tb.root_node.node_type = 'folder'
		merge_tb.root_node.size=0
		print 'walking tree'
		for path, node in tb_A.walk():
			print path
			if path == merge_tb.root_node.name:
				continue
			if (tb_B.has_node(path, node.node_type) and
					tb_B.get_node(path, node.node_type).hash_hex == node.hash_hex):  #not modified
				merge_tb.add_node(path, node.node_type, node.hash_hex, int(node.size))
				continue
			if (tb_B.has_node(path, node.node_type) and
					tb_B.get_node(path, node.node_type).hash_hex != node.hash_hex):  #merge files
				hash_hex_B = (tb_B.get_node(path, node.node_type)).hash_hex
				fb_B = file_blob()
				fb_B.load(key, storage, hash_hex_B)
				fb_A = file_blob()
				fb_A.load(key, storage, node.hash_hex)
				if tb_C.has_node(path, node.node_type):
					hash_hex_C = (tb_C.get_node(path, node.node_type)).hash_hex
					fb_C = file_blob()
					fb_C.load(key, storage, hash_hex_C)
				else:
					fb_C = fb_B
				merge_text = file_blob.merge(fb_A.apply_delta(key, storage), fb_B.apply_delta(key, storage), fb_C.apply_delta(key, storage))
				fb_merge = file_blob()
				fb_merge.compute_delta(key, merge_text, fb_A, storage)
				merge_tb.add_node(path, node.node_type, node.hash_hex, int(node.size))
				continue	
			if tb_C.has_node(path, node.node_type):  #B deleted
				continue
			
			merge_tb.add_node(path, node.node_type, node.hash_hex, int(node.size))  #A added
			
		for path, node in tb_B.walk():
			print path
			if path == merge_tb.root_node.name:
				continue
			if (not tb_C.has_node(path, node.node_type)) and (not tb_A.has_node(path, node.node_type)):  #B added
				merge_tb.add_node(path, node.node_type, node.hash_hex, int(node.size))
				
		return merge_tb
Example #16
File: test.py Project: stevenleigh/sib
for i in range(6):
	try:
		print s2.recv(1028)
	except socket.error as msg:
		pass
		

"""

print '\n\n'
print '************************************************************************'
print '***Testing sending a large file between two sibs'
logging.debug('Testing sending a large file between two sibs')
print '************************************************************************'
large_file = open('../resource/alice.txt', 'rb')
fb = file_blob()
fb.compute_delta(key, large_file.read())
large_file_hash = fb.store(key, os.path.join(peer_B_storage, 'test_share'))
time.sleep(1)

sib_b = SIB()
sib_b.new_sockets.put(11120)
sib_b.js.storage_directory = peer_B_storage
sib_b.js.my_machine_ID = 'machine_B'

sib_a = SIB()
sib_a.new_sockets.put(11121)
sib_a.js.storage_directory = peer_A_storage
sib_a.js.my_machine_ID = 'machine_A'

#TODO: this transfer is slow?
Example #17
	def store_file_blobs(self, key, commit_hash, parent_commit_hash, storage_directory, working_directory):
		logging.info('commit_hash: %s, parent_commit_hash: %s, storage_directory: %s, working_directory: %s', 
					commit_hash, parent_commit_hash, storage_directory, working_directory)
		
		#chop the root folder off working_directory
		working_directory, tail = os.path.split(working_directory)
		
		#load current commit, tree, and file info
		cb = commit_blob()
		cb.load(key, storage_directory, commit_hash)
		tb = tree_blob()  #it is okay to modify this tree blob.  The one stored for the commit is already saved.
		tb.load(key, storage_directory, cb.tree_hash)	
		tb.build_tree(key, storage_directory)
		logging.debug('tree to store: %s'%(tb))
		
		#remove files with duplicate hashes in the working directory or storage directory
		hash_set = set()
		for root, dirs, files in os.walk(storage_directory):
			for f in files:
				hash_set.add(f)
		logging.debug('storage directory hashes: %s'%(hash_set))
		for path, node in tb.walk():
			logging.debug('checking: %s'%(path))
			if node.node_type != 'file':
				continue
			if node.hash_hex in hash_set:
				logging.debug('found hash match: %s'%(node.hash_hex))
				tb.rm_node(path, 'file')
			else:
				hash_set.add(node.hash_hex)
		
		
		if parent_commit_hash is None:  #this is an initial commit
			for path, node in tb.walk():
				if node.node_type !='file':
					continue
				full_path = os.path.join(working_directory, path)
				new_file = open(full_path,'r')
				fb = file_blob()
				fb.compute_delta(key, new_file.read())
				fb.store(key, storage_directory)
			return
		
		
		#load parent commit, tree, and file info
		pcb = commit_blob()
		pcb.load(key, storage_directory, parent_commit_hash)
		ptb = tree_blob()
		ptb.load(key, storage_directory, pcb.tree_hash)	
		ptb.build_tree(key, storage_directory)
		
		logging.debug('performing differential commit using following trees')
		logging.debug('parent tree: %s' %(str(ptb)))
		logging.debug('child tree: %s' %(str(tb)))
		

		#find files with the same name in the same path, compute deltas, and store as file blob diffs
		for path, node in tb.walk():
			if node.node_type != 'file':
				continue
			if not ptb.has_node(path, 'file'):
				continue
			p_node = ptb.get_node(path, 'file')
			logging.debug('Found files with matching paths and names.  working: %s, parent: %s', node.hash_hex, p_node.hash_hex)
			full_file_path = os.path.join(working_directory, path)
			new_file = open(full_file_path,'rb')
			pfb = file_blob()
			pfb.load(key, storage_directory, p_node.hash_hex)
			fb = file_blob()
			fb.compute_delta(key, new_file.read(), pfb, storage_directory)
			fb.store(key, storage_directory)
			tb.rm_node(path, 'file')
		
		
		#TODO: re-implement code commented below
		"""
		#Look for similar files between working and parent and compute diffs on those
		index=-1
		while (index+1<len(file_hashes)):  #cycle through all file records in working directory
			index+=1
			parent_index=-1
			while (parent_index+1<len(parent_file_hashes)):  #cycle through all files records in parent commit
				parent_index+=1
				#if file_names[index]!= parent_file_names[parent_index]:
				#	break
				
				#must have similar file sizes
				percent_size_change = abs(file_sizes[index]-parent_file_sizes[index]) / file_sizes[index] 
				if  percent_size_change > 0.10:
					continue
				
				#must have similar byte sequences
				full_file_path = working_directory + file_folders[index] + '/' + file_names[index]
				new_file = open(full_file_path,'rb')
				new_file_text = new_file.read()
				pfb = file_blob()
				pfb.load(key, storage_directory, parent_file_hashes[parent_index])
				pfb_text = pfb.apply_delta(key, storage_directory)
				s=difflib.SequenceMatcher(None,new_file_text,pfb_text)
				if s.real_quick_ratio() < 0.75:
					continue
				if s.quick_ratio() < 0.75:
					continue
				
				#If this line is reached the files are similar enough.  Compute the diff and store.
				logging.debug('Found files with similar content. working: %s, parent: %s', file_hashes[index], parent_file_hashes[parent_index])
				fb = file_blob()
				fb.compute_delta(key, new_file_text, pfb, storage_directory)
				fb.store(key, storage_directory)
				
				file_hashes.pop(index)
				file_names.pop(index)
				file_folders.pop(index)
				file_sizes.pop(index)
				index-=1
				break
		"""
		
		#store all remaining files as initial versions
		for path, node in tb.walk():
			if node.node_type !='file':
				continue
			full_file_path = os.path.join(working_directory, path)
			new_file = open(full_file_path,'rb')
			fb = file_blob()
			fb.compute_delta(key, new_file.read())
			fb.store(key, storage_directory)		
Example #18
    def store_file_blobs(self, key, commit_hash, parent_commit_hash,
                         storage_directory, working_directory):
        logging.info(
            'commit_hash: %s, parent_commit_hash: %s, storage_directory: %s, working_directory: %s',
            commit_hash, parent_commit_hash, storage_directory,
            working_directory)

        #chop the root folder off working_directory
        working_directory, tail = os.path.split(working_directory)

        #load current commit, tree, and file info
        cb = commit_blob()
        cb.load(key, storage_directory, commit_hash)
        tb = tree_blob()
        tb.load(key, storage_directory, cb.tree_hash)

        file_listing = tb.write_directory_structure(key, storage_directory,
                                                    None, False)
        file_hashes = []
        file_names = []
        file_folders = []
        file_sizes = []
        for (path, file_name, hash_hex, file_size) in file_listing:
            file_hashes.append(hash_hex)
            file_names.append(file_name)
            file_folders.append(path)
            file_sizes.append(file_size)

        if parent_commit_hash is None:  #this is an initial commit
            #store all remaining files as initial versions
            index = -1
            while (index + 1 < len(file_hashes)
                   ):  #cycle through all file records in working directory
                index += 1
                full_file_path = working_directory + file_folders[
                    index] + '/' + file_names[index]
                new_file = open(full_file_path, 'r')
                fb = file_blob()
                fb.compute_delta(key, new_file.read())
                fb.store(key, storage_directory)
            return

        #load parent commit, tree, and file info
        pcb = commit_blob()
        pcb.load(key, storage_directory, parent_commit_hash)
        ptb = tree_blob()
        ptb.load(key, storage_directory, pcb.tree_hash)

        file_listing = ptb.write_directory_structure(key, storage_directory,
                                                     None, False)
        parent_file_hashes = []
        parent_file_names = []
        parent_file_folders = []
        parent_file_sizes = []
        for (path, file_name, hash_hex, file_size) in file_listing:
            parent_file_hashes.append(hash_hex)
            parent_file_names.append(file_name)
            parent_file_folders.append(path)
            parent_file_sizes.append(file_size)

        #Find file blob matches and similar file blobs

        #remove duplicate hashes in working directory
        index = -1
        while True:
            index += 1
            if index >= len(file_hashes) - 1:
                break
            if file_hashes[index] in file_hashes[index + 1:]:
                logging.debug(
                    'Found duplicate files within working directory.  working: %s',
                    file_hashes[index])
                file_hashes.pop(index)
                file_names.pop(index)
                file_folders.pop(index)
                file_sizes.pop(index)
                index -= 1

        #remove duplicate hashes in working directory vs storage directory
        index = -1
        while True:
            index += 1
            if index >= len(file_hashes) - 1:
                break
            if file_hashes[index] in parent_file_hashes:
                logging.debug(
                    'Found duplicate file already stored.  working: %s',
                    file_hashes[index])
                file_hashes.pop(index)
                file_names.pop(index)
                file_folders.pop(index)
                file_sizes.pop(index)
                index -= 1

        #find files with the same name in the same path, compute deltas, and store as file blob diffs
        index = -1
        while (index + 1 < len(file_hashes)
               ):  #cycle through all file records in working directory
            index += 1
            parent_index = -1
            while (parent_index + 1 < len(parent_file_hashes)
                   ):  #cycle through all files records in parent commit
                parent_index += 1
                if file_names[index] != parent_file_names[parent_index]:
                    continue
                if file_folders[index] != parent_file_folders[parent_index]:
                    continue

                #If this line is reached we found two files with the same name, path, but different hashes.
                #Compute the diff between these two files and store it.
                logging.debug(
                    'Found files with matching paths and names.  working: %s, parent: %s',
                    file_hashes[index], parent_file_hashes[parent_index])
                full_file_path = working_directory + file_folders[
                    index] + '/' + file_names[index]
                new_file = open(full_file_path, 'rb')
                pfb = file_blob()
                pfb.load(key, storage_directory,
                         parent_file_hashes[parent_index])
                fb = file_blob()
                fb.compute_delta(key, new_file.read(), pfb, storage_directory)
                fb.store(key, storage_directory)

                file_hashes.pop(index)
                file_names.pop(index)
                file_folders.pop(index)
                file_sizes.pop(index)
                index -= 1
                break

        #Look for similar files between working and parent and compute diffs on those
        index = -1
        while (index + 1 < len(file_hashes)
               ):  #cycle through all file records in working directory
            index += 1
            parent_index = -1
            while (parent_index + 1 < len(parent_file_hashes)
                   ):  #cycle through all files records in parent commit
                parent_index += 1
                #if file_names[index]!= parent_file_names[parent_index]:
                #	break

                #must have similar file sizes
                #compare against the candidate parent file (parent_index, not index), forcing float division
                percent_size_change = abs(
                    file_sizes[index] -
                    parent_file_sizes[parent_index]) / float(file_sizes[index])
                if percent_size_change > 0.10:
                    continue

                #must have similar byte sequences
                full_file_path = working_directory + file_folders[
                    index] + '/' + file_names[index]
                new_file = open(full_file_path, 'rb')
                new_file_text = new_file.read()
                pfb = file_blob()
                pfb.load(key, storage_directory,
                         parent_file_hashes[parent_index])
                pfb_text = pfb.apply_delta(key, storage_directory)
                s = difflib.SequenceMatcher(None, new_file_text, pfb_text)
                if s.real_quick_ratio() < 0.75:
                    continue
                if s.quick_ratio() < 0.75:
                    continue

                #If this line is reached the files are similar enough.  Compute the diff and store.
                logging.debug(
                    'Found files with similar content. working: %s, parent: %s',
                    file_hashes[index], parent_file_hashes[parent_index])
                fb = file_blob()
                fb.compute_delta(key, new_file_text, pfb, storage_directory)
                fb.store(key, storage_directory)

                file_hashes.pop(index)
                file_names.pop(index)
                file_folders.pop(index)
                file_sizes.pop(index)
                index -= 1
                break

        #store all remaining files as initial versions
        index = -1
        while (index + 1 < len(file_hashes)
               ):  #cycle through all file records in working directory
            index += 1
            full_file_path = working_directory + file_folders[
                index] + '/' + file_names[index]
            new_file = open(full_file_path, 'rb')
            fb = file_blob()
            fb.compute_delta(key, new_file.read())
            fb.store(key, storage_directory)
Example #19
	def store_file_blobs(self, key, commit_hash, parent_commit_hash, storage_directory, working_directory):
		logging.info('commit_hash: %s, parent_commit_hash: %s, storage_directory: %s, working_directory: %s', 
					commit_hash, parent_commit_hash, storage_directory, working_directory)
		
		#chop the root folder off working_directory
		working_directory, tail = os.path.split(working_directory)
		
		#load current commit, tree, and file info
		cb = commit_blob()
		cb.load(key, storage_directory, commit_hash)
		tb = tree_blob()
		tb.load(key, storage_directory, cb.tree_hash)	
		
		file_listing=tb.write_directory_structure(key, storage_directory, None, False)
		file_hashes=[]
		file_names=[]
		file_folders=[]
		file_sizes=[]
		for (path, file_name, hash_hex, file_size) in file_listing:
			file_hashes.append(hash_hex)
			file_names.append(file_name)
			file_folders.append(path)
			file_sizes.append(file_size)
		
		if parent_commit_hash is None:  #this is an initial commit
			#store all remaining files as initial versions
			index=-1
			while (index+1<len(file_hashes)):  #cycle through all file records in working directory
				index+=1
				full_file_path = working_directory + file_folders[index] + '/' + file_names[index]
				new_file = open(full_file_path,'r')
				fb = file_blob()
				fb.compute_delta(key, new_file.read())
				fb.store(key, storage_directory)
			return
		
		
		
		#load parent commit, tree, and file info
		pcb = commit_blob()
		pcb.load(key, storage_directory, parent_commit_hash)
		ptb = tree_blob()
		ptb.load(key, storage_directory, pcb.tree_hash)	
		
		file_listing=ptb.write_directory_structure(key, storage_directory, None, False)
		parent_file_hashes=[]
		parent_file_names=[]
		parent_file_folders=[]
		parent_file_sizes=[]
		for (path, file_name, hash_hex, file_size) in file_listing:
			parent_file_hashes.append(hash_hex)
			parent_file_names.append(file_name)
			parent_file_folders.append(path)
			parent_file_sizes.append(file_size)
			
		
		#Find file blob matches and similar file blobs
		
		#remove duplicate hashes in working directory
		index=-1
		while True:
			index+=1
			if index>=len(file_hashes)-1:
				break
			if file_hashes[index] in file_hashes[index+1:]:
				logging.debug('Found duplicate files within working directory.  working: %s', file_hashes[index])
				file_hashes.pop(index)
				file_names.pop(index)
				file_folders.pop(index)
				file_sizes.pop(index)
				index-=1
				
				
		#remove duplicate hashes in working directory vs storage directory
		index=-1
		while True:
			index+=1
			if index>=len(file_hashes)-1:
				break
			if file_hashes[index] in parent_file_hashes:
				logging.debug('Found duplicate file already stored.  working: %s', file_hashes[index])
				file_hashes.pop(index)
				file_names.pop(index)
				file_folders.pop(index)
				file_sizes.pop(index)
				index-=1		

		
		#find files with the same name in the same path, compute deltas, and store as file blob diffs
		index=-1
		while (index+1<len(file_hashes)):  #cycle through all file records in working directory
			index+=1
			parent_index=-1
			while (parent_index+1<len(parent_file_hashes)):  #cycle through all files records in parent commit
				parent_index+=1
				if file_names[index]!= parent_file_names[parent_index]:
					continue
				if file_folders[index]!=parent_file_folders[parent_index]:
					continue
				
				#If this line is reached we found two files with the same name, path, but different hashes.
				#Compute the diff between these two files and store it.
				logging.debug('Found files with matching paths and names.  working: %s, parent: %s', file_hashes[index], parent_file_hashes[parent_index])
				full_file_path = working_directory + file_folders[index] + '/' + file_names[index]
				new_file = open(full_file_path,'rb')
				pfb = file_blob()
				pfb.load(key, storage_directory, parent_file_hashes[parent_index])
				fb = file_blob()
				fb.compute_delta(key, new_file.read(), pfb, storage_directory)
				fb.store(key, storage_directory)
				
				file_hashes.pop(index)
				file_names.pop(index)
				file_folders.pop(index)
				file_sizes.pop(index)
				index-=1
				break
				
			
		#Look for similar files between working and parent and compute diffs on those
		index=-1
		while (index+1<len(file_hashes)):  #cycle through all file records in working directory
			index+=1
			parent_index=-1
			while (parent_index+1<len(parent_file_hashes)):  #cycle through all files records in parent commit
				parent_index+=1
				#if file_names[index]!= parent_file_names[parent_index]:
				#	break
				
				#must have similar file sizes
				#compare against the candidate parent file (parent_index, not index), forcing float division
				percent_size_change = abs(file_sizes[index] - parent_file_sizes[parent_index]) / float(file_sizes[index])
				if  percent_size_change > 0.10:
					continue
				
				#must have similar byte sequences
				full_file_path = working_directory + file_folders[index] + '/' + file_names[index]
				new_file = open(full_file_path,'rb')
				new_file_text = new_file.read()
				pfb = file_blob()
				pfb.load(key, storage_directory, parent_file_hashes[parent_index])
				pfb_text = pfb.apply_delta(key, storage_directory)
				s=difflib.SequenceMatcher(None,new_file_text,pfb_text)
				if s.real_quick_ratio() < 0.75:
					continue
				if s.quick_ratio() < 0.75:
					continue
				
				#If this line is reached the files are similar enough.  Compute the diff and store.
				logging.debug('Found files with similar content. working: %s, parent: %s', file_hashes[index], parent_file_hashes[parent_index])
				fb = file_blob()
				fb.compute_delta(key, new_file_text, pfb, storage_directory)
				fb.store(key, storage_directory)
				
				file_hashes.pop(index)
				file_names.pop(index)
				file_folders.pop(index)
				file_sizes.pop(index)
				index-=1
				break
		
		
		#store all remaining files as initial versions
		index=-1
		while (index+1<len(file_hashes)):  #cycle through all file records in working directory
			index+=1
			full_file_path = working_directory + file_folders[index] + '/' + file_names[index]
			new_file = open(full_file_path,'rb')
			fb = file_blob()
			fb.compute_delta(key, new_file.read())
			fb.store(key, storage_directory)
Example #20
File: test.py Project: stevenleigh/sib
	try:
		print s2.recv(1028)
	except socket.error as msg:
		pass
		

"""


print '\n\n'
print '************************************************************************'
print '***Testing sending a large file between two sibs'
logging.debug('Testing sending a large file between two sibs')
print '************************************************************************'
large_file = open('../resource/alice.txt', 'rb')
fb = file_blob()
fb.compute_delta(key, large_file.read())
large_file_hash = fb.store(key, os.path.join(peer_B_storage, 'test_share'))
time.sleep(1)

sib_b = SIB()
sib_b.new_sockets.put(11120)
sib_b.js.storage_directory = peer_B_storage
sib_b.js.my_machine_ID = 'machine_B'

sib_a = SIB()
sib_a.new_sockets.put(11121)
sib_a.js.storage_directory = peer_A_storage
sib_a.js.my_machine_ID = 'machine_A'

#TODO: this transfer is slow?