def checkhash(dir, csvf):
    """Return a {filename: bool} map of which files need processing.

    For each file in *csvf* (relative to *dir*), a sidecar file
    "<name>.sha512" stores the last-seen SHA-512 digest.  A file is flagged
    True (needs processing) when the sidecar is missing or the stored hash
    no longer matches; in both cases the sidecar is rewritten via
    writehashfile().  Unchanged files are flagged False.
    """
    sha512hasher = FileHash('sha512')
    process = {}
    for f in csvf:
        hashfile = f + ".sha512"
        hashvalue = sha512hasher.hash_file(dir + "/" + f)
        # Bug fix: `flag` was only assigned inside the matching branch, so a
        # hashfile whose entries never matched dir+"/"+f left `flag` unbound
        # (NameError) or stale from the previous iteration.
        flag = False
        if os.path.isfile(dir + "/" + hashfile):
            # A stored hash exists; compare it against the current digest.
            print("Hashfile (" + hashfile + ") present")
            checksums = dict(sha512hasher.verify_checksums(dir + "/" + hashfile))
            for x, y in checksums.items():
                if x == dir + "/" + f:
                    if y:
                        # Hashes match: file already seen, nothing to do.
                        flag = False
                    else:
                        print("Hashes do not match ... we need to process this file; updating hashfile as well")
                        writehashfile(dir, f, hashfile, hashvalue)
                        flag = True
        else:
            print("Hashfile not present, creating it ...")
            writehashfile(dir, f, hashfile, hashvalue)
            flag = True
        process[f] = flag
    return process
def test_towdata_put(self):
    """Test case for towdata_put

    PUTs the contents of updateTest.csv to /towdata, runs the controller,
    then verifies by MD5 that the server-side savefile matches the expected
    appended CSV fixture.
    """
    update_file = "C:/Users/21ale/Documents/CLASE/TFG/TFG-proyect/TFG-web/server/swagger_server/test/test-files/updateTest.csv"
    test_file = "C:/Users/21ale/Documents/CLASE/TFG/TFG-proyect/TFG-web/server/swagger_server/test/test-files/appendedUpdateTest.csv"
    retrieved_file = "C:/Users/21ale/Documents/CLASE/TFG/TFG-proyect/TFG-web/server/swagger_server/database/savefile.csv"
    with open(update_file, 'r') as csv_in:
        payload = csv_in.read()
    body = Body1(1, payload)
    self.client.open(
        '/towdata', method='PUT',
        data=json.dumps(body),
        content_type='application/json')
    default_controller.towdata_put(body.csv)
    md5hasher = FileHash("md5")
    expected_hash = md5hasher.hash_file(test_file)
    obtained_hash = md5hasher.hash_file(retrieved_file)
    self.assertEqual(expected_hash, obtained_hash)
def check_file_integrity(indir, outdir):
    """Scan *indir* and return {filename: {'size': bytes, 'md5': hexdigest}}.

    Only regular files are included; directories and special files
    (sockets, FIFOs, device files) are skipped.  *outdir* is accepted for
    interface compatibility but is not used by this function.
    """
    # Hoisted out of the loop: one hasher instance serves every file.
    md5hasher = FileHash('md5')
    dic_files = {}
    for f in os.listdir(indir):
        path = os.path.join(indir, f)
        if os.path.isfile(path):
            # Fresh dict per file (the original reused/reset a shared one).
            dic_files[f] = {
                'size': os.path.getsize(path),
                'md5': md5hasher.hash_file(path),
            }
    return dic_files
def test_hash_file(self):
    """Test the hash_file() method."""
    for algo in SUPPORTED_ALGORITHMS:
        for filename, expected in self.expected_results.items():
            self.assertEqual(expected[algo],
                             FileHash(algo).hash_file(filename))
def setup_method(self):
    """Per-test setup: record the suite name, a default 'Fail' status, the
    SHA-1 self-checksum of this test module, and the active-test selector.

    Reads the 'test_selector' and 'ACTIVE_TEST' environment variables
    (raises KeyError if either is missing).
    """
    self.test_suite_name = os.environ['test_selector']
    self.status = 'Fail'  # bug fix: this line was duplicated verbatim
    # The original local was named `md5` although the algorithm is SHA-1.
    sha1_hasher = FileHash('sha1')
    self.chksum = sha1_hasher.hash_file('Test/Milestone1Tests.py')
    self.active_test = os.environ['ACTIVE_TEST']
def thread_download_file(file_url: str, file_name: str) -> NoReturn:
    """Download *file_url* into ./download, hash it with SHA-256, and hand
    new (unseen) hashes to analize_hash() for further processing.

    Runs as a worker thread; all progress is reported through `logger`.
    Known hashes (already in the SQLite store) are skipped.
    """
    if not re.search(r'https?://', file_url):
        # FIXME: careful — prepending plain HTTP (not HTTPS); unsure whether
        # this will cause problems for some hosts.
        file_url = 'http://{}'.format(file_url)
    logger.debug("Thread %s: starting", file_name)
    directory = './download'
    # Timestamp prefix keeps concurrent downloads of same-named files apart.
    path = f'{directory}/{datetime.datetime.now().timestamp()}-{file_name}'
    logger.debug("download url %s in %s", file_url, path)
    # wget errors out if the target file already exists, so remove it first.
    if os.path.exists(path):
        os.remove(path)
    try:
        wget.download(file_url, path)
        hash_sha256 = FileHash('sha256')
        hash_file = hash_sha256.hash_file(path)
        logger.info("file %s, hash %s", file_name, hash_file)
        if not connect_sqlite.is_exists_hash(hash_file):
            # New sample: record it and trigger the analysis pipeline.
            logger.debug("Insertamos %s en la BD", file_name)
            analize_hash(hash_file, file_name, file_url)
        else:
            logger.info("exists %s in database", file_name)
    except urllib.error.HTTPError:
        # FIXME: decide how to propagate this failure to the caller.
        logger.error("the page blocks the download: %s", file_url)
    # os.remove(path)  # FIXME: re-enable once the file is no longer needed
    logger.debug("Thread %s: finishing", file_name)
def scan_files(conn):
    '''
    Scan through the provided directory path for files that match the
    file extensions list. If there is a match, attempt to extract the
    exif data by using the host OS command 'exif'.
    '''
    print("scan_files started\n")
    # Hoisted: the original constructed a new FileHash for every file.
    md5hasher = FileHash('md5')
    for root, _, files in os.walk(ARGS.path):
        for afile in files:
            print("AFILE %s " % afile)
            # Compute the path once instead of re-concatenating it below.
            filepath = root + '/' + afile
            one_hash = md5hasher.hash_file(filepath)
            if afile.endswith(tuple(FILE_EXTENSION)):
                dtg_y, dtg_m, dtg_d = get_exif_date(filepath)
                dtg = dtg_y + '/' + dtg_m + '/' + dtg_d
                print("FILE: %s HASH: %s DTG: %s" % (afile, one_hash, dtg))
                entry = afile + '|' + root + '|' + one_hash + '|' + dtg_y + '|' \
                    + dtg_m + '|' + dtg_d
                insert_record(conn, entry)
            else:
                # Progress dot for files outside the extension allow-list.
                print(".", end='')
    print('')
def test_verify_sfv(self):
    """Test the verify_sfv() method."""
    hasher = FileHash('crc32')
    # Every entry in the SFV file must verify successfully.
    for verified in hasher.verify_sfv("lorem_ipsum.sfv"):
        self.assertTrue(verified.hashes_match)
def test_verify_checksums(self):
    """Test the verify_checksums() method."""
    for algo in SUPPORTED_ALGORITHMS:
        hasher = FileHash(algo)
        # Every entry in the per-algorithm checksum file must verify.
        for verified in hasher.verify_checksums("hashes." + algo):
            self.assertTrue(verified.hashes_match)
def getHashAudio():
    """Hash song_embedded.wav with MD5, append the digest to
    hash-audio-password.txt, and show it in a message box.

    Stores the digest in the module-level global `fileHash`.
    """
    global fileHash
    md5hasher = FileHash('md5')
    # Bug fix: the original hashed the file twice, discarding the first result.
    fileHash = md5hasher.hash_file('song_embedded.wav')
    # Bug fix: use a context manager so the handle is always closed.
    with open('hash-audio-password.txt', 'a+') as filehash_audio:
        filehash_audio.write(fileHash)
    print("FileHash : ", fileHash)
    messagebox.showinfo("File Hash", fileHash)
def test_checksum(self):
    """Round-trip the MD5 checksum reported by Nc_to_mmd through a
    checksum file on disk and verify it with the filehash library."""
    # Bug fix: mkstemp()[1] leaked the open file descriptor; close it.
    fd, tested = tempfile.mkstemp()
    os.close(fd)
    fn = 'tests/data/reference_nc.nc'
    nc2mmd = Nc_to_mmd(fn, check_only=True)
    nc2mmd.to_mmd()
    checksum = nc2mmd.metadata['storage_information']['checksum']
    with open(tested, 'w') as tt:
        # "<hash> *<file>" is the standard binary-mode checksum-file format.
        tt.write('%s *%s' % (checksum, fn))
    md5hasher = FileHash('md5')
    self.assertTrue(md5hasher.verify_checksums(tested)[0].hashes_match)
def shipFile(service):
    """Write md5/sha1/sha512 checksums for every outputDRIVE*.txt log, then
    upload the logs and their .hash files to Google Drive via *service*,
    deleting each local copy after a successful upload."""
    # --- checksum calculation for log files ---
    # Hashers hoisted: one instance each serves every file.
    md5hasher = FileHash('md5')
    sha1hasher = FileHash('sha1')
    sha512hasher = FileHash('sha512')
    for hashing_file in fnmatch.filter(os.listdir('.'), 'outputDRIVE*.txt'):
        # Bug fix: the original opened the .hash file three separate times
        # via print(file=open(...)) and never closed any of the handles.
        with open(hashing_file + '.hash', 'a') as out:
            print("md5 = " + md5hasher.hash_file(hashing_file) + "\n", file=out)
            print("\nsha1 = " + sha1hasher.hash_file(hashing_file) + "\n", file=out)
            print("\nsha512 = " + sha512hasher.hash_file(hashing_file) + "\n", file=out)
    # --- shipping of logs and checksums ---
    for shipping_file in fnmatch.filter(os.listdir('.'), 'outputDRIVE*.txt*'):
        file_metadata = {'name': shipping_file}
        log = MediaFileUpload(shipping_file)
        try:
            file = service.files().create(body=file_metadata,
                                          media_body=log,
                                          fields='id').execute()
            print('File ID: %s' % file.get('id'))
            # Only remove the local copy once Drive confirmed the upload.
            os.remove(shipping_file)
        except IOError:
            print('upload error')
def test_four(self):
    """ Verify redundancy
    - Have a known configuration of node count , chunk size and redunduncy level
    - Load a sequence of files with specific sizes
    - Delete number of nodes as redunduncy specification
    - Check if the files can be retrived
    """
    self.test_name = self.test_four.__doc__
    try:
        app_config = json.load(
            open(config.TEST_DATA.milestone_3.app_config_location))
        node_count = app_config['node_count']
        size_per_slice = app_config['size_per_slice']
        redunduncy_count = app_config['redundancy_count']
        print("\nVerifying Pre-Requisites for the Test")
        assert (node_count == 10)
        assert (size_per_slice == 1024)
        assert (redunduncy_count == 1)
        print("\nSuccessfully verified Pre-Requisites for the test")
        # Load 10KB file.  NOTE: the hasher is SHA-1 despite the local name.
        md5 = FileHash('sha1')
        input_checksum = md5.hash_file(
            config.TEST_DATA.milestone_3.file_1_path)
        rs = txn.upload_a_file(config.TEST_DATA.milestone_3.file_1_path)
        assert (rs.status_code == 200)
        file_id = str(rs.text)
        nodes = []
        for items in os.listdir(
                config.TEST_DATA.milestone_3.location_of_nodes):
            if (os.path.isdir(
                    config.TEST_DATA.milestone_3.location_of_nodes + '/' +
                    items) and 'node_' in items):
                nodes.append(items)
        # Simulate node loss by renaming one node directory (deleting it
        # outright via shutil.rmtree is the commented-out alternative).
        os.rename(
            os.path.join(config.TEST_DATA.milestone_3.location_of_nodes,
                         nodes[0]),
            os.path.join(config.TEST_DATA.milestone_3.location_of_nodes,
                         'XYZQBC'))
        # try getting the file back
        rs = txn.retrive_a_file_by_id(file_id)
        assert (rs.status_code == 200)
        # Bug fix: close the output file before hashing it again.
        with open('Output.file', 'wb+') as out:
            out.write(rs.content)
        output_checksum = md5.hash_file('Output.file')
        # Bug fix: `assert input_checksum, output_checksum` only tested the
        # truthiness of input_checksum (the comma made output_checksum the
        # assert *message*).  Compare the two digests for real.
        assert input_checksum == output_checksum
        self.status = 'Pass'
    except Exception as e:
        self.status = 'Fail'
        raise e
def browseAudio():
    """Ask the user to pick an audio file, store its MD5 digest in the
    global `audioFileHash`, and insert the chosen path into the UI list."""
    global fileAudio
    global filename_audio
    global audioFileHash
    filename_audio = filedialog.askopenfilename()
    # Bug fix: the original hashed the file twice (first result discarded);
    # also removed the unused `dir_path` local.
    audioFileHash = FileHash('md5').hash_file(filename_audio)
    print("FileHash : ", audioFileHash)
    load_audio_path.insert(tk.END, filename_audio)
def CheckSumMD5(filename, md5hash):
    """Return True iff *filename* exists and its upper-cased MD5 hex digest
    equals *md5hash* (which is therefore expected to be upper-case)."""
    if not os.path.exists(filename):
        return False
    md5_hasher = FileHash('md5')
    md5_str = md5_hasher.hash_file(filename).upper()
    print("Comparing :", md5_str, md5hash)
    # Idiom fix: return the comparison directly instead of if/else True/False.
    return md5_str == md5hash
def get_hash(latest_download, hash_type='sha256'):
    """Return the hash digest of a downloaded file.

    Args:
        latest_download (str): path of the file to hash.
        hash_type (str): hash algorithm name (sha256, md5, etc.).

    Returns:
        str: the computed digest of *latest_download*.
    """
    hasher = FileHash(hash_type)
    return hasher.hash_file(latest_download)
def getHashImage():
    """Hash the stego output image with MD5, append the digest to
    result/hash-image-password.txt, and show it in a message box.

    Stores the digest in the module-level global `fileHash`.
    """
    global fileHash
    md5hasher = FileHash('md5')
    # Bug fix: the original hashed the file twice, discarding the first result.
    fileHash = md5hasher.hash_file(dir_path + '/result/output-stego.jpg')
    # Bug fix: use a context manager so the handle is always closed.
    with open(dir_path + '/result/hash-image-password.txt', 'a+') as filehash_image:
        filehash_image.write(fileHash)
    print(fileHash)
    messagebox.showinfo("File Hash", fileHash)
def test_hash_dir(self):
    """Test the hash_dir() method."""
    # NOTE(review): presumably the test process starts inside ./testdata,
    # so step up one level before globbing it — confirm against the runner.
    os.chdir("..")
    for algo in SUPPORTED_ALGORITHMS:
        for filename in self.expected_results.keys():
            hasher = FileHash(algo)
            basename, ext = os.path.splitext(filename)
            # Hash every file in ./testdata sharing this file's extension.
            results = hasher.hash_dir("./testdata", "*" + ext)
            for result in results:
                self.assertEqual(
                    self.expected_results[result.filename][algo],
                    result.hash)
            if len(results) == 1:
                # `result` deliberately leaks from the loop above: with a
                # single match it must be this exact filename.
                self.assertEqual(filename, result.filename)
def test_cathash_dir(self):
    """Test the cathash_dir() method."""
    # NOTE(review): presumably the test process starts inside ./testdata,
    # so step up one level before globbing it — confirm against the runner.
    os.chdir("..")
    for algo in SUPPORTED_ALGORITHMS:
        hasher = FileHash(algo)
        self.assertEqual(hasher.cathash_dir("./testdata", "*.txt"),
                         self.expected_results['lorem_ipsum.txt'][algo])
        self.assertEqual(hasher.cathash_dir("./testdata", "*.zip"),
                         self.expected_results['lorem_ipsum.zip'][algo])
        # The combined-glob fixture choice depends on which single-file hash
        # compares greater for this algorithm — presumably cathash_dir orders
        # inputs by their individual digests (TODO confirm in filehash docs);
        # the two .cat fixtures cover both possible concatenation orders.
        self.assertEqual(
            hasher.cathash_dir("./testdata", "*.[ziptxt]*"),
            self.expected_results['lorem_ipsum_zip+txt.cat' if (
                self.expected_results['lorem_ipsum.txt'][algo] > self.
                expected_results['lorem_ipsum.zip'][algo]
            ) else 'lorem_ipsum_txt+zip.cat'][algo])
def browseImage():
    """Ask the user to pick an image, store its MD5 digest in the global
    `imageFileHash`, create the decrypt-result folder next to the image,
    and insert the chosen path into the UI list."""
    global fileImage
    # global path_imgsteg
    global filename_image
    global imageFileHash
    filename_image = tkFileDialog.askopenfilename()
    dir_path = os.path.split(filename_image)[0]
    # Bug fix: the original hashed the file twice (first result discarded).
    imageFileHash = FileHash('md5').hash_file(filename_image)
    print(imageFileHash)
    print("directory path : " + dir_path)
    # Stdlib replacement for subprocess.Popen(['mkdir', ...]): portable,
    # synchronous, and silent when the directory already exists.
    os.makedirs(dir_path + '/decrypt-result', exist_ok=True)
    load_image_path.insert(tk.END, filename_image)
def run(self):
    """Analyzer entry point: obtain an MD5 (hashing the input file if the
    data type is 'file', or taking the supplied hash directly), query the
    hashdd service, and report either the known level ('status' service)
    or the full detail summary ('detail' service)."""
    if self.data_type not in ['hash', 'file']:
        self.notSupported()
    elif self.data_type == 'file':
        filepath = self.get_param('file')
        md5hasher = FileHash('md5')
        data = md5hasher.hash_file(filepath)
    elif self.data_type == 'hash':
        data = self.get_data()
    response = self.hashdd_check(data)
    # hashdd keys its response by the upper-cased digest.  (Renamed from
    # `hash`, which shadowed the builtin.)
    digest = data.upper()
    if response['result'] == 'SUCCESS':
        if self.service == "status":
            self.report({'known_level': response[digest]['known_level']})
        elif self.service == "detail":
            summary = response[digest]['summary']
            self.report({
                'known_level': summary['hashdd_known_level'],
                'file_name': summary['hashdd_file_name'],
                'file_absolute_path': summary['hashdd_file_absolute_path'],
                'size': summary['hashdd_size'],
                'product_manufacturer': summary['hashdd_product_manufacturer'],
                'product_name': summary['hashdd_product_name'],
                'product_version': summary['hashdd_product_version'],
                'architecture': summary['hashdd_architecture'],
                'md5': summary['hashdd_md5'],
                'sha1': summary['hashdd_sha1'],
                'sha256': summary['hashdd_sha256'],
                'ssdeep': summary['hashdd_ssdeep']
            })
    else:
        self.error('{}'.format(response['result']))
def setup_method(self):
    """Per-test setup: record the suite name, a default 'Fail' status, the
    SHA-1 self-checksum of this test module, and the configured peer list
    (which must contain no duplicate entries)."""
    self.test_suite_name = os.environ['test_selector']
    self.status = 'Fail'  # bug fix: this line was duplicated verbatim
    # The original local was named `md5` although the algorithm is SHA-1.
    sha1_hasher = FileHash('sha1')
    self.chksum = sha1_hasher.hash_file('Test/Milestone1BTests.py')
    location_of_config = config.TEST_DATA.milestone_1b.app_config_location
    server_side_conf = json.load(open(location_of_config))
    self.servers = server_side_conf['peers']
    # Bug fix: the original asserted the constant `1 == 0` and then tried
    # to `raise` a plain string (a TypeError in Python 3, and unreachable
    # anyway).  Assert the real uniqueness condition instead.
    assert len(self.servers) == len(set(self.servers)), (
        "The configured Peers in your config are not unique. "
        "Each entry of the configured peer must be unique {0}".format(
            self.servers))
def test_hash_files(self):
    """Test the hash_files() method."""
    for algo in SUPPORTED_ALGORITHMS:
        hasher = FileHash(algo)
        # Singleton lists: one filename per call.
        for filename in self.expected_results:
            single = hasher.hash_files([filename])
            self.assertEqual(len(single), 1)
            for entry in single:
                self.assertEqual(self.expected_results[filename][algo],
                                 entry.hash)
                self.assertEqual(filename, entry.filename)
        # The whole set of filenames in a single call.
        batch = hasher.hash_files(self.expected_results.keys())
        self.assertEqual(len(batch), len(self.expected_results))
        for entry in batch:
            self.assertEqual(self.expected_results[entry.filename][algo],
                             entry.hash)
def findDup(parentFolder):
    """Walk *parentFolder* and collect files by MD5 digest.

    Returns a dict in the format {hash: [paths]}.  When a hash is seen a
    second time, the duplicate file is moved from its 'Sortiert' directory
    into the parallel 'Duplikate' directory via move_to_dir().
    """
    dups = {}
    md5hasher = FileHash('md5')
    for dirName, subdirs, fileList in os.walk(parentFolder):
        print('Scanning %s...' % dirName)
        for filename in fileList:
            path = os.path.join(dirName, filename)
            file_hash = md5hasher.hash_file(path)
            if file_hash in dups:
                # Bug fix: the duplicate was moved but never recorded, so the
                # returned mapping contradicted the documented {hash:[names]}
                # contract (each list only ever held the first path).
                dups[file_hash].append(path)
                destpath = dirName.replace('Sortiert', 'Duplikate')
                move_to_dir(path, destpath)
            else:
                dups[file_hash] = [path]
    return dups
def test_four(self):
    """ Verify that the uploaded file can be reterived sucessfully
    - Verify check sum before upload and after reterive
    """
    self.test_name = self.test_four.__doc__
    try:
        # SHA-1 hasher (locals renamed: the original shadowed the builtins
        # `hash` and `id`).
        hasher = FileHash('sha1')
        checksum_before = hasher.hash_file(
            config.TEST_DATA.test_four.file_to_upload)
        response = txn.upload_a_file(
            config.TEST_DATA.test_four.file_to_upload)
        assert (response.status_code == 200)
        file_id = response.text
        r = txn.retrive_a_file_by_id(file_id)
        # Bug fix: close the retrieved file before hashing it again.
        with open(config.TEST_DATA.test_four.file_name, 'wb+') as out:
            out.write(r.content)
        checksum_after = hasher.hash_file(config.TEST_DATA.test_four.file_name)
        assert (checksum_before == checksum_after)
        self.status = 'Pass'
    except Exception as e:
        self.status = 'Fail'
        raise e
def diffnii(left, right):
    """Compare two NIfTI images and return a dict of difference metrics.

    Checks are layered cheapest-first, each early-returning with a
    'similarity' score: identical file hashes -> 1.0; mismatched shapes ->
    ratio of voxel counts; otherwise voxel-value statistics are compared.
    """
    hasher = FileHash('sha256')
    result = {}
    img1_hash = hasher.hash_file(left)
    img2_hash = hasher.hash_file(right)
    result['hash'] = {'left': img1_hash, 'right': img2_hash,
                      'equal': img1_hash == img2_hash}
    if result['hash']['equal']:
        # Byte-identical files: maximal similarity, nothing else to check.
        result['similarity'] = 1.0
        return result
    img1 = nib.load(left)
    img1_data = img1.get_fdata()
    img2 = nib.load(right)
    img2_data = img2.get_fdata()
    totalvoxels1 = np.prod(img1.shape)
    totalvoxels2 = np.prod(img2.shape)
    result['total_voxels'] = {'left': totalvoxels1, 'right': totalvoxels2,
                              'equal': totalvoxels1 == totalvoxels2}
    result['shape'] = {'left': list(img1_data.shape),
                       'right': list(img2_data.shape),
                       'equal': img1_data.shape == img2_data.shape}
    if not result['shape']['equal']:
        # Incomparable grids: score by relative size only.
        result['similarity'] = min((totalvoxels1, totalvoxels2)) / max(
            (totalvoxels1, totalvoxels2))
        return result
    result['voxel_val_sum'] = {'left': img1_data.sum(),
                               'right': img2_data.sum(),
                               'equal': img1_data.sum() - img2_data.sum() == 0}
    result['voxel_val_mean_diff'] = {
        'mean_diff': (img1_data - img2_data).mean(),
        'equal': (img1_data - img2_data).mean() == 0}
    if not result['voxel_val_mean_diff']['equal']:
        # NOTE(review): similarity heuristic (max - mean) / max of the voxel
        # difference — behavior when mean_diff is negative or max is 0 looks
        # unguarded; confirm intent before relying on this score.
        m = result['voxel_val_mean_diff']['mean_diff']
        ma = (img1_data - img2_data).max()
        result['similarity'] = (ma - m) / ma
        return result
    # Different bytes but numerically identical voxel data.
    result['similarity'] = 1.0
    return result
class GraphTests(unittest.TestCase):
    """Verify class-diagram generation by file hash.

    A reference diagram for test_files/test.py is supplied at
    test_files/supplied_class_diagram.png; the test generates a fresh
    diagram and compares MD5 digests via the filehash library.
    """

    def setUp(self):
        self.hasher = FileHash('md5')
        self.check_hash = self.hasher.hash_file(
            'test_files/supplied_class_diagram.png')
        self.graph = Graph('test_files/test.py')

    def tearDown(self):
        pass

    def test_01(self):
        """Generate a new class diagram from test_files/test.py and check
        the new md5 hash against the existing one."""
        self.test_hash = self.hasher.hash_file('classes.png')
        self.assertEqual(self.test_hash, self.check_hash)
        os.remove('classes.png')
def test_four(self):
    """ Verify that a file uploaded to a server can be retrieved from another
    server with integrity
    """
    self.test_name = self.test_four.__doc__
    try:
        # Upload via the second configured peer...
        txn.test_configs.API_SERVER_URL = self.servers[1]
        # SHA-1 hasher (locals renamed: the original shadowed the builtins
        # `hash` and `id`).
        hasher = FileHash('sha1')
        checksum_before = hasher.hash_file(
            config.TEST_DATA.milestone_1b.file_to_upload_1)
        response = txn.upload_a_file(
            config.TEST_DATA.milestone_1b.file_to_upload_1)
        assert (response.status_code == 200)
        file_id = response.text
        # ...then retrieve through the first peer.
        txn.test_configs.API_SERVER_URL = self.servers[0]
        r = txn.retrive_a_file_by_id(file_id)
        # Bug fix: close the retrieved file before hashing it again.
        with open(config.TEST_DATA.milestone_1b.file_name_1, 'wb+') as out:
            out.write(r.content)
        checksum_after = hasher.hash_file(
            config.TEST_DATA.milestone_1b.file_name_1)
        assert (checksum_before == checksum_after)
        self.status = 'Pass'
    except Exception as e:
        self.status = 'Fail'
        raise e
def process_IN_ATTRIB(self, event):
    """inotify IN_ATTRIB handler: strip the executable bit from flagged
    files, hash them (md5/sha1/sha256), move them into a sha256-bucketed
    store under gSaveMalwareDir, and append a JSON log line under gSaveDir.

    Directories are ignored.
    """
    filepath = event.pathname
    if os.path.isdir(filepath):  # idiom fix: dropped `== True`
        return
    if os.access(filepath, os.X_OK):
        # Remove execute permission before handling the sample.
        os.chmod(filepath, 0o600)
    now = datetime.datetime.now()
    hasher = FileHash('md5')
    md5 = hasher.hash_file(filepath)
    hasher = FileHash('sha1')
    sha1 = hasher.hash_file(filepath)
    # Bare FileHash() uses the library's default algorithm — presumably
    # sha256 given the variable/bucketing scheme; confirm with filehash docs.
    hasher = FileHash()
    sha256 = hasher.hash_file(filepath)
    # Two-level fan-out by digest prefix keeps store directories small.
    save_dir = os.path.join(gSaveMalwareDir, sha256[0:2], sha256[2:4])
    os.makedirs(save_dir, exist_ok=True)
    save_filepath = os.path.join(save_dir, sha256)
    shutil.move(filepath, save_filepath)
    log = {
        "datetime": now.strftime('%Y/%m/%d %H:%M:%S'),
        "path": filepath,
        "md5_hash": md5,
        "sha-1_hash": sha1,
        "sha-256_hash": sha256,
        "file_format": magic.from_file(save_filepath),
        "filepath": save_filepath
    }
    f_filename = now.strftime('%Y%m%d')
    # f_save_dir = os.path.join(gSaveDir, f_filename[0:6], f_filename[6:8])
    f_save_dir = os.path.join(gSaveDir)
    os.makedirs(f_save_dir, exist_ok=True)
    f_filename = os.path.join(f_save_dir, "rfch_" + f_filename + ".log")
    # Bug fix: context manager guarantees the log handle is closed even if
    # the write raises.
    with open(f_filename, "a") as f:
        f.write(json.dumps(log) + "\n")
def main():
    """CLI entry point: validate the requested algorithm, then dispatch to
    checksum-file verification, directory hashing, cathash, or per-file
    hashing depending on the parsed arguments.  Exits with status 1 on an
    unknown algorithm."""
    # Bug fix: keep the parser — the error branch below called
    # `parser.print_help()` on a name that was never defined (NameError).
    parser = create_parser()
    args = parser.parse_args()
    if args.algorithm.lower() not in SUPPORTED_ALGORITHMS:
        print("ERROR: Unknown checksum/hash algorithm: {0}".format(
            args.algorithm))
        parser.print_help()
        sys.exit(1)
    hasher = FileHash(args.algorithm.lower())
    if args.checksums:
        process_checksum_file(args.checksums, hasher)
    elif args.directory:
        process_dir(args.directory, hasher)
    elif args.cathash:
        process_cathash(args.cathash, hasher)
    else:
        process_files(args.filenames, hasher)