class IndexOrLookup(threading.Thread):
    """
    Daemon worker thread that consumes messages from SDHASH_QUEUE.

    Each message is a JSON string with the keys 'image', 'base_image',
    'relative_path', 'operation' and 'sdhash'. A message whose operation
    is "store" is indexed with ElasticDatabase.index_dir(); any other
    operation is passed to ElasticDatabase.judge_dir().

    The thread starts itself from the constructor, so creating an
    instance is all a caller has to do.
    """

    def __init__(self, thread_Id):
        """
        Create the worker and start it immediately as a daemon thread.

        param thread_Id: identifier that is only used in log output
        """
        # The base class must be initialised, otherwise the instance is
        # not a usable Thread object (repr(), start() and join() fail).
        threading.Thread.__init__(self)
        self.thread_Id = thread_Id
        self.elasticDB = ElasticDatabase(EsCfg)
        self.daemon = True  # Daemonize thread
        self.start()        # Start the execution

    def run(self):
        """
        Process queue messages forever.

        The loop never exits on its own; as a daemon thread it ends when
        the interpreter shuts down.
        """
        import traceback

        print("Starting thread IndexOrLookup  %s" % (self.thread_Id,))
        ct = 0
        while True:
            message = SDHASH_QUEUE.get()
            ct = ct + 1
            try:
                data = json.loads(message)
                image = data['image']
                base_image = data['base_image']
                file_path = data['relative_path']
                operation = data['operation']
                sdhash = data['sdhash']

                # progress output every 100 messages
                if ct % 100 == 0:
                    print("%s   %s" % (self.thread_Id, ct))

                if operation == "store":
                    self.elasticDB.index_dir(base_image, file_path, sdhash)
                else:
                    self.elasticDB.judge_dir(
                        base_image, image, file_path, sdhash)
            except Exception:
                # One bad message must not kill the worker; report it and
                # carry on with the next one.
                traceback.print_exc()
            finally:
                # Always acknowledge the item, otherwise SDHASH_QUEUE.join()
                # would block forever after a failure.
                SDHASH_QUEUE.task_done()
def hash_and_index(imagename, operation):
    """
    Process an image with the given operation against its base image.

    When the operation is 'compare' and no index exists yet for the base
    image, the base image is processed first with the 'store' operation.

    param imagename: image name; private registry imagenames would be of
                     format registry-ip:registry-port/image-name:tag
    param operation: operation handed on to process_image(); 'compare'
                     additionally triggers the base image check above
    """
    elasticDB = ElasticDatabase(EsCfg)
    base_image = get_base_image(imagename)
    print('Base image is:  %s' % (base_image,))

    # Only the part after the last "/" is kept, which drops the
    # registry-ip:registry-port prefix of private registry imagenames.
    short_imagename = imagename.split("/")[-1]

    if operation == 'compare' and not elasticDB.check_index_exists(base_image):
        # Single formatted argument: under the print statement a
        # parenthesised pair would be printed as a tuple.
        print('Indexing missing base image: %s' % (base_image,))
        process_image(base_image, base_image, base_image, 'store', elasticDB)

    process_image(imagename, short_imagename, base_image, operation, elasticDB)
score = resline.split('|')[-1] if score == "100": print fileName + ' match 100%' else: changed_files[filename] = score os.remove("file_hash") os.remove("ref_hash") return changed_files if __name__ == "__main__": # TEST IMAGE # imagename should be in the form image:tag # Example test: python utils.py python:2.7.8-slim ##imagename = sys.argv[1] ##hash_and_index(imagename) # TEST CONTAINER container_id = sys.argv[1] elasticDB = ElasticDatabase(EsCfg) differences = check_container(container_id, elasticDB, 'ubuntu:14.04') print "SUSPICIOUS FILES" space = 36 for key in differences: print key, ' '*(space-len(key)) , differences[key] print 'DONE'
def check_container(container_id):
    """
    Check a running container for files that have been changed.

    Every file reported by ``docker diff`` is looked up by path in the
    reference index of the container's base image. If the reference
    dataset contains a file with the same path, the sdhash of the
    container's copy is compared with the reference sdhash.

    Value stored for each file name in the result:
      -1     no file with that path was found in the reference index
      -2     fields 10 and 11 of the reference sdhash are below 2 and 16,
             so no comparison is attempted (presumably too few features
             for a meaningful comparison - confirm against the sdhash
             output format)
      other  the score reported by ``sdhash -c`` (a string); files that
             score "100" are left out of the result

    param container_id: short or full container id
    return: json string containing suspicious files, or a json string
            with a single 'error' key when the base image cannot be
            determined or ``docker diff`` fails
    """
    base_image = get_container_base_img(container_id)
    if base_image is None:
        return json.dumps({'error': 'failed to get container base image'})

    elasticDB = ElasticDatabase(EsCfg)
    if not elasticDB.check_index_exists(base_image):
        # Single formatted argument: under the print statement a
        # parenthesised pair would be printed as a tuple.
        print('Indexing missing base image: %s' % (base_image,))
        process_image(base_image, base_image, base_image, 'store', elasticDB)

    print('Reference index is  %s' % (base_image,))

    changed_files = {}  # filename => similarity score

    res = exec_cmd(['docker', 'diff', container_id])
    if res is None:
        return json.dumps({'error': 'Error running docker diff.'})

    files_only = get_files_only(res.splitlines())

    temp_dir = 'tmpdata'
    if not os.path.exists(temp_dir):
        os.mkdir(temp_dir)

    for s in files_only:
        filename = s[3:]  # filename starts at 3

        # check if ref DB contains this file path
        result = elasticDB.search_file(base_image, filename)
        if result is None:
            changed_files[filename] = -1
            continue

        # found a file with same path
        # compare ref hash with file hash
        ref_sdhash = result['_source']['sdhash']
        features = ref_sdhash.split(":")[10:12]
        if int(features[0]) < 2 and int(features[1]) < 16:
            changed_files[filename] = -2
            continue

        copy_from_container(container_id + ':' + filename, temp_dir)
        basename = os.path.basename(filename)
        # NOTE(review): exec_cmd() is checked for None after `docker diff`
        # above, but the two sdhash invocations below are not checked.
        file_sdhash = exec_cmd(['sdhash', os.path.join(temp_dir, basename)])

        try:
            with open("file_hash", "w") as f:
                f.write(file_sdhash)
            with open("ref_hash", "w") as f:
                f.write(ref_sdhash)

            file1 = os.path.abspath('file_hash')
            file2 = os.path.abspath('ref_hash')

            # compare file hash with reference hash
            resline = exec_cmd(['sdhash', '-c', file1, file2, '-t', '0'])
            # the score is the last "|"-separated field of the output
            score = resline.strip().split('|')[-1]
        finally:
            # Remove the scratch files even when the comparison fails.
            for scratch in ("file_hash", "ref_hash"):
                if os.path.exists(scratch):
                    os.remove(scratch)

        if score == "100":
            print(filename + ' match 100%')
        else:
            changed_files[filename] = score

    return json.dumps(changed_files)
def check_container(container_id):
    """
    Check a running container for files that have been changed.

    Every file reported by ``docker diff`` is looked up by path in the
    reference index of the container's base image. If the reference
    dataset contains a file with the same path, the sdhash of the
    container's copy is compared with the reference sdhash.

    Value stored for each file name in the result:
      -1     no file with that path was found in the reference index
      -2     fields 10 and 11 of the reference sdhash are below 2 and 16,
             so no comparison is attempted (presumably too few features
             for a meaningful comparison - confirm against the sdhash
             output format)
      other  the score reported by ``sdhash -c`` (a string); files that
             score "100" are left out of the result

    param container_id: short or full container id
    return: json string containing suspicious files, or a json string
            with a single 'error' key when the base image cannot be
            determined or ``docker diff`` fails
    """
    base_image = get_container_base_img(container_id)
    if base_image is None:
        return json.dumps({'error': 'failed to get container base image'})

    elasticDB = ElasticDatabase(EsCfg)
    if not elasticDB.check_index_exists(base_image):
        # Single formatted argument: under the print statement a
        # parenthesised pair would be printed as a tuple.
        print('Indexing missing base image: %s' % (base_image,))
        process_image(base_image, base_image, base_image, 'store', elasticDB)

    print('Reference index is  %s' % (base_image,))

    changed_files = {}  # filename => similarity score

    res = exec_cmd(['docker', 'diff', container_id])
    if res is None:
        return json.dumps({'error': 'Error running docker diff.'})

    files_only = get_files_only(res.splitlines())

    temp_dir = 'tmpdata'
    if not os.path.exists(temp_dir):
        os.mkdir(temp_dir)

    for s in files_only:
        filename = s[3:]  # filename starts at 3

        # check if ref DB contains this file path
        result = elasticDB.search_file(base_image, filename)
        if result is None:
            changed_files[filename] = -1
            continue

        # found a file with same path
        # compare ref hash with file hash
        ref_sdhash = result['_source']['sdhash']
        features = ref_sdhash.split(":")[10:12]
        if int(features[0]) < 2 and int(features[1]) < 16:
            changed_files[filename] = -2
            continue

        copy_from_container(container_id + ':' + filename, temp_dir)
        basename = os.path.basename(filename)
        # NOTE(review): exec_cmd() is checked for None after `docker diff`
        # above, but the two sdhash invocations below are not checked.
        file_sdhash = exec_cmd(['sdhash', os.path.join(temp_dir, basename)])

        try:
            with open("file_hash", "w") as f:
                f.write(file_sdhash)
            with open("ref_hash", "w") as f:
                f.write(ref_sdhash)

            file1 = os.path.abspath('file_hash')
            file2 = os.path.abspath('ref_hash')

            # compare file hash with reference hash
            resline = exec_cmd(['sdhash', '-c', file1, file2, '-t', '0'])
            # the score is the last "|"-separated field of the output
            score = resline.strip().split('|')[-1]
        finally:
            # Remove the scratch files even when the comparison fails.
            for scratch in ("file_hash", "ref_hash"):
                if os.path.exists(scratch):
                    os.remove(scratch)

        if score == "100":
            print(filename + ' match 100%')
        else:
            changed_files[filename] = score

    return json.dumps(changed_files)
def __init__(self, thread_Id):
    """
    Create the worker and start it immediately as a daemon thread.

    param thread_Id: identifier stored on the instance as self.thread_Id
    """
    # The base class must be initialised, otherwise the instance is not a
    # usable Thread object (repr(), start() and join() fail).
    threading.Thread.__init__(self)
    self.thread_Id = thread_Id
    self.elasticDB = ElasticDatabase(EsCfg)
    self.daemon = True  # Daemonize thread
    self.start()        # Start the execution