def test_directory_hash(self):
    """Test that we can read a hashcode for a directory.

    Hashes the ``testdata/thaga_janakari_gene_ds/hg19`` directory (a relative path) and
    compares the result against the known ground-truth md5.
    """
    test_dir = "testdata/thaga_janakari_gene_ds/hg19"
    expected = "cd04c3299a21ce5fbe1bb9dc0d73d907"

    # Hash the directory once and reuse the value for both the comparison and the
    # failure message; the message argument is built eagerly, so hashing inside it
    # would walk the directory a second time on every run.
    hashcode = Hasher().create_hashcode_for_dir(test_dir)

    self.assertEqual(
        hashcode,
        expected,
        "Hashed directory did not match ground truth. (" + hashcode + ") for path: "
        + os.path.abspath(test_dir))
def get_hashcode(self):
    """
    Compute the hashcode for this datasource, including its current state.

    This class can change annotation values depending on certain state attributes
    (e.g. tx-mode), so the hashcode has to change with them. The hashcode attribute
    from the super class is only treated as an initial hashcode: it is combined with
    the tx-mode and the string form of the custom canonical transcripts.

    In other words, hashcode is not a simple attribute for this datasource class.

    :return: hashcode including state information
    """
    digest = Hasher()

    # Gather every piece of state that is relevant for caching before hashing any of it.
    initial_hashcode = self.hashcode
    tx_mode = self.get_tx_mode()
    custom_canonical_txs = str(self._custom_canonical_txs)

    digest.update(initial_hashcode)
    digest.update(tx_mode)
    digest.update(custom_canonical_txs)

    return Hasher.md5_hash(digest.hexdigest())
def create_datasource_md5_file(datasource_dir):
    """Write the hashcode of a datasource directory to a sibling ``.md5`` file.

    datasource_dir should be the /db_dir/ds_name/genome_build.

    For example, create_datasource_md5_file("/home/user/my_db_dir/gaf/hg19") writes the
    hashcode to /home/user/my_db_dir/gaf/hg19.md5

    :param datasource_dir: path of the datasource directory to hash. A single trailing
        '/' is removed before the path is used.
    """
    if datasource_dir.endswith('/'):
        datasource_dir = datasource_dir[:-1]

    # The md5 file lives next to the directory: <absolute datasource_dir>.md5
    # (already absolute, so no further abspath call is needed for logging).
    md5_filename = os.path.abspath(datasource_dir) + ".md5"
    logging.getLogger(__name__).info("md5 being written to: %s", md5_filename)

    hashcode = Hasher().create_hashcode_for_dir(datasource_dir)

    # open() is available on both Python 2 and 3 (file() is Python 2 only), and the
    # with-block closes the handle even if the write raises.
    with open(md5_filename, "w") as fp:
        fp.write(hashcode)
def get_hashcode(self):
    """Return the hashcode of this datasource, adjusted by the internal tx mode.

    The GAF datasource has to adjust its key based on the tx mode: the initial
    hashcode (sent in through set_hashcode) is combined with the current tx-mode.

    :return: hex digest over the initial hashcode followed by the tx-mode
    """
    digest = Hasher()

    # Initial hashcode first, then the tx-mode.
    digest.update(self.hashcode)
    tx_mode = self.get_tx_mode()
    digest.update(tx_mode)

    return digest.hexdigest()
def create_db_dir_key(self):
    """Create the db_dir_key for this annotation configuration.  Requires the datasources.

    The hashcode of every datasource in ``self._datasources`` is fed, in list order,
    into a single hasher; the key is the md5 of that hasher's hex digest. Each
    datasource hashcode and the final key are logged at INFO level.

    :return: the db-dir key
    """
    self.logger.info("Generating db-dir key from datasources...")

    hasher = Hasher()
    for ds in self._datasources:
        # Ask the datasource for its hashcode once and reuse it for both the log
        # line and the hash update, instead of computing it twice.
        ds_hashcode = ds.get_hashcode()
        self.logger.info("%s %s md5: %s", ds.title, ds.version, ds_hashcode)
        hasher.update(ds_hashcode)

    db_dir_key = Hasher.md5_hash(hasher.hexdigest())
    self.logger.info("Final db-dir md5: %s", db_dir_key)
    return db_dir_key
def create_db_dir_key_simple(self):
    """Create the db_dir_key for this annotation configuration from its header string.

    The key is the md5 hash of the string returned by ``self.createHeaderString(False)``.

    :return: the db-dir key
    """
    header_string = self.createHeaderString(False)
    return Hasher.md5_hash(header_string)
def test_simple_hash(self):
    """Test that the single md5 call (static) functions correctly."""
    guess = Hasher.md5_hash("blah\n")

    # assertEqual reports both values when the check fails, whereas assertTrue on an
    # == expression would only report "False is not true".
    self.assertEqual(guess, "0d599f0ec05c3bda8c3b8a68c32a1b47")
def test_directory_hash(self):
    """Test that we can read a hashcode for a directory.

    Hashes the ``testdata/thaga_janakari_gene_ds/hg19`` directory (a relative path) and
    compares the result against the known ground-truth md5.
    """
    test_dir = "testdata/thaga_janakari_gene_ds/hg19"
    expected = "cd04c3299a21ce5fbe1bb9dc0d73d907"

    # Hash the directory once and reuse the value for both the comparison and the
    # failure message; the message argument is built eagerly, so hashing inside it
    # would walk the directory a second time on every run.
    hashcode = Hasher().create_hashcode_for_dir(test_dir)

    self.assertEqual(
        hashcode,
        expected,
        "Hashed directory did not match ground truth. (" + hashcode + ") for path: "
        + os.path.abspath(test_dir))