def test_add_hashes_to_exists_list_with_founds(self):
    """
    Adding hashes to an existing hashlist: new hashes from the tmp file are
    merged with the ones already in DB, and a password already cracked for
    the same hash in another hashlist is copied over by the loader thread.
    """
    # Hashlist 1 is not parsed yet; its source file still has to be loaded.
    self._add_hashlist(parsed=0, tmp_path='/tmp/1.txt', status='wait')
    file_put_contents(
        '/tmp/1.txt',
        'c\nd\ne\n',
    )
    self._add_hash(hash='a', summ=md5('a'))
    self._add_hash(hash='b', summ=md5('b'))
    self._add_hash(hash='c', summ=md5('c'))
    # Hashlist 2 already holds hash 'a' cracked with password 'aaa'; the
    # loader must propagate that password to hashlist 1.
    # (Fixed: fixture password was '******', contradicting the assertion
    # below that expects 'aaa' to be copied.)
    self._add_hashlist(id=2)
    self._add_hash(hashlist_id=2, hash='a', summ=md5('a'), cracked=1, password='aaa')
    self.thrd.start()
    time.sleep(5)
    # Union of DB hashes (a, b, c) and file hashes (c, d, e), de-duplicated.
    assert self.db.fetch_col("SELECT hash FROM hashes WHERE hashlist_id = 1 ORDER BY hash") == \
        ['a', 'b', 'c', 'd', 'e']
    assert self.db.fetch_one(
        "SELECT password FROM hashes "
        "WHERE hashlist_id = 1 AND cracked = 1 AND hash = 'a'") == 'aaa'
def test_mask_task(self):
    """
    Run a simple brute-force (mask) task and verify the worker's outfile.
    """
    # Four md5 hashes; only md5('ccc') matches the ?l?l?l (3 lowercase) mask.
    self._add_hashlist(alg_id=0, uncracked=4)
    self._add_hash(hash=md5('123'))
    self._add_hash(hash=md5('456'))
    self._add_hash(hash=md5('ccc'))
    self._add_hash(hash=md5('789'))
    self._add_work_task()
    self._add_task(source='?l?l?l', type='mask')
    self.thrd = WorkerThread(
        self.db.fetch_row("SELECT * FROM task_works WHERE id = 1"))
    self.thrd.catch_exceptions = False
    self.thrd.start()
    start_time = int(time.time())
    # Poll until the worker finishes; fail the test if it exceeds 5 seconds.
    while True:
        if self.thrd.done:
            break
        if int(time.time()) - start_time > 5:
            pytest.fail("Long time of WorkerThread")
        time.sleep(1)
    wtask = self.db.fetch_row("SELECT * FROM task_works WHERE id = 1")
    assert wtask['status'] == 'waitoutparse'
    assert wtask['uncracked_before'] == 4
    assert os.path.exists(wtask['out_file'])
    # Outfile format is hash:hex(password); 636363 is hex of 'ccc'.
    assert file_get_contents(
        wtask['out_file']) == '9df62e693988eb4e1e1444ece0578579:636363\n'
def test_mask_task(self):
    """
    Run a simple brute-force (mask) task and verify the worker's outfile.
    """
    # Four md5 hashes; only md5('ccc') matches the ?l?l?l (3 lowercase) mask.
    self._add_hashlist(alg_id=0, uncracked=4)
    self._add_hash(hash=md5('123'))
    self._add_hash(hash=md5('456'))
    self._add_hash(hash=md5('ccc'))
    self._add_hash(hash=md5('789'))
    self._add_work_task()
    self._add_task(source='?l?l?l', type='mask')
    self.thrd = WorkerThread(self.db.fetch_row("SELECT * FROM task_works WHERE id = 1"))
    self.thrd.catch_exceptions = False
    self.thrd.start()
    start_time = int(time.time())
    # Poll until the worker finishes; fail the test if it exceeds 5 seconds.
    while True:
        if self.thrd.done:
            break
        if int(time.time()) - start_time > 5:
            pytest.fail("Long time of WorkerThread")
        time.sleep(1)
    wtask = self.db.fetch_row("SELECT * FROM task_works WHERE id = 1")
    assert wtask['status'] == 'waitoutparse'
    assert wtask['uncracked_before'] == 4
    assert os.path.exists(wtask['out_file'])
    # Outfile format is hash:hex(password); 636363 is hex of 'ccc'.
    assert file_get_contents(wtask['out_file']) == '9df62e693988eb4e1e1444ece0578579:636363\n'
def sorted_file_to_db_file(self, sorted_file_path, hashlist):
    """
    Convert a sorted text hashlist into a CSV-like file ready for DB load
    (one '"hashlist_id","hash","salt","summ"' row per input line).

    :param sorted_file_path: path to the sorted plain-text hashlist; removed on success
    :param hashlist: hashlist row (dict with 'id', 'have_salts', 'delimiter')
    :return: path to the generated load file
    """
    self.update_status("preparedb")
    self.log("Prepare file for DB load")

    errors_lines = ""
    put_in_db_path = self.tmp_dir + "/" + gen_random_md5()
    # 'with' guarantees both handles are closed even if a line fails to
    # parse (the original leaked them on exception).
    with open(put_in_db_path, 'w') as fh_to_db, open(sorted_file_path) as fh_sorted:
        for _line in fh_sorted:
            _hash = None
            salt = ""
            _line = _line.strip()
            # Skip blank lines and lines consisting of the delimiter only.
            if len(_line) == 0 or (int(hashlist['have_salts']) and _line == hashlist['delimiter']):
                continue
            if int(hashlist['have_salts']):
                if _line.count(hashlist['delimiter']):
                    # Split on the first delimiter occurrence: hash<delim>salt.
                    _hash = _line[:_line.index(hashlist['delimiter'])]
                    salt = _line[_line.index(hashlist['delimiter']) + len(hashlist['delimiter']):]
                else:
                    # Salted list but no delimiter in this line - collect as broken.
                    errors_lines += _line + "\n"
            else:
                _hash = _line
            if _hash is not None:
                # Escape backslashes and double quotes for the loader.
                # 'summ' is md5("hash:salt") for salted rows, md5(hash) otherwise.
                fh_to_db.write(
                    '"{0}","{1}","{2}","{3}"\n'.format(
                        hashlist['id'],
                        _hash.replace('\\', '\\\\').replace('"', '\\"'),
                        salt.replace('\\', '\\\\').replace('"', '\\"'),
                        md5(_hash + ":" + salt) if len(salt) else md5(_hash)
                    )
                )
    os.remove(sorted_file_path)

    if len(errors_lines):
        self.update_hashlist_field("errors", errors_lines)

    return put_in_db_path
def sorted_file_to_db_file(self, sorted_file_path, hashlist):
    """
    Convert a sorted text hashlist into a CSV-like file ready for DB load
    (one '"hashlist_id","hash","salt","summ"' row per input line).

    :param sorted_file_path: path to the sorted plain-text hashlist; removed afterwards
    :param hashlist: hashlist row (dict with 'id', 'have_salts', 'delimiter')
    :return: path to the generated load file
    """
    self.update_status("preparedb")
    self.log("Prepare file for DB load")

    errors_lines = ""
    put_in_db_path = self.tmp_dir + "/" + gen_random_md5()
    fh_to_db = open(put_in_db_path, 'w')
    fh_sorted = open(sorted_file_path)
    for _line in fh_sorted:
        _hash = None
        _line = _line.strip()
        # Skip blank lines and lines consisting of the delimiter only.
        if len(_line) == 0 or (int(hashlist['have_salts']) and _line == hashlist['delimiter']):
            continue
        if int(hashlist['have_salts']):
            if _line.count(hashlist['delimiter']):
                # Split on the first delimiter occurrence: hash<delim>salt.
                _hash = _line[:_line.index(hashlist['delimiter'])]
                salt = _line[_line.index(hashlist['delimiter']) + len(hashlist['delimiter']):]
            else:
                # Salted list but no delimiter in this line - collect as broken.
                errors_lines += _line + "\n"
        else:
            _hash = _line
            salt = ""
        if _hash is not None:
            # Escape backslashes and double quotes for the loader.
            # 'summ' is md5("hash:salt") for salted rows, md5(hash) otherwise.
            fh_to_db.write('"{0}","{1}","{2}","{3}"\n'.format(
                hashlist['id'],
                _hash.replace('\\', '\\\\').replace('"', '\\"'),
                salt.replace('\\', '\\\\').replace('"', '\\"'),
                md5(_hash + ":" + salt) if len(salt) else md5(_hash)))
    fh_to_db.close()
    fh_sorted.close()
    os.remove(sorted_file_path)

    if len(errors_lines):
        self.update_hashlist_field("errors", errors_lines)

    return put_in_db_path
def prepare_first_pages(host):
    """
    Prepare links for the first pass in MongoDB.
    Adds the root url '/' if no urls exist for this host yet.

    :param host: host name to seed the spider queue for
    """
    pid = Registry().get('pData')['id']
    # The spider work queue lives in the spider_urls collection; rebuild it.
    coll = Registry().get('mongo').spider_urls
    coll.drop()
    Urls = UrlsModel()
    urls = Urls.list_by_host_name_for_spider(pid, host)
    if not len(urls):
        # No known urls for this host - seed with the root url.
        Registry().get('logger').log("Spider: Root URL was added automaticaly")
        Urls.add(
            pid,
            HostsModel().get_id_by_name(pid, host),
            '/',
            who_add='spider'
        )
        urls = Urls.list_by_host_name_for_spider(pid, host)
    for url in urls:
        url = urlparse(url['url'])
        data = {
            # hash uniquely identifies a path+query pair (unique index below).
            'hash': md5(str(url.path + url.query)),
            'path': url.path,
            'query': url.query,
            'time': 0,
            'code': 0,
            'checked': 0,
            'getted': 0,
            'referer': '',
            'size': 0,
            'founder': 'spider'
        }
        coll.insert(data)
    # NOTE(review): dropDups and insert() are legacy PyMongo/MongoDB APIs -
    # confirm the deployed server/driver versions still accept them.
    coll.create_index([('hash', 1)], unique=True, dropDups=True)
    coll.create_index([('checked', 1)])
def add(self, pid, host_id, url, referer='', response_code=0,
        response_time=0, who_add='human', spidered=0, size=0, descr=''):
    """
    Add a url row to the urls table.

    :param pid: project id
    :param host_id: host id
    :param url: url string (md5 of it fills the 'hash' column)
    :param referer: referer url
    :param response_code: HTTP response code
    :param response_time: response time
    :param who_add: origin of the record ('human', 'spider', ...)
    :param spidered: 1 if the url was already spidered
    :param size: response size
    :param descr: free-form description
    :return: result of the DB insert (presumably the insert id); None on error
    """
    try:
        return self._db.insert(
            "urls",
            {
                "project_id": pid,
                "host_id": host_id,
                "hash": md5(url),
                "url": url,
                "referer": referer,
                "response_code": response_code,
                "response_time": response_time,
                "when_add": int(time.time()),
                "who_add": who_add,
                "spidered": spidered,
                "size": size,
                "descr": descr
            },
            1)
    except BaseException as e:
        # Deliberate best-effort: log (or print) the failure, never propagate.
        if Registry().isset('logger'):
            Registry().get('logger').ex(e)
        else:
            print "Can`t add url: " + str(e)
def delete(self, project_id, host, url):
    """ Remove the given url of a host from the urls table. """
    hid = HostsModel().get_id_by_name(project_id, host)
    quoted_hash = self._db.quote(md5(url))
    query = (
        "DELETE FROM urls WHERE project_id = {0} AND host_id = {1} AND hash = {2} "
        .format(project_id, hid, quoted_hash)
    )
    self._db.q(query)
def update_url_field(self, project_id, host, url, field, value):
    """ Update one custom field of the given url row. """
    hid = HostsModel().get_id_by_name(project_id, host)
    where = "hash = '{0}' AND project_id = {1} AND host_id = {2}".format(
        md5(url), project_id, hid)
    return self._db.update("urls", {field: value}, where)
def exists(self, project_id, host, url):
    """ Check whether the url is already present for this project/host. """
    hid = HostsModel().get_id_by_name(project_id, host)
    query = (
        "SELECT 1 FROM urls WHERE project_id = {0} AND host_id={1} AND hash = '{2}'"
        .format(project_id, hid, md5(url))
    )
    return self._db.fetch_one(query)
def add(
        self, pid, host_id, url, referer='', response_code=0,
        response_time=0, who_add='human', spidered=0, size=0, descr=''
):
    """
    Add a url row to the urls table.

    :param pid: project id
    :param host_id: host id
    :param url: url string (md5 of it fills the 'hash' column)
    :param referer: referer url
    :param response_code: HTTP response code
    :param response_time: response time
    :param who_add: origin of the record ('human', 'spider', ...)
    :param spidered: 1 if the url was already spidered
    :param size: response size
    :param descr: free-form description
    :return: result of the DB insert (presumably the insert id); None on error
    """
    try:
        return self._db.insert(
            "urls",
            {
                "project_id": pid,
                "host_id": host_id,
                "hash": md5(url),
                "url": url,
                "referer": referer,
                "response_code": response_code,
                "response_time": response_time,
                "when_add": int(time.time()),
                "who_add": who_add,
                "spidered": spidered,
                "size": size,
                "descr": descr
            },
            1
        )
    except BaseException as e:
        # Deliberate best-effort: log (or print) the failure, never propagate.
        if Registry().isset('logger'):
            Registry().get('logger').ex(e)
        else:
            print "Can`t add url: " + str(e)
def test_add_mass(self):
    """
    add_mass() must insert every row and substitute the documented default
    for any optional field missing in the input dict.
    """
    assert self.db.fetch_one("SELECT 1 FROM urls") is None
    # Each row after the first omits exactly one optional field.
    data = [
        {'url': '/1/', 'referer': '/ref1/', 'response_code': 401, 'response_time': 10,
         'who_add': 'dafs', 'spidered': 1, 'size': 20, 'descr': 'some descr'},
        {'url': '/2/', 'response_code': 401, 'response_time': 10,
         'who_add': 'dafs', 'spidered': 1, 'size': 20, 'descr': 'some descr'},
        {'url': '/3/', 'referer': '/ref3/', 'response_time': 10,
         'who_add': 'dafs', 'spidered': 1, 'size': 20, 'descr': 'some descr'},
        {'url': '/4/', 'referer': '/ref4/', 'response_code': 401,
         'who_add': 'dafs', 'spidered': 1, 'size': 20, 'descr': 'some descr'},
        {'url': '/5/', 'referer': '/ref5/', 'response_code': 401, 'response_time': 10,
         'spidered': 1, 'size': 20, 'descr': 'some descr'},
        {'url': '/6/', 'referer': '/ref6/', 'response_code': 401, 'response_time': 10,
         'who_add': 'dafs', 'size': 20, 'descr': 'some descr'},
        {'url': '/7/', 'referer': '/ref7/', 'response_code': 401, 'response_time': 10,
         'who_add': 'dafs', 'spidered': 1, 'descr': 'some descr'},
        {'url': '/8/', 'referer': '/ref8/', 'response_code': 401, 'response_time': 10,
         'who_add': 'dafs', 'spidered': 1, 'size': 20}
    ]
    # The same rows with the expected default filled in for the omitted field.
    test_data = [
        {'url': '/1/', 'referer': '/ref1/', 'response_code': 401, 'response_time': 10,
         'who_add': 'dafs', 'spidered': 1, 'size': 20, 'descr': 'some descr'},
        {'url': '/2/', 'referer': '', 'response_code': 401, 'response_time': 10,
         'who_add': 'dafs', 'spidered': 1, 'size': 20, 'descr': 'some descr'},
        {'url': '/3/', 'referer': '/ref3/', 'response_code': 0, 'response_time': 10,
         'who_add': 'dafs', 'spidered': 1, 'size': 20, 'descr': 'some descr'},
        {'url': '/4/', 'referer': '/ref4/', 'response_code': 401, 'response_time': 0,
         'who_add': 'dafs', 'spidered': 1, 'size': 20, 'descr': 'some descr'},
        {'url': '/5/', 'referer': '/ref5/', 'response_code': 401, 'response_time': 10,
         'who_add': 'human', 'spidered': 1, 'size': 20, 'descr': 'some descr'},
        {'url': '/6/', 'referer': '/ref6/', 'response_code': 401, 'response_time': 10,
         'who_add': 'dafs', 'spidered': 0, 'size': 20, 'descr': 'some descr'},
        {'url': '/7/', 'referer': '/ref7/', 'response_code': 401, 'response_time': 10,
         'who_add': 'dafs', 'spidered': 1, 'size': 0, 'descr': 'some descr'},
        {'url': '/8/', 'referer': '/ref8/', 'response_code': 401, 'response_time': 10,
         'who_add': 'dafs', 'spidered': 1, 'size': 20, 'descr': ''},
    ]
    self.model.add_mass(1, 2, data)
    # Rows come back in insert order, so id-1 maps onto the fixture index.
    for test_url in self.db.fetch_all("SELECT * FROM urls ORDER BY id ASC"):
        test_key = test_url['id']-1
        assert test_url['project_id'] == 1
        assert test_url['host_id'] == 2
        assert test_url['hash'] == md5(test_data[test_key]['url'])
        assert test_url['url'] == test_data[test_key]['url']
        assert test_url['referer'] == \
            ('' if 'referer' not in test_data[test_key].keys() else test_data[test_key]['referer'])
        assert test_url['response_code'] == \
            (0 if 'response_code' not in test_data[test_key].keys() else test_data[test_key]['response_code'])
        assert test_url['response_time'] == \
            (0 if 'response_time' not in test_data[test_key].keys() else test_data[test_key]['response_time'])
        assert test_url['size'] == (0 if 'size' not in test_data[test_key].keys() else test_data[test_key]['size'])
        assert test_url['who_add'] == \
            ('human' if 'who_add' not in test_data[test_key].keys() else test_data[test_key]['who_add'])
        assert test_url['descr'] == \
            ('' if 'descr' not in test_data[test_key].keys() else test_data[test_key]['descr'])
        assert test_url['spidered'] == \
            (0 if 'spidered' not in test_data[test_key].keys() else test_data[test_key]['spidered'])
def exists(self, project_id, host, url):
    """
    Check whether the url is already present for this project/host.

    :param project_id: project id
    :param host: host name (resolved to host_id)
    :param url: url string (matched by its md5 in the 'hash' column)
    :return: truthy if a row exists, falsy otherwise
    """
    host_id = HostsModel().get_id_by_name(project_id, host)
    return self._db.fetch_one(
        "SELECT 1 FROM urls WHERE project_id = {0} AND host_id={1} AND hash = '{2}'"
        .format(project_id, host_id, md5(url))
    )
def update_url_field(self, project_id, host, url, field, value):
    """
    Update one custom field of the given url row.

    :param project_id: project id
    :param host: host name (resolved to host_id)
    :param url: url string (matched by its md5 in the 'hash' column)
    :param field: column name to update
    :param value: new value
    :return: result of the DB update
    """
    host_id = HostsModel().get_id_by_name(project_id, host)
    return self._db.update(
        "urls",
        {field: value},
        "hash = '{0}' AND project_id = {1} AND host_id = {2}".format(md5(url), project_id, host_id)
    )
def _scan(self, config, test_data, test_files_content, url_decrease, insert_root=True, additional_run_params=None):
    """
    Common scan helper: run the Spider module against wrtest.com and verify
    the urls table, the saved page dumps and the urls_base path tree.

    :param config: config to install for the run
    :param test_data: dict url -> expected urls-table field values
    :param test_files_content: dict url -> substring expected in the saved page dump
    :param url_decrease: how many scanned urls are expected to have no dump file
    :param insert_root: insert the root url before the scan?
    :param additional_run_params: extra CLI arguments for main.py
    :return: captured process output
    """
    additional_run_params = additional_run_params or []
    self._prepare_db(insert_root)
    run_params = [
        './main.py',
        'prj',
        'Spider',
        'scan',
        '--host=wrtest.com'
    ]
    run_params.extend(additional_run_params)
    # Selenium runs are forced single-threaded; plain runs use 3 threads.
    run_params.append('--threads=3' if '--selenium=1' not in run_params else '--threads=1')
    self._replace_config(config)
    os.chdir(wrpath)
    out = subprocess.check_output(run_params)
    self._restore_config()
    self.output_errors(out)
    urls = self.db.fetch_all("SELECT * FROM urls ORDER BY id ASC")
    assert len(urls) == len(test_data)
    # All scanned urls except url_decrease of them must have a dump on disk.
    assert len(urls)-url_decrease == len(os.listdir(Registry().get('data_path') + 'wrtest.com/'))
    for url in urls:
        if url['url'] == '/slow.php':
            # /slow.php is served slowly on purpose to exercise response_time.
            assert url['response_time'] > 4
        test_data_row = test_data[url['url']]
        for field in test_data_row:
            assert test_data_row[field] == url[field]
    # Dumps are stored under data_path/wrtest.com/<md5(url)>.
    for url in test_files_content:
        data = file_get_contents(Registry().get('data_path') + 'wrtest.com/' + md5(url))
        assert data.find(test_files_content[url]) > -1
    # urls_base must contain the expected path chain: / -> deep -> moredeep -> dir1.
    assert self.db.fetch_one("SELECT COUNT(id) FROM urls_base WHERE name='/' AND parent_id=0 AND host_id=1") == 1
    assert self.db.fetch_one("SELECT COUNT(id) FROM urls_base WHERE name='deep' AND parent_id=1 AND host_id=1") == 1
    assert self.db.fetch_one(
        "SELECT COUNT(id) FROM urls_base WHERE name='moredeep' AND parent_id="
        "(SELECT id FROM urls_base WHERE name='deep' AND parent_id=1 AND host_id=1) AND host_id=1"
    ) == 1
    assert self.db.fetch_one(
        "SELECT COUNT(id) FROM urls_base WHERE name='dir1' AND parent_id="
        "(SELECT id FROM urls_base WHERE name='moredeep' AND host_id=1) AND host_id=1"
    ) == 1
    assert self.db.fetch_one("SELECT COUNT(id) FROM urls_base_params") > 0
    return out
def update_url_field_mass(self, project_id, host, field, data):
    """
    Mass update of one custom field for many urls at once.

    :param project_id: project id
    :param host: host name (resolved to host_id)
    :param field: column name to update
    :param data: list of dicts {'url': ..., 'value': ...}
    """
    host_id = HostsModel().get_id_by_name(project_id, host)
    update = {}
    for row in data:
        # One WHERE-case per url, matched by host and md5(url).
        case = "host_id = '{0}' AND `hash` = '{1}' ".format(
            host_id, md5(row['url']))
        update[case] = row['value']
    # Fixed: the collected updates were built but never written to the DB
    # (the complete variant of this method issues update_mass).
    self._db.update_mass("urls", field, update)
def save(self, data):
    """ Upsert scraped items into the 'vmgirls' collection, keyed by md5(title). """
    collection = self.get_db().get_collection('vmgirls')
    for entry in data:
        key = md5(entry['title'])
        if not collection.find_one({'_id': key}):
            # First time we see this title: initialize bookkeeping fields.
            entry['download'] = 0
            entry['imgur_id'] = ''
        collection.update_one({'_id': key}, {'$set': entry}, True)
def _path_exists(self, host_id, parent_id, name):
    """ Look up the path node in urls_base, memoizing results per instance. """
    cache_key = md5("{0}-{1}-{2}".format(host_id, parent_id, name))
    if cache_key not in self._pathCache.keys():
        query = (
            'SELECT id FROM urls_base WHERE host_id = {0} AND parent_id = {1} AND name = {2}'
            .format(int(host_id), int(parent_id), self._db.quote(name))
        )
        self._pathCache[cache_key] = self._db.fetch_one(query)
    return self._pathCache[cache_key]
def _path_exists(self, host_id, parent_id, name):
    """
    Look up the path node in urls_base, memoizing results per instance.

    :param host_id: host id
    :param parent_id: id of the parent path node
    :param name: path component name
    :return: cached result of the SELECT (the node id, or no-row value)
    """
    # Cache key covers all three lookup dimensions.
    _hash = md5("{0}-{1}-{2}".format(host_id, parent_id, name))
    if _hash not in self._pathCache.keys():
        self._pathCache[_hash] = \
            self._db.fetch_one(
                'SELECT id FROM urls_base WHERE host_id = {0} AND parent_id = {1} AND name = {2}'.
                format(int(host_id), int(parent_id), self._db.quote(name))
            )
    return self._pathCache[_hash]
def update_url_field_mass(self, project_id, host, field, data):
    """
    Mass update of one custom field for many urls at once.

    :param project_id: project id
    :param host: host name (resolved to host_id)
    :param field: column name to update
    :param data: list of dicts {'url': ..., 'value': ...}
    """
    host_id = HostsModel().get_id_by_name(project_id, host)
    update = {}
    for row in data:
        # One WHERE-case per url, matched by host and md5(url).
        case = "host_id = '{0}' AND `hash` = '{1}' ".format(host_id, md5(row['url']))
        update[case] = row['value']
    self._db.update_mass("urls", field, update)
def test_maskdict_task(self):
    """
    Run a hybrid mask+dict task and verify the worker's outfile.
    """
    # Only '1ccc' (= mask digit '?d' + dict word 'ccc') is crackable.
    self._add_hashlist(alg_id=0, uncracked=4)
    self._add_hash(hash=md5('123'))
    self._add_hash(hash=md5('456'))
    self._add_hash(hash=md5('1ccc'))
    self._add_hash(hash=md5('789'))
    self._add_work_task()
    self._add_task(source=json.dumps({
        'mask': '?d',
        'dict': 1
    }), type='maskdict')
    dicts_path = Registry().get('config')['main']['dicts_path']
    self._add_dict_group()
    self._add_dict()
    self._add_dict(id=2, hash='2')
    file_put_contents(dicts_path + "/1.dict", "aaa\nbbb\n")
    file_put_contents(dicts_path + "/2.dict", "ccc\nddd\n")
    self.thrd = WorkerThread(
        self.db.fetch_row("SELECT * FROM task_works WHERE id = 1"))
    self.thrd.catch_exceptions = False
    self.thrd.start()
    start_time = int(time.time())
    # Poll until the worker finishes; fail the test if it exceeds 5 seconds.
    while True:
        if self.thrd.done:
            break
        if int(time.time()) - start_time > 5:
            pytest.fail("Long time of WorkerThread")
        time.sleep(1)
    wtask = self.db.fetch_row("SELECT * FROM task_works WHERE id = 1")
    assert wtask['status'] == 'waitoutparse'
    assert wtask['uncracked_before'] == 4
    assert os.path.exists(wtask['out_file'])
    # Outfile format is hash:hex(password); 31636363 is hex of '1ccc'.
    assert file_get_contents(
        wtask['out_file']) == '49a14108270c0596ac1d70c3c4f82a10:31636363\n'
def scan_links(self, links):
    """
    Scan links with the Selenium-driven browser: render each page, collect
    new links, dump the page source to disk and record size/time per link.
    """
    for link in links:
        self.last_action = int(time.time())
        url = SpiderCommon.gen_url(link, self.host)
        start_time = int(round(time.time() * 1000))
        # pre_url (path?query) is the key used as the page-dump file name.
        pre_url = link['path'] + '?' + link['query'] if len(
            link['query']) else link['path']
        if self.delay:
            time.sleep(self.delay)
        try:
            self.browser.get(url)
            time.sleep(1)
            #content_type = response.headers['content-type'].split(";")[0] \
            #    if (response.headers['content-type'].find(";") != -1) \
            #    else response.headers['content-type']
            #if 299 < response.status_code < 400:
            #    SpiderCommon.insert_links([response.headers['Location']], url, self.host)
            #else:
            #new_links = self.links_parser.parse_links('text/html', str(self.browser.page_source), link)
            #SpiderCommon.insert_links(new_links, url, self.host)
            source = str(
                self.browser.page_source.encode('utf8', errors='ignore'))
            new_links = self.links_parser.parse_links(
                'text/html', source, link)
            SpiderCommon.insert_links(new_links, url, self.host)
            # Heuristic 404 detection: regex match on the rendered page.
            if self.not_found_re.findall(self.browser.page_source):
                link['code'] = 404
            result_time = int(round(time.time() * 1000)) - start_time
            # Dump the rendered page under data_path/<host>/<md5(pre_url)>.
            file_put_contents(
                "{0}{1}/{2}".format(Registry().get('data_path'), self.host,
                                    md5(pre_url)),
                str(self.browser.page_source.encode('utf-8')))
            link['size'] = len(self.browser.page_source)
            link['time'] = result_time
            if int(link['code']) == 0:
                # Selenium gives no HTTP code - drop the field instead of storing 0.
                del link['code']
        except BaseException as e:
            # Page-load timeouts are expected noise; print anything else.
            if not str(e).count('Timed out waiting for page load'):
                print str(e)
        self.up_requests_count()
    SpiderCommon.links_checked(links)
def test_add_hashes_to_exists_list_with_founds(self):
    """
    Adding hashes to an existing hashlist: new hashes from the tmp file are
    merged with the ones already in DB, and a password already cracked for
    the same hash in another hashlist is copied over by the loader thread.
    """
    # Hashlist 1 is not parsed yet; its source file still has to be loaded.
    self._add_hashlist(parsed=0, tmp_path='/tmp/1.txt', status='wait')
    file_put_contents('/tmp/1.txt',
                      'c\nd\ne\n', )
    self._add_hash(hash='a', summ=md5('a'))
    self._add_hash(hash='b', summ=md5('b'))
    self._add_hash(hash='c', summ=md5('c'))
    self._add_hashlist(id=2)
    # NOTE(review): this fixture password is '******' while the assertion
    # below expects 'aaa' to be copied into hashlist 1 - one of the two
    # looks redacted/wrong; confirm against the loader-thread behavior.
    self._add_hash(hashlist_id=2, hash='a', summ=md5('a'), cracked=1, password='******')
    self.thrd.start()
    time.sleep(5)
    # Union of DB hashes (a, b, c) and file hashes (c, d, e), de-duplicated.
    assert self.db.fetch_col("SELECT hash FROM hashes WHERE hashlist_id = 1 ORDER BY hash") == \
        ['a', 'b', 'c', 'd', 'e']
    assert self.db.fetch_one("SELECT password FROM hashes "
                             "WHERE hashlist_id = 1 AND cracked = 1 AND hash = 'a'") == 'aaa'
def put_found_hashes_in_db(self, alg_id, hashes):
    """
    Put hashes found by Finder into the DB: mark matching uncracked rows
    of all hashlists with this algorithm as cracked.

    :param alg_id: algorithm id
    :param hashes: list of dicts with 'hash', 'salt', 'password'
    :return:
    """
    for _hash in hashes:
        # summ is md5("hash:salt") for salted hashes, md5(hash) otherwise.
        summ = md5("{0}:{1}".format(_hash['hash'], _hash['salt'])) if len(
            _hash['salt']) else md5(_hash['hash'])
        # Match rows by summ within the algorithm; only touch uncracked ones.
        self._db.q(
            "UPDATE `hashes` h, hashlists hl "
            "SET h.`password` = {0}, h.cracked = 1 "
            "WHERE h.hashlist_id = hl.id AND hl.alg_id = {1} AND h.summ = {2} AND h.cracked = 0"
            .format(
                self._db.quote(_hash['password']),
                alg_id,
                self._db.quote(summ),
            ))
def scan_links(self, links):
    """
    Scan links with the Selenium-driven browser: render each page, collect
    new links, dump the page source to disk and record size/time per link.
    """
    for link in links:
        self.last_action = int(time.time())
        url = SpiderCommon.gen_url(link, self.host, self.protocol)
        start_time = int(round(time.time() * 1000))
        # pre_url (path?query) is the key used as the page-dump file name.
        pre_url = link['path'] + '?' + link['query'] if len(link['query']) else link['path']
        if self.delay:
            time.sleep(self.delay)
        try:
            self.browser.get(url)
            time.sleep(1)
            #content_type = response.headers['content-type'].split(";")[0] \
            #    if (response.headers['content-type'].find(";") != -1) \
            #    else response.headers['content-type']
            #if 299 < response.status_code < 400:
            #    SpiderCommon.insert_links([response.headers['Location']], url, self.host)
            #else:
            #new_links = self.links_parser.parse_links('text/html', str(self.browser.page_source), link)
            #SpiderCommon.insert_links(new_links, url, self.host)
            source = str(self.browser.page_source.encode('utf8', errors='ignore'))
            new_links = self.links_parser.parse_links('text/html', source, link)
            SpiderCommon.insert_links(new_links, url, self.host)
            # Heuristic 404 detection: regex match on the rendered page.
            if self.not_found_re.findall(self.browser.page_source):
                link['code'] = 404
            result_time = int(round(time.time() * 1000)) - start_time
            # Dump the rendered page under data_path/<host>/<md5(pre_url)>.
            file_put_contents("{0}{1}/{2}".format(
                Registry().get('data_path'),
                self.host,
                md5(pre_url)
            ), str(self.browser.page_source.encode('utf-8')))
            link['size'] = len(self.browser.page_source)
            link['time'] = result_time
            if int(link['code']) == 0:
                # Selenium gives no HTTP code - drop the field instead of storing 0.
                del link['code']
        except BaseException as e:
            # Page-load timeouts are expected noise; print anything else.
            if not str(e).count('Timed out waiting for page load'):
                print str(e)
        self.up_requests_count()
    SpiderCommon.links_checked(links)
def add_mass(self, pid, host_id, data):
    """
    Add many urls at once (batched inserts of 50 rows).

    :param pid: project id
    :param host_id: host id
    :param data: list of url dicts; 'url' key is required, the other known
                 fields are filled with defaults when missing
    :return: True
    """
    to_insert = []
    for row in data:
        # Fill defaults for missing optional fields; 'url' is mandatory.
        for field in [
                'url', 'referer', 'response_code', 'response_time',
                'who_add', 'spidered', 'size', 'descr'
        ]:
            if field not in row.keys():
                if field in [
                        'referer', 'response_code', 'response_time', 'descr'
                ]:
                    row[field] = ''
                elif field in ['spidered', 'size']:
                    row[field] = 0
                elif field == 'who_add':
                    row[field] = 'human'
                elif field == 'url':
                    raise WSException("URL row must have a 'url' key")
        # Reject unknown keys to catch caller typos early.
        for k in row.keys():
            if k not in [
                    'url', 'referer', 'response_code', 'response_time',
                    'who_add', 'spidered', 'size', 'descr'
            ]:
                raise WSException(
                    "Key '{0}' must not be in url data".format(k))
        to_insert.append({
            'project_id': pid,
            "host_id": host_id,
            "hash": md5(row['url']),
            "url": row['url'],
            "referer": row['referer'],
            "response_code": row['response_code'],
            "response_time": row['response_time'],
            "when_add": int(time.time()),
            "who_add": row['who_add'],
            "spidered": row['spidered'],
            "size": row['size'],
            "descr": row['descr']
        })
        # Flush every 50 rows to keep insert statements bounded.
        if len(to_insert) % 50 == 0:
            self._db.insert_mass("urls", to_insert, 1)
            to_insert = []
    if len(to_insert):
        self._db.insert_mass("urls", to_insert, 1)
    return True
def test_dict_task(self):
    """
    Run a simple dictionary task and verify the worker's outfile.
    """
    # Four md5 hashes; only md5('ccc') appears in the dictionaries below.
    self._add_hashlist(alg_id=0, uncracked=4)
    self._add_hash(hash=md5('123'))
    self._add_hash(hash=md5('456'))
    self._add_hash(hash=md5('ccc'))
    self._add_hash(hash=md5('789'))
    self._add_work_task()
    dicts_path = Registry().get('config')['main']['dicts_path']
    self._add_dict_group()
    self._add_dict()
    self._add_dict(id=2, hash='2')
    file_put_contents(dicts_path + "/1.dict", "aaa\nbbb")
    file_put_contents(dicts_path + "/2.dict", "ccc\nddd")
    self._add_task(source=1)
    self.thrd = WorkerThread(
        self.db.fetch_row("SELECT * FROM task_works WHERE id = 1"))
    self.thrd.catch_exceptions = False
    self.thrd.start()
    start_time = int(time.time())
    # Poll until the worker finishes; fail the test if it exceeds 5 seconds.
    while True:
        if self.thrd.done:
            break
        if int(time.time()) - start_time > 5:
            pytest.fail("Long time of WorkerThread")
        time.sleep(1)
    wtask = self.db.fetch_row("SELECT * FROM task_works WHERE id = 1")
    assert wtask['status'] == 'waitoutparse'
    assert wtask['uncracked_before'] == 4
    assert os.path.exists(wtask['out_file'])
    # Outfile format is hash:hex(password); 636363 is hex of 'ccc'.
    assert file_get_contents(
        wtask['out_file']) == '9df62e693988eb4e1e1444ece0578579:636363\n'
def test_dict_task(self):
    """
    Run a simple dictionary task and verify the worker's outfile.
    """
    # Four md5 hashes; only md5('ccc') appears in the dictionaries below.
    self._add_hashlist(alg_id=0, uncracked=4)
    self._add_hash(hash=md5('123'))
    self._add_hash(hash=md5('456'))
    self._add_hash(hash=md5('ccc'))
    self._add_hash(hash=md5('789'))
    self._add_work_task()
    dicts_path = Registry().get('config')['main']['dicts_path']
    self._add_dict_group()
    self._add_dict()
    self._add_dict(id=2, hash='2')
    file_put_contents(dicts_path + "/1.dict", "aaa\nbbb")
    file_put_contents(dicts_path + "/2.dict", "ccc\nddd")
    self._add_task(source=1)
    self.thrd = WorkerThread(self.db.fetch_row("SELECT * FROM task_works WHERE id = 1"))
    self.thrd.catch_exceptions = False
    self.thrd.start()
    start_time = int(time.time())
    # Poll until the worker finishes; fail the test if it exceeds 5 seconds.
    while True:
        if self.thrd.done:
            break
        if int(time.time()) - start_time > 5:
            pytest.fail("Long time of WorkerThread")
        time.sleep(1)
    wtask = self.db.fetch_row("SELECT * FROM task_works WHERE id = 1")
    assert wtask['status'] == 'waitoutparse'
    assert wtask['uncracked_before'] == 4
    assert os.path.exists(wtask['out_file'])
    # Outfile format is hash:hex(password); 636363 is hex of 'ccc'.
    assert file_get_contents(wtask['out_file']) == '9df62e693988eb4e1e1444ece0578579:636363\n'
def scan_links(self, links):
    """
    Scan links with the plain HTTP client: fetch each url, queue discovered
    links, dump the body to disk and record size/code/time per link.
    """
    req_func = getattr(self.http, 'get')
    for link in links:
        self.last_action = int(time.time())
        self.counter.up()
        url = SpiderCommon.gen_url(link, self.host, self.protocol)
        start_time = int(round(time.time() * 1000))
        # pre_url (path?query) is the key used as the page-dump file name.
        pre_url = link['path'] + '?' + link['query'] if len(link['query']) else link['path']
        if self.delay:
            time.sleep(self.delay)
        response = req_func(url)
        self.src.up()
        if response is not None:
            result_time = int(round(time.time() * 1000)) - start_time
            # Strip charset and other parameters from the content-type.
            if 'content-type' in response.headers:
                content_type = response.headers['content-type'].split(";")[0] \
                    if (response.headers['content-type'].find(";") != -1) \
                    else response.headers['content-type']
            else:
                content_type = 'unknown/unknown'
            if 299 < response.status_code < 400:
                # Redirect: only queue the Location target, do not parse the body.
                SpiderCommon.insert_links([response.headers['Location']], url, self.host)
            else:
                new_links = self.links_parser.parse_links(content_type, str(response.content), link)
                SpiderCommon.insert_links(new_links, url, self.host)
            # Dump the body under data_path/<host>/<md5(pre_url)>.
            file_put_contents(
                "{0}{1}/{2}".format(
                    Registry().get('data_path'),
                    self.host,
                    md5(pre_url)
                ),
                str(response.content)
            )
        link['size'] = len(response.content) if response is not None else 0
        link['code'] = response.status_code if response is not None else 0
        link['time'] = result_time if response is not None else 0
    SpiderCommon.links_checked(links)
def test_maskdict_task(self):
    """
    Run a hybrid mask+dict task and verify the worker's outfile.
    """
    # Only '1ccc' (= mask digit '?d' + dict word 'ccc') is crackable.
    self._add_hashlist(alg_id=0, uncracked=4)
    self._add_hash(hash=md5('123'))
    self._add_hash(hash=md5('456'))
    self._add_hash(hash=md5('1ccc'))
    self._add_hash(hash=md5('789'))
    self._add_work_task()
    self._add_task(source=json.dumps({'mask': '?d', 'dict': 1}), type='maskdict')
    dicts_path = Registry().get('config')['main']['dicts_path']
    self._add_dict_group()
    self._add_dict()
    self._add_dict(id=2, hash='2')
    file_put_contents(dicts_path + "/1.dict", "aaa\nbbb\n")
    file_put_contents(dicts_path + "/2.dict", "ccc\nddd\n")
    self.thrd = WorkerThread(self.db.fetch_row("SELECT * FROM task_works WHERE id = 1"))
    self.thrd.catch_exceptions = False
    self.thrd.start()
    start_time = int(time.time())
    # Poll until the worker finishes; fail the test if it exceeds 5 seconds.
    while True:
        if self.thrd.done:
            break
        if int(time.time()) - start_time > 5:
            pytest.fail("Long time of WorkerThread")
        time.sleep(1)
    wtask = self.db.fetch_row("SELECT * FROM task_works WHERE id = 1")
    assert wtask['status'] == 'waitoutparse'
    assert wtask['uncracked_before'] == 4
    assert os.path.exists(wtask['out_file'])
    # Outfile format is hash:hex(password); 31636363 is hex of '1ccc'.
    assert file_get_contents(wtask['out_file']) == '49a14108270c0596ac1d70c3c4f82a10:31636363\n'
def scan_links(self, links):
    """
    Scan links with the plain HTTP client: fetch each url, queue discovered
    links, dump the body to disk and record size/code/time per link.
    """
    req_func = getattr(self.http, 'get')
    for link in links:
        self.last_action = int(time.time())
        self.counter.up()
        url = SpiderCommon.gen_url(link, self.host)
        start_time = int(round(time.time() * 1000))
        # pre_url (path?query) is the key used as the page-dump file name.
        pre_url = link['path'] + '?' + link['query'] if len(
            link['query']) else link['path']
        if self.delay:
            time.sleep(self.delay)
        response = req_func(url)
        self.src.up()
        if response is not None:
            result_time = int(round(time.time() * 1000)) - start_time
            # Strip charset and other parameters from the content-type.
            if 'content-type' in response.headers:
                content_type = response.headers['content-type'].split(";")[0] \
                    if (response.headers['content-type'].find(";") != -1) \
                    else response.headers['content-type']
            else:
                content_type = 'unknown/unknown'
            if 299 < response.status_code < 400:
                # Redirect: only queue the Location target, do not parse the body.
                SpiderCommon.insert_links([response.headers['Location']], url, self.host)
            else:
                new_links = self.links_parser.parse_links(
                    content_type, str(response.content), link)
                SpiderCommon.insert_links(new_links, url, self.host)
            # Dump the body under data_path/<host>/<md5(pre_url)>.
            file_put_contents(
                "{0}{1}/{2}".format(Registry().get('data_path'), self.host,
                                    md5(pre_url)), str(response.content))
        link['size'] = len(response.content) if response is not None else 0
        link['code'] = response.status_code if response is not None else 0
        link['time'] = result_time if response is not None else 0
    SpiderCommon.links_checked(links)
def add_mass(self, pid, host_id, data):
    """
    Add many urls at once (batched inserts of 50 rows).

    :param pid: project id
    :param host_id: host id
    :param data: list of url dicts; 'url' key is required, the other known
                 fields are filled with defaults when missing
    :return: True
    """
    to_insert = []
    for row in data:
        # Fill defaults for missing optional fields; 'url' is mandatory.
        for field in ['url', 'referer', 'response_code', 'response_time',
                      'who_add', 'spidered', 'size', 'descr']:
            if field not in row.keys():
                if field in ['referer', 'response_code', 'response_time', 'descr']:
                    row[field] = ''
                elif field in ['spidered', 'size']:
                    row[field] = 0
                elif field == 'who_add':
                    row[field] = 'human'
                elif field == 'url':
                    raise WSException("URL row must have a 'url' key")
        # Reject unknown keys to catch caller typos early.
        for k in row.keys():
            if k not in [
                    'url', 'referer', 'response_code', 'response_time',
                    'who_add', 'spidered', 'size', 'descr'
            ]:
                raise WSException("Key '{0}' must not be in url data".format(k))
        to_insert.append({
            'project_id': pid,
            "host_id": host_id,
            "hash": md5(row['url']),
            "url": row['url'],
            "referer": row['referer'],
            "response_code": row['response_code'],
            "response_time": row['response_time'],
            "when_add": int(time.time()),
            "who_add": row['who_add'],
            "spidered": row['spidered'],
            "size": row['size'],
            "descr": row['descr']
        })
        # Flush every 50 rows to keep insert statements bounded.
        if len(to_insert)%50 == 0:
            self._db.insert_mass("urls", to_insert, 1)
            to_insert = []
    if len(to_insert):
        self._db.insert_mass("urls", to_insert, 1)
    return True
def test_one_add(self):
    """ add() must insert a single url row with every passed field persisted. """
    assert self.db.fetch_one("SELECT 1 FROM urls") is None
    _id = self.model.add(1, 2, '/1/', '/ref', 200, 10, 'dafs', 1, 100, 'desc')
    assert bool(_id)
    row = self.db.fetch_row("SELECT * FROM urls WHERE id = " + str(_id))
    expected = {
        'project_id': 1,
        'host_id': 2,
        'hash': md5('/1/'),
        'url': '/1/',
        'referer': '/ref',
        'response_code': 200,
        'response_time': 10,
        'size': 100,
        'descr': 'desc',
        'spidered': 1,
    }
    for field, value in expected.items():
        assert row[field] == value
def parse_page(self, response):
    """ Extract the movie code and magnet link from a thread page and upsert them. """
    doc = response.doc
    subject = doc('#thread_subject').text()
    match = re.search(r'([A-Z]+-[0-9]+)', subject)
    if match is None:
        return None
    alias = match.group(1)
    self.logger.info(f"{response.index}/{response.total}: {alias}.")
    magnet_link = doc('.blockcode li').text().strip()
    if not magnet_link:
        return None
    data = {'alias': alias, 'magnet_link': magnet_link, 'download': 0}
    # Upsert keyed by md5(alias) so re-parsing the same page is idempotent.
    self.col.update_one({'_id': md5(alias)}, {'$set': data}, True)
    return data
def put_found_hashes_in_db(self, alg_id, hashes):
    """
    Put hashes found by Finder into the DB: mark matching uncracked rows
    of all hashlists with this algorithm as cracked.

    :param alg_id: algorithm id
    :param hashes: list of dicts with 'hash', 'salt', 'password'
    :return:
    """
    for _hash in hashes:
        # summ is md5("hash:salt") for salted hashes, md5(hash) otherwise.
        summ = md5("{0}:{1}".format(_hash['hash'], _hash['salt'])) if len(_hash['salt']) else md5(_hash['hash'])
        # Match rows by summ within the algorithm; only touch uncracked ones.
        self._db.q(
            "UPDATE `hashes` h, hashlists hl "
            "SET h.`password` = {0}, h.cracked = 1 "
            "WHERE h.hashlist_id = hl.id AND hl.alg_id = {1} AND h.summ = {2} AND h.cracked = 0"
            .format(
                self._db.quote(_hash['password']),
                alg_id,
                self._db.quote(summ),
            )
        )
def parse_outfile_and_fill_found_hashes(self, work_task, hashlist):
    """
    Parse the outfile produced by hashcat and mark the found hashes in DB.

    :param work_task: worktask row (dict with 'out_file' path)
    :param hashlist: hashlist row (dict with 'alg_id')
    :return:
    """
    # 'with' guarantees the outfile handle is closed (it was leaked before).
    with open(work_task['out_file'], 'r') as out_file_fh:
        for _line in out_file_fh:
            _line = _line.strip()
            # Outfile line format: <hash[:salt]>:<hex-encoded password>.
            password = _line[_line.rfind(":") + 1:].strip().decode("hex")
            summ = md5(_line[:_line.rfind(":")])
            # Only touch uncracked rows of hashlists with the same algorithm.
            self._db.q(
                "UPDATE `hashes` h, hashlists hl "
                "SET h.`password` = {0}, h.cracked = 1 "
                "WHERE h.hashlist_id = hl.id AND hl.alg_id = {1} AND h.summ = {2} AND h.cracked = 0"
                .format(self._db.quote(password), hashlist['alg_id'], self._db.quote(summ))
            )
def detail_handler(self, task, *args):
    """
    Fetch a detail page via the parent handler, parse its params and save
    them to the DB. Returns None for missing data or 'plus/' service urls.
    """
    data = super().detail_handler(task, *args)
    # Skip failed fetches and service pages under 'plus/'.
    if not data or data.get('url').find('plus/') != -1:
        self.logger.error(task)
        return None
    doc = data.get('doc')
    try:
        params = self.get_default_params(doc, data['url'])
        if not params:
            return None
        # _id is md5 of the original title, so repeated crawls overwrite.
        params['_id'] = md5(params['origin_title'])
        # now()+8h - presumably to get a UTC+8 local timestamp; confirm
        # the host clock is UTC before relying on this.
        params['create_date'] = (
            datetime.datetime.now() + datetime.timedelta(hours=8)).strftime("%Y-%m-%d %H:%M:%S")
        self.db.save(params)
        self.logger.info(f"[{args[0]}/{args[1]}]:{params['origin_title']}")
    except Exception as e:
        # Best-effort: a broken detail page must not stop the crawl.
        self.logger.exception(e)
def prepare_first_pages(host):
    """ Prepare link on first page in MongoDB.
    Add root url if urls for this host not exists.

    :param host: host name whose start URLs seed the spider queue
    """
    pid = Registry().get('pData')['id']
    coll = Registry().get('mongo').spider_urls
    # The spider queue is rebuilt from scratch on every run.
    coll.drop()
    Urls = UrlsModel()
    urls = Urls.list_by_host_name_for_spider(pid, host)
    if not len(urls):
        # No known URLs for this host yet - seed the queue with the root.
        Registry().get('logger').log(
            "Spider: Root URL was added automaticaly")
        Urls.add(pid, HostsModel().get_id_by_name(pid, host), '/', who_add='spider')
        urls = Urls.list_by_host_name_for_spider(pid, host)
    for url in urls:
        url = urlparse(url['url'])
        data = {
            # Dedup key for the unique index created below.
            'hash': md5(str(url.path + url.query)),
            'path': url.path,
            'query': url.query,
            'time': 0,
            'code': 0,
            'checked': 0,
            'getted': 0,
            'referer': '',
            'size': 0,
            'founder': 'spider'
        }
        coll.insert(data)
    coll.create_index([('hash', 1)], unique=True, dropDups=True)
    coll.create_index([('checked', 1)])
class Test_HashlistsByAlgLoaderThread(CommonIntegration):
    """ Class of integration tests - HashlistsByAlgLoaderThread """
    # Thread under test: builds the common per-algorithm hashlists.
    thrd = None
    # Companion thread that loads/parses hashlist files into the DB.
    loader_thrd = None

    def setup(self):
        """ Tests setup """
        self._clean_db()
        self.thrd = HashlistsByAlgLoaderThread()
        # Short poll interval so the 5s sleeps below are enough.
        self.thrd.delay_per_check = 1
        self.thrd.catch_exceptions = False
        self.loader_thrd = HashlistsLoaderThread()
        self.loader_thrd.delay_per_check = 1
        self.loader_thrd.catch_exceptions = False

    def teardown(self):
        """ Tests teardown """
        if isinstance(self.thrd, HashlistsByAlgLoaderThread):
            # Signal the thread loop to exit before dropping the reference.
            self.thrd.available = False
            time.sleep(1)
            del self.thrd
        if isinstance(self.loader_thrd, HashlistsLoaderThread):
            self.loader_thrd.available = False
            time.sleep(1)
            del self.loader_thrd
        self._clean_db()

    # (hashes rows, have_salt) - includes salts with quotes/backslashes
    # to exercise escaping.
    test_data = [
        ([{'hash': 'a', 'salt': '\\ta\'1\\', 'summ': md5('a:\\ta\'1\\')},
          {'hash': 'b', 'salt': '\\nb"2\\', 'summ': md5('b:\\nb"2\\')}], 1),
        ([{'hash': 'a', 'salt': '1', 'summ': md5('a:1')},
          {'hash': 'b', 'salt': '2', 'summ': md5('b:2')}], 1),
        ([{'hash': 'a', 'salt': '', 'summ': md5('a')},
          {'hash': 'b', 'salt': '', 'summ': md5('b')}], 0),
    ]

    @pytest.mark.parametrize("hashes,have_salt", test_data)
    def test_simple_build(self, hashes, have_salt):
        """ Simple common hashlist build
        :param hashes: hashes rows
        :param have_salt: Does alg has salt?
        :return:
        """
        self._add_hashlist(have_salts=have_salt)
        for _hash in hashes:
            self._add_hash(hash=_hash['hash'], salt=_hash['salt'], summ=_hash['summ'])
        # No common hashlist must exist before the threads run.
        assert self.db.fetch_one(
            "SELECT id FROM hashlists WHERE common_by_alg") is None
        self.thrd.start()
        self.loader_thrd.start()
        time.sleep(5)
        test_hashlist_data = {
            'id': 2,
            'name': 'All-MD4',
            'have_salts': have_salt,
            'delimiter': self.thrd.DELIMITER,
            'cracked': 0,
            'uncracked': 2,
            'errors': '',
            'parsed': 1,
            'status': 'ready',
            'common_by_alg': 3
        }
        hashlist_data = self.db.fetch_row(
            "SELECT * FROM hashlists WHERE common_by_alg")
        assert int(
            self.db.fetch_one(
                "SELECT when_loaded FROM hashlists WHERE common_by_alg")) > 0
        for field in test_hashlist_data:
            assert hashlist_data[field] == test_hashlist_data[field]
        # Every source hash must appear exactly once in the common list.
        for _hash in hashes:
            assert self.db.fetch_one(
                "SELECT COUNT(id) FROM hashes WHERE hash = {0} AND salt={1} AND summ = {2} AND hashlist_id = 2"
                .format(self.db.quote(_hash['hash']),
                        self.db.quote(_hash['salt']),
                        self.db.quote(_hash['summ']))) == 1

    # (hashes in usual list, hashes already in common list, have_salt):
    # 'b' is cracked in the source list and must leave the common list,
    # 'c'/'d' are new and must be added to it.
    test_data = [
        ([
            {'hash': 'a', 'salt': '1', 'summ': md5('a:1'), 'cracked': 0},
            {'hash': 'b', 'salt': '2', 'summ': md5('b:2'), 'cracked': 1},
            {'hash': 'c', 'salt': '3', 'summ': md5('c:3'), 'cracked': 0},
            {'hash': 'd', 'salt': '4', 'summ': md5('d:4'), 'cracked': 0},
        ], [
            {'hash': 'a', 'salt': '1', 'summ': md5('a:1'), 'cracked': 0},
            {'hash': 'b', 'salt': '2', 'summ': md5('b:2'), 'cracked': 0},
        ], 1),
        ([
            {'hash': 'a', 'salt': '', 'summ': md5('a'), 'cracked': 0},
            {'hash': 'b', 'salt': '', 'summ': md5('b'), 'cracked': 1},
            {'hash': 'c', 'salt': '', 'summ': md5('c'), 'cracked': 0},
            {'hash': 'd', 'salt': '', 'summ': md5('d'), 'cracked': 0},
        ], [
            {'hash': 'a', 'salt': '', 'summ': md5('a'), 'cracked': 0},
            {'hash': 'b', 'salt': '', 'summ': md5('b'), 'cracked': 0},
        ], 0),
    ]

    @pytest.mark.parametrize("hashes_in_self,hashes_in_common,have_salt",
                             test_data)
    def test_update_exists_list(self, hashes_in_self, hashes_in_common,
                                have_salt):
        """ Updating exists common hashlist
        :param hashes_in_self: Hashes in usual hashlist
        :param hashes_in_common: Hashes in common hashlist
        :param have_salt: Does alg has salt?
        :return:
        """
        self._add_hashlist(have_salts=have_salt)
        for _hash in hashes_in_self:
            self._add_hash(hash=_hash['hash'], salt=_hash['salt'],
                           summ=_hash['summ'], cracked=_hash['cracked'])
        self._add_hashlist(id=2, alg_id=3, common_by_alg=3, have_salts=have_salt)
        for _hash in hashes_in_common:
            self._add_hash(hashlist_id=2, hash=_hash['hash'], salt=_hash['salt'],
                           summ=_hash['summ'], cracked=_hash['cracked'])
        # Pre-conditions: 'b' exists in both lists, 'c'/'d' only in source.
        assert self.db.fetch_one(
            "SELECT COUNT(id) FROM hashes WHERE hash='b'") == 2
        assert self.db.fetch_one(
            "SELECT COUNT(id) FROM hashes WHERE hash='c'") == 1
        assert self.db.fetch_one(
            "SELECT COUNT(id) FROM hashes WHERE hash='d'") == 1
        self.thrd.start()
        self.loader_thrd.start()
        time.sleep(5)
        # After rebuild: cracked 'b' removed from common, 'c'/'d' copied in.
        assert self.db.fetch_one(
            "SELECT COUNT(id) FROM hashes WHERE hash='b'") == 1
        assert self.db.fetch_one(
            "SELECT COUNT(id) FROM hashes WHERE hash='c'") == 2
        assert self.db.fetch_one(
            "SELECT COUNT(id) FROM hashes WHERE hash='d'") == 2
        assert [{'hash': 'a'}, {'hash': 'c'}, {'hash': 'd'}] \
            == self.db.fetch_all("SELECT hash FROM hashes WHERE hashlist_id = 2")

    # Work-task statuses that mean "outfile still being parsed".
    test_data = [('outparsing'), ('waitoutparse')]

    @pytest.mark.parametrize("status", test_data)
    def test_build_with_parsing_alg(self, status):
        """ Try build no ready hashlist
        :param status:
        :return:
        """
        self._add_hashlist()
        self._add_hash(hash='a', summ='111')
        self._add_hash(hash='b', summ='222')
        self._add_hashlist(id=2, alg_id=3, common_by_alg=0)
        self._add_work_task(hashlist_id=2, status=status)
        assert self.db.fetch_one(
            "SELECT id FROM hashlists WHERE common_by_alg") is None
        self.thrd.start()
        self.loader_thrd.start()
        time.sleep(5)
        # While a task of the same alg is parsing, the common list is
        # created but left empty/unparsed.
        test_hashlist_data = {
            'id': 3,
            'name': 'All-MD4',
            'have_salts': 0,
            'delimiter': self.thrd.DELIMITER,
            'cracked': 0,
            'uncracked': 0,
            'errors': '',
            'parsed': 0,
            'status': 'ready',
            'common_by_alg': 3
        }
        hashlist_data = self.db.fetch_row(
            "SELECT * FROM hashlists WHERE common_by_alg")
        for field in test_hashlist_data:
            assert hashlist_data[field] == test_hashlist_data[field]
        # Release the blocking task and let the threads fill the list.
        self.db.update("task_works", {'status': 'wait'}, 'id=1')
        time.sleep(5)
        test_hashlist_data = {
            'id': 3,
            'name': 'All-MD4',
            'have_salts': 0,
            'delimiter': self.thrd.DELIMITER,
            'cracked': 0,
            'uncracked': 2,
            'errors': '',
            'parsed': 1,
            'status': 'ready',
            'common_by_alg': 3
        }
        hashlist_data = self.db.fetch_row(
            "SELECT * FROM hashlists WHERE common_by_alg")
        for field in test_hashlist_data:
            assert hashlist_data[field] == test_hashlist_data[field]
        assert self.db.fetch_all(
            "SELECT hash FROM hashes WHERE hashlist_id = 3") == [{
                'hash': 'a'
            }, {
                'hash': 'b'
            }]
def delete(self, project_id, host, url):
    """ Delete url from table

    The row is addressed by project id, numeric host id and md5 of the url.
    """
    # Resolve the host name to its numeric id first.
    host_row_id = HostsModel().get_id_by_name(project_id, host)
    url_hash = self._db.quote(md5(url))
    query = (
        "DELETE FROM urls WHERE project_id = {0} AND host_id = {1} AND hash = {2} "
        .format(project_id, host_row_id, url_hash)
    )
    self._db.q(query)
def _build_hash(self, project_id, host_id, path, params, method, protocol):
    """ Build hash of request

    The request identity is the six fields joined with "|" separators.
    """
    fields = (project_id, host_id, path, params, method, protocol)
    return md5("|".join(str(field) for field in fields))
class Test_HashlistsLoaderThread(CommonIntegration):
    """ HashlistsLoaderThread integration tests """
    # Thread under test: parses hashlist files from tmp_path into the DB.
    thrd = None

    def setup(self):
        """ Setup tests """
        self._clean_db()
        self.thrd = HashlistsLoaderThread()
        # Short poll interval so the 5s sleeps below are enough.
        self.thrd.delay_per_check = 1
        self.thrd.catch_exceptions = False

    def teardown(self):
        """ Teardown tests """
        if isinstance(self.thrd, HashlistsLoaderThread):
            # Signal the thread loop to exit before dropping the reference.
            self.thrd.available = False
            time.sleep(1)
            del self.thrd
        self._clean_db()

    # (have_salts, file content, expected count, expected rows, rows already
    # found in another list). Covers trailing newline, duplicate lines,
    # pre-cracked hashes and salts with quotes/backslashes.
    test_data = [
        (1, 'aUNIQUEDELIMITER1\nbUNIQUEDELIMITER2\ncUNIQUEDELIMITER3', 3, [
            {'hash': 'a', 'salt': '1', 'summ': md5('a:1'), 'cracked': 0, 'password': ''},
            {'hash': 'b', 'salt': '2', 'summ': md5('b:2'), 'cracked': 0, 'password': ''},
            {'hash': 'c', 'salt': '3', 'summ': md5('c:3'), 'cracked': 0, 'password': ''},
        ], []),
        (1, 'aUNIQUEDELIMITER1\nbUNIQUEDELIMITER2\ncUNIQUEDELIMITER3\n', 3, [
            {'hash': 'a', 'salt': '1', 'summ': md5('a:1'), 'cracked': 0, 'password': ''},
            {'hash': 'b', 'salt': '2', 'summ': md5('b:2'), 'cracked': 0, 'password': ''},
            {'hash': 'c', 'salt': '3', 'summ': md5('c:3'), 'cracked': 0, 'password': ''},
        ], []),
        (1, 'aUNIQUEDELIMITER1\nbUNIQUEDELIMITER2\ncUNIQUEDELIMITER3\naUNIQUEDELIMITER1', 3, [
            {'hash': 'a', 'salt': '1', 'summ': md5('a:1'), 'cracked': 0, 'password': ''},
            {'hash': 'b', 'salt': '2', 'summ': md5('b:2'), 'cracked': 0, 'password': ''},
            {'hash': 'c', 'salt': '3', 'summ': md5('c:3'), 'cracked': 0, 'password': ''},
        ], []),
        (1, 'aUNIQUEDELIMITER1\nbUNIQUEDELIMITER2\ncUNIQUEDELIMITER3', 3, [
            {'hash': 'a', 'salt': '1', 'summ': md5('a:1'), 'cracked': 1, 'password': '******'},
            {'hash': 'b', 'salt': '2', 'summ': md5('b:2'), 'cracked': 0, 'password': ''},
            {'hash': 'c', 'salt': '3', 'summ': md5('c:3'), 'cracked': 0, 'password': ''},
        ], [
            {'hash': 'a', 'salt': '1', 'summ': md5('a:1'), 'cracked': 1, 'password': '******'},
        ]),
        (1, 'aUNIQUEDELIMITERa\\nb\\c\nbUNIQUEDELIMITERa\'b\\c\ncUNIQUEDELIMITERa\\tb\\nc\n', 3, [
            {'hash': 'a', 'salt': 'a\\nb\\c', 'summ': md5('a:a\\nb\\c'), 'cracked': 0, 'password': ''},
            {'hash': 'b', 'salt': 'a\'b\\c', 'summ': md5('b:a\'b\\c'), 'cracked': 0, 'password': ''},
            {'hash': 'c', 'salt': 'a\\tb\\nc', 'summ': md5('c:a\\tb\\nc'), 'cracked': 0, 'password': ''},
        ], []),
        (0, 'a\nb\nc', 3, [
            {'hash': 'a', 'salt': '', 'summ': md5('a'), 'cracked': 0, 'password': ''},
            {'hash': 'b', 'salt': '', 'summ': md5('b'), 'cracked': 0, 'password': ''},
            {'hash': 'c', 'salt': '', 'summ': md5('c'), 'cracked': 0, 'password': ''},
        ], []),
        (0, 'a\nb\nc\n', 3, [
            {'hash': 'a', 'salt': '', 'summ': md5('a'), 'cracked': 0, 'password': ''},
            {'hash': 'b', 'salt': '', 'summ': md5('b'), 'cracked': 0, 'password': ''},
            {'hash': 'c', 'salt': '', 'summ': md5('c'), 'cracked': 0, 'password': ''},
        ], []),
        (0, 'a\nb\nc\na', 3, [
            {'hash': 'a', 'salt': '', 'summ': md5('a'), 'cracked': 0, 'password': ''},
            {'hash': 'b', 'salt': '', 'summ': md5('b'), 'cracked': 0, 'password': ''},
            {'hash': 'c', 'salt': '', 'summ': md5('c'), 'cracked': 0, 'password': ''},
        ], []),
        (0, 'a\nb\nc', 3, [
            {'hash': 'a', 'salt': '', 'summ': md5('a'), 'cracked': 1, 'password': '******'},
            {'hash': 'b', 'salt': '', 'summ': md5('b'), 'cracked': 0, 'password': ''},
            {'hash': 'c', 'salt': '', 'summ': md5('c'), 'cracked': 0, 'password': ''},
        ], [
            {'hash': 'a', 'salt': '', 'summ': md5('a'), 'cracked': 1, 'password': '******'},
        ]),
    ]

    @pytest.mark.parametrize(
        "have_salts,hashes_content,count_expected,hashes_expected,hashes_found",
        test_data)
    def test_load_simple_list(self, have_salts, hashes_content, count_expected,
                              hashes_expected, hashes_found):
        """ Loading simple list in db
        :param have_salts: Does hashlist has salt?
        :param hashes_content: Text content of hashlist
        :param count_expected: How many hashes we expected in db?
        :param hashes_expected: Rows with hashes we expected in db?
        :param hashes_found: Rows with found hashes after load, we expected
        :return:
        """
        self._add_hashlist(have_salts=have_salts, parsed=0,
                           tmp_path='/tmp/1.txt', status='wait')
        file_put_contents('/tmp/1.txt', hashes_content)
        if len(hashes_found):
            # Pre-load a second, already-parsed list holding cracked rows so
            # the loader can copy passwords from it.
            self._add_hashlist(id=2, have_salts=have_salts, parsed=1,
                               status='ready')
            for _hash in hashes_found:
                self._add_hash(hashlist_id=2, hash=_hash['hash'],
                               salt=_hash['salt'], password=_hash['password'],
                               cracked=_hash['cracked'], summ=_hash['summ'])
        self.thrd.start()
        time.sleep(5)
        assert count_expected == self.db.fetch_one(
            "SELECT COUNT(id) FROM hashes WHERE hashlist_id = 1")
        for _hash in hashes_expected:
            assert self.db.fetch_one(
                "SELECT COUNT(id) FROM hashes WHERE hashlist_id = 1 "
                "AND hash = {0} AND salt = {1} AND summ = {2} AND password = {3} AND cracked = {4}"
                .format(self.db.quote(_hash['hash']),
                        self.db.quote(_hash['salt']),
                        self.db.quote(_hash['summ']),
                        self.db.quote(_hash['password']),
                        _hash['cracked'])) == 1

    def test_add_hashes_to_exists_list(self):
        """ Test adding hashes to exists hashlist """
        self._add_hashlist(parsed=0, tmp_path='/tmp/1.txt', status='wait')
        file_put_contents(
            '/tmp/1.txt',
            'c\nd\ne\n',
        )
        # 'c' overlaps the file content; the union must be deduplicated.
        self._add_hash(hash='a')
        self._add_hash(hash='b')
        self._add_hash(hash='c')
        self.thrd.start()
        time.sleep(5)
        assert ['a', 'b', 'c', 'd', 'e'] == \
            self.db.fetch_col("SELECT hash FROM hashes WHERE hashlist_id = 1 ORDER BY hash")

    def test_add_hashes_to_exists_list_with_founds(self):
        """ Testing add hashes to exists list with already found hashes """
        self._add_hashlist(parsed=0, tmp_path='/tmp/1.txt', status='wait')
        file_put_contents(
            '/tmp/1.txt',
            'c\nd\ne\n',
        )
        self._add_hash(hash='a', summ=md5('a'))
        self._add_hash(hash='b', summ=md5('b'))
        self._add_hash(hash='c', summ=md5('c'))
        # Second list already holds 'a' as cracked; its password must be
        # propagated into list 1.
        self._add_hashlist(id=2)
        self._add_hash(hashlist_id=2, hash='a', summ=md5('a'), cracked=1,
                       password='******')
        self.thrd.start()
        time.sleep(5)
        assert self.db.fetch_col("SELECT hash FROM hashes WHERE hashlist_id = 1 ORDER BY hash") == \
            ['a', 'b', 'c', 'd', 'e']
        # NOTE(review): expects 'aaa' although the fixture inserts
        # '******' — values look redacted; confirm fixture vs expectation.
        assert self.db.fetch_one(
            "SELECT password FROM hashes "
            "WHERE hashlist_id = 1 AND cracked = 1 AND hash = 'a'") == 'aaa'
def get_hash_id_by_path(self, path):
    """ Return ID of hash by path from cms_paths_hashes table """
    # Paths are stored by their md5, so hash the path before the lookup.
    quoted_hash = self._db.quote(md5(path))
    sql = "SELECT id FROM `cms_paths_hashes` WHERE hash={0}".format(quoted_hash)
    return self._db.fetch_one(sql)
def get_hash_id_by_path(self, path):
    """ Return ID of hash by path from cms_paths_hashes table """
    # Look the row up by md5 of the path (that is how rows are keyed).
    path_digest = md5(path)
    return self._db.fetch_one(
        "SELECT id FROM `cms_paths_hashes` WHERE hash={0}".format(
            self._db.quote(path_digest)
        )
    )
def _build_hash(self, project_id, host_id, path, params, method, protocol):
    """ Build hash of request

    Identity of a request = project, host, path, params, method, protocol.
    """
    key = "{0}|{1}|{2}|{3}|{4}|{5}".format(
        project_id, host_id, path, params, method, protocol)
    return md5(key)
def test_run_2(self):
    """ Have one ready hashlist. Load new hashlist. Wait common hashlist build.
    Start brute on it. Found one hash which exists in first & second hashlists.
    Rebuild common hashlist """
    self._add_hashlist(alg_id=4, uncracked=3)
    self._add_hash(hash=md5('333'), summ=md5(md5('333')))
    self._add_hash(hash=md5('444'), summ=md5(md5('444')))
    self._add_hash(hash=md5('ccc'), summ=md5(md5('ccc')))
    # Second list arrives as an unparsed file; md5('333')/md5('ccc')
    # overlap the first list.
    self._add_hashlist(id=2, parsed=0, tmp_path='/tmp/1.txt', status='wait', alg_id=4)
    file_put_contents('/tmp/1.txt', '{0}\n{1}\n{2}\n'.format(md5('111'), md5('333'), md5('ccc')))
    # Launch the whole daemon as a subprocess in its own process group so
    # it can be killed with killpg at the end.
    process = Popen("python ../../hbs.py", stdout=PIPE, stdin=PIPE, stderr=PIPE,
                    shell=True, preexec_fn=os.setsid)
    time.sleep(10)
    # Common (per-alg) hashlist must be built as id 3 before the task runs.
    assert self.db.fetch_one("SELECT name FROM hashlists WHERE common_by_alg = 4") == 'All-MD5'
    assert self.db.fetch_one("SELECT id FROM hashlists WHERE common_by_alg = 4") == 3
    self._add_work_task(hashlist_id=3)
    self._add_task(source='?l?l?l', type='mask')
    time.sleep(20)
    os.killpg(os.getpgid(process.pid), signal.SIGTERM)
    #stdout = process.stdout.read()
    # hashcat's CLI-password warning on stderr is expected noise.
    stderr = process.stderr.read()\
        .replace('Warning: Using a password on the command line interface can be insecure.\n', '')
    assert stderr == ''
    assert self.db.fetch_one("SELECT COUNT(id) FROM hashes") == 9
    assert self.db.fetch_one(
        "SELECT COUNT(id) FROM hashes WHERE cracked=1 AND password='******' AND hash = '{0}'".format(md5('ccc'))
    ) == 2
    assert self.db.fetch_one("SELECT COUNT(id) FROM hashes WHERE cracked=1") == 2
    assert self.db.fetch_one("SELECT status FROM task_works WHERE id=1") == "done"
    assert self.db.fetch_one("SELECT COUNT(id) FROM hashes WHERE hashlist_id = 3") == 3
class Test_Full(CommonIntegration):
    """ Class for integration tests - Full run """
    # WorkerThread handle (only used for cleanup in teardown).
    thrd = None

    def setup(self):
        """ Tests setup """
        # These tests shell out to hashcat; bail early if it is missing.
        if not os.path.exists(Registry().get('config')['main']['path_to_hc']):
            pytest.fail("HC dir not exists")
        self._clean_db()
        # Disable the online finder by default; test_run_5 re-enables it.
        self.db.update("algs", {'finder_insidepro_allowed': 0}, "id")

    def teardown(self):
        """ Tests teardown """
        if isinstance(self.thrd, WorkerThread):
            del self.thrd
        self._clean_db()

    def test_run_1(self):
        """ Have one ready hashlist. Load new hashlist, start brute on it.
        Found same hashes in first hashlist """
        self._add_hashlist(alg_id=4, uncracked=3)
        self._add_hash(hash=md5('333'), summ=md5(md5('333')))
        self._add_hash(hash=md5('444'), summ=md5(md5('444')))
        self._add_hash(hash=md5('ccc'), summ=md5(md5('ccc')))
        # Second list arrives as an unparsed file overlapping the first.
        self._add_hashlist(id=2, parsed=0, tmp_path='/tmp/1.txt', status='wait', alg_id=4)
        file_put_contents('/tmp/1.txt', '{0}\n{1}\n{2}\n'.format(md5('111'), md5('333'), md5('ccc')))
        self._add_work_task(hashlist_id=2)
        self._add_task(source='?l?l?l', type='mask')
        # Run the daemon in its own process group so killpg stops it all.
        process = Popen("python ../../hbs.py", stdout=PIPE, stdin=PIPE, stderr=PIPE,
                        shell=True, preexec_fn=os.setsid)
        time.sleep(20)
        os.killpg(os.getpgid(process.pid), signal.SIGTERM)
        assert self.db.fetch_one("SELECT status FROM hashlists WHERE id=2") == 'ready'
        #stdout = process.stdout.read()
        # hashcat's CLI-password warning on stderr is expected noise.
        stderr = process.stderr.read()\
            .replace('Warning: Using a password on the command line interface can be insecure.\n', '')
        assert stderr == ''
        assert self.db.fetch_one("SELECT COUNT(id) FROM hashes") == 9
        assert self.db.fetch_one(
            "SELECT COUNT(id) FROM hashes WHERE cracked=1 AND password='******' AND hash = '{0}'".format(md5('ccc'))
        ) == 2
        assert self.db.fetch_one("SELECT COUNT(id) FROM hashes WHERE cracked=1") == 2
        assert self.db.fetch_one("SELECT status FROM task_works WHERE id=1") == 'done'
        assert self.db.fetch_one("SELECT COUNT(id) FROM hashlists WHERE common_by_alg <> 0") == 1
        assert self.db.fetch_one("SELECT COUNT(id) FROM hashes WHERE hashlist_id = 3") == 3
        assert self.db.fetch_one("SELECT name FROM hashlists WHERE common_by_alg = 4") == 'All-MD5'

    def test_run_2(self):
        """ Have one ready hashlist. Load new hashlist. Wait common hashlist
        build. Start brute on it. Found one hash which exists in first &
        second hashlists. Rebuild common hashlist """
        self._add_hashlist(alg_id=4, uncracked=3)
        self._add_hash(hash=md5('333'), summ=md5(md5('333')))
        self._add_hash(hash=md5('444'), summ=md5(md5('444')))
        self._add_hash(hash=md5('ccc'), summ=md5(md5('ccc')))
        self._add_hashlist(id=2, parsed=0, tmp_path='/tmp/1.txt', status='wait', alg_id=4)
        file_put_contents('/tmp/1.txt', '{0}\n{1}\n{2}\n'.format(md5('111'), md5('333'), md5('ccc')))
        process = Popen("python ../../hbs.py", stdout=PIPE, stdin=PIPE, stderr=PIPE,
                        shell=True, preexec_fn=os.setsid)
        time.sleep(10)
        # Common hashlist must be built (as id 3) before the task is added.
        assert self.db.fetch_one("SELECT name FROM hashlists WHERE common_by_alg = 4") == 'All-MD5'
        assert self.db.fetch_one("SELECT id FROM hashlists WHERE common_by_alg = 4") == 3
        self._add_work_task(hashlist_id=3)
        self._add_task(source='?l?l?l', type='mask')
        time.sleep(20)
        os.killpg(os.getpgid(process.pid), signal.SIGTERM)
        #stdout = process.stdout.read()
        stderr = process.stderr.read()\
            .replace('Warning: Using a password on the command line interface can be insecure.\n', '')
        assert stderr == ''
        assert self.db.fetch_one("SELECT COUNT(id) FROM hashes") == 9
        assert self.db.fetch_one(
            "SELECT COUNT(id) FROM hashes WHERE cracked=1 AND password='******' AND hash = '{0}'".format(md5('ccc'))
        ) == 2
        assert self.db.fetch_one("SELECT COUNT(id) FROM hashes WHERE cracked=1") == 2
        assert self.db.fetch_one("SELECT status FROM task_works WHERE id=1") == "done"
        assert self.db.fetch_one("SELECT COUNT(id) FROM hashes WHERE hashlist_id = 3") == 3

    def test_run_3(self):
        """ Have 2 hashlists. Start task by one, add second with high priority.
        First task stop, start second. Wait for second done, and first return
        to work and done.
        """
        self._add_hashlist(alg_id=4, uncracked=3)
        self._add_hash(hash=md5('333'), summ=md5(md5('333')))
        self._add_hash(hash=md5('444'), summ=md5(md5('444')))
        # NOTE(review): summ here is md5(md5('ccc')) while hash is
        # md5('ccccccc') — mismatched on purpose or a fixture slip; confirm.
        self._add_hash(hash=md5('ccccccc'), summ=md5(md5('ccc')))
        self._add_hashlist(id=2, alg_id=23, uncracked=3)
        self._add_hash(hashlist_id=2, hash=md5(md5('333')), summ=md5(md5(md5('333'))))
        self._add_hash(hashlist_id=2, hash=md5(md5('444')), summ=md5(md5(md5('444'))))
        self._add_hash(hashlist_id=2, hash=md5(md5('zzzweeg')), summ=md5(md5(md5('ccc'))))
        process = Popen("python ../../hbs.py", stdout=PIPE, stdin=PIPE, stderr=PIPE,
                        shell=True, preexec_fn=os.setsid)
        self._add_work_task(hashlist_id=2)
        self._add_task(source='?l?l?l?l?l?l?l', type='mask')
        start_time = int(time.time())
        # Wait for the first task to reach 'work'.
        while self.db.fetch_one("SELECT status FROM task_works WHERE id = 1") != "work":
            if int(time.time()) - start_time > 20:
                pytest.fail("Timeout start first task")
        # Higher-priority task must preempt the running one.
        self._add_work_task(id=2, hashlist_id=1, priority=100)
        start_time = int(time.time())
        while self.db.fetch_one("SELECT status FROM task_works WHERE id = 2") != "work":
            if int(time.time()) - start_time > 20:
                pytest.fail("Timeout start second task")
        assert self.db.fetch_one("SELECT status FROM task_works WHERE id = 1") == "wait"
        time.sleep(30)
        os.killpg(os.getpgid(process.pid), signal.SIGTERM)
        #stdout = process.stdout.read()
        stderr = process.stderr.read()\
            .replace('Warning: Using a password on the command line interface can be insecure.\n', '')
        assert stderr == ''
        # NOTE(review): the next two asserts are textually identical —
        # likely one was meant to check a different password; confirm.
        assert self.db.fetch_one("SELECT COUNT(id) FROM hashes WHERE cracked=1 AND password='******'") == 1
        assert self.db.fetch_one("SELECT COUNT(id) FROM hashes WHERE cracked=1 AND password='******'") == 1
        assert self.db.fetch_one("SELECT DISTINCT status FROM task_works") == 'done'
        assert self.db.fetch_one("SELECT name FROM hashlists WHERE common_by_alg = 4") == 'All-MD5'
        assert self.db.fetch_one("SELECT name FROM hashlists WHERE common_by_alg = 23") == 'All-md5(md5($pass))'
        assert self.db.fetch_col("SELECT id FROM hashlists WHERE common_by_alg") == [3, 4]

    def test_run_4(self):
        """ Have 2 hashlists. Start one task by first, add second task with
        same priority. Stop first "manually", second start and done. After
        that first "manually" return to work.
        """
        self._add_hashlist(alg_id=4, uncracked=3)
        self._add_hash(hash=md5('333'), summ=md5(md5('333')))
        self._add_hash(hash=md5('444'), summ=md5(md5('444')))
        self._add_hash(hash=md5('ccccccc'), summ=md5(md5('ccccccc')))
        self._add_hashlist(id=2, alg_id=23, uncracked=3)
        self._add_hash(hashlist_id=2, hash=md5(md5('333')), summ=md5(md5(md5('333'))))
        self._add_hash(hashlist_id=2, hash=md5(md5('444')), summ=md5(md5(md5('444'))))
        self._add_hash(hashlist_id=2, hash=md5(md5('zzzweeg')), summ=md5(md5(md5('zzzweeg'))))
        # Pre-create the common per-alg lists (ids 3 and 4) so the daemon
        # does not have to build them.
        self._add_hashlist(id=3, alg_id=4, common_by_alg=4, uncracked=3)
        self._add_hash(hashlist_id=3, hash=md5('333'), summ=md5(md5('333')))
        self._add_hash(hashlist_id=3, hash=md5('444'), summ=md5(md5('444')))
        self._add_hash(hashlist_id=3, hash=md5('ccccccc'), summ=md5(md5('ccccccc')))
        self._add_hashlist(id=4, alg_id=23, common_by_alg=23, uncracked=3)
        self._add_hash(hashlist_id=4, hash=md5(md5('333')), summ=md5(md5(md5('333'))))
        self._add_hash(hashlist_id=4, hash=md5(md5('444')), summ=md5(md5(md5('444'))))
        self._add_hash(hashlist_id=4, hash=md5(md5('zzzweeg')), summ=md5(md5(md5('zzzweeg'))))
        process = Popen("python ../../hbs.py", stdout=PIPE, stdin=PIPE, stderr=PIPE,
                        shell=True, preexec_fn=os.setsid)
        self._add_work_task(hashlist_id=2)
        self._add_task(source='?l?l?l?l?l?l?l', type='mask')
        start_time = int(time.time())
        while self.db.fetch_one("SELECT status FROM task_works WHERE id = 1") != "work":
            if int(time.time()) - start_time > 5:
                pytest.fail("Timeout start first task")
        self._add_work_task(id=2, hashlist_id=1)
        # Manual stop of the first task; the second should take over.
        self.db.update("task_works", {'status': 'go_stop'}, "id = 1")
        start_time = int(time.time())
        while self.db.fetch_one("SELECT status FROM task_works WHERE id = 2") != "work":
            if int(time.time()) - start_time > 20:
                pytest.fail("Timeout start second task")
        # Manual resume of the first task.
        self.db.update("task_works", {'status': 'wait'}, "id = 1")
        start_time = int(time.time())
        while self.db.fetch_one("SELECT status FROM task_works WHERE id = 1") != "work":
            # Trace status transitions to /tmp/time.txt for debugging.
            file_put_contents(
                '/tmp/time.txt',
                "{0}{1}\n".format(
                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                    self.db.fetch_one("SELECT status FROM task_works WHERE id = 1")
                ), True)
            if int(time.time()) - start_time > 40:
                pytest.fail("Timeout start first task after stop")
            time.sleep(1)
        assert self.db.fetch_one("SELECT status FROM task_works WHERE id = 2") == "done"
        time.sleep(30)
        assert self.db.fetch_one("SELECT status FROM task_works WHERE id = 1") == "done"
        os.killpg(os.getpgid(process.pid), signal.SIGTERM)
        #stdout = process.stdout.read()
        stderr = process.stderr.read()\
            .replace('Warning: Using a password on the command line interface can be insecure.\n', '')
        assert stderr == ''
        # NOTE(review): the next two asserts are textually identical —
        # likely one was meant to check a different password; confirm.
        assert self.db.fetch_one("SELECT COUNT(id) FROM hashes WHERE cracked=1 AND password='******'") == 1
        assert self.db.fetch_one("SELECT COUNT(id) FROM hashes WHERE cracked=1 AND password='******'") == 1
        assert self.db.fetch_one("SELECT DISTINCT status FROM task_works") == "done"

    # (alg_id, have_salt, hashes rows) - rows carry both the id in the usual
    # list ('id') and in the common list ('common_id').
    test_data = [
        (
            74, 1,
            [
                {'id': 1, 'common_id': 2, 'hash': '0065ffe5f9e4e5996c2c3f52f81c6e31', 'salt': 'cB6Ar', 'cracked': 1, 'summ': md5("0065ffe5f9e4e5996c2c3f52f81c6e31:cB6Ar"), 'password': '******'},
                {'id': 3, 'common_id': 4, 'hash': '20e153b046072c949562f3c939611db8', 'salt': '0RTV', 'cracked': 0, 'summ': md5("20e153b046072c949562f3c939611db8:0RTV"), 'password': ''},
            ]
        ),
        (
            4, 0,
            [
                {'id': 1, 'common_id': 2, 'hash': md5('aaa'), 'salt': '', 'cracked': 1, 'summ': md5(md5('aaa')), 'password': '******'},
                {'id': 3, 'common_id': 4, 'hash': '10e153b046072c949562f3c939611db7', 'salt': '', 'cracked': 0, 'summ': md5("10e153b046072c949562f3c939611db7"), 'password': ''},
            ]
        )
    ]

    @pytest.mark.parametrize("alg_id,have_salt,hashes", test_data)
    def test_run_5(self, alg_id, have_salt, hashes):
        """ Have no tasks, bun FinderInsidePro works
        :param alg_id:
        :param have_salt: Does alg has salt?
        :param hashes: Hashes rows
        :return:
        """
        # Re-enable the online finder disabled in setup().
        self.db.update("algs", {'finder_insidepro_allowed': 1}, "id")
        self._add_hashlist(alg_id=alg_id, have_salts=have_salt, uncracked=len(hashes))
        self._add_hashlist(id=2, alg_id=alg_id, have_salts=have_salt, common_by_alg=alg_id, uncracked=len(hashes))
        for _hash in hashes:
            self._add_hash(hashlist_id=1, id=_hash['id'], hash=_hash['hash'], salt=_hash['salt'], summ=_hash['summ'])
            self._add_hash(hashlist_id=2, id=_hash['common_id'], hash=_hash['hash'], salt=_hash['salt'], summ=_hash['summ'])
        # NOTE(review): unlike the other tests, the spawned hbs.py is never
        # killed here — possible process leak; confirm intended.
        process = Popen("python ../../hbs.py", stdout=PIPE, stdin=PIPE, stderr=PIPE,
                        shell=True, preexec_fn=os.setsid)
        time.sleep(15)
        for _hash in hashes:
            test_data = self.db.fetch_row("SELECT * FROM hashes WHERE id = {0}".format(_hash['id']))
            assert test_data['summ'] == _hash['summ']
            assert test_data['cracked'] == _hash['cracked']
            assert test_data['password'] == _hash['password']
            # Was found and deleted by HashlistsByAlgThread
            assert self.db.fetch_row("SELECT * FROM hashes WHERE id = {0}".format(_hash['common_id'])) is None
def test_run_4(self):
    """ Have 2 hashlists. Start one task by first, add second task with same
    priority. Stop first "manually", second start and done. After that first
    "manually" return to work.
    """
    self._add_hashlist(alg_id=4, uncracked=3)
    self._add_hash(hash=md5('333'), summ=md5(md5('333')))
    self._add_hash(hash=md5('444'), summ=md5(md5('444')))
    self._add_hash(hash=md5('ccccccc'), summ=md5(md5('ccccccc')))
    self._add_hashlist(id=2, alg_id=23, uncracked=3)
    self._add_hash(hashlist_id=2, hash=md5(md5('333')), summ=md5(md5(md5('333'))))
    self._add_hash(hashlist_id=2, hash=md5(md5('444')), summ=md5(md5(md5('444'))))
    self._add_hash(hashlist_id=2, hash=md5(md5('zzzweeg')), summ=md5(md5(md5('zzzweeg'))))
    # Pre-create the common per-alg lists (ids 3, 4) so the daemon skips
    # the build step.
    self._add_hashlist(id=3, alg_id=4, common_by_alg=4, uncracked=3)
    self._add_hash(hashlist_id=3, hash=md5('333'), summ=md5(md5('333')))
    self._add_hash(hashlist_id=3, hash=md5('444'), summ=md5(md5('444')))
    self._add_hash(hashlist_id=3, hash=md5('ccccccc'), summ=md5(md5('ccccccc')))
    self._add_hashlist(id=4, alg_id=23, common_by_alg=23, uncracked=3)
    self._add_hash(hashlist_id=4, hash=md5(md5('333')), summ=md5(md5(md5('333'))))
    self._add_hash(hashlist_id=4, hash=md5(md5('444')), summ=md5(md5(md5('444'))))
    self._add_hash(hashlist_id=4, hash=md5(md5('zzzweeg')), summ=md5(md5(md5('zzzweeg'))))
    # Daemon in its own process group for killpg cleanup.
    process = Popen("python ../../hbs.py", stdout=PIPE, stdin=PIPE, stderr=PIPE,
                    shell=True, preexec_fn=os.setsid)
    self._add_work_task(hashlist_id=2)
    self._add_task(source='?l?l?l?l?l?l?l', type='mask')
    start_time = int(time.time())
    while self.db.fetch_one("SELECT status FROM task_works WHERE id = 1") != "work":
        if int(time.time()) - start_time > 5:
            pytest.fail("Timeout start first task")
    self._add_work_task(id=2, hashlist_id=1)
    # Manual stop of the first task; the second should take over.
    self.db.update("task_works", {'status': 'go_stop'}, "id = 1")
    start_time = int(time.time())
    while self.db.fetch_one("SELECT status FROM task_works WHERE id = 2") != "work":
        if int(time.time()) - start_time > 20:
            pytest.fail("Timeout start second task")
    # Manual resume of the first task.
    self.db.update("task_works", {'status': 'wait'}, "id = 1")
    start_time = int(time.time())
    while self.db.fetch_one("SELECT status FROM task_works WHERE id = 1") != "work":
        # Trace status transitions to /tmp/time.txt for debugging.
        file_put_contents(
            '/tmp/time.txt',
            "{0}{1}\n".format(
                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                self.db.fetch_one("SELECT status FROM task_works WHERE id = 1")
            ), True)
        if int(time.time()) - start_time > 40:
            pytest.fail("Timeout start first task after stop")
        time.sleep(1)
    assert self.db.fetch_one("SELECT status FROM task_works WHERE id = 2") == "done"
    time.sleep(30)
    assert self.db.fetch_one("SELECT status FROM task_works WHERE id = 1") == "done"
    os.killpg(os.getpgid(process.pid), signal.SIGTERM)
    #stdout = process.stdout.read()
    # hashcat's CLI-password warning on stderr is expected noise.
    stderr = process.stderr.read()\
        .replace('Warning: Using a password on the command line interface can be insecure.\n', '')
    assert stderr == ''
    # NOTE(review): the next two asserts are textually identical — likely
    # one was meant to check a different password; confirm.
    assert self.db.fetch_one("SELECT COUNT(id) FROM hashes WHERE cracked=1 AND password='******'") == 1
    assert self.db.fetch_one("SELECT COUNT(id) FROM hashes WHERE cracked=1 AND password='******'") == 1
    assert self.db.fetch_one("SELECT DISTINCT status FROM task_works") == "done"
def test_run_3(self):
    """ Have 2 hashlists. Start task by one, add second with high priority.
    First task stop, start second. Wait for second done, and first return to
    work and done.
    """
    self._add_hashlist(alg_id=4, uncracked=3)
    self._add_hash(hash=md5('333'), summ=md5(md5('333')))
    self._add_hash(hash=md5('444'), summ=md5(md5('444')))
    # NOTE(review): summ is md5(md5('ccc')) while hash is md5('ccccccc') —
    # mismatched on purpose or a fixture slip; confirm.
    self._add_hash(hash=md5('ccccccc'), summ=md5(md5('ccc')))
    self._add_hashlist(id=2, alg_id=23, uncracked=3)
    self._add_hash(hashlist_id=2, hash=md5(md5('333')), summ=md5(md5(md5('333'))))
    self._add_hash(hashlist_id=2, hash=md5(md5('444')), summ=md5(md5(md5('444'))))
    self._add_hash(hashlist_id=2, hash=md5(md5('zzzweeg')), summ=md5(md5(md5('ccc'))))
    # Daemon in its own process group for killpg cleanup.
    process = Popen("python ../../hbs.py", stdout=PIPE, stdin=PIPE, stderr=PIPE,
                    shell=True, preexec_fn=os.setsid)
    self._add_work_task(hashlist_id=2)
    self._add_task(source='?l?l?l?l?l?l?l', type='mask')
    start_time = int(time.time())
    while self.db.fetch_one("SELECT status FROM task_works WHERE id = 1") != "work":
        if int(time.time()) - start_time > 20:
            pytest.fail("Timeout start first task")
    # Higher-priority task must preempt the running one.
    self._add_work_task(id=2, hashlist_id=1, priority=100)
    start_time = int(time.time())
    while self.db.fetch_one("SELECT status FROM task_works WHERE id = 2") != "work":
        if int(time.time()) - start_time > 20:
            pytest.fail("Timeout start second task")
    assert self.db.fetch_one("SELECT status FROM task_works WHERE id = 1") == "wait"
    time.sleep(30)
    os.killpg(os.getpgid(process.pid), signal.SIGTERM)
    #stdout = process.stdout.read()
    # hashcat's CLI-password warning on stderr is expected noise.
    stderr = process.stderr.read()\
        .replace('Warning: Using a password on the command line interface can be insecure.\n', '')
    assert stderr == ''
    # NOTE(review): the next two asserts are textually identical — likely
    # one was meant to check a different password; confirm.
    assert self.db.fetch_one("SELECT COUNT(id) FROM hashes WHERE cracked=1 AND password='******'") == 1
    assert self.db.fetch_one("SELECT COUNT(id) FROM hashes WHERE cracked=1 AND password='******'") == 1
    assert self.db.fetch_one("SELECT DISTINCT status FROM task_works") == 'done'
    assert self.db.fetch_one("SELECT name FROM hashlists WHERE common_by_alg = 4") == 'All-MD5'
    assert self.db.fetch_one("SELECT name FROM hashlists WHERE common_by_alg = 23") == 'All-md5(md5($pass))'
    assert self.db.fetch_col("SELECT id FROM hashlists WHERE common_by_alg") == [3, 4]
class Test_HashlistsByAlgLoaderThread(CommonUnit):
    """ Unit tests for HashlistsByAlgLoaderThread """
    # db is provided by CommonUnit; thrd holds the thread under test.
    db = None
    thrd = None

    def setup(self):
        """ Tests setup: clean DB and build a fresh loader thread. """
        self._clean_db()
        self.thrd = HashlistsByAlgLoaderThread()
        # Let exceptions propagate so a failing thread fails the test.
        self.thrd.catch_exceptions = False

    def teardown(self):
        """ Tests teardown: stop the thread and clean the DB. """
        if isinstance(self.thrd, HashlistsByAlgLoaderThread):
            self.thrd.available = False
            # Give the thread loop a moment to observe the stop flag.
            time.sleep(1)
            del self.thrd
        self._clean_db()

    def test_get_common_hashlist_id_by_alg_get(self):
        """ Test get_common_hashlist_id_by_alg_get() """
        self._add_hashlist(have_salts=1, common_by_alg=3)
        # An existing common hashlist for alg 3 is found, not re-created.
        assert self.thrd.get_common_hashlist_id_by_alg(3) == 1

    # (have_salt, hash-row) pairs for the create-path parametrization below.
    test_data = [
        (
            1,
            {'hash': 'a', 'salt': '1', 'summ': md5('a:1')},
        ),
        (
            0,
            {'hash': 'a', 'salt': '', 'summ': md5('a')},
        ),
    ]

    @pytest.mark.parametrize("have_salt,_hash", test_data)
    def test_get_common_hashlist_id_by_alg_create(self, have_salt, _hash):
        """
        Test get_common_hashlist_id_by_alg_create()
        :param have_salt: does hashlist has salt?
        :param _hash: hash data row
        :return:
        """
        self._add_hashlist(have_salts=have_salt, common_by_alg=0)
        self._add_hash(hash=_hash['hash'], salt=_hash['salt'], summ=_hash['summ'])

        # No common hashlist exists yet, so a new one (id 2) is created.
        assert self.thrd.get_common_hashlist_id_by_alg(3) == 2

        # Expected row of the freshly created common hashlist.
        test_hashlist_data = {'id': 2, 'name': 'All-MD4', 'have_salts': have_salt,
                              'delimiter': self.thrd.DELIMITER, 'cracked': 0, 'uncracked': 0,
                              'errors': '', 'parsed': 0, 'status': 'ready', 'common_by_alg': 3}
        hashlist_data = self.db.fetch_row("SELECT * FROM hashlists WHERE id = 2")
        for field in test_hashlist_data:
            assert hashlist_data[field] == test_hashlist_data[field]

    def test_get_common_hashlist_id_by_alg_with_salt_create_one_salt_forget(self):
        """ Test get_common_hashlist_id_by_alg_create() """
        # Two source hashlists with mixed have_salts flags; the common list
        # must still be created with have_salts=1.
        self._add_hashlist(have_salts=1, common_by_alg=0)
        self._add_hash(hash='a', salt='b', summ='333')

        self._add_hashlist(id=2, have_salts=0, common_by_alg=0)
        self._add_hash(hashlist_id=2, hash='c', salt='d', summ='111')

        assert self.thrd.get_common_hashlist_id_by_alg(3) == 3

        test_hashlist_data = {'id': 3, 'name': 'All-MD4', 'have_salts': 1,
                              'delimiter': self.thrd.DELIMITER, 'cracked': 0, 'uncracked': 0,
                              'errors': '', 'parsed': 0, 'status': 'ready', 'common_by_alg': 3}
        hashlist_data = self.db.fetch_row("SELECT * FROM hashlists WHERE id = 3")
        for field in test_hashlist_data:
            assert hashlist_data[field] == test_hashlist_data[field]

    def test_get_current_work_hashlist(self):
        """ Test get_current_work_hashlist() """
        assert not self.thrd.get_current_work_hashlist()
        self.db.insert("task_works", {'hashlist_id': 3, 'status': 'work', 'task_id': 1})
        assert self.thrd.get_current_work_hashlist() == 3

    def test_get_hashlist_status(self):
        """ Test get_hashlist_status() """
        self._add_hashlist(common_by_alg=1)
        assert self.thrd.get_hashlist_status(1) == 'ready'

    def test_is_alg_in_parse(self):
        """ Test is_alg_in_parse() """
        assert self.thrd.is_alg_in_parse(3) is False
        self._add_hashlist(common_by_alg=1)
        # 'waitoutparse' status marks alg 3 as in-parse.
        self.db.insert("task_works", {'hashlist_id': 1, 'status': 'waitoutparse', 'task_id': 1})
        assert self.thrd.is_alg_in_parse(3) is True

        assert self.thrd.is_alg_in_parse(4) is False
        self._add_hashlist(id=2, alg_id=4, common_by_alg=1)
        # 'outparsing' status also counts as in-parse.
        self.db.insert("task_works", {'hashlist_id': 2, 'status': 'outparsing', 'task_id': 1})
        assert self.thrd.is_alg_in_parse(4) is True

    def test_hashes_count_in_hashlist(self):
        """ Test hashes_count_in_hashlist() """
        assert self.thrd.hashes_count_in_hashlist(1) == 0
        self._add_hash()
        assert self.thrd.hashes_count_in_hashlist(1) == 1

    def test_hashes_count_by_algs(self):
        """ Test hashes_count_by_algs() """
        assert self.thrd.hashes_count_by_algs() == {}
        self._add_hashlist()
        self._add_hash(summ='111')
        self._add_hash(summ='222', hash='a', salt='b')
        self._add_hashlist(id=2, alg_id=4)
        self._add_hash(hashlist_id=2, summ='333')
        # Two hashes under alg 3, one under alg 4.
        assert self.thrd.hashes_count_by_algs() == {3: 2, 4: 1}

    def test_is_alg_have_salts(self):
        """ Test is_alg_have_salts() """
        self._add_hashlist()
        assert self.thrd.is_alg_have_salts(3) is False
        self._add_hashlist(id=2, have_salts=1)
        # Forget salt bug
        assert self.thrd.is_alg_have_salts(3) is True

    def test_get_possible_hashlist_and_alg_simple(self):
        """ Test get_possible_hashlist_and_alg_simple() """
        self._add_hashlist()
        self._add_hash(hash='a', summ='111')
        self._add_hash(hash='b', summ='222')
        # A common hashlist (id 2) for alg 3 should be proposed.
        assert self.thrd.get_possible_hashlist_and_alg() == {'hashlist_id': 2, 'alg_id': 3}

    def test_get_possible_hashlist_and_alg_none_already(self):
        """ Test get_possible_hashlist_and_alg_none_already() """
        self._add_hashlist()
        self._add_hash(hash='a', summ='111')
        self._add_hash(hash='b', summ='222')
        # Common hashlist already holds the same hashes -> nothing to do.
        self._add_hashlist(id=2, common_by_alg=3)
        self._add_hash(hashlist_id=2, hash='a', summ='111')
        self._add_hash(hashlist_id=2, hash='b', summ='222')
        assert self.thrd.get_possible_hashlist_and_alg() is None

    def test_get_possible_hashlist_and_alg_none_in_parse(self):
        """ Test get_possible_hashlist_and_alg_none_in_parse() """
        # An alg that is currently being out-parsed must be skipped.
        self.db.insert("task_works", {'hashlist_id': 1, 'status': 'waitoutparse', 'task_id': 1})
        self._add_hashlist()
        self._add_hash(hash='a', summ='111')
        self._add_hash(hash='b', summ='222')
        assert self.thrd.get_possible_hashlist_and_alg() is None
        self.db.update("task_works", {'status': 'outparsing'}, "id=1")
        assert self.thrd.get_possible_hashlist_and_alg() is None

    def test_get_possible_hashlist_and_alg_none_not_ready(self):
        """ Test get_possible_hashlist_and_alg_none_not_ready() """
        self._add_hashlist()
        self._add_hash(hash='a', summ='111')
        self._add_hash(hash='b', summ='222')
        # Common hashlist exists but is not in 'ready' status.
        self._add_hashlist(id=2, status='wait', common_by_alg=3)
        assert self.thrd.get_possible_hashlist_and_alg() is None

    def test_get_possible_hashlist_and_alg_none_in_work(self):
        """ Test get_possible_hashlist_and_alg_none_in_work() """
        self._add_hashlist()
        self._add_hash(hash='a', summ='111')
        self._add_hash(hash='b', summ='222')
        self._add_hashlist(id=2, common_by_alg=3)
        # Common hashlist is busy in a working task -> skip.
        self.db.insert("task_works", {'hashlist_id': 2, 'status': 'work', 'task_id': 1})
        assert self.thrd.get_possible_hashlist_and_alg() is None

    def test_clean_old_hashes(self):
        """ Test clean_old_hashes() """
        self._add_hashlist()
        self._add_hash(hash='a', summ='111')
        self._add_hash(hash='b', summ='222')
        assert self.db.fetch_one("SELECT COUNT(*) FROM hashes WHERE hashlist_id = 1") == 2
        self.thrd.clean_old_hashes(1)
        # All hashes removed and counters reset.
        assert self.db.fetch_one("SELECT COUNT(*) FROM hashes WHERE hashlist_id = 1") == 0
        assert self.db.fetch_one("SELECT cracked+uncracked FROM hashlists WHERE id = 1") == 0

    def test_put_all_hashes_of_alg_in_file(self):
        """ Test put_all_hashes_of_alg_in_file() """
        self._add_hashlist()
        self._add_hash(hash='a', summ='111')
        self._add_hash(summ='222')
        self._add_hash(hash='b', summ='333')
        path = self.thrd.put_all_hashes_of_alg_in_file(3)
        assert os.path.exists(path)
        # Empty hashes are skipped; one hash per line.
        assert file_get_contents(path) == 'a\nb\n'

        self._add_hashlist(id=2, have_salts=1, alg_id=4)
        self._add_hash(hashlist_id=2, hash='a', salt='b', summ='111')
        self._add_hash(hashlist_id=2, summ='222')
        self._add_hash(hashlist_id=2, hash='c', salt='d', summ='333')
        path = self.thrd.put_all_hashes_of_alg_in_file(4)
        assert os.path.exists(path)
        # Salted algs write hash<DELIMITER>salt per line.
        assert file_get_contents(path) == 'a{0}b\nc{0}d\n'.format(self.thrd.DELIMITER)
class Test_HashlistsLoaderThread(CommonIntegration):
    """ Integration tests for FinderInsideProThread """
    # NOTE(review): the class name mentions HashlistsLoaderThread, but setup()
    # builds a FinderInsideProThread and the docstring agrees — the name looks
    # stale; confirm and consider renaming (kept as-is to preserve discovery).

    # Thread under test.
    thrd = None

    def setup(self):
        """ Tests setup: clean DB, start config, allow finder for all algs. """
        self._clean_db()
        self.thrd = FinderInsideProThread()
        # Let exceptions propagate so a failing thread fails the test.
        self.thrd.catch_exceptions = False
        # Enable the InsidePro finder for every algorithm row.
        self.db.update("algs", {'finder_insidepro_allowed': 1}, "id")

    def teardown(self):
        """ Tests teardown: stop the thread and clean the DB. """
        if isinstance(self.thrd, FinderInsideProThread):
            self.thrd.available = False
            # Give the thread loop a moment to observe the stop flag.
            time.sleep(1)
            del self.thrd
        self._clean_db()

    # (alg_id, have_salt, hash-rows) cases: one salted alg, one unsalted.
    # 'cracked'/'password' are the values expected AFTER the finder ran.
    test_data = [(74, 1, [
        {
            'id': 1,
            'hash': '0065ffe5f9e4e5996c2c3f52f81c6e31',
            'salt': 'cB6Ar',
            'cracked': 1,
            'summ': md5("0065ffe5f9e4e5996c2c3f52f81c6e31:cB6Ar"),
            'password': '******'
        },
        {
            'id': 2,
            'hash': '20e153b046072c949562f3c939611db8',
            'salt': '0RTV',
            'cracked': 0,
            'summ': md5("20e153b046072c949562f3c939611db8:0RTV"),
            'password': ''
        },
    ]), (4, 0, [
        {
            'id': 1,
            'hash': md5('aaa'),
            'salt': '',
            'cracked': 1,
            'summ': md5(md5('aaa')),
            'password': '******'
        },
        {
            'id': 2,
            'hash': '10e153b046072c949562f3c939611db7',
            'salt': '',
            'cracked': 0,
            'summ': md5("10e153b046072c949562f3c939611db7"),
            'password': ''
        },
    ])]

    @pytest.mark.parametrize("alg_id,have_salt,hashes", test_data)
    def test_run(self, alg_id, have_salt, hashes):
        """
        Test simple run
        :param alg_id:
        :param have_salt: does alg has salts?
        :param hashes: Hashes rows
        :return:
        """
        # last_finder_checked=now -> the thread must NOT query the finder yet.
        self._add_hashlist(common_by_alg=alg_id, alg_id=alg_id, have_salts=have_salt,
                           last_finder_checked=int(time.time()))
        for _hash in hashes:
            self._add_hash(id=_hash['id'], hash=_hash['hash'], salt=_hash['salt'],
                           summ=_hash['summ'])

        self.thrd.start()
        time.sleep(10)

        # Nothing cracked while the check timestamp is fresh.
        assert self.db.fetch_one(
            "SELECT 1 FROM hashes WHERE id = 1 AND cracked = 0 AND password=''"
        ) == 1
        assert self.db.fetch_one(
            "SELECT 1 FROM hashes WHERE id = 2 AND cracked = 0 AND password=''"
        ) == 1
        assert self.db.fetch_one(
            "SELECT last_finder_checked FROM hashlists WHERE id = 1"
        ) < time.time()

        # Reset the timestamp: on its next pass the thread should query the
        # finder and fill in the expected cracked/password values.
        self.db.update("hashlists", {"last_finder_checked": 0}, "id = 1")
        time.sleep(10)

        for _hash in hashes:
            test_data = self.db.fetch_row(
                "SELECT * FROM hashes WHERE id = {0}".format(_hash['id']))
            assert test_data['summ'] == _hash['summ']
            assert test_data['cracked'] == _hash['cracked']
            assert test_data['password'] == _hash['password']

        # The check timestamp was refreshed during the run.
        assert self.db.fetch_one(
            "SELECT last_finder_checked FROM hashlists WHERE id = 1"
        ) > time.time() - 20
def _add_path_cache(self, host_id, parent_id, name, _id): """ Add branch id to path-cache """ _hash = md5("{0}-{1}-{2}".format(host_id, parent_id, name)) self._pathCache[_hash] = _id