class MongoJob(WSJob): """ Common class for jobs works with MongoDB """ unique = True collection = None select_limit = 50 skip_blank_rows = True counter = 0 collection_name = None def __init__(self, maxsize=0): WSJob.__init__(self, maxsize) self.collection = Registry().get('mongo')[self.collection_name] def build_row(self, _str): """ Common build row method for MongoDB """ return { "name": _str.strip(), "checked": 0, "getted": 0 } def qsize(self): """ Size of queue """ return self.collection.find({"checked": 0}).count() def set_unique(self, unique=True): """ Enable remove dups in queue """ self.unique = unique def set_skip_blank_rows(self, value=True): """ If True - we will skip blank rows then fill queue from dict or file """ self.skip_blank_rows = value def task_done(self, name): """ Mark current row as done """ self.counter += 1 self.collection.update({'name': str(unicode(name)), "getted": 1}, {"$set": {"checked": 1}}) WSJob.task_done(self) def get(self, block=False, timeout=None): """ Get next item from queue """ if self.empty() or self.qsize() < 50: self.load_data() if self.empty(): raise Queue.Empty return WSJob.get(self, block, timeout) def load_data(self): """ Load data into queue from MongoDB """ data = self.collection.find( {"checked": 0, "getted": 0}, limit=int(Registry().get('config')['main']['mongo_data_load_per_once']) ) for row in data: self.put(row['name']) self.collection.update({"name": row['name']}, {"$set": {"getted": 1}}) return True def load_dict(self, dict_for_load, drop=True): """ Fill collection from dict """ if drop: self.collection.drop() counter = 0 last = "START OF FILE" for line in dict_for_load: try: line = line.strip() unicode(line) self.collection.insert(self.build_row(line)) except UnicodeDecodeError: _str = " UNICODE ERROR: In file '{0}' skip word '{1}', after word '{2}' !".format(file, line, last) if Registry().isset('logger'): Registry().get('logger').log(_str) else: print _str continue counter += 1 last = line self.load_data() 
return counter def load_dom(self, dom): """ Fill queue from DictOfMask """ self.collection.drop() while True: word = dom.get() if word is None: break self.collection.insert(self.build_row(word)) self.collection.create_index('name', drop_dups=True, unique=self.unique) self.load_data() return self.collection.count() def load_file(self, _file): """ Fill queue from text file """ self.collection.drop() fh = open(_file) last = "START OF FILE" while True: line = fh.readline() if not line: break if not line.strip() and self.skip_blank_rows: continue try: line = line.strip() unicode(line) self.collection.insert(self.build_row(line)) except UnicodeDecodeError: _str = " UNICODE ERROR: In file '{0}' skip word '{1}', after word '{2}' !".format(_file, line, last) if Registry().isset('logger'): Registry().get('logger').log(_str) else: print _str continue last = line fh.close() self.collection.create_index('name', drop_dups=True, unique=self.unique) self.load_data() return self.collection.count() # 2 метода ниже взяты с # http://stackoverflow.com/questions/1581895/how-check-if-a-task-is-already-in-python-queue # Рецепт для уникальных задачь в очереди def _init(self, maxsize): WSJob._init(self, maxsize) if self.unique: self.all_items = set() def _put(self, item): if self.unique: if item not in self.all_items: WSJob._put(self, item) self.all_items.add(item) else: _str = "WARNING: try to add not unique item `{0}`".format(item) if Registry().isset('logger'): #Registry().get('logger').log(_str) pass else: #print _str pass else: WSJob._put(self, item)
class UrlsModel(object): """ Class for work with urls table """ _db = None def __init__(self): self._db = Registry().get('ndb') def add( self, pid, host_id, url, referer='', response_code=0, response_time=0, who_add='human', spidered=0, size=0, descr='' ): """ Add url to table """ try: return self._db.insert( "urls", { "project_id": pid, "host_id": host_id, "hash": md5(url), "url": url, "referer": referer, "response_code": response_code, "response_time": response_time, "when_add": int(time.time()), "who_add": who_add, "spidered": spidered, "size": size, "descr": descr }, 1 ) except BaseException as e: if Registry().isset('logger'): Registry().get('logger').ex(e) else: print "Can`t add url: " + str(e) def add_mass(self, pid, host_id, data): """ Add many urls at once in table """ to_insert = [] for row in data: for field in ['url', 'referer', 'response_code', 'response_time', 'who_add', 'spidered', 'size', 'descr']: if field not in row.keys(): if field in ['referer', 'response_code', 'response_time', 'descr']: row[field] = '' elif field in ['spidered', 'size']: row[field] = 0 elif field == 'who_add': row[field] = 'human' elif field == 'url': raise WSException("URL row must have a 'url' key") for k in row.keys(): if k not in [ 'url', 'referer', 'response_code', 'response_time', 'who_add', 'spidered', 'size', 'descr' ]: raise WSException("Key '{0}' must not be in url data".format(k)) to_insert.append({ 'project_id': pid, "host_id": host_id, "hash": md5(row['url']), "url": row['url'], "referer": row['referer'], "response_code": row['response_code'], "response_time": row['response_time'], "when_add": int(time.time()), "who_add": row['who_add'], "spidered": row['spidered'], "size": row['size'], "descr": row['descr'] }) if len(to_insert)%50 == 0: self._db.insert_mass("urls", to_insert, 1) to_insert = [] if len(to_insert): self._db.insert_mass("urls", to_insert, 1) return True def list_by_host_name(self, project_id, host, like=""): """ Get urls list by host name and 
project_id """ host_id = HostsModel().get_id_by_name(project_id, host) like_expr = "" \ if not len(like.strip()) \ else " AND url LIKE '%{0}%' ".format(self._db.escape(like.strip())) return self._db.fetch_all( "SELECT url, response_code as code, response_time as time, when_add, who_add, descr, size FROM urls " "WHERE project_id = {0} AND host_id = {1} ".format(project_id, host_id) + like_expr + "ORDER BY url" ) def list_by_host_name_for_spider(self, project_id, host): """ Get urls list by host name and project_id, but in special format for spider """ host_id = HostsModel().get_id_by_name(project_id, host) return self._db.fetch_all( "SELECT url, response_code as code, response_time as time, when_add, who_add, descr FROM urls " "WHERE project_id = {0} AND host_id = {1} AND !spidered " "ORDER BY url".format(project_id, host_id) ) def exists(self, project_id, host, url): """ Is url exists? """ host_id = HostsModel().get_id_by_name(project_id, host) return self._db.fetch_one( "SELECT 1 FROM urls WHERE project_id = {0} AND host_id={1} AND hash = '{2}'" .format(project_id, host_id, md5(url)) ) def delete(self, project_id, host, url): """ Delete url from table """ host_id = HostsModel().get_id_by_name(project_id, host) self._db.q( "DELETE FROM urls WHERE project_id = {0} AND host_id = {1} AND hash = {2} " .format(project_id, host_id, self._db.quote(md5(url))) ) def update_url_field(self, project_id, host, url, field, value): """ Update custom field of current url """ host_id = HostsModel().get_id_by_name(project_id, host) return self._db.update( "urls", {field: value}, "hash = '{0}' AND project_id = {1} AND host_id = {2}".format(md5(url), project_id, host_id) ) def update_url_field_mass(self, project_id, host, field, data): """ Mass update custom field of many urls """ host_id = HostsModel().get_id_by_name(project_id, host) update = {} for row in data: case = "host_id = '{0}' AND `hash` = '{1}' ".format(host_id, md5(row['url'])) update[case] = row['value'] 
self._db.update_mass("urls", field, update)
class CommonUnit(object):
    """ Common class for unit tests """
    db = None  # Database handle shared by all fixture helpers

    def setup_class(self):
        """ Prepare class for run tests: load config, register mocks, open DB """
        curpath = os.path.dirname(__file__) + "/"

        cfg = configparser.ConfigParser()
        cfg.read(curpath + 'config.ini')
        Registry().set('config', cfg)

        Registry().set('logger', LoggerMock())

        Registry().set('db', Database(
            cfg['main']['mysql_host'],
            cfg['main']['mysql_user'],
            cfg['main']['mysql_pass'],
            cfg['main']['mysql_dbname'],
        ))
        self.db = Registry().get('db')  # type: Database

    def _clean_db(self):
        """ Clean tables for tests """
        for table in ("dicts", "dicts_groups", "hashes", "hashlists", "rules",
                      "tasks", "tasks_groups", "task_works", "logs"):
            self.db.q("TRUNCATE TABLE " + table)
        self.db.update("algs", {'finder_insidepro_allowed': 0}, "id")

    def _add_hashlist(self, id=1, name='test', alg_id=3, have_salts=0, status='ready',
                      common_by_alg=0, parsed=1, last_finder_checked=0):
        """ Add hashlist record """
        row = {
            'id': id,
            'name': name,
            'alg_id': alg_id,
            'have_salts': have_salts,
            'delimiter': '',
            'cracked': 0,
            'uncracked': 0,
            'errors': '',
            'parsed': parsed,
            'tmp_path': '',
            'status': status,
            'when_loaded': 0,
            'common_by_alg': common_by_alg,
            'last_finder_checked': last_finder_checked
        }
        self.db.insert("hashlists", row)

    def _add_hash(self, hashlist_id=1, hash='', salt='', summ='', password='', cracked=0, id=None):
        """ Add hash record """
        row = {
            'id': id,
            'hashlist_id': hashlist_id,
            'hash': hash,
            'salt': salt,
            'password': password,
            'cracked': cracked,
            'summ': summ
        }
        self.db.insert("hashes", row)

    def _add_work_task(self, id=1, hashlist_id=1, task_id=1, status='wait', priority=0):
        """ Add work task record """
        row = {
            'id': id,
            'hashlist_id': hashlist_id,
            'task_id': task_id,
            'status': status,
            'priority': priority,
        }
        self.db.insert("task_works", row)

    def _add_task(self, id=1, name='task', group_id=1, type='dict', source=1):
        """ Add task record """
        row = {
            'id': id,
            'name': name,
            'group_id': group_id,
            'type': type,
            'source': source,
        }
        self.db.insert("tasks", row)

    def _add_dict(self, id=1, group_id=1, name='dict', hash='1'):
        """ Add dict record """
        row = {
            'id': id,
            'name': name,
            'group_id': group_id,
            'hash': hash,
        }
        self.db.insert("dicts", row)

    def _add_dict_group(self, id=1, name='group'):
        """ Add dict group record """
        self.db.insert("dicts_groups", {'id': id, 'name': name})

    def _add_rule(self, id=1, name='rule', hash='1.rule', count=1):
        """ Add rule record """
        row = {
            'id': id,
            'name': name,
            'hash': hash,
            'count': count
        }
        self.db.insert("rules", row)
class UrlsModel(object): """ Class for work with urls table """ _db = None def __init__(self): self._db = Registry().get('ndb') def add(self, pid, host_id, url, referer='', response_code=0, response_time=0, who_add='human', spidered=0, size=0, descr=''): """ Add url to table """ try: return self._db.insert( "urls", { "project_id": pid, "host_id": host_id, "hash": md5(url), "url": url, "referer": referer, "response_code": response_code, "response_time": response_time, "when_add": int(time.time()), "who_add": who_add, "spidered": spidered, "size": size, "descr": descr }, 1) except BaseException as e: if Registry().isset('logger'): Registry().get('logger').ex(e) else: print "Can`t add url: " + str(e) def add_mass(self, pid, host_id, data): """ Add many urls at once in table """ to_insert = [] for row in data: for field in [ 'url', 'referer', 'response_code', 'response_time', 'who_add', 'spidered', 'size', 'descr' ]: if field not in row.keys(): if field in [ 'referer', 'response_code', 'response_time', 'descr' ]: row[field] = '' elif field in ['spidered', 'size']: row[field] = 0 elif field == 'who_add': row[field] = 'human' elif field == 'url': raise WSException("URL row must have a 'url' key") for k in row.keys(): if k not in [ 'url', 'referer', 'response_code', 'response_time', 'who_add', 'spidered', 'size', 'descr' ]: raise WSException( "Key '{0}' must not be in url data".format(k)) to_insert.append({ 'project_id': pid, "host_id": host_id, "hash": md5(row['url']), "url": row['url'], "referer": row['referer'], "response_code": row['response_code'], "response_time": row['response_time'], "when_add": int(time.time()), "who_add": row['who_add'], "spidered": row['spidered'], "size": row['size'], "descr": row['descr'] }) if len(to_insert) % 50 == 0: self._db.insert_mass("urls", to_insert, 1) to_insert = [] if len(to_insert): self._db.insert_mass("urls", to_insert, 1) return True def list_by_host_name(self, project_id, host, like=""): """ Get urls list by host name and 
project_id """ host_id = HostsModel().get_id_by_name(project_id, host) like_expr = "" \ if not len(like.strip()) \ else " AND url LIKE '%{0}%' ".format(self._db.escape(like.strip())) return self._db.fetch_all( "SELECT url, response_code as code, response_time as time, when_add, who_add, descr, size FROM urls " "WHERE project_id = {0} AND host_id = {1} ".format( project_id, host_id) + like_expr + "ORDER BY url") def list_by_host_name_for_spider(self, project_id, host): """ Get urls list by host name and project_id, but in special format for spider """ host_id = HostsModel().get_id_by_name(project_id, host) return self._db.fetch_all( "SELECT url, response_code as code, response_time as time, when_add, who_add, descr FROM urls " "WHERE project_id = {0} AND host_id = {1} AND !spidered " "ORDER BY url".format(project_id, host_id)) def exists(self, project_id, host, url): """ Is url exists? """ host_id = HostsModel().get_id_by_name(project_id, host) return self._db.fetch_one( "SELECT 1 FROM urls WHERE project_id = {0} AND host_id={1} AND hash = '{2}'" .format(project_id, host_id, md5(url))) def delete(self, project_id, host, url): """ Delete url from table """ host_id = HostsModel().get_id_by_name(project_id, host) self._db.q( "DELETE FROM urls WHERE project_id = {0} AND host_id = {1} AND hash = {2} " .format(project_id, host_id, self._db.quote(md5(url)))) def update_url_field(self, project_id, host, url, field, value): """ Update custom field of current url """ host_id = HostsModel().get_id_by_name(project_id, host) return self._db.update( "urls", {field: value}, "hash = '{0}' AND project_id = {1} AND host_id = {2}".format( md5(url), project_id, host_id)) def update_url_field_mass(self, project_id, host, field, data): """ Mass update custom field of many urls """ host_id = HostsModel().get_id_by_name(project_id, host) update = {} for row in data: case = "host_id = '{0}' AND `hash` = '{1}' ".format( host_id, md5(row['url'])) update[case] = row['value'] 
self._db.update_mass("urls", field, update)
class CommonIntegration(object):
    """ Common class for integration tests """
    db = None  # Database handle shared by all fixture helpers

    def setup_class(self):
        """ Prepare class for run tests: load config, register mocks, open DB """
        curpath = os.path.dirname(__file__) + "/"

        cfg = configparser.ConfigParser()
        cfg.read(curpath + 'config.ini')
        Registry().set('config', cfg)

        Registry().set('logger', LoggerMock())

        Registry().set('db', Database(
            cfg['main']['mysql_host'],
            cfg['main']['mysql_user'],
            cfg['main']['mysql_pass'],
            cfg['main']['mysql_dbname'],
        ))
        self.db = Registry().get('db')  # type: Database

    def _clean_db(self):
        """ Clean tables for tests """
        for table in ("dicts", "dicts_groups", "hashes", "hashlists", "rules",
                      "tasks", "tasks_groups", "task_works", "logs"):
            self.db.q("TRUNCATE TABLE " + table)
        self.db.update("algs", {'finder_insidepro_allowed': 0}, "id")

    def _add_hashlist(self, id=1, name='test', alg_id=3, have_salts=0, status='ready',
                      common_by_alg=0, parsed=1, tmp_path='', last_finder_checked=0,
                      uncracked=0):
        """ Add hashlist record """
        row = {
            'id': id,
            'name': name,
            'alg_id': alg_id,
            'have_salts': have_salts,
            # salted lists get a sentinel delimiter so parsing is unambiguous
            'delimiter': 'UNIQUEDELIMITER' if have_salts else '',
            'cracked': 0,
            'uncracked': uncracked,
            'errors': '',
            'parsed': parsed,
            'tmp_path': tmp_path,
            'status': status,
            'when_loaded': 0,
            'common_by_alg': common_by_alg,
            'last_finder_checked': last_finder_checked,
        }
        self.db.insert("hashlists", row)

    def _add_hash(self, hashlist_id=1, hash='', salt='', summ='', password='', cracked=0, id=None):
        """ Add hash record """
        row = {
            'id': id,
            'hashlist_id': hashlist_id,
            'hash': hash,
            'salt': salt,
            'password': password,
            'cracked': cracked,
            'summ': summ
        }
        self.db.insert("hashes", row)

    def _add_work_task(self, id=1, hashlist_id=1, task_id=1, status='wait', priority=0, out_file=''):
        """ Add work task record """
        row = {
            'id': id,
            'hashlist_id': hashlist_id,
            'task_id': task_id,
            'status': status,
            'priority': priority,
            'out_file': out_file,
        }
        self.db.insert("task_works", row)

    def _add_task(self, id=1, name='task', group_id=1, type='dict', source=1):
        """ Add task record """
        row = {
            'id': id,
            'name': name,
            'group_id': group_id,
            'type': type,
            'source': source,
        }
        self.db.insert("tasks", row)

    def _add_dict(self, id=1, group_id=1, name='dict', hash='1'):
        """ Add dict record """
        row = {
            'id': id,
            'name': name,
            'group_id': group_id,
            'hash': hash,
        }
        self.db.insert("dicts", row)

    def _add_dict_group(self, id=1, name='group'):
        """ Add dict group record """
        self.db.insert("dicts_groups", {'id': id, 'name': name})

    def _add_rule(self, id=1, name='rule', hash='1.rule', count=1):
        """ Add rule record """
        row = {
            'id': id,
            'name': name,
            'hash': hash,
            'count': count
        }
        self.db.insert("rules", row)