def __init__(self, db):
    """Keep the database handle, build a Findex helper and reset the state."""
    self.db = db
    self.findex = Findex(db)
    # Request state starts out empty: no environment values, no file list.
    self.env, self.files = {}, None
class Searcher():
    """Build and run a filtered search over ``Files`` from request parameters."""

    # Request names mapped to the numeric ids used in the queries below.
    _PROTOCOLS = {'ftp': 0, 'http': 1, 'smb': 2}
    _CATEGORIES = {
        'unknown': 0,
        'documents': 1,
        'movies': 2,
        'music': 3,
        'pictures': 4
    }
    # Size bucket id -> (exclusive lower, inclusive upper) bound in bytes.
    # ``None`` as upper bound means "no upper limit"; bucket 0 means "any".
    _SIZES = {
        1: (0, 8388600),
        2: (8388600, 134220000),
        3: (134220000, 536870912),
        4: (536870912, 2147483648),
        5: (2147483648, 8589934592),
        6: (8589934592, None)
    }

    def __init__(self, cfg, db):
        self.cfg = cfg
        self.db = db
        self.findex = Findex(self.db)

    @staticmethod
    def _escape_like(text, escape='/'):
        """Return *text* with LIKE wildcards (``%``, ``_``) escaped by *escape*."""
        # The escape character itself goes first so it is not doubled twice.
        for char in (escape, '%', '_'):
            text = text.replace(char, escape + char)
        return text

    def _key_check(self, keyword):
        """Normalise a search keyword and enforce the minimum length.

        *keyword* is either the keyword string itself or the request dict, in
        which case the first element of its ``'key'`` entry is used.

        Returns the keyword with ``- , + _ %`` replaced by spaces.
        Raises SearchException when the key is missing/empty or the result is
        shorter than 4 characters.
        """
        if isinstance(keyword, dict):
            if 'key' not in keyword or not keyword['key']:
                raise SearchException('Search query must contain 4 characters or more')
            keyword = keyword['key'][0]

        # '%' and '_' are LIKE wildcards; blanking them keeps the final
        # ``searchable LIKE`` filter literal.
        for blocked in ('-', ',', '+', '_', '%'):
            keyword = keyword.replace(blocked, ' ')

        if len(keyword) < 4:
            raise SearchException('Search query must contain 4 characters or more')

        return keyword

    def search(self, vars):
        """Run the search described by the request dict *vars*.

        Recognised keys (values are lists, as the code indexes ``[0]``):
        ``key`` (required), ``protocols``, ``hosts``, ``cats``, ``exts``,
        ``size``, ``path`` and ``host``.

        Returns a dict with ``sdata`` (the applied selection plus a
        ``filtered`` flag), ``results`` (``data`` rows and ``load_dbtime``
        in seconds) and the escaped ``key``.

        Raises SearchException for a too-short keyword or when none of the
        requested hosts can be found.
        """
        val = self._key_check(vars).lower()

        filtered = False
        start_dbtime = datetime.now()

        # to-do: move this to API (or make api.py use this class)
        q = self.db.query(Files)

        sort_by_size = False
        sdata = {
            'protocols': [],
            'hosts': [],
            'exts': [],
            'cats': [],
            'fsize': 0
        }

        # Protocols: fall back to all of them when the parameter is absent or
        # none of the names is recognised, so the host lookup below never
        # runs against an empty IN () list.
        protocol_ids = []
        if 'protocols' in vars:
            for name in vars['protocols']:
                protocol_id = self._PROTOCOLS.get(name.lower())
                if protocol_id is not None and protocol_id not in protocol_ids:
                    protocol_ids.append(protocol_id)
        sdata['protocols'] = protocol_ids or [0, 1, 2]

        # Hosts: '*' as first entry (or an empty list) means "all hosts".
        if 'hosts' in vars:
            dhosts = vars['hosts']
            if isinstance(dhosts, list) and dhosts and dhosts[0] != '*':
                host_ids = []
                for host in dhosts:
                    host_results = self.db.query(Hosts).filter(
                        Hosts.address == host).filter(
                        Hosts.protocol.in_(sdata['protocols'])).all()
                    host_ids.extend(row.id for row in host_results)

                if not host_ids:
                    raise SearchException('Could not find any host entries for specified host(s)')

                sdata['hosts'] = host_ids
                q = q.filter(Files.host_id.in_(host_ids))
                filtered = True

        # Categories: accepted by name or by numeric id.
        if 'cats' in vars:
            dformats = []
            for cat in vars['cats']:
                cat = cat.lower()
                if cat in self._CATEGORIES:
                    dformats.append(self._CATEGORIES[cat])
                else:
                    dformats.append(int(cat))
            q = q.filter(Files.file_format.in_(dformats))
            sdata['cats'].extend(dformats)
        else:
            sdata['cats'] = [0, 1, 2, 3, 4]

        # Anything less than the full category set counts as a filter.
        if any(i not in sdata['cats'] for i in (0, 1, 2, 3, 4)):
            filtered = True

        # Extensions: explicit list, otherwise taken from a "name.ext" keyword.
        if 'exts' in vars:
            exts = vars['exts']
            if isinstance(exts, list):
                exts = [z.replace('.', '') for z in exts if z]
                q = q.filter(Files.file_ext.in_(exts))
                filtered = True
                sdata['exts'].extend(exts)
        elif '.' in val:
            stem, ext = val.split('.', 1)
            ext = ext.replace(',', '').strip()
            q = q.filter(Files.file_ext == ext)
            sdata['exts'].append(ext)
            # The remaining stem must still be a valid keyword on its own.
            val = self._key_check(stem)
            filtered = True

        if 'size' in vars:
            fsize = vars['size']
            if isinstance(fsize, list):
                fsize = int(fsize[0])

            bounds = self._SIZES.get(fsize)
            if bounds is not None:
                lower, upper = bounds
                q = q.filter(Files.file_size > lower)
                if upper is not None:
                    # Inclusive upper bound: a file exactly on a bucket
                    # boundary belongs to the lower bucket instead of none.
                    q = q.filter(Files.file_size <= upper)
                filtered = True

            sdata['fsize'] = fsize
            # NOTE(review): the collapsed original does not show whether the
            # size sort applies to bucket 0 as well - confirm.
            sort_by_size = True

        if 'path' in vars:
            path = vars['path']
            if isinstance(path, list):
                path = path[0]

            if len(path) > 3:
                # quote_plus() emits %XX escapes; unescaped, every '%' would
                # act as a LIKE wildcard and the prefix match would hit
                # unrelated directories.
                prefix = self._escape_like(quote_plus(path), '/')
                q = q.filter(Files.file_path.like(prefix + '%', escape='/'))
                filtered = True

        if 'host' in vars:
            host = vars['host']
            if isinstance(host, list):
                host = int(host[0])
            q = q.filter(Files.host_id == host)
            filtered = True

        q = q.filter(Files.searchable.like('%' + val + '%')).limit(600)

        results = {}
        results['data'] = q.all()
        results['load_dbtime'] = (datetime.now() - start_dbtime).total_seconds()

        if sort_by_size:
            # Sorts only the (at most 600) rows already fetched.
            results['data'] = sorted(results['data'], key=lambda k: k.file_size, reverse=True)

        # to-do: dont do this here
        for r in results['data']:
            setattr(r, 'host', self.findex.get_host_objects(r.host_id))

        results['data'] = self.findex.set_humanize(results['data'])
        results['data'] = self.findex.set_icons(results['data'])

        sdata['filtered'] = filtered

        return {'sdata': sdata, 'results': results, 'key': jinja2.escape(vars['key'][0])}
class Browser:
    """Turn a ``host/dir/.../name`` browse path into a prepared file listing."""

    def __init__(self, db):
        self.db = db
        self.findex = Findex(db)
        self.env = {}
        self.files = None

    def parse_incoming_path(self, path):
        """Split *path* into its parts and store them in ``self.env``.

        Sets ``isdir`` (path ends with a slash), ``host`` (first segment),
        ``file_path`` (directory part, with leading and trailing slash),
        ``file_path_quoted`` and, for non-directory paths, ``file_name``.
        """
        segments = path.split("/")
        is_dir = path.endswith("/")

        self.env["isdir"] = is_dir
        self.env["host"] = segments[0]

        if not is_dir:
            self.env["file_name"] = segments[-1]

        # The last segment is either empty (directory) or the file name.
        file_path = "/" + "/".join(segments[1:-1])
        if file_path != "/":
            file_path += "/"

        self.env["file_path"] = file_path
        self.env["file_path_quoted"] = quote_plus(file_path)

    def fetch_files(self):
        """Load the files of the parsed directory into ``self.files``.

        Raises BrowseException when the host address is unknown or no files
        are returned for the path.
        """
        host = self.db.query(Hosts).filter_by(address=self.env["host"]).first()
        if not host:
            raise BrowseException("No host found")

        self.env["host_id"] = host.id

        files = self.findex.get_files_objects(
            host_id=host.id, file_path=self.env["file_path_quoted"]
        )
        if not files:
            raise BrowseException("No files found")

        self.files = files

    def prepare_files(self, sort=None):
        """Order the listing (folders first, then by name) and add icons.

        Outside the root directory a synthetic ``..`` entry is put on top.
        *sort* is accepted but not used.
        """
        # One pass with a composite key replaces the two stable sorts.
        self.files = sorted(self.files, key=lambda f: (not f.file_isdir, f.file_name))

        if self.env["file_path"] != "/":
            # add CDUP dirs
            parent = Files(
                file_name="..",
                file_path="../",
                file_ext="",
                file_format=-1,
                file_isdir=True,
                file_modified=datetime.now(),
                file_perm=None,
                searchable=None,
                file_size=0,
                host=self.env["host"],
            )
            setattr(parent, "file_name_human", "..")
            self.files.insert(0, parent)

        self.files = self.findex.set_icons(self.files)

    def sort(self):
        """Return the total size of the listed files (non-recursive).

        The original computed this sum and discarded it; entries without a
        size count as 0.
        """
        return sum((f.file_size or 0) for f in self.files)

    def generate_action_fetches(self):
        """Return ``wget`` and ``lftp`` command lines for the current path."""
        # NOTE(review): the scheme is hard-coded to ftp - confirm this is
        # intended for hosts using other protocols.
        target = "ftp://%s" % self.env["host"] + self.env["file_path"]

        # Authentication options are not supported yet; both stay empty.
        wget_extras = ""
        lftp_extras = ""

        wget = "wget %s-r -nH --no-parent '%s'" % (wget_extras, target)
        lftp = "lftp %s-e 'mirror' '%s'" % (lftp_extras, target)

        return dict(wget=wget, lftp=lftp)

    def breadcrumbs(self):
        """Return the host followed by every non-empty directory segment."""
        data = [self.env["host"]]
        data += [z for z in self.env["file_path"].split("/")[1:] if z]
        return data

    def output_json(self):
        """Return one ``[D|F] <url><path><name>`` line per listed file.

        NOTE(review): this reads ``filename_human``, ``is_directory``,
        ``filepath_human`` and ``self.folder``, none of which are set in this
        class (which uses ``file_name_human`` / ``file_isdir``) - looks stale,
        confirm before relying on it.
        """
        data = []
        for source_file in self.files:
            if source_file.filename_human == "..":
                continue

            data.append(
                "[%s] %s%s%s"
                % (
                    "D" if source_file.is_directory else "F",
                    self.folder.source.crawl_url,
                    source_file.filepath_human,
                    source_file.filename_human,
                )
            )
        return "\n".join(data)
def __init__(self, cfg, db):
    """Remember the configuration and database handle; build a Findex helper."""
    self.cfg, self.db = cfg, db
    # Findex is handed the handle stored on the instance, by keyword.
    self.findex = Findex(db=self.db)
def __init__(self, cfg, db):
    """Store *cfg* and *db* on the instance and create the Findex helper."""
    self.cfg, self.db = cfg, db
    # Findex receives the stored database handle as its positional argument.
    self.findex = Findex(self.db)
class Browse():
    """Controller rendering the host overview, directory listings and the
    ``main/browse_goto`` page for a single file."""

    def __init__(self, cfg, db):
        self.cfg = cfg
        self.db = db
        self.findex = Findex(db=self.db)

    @data_strap
    def hosts(self, env):
        """Render ``main/browse_hosts`` with every row of ``Hosts``."""
        data = {'hosts': self.db.query(Hosts).all()}
        return jinja2_template('main/browse_hosts', env=env, data=data)

    @data_strap
    def browse(self, path, env):
        """Render the directory listing for *path* (``host/dir/...``).

        ``env['load_dbtime']`` is set to the time, in seconds, spent fetching
        the files. Any failure renders ``main/error`` instead.
        """
        env['load_dbtime'] = 0
        browser = Browser(db=self.db)

        try:
            browser.parse_incoming_path(path)

            start_dbtime = datetime.now()
            browser.fetch_files()
            env['load_dbtime'] = (datetime.now() - start_dbtime).total_seconds()

            browser.prepare_files()

            data = {
                'files': browser.files,
                'breadcrumbs': browser.breadcrumbs(),
                'action_fetches': browser.generate_action_fetches(),
                'env': browser.env
            }
            return jinja2_template('main/browse_dir', env=env, data=data)
        except Exception:
            # Deliberately broad, as in the original: every failure is shown
            # to the user as an empty result rather than a server error.
            return jinja2_template('main/error', env=env, data={'error': 'no files were found'})

    @data_strap
    def goto(self, path, env):
        """Render ``main/browse_goto`` for the file whose id is *path*.

        Returns a plain error string when the id is invalid, the file or its
        host cannot be found, or anything else goes wrong.
        """
        error = 'error :( we could always stay here'

        try:
            files = self.findex.get_files_objects(id=int(path))
            if not files:
                return error

            f = files[0]
            h = self.db.query(Hosts).filter_by(id=f.host_id).first()
            if not (f and h):
                return error

            return jinja2_template('main/browse_goto', env=env, data={'file': f, 'host': h})
        except Exception:
            # Best effort by design: never surface an exception to the user.
            return error
class Browse():
    """Controller for the browse pages: host overview, directory listing and
    the ``main/browse_goto`` page of a single file."""

    def __init__(self, cfg, db):
        self.cfg = cfg
        self.db = db
        self.findex = Findex(db=self.db)

    @data_strap
    def hosts(self, env):
        """Render ``main/browse_hosts`` with all rows of ``Hosts``."""
        data = {'hosts': self.db.query(Hosts).all()}
        return jinja2_template('main/browse_hosts', env=env, data=data)

    @data_strap
    def browse(self, path, env):
        """Render the listing of the directory addressed by *path*.

        The time spent fetching files is stored, in seconds, in
        ``env['load_dbtime']``. On any failure ``main/error`` is rendered.
        """
        env['load_dbtime'] = 0
        browser = Browser(db=self.db)

        try:
            browser.parse_incoming_path(path)

            start_dbtime = datetime.now()
            browser.fetch_files()
            env['load_dbtime'] = (datetime.now() - start_dbtime).total_seconds()

            browser.prepare_files()

            data = {
                'files': browser.files,
                'breadcrumbs': browser.breadcrumbs(),
                'action_fetches': browser.generate_action_fetches(),
                'env': browser.env
            }
            return jinja2_template('main/browse_dir', env=env, data=data)
        except Exception:
            # Kept broad on purpose: the user sees "no files" for any failure.
            return jinja2_template('main/error', env=env, data={
                'error': 'no files were found'
            })

    @data_strap
    def goto(self, path, env):
        """Render ``main/browse_goto`` for the file with id *path*.

        A plain error string is returned for an invalid id, a missing file or
        host, or any other failure.
        """
        error = 'error :( we could always stay here'

        try:
            files = self.findex.get_files_objects(id=int(path))
            if not files:
                return error

            f = files[0]
            h = self.db.query(Hosts).filter_by(
                id=f.host_id
            ).first()
            if not (f and h):
                return error

            data = {
                'file': f,
                'host': h
            }
            return jinja2_template('main/browse_goto', env=env, data=data)
        except Exception:
            # Best effort by design: exceptions never reach the user.
            return error
class Browser():
    """Turn a ``host/dir/.../name`` browse path into a prepared file listing."""

    def __init__(self, db):
        self.db = db
        self.findex = Findex(db)
        self.env = {}
        self.files = None

    def parse_incoming_path(self, path):
        """Split *path* into its parts and store them in ``self.env``.

        Sets ``isdir`` (path ends with a slash), ``host`` (first segment),
        ``file_path`` (directory part, with leading and trailing slash),
        ``file_path_quoted`` and, for non-directory paths, ``file_name``.
        """
        segments = path.split('/')
        is_dir = path.endswith('/')

        self.env['isdir'] = is_dir
        self.env['host'] = segments[0]

        if not is_dir:
            self.env['file_name'] = segments[-1]

        # The last segment is either empty (directory) or the file name.
        file_path = '/' + '/'.join(segments[1:-1])
        if file_path != '/':
            file_path += '/'

        self.env['file_path'] = file_path
        self.env['file_path_quoted'] = quote_plus(file_path)

    def fetch_files(self):
        """Load the files of the parsed directory into ``self.files``.

        Raises BrowseException when the host address is unknown or no files
        are returned for the path.
        """
        host = self.db.query(Hosts).filter_by(
            address=self.env['host']
        ).first()
        if not host:
            raise BrowseException('No host found')

        self.env['host_id'] = host.id

        files = self.findex.get_files_objects(host_id=host.id,
                                              file_path=self.env['file_path_quoted'])
        if not files:
            raise BrowseException('No files found')

        self.files = files

    def prepare_files(self, sort=None):
        """Order the listing (folders first, then by name) and add icons.

        Outside the root directory a synthetic ``..`` entry is put on top.
        *sort* is accepted but not used.
        """
        # One pass with a composite key replaces the two stable sorts.
        self.files = sorted(self.files, key=lambda f: (not f.file_isdir, f.file_name))

        if self.env['file_path'] != '/':
            # add CDUP dirs
            parent = Files(
                file_name='..',
                file_path='../',
                file_ext='',
                file_format=-1,
                file_isdir=True,
                file_modified=datetime.now(),
                file_perm=None,
                searchable=None,
                file_size=0,
                host=self.env['host']
            )
            setattr(parent, 'file_name_human', '..')
            self.files.insert(0, parent)

        self.files = self.findex.set_icons(self.files)

    def sort(self):
        """Return the total size of the listed files (non-recursive).

        The original computed this sum and discarded it; entries without a
        size count as 0.
        """
        return sum((f.file_size or 0) for f in self.files)

    def generate_action_fetches(self):
        """Return ``wget`` and ``lftp`` command lines for the current path."""
        # NOTE(review): the scheme is hard-coded to ftp - confirm this is
        # intended for hosts using other protocols.
        target = 'ftp://%s' % self.env['host'] + self.env['file_path']

        # Authentication options are not supported yet; both stay empty.
        wget_extras = ''
        lftp_extras = ''

        wget = 'wget %s-r -nH --no-parent \'%s\'' % (wget_extras, target)
        lftp = 'lftp %s-e \'mirror\' \'%s\'' % (lftp_extras, target)

        return dict(wget=wget, lftp=lftp)

    def breadcrumbs(self):
        """Return the host followed by every non-empty directory segment."""
        data = [self.env['host']]
        data += [z for z in self.env['file_path'].split('/')[1:] if z]
        return data

    def output_json(self):
        """Return one ``[D|F] <url><path><name>`` line per listed file.

        NOTE(review): this reads ``filename_human``, ``is_directory``,
        ``filepath_human`` and ``self.folder``, none of which are set in this
        class (which uses ``file_name_human`` / ``file_isdir``) - looks stale,
        confirm before relying on it.
        """
        data = []
        for source_file in self.files:
            if source_file.filename_human == '..':
                continue

            data.append('[%s] %s%s%s' % (
                'D' if source_file.is_directory else 'F',
                self.folder.source.crawl_url,
                source_file.filepath_human,
                source_file.filename_human))
        return '\n'.join(data)
class Searcher():
    """Build and run a filtered search over ``Files`` from request parameters."""

    # Request names mapped to the numeric ids used in the queries below.
    _PROTOCOLS = {'ftp': 0, 'http': 1, 'smb': 2}
    _CATEGORIES = {
        'unknown': 0,
        'documents': 1,
        'movies': 2,
        'music': 3,
        'pictures': 4
    }
    # Size bucket id -> (exclusive lower, inclusive upper) bound in bytes.
    # ``None`` as upper bound means "no upper limit"; bucket 0 means "any".
    _SIZES = {
        1: (0, 8388600),
        2: (8388600, 134220000),
        3: (134220000, 536870912),
        4: (536870912, 2147483648),
        5: (2147483648, 8589934592),
        6: (8589934592, None)
    }

    def __init__(self, cfg, db):
        self.cfg = cfg
        self.db = db
        self.findex = Findex(self.db)

    @staticmethod
    def _escape_like(text, escape='/'):
        """Return *text* with LIKE wildcards (``%``, ``_``) escaped by *escape*."""
        # The escape character itself goes first so it is not doubled twice.
        for char in (escape, '%', '_'):
            text = text.replace(char, escape + char)
        return text

    def _key_check(self, keyword):
        """Normalise a search keyword and enforce the minimum length.

        *keyword* is either the keyword string itself or the request dict, in
        which case the first element of its ``'key'`` entry is used.

        Returns the keyword with ``- , + _ %`` replaced by spaces.
        Raises SearchException when the key is missing/empty or the result is
        shorter than 4 characters.
        """
        if isinstance(keyword, dict):
            if 'key' not in keyword or not keyword['key']:
                raise SearchException(
                    'Search query must contain 4 characters or more')
            keyword = keyword['key'][0]

        # '%' and '_' are LIKE wildcards; blanking them keeps the final
        # ``searchable LIKE`` filter literal.
        for blocked in ('-', ',', '+', '_', '%'):
            keyword = keyword.replace(blocked, ' ')

        if len(keyword) < 4:
            raise SearchException(
                'Search query must contain 4 characters or more')

        return keyword

    def search(self, vars):
        """Run the search described by the request dict *vars*.

        Recognised keys (values are lists, as the code indexes ``[0]``):
        ``key`` (required), ``protocols``, ``hosts``, ``cats``, ``exts``,
        ``size``, ``path`` and ``host``.

        Returns a dict with ``sdata`` (the applied selection plus a
        ``filtered`` flag), ``results`` (``data`` rows and ``load_dbtime``
        in seconds) and the escaped ``key``.

        Raises SearchException for a too-short keyword or when none of the
        requested hosts can be found.
        """
        val = self._key_check(vars).lower()

        filtered = False
        start_dbtime = datetime.now()

        # to-do: move this to API (or make api.py use this class)
        q = self.db.query(Files)

        sort_by_size = False
        sdata = {
            'protocols': [],
            'hosts': [],
            'exts': [],
            'cats': [],
            'fsize': 0
        }

        # Protocols: fall back to all of them when the parameter is absent or
        # none of the names is recognised, so the host lookup below never
        # runs against an empty IN () list.
        protocol_ids = []
        if 'protocols' in vars:
            for name in vars['protocols']:
                protocol_id = self._PROTOCOLS.get(name.lower())
                if protocol_id is not None and protocol_id not in protocol_ids:
                    protocol_ids.append(protocol_id)
        sdata['protocols'] = protocol_ids or [0, 1, 2]

        # Hosts: '*' as first entry (or an empty list) means "all hosts".
        if 'hosts' in vars:
            dhosts = vars['hosts']
            if isinstance(dhosts, list) and dhosts and dhosts[0] != '*':
                host_ids = []
                for host in dhosts:
                    host_results = self.db.query(Hosts).filter(
                        Hosts.address == host).filter(
                            Hosts.protocol.in_(sdata['protocols'])).all()
                    host_ids.extend(row.id for row in host_results)

                if not host_ids:
                    raise SearchException(
                        'Could not find any host entries for specified host(s)'
                    )

                sdata['hosts'] = host_ids
                q = q.filter(Files.host_id.in_(host_ids))
                filtered = True

        # Categories: accepted by name or by numeric id.
        if 'cats' in vars:
            dformats = []
            for cat in vars['cats']:
                cat = cat.lower()
                if cat in self._CATEGORIES:
                    dformats.append(self._CATEGORIES[cat])
                else:
                    dformats.append(int(cat))
            q = q.filter(Files.file_format.in_(dformats))
            sdata['cats'].extend(dformats)
        else:
            sdata['cats'] = [0, 1, 2, 3, 4]

        # Anything less than the full category set counts as a filter.
        if any(i not in sdata['cats'] for i in (0, 1, 2, 3, 4)):
            filtered = True

        # Extensions: explicit list, otherwise taken from a "name.ext" keyword.
        if 'exts' in vars:
            exts = vars['exts']
            if isinstance(exts, list):
                exts = [z.replace('.', '') for z in exts if z]
                q = q.filter(Files.file_ext.in_(exts))
                filtered = True
                sdata['exts'].extend(exts)
        elif '.' in val:
            stem, ext = val.split('.', 1)
            ext = ext.replace(',', '').strip()
            q = q.filter(Files.file_ext == ext)
            sdata['exts'].append(ext)
            # The remaining stem must still be a valid keyword on its own.
            val = self._key_check(stem)
            filtered = True

        if 'size' in vars:
            fsize = vars['size']
            if isinstance(fsize, list):
                fsize = int(fsize[0])

            bounds = self._SIZES.get(fsize)
            if bounds is not None:
                lower, upper = bounds
                q = q.filter(Files.file_size > lower)
                if upper is not None:
                    # Inclusive upper bound: a file exactly on a bucket
                    # boundary belongs to the lower bucket instead of none.
                    q = q.filter(Files.file_size <= upper)
                filtered = True

            sdata['fsize'] = fsize
            # NOTE(review): the collapsed original does not show whether the
            # size sort applies to bucket 0 as well - confirm.
            sort_by_size = True

        if 'path' in vars:
            path = vars['path']
            if isinstance(path, list):
                path = path[0]

            if len(path) > 3:
                # quote_plus() emits %XX escapes; unescaped, every '%' would
                # act as a LIKE wildcard and the prefix match would hit
                # unrelated directories.
                prefix = self._escape_like(quote_plus(path), '/')
                q = q.filter(Files.file_path.like(prefix + '%', escape='/'))
                filtered = True

        if 'host' in vars:
            host = vars['host']
            if isinstance(host, list):
                host = int(host[0])
            q = q.filter(Files.host_id == host)
            filtered = True

        q = q.filter(Files.searchable.like('%' + val + '%')).limit(600)

        results = {}
        results['data'] = q.all()
        results['load_dbtime'] = (datetime.now() -
                                  start_dbtime).total_seconds()

        if sort_by_size:
            # Sorts only the (at most 600) rows already fetched.
            results['data'] = sorted(results['data'],
                                     key=lambda k: k.file_size,
                                     reverse=True)

        # to-do: dont do this here
        for r in results['data']:
            setattr(r, 'host', self.findex.get_host_objects(r.host_id))

        results['data'] = self.findex.set_humanize(results['data'])
        results['data'] = self.findex.set_icons(results['data'])

        sdata['filtered'] = filtered

        return {
            'sdata': sdata,
            'results': results,
            'key': jinja2.escape(vars['key'][0])
        }