def _archive(self):
    archive_interval = timedelta(days=get_config('generic', 'archive'))
    cut_time = (datetime.now() - archive_interval).date()
    # Round the cutoff down to the first day of the month so only full months are archived.
    cut_time = cut_time.replace(day=1)
    # Format:
    # { 2020: { 12: [directory] } }
    to_archive: Dict[int, Dict[int, List[Path]]] = defaultdict(lambda: defaultdict(list))
    for capture_uuid in get_captures_dir().glob('**/uuid'):
        timestamp = datetime.strptime(capture_uuid.parent.name, '%Y-%m-%dT%H:%M:%S.%f')
        if timestamp.date() >= cut_time:
            continue
        to_archive[timestamp.year][timestamp.month].append(capture_uuid.parent)
        self.logger.info(f'Archiving {capture_uuid.parent}.')

    if not to_archive:
        self.logger.info('Nothing to archive.')
        return

    p = self.redis.pipeline()
    for year, month_captures in to_archive.items():
        for month, captures in month_captures.items():
            dest_dir = self.archived_captures_dir / str(year) / f'{month:02}'
            dest_dir.mkdir(parents=True, exist_ok=True)
            for capture_path in captures:
                p.delete(str(capture_path))
                capture_path.rename(dest_dir / capture_path.name)
    p.execute()

    # Clear empty
    self.logger.info('Archiving done.')
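# A minimal, self-contained sketch of the cutoff computation above: the date is
# rounded down to the first day of the month, so a capture is archived only once
# its whole month lies before the cutoff month. The 180-day value stands in for
# the 'archive' setting and is purely illustrative.
from datetime import datetime, timedelta

archive_interval = timedelta(days=180)                 # hypothetical config value
cut_time = (datetime.now() - archive_interval).date()
cut_time = cut_time.replace(day=1)                     # first day of the cutoff month
print(cut_time)  # captures with timestamp.date() < cut_time get archived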
def __init__(self, loglevel: int = logging.INFO):
    super().__init__(loglevel)
    self.lookyloo = Lookyloo()
    self.script_name = 'async_capture'
    self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
    self.capture_dir: Path = get_captures_dir()
    self.splash_url: str = get_splash_url()
    self.redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
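# Sketch of the cache connection created above, assuming a Redis server is
# listening on a unix socket; the socket path below is a made-up example.
# With decode_responses=True the client returns str instead of bytes.
from redis import Redis

cache = Redis(unix_socket_path='/tmp/cache.sock', decode_responses=True)  # hypothetical path
cache.set('ping', 'pong')
print(cache.get('ping'))  # 'pong' as str, not b'pong'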
def __init__(self, loglevel: int = logging.INFO):
    super().__init__(loglevel)
    self.script_name = 'async_capture'
    self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
    self.capture_dir: Path = get_captures_dir()
    self.user_agents = UserAgents()
    self.fox = FOX(get_config('modules', 'FOX'))
    if not self.fox.available:
        self.logger.warning('Unable to setup the FOX module')
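# Hedged illustration of the optional-module pattern above: build the module from
# its config section and degrade gracefully when it cannot be used. OptionalModule
# is a hypothetical stand-in, not the real FOX class or its configuration.
import logging

class OptionalModule:
    def __init__(self, config: dict):
        # Consider the module usable only if it is enabled and has an API key.
        self.available = bool(config.get('enabled') and config.get('apikey'))

module = OptionalModule({'enabled': True, 'apikey': None})
if not module.available:
    logging.getLogger('async_capture').warning('Unable to setup the module')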
def _update_all_capture_indexes(self):
    '''Run this after the captures are in the proper directories.'''
    # Recent captures
    directories_to_index = set(capture_dir.parent.parent
                               for capture_dir in get_captures_dir().glob('**/uuid'))
    for directory_to_index in directories_to_index:
        self._update_index(directory_to_index)
    # Archived captures
    directories_to_index = set(capture_dir.parent.parent
                               for capture_dir in self.archived_captures_dir.glob('**/uuid'))
    for directory_to_index in directories_to_index:
        self._update_index(directory_to_index)
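# Illustration of the .parent.parent step above, assuming the on-disk layout
# <captures>/<year>/<month>/<capture>/uuid that the archiving code produces;
# the path below is a made-up example.
from pathlib import Path

uuid_file = Path('captures/2021/03/2021-03-15T10:00:00.000000/uuid')
month_dir = uuid_file.parent.parent
print(month_dir)  # captures/2021/03 -> the directory whose index gets rebuilt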
def rename_captures():
    r = Redis(unix_socket_path=get_socket_path('cache'))
    capture_dir: Path = get_captures_dir()
    for uuid_path in capture_dir.glob('*/uuid'):
        with uuid_path.open() as f:
            uuid = f.read()
        # Drop the cache entries that point at the old location.
        dir_key = r.hget('lookup_dirs', uuid)
        if dir_key:
            r.hdel('lookup_dirs', uuid)
            r.delete(dir_key)
        # Move the capture into a <year>/<month> subdirectory.
        timestamp = datetime.strptime(uuid_path.parent.name, '%Y-%m-%dT%H:%M:%S.%f')
        dest_dir = capture_dir / str(timestamp.year) / f'{timestamp.month:02}'
        safe_create_dir(dest_dir)
        uuid_path.parent.rename(dest_dir / uuid_path.parent.name)
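# Sketch of the destination computed for a single capture directory, using a
# made-up capture root and a directory name in the '%Y-%m-%dT%H:%M:%S.%f' format.
from datetime import datetime
from pathlib import Path

capture_root = Path('/path/to/captures')    # hypothetical root directory
dirname = '2020-12-24T18:30:00.123456'      # made-up capture directory name
timestamp = datetime.strptime(dirname, '%Y-%m-%dT%H:%M:%S.%f')
print(capture_root / str(timestamp.year) / f'{timestamp.month:02}')  # /path/to/captures/2020/12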
def _load_indexes(self):
    # Initialize recent captures
    for index in get_captures_dir().glob('**/index'):
        with index.open('r') as _f:
            recent_uuids: Dict[str, str] = {uuid: str(index.parent / dirname)
                                            for uuid, dirname in csv.reader(_f)
                                            if (index.parent / dirname).exists()}
        if recent_uuids:
            self.redis.hmset('lookup_dirs', recent_uuids)  # type: ignore
        else:
            index.unlink()

    # Initialize archived captures
    for index in self.archived_captures_dir.glob('**/index'):
        with index.open('r') as _f:
            archived_uuids: Dict[str, str] = {uuid: str(index.parent / dirname)
                                              for uuid, dirname in csv.reader(_f)
                                              if (index.parent / dirname).exists()}
        if archived_uuids:
            self.redis.hmset('lookup_dirs_archived', archived_uuids)  # type: ignore
        else:
            index.unlink()
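# Sketch of the 'index' file format consumed above: one "uuid,dirname" row per
# capture, read back as (uuid, dirname) pairs by csv.reader. Both rows below are
# made-up examples.
import csv
from io import StringIO

index_content = ('11111111-aaaa-bbbb-cccc-222222222222,2020-12-24T18:30:00.123456\n'
                 '33333333-dddd-eeee-ffff-444444444444,2020-12-25T09:15:00.654321\n')
for uuid, dirname in csv.reader(StringIO(index_content)):
    print(uuid, '->', dirname)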