def route_upload(path):
    """Upload endpoint: store request.data at *path* inside the configured file root.

    Two modes are supported:
    - chunked upload, driven by an HTTP Content-Range header ("bytes a-b/total"),
      appending each chunk to a shared open file handle until the final chunk
    - ordinary upload, a single write of the whole request body

    Query args:
        time:  optional epoch (float); if > 0 it is set as the file's atime/mtime
        force: allow overwriting an existing file (default: refuse with 403)

    Returns ('', 201) on success, ('', 403) when refusing to overwrite an
    existing file, and aborts with 404 when the path escapes the file root.
    """
    global chunked_file_handle
    time = request.args.get('time', type=float, default=0.0)
    force = request.args.get('force', type=inputs.boolean, default=False)
    # safe_join returns None when the joined path escapes the file root
    path = safe_join(os.path.join(app.config['fileroot'], path))
    if path is None:
        abort(404)
    try:
        content_range = request.environ['HTTP_CONTENT_RANGE']
        parsed_ranges = re.search(r'bytes (\d*)-(\d*)\/(\d*)', content_range)
        _from, _to, _size = [int(x) for x in parsed_ranges.groups()]
        deb(f'chunked upload, {_from} to {_to} ({_size}), {_to - _from + 1} bytes')
    except:
        # missing or unparsable Content-Range header: treat as a plain upload
        content_range = None
    # overwrite/mkdir checks apply to plain uploads and the first chunk only
    if not content_range or _from == 0:
        if os.path.exists(path):
            if not force:
                # if force was not given then the default is that the server refuses to rewrite an existing file
                err(f'file {path} already exist, returning 403 (see --force)')
                return '', 403
        else:
            directory = os.path.dirname(path)
            if not os.path.exists(directory):
                inf(f'constructing new path {directory}')
                Path(directory).mkdir(parents=True, exist_ok=True)
    if content_range:
        if _from == 0:
            try:
                # first chunk: truncate any previous content, then reopen for append
                if chunked_file_handle.get(path):
                    err('internal error in upload, non closed filehandle')
                    chunked_file_handle[path].close()
                open(path, 'w').close()
                chunked_file_handle[path] = open(path, "ab")
            except:
                # NOTE(review): failures opening the file are silently ignored here;
                # the write below would then raise KeyError — confirm this is intended
                pass
            inf(f'writing file "{path}" ({human_file_size(_size)})')
        chunked_file_handle[path].write(request.data)
        if _to == _size - 1:
            # final chunk received: close and forget the shared handle
            inf(f'{path} transfer complete')
            chunked_file_handle[path].close()
            del chunked_file_handle[path]
    else:
        # ordinary non-chunked upload, single write
        inf(f'writing file "{path}"')
        with open(path, "wb") as fp:
            fp.write(request.data)
    if time > 0.0:
        deb(f'setting {path} time to {time}')
        os.utime(path, (time, time))
    # 201: Created
    return '', 201
def print_file_list(files, title=None):
    """Log an optional heading followed by one debug line per file entry."""
    # an empty-string title means the filestorage root
    heading = '/' if title == '' else title
    if heading:
        deb(f'filelist for "{heading}"')
    for entry in files.items():
        fwd_util.print_file(entry)
def export_file(filepath):
    """ Use filuxe to upload the file if it first matches the include regex and second
        doesn't match the exclude regex. If the include regex and the exclude regex
        are both empty strings then the file is exported.
    """
    if not FILUXE_WAN:
        # no WAN server configured, nothing to export to
        return
    path = os.path.dirname(filepath)
    relpath = os.path.relpath(path, FILE_ROOT)
    file = os.path.basename(filepath)
    try:
        dir_rules = ACTIVE_RULES['dirs'][relpath]
        if not fwd_util.filename_is_included(file, dir_rules):
            inf(f'filename {file} is not in scope and will not be exported')
            return
        deb(f'forwarding {file}')
    except:
        # no rules found for this directory: the default is to forward the file
        inf(f'from {relpath} uploading file {file} (no rules)')
    try:
        deb(f'uploading {FILUXE_LAN.log_path(filepath)}')
        FILUXE_WAN.upload(filepath, os.path.join(relpath, file))
    except requests.ConnectionError:
        war('upload failed, WAN server is not reachable.')
    except FileNotFoundError:
        # the file vanished between the notification and the upload
        war(f'exception file not found, {os.path.join(relpath, file)} (internal race)')
def __init__(self, x, directory, rules, delete_by):
    """Wrap a (filename, attributes) pair for deletion-order sorting.

    For delete_by == 'version' the version triple is parsed from the filename
    using the directory's 'version' regex; otherwise the file time is used.
    self.valid stays False when the version cannot be parsed.
    """
    self.file = x[0]
    self.attr = x[1]
    self.numbers = None
    self.valid = False
    self.delete_by = delete_by
    if delete_by != 'version':
        # sort by file time instead of version numbers
        self.time = x[1]['time']
        self.valid = True
        return
    try:
        version_regex = re.compile(rules['dirs'][directory]['version'])
        captured = version_regex.search(self.file).group(1)
        self.numbers = [int(part) for part in captured.split('.')]
        self.valid = len(self.numbers) == 3
        if not self.valid:
            war(f'sort by version but failed to parse 3 digits from "{directory}/{self.file}"')
    except KeyError as e:
        war(f'sort by version but failed to parse 3 digits from "{directory}/{self.file}", key {e} not found')
    except AttributeError:
        # regex did not match the filename at all (search() returned None)
        war(f'version regex failed on filename "{os.path.join(directory, self.file)}"')
    except Exception as e:
        deb(f'exception {e}')
def get_stats(self):
    """Fetch /stats from the server.

    Returns (ErrorCode.OK, parsed json) on HTTP 200, otherwise
    (ErrorCode.SERVER_ERROR, []).
    """
    url = f'{self.server}/stats'
    deb(f'getting stats at {url}')
    response = requests.get(url, verify=self.certificate)
    if response.status_code == 200:
        return ErrorCode.OK, json.loads(response.text)
    err(f'got {response.status_code} from {self.domain} server at /stats')
    return ErrorCode.SERVER_ERROR, []
def dump_rules():
    """Log all active per-directory rules, or a warning when none are loaded."""
    try:
        rules_by_dir = ACTIVE_RULES['dirs']
        if not rules_by_dir.items():
            war('this forwarder has no rules loaded ? Forwarding everything.')
            return
        deb('dumping rules:')
        for _path, _rules in rules_by_dir.items():
            deb(f' "{_path}" {_rules}')
    except:
        # ACTIVE_RULES has no usable 'dirs' section
        war('no dir rules found')
def delete_files(self, filegroup, group_name, use_http, rules):
    """Delete the lowest-ranked files of a file group down to its maxfiles limit.

    Files are wrapped in Item objects and sorted by the group's 'deleteby'
    criterion; the first (count - maxfiles) items are deleted, either over
    http or directly in the local filestorage. In dryrun mode nothing is
    actually deleted.

    Returns the list of deleted filenames (empty on dryrun or sort failure).
    """
    directory = filegroup['directory']
    filelist = filegroup['files']
    to_delete = len(filelist) - filegroup['maxfiles']
    deleted_files = []
    if to_delete > 0:
        delete_by = filegroup["deleteby"]
        inf(f'deleting {to_delete} files from {self.domain.domain} filestorage, '
            f'path="{filegroup["directory"]}" group "{group_name}". Deleteby={delete_by}')
        _items = [Item(x, directory, rules, delete_by) for x in filelist.items()]
        try:
            _sorted_items = sorted(_items)
        except:
            # e.g. version sort on files whose version could not be parsed
            first_filename = list(filelist.keys())[0]
            war(f'failed running delete by "{delete_by}" in group "{group_name}". Files of type "{first_filename}"')
            return deleted_files
        deb(f'http filelist sorted by: "{delete_by}", delete from top')
        for index, item in enumerate(_sorted_items):
            if index < to_delete:
                extra = 'DEL '
            else:
                extra = 'KEEP'
            fwd_util.print_file(item, extra)
        for item in _sorted_items[:to_delete]:
            filepath = os.path.join(directory, item.file)
            if self.dryrun:
                inf(f'dryrun: not deleting {filepath}')
            else:
                try:
                    if use_http:
                        fwd_util.delete_http_file(self.domain, filepath)
                    else:
                        fqn = os.path.join(self.domain.root(), filepath)
                        inf(f'deleting {self.domain.log_path(fqn)}')
                        os.remove(fqn)
                    deleted_files.append(item.file)
                except:
                    # bugfix: 'fqn' was unbound on the http path (and when the
                    # join itself failed), raising NameError inside this handler;
                    # log the storage-relative path instead
                    war(f'failed to delete file {filepath} (http={use_http})')
    return deleted_files
def filter_filelist(filelist, rules):
    """ Return a copy of the filelist where not-included and excluded files are
        removed according to the rule set.

    Directories without an entry in rules['dirs'] are kept untouched (they can
    legitimately come from a WAN filelist with no LAN counterpart). Any error
    while filtering is logged and the copy is returned as filtered so far.
    """
    # bugfix: build the copy before the try block so 'filtered_filelist' can
    # never be unbound at the return statement below
    filtered_filelist = copy.deepcopy(filelist)
    try:
        for path, files in filelist['filelist'].items():
            if not rules['dirs'].get(path):
                # Probably got a directory from a wan filelist that does not exist on lan.
                inf(f'filter filelist: ignoring directory "{path}" which is not found in rules')
            else:
                for filename in files:
                    if not filename_is_included(filename, rules['dirs'][path]):
                        del filtered_filelist['filelist'][path][filename]
    except:
        # best effort: malformed rules/filelist yield an (at most partially) filtered copy
        deb('http filelist returned unfiltered (bad rules?)')
    return filtered_filelist
def new_file(filename):
    """Handle a new or changed file in the LAN filestorage.

    Runs the max-files deleter on the file's directory first; if the file
    survives the cleanup it is exported (uploaded) to the WAN server.
    """
    if not os.path.exists(filename):
        # the file can already be gone again by the time the event is handled
        deb(f'listener: changed file "(unknown)" does not exist anymore?')
        return
    inf(f'listener: new/changed file "{FILUXE_LAN.log_path(filename)}"')
    with Indent() as _:
        if LAN_FILE_DELETER:
            path = os.path.dirname(filename)
            filestorage_path = os.path.relpath(path, FILUXE_LAN.root())
            # enforce max_files only in this one directory (non-recursive)
            LAN_FILE_DELETER.enforce_max_files(filestorage_path, rules=ACTIVE_RULES, recursive=False)
            if not os.path.exists(filename):
                # the deleter may have removed the very file that triggered us
                war(f'listener: new file "{FILUXE_LAN.log_path(filename)}" already deleted and will not be forwarded')
                return
        export_file(filename)
def delete_wan_file(filepath):
    """ Delete file on WAN if WAN is configured. This will be triggered when a
        file is deleted from LAN filestorage.
    """
    if not FILUXE_WAN:
        deb(f'no wan configured, not deleting {filepath} on wan')
        return
    filestorage_path = os.path.relpath(filepath, FILUXE_LAN.root())
    path = os.path.dirname(filestorage_path)
    try:
        # an explicit delete=false rule keeps the file on the WAN side
        if not ACTIVE_RULES['dirs'][path]['delete']:
            deb(f'not deleting on wan since delete=false for {filepath}')
            return
    except:
        # no delete rule for this directory: fall through and delete
        pass
    try:
        file = os.path.basename(filepath)
        rule_path = os.path.normpath(path)
        dir_rules = ACTIVE_RULES['dirs'][rule_path]
        if not fwd_util.filename_is_included(file, dir_rules):
            inf(f'filename {file} is not in scope and will not be exported')
            return
    except:
        # no rules found for this directory: the default is to delete on WAN too
        deb(f'from "{FILUXE_WAN.log_path(filepath)}" deleting file {file} (no rules)')
    fwd_util.delete_http_file(FILUXE_WAN, filestorage_path)
def parse_into_file_groups(self, directory, filelist, directory_settings):
    """Partition the files in *directory* into groups according to the group regexes.

    Files matching a group regex are keyed by the joined regex capture groups;
    when group rules exist, non-matching files land in an "ungrouped" bucket.
    Each group carries the directory's maxfiles/deleteby settings.

    Returns a dict: group key -> {'files', 'maxfiles', 'deleteby', 'directory'}.
    A broken group regex is fatal (exit(1)).
    """
    file_groups = {}
    max_files, delete_by, file_group_rules = directory_settings
    deb(f'find groups in {self.domain.domain} "{directory}" ({len(filelist["filelist"][directory])} files)')
    for filename, fileinfo in filelist['filelist'][directory].items():
        group_key = 'ungrouped'
        for group in file_group_rules:
            try:
                match = re.match(fr'{group}', filename)
            except Exception as e:
                err(f'regex gave exception {e.__repr__()} with regex "{group}"')
                exit(1)
            if match:
                nof_groups = len(match.groups())
                nof_group_regex_groups = re.compile(group).groups
                if nof_groups != nof_group_regex_groups:
                    # bugfix: log the actual filename instead of a "(unknown)" placeholder
                    deb(f'parsing {filename} failed, found {nof_groups} groups, not {nof_group_regex_groups}')
                else:
                    group_key = ':'.join(match.groups())
                    break
        if file_group_rules and group_key == 'ungrouped':
            # bugfix: log the actual filename instead of a "(unknown)" placeholder
            inf(f'no group match for {filename}, adding to "ungrouped"')
        group_key = os.path.join(directory, group_key)
        if group_key not in file_groups:
            # first file seen for this group: create the group skeleton
            file_groups[group_key] = {
                'files': {},
                'maxfiles': max_files,
                'deleteby': delete_by,
                'directory': directory
            }
        file_groups[group_key]['files'][filename] = fileinfo
    for group in file_groups.keys():
        deb(f'group {group} with {len(file_groups[group]["files"])} files')
    return file_groups
def filename_is_included(filename, rules):
    """Return True if *filename* passes the rule set.

    Evaluation order:
    1. an explicit export=False rule excludes everything
    2. the filename must match at least one include regex (when any are given)
    3. the filename must not match any exclude regex
    Missing rule keys default to "include everything". A broken include or
    exclude regex logs an error and excludes the file.
    """
    try:
        if not rules['export']:
            deb(f'export is false for {filename}')
            return False
    except:
        # no 'export' key: exporting is allowed
        pass
    try:
        include_list = rules['include']
        # bugfix: 'a' was unbound when include_list is empty; the resulting
        # NameError was silently swallowed by the bare except below. An empty
        # include list still means "forward everything" (see the guard below).
        a = None
        for include in include_list:
            a = re.search(include, filename)
            if a:
                break
        if include_list and not a:
            # bugfix: log the actual filename instead of a "(unknown)" placeholder
            deb(f'{filename} was not included by "{include}"')
            return False
    except re.error as e:
        err(f'include regex exception, "{include}" gave {e.__repr__()}. File ignored.')
        return False
    except:
        # then default forward everything
        pass
    try:
        exclude_list = rules['exclude']
        for exclude in exclude_list:
            a = re.search(exclude, filename)
            if a:
                # bugfix: log the actual filename instead of a "(unknown)" placeholder
                deb(f'ignore file hit for {filename}')
                return False
    except re.error as e:
        err(f'exclude regex exception, "{exclude}" gave {e.__repr__()}. File ignored.')
        return False
    except:
        # no 'exclude' key: nothing to exclude
        pass
    return True
def get_rules_for_path(rules, path):
    """ Return tuple with (max_files, delete_by, filegroups)

    Directory-specific settings win over the 'default' section. When no file
    limit exists at all the result is (None, None, []); an 'unlimited' or
    negative limit yields (-1, None, []).
    """
    missing = object()

    def _setting(key, fallback):
        # directory rule first, then the 'default' section, then the fallback
        try:
            return rules['dirs'][path][key]
        except:
            try:
                return rules['default'][key]
            except:
                return fallback

    max_files = _setting('max_files', missing)
    if max_files is missing:
        deb(f'rules: "{path}" has no file limit')
        return None, None, []
    if max_files == 'unlimited':
        max_files = -1
    if max_files < 0:
        deb(f'rules: "{path}" has no file limit')
        return -1, None, []
    delete_by = _setting('delete_by', 'time')
    filegroups = _setting('group', [])
    deb(f'rules: "{path}" has file limit {max_files}, delete by {delete_by}')
    return max_files, delete_by, filegroups
def enforce_max_files(self, path, rules, recursive=True, use_http=False, lan_files=None):
    """ Get the list of files (locally or over http) and delete files if required by
        the rule "max_files".
        It can run a full recursive scan-and-delete as is done when starting the
        forwarder (with an empty path and recursive=True), and it can run in a
        specific directory when triggered by a new or modified file (with a path
        and recursive=False).
        Returns nothing.
    """
    try:
        deb(f'enforce max files in {self.domain.domain} with path="{path}", dryrun={self.dryrun}')
        # obtain the filelist: over http, from a fresh local scan, or reuse a
        # caller-supplied lan_files list
        if use_http:
            filelist = fwd_util.get_http_filelist(self.domain, path, recursive, rules)
        else:
            if not lan_files:
                scan_directory = self.domain.root()
                filelist = fwd_util.get_local_filelist(scan_directory, path, recursive, rules)
            else:
                filelist = lan_files
        try:
            directories = filelist['filelist'].keys()
        except:
            deb(f'got empty filelist from {self.domain.domain} at "{path}"')
            return
        # pass 1: group the files of every directory that has a file limit
        group_list = {}
        for directory in directories:
            directory_settings = get_rules_for_path(rules, directory)
            max_files, _delete_by, _file_groups = directory_settings
            if max_files == -1:
                # explicitly unlimited
                inf(f'"{self.domain.domain}/{path}" has no filelimit, skipping.'
                    f' ({len(filelist["filelist"][directory])} files)')
                continue
            if not max_files:
                # no limit configured
                continue
            group_list[directory] = self.parse_into_file_groups(directory, filelist, directory_settings)
        deb(f'found total {len(group_list)} groups')
        # pass 2: delete excess files per group and drop them from the filelist
        for directory, group in group_list.items():
            for group_key, file_group in group.items():
                nof_files = len(file_group['files'])
                max_files = file_group['maxfiles']
                excess_files = nof_files - max_files
                if excess_files > 0:
                    message = f'"{self.domain.domain}/{directory}" group:"{group_key}" exceeded max files '\
                              f'with {excess_files}. ({nof_files} files, limit is {max_files})'
                    inf(message)
                    with Indent() as _:
                        deleted_files = self.delete_files(file_group, group_key, use_http, rules)
                        for file in deleted_files:
                            del filelist['filelist'][directory][file]
                else:
                    message = f'"{self.domain.domain}/{directory}" group:"{group_key}" no action. '\
                              f'({nof_files} files, limit is {max_files})'
                    deb(message)
    except Exception as e:
        # any unexpected failure here is fatal for the forwarder
        die(f'exception in enforce_max_files {e.__repr__()}', e, error_code=ErrorCode.INTERNAL_ERROR)
def print_file(item, extra=''):
    """Log one file entry: size, raw epoch, formatted UTC timestamp and name."""
    # avoid shadowing the 'datetime' module name used elsewhere
    timestamp = time.strftime("%m/%d/%Y %H:%M:%S", time.gmtime(item.attr["time"]))
    readable_size = human_file_size(item.attr["size"])
    deb(f'{extra} - {readable_size:<10} {item.attr["time"]:<20} {timestamp} "{item.file}"')
def filestorage_scan(root, path='', recursive=True):
    """Scan the filestorage below root/path and build a filelist dictionary.

    Open files are skipped with a warning; files that vanish mid-scan are
    ignored. With recursive=False only the single directory is (re)scanned.

    Returns {'filelist': {relative_dir: {filename: {'size', 'time'}}},
             'info': {'dirs', 'fileroot', 'files', 'size'}}.
    NOTE(review): info['fileroot'] reports the last scanned relative path,
    not the scan root — confirm this is the intended meaning.
    """
    _filelist = {}
    total_directories = 0
    total_files = 0
    total_size = 0
    scan_root = os.path.join(root, path)
    if recursive:
        inf(f'recursively scanning "{scan_root}"')
    else:
        inf(f'rescanning directory "{scan_root}"')
    # bugfix: pre-seed relative_path (its first-iteration value) so the
    # 'fileroot' entry below is defined even when os.walk() yields nothing,
    # e.g. when scan_root does not exist
    relative_path = os.path.normpath(os.path.join(path, '.'))
    with Indent() as _:
        for _root, _dirs, _files in os.walk(scan_root):
            _path = os.path.relpath(_root, scan_root)
            size = 0
            relative_path = os.path.normpath(os.path.join(path, _path))
            if not _filelist.get(relative_path):
                _filelist[relative_path] = {}
            for _file in _files:
                try:
                    file = os.path.join(_root, _file)
                    if util.file_is_closed(os.path.abspath(file)):
                        _size = os.path.getsize(file)
                        epoch = util.get_file_time(file)
                        metrics = {'size': _size, 'time': epoch}
                        _filelist[relative_path][_file] = metrics
                        size += os.path.getsize(os.path.join(_root, _file))
                    else:
                        war(f'filestorage scan, ignoring open file {file}')
                except FileNotFoundError:
                    # file deleted between listing and stat'ing
                    deb(f'filestorage scan: file not found {file}')
            total_directories += 1
            total_files += len(_files)
            total_size += size
            message = f'scanned "{relative_path}", {human_file_size(size)} in {len(_files)} files'
            if recursive:
                # If recursive it will be the first full scan so print what is happening
                inf(message)
            else:
                deb(message)
            if not recursive:
                break
    inf(f'found {total_directories} directories with {total_files} files occupying {human_file_size(total_size)}')
    return {
        'filelist': _filelist,
        'info': {
            'dirs': total_directories,
            'fileroot': relative_path,
            'files': total_files,
            'size': total_size
        }
    }
def calculate_rules(check_dirs):
    """ 1: check_dirs as list of directories:
           Starting from the root then recursively propagate rules matching the given
           directory structure.
        2: check_dirs as directory
           This can set rules on a new directory. If the directory already has rules
           assigned then it is a no-op.
    """
    global LOADED_RULES, ACTIVE_RULES
    if not LOADED_RULES:
        ACTIVE_RULES = None
        return
    inf('calculating rules')
    with Indent() as _:
        default_rule = LOADED_RULES["default"]
        try:
            dir_rules = LOADED_RULES["dirs"]
            # snapshot used below to detect whether the calculation changed anything
            entry_rules_json = json.dumps(dir_rules, sort_keys=True)
        except:
            war('no "dir" rules file section found, using "default" section only')
            return
        if isinstance(check_dirs, list):
            # full recalculation: start from scratch
            check_dirs.sort()
            new_rules = {}
        else:
            # single new directory: extend the currently active rules
            check_dirs = [check_dirs]
            new_rules = copy.deepcopy(ACTIVE_RULES['dirs'])
        try:
            for _key in check_dirs:
                _path_elements = _key.split(os.sep)
                # walk from the root towards the leaf, inheriting parent rules
                for i in range(len(_path_elements)):
                    path_elements = _path_elements[:i + 1]
                    path = os.path.join(*path_elements)
                    if path == '.':
                        new_rules[path] = default_rule
                    else:
                        previous = os.path.relpath(os.path.join(path, os.pardir))
                        try:
                            # parent rules overlaid with this directory's own rules
                            new_rules[path] = {**new_rules[previous], **dir_rules[path]}
                        except:
                            try:
                                # no own rules: inherit the parent's as-is
                                new_rules[path] = new_rules[previous]
                            except:
                                deb(f'no rules found for {path}, skipped')
                    deb(f'transient rule: "{path}" {new_rules[path]}')
        except:
            # NOTE(review): _key is unbound here if the iteration itself failed
            # before the first element — confirm check_dirs is never empty
            war(f'establishing rules for {_key} failed, check rules file')
        # purge rules that doesn't trigger any actions
        new_rules_copy = copy.deepcopy(new_rules)
        active_new_rules = {}
        active_new_rules['dirs'] = {}
        for path, path_rules in new_rules_copy.items():
            if path_rules.get('export') or len(path_rules) > 1:
                inf(f'adding rule for "{path}" : {path_rules}')
                active_new_rules['dirs'][path] = path_rules
        new_rules_json = json.dumps(active_new_rules, sort_keys=True)
        changed = entry_rules_json != new_rules_json
        if changed:
            ACTIVE_RULES = active_new_rules
            extras = '. Rules were adjusted'
        else:
            extras = '. No changes ?'
        inf(f'rules calculated, {len(ACTIVE_RULES["dirs"])} active rules{extras}')
def process_IN_MOVED_TO(self, event):
    """inotify handler: a file was moved into the watched tree (treated as a write)."""
    moved_path = event.pathname
    deb(f'inotify: move(write) {moved_path}')
    new_file(moved_path)
    IDLE_DETECT.activity()
def process_IN_MOVED_FROM(self, event):
    """inotify handler: a file was moved out of the watched tree (treated as a delete)."""
    moved_path = event.pathname
    deb(f'inotify: move(delete) {moved_path}')
    delete_wan_file(moved_path)
    IDLE_DETECT.activity()
def process_IN_DELETE(self, event):
    """inotify handler: a watched file was deleted."""
    deleted_path = event.pathname
    deb(f'inotify: delete {deleted_path}')
    delete_wan_file(deleted_path)
    IDLE_DETECT.activity()
def process_IN_CLOSE_WRITE(self, event):
    """inotify handler: a file opened for writing was closed."""
    closed_path = event.pathname
    deb(f'inotify: closed {closed_path}')
    new_file(closed_path)
    IDLE_DETECT.activity()