Example #1
0
def route_upload(path):
    """Handle a file upload, either single-shot or chunked via Content-Range.

    Query args:
        time:  if > 0, the uploaded file's atime/mtime are set to this epoch.
        force: allow overwriting an existing file (default False -> 403).

    Returns ('', 201) on success, ('', 403) when the file exists and force
    was not given, or aborts with 404 on an invalid path.
    """
    global chunked_file_handle
    time = request.args.get('time', type=float, default=0.0)
    force = request.args.get('force', type=inputs.boolean, default=False)
    path = safe_join(os.path.join(app.config['fileroot'], path))
    if path is None:
        abort(404)

    try:
        content_range = request.environ['HTTP_CONTENT_RANGE']
        parsed_ranges = re.search(r'bytes (\d*)-(\d*)\/(\d*)', content_range)
        _from, _to, _size = [int(x) for x in parsed_ranges.groups()]
        deb(f'chunked upload, {_from} to {_to} ({_size}), {_to - _from + 1} bytes')
    except (KeyError, AttributeError, ValueError):
        # KeyError: no Content-Range header (plain upload). AttributeError:
        # header did not match the regex. ValueError: non-integer groups.
        # All of these mean "treat as an ordinary non-chunked upload".
        content_range = None

    if not content_range or _from == 0:
        if os.path.exists(path):
            if not force:
                # if force was not given then the default is that the server refuses to rewrite an existing file
                err(f'file {path} already exist, returning 403 (see --force)')
                return '', 403
        else:
            directory = os.path.dirname(path)
            if not os.path.exists(directory):
                inf(f'constructing new path {directory}')
                Path(directory).mkdir(parents=True, exist_ok=True)

    if content_range:
        if _from == 0:
            try:
                if chunked_file_handle.get(path):
                    err('internal error in upload, non closed filehandle')
                    chunked_file_handle[path].close()
                # truncate any previous content, then reopen for appending
                open(path, 'w').close()
                chunked_file_handle[path] = open(path, "ab")
            except OSError as e:
                # previously swallowed silently; log so the KeyError that
                # follows on the missing handle can be traced back here
                err(f'opening {path} for chunked upload failed: {e}')

            inf(f'writing file "{path}" ({human_file_size(_size)})')

        chunked_file_handle[path].write(request.data)

        if _to == _size - 1:
            inf(f'{path} transfer complete')
            chunked_file_handle[path].close()
            del chunked_file_handle[path]

    else:
        # ordinary non-chunked upload, single write
        inf(f'writing file "{path}"')
        with open(path, "wb") as fp:
            fp.write(request.data)

    if time > 0.0:
        deb(f'setting {path} time to {time}')
        os.utime(path, (time, time))

    # 201: Created
    return '', 201
Example #2
0
def print_file_list(files, title=None):
    """Log every entry of *files*; an empty-string title is shown as '/'."""
    title = '/' if title == '' else title
    if title:
        deb(f'filelist for "{title}"')
    for entry in files.items():
        fwd_util.print_file(entry)
Example #3
0
def export_file(filepath):
    """ Use filuxe to upload the file if it first matches the include regex and
        second doesn't match the exclude regex.
        If the include regex and the exclude regex are both empty strings then
        the file is exported.
    """
    if not FILUXE_WAN:
        return

    path = os.path.dirname(filepath)
    relpath = os.path.relpath(path, FILE_ROOT)

    file = os.path.basename(filepath)

    try:
        dir_rules = ACTIVE_RULES['dirs'][relpath]
        if not fwd_util.filename_is_included(file, dir_rules):
            inf(f'filename {file} is not in scope and will not be exported')
            return
        deb(f'forwarding {file}')
    except (KeyError, TypeError):
        # narrowed from a bare except: no rules for this directory (or no
        # rules loaded at all) means the default is to export everything
        inf(f'from {relpath} uploading file {file} (no rules)')

    try:
        deb(f'uploading {FILUXE_LAN.log_path(filepath)}')
        FILUXE_WAN.upload(filepath, os.path.join(relpath, file))
    except requests.ConnectionError:
        war('upload failed, WAN server is not reachable.')
    except FileNotFoundError:
        war(f'exception file not found, {os.path.join(relpath, file)} (internal race)'
            )
Example #4
0
    def __init__(self, x, directory, rules, delete_by):
        """Wrap one (filename, attributes) pair for sorting.

        When delete_by is 'version' a three-digit version is parsed from the
        filename with the directory's 'version' regex; otherwise the file
        time is used as the sort key.
        """
        self.file, self.attr = x
        self.numbers = None
        self.valid = False
        self.delete_by = delete_by

        if delete_by != 'version':
            self.time = x[1]['time']
            self.valid = True
            return

        try:
            pattern = re.compile(rules['dirs'][directory]['version'])
            digits = pattern.search(self.file).group(1).split('.')
            self.numbers = [int(digit) for digit in digits]
            self.valid = len(self.numbers) == 3
            if not self.valid:
                war(f'sort by version but failed to parse 3 digits from "{directory}/{self.file}"')

        except KeyError as e:
            war(f'sort by version but failed to parse 3 digits from "{directory}/{self.file}", key {e} not found')
        except AttributeError:
            war(f'version regex failed on filename "{os.path.join(directory, self.file)}"')
        except Exception as e:
            deb(f'exception {e}')
Example #5
0
 def get_stats(self):
     """Fetch /stats from the server; return (ErrorCode, decoded payload)."""
     url = f'{self.server}/stats'
     deb(f'getting stats at {url}')
     response = requests.get(url, verify=self.certificate)
     if response.status_code == 200:
         return ErrorCode.OK, json.loads(response.text)
     err(f'got {response.status_code} from {self.domain} server at /stats'
         )
     return ErrorCode.SERVER_ERROR, []
Example #6
0
def dump_rules():
    """Log the currently active per-directory rules (debug aid)."""
    try:
        if not ACTIVE_RULES['dirs'].items():
            war('this forwarder has no rules loaded ? Forwarding everything.')
        else:
            deb('dumping rules:')
            for _path, _rules in ACTIVE_RULES['dirs'].items():
                deb(f' "{_path}" {_rules}')
    except (KeyError, TypeError, AttributeError):
        # narrowed from a bare except: ACTIVE_RULES is None or lacks 'dirs'
        war('no dir rules found')
Example #7
0
    def delete_files(self, filegroup, group_name, use_http, rules):
        """Delete files from *filegroup* down to its 'maxfiles' limit.

        Files are sorted (by the group's 'deleteby' criterion, via Item
        ordering) and the excess is deleted from the top, either over http
        or directly in the local filestorage.

        Returns the list of filenames deleted locally; http and dryrun
        deletions are not appended (matching the original contract).
        """
        directory = filegroup['directory']
        filelist = filegroup['files']
        to_delete = len(filelist) - filegroup['maxfiles']
        deleted_files = []

        if to_delete > 0:
            delete_by = filegroup["deleteby"]

            inf(f'deleting {to_delete} files from {self.domain.domain} filestorage, '
                f'path="{filegroup["directory"]}" group "{group_name}". Deleteby={delete_by}')

            _items = [Item(x, directory, rules, delete_by) for x in filelist.items()]
            try:
                _sorted_items = sorted(_items)
            except Exception:
                # the items could not be ordered, e.g. unparsable versions
                first_filename = list(filelist.keys())[0]
                war(f'failed running delete by "{delete_by}" in group "{group_name}". Files of type "{first_filename}"')
                return deleted_files

            deb(f'http filelist sorted by: "{delete_by}", delete from top')
            for index, item in enumerate(_sorted_items):
                if index < to_delete:
                    extra = 'DEL '
                else:
                    extra = 'KEEP'
                fwd_util.print_file(item, extra)

            for item in _sorted_items[:to_delete]:
                filepath = os.path.join(directory, item.file)
                # bugfix: compute fqn up front; previously it was only bound
                # in the non-http branch, so the warning below could raise
                # NameError when an http delete failed
                fqn = os.path.join(self.domain.root(), filepath)
                if self.dryrun:
                    inf(f'dryrun: not deleting {filepath}')
                else:
                    try:
                        if use_http:
                            fwd_util.delete_http_file(self.domain, filepath)
                        else:
                            inf(f'deleting {self.domain.log_path(fqn)}')
                            os.remove(fqn)
                            deleted_files.append(item.file)
                    except Exception:
                        war(f'failed to delete file {fqn} (http={use_http})')

        return deleted_files
Example #8
0
def filter_filelist(filelist, rules):
    """
    Return a copy of the filelist where not-included and excluded files are removed according to the rule set.
    """
    # bugfix: make the copy before entering the try block; previously an
    # early exception left 'filtered_filelist' unbound and the final return
    # raised NameError instead of returning the unfiltered list
    filtered_filelist = copy.deepcopy(filelist)

    try:
        for path, files in filelist['filelist'].items():
            if not rules['dirs'].get(path):
                # Probably got a directory from a wan filelist that does not exist on lan.
                inf(f'filter filelist: ignoring directory "{path}" which is not found in rules'
                    )
            else:
                for filename in files:
                    if not filename_is_included(filename, rules['dirs'][path]):
                        del filtered_filelist['filelist'][path][filename]
    except (KeyError, TypeError, AttributeError):
        # narrowed from a bare except: malformed filelist or rules
        deb('http filelist returned unfiltered (bad rules?)')

    return filtered_filelist
Example #9
0
def new_file(filename):
    """React to a new or modified file: enforce the directory's max_files
    rule, then forward the file to the WAN server unless the enforcement
    just deleted it.
    """
    if not os.path.exists(filename):
        # bugfix: the f-string had no placeholder, so the log never said
        # which file vanished
        deb(f'listener: changed file "{filename}" does not exist anymore?')
        return

    inf(f'listener: new/changed file "{FILUXE_LAN.log_path(filename)}"')

    with Indent() as _:
        if LAN_FILE_DELETER:
            path = os.path.dirname(filename)
            filestorage_path = os.path.relpath(path, FILUXE_LAN.root())
            LAN_FILE_DELETER.enforce_max_files(filestorage_path,
                                               rules=ACTIVE_RULES,
                                               recursive=False)

        # the max-files enforcement above may have deleted this very file
        if not os.path.exists(filename):
            war(f'listener: new file "{FILUXE_LAN.log_path(filename)}" already deleted and will not be forwarded'
                )
            return

        export_file(filename)
Example #10
0
def delete_wan_file(filepath):
    """
    Delete file on WAN if WAN is configured. This will be triggered when a file is
    deleted from LAN filestorage.
    """
    if not FILUXE_WAN:
        deb(f'no wan configured, not deleting {filepath} on wan')
        return

    filestorage_path = os.path.relpath(filepath, FILUXE_LAN.root())
    path = os.path.dirname(filestorage_path)

    try:
        if not ACTIVE_RULES['dirs'][path]['delete']:
            deb(f'not deleting on wan since delete=false for {filepath}')
            return
    except (KeyError, TypeError):
        # no explicit 'delete' rule for this directory: default is to delete
        pass

    # bugfix: bind 'file' before the try so the log line in the handler
    # below can never hit an unbound name
    file = os.path.basename(filepath)
    try:
        rule_path = os.path.normpath(path)
        dir_rules = ACTIVE_RULES['dirs'][rule_path]
        if not fwd_util.filename_is_included(file, dir_rules):
            inf(f'filename {file} is not in scope and will not be exported')
            return
    except (KeyError, TypeError):
        deb(f'from "{FILUXE_WAN.log_path(filepath)}" deleting file {file} (no rules)'
            )

    fwd_util.delete_http_file(FILUXE_WAN, filestorage_path)
Example #11
0
    def parse_into_file_groups(self, directory, filelist, directory_settings):
        """Partition the files in *directory* into groups by the group
        regexes from *directory_settings* (max_files, delete_by, groups).

        Returns {group_key: {'files': {...}, 'maxfiles': .., 'deleteby': ..,
        'directory': ..}}; files matching no group regex land in 'ungrouped'.
        """
        file_groups = {}
        max_files, delete_by, file_group_rules = directory_settings
        deb(f'find groups in {self.domain.domain} "{directory}" ({len(filelist["filelist"][directory])} files)')

        for filename, fileinfo in filelist['filelist'][directory].items():
            group_key = 'ungrouped'

            for group in file_group_rules:
                try:
                    match = re.match(fr'{group}', filename)
                except Exception as e:
                    err(f'regex gave exception {e.__repr__()} with regex "{group}"')
                    exit(1)

                if match:
                    nof_groups = len(match.groups())
                    nof_group_regex_groups = re.compile(group).groups

                    # only accept the match if every capture group matched
                    if nof_groups != nof_group_regex_groups:
                        # bugfix: the f-string had no placeholder for the file
                        deb(f'parsing {filename} failed, found {nof_groups} groups, not {nof_group_regex_groups}')
                    else:
                        group_key = ':'.join(match.groups())
                        break

            if file_group_rules and group_key == 'ungrouped':
                # bugfix: the f-string had no placeholder for the file
                inf(f'no group match for {filename}, adding to "ungrouped"')

            group_key = os.path.join(directory, group_key)
            # plain membership test instead of probing with an indexing
            # expression under a bare except
            if group_key not in file_groups:
                file_groups[group_key] = {'files': {}}

            file_groups[group_key]['maxfiles'] = max_files
            file_groups[group_key]['deleteby'] = delete_by
            file_groups[group_key]['directory'] = directory
            file_groups[group_key]['files'][filename] = fileinfo

        for group in file_groups.keys():
            deb(f'group {group} with {len(file_groups[group]["files"])} files')

        return file_groups
Example #12
0
def filename_is_included(filename, rules):
    """Return True if *filename* passes the rule set.

    'export' must not be false, the name must match at least one 'include'
    regex (when any are given) and must not match any 'exclude' regex.
    Missing rule keys default to "include everything".
    """
    try:
        if not rules['export']:
            deb(f'export is false for {filename}')
            return False
    except (KeyError, TypeError):
        # no 'export' rule: default is to export
        pass

    try:
        include_list = rules['include']
        # bugfix: with an empty include list the loop never bound the match
        # variable and only the old bare except rescued the NameError; an
        # empty include list explicitly means "include everything"
        match = None
        for include in include_list:
            match = re.search(include, filename)
            if match:
                break
        if include_list and not match:
            deb(f'{filename} was not included by "{include}"')
            return False
    except re.error as e:
        err(f'include regex exception, "{include}" gave {e.__repr__()}. File ignored.'
            )
        return False
    except (KeyError, TypeError):
        # then default forward everything
        pass

    try:
        exclude_list = rules['exclude']
        for exclude in exclude_list:
            if re.search(exclude, filename):
                deb(f'ignore file hit for {filename}')
                return False
    except re.error as e:
        err(f'exclude regex exception, "{exclude}" gave {e.__repr__()}. File ignored.'
            )
        return False
    except (KeyError, TypeError):
        pass

    return True
Example #13
0
def get_rules_for_path(rules, path):
    """
    Return tuple with (max_files, delete_by, filegroups).

    Each setting is looked up in rules['dirs'][path] first, then in
    rules['default']. With no max_files anywhere the result is
    (None, None, []); 'unlimited' or a negative limit yields (-1, None, []).
    """
    try:
        max_files = rules['dirs'][path]['max_files']
    except (KeyError, TypeError):
        try:
            max_files = rules['default']['max_files']
        except (KeyError, TypeError):
            deb(f'rules: "{path}" has no file limit')
            return None, None, []

    if max_files == 'unlimited':
        max_files = -1
    if max_files < 0:
        deb(f'rules: "{path}" has no file limit')
        return -1, None, []

    try:
        delete_by = rules['dirs'][path]['delete_by']
    except (KeyError, TypeError):
        try:
            delete_by = rules['default']['delete_by']
        except (KeyError, TypeError):
            delete_by = 'time'

    try:
        filegroups = rules['dirs'][path]['group']
    except (KeyError, TypeError):
        try:
            filegroups = rules['default']['group']
        except (KeyError, TypeError):
            filegroups = []

    deb(f'rules: "{path}" has file limit {max_files}, delete by {delete_by}')
    return max_files, delete_by, filegroups
Example #14
0
    def enforce_max_files(self, path, rules, recursive=True, use_http=False, lan_files=None):
        """
        Get the list of files (locally or over http) and delete files if required by
        the rule "max_files". It can run a full recursive scan-and-delete as is
        done when starting the forwarder (with an empty path and recursive=True),
        and it can run in a specific directory when triggered by a new or modified
        file (with a path and recursive=False).
        Returns nothing.
        """

        try:
            deb(f'enforce max files in {self.domain.domain} with path="{path}", dryrun={self.dryrun}')
            if use_http:
                filelist = fwd_util.get_http_filelist(self.domain, path, recursive, rules)
            else:
                if not lan_files:
                    scan_directory = self.domain.root()
                    filelist = fwd_util.get_local_filelist(scan_directory, path, recursive, rules)
                else:
                    filelist = lan_files

            try:
                directories = filelist['filelist'].keys()
            except (KeyError, TypeError, AttributeError):
                # narrowed from a bare except: a missing or None filelist is
                # the expected "nothing to do" case
                deb(f'got empty filelist from {self.domain.domain} at "{path}"')
                return

            # per directory, split the files into groups and remember the
            # ones that actually have a file limit to enforce
            group_list = {}
            for directory in directories:
                directory_settings = get_rules_for_path(rules, directory)
                max_files, _delete_by, _file_groups = directory_settings
                if max_files == -1:
                    inf(f'"{self.domain.domain}/{path}" has no filelimit, skipping.'
                        f' ({len(filelist["filelist"][directory])} files)')
                    continue
                if not max_files:
                    continue
                group_list[directory] = self.parse_into_file_groups(directory, filelist, directory_settings)

            deb(f'found total {len(group_list)} groups')

            for directory, group in group_list.items():
                for group_key, file_group in group.items():
                    nof_files = len(file_group['files'])
                    max_files = file_group['maxfiles']
                    excess_files = nof_files - max_files
                    if excess_files > 0:
                        message = f'"{self.domain.domain}/{directory}" group:"{group_key}" exceeded max files '\
                                  f'with {excess_files}. ({nof_files} files, limit is {max_files})'
                        inf(message)
                        with Indent() as _:
                            deleted_files = self.delete_files(file_group, group_key, use_http, rules)
                            # keep the in-memory filelist in sync with disk
                            for file in deleted_files:
                                del filelist['filelist'][directory][file]

                    else:
                        message = f'"{self.domain.domain}/{directory}" group:"{group_key}" no action. '\
                                  f'({nof_files} files, limit is {max_files})'
                        deb(message)

        except Exception as e:
            die(f'exception in enforce_max_files {e.__repr__()}', e, error_code=ErrorCode.INTERNAL_ERROR)
Example #15
0
def print_file(item, extra=''):
    """Log one file item: size, raw epoch time, formatted timestamp, name."""
    # local renamed from 'datetime' which shadowed the stdlib module name
    stamp = time.strftime("%m/%d/%Y %H:%M:%S", time.gmtime(item.attr["time"]))
    readable = human_file_size(item.attr["size"])
    deb(f'{extra} - {readable:<10} {item.attr["time"]:<20} {stamp} "{item.file}"'
        )
Example #16
0
def filestorage_scan(root, path='', recursive=True):
    """Scan the filestorage below root/path, skipping files that are
    currently open.

    Returns {'filelist': {relpath: {filename: {'size': .., 'time': ..}}},
             'info': {'dirs': .., 'fileroot': .., 'files': .., 'size': ..}}
    With recursive=False only the top directory is scanned.
    """
    _filelist = {}
    total_directories = 0
    total_files = 0
    total_size = 0

    scan_root = os.path.join(root, path)

    # bugfix: if the walk below yields nothing (e.g. scan_root does not
    # exist) 'relative_path' was unbound at the return statement. This
    # default matches what the first walk iteration would compute.
    relative_path = os.path.normpath(path) if path else '.'

    if recursive:
        inf(f'recursively scanning "{scan_root}"')
    else:
        inf(f'rescanning directory "{scan_root}"')

    with Indent() as _:
        for _root, _dirs, _files in os.walk(scan_root):
            _path = os.path.relpath(_root, scan_root)
            size = 0

            relative_path = os.path.normpath(os.path.join(path, _path))
            if not _filelist.get(relative_path):
                _filelist[relative_path] = {}

            for _file in _files:
                try:
                    file = os.path.join(_root, _file)
                    if util.file_is_closed(os.path.abspath(file)):
                        _size = os.path.getsize(file)
                        epoch = util.get_file_time(file)
                        metrics = {'size': _size, 'time': epoch}
                        _filelist[relative_path][_file] = metrics
                        # reuse the size from above instead of a second stat
                        size += _size
                    else:
                        war(f'filestorage scan, ignoring open file {file}')
                except FileNotFoundError:
                    # the file disappeared between listing and stat
                    deb(f'filestorage scan: file not found {file}')

            total_directories += 1
            total_files += len(_files)
            total_size += size

            message = f'scanned "{relative_path}", {human_file_size(size)} in {len(_files)} files'
            if recursive:
                # If recursive it will be the first full scan so print what is happening
                inf(message)
            else:
                deb(message)

            if not recursive:
                break

        inf(f'found {total_directories} directories with {total_files} files occupying {human_file_size(total_size)}'
            )

    return {
        'filelist': _filelist,
        'info': {
            'dirs': total_directories,
            'fileroot': relative_path,
            'files': total_files,
            'size': total_size
        }
    }
Example #17
0
def calculate_rules(check_dirs):
    """
    1: check_dirs as list of directories:
    Starting from the root then recursively propagate rules matching the given directory structure.

    2: check_dirs as directory
    This can set rules on a new directory. If the directory already has rules assigned then it is a no-op.
    """
    global LOADED_RULES, ACTIVE_RULES

    # no rules loaded at all: disable rule processing entirely
    if not LOADED_RULES:
        ACTIVE_RULES = None
        return

    inf('calculating rules')
    with Indent() as _:

        default_rule = LOADED_RULES["default"]
        try:
            dir_rules = LOADED_RULES["dirs"]
            # snapshot taken on entry, compared at the end to detect changes
            entry_rules_json = json.dumps(dir_rules, sort_keys=True)
        except:
            war('no "dir" rules file section found, using "default" section only'
                )
            return

        if isinstance(check_dirs, list):
            # full recalculation: sort so parents are visited before children
            check_dirs.sort()
            new_rules = {}
        else:
            # single new directory: start from the currently active rules
            check_dirs = [check_dirs]
            new_rules = copy.deepcopy(ACTIVE_RULES['dirs'])

        try:
            for _key in check_dirs:
                _path_elements = _key.split(os.sep)

                # walk from the root towards _key so every level inherits
                # from its parent before applying its own overrides
                for i in range(len(_path_elements)):
                    path_elements = _path_elements[:i + 1]

                    path = os.path.join(*path_elements)

                    if path == '.':
                        # the filestorage root gets the default rules
                        new_rules[path] = default_rule
                    else:
                        previous = os.path.relpath(
                            os.path.join(path, os.pardir))
                        try:
                            # parent rules merged with this directory's own
                            # entries; the directory's own entries win
                            new_rules[path] = {
                                **new_rules[previous],
                                **dir_rules[path]
                            }
                        except:
                            try:
                                # no explicit rules here: inherit the parent's
                                new_rules[path] = new_rules[previous]
                            except:
                                deb(f'no rules found for {path}, skipped')

                    # NOTE(review): when both lookups above failed, the
                    # new_rules[path] access below raises KeyError and is
                    # caught by the outer except — confirm this is intended
                    deb(f'transient rule: "{path}" {new_rules[path]}')
        except:
            war(f'establishing rules for {_key} failed, check rules file')

        # purge rules that doesn't trigger any actions

        new_rules_copy = copy.deepcopy(new_rules)
        active_new_rules = {}
        active_new_rules['dirs'] = {}

        for path, path_rules in new_rules_copy.items():
            # a rule is kept if it exports or carries more than one setting
            if path_rules.get('export') or len(path_rules) > 1:
                inf(f'adding rule for "{path}" : {path_rules}')
                active_new_rules['dirs'][path] = path_rules

        # NOTE(review): entry_rules_json serializes the raw 'dirs' mapping
        # while new_rules_json serializes a {'dirs': ...} wrapper, so the
        # two can never be equal structurally — verify 'changed' detection
        new_rules_json = json.dumps(active_new_rules, sort_keys=True)
        changed = entry_rules_json != new_rules_json

        if changed:
            ACTIVE_RULES = active_new_rules
            extras = '. Rules were adjusted'
        else:
            extras = '. No changes ?'

    inf(f'rules calculated, {len(ACTIVE_RULES["dirs"])} active rules{extras}')
Example #18
0
 def process_IN_MOVED_TO(self, event):
     """inotify hook: a file was moved into the watched tree."""
     pathname = event.pathname
     deb(f'inotify: move(write) {pathname}')
     new_file(pathname)
     IDLE_DETECT.activity()
Example #19
0
 def process_IN_MOVED_FROM(self, event):
     """inotify hook: a file was moved out of the watched tree."""
     pathname = event.pathname
     deb(f'inotify: move(delete) {pathname}')
     delete_wan_file(pathname)
     IDLE_DETECT.activity()
Example #20
0
 def process_IN_DELETE(self, event):
     """inotify hook: a file was deleted from the watched tree."""
     pathname = event.pathname
     deb(f'inotify: delete {pathname}')
     delete_wan_file(pathname)
     IDLE_DETECT.activity()
Example #21
0
 def process_IN_CLOSE_WRITE(self, event):
     """inotify hook: a file opened for writing was closed."""
     pathname = event.pathname
     deb(f'inotify: closed {pathname}')
     new_file(pathname)
     IDLE_DETECT.activity()