def __init__(self, url_prefix, title, description, reference, tags=None, creation_date=None,
             modification_date=None, fork_count=0, forked_from=None, forked_from_id=None):
    # Resolve date defaults at call time; a now() default in the signature would be
    # evaluated only once, when the class is defined.
    if creation_date is None:
        creation_date = now()
    if modification_date is None:
        modification_date = now()

    with lock:
        if DatasetDAO.query.get(url_prefix=url_prefix) is not None:
            raise Exception("Url prefix already taken.")

        if forked_from_id is None and forked_from is not None:
            forked_from_id = forked_from._id

        size = 0  # picked up by the locals() comprehension below

        kwargs = {k: v for k, v in locals().items()
                  if k not in ["self", "__class__", "forked_from"]}
        super().__init__(**kwargs)
def __init__(self, title, description, file_ref_id, http_ref=None, tags=None,
             addition_date=None, modification_date=None, dataset_id=None, dataset=None):
    if addition_date is None:
        addition_date = now()
    if modification_date is None:
        modification_date = now()

    if dataset_id is None and dataset is not None:
        dataset_id = [dataset._id]

    # Only the dataset ids are forwarded; the DAO object itself is excluded.
    kwargs = {k: v for k, v in locals().items()
              if k not in ["self", "__class__", "dataset"]}
    super().__init__(**kwargs)
def __init__(self, description, max_dataset_count, max_dataset_size, url_prefix, token_gui=None,
             creation_date=None, modification_date=None, end_date=None,
             privileges=Privileges.RO_WATCH_DATASET):
    # Resolve date defaults at call time rather than at class-definition time.
    if creation_date is None:
        creation_date = now()
    if modification_date is None:
        modification_date = now()
    if end_date is None:
        end_date = token_future_end()

    if token_gui is None:
        token_gui = self.generate_token()
    elif self.query.get(token_gui=token_gui) is not None:
        raise Exception("Specified token GUI already exists.")

    kwargs = {k: v for k, v in locals().items() if k not in ["self", "__class__"]}
    super().__init__(**kwargs)
def __init__(self):
    super().__init__()
    self.get_parser = reqparse.RequestParser()
    self.delete_parser = self.get_parser
    self.post_parser = reqparse.RequestParser()
    self.session = global_config.get_session()

    arguments = {
        "token_gui": {
            "type": str,
            "required": True,
            "help": "Global Unique Identifier of the token is required.",
            "location": "json"
        }
    }

    for argument, kwargs in arguments.items():
        self.get_parser.add_argument(argument, **kwargs)

    arguments = {
        "description": {
            "type": str,
            "help": "Description for the token.",
            "location": "json"
        },
        "max_dataset_count": {
            "type": int,
            "help": "Maximum number of datasets this token is allowed to create.",
            "location": "json"
        },
        "max_dataset_size": {
            "type": int,
            "help": "Maximum size of a dataset created by this token.",
            "location": "json"
        },
        "end_date": {
            "type": str,
            "help": "End date for this token, in format {}".format(now()),
            "location": "json"
        },
        "privileges": {
            "type": int,
            "help": "Privileges integer for this token.",
            "location": "json"
        },
        "token_gui": {
            "type": str,
            "help": "Token GUI; if not specified, a new one is generated.",
            "location": "json"
        },
        "url_prefix": {
            "type": str,
            "help": "Token URL prefix.",
            "location": "json"
        },
    }

    for argument, kwargs in arguments.items():
        self.post_parser.add_argument(argument, **kwargs)
def parse_args(self, req=None, strict=False, required_any_token_privileges=None,
               required_all_token_privileges=None):
    args = super().parse_args(req=req, strict=strict)

    if required_any_token_privileges is None:
        required_any_token_privileges = []
    if required_all_token_privileges is None:
        required_all_token_privileges = []

    token_gui = args['_tok']
    if token_gui is None:
        abort(404)

    token = TokenDAO.query.get(token_gui=token_gui)
    if token is None:
        print("Token invalid {}".format(token_gui))
        abort(404)

    # Privilege-based access control. Note that any([]) is False, so callers must supply
    # at least one privilege in required_any_token_privileges for the check to succeed.
    pbac_ok = all([bool(token.privileges & privilege) for privilege in required_all_token_privileges]) and \
              any([bool(token.privileges & privilege) for privilege in required_any_token_privileges])

    if not pbac_ok:
        abort(401)

    if token.end_date < now():
        abort(410)

    return args, token
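# Illustrative sketch only (not part of the original source): how a resource handler might
# call the privilege-aware parse_args() above. The class name, the patch() method and the
# self.post_parser wiring are hypothetical assumptions; only the privilege names
# (Privileges.EDIT_DATASET, Privileges.ADMIN_EDIT_TOKEN) appear elsewhere in this code.
class DatasetEditExample(Resource):
    def patch(self, url_prefix):
        # The token must hold at least one of the two edit privileges.
        args, token = self.post_parser.parse_args(
            required_any_token_privileges=[
                Privileges.EDIT_DATASET,
                Privileges.ADMIN_EDIT_TOKEN,
            ]
        )
        # ... apply the edit using the validated token ...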
def add_element(self, title, description, file_ref_id, http_ref=None, tags=None,
                addition_date=None, modification_date=None):
    # Dates default to None; DatasetElementDAO resolves them to now() at call time.
    return DatasetElementDAO(title, description, file_ref_id, http_ref, tags,
                             addition_date, modification_date, dataset=self)
def edit_dataset(self, edit_url_prefix: str, *args, **kwargs) -> DatasetDAO:
    can_edit_inner_dataset = bool(self.token.privileges & Privileges.EDIT_DATASET)
    can_edit_others_dataset = bool(self.token.privileges & Privileges.ADMIN_EDIT_TOKEN)
    illegal_chars = self.illegal_chars

    if not any([can_edit_inner_dataset, can_edit_others_dataset]):
        abort(401, message="Your token does not have enough privileges to edit datasets.")

    try:
        url_prefix = kwargs["url_prefix"]

        if can_edit_others_dataset:
            illegal_chars = illegal_chars[1:]  # "/" is allowed for admin

        if any([illegal_char in url_prefix for illegal_char in illegal_chars]):
            abort(400, message="The following chars are not allowed in the url-prefix: {}".format(illegal_chars))

        if "/" not in url_prefix:
            url_prefix = "{}/{}".format(self.token.url_prefix, url_prefix)

        kwargs['url_prefix'] = url_prefix
    except KeyError:
        pass

    if 'elements' in kwargs:
        abort(400, message="There is a field not allowed in the edit request.")

    if 'comments' in kwargs:
        abort(400, message="There is a field not allowed in the edit request.")

    edit_dataset = DatasetDAO.query.get(url_prefix=edit_url_prefix)

    if edit_dataset is None:
        abort(404, message="Dataset wasn't found.")

    if not can_edit_others_dataset:
        if edit_dataset.url_prefix.split("/")[0] != self.token.url_prefix:
            abort(401, message="Dataset can't be accessed.")

        # Fix: this token can only edit a dataset if the dataset is linked to it.
        if not self.token.has_dataset(edit_dataset):
            abort(401, message="Dataset can't be accessed.")

    kwargs['modification_date'] = now()

    # The modification is performed here.
    for k, v in kwargs.items():
        if v is None:
            continue
        edit_dataset[k] = v

    self.session.flush()

    return edit_dataset
def add_comment(self, author_name, author_link, content, addition_date=None):
    # Resolve the default at call time rather than at function-definition time.
    if addition_date is None:
        addition_date = now()
    return DatasetElementCommentDAO(author_name, author_link, content, addition_date, element=self)
def __init__(self):
    super().__init__()
    self.get_parser = reqparse.RequestParser()
    self.get_parser.add_argument("url_prefix", type=str, required=False,
                                 help="URL prefix to get tokens from.")
    self.post_parser = reqparse.RequestParser()
    self.session = global_config.get_session()

    arguments = {
        "description": {
            "type": str,
            "required": True,
            "help": "Description for the token.",
            "location": "json"
        },
        "max_dataset_count": {
            "type": int,
            "required": True,
            "help": "Maximum number of datasets this token is allowed to create.",
            "location": "json"
        },
        "max_dataset_size": {
            "type": int,
            "required": True,
            "help": "Maximum size of a dataset created by this token.",
            "location": "json"
        },
        "end_date": {
            "type": str,
            "help": "End date for this token, in format {}".format(now()),
            "location": "json"
        },
        "privileges": {
            "type": int,
            "required": True,
            "help": "Privileges integer for this token.",
            "location": "json"
        },
        "url_prefix": {
            "type": str,
            "help": "Token URL prefix.",
            "location": "json"
        },
    }

    for argument, kwargs in arguments.items():
        self.post_parser.add_argument(argument, **kwargs)
def args_wrap(*args, **kwargs):
    # Inner wrapper of a per-IP rate-limiting decorator: func, session, RestAPIDAO,
    # ACCESS_RESET_TIME and MAX_ACCESS_TIMES come from the enclosing scope or the module.
    remote_ip = request.remote_addr

    ip_control = RestAPIDAO.query.get(ip=remote_ip)

    if ip_control is None:
        ip_control = RestAPIDAO(ip=remote_ip, last_access=now(), num_accesses=0)

    # Reset the counter once the access window has expired.
    if (now() - ip_control.last_access).total_seconds() > ACCESS_RESET_TIME:
        ip_control.last_access = now()
        ip_control.num_accesses = 0

    if ip_control.num_accesses > MAX_ACCESS_TIMES:
        abort(429)

    ip_control.num_accesses += 1
    session.flush()
    session.clear()

    return func(*args, **kwargs)
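# Illustrative sketch only (not part of the original source): a plausible enclosing
# decorator for the args_wrap wrapper above. The name "control_access" and the
# functools.wraps usage are assumptions; only the wrapper's role (per-IP access counting
# before delegating to the wrapped view) comes from the code above.
from functools import wraps

def control_access(func):
    @wraps(func)
    def args_wrap(*args, **kwargs):
        # ... per-IP access bookkeeping as in the wrapper above ...
        return func(*args, **kwargs)
    return args_wrap

# Hypothetical usage on a Flask-RESTful resource method:
# @control_access
# def get(self, url_prefix):
#     ...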
def __init__(self, author_name, author_link, content, addition_date=None, element_id=None, element=None):
    if addition_date is None:
        addition_date = now()

    if element_id is None and element is not None:
        element_id = element._id

    # Only the element id is forwarded; the DAO object itself is excluded.
    kwargs = {k: v for k, v in locals().items()
              if k not in ["self", "__class__", "element"]}
    super().__init__(**kwargs)
def __init__(self, author_name, author_link, content, addition_date=None, dataset_id=None, dataset=None):
    if addition_date is None:
        addition_date = now()

    if dataset_id is None and dataset is not None:
        dataset_id = dataset._id

    # Only the dataset id is forwarded; the DAO object itself is excluded.
    kwargs = {k: v for k, v in locals().items()
              if k not in ["self", "__class__", "dataset"]}
    super().__init__(**kwargs)
def __create_token__(namespace, description, max_datasets, max_elements_per_dataset, privileges,
                     duration_in_days, end_date=None):
    illegal_chars = "/*;:,.ç´`+Ǩ^><¿?'¡¿!\"·$%&()@~¬"

    if any([i in namespace for i in illegal_chars]):
        print("[ERROR] The namespace can't hold any of the following chars:\n\"{}\"".format(illegal_chars))
        exit(-1)

    from mldatahub.odm.dataset_dao import DatasetDAO
    from mldatahub.odm.token_dao import TokenDAO

    # If no explicit end date is given, derive it from the requested duration.
    if end_date is None:
        end_date = now() + relativedelta(days=+duration_in_days)

    token = TokenDAO(description=description, max_dataset_count=max_datasets,
                     max_dataset_size=max_elements_per_dataset, url_prefix=namespace,
                     end_date=end_date, privileges=privileges)

    global_config.get_session().flush()

    return token
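# Illustrative sketch only (not part of the original source): a possible invocation of
# __create_token__ from an admin script. The namespace, sizes and duration are made-up
# values; Privileges.RO_WATCH_DATASET and the token_gui field come from the surrounding code.
if __name__ == "__main__":
    token = __create_token__(
        namespace="demo",
        description="Read-only demo token",
        max_datasets=5,
        max_elements_per_dataset=10000,
        privileges=Privileges.RO_WATCH_DATASET,
        duration_in_days=30,
    )
    print("Created token:", token.token_gui)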
def __init__(self, storage=None):
    if storage is None:
        storage = global_config.get_storage()

    self.storage = storage

    drive_folder = global_config.get_google_drive_folder()

    i("Initializing PYGFolder...")
    self.pygfolder = PyGFolder()
    i("Done")

    if drive_folder.endswith("/"):
        drive_folder = drive_folder[:-1]

    init_file = drive_folder + "/init"

    i("Creating init file with current timestamp...")
    self.pygfolder[init_file] = as_bytes(now())
    i("Done")

    i("Accessing folder...")
    self.pygfolder = self.pygfolder[drive_folder]
    i("Done")

    self.uploader = Thread(target=self.__uploader__, daemon=True)
    self.downloader = Thread(target=self.__downloader__, daemon=True)
    self.uploader.start()
    self.downloader.start()
def token_future_end():
    return now() + relativedelta(years=+500)
class GarbageCollector(object):
    """
    Garbage collector: cleans the filesystem of files whose references have been lost.

    To do this efficiently it relies on the storage class, which keeps a set of all the
    filenames it has worked with; this set is guaranteed to be up to date and to persist
    across server reboots. Iterating over this structure is cheaper than walking the
    filesystem itself.
    """
    lock = threading.Lock()
    do_stop = False
    storage = global_config.get_storage()  # type: GenericStorage
    last_tick = now() - datetime.timedelta(minutes=60)

    def __init__(self):
        self.previous_unused_files = set()
        # Start the worker only after the state it reads has been initialized.
        self.thread = Thread(target=self.__thread_func, daemon=True)
        self.thread.start()

    def __stop_requested(self):
        with self.lock:
            value = self.do_stop
        return value

    def __thread_func(self):
        while not self.__stop_requested():
            if (now() - self.last_tick).total_seconds() > TIMER_TICK:
                with self.lock:
                    self.last_tick = now()
                self.do_garbage_collect()
            sleep(1)
        print("[GC] Exited.")

    def collect_unused_files(self):
        """
        Searches the DB for unused files and returns their ids.
        :return: list with the IDs of the unused files.
        """
        unused_files = []
        files_count = len(self.storage)
        print("[GC] {} files to be checked.".format(files_count))

        sleep_batch = 50
        files_per_second = [0]
        files_per_second_avg = 0
        time_remaining = -1

        with Measure() as timing:
            for index, file in enumerate(self.storage):
                if DatasetElementDAO.query.get(file_ref_id=file._id) is None:
                    unused_files.append(file._id)

                if index % sleep_batch == 0:
                    sleep(0.1)

                # Calibrate the throughput estimate over the first few seconds,
                # then use it to report the remaining time.
                if len(files_per_second) < 5:
                    files_per_second[-1] += 1
                    if timing.elapsed().seconds >= 1:
                        files_per_second.append(0)
                        timing.reset()
                    files_per_second_avg = sum(files_per_second) / len(files_per_second)
                    time_remaining = ""
                else:
                    time_remaining = " {} remaining".format(
                        time_left_as_str((len(self.storage) - index) // files_per_second_avg))

                if self.__stop_requested():
                    break

                progress(index + 1, files_count,
                         "{} files are orphan.{}".format(len(unused_files), time_remaining))

        print("")
        return unused_files

    def do_garbage_collect(self):
        print("[GC] Collecting garbage...")
        global_config.session.clear()

        # 1. Retrieve the unused files.
        unused_files = self.collect_unused_files()

        # 2. Check how many unused files are in common with the previous pass.
        new_unused_files = []
        remove_files = []

        print("[GC] Comparing {} unused files to previous {} unused files.".format(
            len(unused_files), len(self.previous_unused_files)))

        for file in unused_files:
            if file in self.previous_unused_files:
                remove_files.append(file)
            else:
                new_unused_files.append(file)

        print("[GC] Cleaning {} elements...".format(len(remove_files)))

        # 3. Delete in batches.
        files_count = 0
        for list_ids in segment(remove_files, 50):
            files_count += len(list_ids)
            self.storage.delete_files(list_ids)
            progress(files_count, len(remove_files),
                     "{} files garbage collected.".format(files_count))
            sleep(0.1)

        self.previous_unused_files = set(new_unused_files)

        print("[GC] Cleaned {} elements...".format(len(remove_files)))

        return files_count

    def stop(self, wait_for_finish=True):
        with self.lock:
            self.do_stop = True

        if wait_for_finish:
            self.thread.join()

    def __del__(self):
        self.stop()
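# Illustrative sketch only (not part of the original source): wiring the collector into
# server startup and shutdown. The atexit hook is an assumption; the constructor starting
# the background thread and the stop() method come from the class above.
import atexit

garbage_collector = GarbageCollector()       # spawns the daemon GC thread
atexit.register(garbage_collector.stop)      # request a stop and join on interpreter exit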