def test_hard_delete_dataset(self):
    """Only expired datasets should be hard-deleted; fresh ones survive."""
    user = User('user@domain', 'password')

    # create five datasets owned by the same user
    datasets = [create_test_dataset(owner=user) for _ in range(5)]

    # expire the first two of them
    for expired in datasets[:2]:
        update(expired, store_until=utc_now())
    db.session.commit()

    time.sleep(2)

    removed_cnt = hard_delete_expired_datasets()

    # exactly the two expired datasets were removed, three remain
    assert removed_cnt == 2
    assert UsersMutationsDataset.query.count() == 3
def remove(self, commit=True): """Performs hard-delete of dataset. Current session won't be committed if commit=False is provided. """ # hard delete of data is the first priority with suppress(FileNotFoundError): os.remove(self._path) # soft delete associated entry update(self, store_until=utc_now()) if commit: db.session.commit() # prompt python interpreter to remove data from memory with suppress(AttributeError): del self._data # and delete from session db.session.delete(self) if commit: db.session.commit()
def test_init(self):
    """Creating a dataset registers it, and expiring it hides it again."""
    user = User('user@domain', 'password')
    dataset = create_test_dataset(owner=user)

    assert user.datasets == [dataset]

    public_id = dataset.uri
    fetched = UsersMutationsDataset.query.filter_by(uri=public_id).one()
    assert fetched == dataset
    assert UsersMutationsDataset.by_uri(public_id) == dataset

    assert dataset.data
    assert dataset.query_size == 3
    # should be empty as no mutations were imported
    assert not dataset.mutations
    assert dataset.name == 'test'
    assert dataset.life_expectancy < timedelta(days=7)
    assert not dataset.is_expired

    # force immediate expiration
    update(dataset, store_until=utc_now())
    db.session.commit()
    time.sleep(2)

    assert dataset.is_expired
    expired = UsersMutationsDataset.query.filter_by(is_expired=True).one()
    assert expired == dataset

    owner = User.query.filter_by(email='user@domain').one()
    assert owner.datasets == []
def is_expired(self):
    # SQL-expression form: renders as a `store_until < <timestamp>`
    # comparison usable in query filters (e.g. filter_by(is_expired=True)).
    # NOTE(review): utc_now() is evaluated client-side when the query is
    # built, not on the database server — confirm that is acceptable.
    return UsersMutationsDataset.store_until < utc_now()
class UsersMutationsDataset(CMSModel):
    """A user-submitted mutation search persisted as a pickle file.

    The heavy search payload is pickled to a file under ``mutations_dir``;
    the database row keeps only metadata and the file's ``uri`` (its unique
    filename stem, which doubles as an unguessable public identifier).
    """

    mutations_dir = 'user_mutations'

    name = db.Column(db.String(256))
    uri = db.Column(db.String(256), unique=True, index=True)
    owner_id = db.Column(db.Integer, db.ForeignKey('user.id'))
    query_count = db.Column(db.Integer)
    results_count = db.Column(db.Integer)
    # as we get newer MySQL version, use of server_default would be preferable.
    # Pass the callable itself, not its result: `default=utc_now()` evaluated
    # the timestamp once at import time, stamping every row with that moment.
    created_on = db.Column(db.DateTime, default=utc_now)
    # using default, not server_default as MySQL cannot handle functions as defaults, see:
    # https://dba.stackexchange.com/questions/143953/how-can-i-set-timestamps-default-to-future-date
    # NOTE(review): assumes utc_days_after(7) returns a callable computing
    # "now + 7 days" per insert — confirm; if it returns a plain datetime it
    # has the same import-time evaluation bug created_on had.
    store_until = db.Column(db.DateTime, default=utc_days_after(7))

    def __init__(self, *args, **kwargs):
        # `data` is not a column; route it through the property setter so it
        # is pickled to a file and a fresh uri is assigned.
        data = kwargs.pop('data')
        super().__init__(*args, **kwargs)
        self.data = data

    @classmethod
    def by_uri(cls, uri):
        """Return the single dataset with the given uri (trailing '/' ignored)."""
        return cls.query.filter_by(uri=uri.rstrip('/')).one()

    @property
    def data(self) -> 'MutationSearch':
        """Lazily unpickle and cache the stored search results."""
        if not hasattr(self, '_data'):
            try:
                self._data = self._load_from_file()
            except FileNotFoundError:
                # None if associated file was deleted.
                # Be aware of this line when debugging.
                return
        return self._data

    @data.setter
    def data(self, data):
        # persist immediately; _save_to_file returns a (possibly new) uri
        self._data = data
        uri = self._save_to_file(data, self.uri)
        self.uri = uri

    def remove(self, commit=True):
        """Performs hard-delete of dataset.

        Current session won't be committed if commit=False is provided.
        """
        # hard delete of data is the first priority
        with suppress(FileNotFoundError):
            os.remove(self._path)

        # soft delete associated entry
        update(self, store_until=utc_now())
        if commit:
            db.session.commit()

        # prompt python interpreter to remove data from memory
        with suppress(AttributeError):
            del self._data

        # and delete from session
        db.session.delete(self)
        if commit:
            db.session.commit()

    def _save_to_file(self, data, uri=None):
        """Saves data to a file identified by uri argument.

        If no uri is given, new unique file is created and new uri returned.
        Returned uri is unique so it can serve as a kind of a randomized id
        to prevent malicious software from iteration over all entries.
        """
        import base64
        from tempfile import NamedTemporaryFile

        os.makedirs(self.mutations_dir, exist_ok=True)

        encoded_name = str(base64.urlsafe_b64encode(bytes(self.name, 'utf-8')), 'utf-8')

        if uri:
            file_name = uri + '.db'
            path = os.path.join(self.mutations_dir, file_name)
            db_file = open(path, 'wb')
        else:
            db_file = NamedTemporaryFile(
                dir=self.mutations_dir,
                prefix=encoded_name,
                suffix='.db',
                delete=False
            )

        # close (and flush) the handle even if pickling fails — the original
        # never closed it, leaking the descriptor and risking unflushed data
        with db_file:
            pickle.dump(data, db_file, protocol=4)

        # the uri is the filename without its '.db' suffix
        uri_code = os.path.basename(db_file.name)[:-3]
        return uri_code

    @property
    def _path(self):
        """Filesystem path of the pickle file backing this dataset."""
        from urllib.parse import unquote
        file_name = unquote(self.uri) + '.db'
        return os.path.join(self.mutations_dir, file_name)

    def _load_from_file(self):
        # NOTE: unpickling is only safe because these files are produced by
        # this application itself, never accepted directly from users.
        with open(self._path, 'rb') as f:
            data = pickle.load(f)
        return data

    @hybrid_property
    def is_expired(self):
        return self.life_expectancy < timedelta(0)

    @is_expired.expression
    def is_expired(self):
        # SQL-expression form, usable in query filters
        return UsersMutationsDataset.store_until < utc_now()

    @hybrid_property
    def life_expectancy(self):
        """How much time is left for this dataset before removal."""
        # NOTE(review): uses datetime.utcnow() while the rest of the model
        # uses utc_now() — confirm both yield comparable naive datetimes.
        return self.store_until - datetime.utcnow()

    @property
    def query_size(self):
        """Number of lines in the submitted query (query_count when cached)."""
        if self.query_count is None:
            new_lines = self.data.query.count('\n')
            # decide on content, not newline count: a non-empty query without
            # a trailing newline is still one line (the original returned 0
            # for any single-line query)
            return new_lines + 1 if self.data.query else 0
        return self.query_count

    @property
    def mutations(self):
        """All mutations, flattened across per-protein result lists."""
        mutations = []
        # (original shadowed `results` with its own loop variable)
        for protein_results in self.data.results.values():
            for result in protein_results:
                mutations.append(result.mutation)
        return mutations

    @property
    def mutations_count(self):
        if self.results_count is None:
            return len(self.mutations)
        return self.results_count

    def get_mutation_details(self, protein, pos, alt):
        """Return user-provided metadata for one (position, alt) result."""
        protein_results = self.data.results_by_refseq[protein.refseq]
        return protein_results[pos, alt].meta_user