Beispiel #1
0
    def test_hard_delete_dataset(self):
        """Expired datasets are hard-deleted; the remaining ones are kept."""
        owner = User('user@domain', 'password')

        # five datasets, all owned by the same user
        all_datasets = [create_test_dataset(owner=owner) for _ in range(5)]

        # the first two get their store_until moved to the current time
        to_expire, to_keep = all_datasets[:2], all_datasets[2:]
        for dataset in to_expire:
            update(dataset, store_until=utc_now())

        db.session.commit()
        # presumably lets the expiry timestamp fall strictly into the past
        # before the clean-up runs
        time.sleep(2)

        removed_cnt = hard_delete_expired_datasets()

        # two were removed, three remained
        assert removed_cnt == len(to_expire)
        assert UsersMutationsDataset.query.count() == len(to_keep)
Beispiel #2
0
    def remove(self, commit=True):
        """Hard-delete this dataset: its data file and its session entry.

        Steps, in order:
          1. remove the file at `self._path` (a missing file is ignored),
          2. call `update(self, store_until=utc_now())` as a soft delete,
          3. drop the cached `_data` attribute, if present,
          4. delete the object from `db.session`.

        Args:
            commit: when True (the default) the session is committed after
                step 2 and again after step 4; when False the current
                session is not committed by this method.
        """
        # hard delete of data is the first priority;
        # the file may already be gone, which is not an error here
        with suppress(FileNotFoundError):
            os.remove(self._path)

        # soft delete associated entry
        update(self, store_until=utc_now())
        if commit:
            db.session.commit()

        # prompt python interpreter to remove data from memory;
        # `_data` is only set once the data was assigned or loaded
        with suppress(AttributeError):
            del self._data

        # and delete from session
        db.session.delete(self)
        if commit:
            db.session.commit()
    def test_init(self):
        """Check creation, lookup, derived properties and expiry of a dataset."""
        owner = User('user@domain', 'password')

        dataset = create_test_dataset(owner=owner)

        assert owner.datasets == [dataset]

        uri = dataset.uri

        # a plain query and the by_uri helper must both find the dataset
        fetched = UsersMutationsDataset.query.filter_by(uri=uri).one()

        assert fetched == dataset
        assert UsersMutationsDataset.by_uri(uri) == dataset

        assert dataset.data
        assert dataset.query_size == 3

        # should be empty as no mutations were imported
        assert not dataset.mutations

        assert dataset.name == 'test'

        assert dataset.life_expectancy < timedelta(days=7)

        assert not dataset.is_expired

        # move the expiry time to the current time and persist it
        update(dataset, store_until=utc_now())
        db.session.commit()

        time.sleep(2)

        # expiry must be visible on the instance and in a query filter
        assert dataset.is_expired
        expired = UsersMutationsDataset.query.filter_by(is_expired=True).one()

        assert expired == dataset

        stored_owner = User.query.filter_by(email='user@domain').one()
        assert stored_owner.datasets == []
Beispiel #4
0
 def is_expired(self):
     """Compare the class-level `store_until` against `utc_now()`."""
     expires_at = UsersMutationsDataset.store_until
     return expires_at < utc_now()
Beispiel #5
0
class UsersMutationsDataset(CMSModel):
    """Database entry describing a mutations dataset owned by a user.

    The row keeps only metadata (name, uri, counts, timestamps); the actual
    search data is pickled to a `<uri>.db` file inside `mutations_dir` and
    loaded lazily through the `data` property.
    """

    # directory in which the pickled data files are stored
    mutations_dir = 'user_mutations'

    name = db.Column(db.String(256))
    # identifier of the dataset; also the base name of its data file
    uri = db.Column(db.String(256), unique=True, index=True)
    owner_id = db.Column(db.Integer, db.ForeignKey('user.id'))

    # optional cached counts; the corresponding properties fall back to
    # computing the value from the data when these are None
    query_count = db.Column(db.Integer)
    results_count = db.Column(db.Integer)
    # as we get newer MySQL version, use of server_default would be preferable
    created_on = db.Column(db.DateTime, default=utc_now())
    # using default, not server_default as MySQL cannot handle functions as defaults, see:
    # https://dba.stackexchange.com/questions/143953/how-can-i-set-timestamps-default-to-future-date
    store_until = db.Column(db.DateTime, default=utc_days_after(7))

    def __init__(self, *args, **kwargs):
        """Create the entry and store its data on disk.

        Requires a `data` keyword argument (KeyError if it is missing); all
        remaining arguments are passed to the parent constructor.
        """
        data = kwargs.pop('data')
        super().__init__(*args, **kwargs)
        # assigned after the parent constructor, as the setter uses
        # self.name and self.uri when writing the file
        self.data = data

    @classmethod
    def by_uri(cls, uri):
        """Return the single dataset with given uri (trailing '/' ignored)."""
        return cls.query.filter_by(uri=uri.rstrip('/')).one()

    @property
    def data(self) -> 'MutationSearch':
        """Dataset contents, loaded from the file on first access.

        Returns None if the associated file does not exist.
        """
        if not hasattr(self, '_data'):
            try:
                self._data = self._load_from_file()
            except FileNotFoundError:
                # None if associated file was deleted.
                # Be aware of this line when debugging.
                return None
        return self._data

    @data.setter
    def data(self, data):
        """Cache the data in memory, write it to a file and update the uri."""
        self._data = data
        uri = self._save_to_file(data, self.uri)
        self.uri = uri

    def remove(self, commit=True):
        """Performs hard-delete of dataset.

        The data file is removed first, then the entry is soft-deleted by
        setting `store_until` to the current time, and finally the object
        is deleted from the session.

        Current session won't be committed if commit=False is provided.
        """
        # hard delete of data is the first priority
        with suppress(FileNotFoundError):
            os.remove(self._path)

        # soft delete associated entry
        update(self, store_until=utc_now())
        if commit:
            db.session.commit()

        # prompt python interpreter to remove data from memory
        with suppress(AttributeError):
            del self._data

        # and delete from session
        db.session.delete(self)
        if commit:
            db.session.commit()

    def _save_to_file(self, data, uri=None):
        """Saves data to a file identified by uri argument.

        If no uri is given, new unique file is created and new uri returned.
        Returned uri is unique so it can serve as a kind of a randomized id to
        prevent malicious software from iteration over all entries.

        The file is closed before returning, so the pickled data is flushed
        to disk and no file handle is left open.
        """
        import base64
        from tempfile import NamedTemporaryFile

        os.makedirs(self.mutations_dir, exist_ok=True)

        if uri:
            file_name = uri + '.db'
            path = os.path.join(self.mutations_dir, file_name)
            db_file = open(path, 'wb')
        else:
            # the url-safe encoded name is used as a prefix of the file name
            encoded_name = str(
                base64.urlsafe_b64encode(bytes(self.name, 'utf-8')),
                'utf-8'
            )
            # delete=False: the file has to outlive this handle
            db_file = NamedTemporaryFile(dir=self.mutations_dir,
                                         prefix=encoded_name,
                                         suffix='.db',
                                         delete=False)

        # closing the handle guarantees the data is written out
        # before anyone tries to load or remove the file
        with db_file:
            pickle.dump(data, db_file, protocol=4)

        # strip the '.db' extension; .name remains available after closing
        uri_code = os.path.basename(db_file.name)[:-3]

        return uri_code

    @property
    def _path(self):
        """Path of the data file, built from the unquoted uri."""
        from urllib.parse import unquote

        file_name = unquote(self.uri) + '.db'
        return os.path.join(self.mutations_dir, file_name)

    def _load_from_file(self):
        """Unpickle and return the data stored at `self._path`.

        Raises FileNotFoundError if the file does not exist.
        """
        # NOTE(security): pickle.load can execute arbitrary code; this is
        # only safe as long as the file was written by _save_to_file
        with open(self._path, 'rb') as f:
            data = pickle.load(f)
        return data

    @hybrid_property
    def is_expired(self):
        """Whether the storage period of this dataset has already ended."""
        return self.life_expectancy < timedelta(0)

    @is_expired.expression
    def is_expired(self):
        """Query-level counterpart of `is_expired`."""
        return UsersMutationsDataset.store_until < utc_now()

    @hybrid_property
    def life_expectancy(self):
        """How much time is left for this dataset before removal."""
        return self.store_until - datetime.utcnow()

    @property
    def query_size(self):
        """Number of lines in the query, or `query_count` if it is set."""
        if self.query_count is None:
            new_lines = self.data.query.count('\n')
            # NOTE(review): a non-empty query without any newline character
            # yields 0 here - confirm that this is intended
            return new_lines + 1 if new_lines else 0
        return self.query_count

    @property
    def mutations(self):
        """Flat list of `mutation` attributes of all results in the data."""
        # data.results maps keys to iterables of results
        return [
            result.mutation
            for result_group in self.data.results.values()
            for result in result_group
        ]

    @property
    def mutations_count(self):
        """Number of mutations, or `results_count` if it is set."""
        if self.results_count is None:
            return len(self.mutations)
        return self.results_count

    def get_mutation_details(self, protein, pos, alt):
        """Return `meta_user` of the result for (pos, alt) in given protein.

        The results are looked up by `protein.refseq`.
        """
        protein_results = self.data.results_by_refseq[protein.refseq]
        return protein_results[pos, alt].meta_user