Esempio n. 1
0
 class V1(StoredObject):
     """Version-1 schema with string, float and optional string fields.

     ``_meta`` marks the schema as ``optimistic`` and tags it with
     ``version`` 1.
     """
     # NOTE(review): other schemas in this file pass `primary=True`; this one
     # passes `_primary_key=True` -- confirm which keyword the ODM expects.
     _id = fields.StringField(_primary_key=True, index=True)
     my_string = fields.StringField()
     my_float = fields.FloatField()
     my_number = fields.FloatField()
     # Explicitly optional.
     my_null = fields.StringField(required=False)
     # The original literal listed 'optimistic' twice; a repeated key in a
     # dict literal is silently overwritten, so the duplicate was dropped.
     _meta = {'optimistic': True, 'version': 1}
 class Foo(StoredObject):
     """Schema declaring an integer primary key plus one field of each of
     several basic types (integer, string, datetime, float) and a
     ``list=True`` integer field.
     """
     # Integer primary key.
     _id = fields.IntegerField(primary=True)
     integer_field = fields.IntegerField()
     string_field = fields.StringField()
     datetime_field = fields.DateTimeField()
     float_field = fields.FloatField()
     # Declared with `list=True`; presumably holds a list of integers rather
     # than a single value -- confirm against the ODM's field semantics.
     list_field = fields.IntegerField(list=True)
Esempio n. 3
0
class Schema1(StoredObject):
    """Version-1 schema with a string primary key, an integer ``number`` and
    a float ``deleted`` field.
    """

    # String primary key.
    _id = fields.StringField(primary=True)
    number = fields.IntegerField()
    # NOTE(review): `deleted` is declared as a float, not a boolean --
    # confirm this is intentional.
    deleted = fields.FloatField()

    # Schema options read by the ODM: optimistic flag and schema version.
    _meta = {
        'optimistic': True,
        'version': 1,
    }
Esempio n. 4
0
class Tag(StoredObject):
    """Schema exercising a wide range of field options: defaults, validators,
    indexing flags, list-valued fields and automatic timestamps.
    """
    # Primary key; indexing is explicitly turned off for this field.
    value = fields.StringField(primary=True, index=False)
    # NOTE(review): `count` is a *string* field defaulting to 'c' --
    # confirm the name/type pairing is intentional.
    count = fields.StringField(default='c', validate=True, index=True)
    misc = fields.StringField(default='')
    misc2 = fields.StringField(default='')
    created = fields.DateTimeField(validate=True)
    # `auto_now=True` is passed; presumably the ODM refreshes this timestamp
    # on save -- confirm against the ODM documentation.
    modified = fields.DateTimeField(validate=True, auto_now=True)
    # List-valued string field with a two-element default and both a minimum
    # (5) and maximum (10) length validator attached.
    keywords = fields.StringField(
        default=['keywd1', 'keywd2'],
        validate=[MinLengthValidator(5),
                  MaxLengthValidator(10)],
        list=True)
    mybool = fields.BooleanField(default=False)
    myint = fields.IntegerField()
    # Required, but a default of 4.5 is supplied.
    myfloat = fields.FloatField(required=True, default=4.5)
    # Validated with URLValidator.
    myurl = fields.StringField(validate=URLValidator())
Esempio n. 5
0
class Document(StoredObject):
    """Abstract base schema for a document and its extracted plain text.

    Stores where the source document lives (``filepath``/``url``), where its
    text extract was written (``extract_filepath``) and a cached
    ``verification_score``. Subclasses must implement :meth:`read`.
    """

    _id = fields.StringField(default=make_oid)
    # Used as the key into EXTRACT_SAVE_DIRS when saving an extract.
    document_type = fields.StringField()

    filepath = fields.StringField()
    extract_filepath = fields.StringField()
    url = fields.StringField()

    # Fraction of abstract tokens found in the document text; see `verify`.
    verification_score = fields.FloatField()

    extract_path = fields.StringField()
    extracted = fields.BooleanField()

    _meta = {
        'abstract': True,
    }

    def read(self):
        """Return the document's text. Must be overridden by subclasses.

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def text_file_name(self):
        """Return the extract file name: the base name of ``filepath`` with
        its extension replaced by ``.txt``.
        """
        _, tail = os.path.split(self.filepath)
        root, _ = os.path.splitext(tail)
        return '{}.txt'.format(root)

    def save_extract(self, text, save=True, **kwargs):
        """Write extracted text to disk and record its location.

        The target directory is looked up in ``EXTRACT_SAVE_DIRS`` by
        ``document_type`` and created if it does not exist.

        :param text: Extracted text; converted with ``to_unicode`` and
            written UTF-8 encoded
        :param bool save: Persist the record after writing the file
        :param kwargs: Extra attributes to set on the record before saving
        """
        path = EXTRACT_SAVE_DIRS[self.document_type]
        if not os.path.exists(path):
            mkdir_p(path)

        filepath = os.path.join(path, self.text_file_name())
        self.extract_filepath = filepath
        self.extracted = True

        for key, value in kwargs.items():
            setattr(self, key, value)

        # The payload is already UTF-8 encoded bytes, so write in binary
        # mode; the context manager guarantees the handle is closed (the
        # original left it to the garbage collector).
        with open(filepath, 'wb') as fp:
            fp.write(to_unicode(text).encode('utf-8'))

        if save:
            self.save()

    def verify(self, threshold, overwrite=False):
        """Verify that the document matches the target article: checks that
        the document contains a minimum fraction of words in the article
        abstract.

        The computed fraction is stored in ``verification_score`` and the
        record is saved.

        :param float threshold: Minimum fraction of abstract words present
        :param bool overwrite: Recalculate existing verification score
        :return bool: Article is verified

        """
        # Return stored score. A falsy score (None or 0) is deliberately
        # treated as "not computed" and recalculated below.
        if self.verification_score and not overwrite:
            # `>=` so the cached and freshly computed paths agree when the
            # score equals the threshold exactly.
            return self.verification_score >= threshold

        text = self.read()

        # Load target article
        try:
            article = self.article__scraped[0]
        except IndexError:
            return False

        # AB -> Abstract
        abstract = article.record.get('AB', None)

        if not text or not abstract:
            return False

        text = text.lower()
        abstract = abstract.lower()

        # re.split yields '' for leading/trailing whitespace, and
        # `'' in text` is always True, which would inflate the score.
        abstract_tokens = [
            token
            for token in re.split(r'\s+', abstract)
            if token
        ]
        if not abstract_tokens:
            # Whitespace-only abstract: nothing to match against.
            return False

        n_contained = sum(1 for token in abstract_tokens if token in text)
        # Force true division: under Python 2, int / int truncates to 0 or 1.
        prop_contained = float(n_contained) / len(abstract_tokens)

        self.verification_score = prop_contained
        self.save()

        return prop_contained >= threshold
Esempio n. 6
0
class OsfStorageFileVersion(StoredObject):
    """One version of a stored file, tracking its upload status, backend
    location and upload-service metadata.

    State-changing methods are guarded by ``check_status`` and all accept a
    ``signature`` argument that the method bodies themselves never read;
    presumably the decorator consumes it -- confirm against ``check_status``.
    """

    _id = oid_primary_key
    creator = fields.ForeignField('user', required=True)

    status = fields.StringField(required=True, validate=validate_status)
    signature = fields.StringField()

    date_created = fields.DateTimeField(auto_now_add=True)
    date_resolved = fields.DateTimeField()
    # Epoch seconds of the most recent ping; defaults to creation time.
    last_ping = fields.FloatField(default=lambda: time.time())

    # Dictionary specifying all information needed to locate file on backend
    # {
    #     'service': 'cloudfiles',  # required
    #     'container': 'osf',       # required
    #     'object': '20c53b',       # required
    #     'worker_url': '127.0.0.1',
    #     'worker_host': 'upload-service-1',
    # }
    location = fields.DictionaryField()

    # Dictionary containing raw metadata from upload service response
    # {
    #     'size': 1024,                            # required
    #     'content_type': 'text/plain',            # required
    #     'date_modified': '2014-11-07T20:24:15',  # required
    #     'md5': 'd077f2',
    # }
    metadata = fields.DictionaryField()

    # Parsed copies of metadata values, populated by `resolve` for each key
    # in `metadata_fields`.
    size = fields.IntegerField()
    content_type = fields.StringField()
    date_modified = fields.DateTimeField()

    @property
    def pending(self):
        """True while the version has not reached the COMPLETE status."""
        return self.status != status_map['COMPLETE']

    @property
    def expired(self):
        """A version is expired if it is in the UPLOADING state and has not
        received a ping from the upload service in the last
        `PING_TIMEOUT` seconds.
        """
        if self.status != status_map['UPLOADING']:
            return False
        return time.time() > (self.last_ping + settings.PING_TIMEOUT)

    @property
    def location_hash(self):
        """The ``'object'`` entry of ``location``, or None if ``location``
        is empty or unset.
        """
        return self.location['object'] if self.location else None

    def is_duplicate(self, other):
        """Return True if this version has a location hash and it equals
        ``other``'s location hash.

        :param other: Object exposing ``location_hash``
        """
        return (bool(self.location_hash)
                and self.location_hash == other.location_hash)

    @check_status(status_map['UPLOADING'])
    def ping(self, signature):
        """Update the last ping time to now and save.

        :param str signature: Signature used in signed URL; not read in this
            body (presumably verified by ``check_status`` -- confirm)
        """
        self.last_ping = time.time()
        self.save()

    @check_status(status_map['UPLOADING'])
    def set_cached(self, signature):
        """Mark the version as CACHED and save.

        :param str signature: Signature used in signed URL; not read in this
            body
        """
        self.status = status_map['CACHED']
        self.save()

    @check_status(status_map['UPLOADING'], status_map['CACHED'])
    def resolve(self, signature, location, metadata):
        """Mark the version COMPLETE, recording its final location and
        metadata, then save.

        Every key in ``metadata_fields`` must be present in ``metadata``;
        each value is run through its parser and stored on the attribute of
        the same name.

        :param str signature: Signature used in signed URL; not read in this
            body
        :param dict location: Backend location of the file
        :param dict metadata: Raw metadata from the upload service
        :raises errors.MissingFieldError: if a required metadata key is
            absent; the instance is left unmodified in that case
        """
        # Parse every required field *before* touching the instance so that
        # a missing key cannot leave it half-updated in memory.
        parsed = {}
        for key, parser in metadata_fields.items():
            try:
                value = metadata[key]
            except KeyError:
                raise errors.MissingFieldError
            parsed[key] = parser(value)

        self.status = status_map['COMPLETE']
        self.date_resolved = datetime.datetime.utcnow()
        self.location = location
        self.metadata = metadata
        for key, value in parsed.items():
            setattr(self, key, value)
        self.save()

    @check_status()
    def update_metadata(self, signature, metadata):
        """Merge ``metadata`` into the stored raw metadata and save.

        :param str signature: Signature used in signed URL; not read in this
            body
        :param dict metadata: Keys/values to merge into ``self.metadata``
        """
        self.metadata.update(metadata)
        self.save()

    @check_status(status_map['UPLOADING'])
    def cancel(self, signature):
        """Intentionally empty body: nothing is changed or saved here.

        NOTE(review): any effect of cancelling must come from the
        ``check_status`` decorator or the caller -- confirm.

        :param str signature: Signature used in signed URL; not read in this
            body
        """
        pass