Example #1
class SampleSpecies(mongoengine.Document):
    original_id = mongoengine.StringField(required=True)
    smarter_id = mongoengine.StringField(required=True, unique=True)

    country = mongoengine.StringField(required=True)
    species = mongoengine.StringField(required=True)
    breed = mongoengine.StringField(required=True)
    breed_code = mongoengine.StringField(min_length=3)

    # required to search a sample relying only on original ID
    dataset = mongoengine.ReferenceField(Dataset,
                                         db_field="dataset_id",
                                         reverse_delete_rule=mongoengine.DENY)

    # track the original chip_name with sample
    chip_name = mongoengine.StringField()

    # define enum types for sex
    sex = mongoengine.EnumField(SEX)

    # GPS location
    # NOTE: X, Y where X is longitude, Y latitude
    location = mongoengine.PointField()

    # additional (not modelled) metadata
    metadata = mongoengine.DictField(default=None)

    # for phenotypes
    phenotype = mongoengine.EmbeddedDocumentField(Phenotype, default=None)

    meta = {
        'abstract': True,
    }

    def save(self, *args, **kwargs):
        """Custom save method. Deal with smarter_id before save"""

        if not self.smarter_id:
            logger.debug(f"Determining smarter id for {self.original_id}")

            # get the pymongo connection object
            conn = mongoengine.connection.get_db(alias=DB_ALIAS)

            # even if species, country and breed are required fields for a
            # SampleSpecies document, their values are not validated until
            # super().save() is called, so it can't be invoked before the
            # smarter_id has been determined
            self.smarter_id = getSmarterId(self.species, self.country,
                                           self.breed, conn)

        # default save method
        super(SampleSpecies, self).save(*args, **kwargs)

    def __str__(self):
        return f"{self.smarter_id} ({self.breed})"
Example #2
class Board(me.Document):
    name = me.StringField(required=True, unique=True, validation=_not_empty)
    canvas_width = me.IntField(default=1000)
    canvas_height = me.IntField(default=1000)
    background_type = me.EnumField(BackgroundType,
                                   default=BackgroundType.EMPTY)
    background_linespace = me.FloatField(default=0.5)
    cards = me.EmbeddedDocumentListField(Card)
    owner = me.ReferenceField('User')
    is_root = me.BooleanField(required=True)

    meta = {'auto_create_index': True}
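
A brief usage sketch for the Board document, assuming BackgroundType, Card and the User document are defined elsewhere in the project and a default mongoengine connection exists; the board name is made up:

# create and persist a root board with the default canvas settings
board = Board(name="Weekly planning", is_root=True)
board.save()

# the unique name allows a direct lookup later
same_board = Board.objects(name="Weekly planning").first()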
Example #3
class User(me.Document):
    created_at = me.DateTimeField(default=datetime.utcnow)
    updated_at = me.DateTimeField(default=datetime.utcnow)
    username = me.StringField(required=True)
    email = me.EmailField(unique=True, required=True)
    password = me.StringField()
    address = me.StringField(required=True)
    phone_number = me.StringField(required=True)
    gender = me.EnumField(GenderEnum, required=True)

    def to_json(self):
        return {
            "_id": str(self.pk),
            "name": self.username,
            "email": self.email,
            "address": self.address,
            "phone_number": self.phone_number,
            "gender": self.gender.value,
        }
Example #4
class User(me.Document):
    id = me.StringField(default=lambda: str(uuid4()), primary_key=True)
    user_name = me.StringField(regex='^([A-Za-z0-9]+_*)+', required=True)
    password = me.StringField(required=True)
    group = me.EnumField(UserGroupEnum, required=True)
    created_at = me.DateTimeField(default=datetime.utcnow)
    updated_at = me.DateTimeField(default=datetime.utcnow)

    @classmethod
    def get_by_id(cls, id: str):
        result = cls.objects(id=id)
        if result:
            return result[0]

    @classmethod
    def get_by_user_name(cls, user_name: str):
        result = cls.objects(user_name=user_name)
        if result:
            return result[0]
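
The two classmethods wrap simple queryset filters and implicitly return None when nothing matches. A usage sketch, assuming UserGroupEnum defines an ADMIN member (an assumption) and that password hashing happens elsewhere:

user = User(user_name="admin_01",
            password="<hashed password>",     # hashing is outside this model
            group=UserGroupEnum.ADMIN)        # ADMIN member is an assumption
user.save()

found = User.get_by_user_name("admin_01")
same = User.get_by_id(found.id)               # id is a UUID string primary key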
Example #5
class MongoDescriptor(me.Document):
    photo_id = me.LongField(primary_key=True, required=True)
    coords = me.PointField(db_field="coordinates", required=True)
    dataset = me.EnumField(DatasetEnum, required=True)
    binary_descriptor = me.BinaryField(required=True, db_field="descriptor")
    meta = {'collection': 'flickr.descriptors_512'}

    @property
    def descriptor(self):
        if self.binary_descriptor is not None:
            return pickle.loads(self.binary_descriptor)
        else:
            return None

    @descriptor.setter
    def descriptor(self, descriptor_array: np.ndarray):
        if not isinstance(descriptor_array, np.ndarray):
            raise ValueError("Descriptor array should be of type np.ndarray")
        self.binary_descriptor = pickle.dumps(descriptor_array, protocol=2)

    @property
    def coordinates(self):
        return self.coords['coordinates']

    @coordinates.setter
    def coordinates(self, coord_dict):
        if 'lat' not in coord_dict or 'lng' not in coord_dict:
            raise ValueError(
                "error setting coordinates, lat or lng is not in dict")
        self.coords = [coord_dict['lng'], coord_dict['lat']]

    def set_coordinates(self, lng, lat):
        self.coords = [lng, lat]

    @classmethod
    def get_ids_and_coords(cls, dataset: DatasetEnum) -> tuple:
        if isinstance(dataset, DatasetEnum):
            objects = cls.objects(dataset=dataset)
        else:
            raise ValueError(
                f"dataset should be one of DatasetEnum, was {dataset}")
        total = objects.count()

        log.debug(
            f"Getting {dataset.value} dataset from db. Total number of documents in db {total}"
        )

        ids = np.zeros(shape=total, dtype=int)
        coordinates = np.zeros(shape=(total, 2))

        batch_size = 100000
        for i, descriptor in enumerate(
                objects.batch_size(batch_size).only('photo_id', 'coords')):
            ids[i] = descriptor.photo_id
            coordinates[i, :] = descriptor.coordinates

            if (i + 1) % batch_size == 0:
                log.debug(f"Processed {i + 1} documents")

        return ids, coordinates

    @classmethod
    def get_data_as_arrays(cls, dataset: DatasetEnum = None) -> tuple:
        if isinstance(dataset, DatasetEnum):
            objects = cls.objects(dataset=dataset)
        else:
            raise ValueError(
                f"dataset should be one of DatasetEnum, was {dataset}")

        total = objects.count()
        dim = cls.objects.first().descriptor.shape[0]

        log.debug(
            f"Getting {dataset.value} dataset from db. Total number of documents in db {total}"
        )

        descriptors = np.zeros(shape=(total, dim))
        ids = np.zeros(shape=total, dtype=int)
        coordinates = np.zeros(shape=(total, 2))
        batch_size = 50000
        for i, descriptor in enumerate(objects.batch_size(batch_size)):
            descriptors[i, :] = descriptor.descriptor
            ids[i] = descriptor.photo_id
            coordinates[i, :] = descriptor.coordinates

            if (i + 1) % batch_size == 0:
                log.debug(f"Processed {i + 1} documents")

        return ids, coordinates, descriptors

    @classmethod
    def get_random_docs(cls, dataset, sample_size):
        # $sample is quite slow, so using this method is not recommended
        pipeline = [{
            "$sample": {
                "size": sample_size
            }
        }, {
            "$project": {
                '_id': 1
            }
        }]
        pipeline_cursor = cls.objects(dataset=dataset).aggregate(pipeline)

        ids = [doc['_id'] for doc in pipeline_cursor]

        return cls.objects(photo_id__in=ids)
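
A usage sketch for the property-based API above, assuming a registered connection to the database holding flickr.descriptors_512 and that DatasetEnum defines a TRAIN member (both assumptions); the photo id and coordinates are made up:

import numpy as np

doc = MongoDescriptor(photo_id=12345, dataset=DatasetEnum.TRAIN)
doc.set_coordinates(lng=2.3522, lat=48.8566)              # stored as [lng, lat]
doc.descriptor = np.random.rand(512).astype(np.float32)   # pickled into binary_descriptor
doc.save()

ids, coords = MongoDescriptor.get_ids_and_coords(DatasetEnum.TRAIN)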