コード例 #1
0
class DataFile(db.Model):
    """Base SQLAlchemy model mapped to the ``datafiles`` table.

    ``type`` is the polymorphic discriminator column and this base class
    registers the identity ``"abstract"``. Every row points at a
    ``DatasetVersion`` and the pair ``(dataset_version_id, name)`` is unique.
    """

    __tablename__ = "datafiles"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)
    name: str = db.Column(db.String(80))
    # Polymorphic discriminator (see __mapper_args__ at the end of the class).
    type: str = db.Column(db.String(20))

    dataset_version_id: str = db.Column(GUID,
                                        db.ForeignKey("dataset_versions.id"))
    # The backref adds a ``datafiles`` attribute on DatasetVersion; the
    # "all, delete-orphan" cascade is configured on that reverse side.
    dataset_version: "DatasetVersion" = db.relationship(
        "DatasetVersion",
        foreign_keys=[dataset_version_id],
        backref=db.backref(__tablename__, cascade="all, delete-orphan"),
    )

    # These two columns really belong to VirtualDataFile, but SQLAlchemy seems
    # to have some problems with self-referencing FKs. They were moved here to
    # get it to work; assume both are None unless the type is VirtualDataFile.
    # Also, underlying_data_file_id is always None: use
    # underlying_data_file.id instead if you want the ID.
    underlying_data_file_id = db.Column(GUID, db.ForeignKey("datafiles.id"))
    underlying_data_file = db.relationship("DataFile", remote_side=[id])

    __table_args__ = (
        # A file name may appear only once within a dataset version.
        UniqueConstraint("dataset_version_id", "name"),
        # A 'virtual' row must reference an underlying data file; the other
        # type values the expression lists are 's3' and 'gcs'.
        CheckConstraint(
            "(type = 'virtual' and underlying_data_file_id is not null) or (type = 's3') or (type = 'gcs')",
            name="typedcheck",
        ),
    )

    __mapper_args__ = {
        "polymorphic_on": type,
        "polymorphic_identity": "abstract"
    }
コード例 #2
0
class DatasetPermaname(db.Model):
    """SQLAlchemy model mapped to the ``dataset_permanames`` table.

    Each row associates one permaname string (the primary key) with a row of
    the ``datasets`` table and records when the row was created.
    """

    __tablename__ = "dataset_permanames"

    # The permaname text itself is the primary key.
    permaname: str = db.Column(db.Text, primary_key=True)
    dataset_id: str = db.Column(GUID, db.ForeignKey("datasets.id"))
    # Defaults to the (naive) UTC time at insert.
    creation_date: datetime.datetime = db.Column(
        db.DateTime, default=datetime.datetime.utcnow)
コード例 #3
0
class UploadSession(db.Model):
    """SQLAlchemy model mapped to the ``upload_sessions`` table.

    A session row is linked to the ``users`` row referenced by ``user_id``.
    """

    __tablename__ = "upload_sessions"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)

    user_id: str = db.Column(GUID, db.ForeignKey("users.id"))
    # The backref adds an ``upload_sessions`` attribute on User.
    user: User = db.relationship("User", backref=__tablename__)
コード例 #4
0
class ThirdPartyDataFileLink(db.Model):
    """Polymorphic base model mapped to ``third_party_datafile_links``.

    ``type`` is the discriminator column; this base class registers the
    identity ``"abstract"``.
    """

    __tablename__ = "third_party_datafile_links"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)
    # Polymorphic discriminator.
    type: str = db.Column(db.String(50))
    __mapper_args__ = {
        "polymorphic_on": type,
        "polymorphic_identity": "abstract"
    }
コード例 #5
0
class DatasetSubscription(db.Model):
    """SQLAlchemy model mapped to the ``dataset_subscriptions`` table.

    Each row links one ``users`` row to one ``datasets`` row.
    """

    __tablename__ = "dataset_subscriptions"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)
    # The backref adds a ``dataset_subscriptions`` attribute on User.
    user: User = db.relationship("User", backref=__tablename__)
    user_id: str = db.Column(GUID, db.ForeignKey("users.id"))

    # The backref adds a ``dataset_subscriptions`` attribute on Dataset.
    dataset: Dataset = db.relationship("Dataset", backref=__tablename__)
    dataset_id: str = db.Column(GUID, db.ForeignKey("datasets.id"))
コード例 #6
0
class GCSObjectDataFile(DataFile):
    """``DataFile`` subclass registered under the polymorphic identity ``"gcs"``.

    It declares no ``__tablename__`` of its own; presumably its columns live
    on the parent ``datafiles`` table (single-table inheritance) -- confirm
    against the generated schema.
    """

    __mapper_args__ = {"polymorphic_identity": "gcs"}

    gcs_path: str = db.Column(db.Text)
    generation_id: str = db.Column(db.Text)

    @property
    def underlying_file_id(self):
        """Always return ``None`` for this data file type."""
        return None
コード例 #7
0
class Group(db.Model):
    """SQLAlchemy model mapped to the ``groups`` table.

    A group has a name and a many-to-many collection of users, stored through
    ``group_user_association_table``.
    """

    __tablename__ = "groups"

    # Auto-incrementing integer key, hence ``int`` rather than ``str``.
    id: int = db.Column(db.INTEGER, primary_key=True, autoincrement=True)

    name: str = db.Column(db.String(80))

    # The backref adds a ``groups`` attribute on User.
    users: List[User] = db.relationship(User.__name__,
                                        secondary=group_user_association_table,
                                        backref=__tablename__)

    def __repr__(self):
        """Return ``"Group <name>"``."""
        return f"Group {self.name}"
コード例 #8
0
class ProvenanceGraph(db.Model):
    """SQLAlchemy model mapped to the ``provenance_graphs`` table."""

    __tablename__ = "provenance_graphs"

    graph_id = db.Column(GUID, primary_key=True, default=generate_uuid)

    # Unique identifier generated independently of ``graph_id``.
    permaname = db.Column(GUID, unique=True, default=generate_uuid)

    name = db.Column(db.Text)

    # Optional reference to the ``users`` row that created the graph.
    created_by_user_id = db.Column(GUID,
                                   db.ForeignKey("users.id"),
                                   nullable=True)
    # The backref adds a ``provenance_graphs`` attribute on User.
    user = db.relationship("User", backref=__tablename__)

    # Defaults to the (naive) UTC time at insert.
    created_timestamp = db.Column(db.DateTime,
                                  default=datetime.datetime.utcnow)
コード例 #9
0
class FigshareDataFileLink(ThirdPartyDataFileLink):
    """``ThirdPartyDataFileLink`` subclass with identity ``"figshare"``.

    It has its own ``figshare_datafile_links`` table whose primary key is
    also a foreign key to ``third_party_datafile_links.id``.
    """

    __tablename__ = "figshare_datafile_links"

    # Primary key shared with the parent table row.
    id: str = db.Column(GUID,
                        db.ForeignKey("third_party_datafile_links.id"),
                        primary_key=True)
    figshare_file_id: int = db.Column(db.Integer, nullable=False)
    datafile_id: str = db.Column(GUID, db.ForeignKey("datafiles.id"))
    # ``uselist=False`` makes the reverse attribute
    # ``DataFile.figshare_datafile_link`` a scalar rather than a list.
    datafile: DataFile = db.relationship("DataFile",
                                         backref=backref(
                                             "figshare_datafile_link",
                                             uselist=False))
    figshare_dataset_version_link_id: str = db.Column(
        GUID, db.ForeignKey("figshare_dataset_version_links.id"))

    __mapper_args__ = {"polymorphic_identity": "figshare"}
コード例 #10
0
class ThirdPartyDatasetVersionLink(db.Model):
    """Polymorphic base model mapped to ``third_party_dataset_version_links``.

    ``type`` is the discriminator column; this base class registers the
    identity ``"abstract"``. Each row references the user who created it.
    """

    __tablename__ = "third_party_dataset_version_links"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)
    # Polymorphic discriminator.
    type: str = db.Column(db.String(50))

    creator_id: str = db.Column(GUID, db.ForeignKey("users.id"))
    # The backref adds a ``third_party_dataset_version_links`` attribute on
    # User.
    creator: User = db.relationship(
        "User",
        foreign_keys="ThirdPartyDatasetVersionLink.creator_id",
        backref=__tablename__,
    )

    __mapper_args__ = {
        "polymorphic_on": type,
        "polymorphic_identity": "abstract"
    }
コード例 #11
0
class User(db.Model):
    """SQLAlchemy model mapped to the ``users`` table.

    A user has a unique name, a unique token, and optional references to a
    home folder and a trash folder, both rows of the ``folders`` table.
    """

    __tablename__ = "users"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)
    name: str = db.Column(db.String(80), unique=True)

    email: str = db.Column(db.TEXT)
    token: str = db.Column(db.String(50),
                           unique=True,
                           default=generate_str_uuid)

    # Two foreign keys point at ``folders``, so each relationship names the
    # column it uses explicitly through ``foreign_keys``.
    home_folder_id: str = db.Column(GUID, db.ForeignKey("folders.id"))
    home_folder: "Folder" = db.relationship("Folder",
                                            foreign_keys="User.home_folder_id",
                                            backref="home_user")

    trash_folder_id: str = db.Column(GUID, db.ForeignKey("folders.id"))
    trash_folder: "Folder" = db.relationship(
        "Folder", foreign_keys="User.trash_folder_id", backref="trash_user")

    figshare_personal_token: str = db.Column(db.String(128), nullable=True)

    def __str__(self):
        """Return the user name together with the home and trash folder names.

        Both folder foreign keys are nullable, so a missing folder is
        rendered as ``None`` instead of raising ``AttributeError``.
        """
        home = self.home_folder
        trash = self.trash_folder
        home_name = home.name if home is not None else None
        trash_name = trash.name if trash is not None else None
        return "name: {}, home_folder: {}, trash_folder: {}".format(
            self.name, home_name, trash_name)
コード例 #12
0
class ProvenanceEdge(db.Model):
    """SQLAlchemy model mapped to the ``provenance_edges`` table.

    An edge is a labelled link between two rows of ``provenance_nodes``.
    """

    __tablename__ = "provenance_edges"

    edge_id = db.Column(GUID, primary_key=True, default=generate_uuid)

    # Both node columns reference ``provenance_nodes``, so each relationship
    # names its column explicitly through ``foreign_keys``.
    from_node_id = db.Column(GUID, db.ForeignKey("provenance_nodes.node_id"))
    # The backref adds ``from_edges`` on ProvenanceNode.
    from_node = db.relationship(
        "ProvenanceNode",
        foreign_keys="ProvenanceEdge.from_node_id",
        backref="from_edges",
    )

    to_node_id = db.Column(GUID, db.ForeignKey("provenance_nodes.node_id"))
    # The backref adds ``to_edges`` on ProvenanceNode.
    to_node = db.relationship("ProvenanceNode",
                              foreign_keys="ProvenanceEdge.to_node_id",
                              backref="to_edges")

    label = db.Column(db.Text)
コード例 #13
0
class UserLog(db.Model):
    """SQLAlchemy model mapped to the ``user_logs`` table.

    Each row links a user to an entry and stores a ``last_access`` timestamp.
    """

    __tablename__ = "user_logs"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)

    user_id: str = db.Column(GUID, db.ForeignKey("users.id"))
    # NOTE: the backref is named "user", so the reverse attribute holding the
    # log rows is ``User.user``.
    user: User = db.relationship("User",
                                 foreign_keys="UserLog.user_id",
                                 backref="user")

    entry_id: str = db.Column(GUID, db.ForeignKey("entries.id"))
    # NOTE: likewise, the reverse attribute on Entry is ``Entry.entry``.
    entry: "Entry" = db.relationship("Entry",
                                     foreign_keys="UserLog.entry_id",
                                     backref="entry")
    # TODO: Set up a constraint allowing only one row per (user, entry) pair

    # Defaults to the (naive) UTC time at insert.
    last_access: datetime.datetime = db.Column(
        db.DateTime, default=datetime.datetime.utcnow)
コード例 #14
0
class ProvenanceNode(db.Model):
    """SQLAlchemy model mapped to the ``provenance_nodes`` table.

    A node belongs to a provenance graph, may reference a data file, and
    carries a label and a ``NodeType``.
    """

    __tablename__ = "provenance_nodes"

    class NodeType(enum.Enum):
        # Allowed values for the ``type`` column.
        Dataset = "dataset"
        External = "external"
        Process = "process"

    node_id = db.Column(GUID, primary_key=True, default=generate_uuid)

    graph_id = db.Column(GUID, db.ForeignKey("provenance_graphs.graph_id"))
    # The backref adds a ``provenance_nodes`` attribute on ProvenanceGraph.
    graph = db.relationship("ProvenanceGraph", backref=__tablename__)

    # Optional reference to a row of ``datafiles``.
    datafile_id = db.Column(GUID, db.ForeignKey("datafiles.id"), nullable=True)
    # The backref adds a ``provenance_nodes`` attribute on DataFile.
    datafile = db.relationship("DataFile", backref=__tablename__)

    label = db.Column(db.Text)

    type = db.Column(db.Enum(NodeType))
コード例 #15
0
class Entry(db.Model):
    """Polymorphic base model mapped to the ``entries`` table.

    ``type`` is the discriminator column. Every entry has a required name, a
    creation date, an optional description and a reference to its creator.
    """

    __tablename__ = "entries"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)
    name: str = db.Column(db.Text, nullable=False)
    # Polymorphic discriminator.
    type: str = db.Column(db.String(50))
    # Defaults to the (naive) UTC time at insert.
    creation_date: datetime.datetime = db.Column(
        db.DateTime,
        default=datetime.datetime.utcnow,
    )

    creator_id: str = db.Column(GUID, db.ForeignKey("users.id"))

    # The backref adds an ``entries`` attribute on User.
    creator: User = db.relationship(
        "User",
        foreign_keys="Entry.creator_id",
        backref=__tablename__,
    )

    description: str = db.Column(db.Text)

    __mapper_args__ = {
        # NOTE(review): ``classmethod.__class__`` is ``type``, so this
        # expression evaluates to the string "type" -- confirm that is the
        # intended identity for the base class.
        "polymorphic_identity": classmethod.__class__.__name__,
        "polymorphic_on": type,
        "with_polymorphic": "*",
    }

    def __str__(self):
        """Return ``"Entry name: <name>"``."""
        return f"Entry name: {self.name}"
コード例 #16
0
class Activity(db.Model):
    """Polymorphic base model mapped to the ``activities`` table.

    Each row records one ``ActivityType`` event for a dataset, together with
    the user it is linked to, a timestamp and free-text comments. The
    ``type`` enum column doubles as the polymorphic discriminator.
    """

    class ActivityType(enum.Enum):
        # Allowed values for the ``type`` column.
        created = "Created"
        changed_name = "Changed name"
        changed_description = "Changed Description"
        added_version = "Added version"
        started_log = "Log started"

    __tablename__ = "activities"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)

    user_id: str = db.Column(GUID, db.ForeignKey("users.id"))

    # The backref adds an ``activities`` attribute on User.
    user: User = db.relationship("User", backref=__tablename__)

    dataset_id: str = db.Column(GUID, db.ForeignKey("datasets.id"))

    # The backref adds an ``activities`` attribute on Dataset.
    dataset: Dataset = db.relationship("Dataset", backref=__tablename__)

    # We would want the type of change and the comments associated
    type: ActivityType = db.Column(db.Enum(ActivityType), nullable=False)

    # Defaults to the (naive) UTC time at insert.
    timestamp: datetime.datetime = db.Column(db.DateTime,
                                             default=datetime.datetime.utcnow)

    comments: str = db.Column(db.Text)

    # No polymorphic identity is registered for the base class itself.
    __mapper_args__ = {"polymorphic_on": type}
コード例 #17
0
class Folder(Entry):
    """``Entry`` subclass with identity ``"Folder"`` and its own ``folders`` table.

    A folder has a ``FolderType`` and a many-to-many collection of entries,
    stored through ``folder_entry_association_table``.
    """

    # Enum Folder types
    # TODO: Could be a good idea to transform these enums into Classes. So
    # they can have different behaviors if needed
    class FolderType(enum.Enum):
        home = "home"
        trash = "trash"
        folder = "folder"

    __tablename__ = "folders"

    # TODO: Instead of using a string 'entry.id', can we use Entry.id?
    id: str = db.Column(GUID, db.ForeignKey("entries.id"), primary_key=True)

    folder_type: FolderType = db.Column(db.Enum(FolderType))

    # TODO: This should be a set, not a list.
    # The backref adds a ``parents`` attribute on Entry.
    entries: List[Entry] = db.relationship(
        "Entry",
        secondary=folder_entry_association_table,
        backref="parents",
    )

    __mapper_args__ = {"polymorphic_identity": "Folder"}

    def __repr__(self):
        """Return ``"Folder name: <name> and id: <id>"``."""
        return f"Folder name: {self.name} and id: {self.id}"
コード例 #18
0
class S3DataFile(DataFile):
    """``DataFile`` subclass registered under the polymorphic identity ``"s3"``.

    It declares no ``__tablename__`` of its own; presumably its columns live
    on the parent ``datafiles`` table (single-table inheritance) -- confirm
    against the generated schema.
    """

    # IMPORTANT: Need to sync with frontend for each changes
    class DataFileFormat(enum.Enum):
        Raw = "Raw"
        HDF5 = "HDF5"
        Columnar = "Columnar"

    format: DataFileFormat = db.Column(db.Enum(DataFileFormat))
    encoding = db.Column(db.Text)
    s3_bucket: str = db.Column(db.Text)
    s3_key: str = db.Column(db.Text)
    compressed_s3_key: str = db.Column(db.Text)

    short_summary: str = db.Column(db.Text)
    long_summary: str = db.Column(db.Text)
    # Stored as a JSON column; annotated as a str -> str mapping.
    column_types_as_json: Dict[str, str] = db.Column(db.JSON)
    original_file_sha256: str = db.Column(db.Text)
    original_file_md5: str = db.Column(db.Text)

    __mapper_args__ = {"polymorphic_identity": "s3"}

    @property
    def underlying_file_id(self):
        """Always return ``None`` for this data file type."""
        return None
コード例 #19
0
class Dataset(Entry):
    """``Entry`` subclass with identity ``"Dataset"`` and its own ``datasets`` table.

    A dataset owns a collection of ``DatasetPermaname`` rows; the most
    recently created one is exposed through ``permaname``.
    """

    __tablename__ = "datasets"

    id = db.Column(GUID, db.ForeignKey("entries.id"), primary_key=True)

    # TODO: Use the name/key of the dataset and add behind the uuid?
    permanames: List["DatasetPermaname"] = db.relationship("DatasetPermaname")

    @property
    def permaname(self) -> str:
        """Return the permaname with the latest ``creation_date``.

        Returns an empty string when the dataset has no permanames.
        """
        if not self.permanames:
            return ""
        newest = max(self.permanames, key=lambda entry: entry.creation_date)
        return newest.permaname

    __mapper_args__ = {"polymorphic_identity": "Dataset"}
コード例 #20
0
class ConversionCache(db.Model):
    """SQLAlchemy model mapped to the ``conversion_cache`` table.

    Each row is keyed by its own ``id`` and records a dataset version, a data
    file name and a format, together with status, task and URL fields.
    """

    __tablename__ = "conversion_cache"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)

    dataset_version_id: str = db.Column(GUID,
                                        db.ForeignKey("dataset_versions.id"))

    datafile_name: str = db.Column(db.String(80))

    format: str = db.Column(db.String(80))

    status: str = db.Column(db.Text)

    task_id: str = db.Column(db.Text)

    # Stored as plain text; presumably a JSON-encoded value -- confirm
    # against the code that writes it.
    urls_as_json: str = db.Column(db.Text)

    state: ConversionEntryState = db.Column(db.Enum(ConversionEntryState))
コード例 #21
0
class DatasetVersion(Entry):
    """``Entry`` subclass with identity ``"DatasetVersion"``.

    Rows live in the ``dataset_versions`` table. Each version belongs to one
    dataset and ``(dataset_id, version)`` is unique.
    """

    class DatasetVersionState(enum.Enum):
        # Allowed values for the ``state`` column.
        approved = "Approved"
        deprecated = "Deprecated"
        deleted = "Deleted"

    # TODO: Missing the permaname of the DatasetVersion
    __tablename__ = "dataset_versions"

    id: str = db.Column(GUID, db.ForeignKey("entries.id"), primary_key=True)

    dataset_id: str = db.Column(GUID, db.ForeignKey("datasets.id"))

    # The backref adds a ``dataset_versions`` attribute on Dataset.
    # NOTE(review): the "all, delete-orphan" cascade is set on this
    # DatasetVersion -> Dataset relationship itself, not on the backref, so
    # operations cascade from a version to its dataset -- confirm that this
    # direction is intended.
    dataset: Dataset = db.relationship(
        "Dataset",
        foreign_keys=[dataset_id],
        backref=db.backref(__tablename__),
        single_parent=True,
        cascade="all, delete-orphan",
    )

    # Filled out by the server
    version: int = db.Column(db.Integer)

    # State of the version
    state: DatasetVersionState = db.Column(
        db.Enum(DatasetVersionState), default=DatasetVersionState.approved)
    # Reason for the state of the version. Should be empty if approved
    reason_state: str = db.Column(db.Text)

    changes_description: str = db.Column(db.Text)

    # A version number may appear only once per dataset.
    __table_args__ = (UniqueConstraint("dataset_id", "version"), )

    # TODO: See how to manage the status (persist.py)

    __mapper_args__ = {"polymorphic_identity": "DatasetVersion"}
コード例 #22
0
 def dataset_description(cls) -> str:
     """Return the ``dataset_description`` column of the ``Activity`` table.

     The column is looked up on ``Activity.__table__``; when it is not there,
     a newly created ``db.Text`` column is returned instead.
     """
     activity_columns = Activity.__table__.c
     fallback_column = db.Column(db.Text)
     return activity_columns.get("dataset_description", fallback_column)
コード例 #23
0
 def dataset_name(cls) -> str:
     """Return the ``dataset_name`` column of the ``Activity`` table.

     The column is looked up on ``Activity.__table__``; when it is not there,
     a newly created ``db.Text`` column is returned instead.
     """
     activity_columns = Activity.__table__.c
     fallback_column = db.Column(db.Text)
     return activity_columns.get("dataset_name", fallback_column)
コード例 #24
0
class UploadSessionFile(db.Model):
    """SQLAlchemy model mapped to the ``upload_session_files`` table.

    Each row belongs to an ``UploadSession`` and stores the submitted file
    name, storage locations (S3 and GCS fields), summaries and checksums,
    plus an optional link to a ``DataFile``.
    """

    __tablename__ = "upload_session_files"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)

    session_id: str = db.Column(GUID, db.ForeignKey("upload_sessions.id"))

    # The backref adds an ``upload_session_files`` attribute on UploadSession.
    session: UploadSession = db.relationship("UploadSession",
                                             backref=__tablename__)

    # filename submitted by user
    filename: str = db.Column(db.Text)
    encoding: str = db.Column(db.Text)

    initial_filetype: InitialFileType = db.Column(db.Enum(InitialFileType))
    initial_s3_key: str = db.Column(db.Text)

    # Uses the same format enum as S3DataFile.format.
    converted_filetype: S3DataFile.DataFileFormat = db.Column(
        db.Enum(S3DataFile.DataFileFormat))
    converted_s3_key: str = db.Column(db.Text)

    compressed_s3_key: str = db.Column(db.Text)

    s3_bucket: str = db.Column(db.Text)

    gcs_path: str = db.Column(db.Text)
    generation_id: str = db.Column(db.Text)

    short_summary: str = db.Column(db.Text)
    long_summary: str = db.Column(db.Text)
    # Stored as a JSON column; annotated as a str -> str mapping.
    column_types_as_json: Dict[str, str] = db.Column(db.JSON)
    original_file_sha256: str = db.Column(db.Text)
    original_file_md5: str = db.Column(db.Text)

    data_file_id: str = db.Column(GUID, db.ForeignKey("datafiles.id"))
    # Scalar relationship (``uselist=False``); no backref is declared.
    data_file: DataFile = db.relationship(
        "DataFile",
        uselist=False,
        foreign_keys="UploadSessionFile.data_file_id")
コード例 #25
0
 def dataset_version(cls) -> str:
     """Return the ``dataset_version`` column of the ``Activity`` table.

     The column is looked up on ``Activity.__table__``; when it is not there,
     a newly created ``db.Integer`` column is returned instead.
     """
     # NOTE(review): the return annotation says ``str`` although the fallback
     # column is an Integer -- confirm which one is intended.
     activity_columns = Activity.__table__.c
     fallback_column = db.Column(db.Integer)
     return activity_columns.get("dataset_version", fallback_column)
コード例 #26
0
def generate_uuid():
    """Return a new random (version 4) UUID as a 32-character hex string."""
    fresh_id = uuid.uuid4()
    return fresh_id.hex


def generate_str_uuid():
    """Return a new random (version 4) UUID in its dashed 36-character form."""
    fresh_id = uuid.uuid4()
    return str(fresh_id)


# Associations #

# Association table for the many-to-many relationship between folders and
# entries. As discussed in December 2016 with Philip Montgomery, we decided
# that an entry can be contained in multiple folders.
folder_entry_association_table = db.Table(
    "folder_entry_association",
    db.Column("folder_id", GUID, db.ForeignKey("folders.id")),
    db.Column("entry_id", GUID, db.ForeignKey("entries.id")),
)

# Association table for the many-to-many relationship between groups and
# users. ``group_id`` is an integer, matching the integer ``groups.id`` key.
group_user_association_table = db.Table(
    "group_user_association",
    db.Column("group_id", db.INTEGER, db.ForeignKey("groups.id")),
    db.Column("user_id", GUID, db.ForeignKey("users.id")),
)

# End Associations #


def normalize_name(name: str) -> str:
    permaname_prefix = name.casefold(
    )  # str.casefold() is a more aggressive .lower()