Esempio n. 1
0
    def from_file(filename, annotations=None):
        """Read a dataset handle from file.

        The file content is parsed as Json first. If the Json parser fails,
        the same content is parsed as Yaml instead.

        Parameters
        ----------
        filename: string
            Name of the file to read.
        annotations: vizier.datastore.metadata.DatasetMetadata, optional
            Annotations for dataset components

        Returns
        -------
        MimirDatasetHandle

        Raises
        ------
        IOError
            If the file cannot be opened.
        """
        # Read the file once; both parsers operate on the same text.
        with open(filename, 'r') as f:
            content = f.read()
        try:
            doc = load_json(content)
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt and SystemExit are not swallowed by the
            # fallback.
            # NOTE(review): if CLoader is PyYAML's full loader it can
            # construct arbitrary objects; only use on trusted files.
            doc = yaml.load(content, Loader=CLoader)
        columns = [
            MimirDatasetColumn.from_dict(obj) for obj in doc['columns']
        ]
        return MimirDatasetHandle(
            identifier=doc['id'],
            columns=columns,
            rowid_column=MimirDatasetColumn.from_dict(doc['rowIdColumn']),
            table_name=doc['tableName'],
            row_ids=doc['rows'],
            column_counter=doc['columnCounter'],
            row_counter=doc['rowCounter'],
            annotations=annotations)
Esempio n. 2
0
    def get_properties(self):
        """Get the dictionary of user-defined properties that are associated
        with an object.

        The properties are read from self.filename (Json first, Yaml as a
        fallback). Any key in self.properties that is missing from the file
        content is added to the result with its value from self.properties.

        Returns
        -------
        dict
        """
        # Read the file once; both parsers operate on the same text.
        with open(self.filename, 'r') as f:
            content = f.read()
        try:
            properties = load_json(content)
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt and SystemExit are not swallowed by the
            # fallback.
            # NOTE(review): if CLoader is PyYAML's full loader it can
            # construct arbitrary objects; only use on trusted files.
            properties = yaml.load(content, Loader=CLoader)
        # Values read from file take precedence over in-memory defaults.
        for key in self.properties:
            if key not in properties:
                properties[key] = self.properties[key]
        return properties
Esempio n. 3
0
    def get_workflow(self, branch_id=DEFAULT_BRANCH, version=-1):
        """Get the workflow with the given version number from the workflow
        history of the given branch.

        Returns None if the branch or the workflow version do not exist. If
        the branch has no workflows and the given version is not positive, an
        empty workflow handle is returned. A negative version number selects
        the last workflow in the branch history.

        Parameters
        ----------
        branch_id: string, optional
            Unique branch identifier
        version: int, optional
            Workflow version number

        Returns
        -------
        WorkflowHandle or None
        """
        # Return None if branch does not exist
        if branch_id not in self.branches:
            return None
        branch = self.branches[branch_id]
        if version <= 0 and len(branch.workflows) == 0:
            # Returns an empty workflow if the branch does not contain any
            # executed workflows yet.
            return WorkflowHandle(branch_id, -1, get_current_time(), [])
        # Use the last workflow in the branch if a negative version is given.
        # Otherwise, search the branch history for the requested version.
        wf_file = None
        if version < 0 and len(branch.workflows) > 0:
            wf_file = workflow_file(self.fs_dir, branch.workflows[-1].version)
        else:
            for wf_desc in branch.workflows:
                if wf_desc.version == version:
                    wf_file = workflow_file(self.fs_dir, version)
                    break
        # Return None if the version number is not in the branch history
        if wf_file is None:
            return None
        # Read the workflow file once; both parsers operate on the same text.
        with open(wf_file, 'r') as f:
            content = f.read()
        try:
            doc = load_json(content)
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt and SystemExit are not swallowed by the
            # fallback.
            # NOTE(review): if CLoader is PyYAML's full loader it can
            # construct arbitrary objects; only use on trusted files.
            doc = yaml.load(content, Loader=CLoader)
        modules = [ModuleHandle.from_dict(m) for m in doc['modules']]
        return WorkflowHandle(
            branch_id, doc['version'], to_datetime(doc['createdAt']), modules)
0
    def read_index(self):
        """Return content of the file index.

        Returns an empty dictionary if the index file does not exist. The
        index file is parsed as Json first and as Yaml if the Json parser
        fails. The result maps the identifier of each file handle to the
        handle itself.

        Returns
        -------
        dict(vizier.filestore.base.FileHandle)
        """
        files = dict()
        if not os.path.isfile(self.index_file):
            return files
        # Read the file once; both parsers operate on the same text.
        with open(self.index_file, 'r') as f:
            content = f.read()
        try:
            doc = load_json(content)
        except Exception:
            # Only a failure of the Json parser triggers the Yaml fallback.
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt and SystemExit are not swallowed.
            # NOTE(review): if CLoader is PyYAML's full loader it can
            # construct arbitrary objects; only use on trusted files.
            doc = yaml.load(content, Loader=CLoader)
        # Build the handles outside the try block so that errors raised while
        # creating a handle are reported directly instead of causing the file
        # to be parsed a second time.
        for f_desc in doc['files']:
            fh = FileHandle.from_dict(f_desc, self.get_filepath)
            files[fh.identifier] = fh
        return files
Esempio n. 5
0
    def from_file(fs_dir, envs):
        """Read the viztrail state from file.

        The viztrail file is parsed as Json first and as Yaml if the Json
        parser fails.

        Raises IOError if the viztrail file does not exist.

        Parameters
        ----------
        fs_dir: string
            Base directory where all viztrail information is stored
        envs: dict(string: vizier.config.ExecEnv)
            Dictionary of workflow execution environments

        Returns
        -------
        vizier.workflow.repository.fs.FileSystemViztrailHandle
        """
        # Read the viztrail file once; both parsers operate on the same text.
        viztrail_file = os.path.join(fs_dir, VIZTRAIL_FILE)
        with open(viztrail_file, 'r') as f:
            content = f.read()
        try:
            doc = load_json(content)
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt and SystemExit are not swallowed by the
            # fallback.
            # NOTE(review): if CLoader is PyYAML's full loader it can
            # construct arbitrary objects; only use on trusted files.
            doc = yaml.load(content, Loader=CLoader)
        # Read information about viztrail branches
        return FileSystemViztrailHandle(
            doc['id'], {
                b['id']: ViztrailBranch(
                    b['id'],
                    FilePropertiesHandler(branch_file(fs_dir, b['id'])),
                    FileSystemBranchProvenance(
                        branch_prov_file(fs_dir, b['id'])),
                    workflows=[
                        WorkflowVersionDescriptor.from_dict(v)
                        for v in b['versions']
                    ])
                for b in doc['branches']
            }, envs[doc['env']],
            FilePropertiesHandler(os.path.join(fs_dir, PROPERTIES_FILE)),
            to_datetime(doc['timestamps']['createdAt']),
            to_datetime(doc['timestamps']['lastModifiedAt']),
            doc['versionCounter'], doc['moduleCounter'], fs_dir)
Esempio n. 6
0
    def from_file(filename, annotations=None):
        """Create a dataset handle from a file in Json format.

        Parameters
        ----------
        filename: string
            Name of the file to read.
        annotations: vizier.datastore.annotation.dataset.DatasetMetadata, optional
            Annotations for dataset components. This argument is accepted
            but not used when creating the handle.

        Returns
        -------
        vizier.datastore.mimir.dataset.MimirDatasetHandle
        """
        with open(filename, 'r') as infile:
            content = infile.read()
        dataset = load_json(content)
        # Convert the serialized column descriptions into column objects.
        dataset_columns = list()
        for col in dataset['columns']:
            dataset_columns.append(MimirDatasetColumn.from_dict(col))
        return MimirDatasetHandle(
            identifier=dataset['id'],
            columns=dataset_columns,
            table_name=dataset['tableName'],
            row_counter=dataset['rowCounter']
        )
Esempio n. 7
0
    def __init__(self, file_name):
        """Read provenance information from file (if it exists). Note that the
        file may not exist if the branch is the default branch for which no
        provenance information exists.

        The file is parsed as Json first and as Yaml if the Json parser fails.

        Parameters
        ----------
        file_name: string
            Name of the file that contains provenance information.
        """
        # Initialize an 'empty' provenance object (for the master branch)
        super(FileSystemBranchProvenance, self).__init__(None, -1, -1)
        if os.path.isfile(file_name):
            # Read the file once; both parsers operate on the same text.
            with open(file_name, 'r') as f:
                content = f.read()
            try:
                doc = load_json(content)
            except Exception:
                # Catch Exception rather than using a bare except so that
                # KeyboardInterrupt and SystemExit are not swallowed by the
                # fallback.
                # NOTE(review): if CLoader is PyYAML's full loader it can
                # construct arbitrary objects; only use on trusted files.
                doc = yaml.load(content, Loader=CLoader)
            self.source_branch = doc['branch']
            self.workflow_version = doc['workflow']
            self.module_id = doc['module']