Exemple #1
0
    def extract(self):
        """Extract files from an archived submit dir.

        Clears the ``archived`` flag on the master db record (when such a
        record exists), unpacks ``self.archname`` into ``self.submitdir``,
        removes the archive file and then commits the master db change.

        Raises:
            SubmitDirException: if the submit dir is not archived.
        """
        # Locate archive file
        if not self.is_archived():
            raise SubmitDirException("Submit dir not archived")

        mdbsession = connection.connect_by_submitdir(
            self.submitdir, connection.DBType.MASTER
        )
        try:
            # Update record in master db
            mdb = MasterDatabase(mdbsession)
            wf = mdb.get_master_workflow(self.wf_uuid)
            if wf is not None:
                wf.archived = False

            # Untar the files; the context manager closes the archive even
            # when extraction fails part-way through
            with tarfile.open(self.archname, "r:gz") as tar:
                tar.extractall(path=self.submitdir)

            # Remove the tar file
            os.remove(self.archname)

            # Commit the workflow changes only once the files are back
            mdbsession.commit()
        finally:
            # Always release the master db session, including on failure
            mdbsession.close()
Exemple #2
0
def _get_connection(
    dburi=None,
    cl_properties=None,
    config_properties=None,
    submit_dir=None,
    db_type=None,
    pegasus_version=None,
    schema_check=True,
    create=False,
    force=False,
    print_version=True,
):
    """Open a database connection from the most specific source given.

    Sources are tried in this order: an explicit ``dburi``, a
    ``submit_dir``, connection properties (``config_properties`` is truthy
    or ``_has_connection_properties(cl_properties)`` is true), and finally
    the master database URI when no ``db_type`` was requested.

    Returns:
        Whatever the selected ``connection`` helper returns, or ``None``
        when no source applies and a ``db_type`` was given.
    """
    # Keyword arguments that every connection helper receives unchanged
    shared = {
        "pegasus_version": pegasus_version,
        "schema_check": schema_check,
        "create": create,
        "force": force,
        "print_version": print_version,
    }

    if dburi:
        return connection.connect(dburi, db_type=db_type, **shared)

    if submit_dir:
        return connection.connect_by_submitdir(
            submit_dir,
            db_type,
            config_properties,
            cl_properties=cl_properties,
            **shared
        )

    if config_properties or _has_connection_properties(cl_properties):
        return connection.connect_by_properties(
            config_properties,
            db_type,
            cl_properties=cl_properties,
            **shared
        )

    # A specific database type was requested but nothing identifies it
    if db_type:
        return None

    # Fall back to the master database
    master_uri = connection._get_master_uri()
    return connection.connect(master_uri, db_type=db_type, **shared)
Exemple #3
0
    def detach(self):
        """Remove any master db entries for the given root workflow.

        Prints a notice instead of deleting when the workflow has no record
        in the master db.

        Raises:
            SubmitDirException: if this submit dir belongs to a subworkflow.
        """
        # Verify that we aren't trying to detach a subworkflow
        if self.is_subworkflow():
            raise SubmitDirException(
                "Subworkflows cannot be detached independent of the root workflow"
            )

        # Connect to master database
        mdbsession = connection.connect_by_submitdir(
            self.submitdir, connection.DBType.MASTER
        )
        try:
            mdb = MasterDatabase(mdbsession)

            # Check to see if it even exists
            if mdb.get_master_workflow(self.wf_uuid) is None:
                # print() with one argument behaves the same on Python 2 and 3
                print("Workflow is not in master DB")
            else:
                # Delete the workflow (this will delete the
                # master_workflowstate entries as well)
                mdb.delete_master_workflow(self.wf_uuid)

            # Update the master db
            mdbsession.commit()
        finally:
            # Always release the session, including on failure
            mdbsession.close()
Exemple #4
0
    def extract(self):
        """Extract files from an archived submit dir.

        Clears the ``archived`` flag on the master db record (when such a
        record exists), unpacks ``self.archname`` into ``self.submitdir``,
        removes the archive file and then commits the master db change.

        Raises:
            SubmitDirException: if the submit dir is not archived.
        """
        # Locate archive file
        if not self.is_archived():
            raise SubmitDirException("Submit dir not archived")

        mdbsession = connection.connect_by_submitdir(
            self.submitdir, connection.DBType.MASTER
        )
        try:
            # Update record in master db
            mdb = MasterDatabase(mdbsession)
            wf = mdb.get_master_workflow(self.wf_uuid)
            if wf is not None:
                wf.archived = False

            # Untar the files; the context manager closes the archive even
            # when extraction fails part-way through
            with tarfile.open(self.archname, "r:gz") as tar:
                tar.extractall(path=self.submitdir)

            # Remove the tar file
            os.remove(self.archname)

            # Commit the workflow changes only once the files are back
            mdbsession.commit()
        finally:
            # Always release the master db session, including on failure
            mdbsession.close()
Exemple #5
0
    def detach(self, wf_uuid=None):
        """Remove any master db entries for the given root workflow.

        When the submit dir exists, the workflow identified by
        ``self.wf_uuid`` is removed from the master db reached through the
        submit dir. Otherwise the sqlite master db under ``~/.pegasus`` is
        used and the workflow must be named explicitly.

        Args:
            wf_uuid: UUID of the workflow to detach. Only consulted when
                the submit dir does not exist.

        Raises:
            SubmitDirException: if the submit dir belongs to a subworkflow,
                or if the submit dir does not exist and ``wf_uuid`` was not
                given (the message lists any master db workflows recorded
                for that submit dir).
        """
        if self.submitdir_exists:
            # Verify that we aren't trying to detach a subworkflow
            if self.is_subworkflow():
                raise SubmitDirException(
                    "Subworkflows cannot be detached independent of the root workflow"
                )

            # Connect to master database
            mdbsession = connection.connect_by_submitdir(
                self.submitdir, connection.DBType.MASTER
            )
            try:
                mdb = MasterDatabase(mdbsession)

                # Check to see if it even exists
                if mdb.get_master_workflow(self.wf_uuid) is None:
                    # Single-argument print() is the same on Python 2 and 3
                    print("Workflow is not in master DB")
                else:
                    # Delete the workflow (this will delete the
                    # master_workflowstate entries as well)
                    mdb.delete_master_workflow(self.wf_uuid)

                # Update the master db
                mdbsession.commit()
            finally:
                # Always release the session, including on failure
                mdbsession.close()
            return

        # The submit dir is gone: connect to the master database directly
        home = expanduser("~")
        mdbsession = connection.connect(
            "sqlite:///%s/.pegasus/workflow.db" % home,
            db_type=connection.DBType.MASTER,
        )
        try:
            mdb = MasterDatabase(mdbsession)

            if wf_uuid is None:
                wfs = mdb.get_master_workflow_for_submitdir(self.submitdir)
                if not wfs:
                    raise SubmitDirException(
                        "Invalid submit dir: %s" % self.submitdir
                    )

                # Tell the user which workflows they could pass as wf_uuid
                parts = [
                    "Invalid submit dir: %s, Specify --wf-uuid <WF_UUID> to detach\n"
                    % self.submitdir,
                    "\tWorkflow UUID, DAX Label, Submit Hostname, Submit Dir.\n",
                ]
                parts.extend(
                    "\t%s, %s, %s, %s\n"
                    % (wf.wf_uuid, wf.dax_label, wf.submit_hostname, wf.submit_dir)
                    for wf in wfs
                )
                raise SubmitDirException("".join(parts))

            # Delete
            mdb.delete_master_workflow(wf_uuid, submit_dir=self.submitdir)

            # Update the master db
            mdbsession.commit()
        finally:
            mdbsession.close()
Exemple #6
0
    def detach(self, wf_uuid=None):
        """Remove any master db entries for the given root workflow.

        When the submit dir exists, the workflow identified by
        ``self.wf_uuid`` is removed from the master db reached through the
        submit dir. Otherwise the sqlite master db under ``~/.pegasus`` is
        used and the workflow must be named explicitly.

        Args:
            wf_uuid: UUID of the workflow to detach. Only consulted when
                the submit dir does not exist.

        Raises:
            SubmitDirException: if the submit dir belongs to a subworkflow,
                or if the submit dir does not exist and ``wf_uuid`` was not
                given (the message lists any master db workflows recorded
                for that submit dir).
        """
        if self.submitdir_exists:
            # Verify that we aren't trying to detach a subworkflow
            if self.is_subworkflow():
                raise SubmitDirException("Subworkflows cannot be detached independent of the root workflow")

            # Connect to master database
            mdbsession = connection.connect_by_submitdir(self.submitdir, connection.DBType.MASTER)
            try:
                mdb = MasterDatabase(mdbsession)

                # Check to see if it even exists
                if mdb.get_master_workflow(self.wf_uuid) is None:
                    # Single-argument print() is the same on Python 2 and 3
                    print("Workflow is not in master DB")
                else:
                    # Delete the workflow (this will delete the master_workflowstate entries as well)
                    mdb.delete_master_workflow(self.wf_uuid)

                # Update the master db
                mdbsession.commit()
            finally:
                # Always release the session, including on failure
                mdbsession.close()
            return

        # The submit dir is gone: connect to the master database directly
        home = expanduser('~')
        mdbsession = connection.connect('sqlite:///%s/.pegasus/workflow.db' % home,
                                        db_type=connection.DBType.MASTER)
        try:
            mdb = MasterDatabase(mdbsession)

            if wf_uuid is None:
                wfs = mdb.get_master_workflow_for_submitdir(self.submitdir)
                if not wfs:
                    raise SubmitDirException("Invalid submit dir: %s" % self.submitdir)

                # Tell the user which workflows they could pass as wf_uuid
                parts = [
                    "Invalid submit dir: %s, Specify --wf-uuid <WF_UUID> to detach\n" % self.submitdir,
                    "\tWorkflow UUID, DAX Label, Submit Hostname, Submit Dir.\n",
                ]
                parts.extend(
                    '\t%s, %s, %s, %s\n' % (wf.wf_uuid, wf.dax_label, wf.submit_hostname, wf.submit_dir)
                    for wf in wfs
                )
                raise SubmitDirException("".join(parts))

            # Delete
            mdb.delete_master_workflow(wf_uuid, submit_dir=self.submitdir)

            # Update the master db
            mdbsession.commit()
        finally:
            mdbsession.close()
Exemple #7
0
def _get_connection(dburi=None, config_properties=None, submit_dir=None, db_type=None, pegasus_version=None, schema_check=True, create=False, force=False):
    """ Get connection to the database based on the parameters"""
    if dburi:
        return connection.connect(dburi, pegasus_version=pegasus_version, schema_check=schema_check, create=create, force=force)
    elif config_properties:
        return connection.connect_by_properties(config_properties, db_type, pegasus_version=pegasus_version, schema_check=schema_check, create=create, force=force)
    elif submit_dir:
        return connection.connect_by_submitdir(submit_dir, db_type, config_properties, pegasus_version=pegasus_version, schema_check=schema_check, create=create, force=force)
    
    if not db_type:
        dburi = connection._get_master_uri()
        return connection.connect(dburi, pegasus_version=pegasus_version, schema_check=schema_check, create=create, force=force)
    return None
Exemple #8
0
    def delete(self):
        """Delete this submit dir and its entry in the master db.

        Prompts on the console until the user answers ``y`` or ``n``;
        answering ``n`` returns without changing anything. On ``y`` the
        workflow db records are removed (unless the workflow db url contains
        the submit dir path), the master db entry is deleted, the submit dir
        is removed from disk and the master db change is committed.

        Raises:
            SubmitDirException: if this submit dir belongs to a subworkflow.
        """
        # Verify that we aren't trying to delete a subworkflow
        if self.is_subworkflow():
            raise SubmitDirException(
                "Subworkflows cannot be deleted independent of the root workflow"
            )

        # Pick the prompt function once, under a separate local name.
        # Assigning to ``input`` here would make ``input`` local to this
        # function, so on Python 3 (which has no ``raw_input``) the later
        # call would fail with UnboundLocalError.
        try:
            read_answer = raw_input  # Python 2
        except NameError:
            read_answer = input  # Python 3

        # Confirm that they want to delete the workflow
        while True:
            answer = (read_answer(
                "Are you sure you want to delete this workflow? This operation cannot be undone. [y/n]: "
            ).strip().lower())
            if answer == "y":
                break
            if answer == "n":
                return

        # Connect to master database
        mdbsession = connection.connect_by_submitdir(self.submitdir,
                                                     connection.DBType.MASTER)
        try:
            mdb = MasterDatabase(mdbsession)

            # Delete all of the records from the workflow db if they are not
            # using an sqlite db that is in the submit dir.
            db_url = connection.url_by_submitdir(self.submitdir,
                                                 connection.DBType.WORKFLOW)
            if self.submitdir not in db_url:
                dbsession = connection.connect(db_url)
                try:
                    db = WorkflowDatabase(dbsession)
                    db.delete_workflow(self.wf_uuid)
                    dbsession.commit()
                finally:
                    dbsession.close()

            # Delete the workflow
            mdb.delete_master_workflow(self.wf_uuid)

            # Remove all the files
            shutil.rmtree(self.submitdir)

            # Update master db
            mdbsession.commit()
        finally:
            # Always release the master db session, including on failure
            mdbsession.close()
Exemple #9
0
    def delete(self):
        """Delete this submit dir and its entry in the master db.

        Prompts on the console until the user answers ``y`` or ``n``;
        answering ``n`` returns without changing anything. On ``y`` the
        workflow db records are removed (unless the workflow db url contains
        the submit dir path), the master db entry is deleted, the submit dir
        is removed from disk and the master db change is committed.

        Raises:
            SubmitDirException: if this submit dir belongs to a subworkflow.
        """
        # Verify that we aren't trying to delete a subworkflow
        if self.is_subworkflow():
            raise SubmitDirException("Subworkflows cannot be deleted independent of the root workflow")

        # Pick the prompt function once, under a separate local name.
        # Assigning to ``input`` here would make ``input`` local to this
        # function, so on Python 3 (which has no ``raw_input``) the later
        # call would fail with UnboundLocalError.
        try:
            read_answer = raw_input  # Python 2
        except NameError:
            read_answer = input  # Python 3

        # Confirm that they want to delete the workflow
        while True:
            answer = read_answer("Are you sure you want to delete this workflow? This operation cannot be undone. [y/n]: ").strip().lower()
            if answer == "y":
                break
            if answer == "n":
                return

        # Connect to master database
        mdbsession = connection.connect_by_submitdir(self.submitdir, connection.DBType.MASTER)
        try:
            mdb = MasterDatabase(mdbsession)

            # Delete all of the records from the workflow db if they are not
            # using an sqlite db that is in the submit dir.
            db_url = connection.url_by_submitdir(self.submitdir, connection.DBType.WORKFLOW)
            if self.submitdir not in db_url:
                dbsession = connection.connect(db_url)
                try:
                    db = WorkflowDatabase(dbsession)
                    db.delete_workflow(self.wf_uuid)
                    dbsession.commit()
                finally:
                    dbsession.close()

            # Delete the workflow
            mdb.delete_master_workflow(self.wf_uuid)

            # Remove all the files
            shutil.rmtree(self.submitdir)

            # Update master db
            mdbsession.commit()
        finally:
            # Always release the master db session, including on failure
            mdbsession.close()
Exemple #10
0
    def detach(self):
        """Remove any master db entries for the given root workflow.

        Prints a notice instead of deleting when the workflow has no record
        in the master db.

        Raises:
            SubmitDirException: if this submit dir belongs to a subworkflow.
        """
        # Verify that we aren't trying to detach a subworkflow
        if self.is_subworkflow():
            raise SubmitDirException("Subworkflows cannot be detached independent of the root workflow")

        # Connect to master database
        mdbsession = connection.connect_by_submitdir(self.submitdir, connection.DBType.MASTER)
        try:
            mdb = MasterDatabase(mdbsession)

            # Check to see if it even exists
            if mdb.get_master_workflow(self.wf_uuid) is None:
                # print() with one argument behaves the same on Python 2 and 3
                print("Workflow is not in master DB")
            else:
                # Delete the workflow (this will delete the master_workflowstate entries as well)
                mdb.delete_master_workflow(self.wf_uuid)

            # Update the master db
            mdbsession.commit()
        finally:
            # Always release the session, including on failure
            mdbsession.close()
Exemple #11
0
    def archive(self):
        """Archive a submit dir by adding files to a compressed archive.

        The work happens in resumable steps: the archive is written to a
        temporary name and renamed to ``self.archname`` once complete, a
        lock file marks the file-removal phase, and the lock is removed when
        cleanup has finished. The ``archived`` flag on the master db record
        (if one exists) is committed last.

        Raises:
            SubmitDirException: if the submit dir is already archived and no
                cleanup lock file is present.
        """
        mdbsession = connection.connect_by_submitdir(self.submitdir, connection.DBType.MASTER)
        try:
            # Update record in master db
            mdb = MasterDatabase(mdbsession)
            wf = mdb.get_master_workflow(self.wf_uuid)
            if wf is not None:
                wf.archived = True

            # We use a temporary file so that we can determine if the archive
            # step completed successfully later
            tmparchname = os.path.join(self.submitdir, "archive.tmp.tar.gz")

            # We use a lock file to determine if cleanup is complete
            lockfile = os.path.join(self.submitdir, "archive.cleanup.lock")

            # If a previous archive was partially completed, then remove the
            # temporary file that was created
            if os.path.exists(tmparchname):
                os.unlink(tmparchname)

            # The set of files to exclude from the archive:
            #  - the braindump file
            #  - the temporary archive, so we don't add it to itself
            #  - the lock file, if it exists
            #  - the final archive name in case they try to run it again
            exclude = {self.braindump_file, tmparchname, lockfile, self.archname}

            # Ignore monitord files. This is needed so that tools like
            # pegasus-statistics will consider the workflow to be complete
            for name in ["monitord.started", "monitord.done", "monitord.log"]:
                exclude.add(os.path.join(self.submitdir, name))

            # Exclude stampede db
            exclude.update(glob.glob(os.path.join(self.submitdir, "*.stampede.db")))

            # Exclude properties file
            exclude.update(glob.glob(os.path.join(self.submitdir, "pegasus.*.properties")))

            # Visit all the top-level entries in the submit dir that we want
            # to archive
            def visit(dirpath):
                for name in os.listdir(dirpath):
                    filepath = os.path.join(dirpath, name)

                    if filepath not in exclude:
                        yield name, filepath

            if self.is_archived() and not os.path.exists(lockfile):
                raise SubmitDirException("Submit directory already archived")

            if not self.is_archived():
                # Archive the files; the context manager closes the tar file
                # even if adding an entry fails
                print("Creating archive...")
                with tarfile.open(name=tmparchname, mode="w:gz") as tar:
                    for name, path in visit(self.submitdir):
                        tar.add(name=path, arcname=name)

                # This "commits" the archive step
                os.rename(tmparchname, self.archname)

            # Touch lockfile
            open(lockfile, "w").close()

            # Remove the files and directories
            # We do this here, instead of doing it in the loop above
            # because we want to make sure there are no errors in creating
            # the archive before we start removing files
            print("Removing files...")
            for name, path in visit(self.submitdir):
                if os.path.isfile(path) or os.path.islink(path):
                    os.remove(path)
                else:
                    shutil.rmtree(path)

            # This "commits" the file removal
            os.unlink(lockfile)

            # Commit the workflow changes
            mdbsession.commit()
        finally:
            # Always release the master db session, including on failure
            mdbsession.close()
Exemple #12
0
    def attach(self):
        """Add a workflow to the master db.

        If the workflow is already in the master db, only its submit dir and
        db url are refreshed (when the recorded submit dir differs).
        Otherwise the workflow record and its states are read from the
        workflow db and copied into the master db.

        Raises:
            SubmitDirException: if this submit dir belongs to a subworkflow.
        """
        # Verify that we aren't trying to attach a subworkflow
        if self.is_subworkflow():
            raise SubmitDirException("Subworkflows cannot be attached independent of the root workflow")

        # Connect to master database
        mdbsession = connection.connect_by_submitdir(self.submitdir, connection.DBType.MASTER)
        try:
            mdb = MasterDatabase(mdbsession)

            # Check to see if it already exists and just update it
            wf = mdb.get_master_workflow(self.wf_uuid)
            if wf is not None:
                # Single-argument print() is the same on Python 2 and 3
                print("Workflow is already in master db")
                if wf.submit_dir != self.submitdir:
                    print("Updating path...")
                    wf.submit_dir = self.submitdir
                    wf.db_url = connection.url_by_submitdir(self.submitdir, connection.DBType.WORKFLOW)
                    mdbsession.commit()
                return

            # Connect to workflow db
            db_url = connection.url_by_submitdir(self.submitdir, connection.DBType.WORKFLOW)
            dbsession = connection.connect(db_url)
            try:
                db = WorkflowDatabase(dbsession)

                # Get workflow record
                wf = db.get_workflow(self.wf_uuid)
                if wf is None:
                    # Both sessions are closed by the finally clauses
                    print("No database record for that workflow exists")
                    return

                # Update the workflow record
                wf.submit_dir = self.submitdir
                wf.db_url = db_url

                # Insert workflow record into master db
                mwf = DashboardWorkflow()
                mwf.wf_uuid = wf.wf_uuid
                mwf.dax_label = wf.dax_label
                mwf.dax_version = wf.dax_version
                mwf.dax_file = wf.dax_file
                mwf.dag_file_name = wf.dag_file_name
                mwf.timestamp = wf.timestamp
                mwf.submit_hostname = wf.submit_hostname
                mwf.submit_dir = self.submitdir
                mwf.planner_arguments = wf.planner_arguments
                mwf.user = wf.user
                mwf.grid_dn = wf.grid_dn
                mwf.planner_version = wf.planner_version
                mwf.db_url = wf.db_url
                mwf.archived = self.is_archived()
                mdbsession.add(mwf)
                mdbsession.flush()  # We should have the new wf_id after this

                # Query states from workflow database
                states = db.get_workflow_states(wf.wf_id)

                # Insert states into master db
                for s in states:
                    ms = DashboardWorkflowstate()
                    ms.wf_id = mwf.wf_id
                    ms.state = s.state
                    ms.timestamp = s.timestamp
                    ms.restart_count = s.restart_count
                    ms.status = s.status
                    mdbsession.add(ms)
                mdbsession.flush()

                dbsession.commit()
            finally:
                dbsession.close()

            mdbsession.commit()
        finally:
            # Always release the master db session, on every exit path
            mdbsession.close()
Exemple #13
0
    def move(self, dest):
        """Move this submit directory to dest.

        Updates the master db record, the ensemble record and the workflow
        db (when present) to point at the new location, moves the files,
        rewrites the braindump file and finally sets ``self.submitdir``.

        Args:
            dest: Target path. If it is an existing directory, the submit
                dir is moved inside it under its current base name.

        Raises:
            SubmitDirException: if ``dest`` is a file, if ``dest`` is itself
                a submit dir (contains ``braindump.txt``), or if this submit
                dir belongs to a subworkflow.
        """
        dest = os.path.abspath(dest)

        if os.path.isfile(dest):
            raise SubmitDirException("Destination is a file: %s" % dest)

        if os.path.isdir(dest):
            if os.path.exists(os.path.join(dest, "braindump.txt")):
                raise SubmitDirException("Destination is a submit dir: %s" % dest)
            dest = os.path.join(dest, os.path.basename(self.submitdir))

        # Verify that we aren't trying to move a subworkflow
        if self.is_subworkflow():
            raise SubmitDirException("Subworkflows cannot be moved independent of the root workflow")

        # Connect to master database
        mdbsession = connection.connect_by_submitdir(self.submitdir, connection.DBType.MASTER)
        try:
            mdb = MasterDatabase(mdbsession)

            # Get the workflow record from the master db
            wf = mdb.get_master_workflow(self.wf_uuid)
            if wf is None:
                db_url = connection.url_by_submitdir(self.submitdir, connection.DBType.WORKFLOW)
            else:
                # We found an mdb record, so we need to update it

                # Save the master db's pointer
                db_url = wf.db_url

                # Update the master db's db_url
                # Note that this will only update the URL if it is an sqlite
                # file located in the submitdir
                log.info("Old master db_url: %s", wf.db_url)
                wf.db_url = db_url.replace(self.submitdir, dest)
                log.info("New master db_url: %s", wf.db_url)

                # Change the master db's submit_dir
                log.info("Old master submit_dir: %s", wf.submit_dir)
                wf.submit_dir = dest
                log.info("New master submit_dir: %s", wf.submit_dir)

            # Update the ensemble record if one exists
            ew = mdb.get_ensemble_workflow(self.wf_uuid)
            if ew is not None:
                log.info("Old ensemble submit dir: %s", ew.submitdir)
                ew.submitdir = dest
                log.info("New ensemble submit dir: %s", ew.submitdir)

            # Update the workflow database if we found one
            if db_url is not None:
                dbsession = connection.connect(db_url)
                try:
                    db = WorkflowDatabase(dbsession)
                    root_wf = db.get_workflow(self.wf_uuid)
                    if root_wf is None:
                        # Nothing to rewrite when the workflow db has no
                        # record for this workflow
                        log.warning("No workflow db record for %s; skipping submit dir update", self.wf_uuid)
                    else:
                        db.update_submit_dirs(root_wf.wf_id, self.submitdir, dest)
                    dbsession.commit()
                finally:
                    dbsession.close()

            # Move all the files
            shutil.move(self.submitdir, dest)

            # Set new paths in the braindump file
            self.braindump["submit_dir"] = dest
            self.braindump["basedir"] = os.path.dirname(dest)
            utils.write_braindump(os.path.join(dest, "braindump.txt"), self.braindump)

            # Note that we do not need to update the properties file even
            # though it might contain DB URLs because it cannot contain a DB
            # URL with the submit dir in it.

            # TODO We might want to update all of the absolute paths in the
            # condor submit files if we plan on moving workflows that could
            # be resubmitted in the future

            # TODO We might want to update the braindump files for subworkflows

            # Update master database
            mdbsession.commit()
        finally:
            # Always release the master db session, including on failure
            mdbsession.close()

        # Finally, update object
        self.submitdir = dest
Exemple #14
0
    def archive(self):
        """Archive a submit dir by adding files to a compressed archive.

        The work happens in resumable steps: the archive is written to a
        temporary name and renamed to ``self.archname`` once complete, a
        lock file marks the file-removal phase, and the lock is removed when
        cleanup has finished. The ``archived`` flag on the master db record
        (if one exists) is committed last.

        Raises:
            SubmitDirException: if the submit dir is already archived and no
                cleanup lock file is present.
        """
        mdbsession = connection.connect_by_submitdir(self.submitdir, connection.DBType.MASTER)
        try:
            # Update record in master db
            mdb = MasterDatabase(mdbsession)
            wf = mdb.get_master_workflow(self.wf_uuid)
            if wf is not None:
                wf.archived = True

            # We use a temporary file so that we can determine if the archive
            # step completed successfully later
            tmparchname = os.path.join(self.submitdir, "archive.tmp.tar.gz")

            # We use a lock file to determine if cleanup is complete
            lockfile = os.path.join(self.submitdir, "archive.cleanup.lock")

            # If a previous archive was partially completed, then remove the
            # temporary file that was created
            if os.path.exists(tmparchname):
                os.unlink(tmparchname)

            # The set of files to exclude from the archive:
            #  - the braindump file
            #  - the temporary archive, so we don't add it to itself
            #  - the lock file, if it exists
            #  - the final archive name in case they try to run it again
            exclude = set([self.braindump_file, tmparchname, lockfile, self.archname])

            # Ignore monitord files. This is needed so that tools like
            # pegasus-statistics will consider the workflow to be complete
            for name in ["monitord.started", "monitord.done", "monitord.log"]:
                exclude.add(os.path.join(self.submitdir, name))

            # Exclude stampede db
            exclude.update(glob.glob(os.path.join(self.submitdir, "*.stampede.db")))

            # Exclude properties file
            exclude.update(glob.glob(os.path.join(self.submitdir, "pegasus.*.properties")))

            # Visit all the top-level entries in the submit dir that we want
            # to archive
            def visit(dirpath):
                for name in os.listdir(dirpath):
                    filepath = os.path.join(dirpath, name)

                    if filepath not in exclude:
                        yield name, filepath

            if self.is_archived() and not os.path.exists(lockfile):
                raise SubmitDirException("Submit directory already archived")

            if not self.is_archived():
                # Archive the files; the context manager closes the tar file
                # even if adding an entry fails.
                # Single-argument print() is the same on Python 2 and 3
                print("Creating archive...")
                with tarfile.open(name=tmparchname, mode="w:gz") as tar:
                    for name, path in visit(self.submitdir):
                        tar.add(name=path, arcname=name)

                # This "commits" the archive step
                os.rename(tmparchname, self.archname)

            # Touch lockfile
            open(lockfile, "w").close()

            # Remove the files and directories
            # We do this here, instead of doing it in the loop above
            # because we want to make sure there are no errors in creating
            # the archive before we start removing files
            print("Removing files...")
            for name, path in visit(self.submitdir):
                if os.path.isfile(path) or os.path.islink(path):
                    os.remove(path)
                else:
                    shutil.rmtree(path)

            # This "commits" the file removal
            os.unlink(lockfile)

            # Commit the workflow changes
            mdbsession.commit()
        finally:
            # Always release the master db session, including on failure
            mdbsession.close()
Exemple #15
0
    def attach(self):
        """Add a workflow to the master db.

        If the workflow is already in the master db, only its submit dir and
        db url are refreshed (when the recorded submit dir differs).
        Otherwise the workflow record and its states are read from the
        workflow db and copied into the master db.

        Raises:
            SubmitDirException: if this submit dir belongs to a subworkflow.
        """
        # Verify that we aren't trying to attach a subworkflow
        if self.is_subworkflow():
            raise SubmitDirException("Subworkflows cannot be attached independent of the root workflow")

        # Connect to master database
        mdbsession = connection.connect_by_submitdir(self.submitdir, connection.DBType.MASTER)
        try:
            mdb = MasterDatabase(mdbsession)

            # Check to see if it already exists and just update it
            wf = mdb.get_master_workflow(self.wf_uuid)
            if wf is not None:
                print("Workflow is already in master db")
                if wf.submit_dir != self.submitdir:
                    print("Updating path...")
                    wf.submit_dir = self.submitdir
                    wf.db_url = connection.url_by_submitdir(self.submitdir, connection.DBType.WORKFLOW)
                    mdbsession.commit()
                return

            # Connect to workflow db
            db_url = connection.url_by_submitdir(self.submitdir, connection.DBType.WORKFLOW)
            dbsession = connection.connect(db_url)
            try:
                db = WorkflowDatabase(dbsession)

                # Get workflow record
                wf = db.get_workflow(self.wf_uuid)
                if wf is None:
                    # Both sessions are closed by the finally clauses
                    print("No database record for that workflow exists")
                    return

                # Update the workflow record
                wf.submit_dir = self.submitdir
                wf.db_url = db_url

                # Insert workflow record into master db
                mwf = DashboardWorkflow()
                mwf.wf_uuid = wf.wf_uuid
                mwf.dax_label = wf.dax_label
                mwf.dax_version = wf.dax_version
                mwf.dax_file = wf.dax_file
                mwf.dag_file_name = wf.dag_file_name
                mwf.timestamp = wf.timestamp
                mwf.submit_hostname = wf.submit_hostname
                mwf.submit_dir = self.submitdir
                mwf.planner_arguments = wf.planner_arguments
                mwf.user = wf.user
                mwf.grid_dn = wf.grid_dn
                mwf.planner_version = wf.planner_version
                mwf.db_url = wf.db_url
                mwf.archived = self.is_archived()
                mdbsession.add(mwf)
                mdbsession.flush()  # We should have the new wf_id after this

                # Query states from workflow database
                states = db.get_workflow_states(wf.wf_id)

                # Insert states into master db
                for s in states:
                    ms = DashboardWorkflowstate()
                    ms.wf_id = mwf.wf_id
                    ms.state = s.state
                    ms.timestamp = s.timestamp
                    ms.restart_count = s.restart_count
                    ms.status = s.status
                    mdbsession.add(ms)
                mdbsession.flush()

                dbsession.commit()
            finally:
                dbsession.close()

            mdbsession.commit()
        finally:
            # Always release the master db session, on every exit path
            mdbsession.close()
Exemple #16
0
    def move(self, dest):
        """Move this submit directory to dest.

        Updates the master db record, the ensemble record and the workflow
        db (when present) to point at the new location, moves the files,
        rewrites the braindump file and finally sets ``self.submitdir``.

        Args:
            dest: Target path. If it is an existing directory, the submit
                dir is moved inside it under its current base name.

        Raises:
            SubmitDirException: if ``dest`` is a file, if ``dest`` is itself
                a submit dir (contains ``braindump.txt``), or if this submit
                dir belongs to a subworkflow.
        """
        dest = os.path.abspath(dest)

        if os.path.isfile(dest):
            raise SubmitDirException("Destination is a file: %s" % dest)

        if os.path.isdir(dest):
            if os.path.exists(os.path.join(dest, "braindump.txt")):
                raise SubmitDirException("Destination is a submit dir: %s" % dest)
            dest = os.path.join(dest, os.path.basename(self.submitdir))

        # Verify that we aren't trying to move a subworkflow
        if self.is_subworkflow():
            raise SubmitDirException("Subworkflows cannot be moved independent of the root workflow")

        # Connect to master database
        mdbsession = connection.connect_by_submitdir(self.submitdir, connection.DBType.MASTER)
        try:
            mdb = MasterDatabase(mdbsession)

            # Get the workflow record from the master db
            wf = mdb.get_master_workflow(self.wf_uuid)
            if wf is None:
                db_url = connection.url_by_submitdir(self.submitdir, connection.DBType.WORKFLOW)
            else:
                # We found an mdb record, so we need to update it

                # Save the master db's pointer
                db_url = wf.db_url

                # Update the master db's db_url
                # Note that this will only update the URL if it is an sqlite
                # file located in the submitdir
                log.info("Old master db_url: %s", wf.db_url)
                wf.db_url = db_url.replace(self.submitdir, dest)
                log.info("New master db_url: %s", wf.db_url)

                # Change the master db's submit_dir
                log.info("Old master submit_dir: %s", wf.submit_dir)
                wf.submit_dir = dest
                log.info("New master submit_dir: %s", wf.submit_dir)

            # Update the ensemble record if one exists
            ew = mdb.get_ensemble_workflow(self.wf_uuid)
            if ew is not None:
                log.info("Old ensemble submit dir: %s", ew.submitdir)
                ew.submitdir = dest
                log.info("New ensemble submit dir: %s", ew.submitdir)

            # Update the workflow database if we found one
            if db_url is not None:
                dbsession = connection.connect(db_url)
                try:
                    db = WorkflowDatabase(dbsession)
                    root_wf = db.get_workflow(self.wf_uuid)
                    if root_wf is None:
                        # Nothing to rewrite when the workflow db has no
                        # record for this workflow
                        log.warning("No workflow db record for %s; skipping submit dir update", self.wf_uuid)
                    else:
                        db.update_submit_dirs(root_wf.wf_id, self.submitdir, dest)
                    dbsession.commit()
                finally:
                    dbsession.close()

            # Move all the files
            shutil.move(self.submitdir, dest)

            # Set new paths in the braindump file
            self.braindump["submit_dir"] = dest
            self.braindump["basedir"] = os.path.dirname(dest)
            utils.write_braindump(os.path.join(dest, "braindump.txt"), self.braindump)

            # Note that we do not need to update the properties file even
            # though it might contain DB URLs because it cannot contain a DB
            # URL with the submit dir in it.

            # TODO We might want to update all of the absolute paths in the
            # condor submit files if we plan on moving workflows that could
            # be resubmitted in the future

            # TODO We might want to update the braindump files for subworkflows

            # Update master database
            mdbsession.commit()
        finally:
            # Always release the master db session, including on failure
            mdbsession.close()

        # Finally, update object
        self.submitdir = dest
Exemple #17
0
    def archive(self):
        """Archive a submit dir by adding files to a compressed archive.

        Every top-level entry of the submit dir is added to
        ``archive.tar.gz`` (created inside the submit dir) and then removed,
        except for the braindump file, the archive itself, the monitord
        files, ``*.stampede.db`` files and ``pegasus.*.properties`` files.
        If the master database has a record for this workflow, it is marked
        as archived.

        Raises:
            SubmitDirException: if ``archive.tar.gz`` already exists.
        """
        # Locate the archive file. This check runs before the database is
        # opened so the "already archived" path cannot leak a session.
        archname = os.path.join(self.submitdir, "archive.tar.gz")
        if os.path.exists(archname):
            raise SubmitDirException("Submit dir already archived")

        mdbsession = connection.connect_by_submitdir(self.submitdir, connection.DBType.MASTER)
        try:
            # Update record in master db (committed only at the very end)
            mdb = MasterDatabase(mdbsession)
            wf = mdb.get_master_workflow(self.wf_uuid)
            if wf is not None:
                wf.archived = True

            # The set of files to exclude from the archive: the braindump
            # file and the archive itself
            exclude = {self.braindump_file, archname}

            # Ignore monitord files. This is needed so that tools like pegasus-statistics
            # will consider the workflow to be complete
            for name in ["monitord.started", "monitord.done", "monitord.log"]:
                exclude.add(os.path.join(self.submitdir, name))

            # Exclude stampede db
            exclude.update(glob.glob(os.path.join(self.submitdir, "*.stampede.db")))

            # Exclude properties file
            exclude.update(glob.glob(os.path.join(self.submitdir, "pegasus.*.properties")))

            # Visit all the files in the submit dir that we want to archive
            def visit(dirpath):
                for name in os.listdir(dirpath):
                    filepath = os.path.join(dirpath, name)

                    if filepath not in exclude:
                        yield name, filepath

            # Archive the files
            try:
                with tarfile.open(name=archname, mode="w:gz") as tar:
                    for name, path in visit(self.submitdir):
                        tar.add(name=path, arcname=name)
            except BaseException:
                # A truncated archive would make every later archive() call
                # fail with "already archived", so remove it before
                # re-raising. Nothing has been deleted at this point.
                if os.path.exists(archname):
                    os.remove(archname)
                raise

            # Remove the files and directories
            # We do this here, instead of doing it in the loop above
            # because we want to make sure there are no errors in creating
            # the archive before we start removing files
            for _, path in visit(self.submitdir):
                if os.path.isfile(path) or os.path.islink(path):
                    os.remove(path)
                else:
                    shutil.rmtree(path)

            # Commit the workflow changes
            mdbsession.commit()
        finally:
            # Close the session on both success and failure
            mdbsession.close()
Exemple #18
0
    def archive(self):
        """Archive a submit dir by adding files to a compressed archive.

        Each top-level entry of the submit dir is written to
        ``archive.tar.gz`` inside the submit dir and then deleted. The
        braindump file, the archive itself, the monitord files,
        ``*.stampede.db`` files and ``pegasus.*.properties`` files are left
        in place. The master database record for this workflow, when one
        exists, is flagged as archived.

        Raises:
            SubmitDirException: if ``archive.tar.gz`` already exists.
        """
        # Locate the archive file first: failing here must not leave a
        # database session open
        archname = os.path.join(self.submitdir, "archive.tar.gz")
        if os.path.exists(archname):
            raise SubmitDirException("Submit dir already archived")

        mdbsession = connection.connect_by_submitdir(self.submitdir,
                                                     connection.DBType.MASTER)
        try:
            # Update record in master db; the change is committed last
            mdb = MasterDatabase(mdbsession)
            wf = mdb.get_master_workflow(self.wf_uuid)
            if wf is not None:
                wf.archived = True

            # The set of files to exclude from the archive, starting with
            # the braindump file and the archive file
            exclude = {self.braindump_file, archname}

            # Ignore monitord files. This is needed so that tools like pegasus-statistics
            # will consider the workflow to be complete
            for name in ["monitord.started", "monitord.done", "monitord.log"]:
                exclude.add(os.path.join(self.submitdir, name))

            # Exclude stampede db
            exclude.update(
                glob.glob(os.path.join(self.submitdir, "*.stampede.db")))

            # Exclude properties file
            exclude.update(
                glob.glob(os.path.join(self.submitdir, "pegasus.*.properties")))

            # Visit all the files in the submit dir that we want to archive
            def visit(dirpath):
                for name in os.listdir(dirpath):
                    filepath = os.path.join(dirpath, name)

                    if filepath not in exclude:
                        yield name, filepath

            # Archive the files
            try:
                with tarfile.open(name=archname, mode="w:gz") as tar:
                    for name, path in visit(self.submitdir):
                        tar.add(name=path, arcname=name)
            except BaseException:
                # Do not leave a partial archive behind: its mere existence
                # would block any retry with "already archived". No source
                # file has been removed yet, so dropping it loses nothing.
                if os.path.exists(archname):
                    os.remove(archname)
                raise

            # Remove the files and directories
            # We do this here, instead of doing it in the loop above
            # because we want to make sure there are no errors in creating
            # the archive before we start removing files
            for _, path in visit(self.submitdir):
                if os.path.isfile(path) or os.path.islink(path):
                    os.remove(path)
                else:
                    shutil.rmtree(path)

            # Commit the workflow changes
            mdbsession.commit()
        finally:
            # Release the session whether or not archiving succeeded
            mdbsession.close()