def _get_connection(
    dburi=None,
    cl_properties=None,
    config_properties=None,
    submit_dir=None,
    db_type=None,
    pegasus_version=None,
    schema_check=True,
    create=False,
    force=False,
    print_version=True,
):
    """Get a connection to the database based on the parameters."""
    if dburi:
        return connection.connect(
            dburi,
            pegasus_version=pegasus_version,
            schema_check=schema_check,
            create=create,
            force=force,
            db_type=db_type,
            print_version=print_version,
        )
    elif submit_dir:
        return connection.connect_by_submitdir(
            submit_dir,
            db_type,
            config_properties,
            pegasus_version=pegasus_version,
            schema_check=schema_check,
            create=create,
            force=force,
            cl_properties=cl_properties,
            print_version=print_version,
        )
    elif config_properties or _has_connection_properties(cl_properties):
        return connection.connect_by_properties(
            config_properties,
            db_type,
            cl_properties=cl_properties,
            pegasus_version=pegasus_version,
            schema_check=schema_check,
            create=create,
            force=force,
            print_version=print_version,
        )

    # Fall back to the per-user master database when no explicit source was given
    if not db_type:
        dburi = connection._get_master_uri()
        return connection.connect(
            dburi,
            pegasus_version=pegasus_version,
            schema_check=schema_check,
            create=create,
            force=force,
            db_type=db_type,
            print_version=print_version,
        )
    return None
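# A hedged usage sketch of the lookup precedence above: an explicit dburi
# wins, then submit_dir, then properties, and finally the per-user master
# database. The sqlite URI is a placeholder, not a documented default.
def _example_get_connection():
    session = _get_connection(dburi="sqlite:///tmp/workflow.db", create=True)
    try:
        pass  # issue queries through the returned SQLAlchemy session
    finally:
        session.close()

    # With no arguments at all, the call falls through to
    # connection._get_master_uri() and connects to the master database.
    master = _get_connection()
    if master is not None:
        master.close()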
def loop_forever(self):
    while True:
        u = user.get_user_by_uid(os.getuid())
        session = connection.connect(
            u.get_master_db_url(), connect_args={"check_same_thread": False}
        )
        try:
            dao = Ensembles(session)
            self.loop_once(dao)
        finally:
            session.close()
        time.sleep(self.interval)
def delete(self):
    "Delete this submit dir and its entry in the master db"

    # Verify that we aren't trying to delete a subworkflow
    if self.is_subworkflow():
        raise SubmitDirException(
            "Subworkflows cannot be deleted independent of the root workflow"
        )

    # Python 2/3 compatibility: raw_input was renamed to input in Python 3.
    # Bind the prompt function once, up front; rebinding the name `input`
    # inside the function would make it function-local and raise
    # UnboundLocalError on Python 3.
    try:
        prompt = raw_input
    except NameError:
        prompt = input

    # Confirm that they want to delete the workflow
    while True:
        answer = (
            prompt(
                "Are you sure you want to delete this workflow? "
                "This operation cannot be undone. [y/n]: "
            )
            .strip()
            .lower()
        )
        if answer == "y":
            break
        if answer == "n":
            return

    # Connect to master database
    mdbsession = connection.connect_by_submitdir(
        self.submitdir, connection.DBType.MASTER
    )
    mdb = MasterDatabase(mdbsession)

    # Delete all of the records from the workflow db if they are not using
    # an sqlite db that is in the submit dir.
    db_url = connection.url_by_submitdir(self.submitdir, connection.DBType.WORKFLOW)
    if self.submitdir not in db_url:
        dbsession = connection.connect(db_url)
        db = WorkflowDatabase(dbsession)
        db.delete_workflow(self.wf_uuid)
        dbsession.commit()
        dbsession.close()

    # Delete the workflow
    mdb.delete_master_workflow(self.wf_uuid)

    # Remove all the files
    shutil.rmtree(self.submitdir)

    # Update master db
    mdbsession.commit()
    mdbsession.close()
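# Hedged usage sketch: SubmitDir is assumed to be the class these methods
# belong to, constructed from a submit directory path; the path below is a
# placeholder.
def _example_delete_submitdir():
    sd = SubmitDir("/scratch/user/submit/run0001")
    try:
        sd.delete()  # prompts [y/n] before removing files and DB records
    except SubmitDirException as e:
        print(e)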
def __init__(
    self,
    dburi,
    batch=True,
    props=None,
    db_type=None,
    backup=False,
    flush_every=1000,
):
    """Will be overridden by subclasses to take parameters specific to
    their function.
    """
    self.log = logging.getLogger(
        "{}.{}".format(self.__module__, self.__class__.__name__)
    )
    self.dburi = dburi

    # PM-898 all props passed should have the pegasus prefix stripped off
    # so they are more like connect_args to be used for database connections
    connect_args = {}
    if props:
        for key in props.keyset():
            # we don't pass url in connect args
            if key != "url":
                connect_args[key] = props.property(key)

    # make sure timeout is an int
    if "timeout" in connect_args:
        connect_args["timeout"] = int(connect_args["timeout"])

    self.session = connection.connect(
        dburi,
        create=True,
        connect_args=connect_args,
        db_type=db_type,
        backup=backup,
    )

    # flags and state for batching
    self._batch = batch
    self._flush_every = flush_every
    self._flush_count = 0
    self._last_flush = time.time()
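# Sketch of the props-to-connect_args mapping performed above, using a
# minimal stand-in exposing the keyset()/property() interface the
# constructor relies on. The property names are illustrative; PM-898
# prefix stripping is assumed to have happened upstream.
def _example_props_to_connect_args():
    class FakeProps:
        def __init__(self, data):
            self._data = data

        def keyset(self):
            return self._data.keys()

        def property(self, key):
            return self._data[key]

    props = FakeProps({"timeout": "30", "url": "sqlite:///tmp/wf.db"})
    connect_args = {k: props.property(k) for k in props.keyset() if k != "url"}
    connect_args["timeout"] = int(connect_args["timeout"])
    assert connect_args == {"timeout": 30}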
def __init__(self, connString=None, expand_workflow=True):
    self.log = logging.getLogger(
        "{}.{}".format(self.__module__, self.__class__.__name__)
    )
    try:
        self.session = connection.connect(connString)
    except (connection.ConnectionError, DBAdminError) as e:
        self.log.exception(e)
        raise StampedeDBNotFoundError

    self._expand = expand_workflow

    self._root_wf_id = []
    self._root_wf_uuid = []
    self.all_workflows = None
    self._job_filter_mode = None
    self._time_filter_mode = None
    self._host_filter = None
    self._xform_filter = {"include": None, "exclude": None}

    self._wfs = []
def run(self):
    """Trigger manager main loop."""
    self.log.info("trigger manager starting")

    while True:
        # TODO: user will always be the same.., keep this in the loop or move it out
        u = user.get_user_by_uid(os.getuid())
        session = connection.connect(
            u.get_master_db_url(), connect_args={"check_same_thread": False}
        )
        try:
            self.trigger_dao = Triggers(session)
            self.ensemble_dao = Ensembles(session)

            triggers = self.trigger_dao.list_triggers()
            self.log.info("processing {} triggers".format(len(triggers)))
            for t in triggers:
                t_name = TriggerManager.get_tname(t)
                if t.state == "READY":
                    self.start_trigger(t)
                elif t.state == "RUNNING" and t_name not in self.running:
                    # restart
                    self.log.debug("{} not in memory, restarting it".format(t_name))
                    self.start_trigger(t)
                elif t.state == "RUNNING" and not self.running[t_name].is_alive():
                    # exited
                    self.log.debug(
                        "{} exited, removing references to it".format(t_name)
                    )
                    self.stop_trigger(t)
                elif t.state == "STOPPED":
                    self.stop_trigger(t)
        finally:
            session.close()

        time.sleep(self.polling_rate)
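# A hedged, self-contained restatement of the reconciliation rules the loop
# above applies to each trigger row. The trigger object and the `running`
# map of threads are stand-ins, not this module's types.
def _example_trigger_reconciliation():
    from types import SimpleNamespace

    def next_action(trigger, running):
        """Return 'start', 'stop', or None for one trigger record."""
        if trigger.state == "READY":
            return "start"
        if trigger.state == "RUNNING":
            if trigger.name not in running:
                return "start"  # daemon restarted and lost the thread
            if not running[trigger.name].is_alive():
                return "stop"  # thread exited; drop references to it
            return None
        if trigger.state == "STOPPED":
            return "stop"
        return None

    t = SimpleNamespace(name="t1", state="READY")
    assert next_action(t, running={}) == "start"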
def delete_workflow(dburi, wf_uuid):
    "Expunge workflow from workflow database"

    log.info("Expunging %s from workflow database", wf_uuid)

    session = connection.connect(dburi, create=True)
    try:
        query = session.query(Workflow).filter(Workflow.wf_uuid == wf_uuid)
        try:
            wf = query.one()
        except orm.exc.NoResultFound:
            log.warning(
                "No workflow found with wf_uuid %s - aborting expunge", wf_uuid
            )
            return

        # PM-1218 gather list of descendant workflows with wf_uuid
        query = session.query(Workflow).filter(Workflow.root_wf_id == wf.wf_id)
        try:
            desc_wfs = query.all()
            for desc_wf in desc_wfs:
                # delete the files from the rc_lfn explicitly as they are
                # not associated with workflow table
                __delete_workflow_files__(session, desc_wf.wf_uuid, desc_wf.wf_id)
        except orm.exc.NoResultFound:
            log.warning(
                "No workflow found with root wf_id %s - aborting expunge", wf.wf_id
            )
            return

        session.delete(wf)

        log.info("Flushing top-level workflow: %s", wf.wf_uuid)
        start = time.time()
        session.flush()
        session.commit()
        log.info("Flush took: %f seconds", time.time() - start)
    finally:
        session.close()
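# Hypothetical invocation: expunge one root workflow (and, per PM-1218, the
# file entries of its descendants) from a workflow database. The URI and
# UUID are placeholders, not real values.
def _example_delete_workflow():
    delete_workflow(
        "sqlite:///scratch/user/run0001/workflow.db",
        "11111111-2222-3333-4444-555555555555",
    )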
def delete_dashboard_workflow(dburi, wf_uuid):
    "Expunge workflow from dashboard database"

    log.info("Expunging %s from dashboard database", wf_uuid)

    session = connection.connect(dburi, create=True)
    try:
        query = session.query(MasterWorkflow).filter(
            MasterWorkflow.wf_uuid == wf_uuid
        )
        try:
            wf = query.one()
        except orm.exc.NoResultFound:
            log.warning(
                "No workflow found with wf_uuid %s - aborting expunge", wf_uuid
            )
            return

        session.delete(wf)

        start = time.time()
        session.flush()
        session.commit()
        log.info("Flush took: %f seconds", time.time() - start)
    finally:
        session.close()
def connect():
    log.debug("Connecting to database")
    g.master_db_url = g.user.get_master_db_url()
    g.session = connection.connect(
        g.master_db_url, connect_args={"check_same_thread": False}
    )
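# A hedged sketch of how this request hook might be wired up, assuming a
# Flask app object and that g.user is populated by an earlier hook; the
# registration and teardown pairing are assumptions, not shown in this
# module.
def _example_register_connect(app):
    from flask import g

    app.before_request(connect)

    @app.teardown_appcontext
    def close_session(exc=None):
        session = g.pop("session", None)
        if session is not None:
            session.close()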
def detach(self, wf_uuid=None):
    "Remove any master db entries for the given root workflow"
    if self.submitdir_exists:
        # Verify that we aren't trying to detach a subworkflow
        if self.is_subworkflow():
            raise SubmitDirException(
                "Subworkflows cannot be detached independent of the root workflow"
            )

        # Connect to master database
        mdbsession = connection.connect_by_submitdir(
            self.submitdir, connection.DBType.MASTER
        )
        mdb = MasterDatabase(mdbsession)

        # Check to see if it even exists
        wf = mdb.get_master_workflow(self.wf_uuid)
        if wf is None:
            print("Workflow is not in master DB")
        else:
            # Delete the workflow (this will delete the master_workflowstate
            # entries as well)
            mdb.delete_master_workflow(self.wf_uuid)

        # Update the master db
        mdbsession.commit()
        mdbsession.close()
    else:
        # Connect to master database
        home = expanduser("~")
        mdbsession = connection.connect(
            "sqlite:///%s/.pegasus/workflow.db" % home,
            db_type=connection.DBType.MASTER,
        )
        mdb = MasterDatabase(mdbsession)

        try:
            if wf_uuid is None:
                wfs = mdb.get_master_workflow_for_submitdir(self.submitdir)
                if wfs:
                    msg = (
                        "Invalid submit dir: %s, Specify --wf-uuid <WF_UUID> to detach\n"
                        % self.submitdir
                    )
                    msg += "\tWorkflow UUID, DAX Label, Submit Hostname, Submit Dir.\n"
                    for wf in wfs:
                        msg += "\t{}, {}, {}, {}\n".format(
                            wf.wf_uuid,
                            wf.dax_label,
                            wf.submit_hostname,
                            wf.submit_dir,
                        )
                    raise SubmitDirException(msg)
                else:
                    raise SubmitDirException(
                        "Invalid submit dir: %s" % self.submitdir
                    )
            else:
                # Delete
                mdb.delete_master_workflow(wf_uuid, submit_dir=self.submitdir)

                # Update the master db
                mdbsession.commit()
        finally:
            mdbsession.close()
def attach(self):
    "Add a workflow to the master db"

    # Verify that we aren't trying to attach a subworkflow
    if self.is_subworkflow():
        raise SubmitDirException(
            "Subworkflows cannot be attached independent of the root workflow"
        )

    # Connect to master database
    mdbsession = connection.connect_by_submitdir(
        self.submitdir, connection.DBType.MASTER
    )
    mdb = MasterDatabase(mdbsession)

    # Check to see if it already exists and just update it
    wf = mdb.get_master_workflow(self.wf_uuid)
    if wf is not None:
        print("Workflow is already in master db")
        old_submit_dir = wf.submit_dir
        if old_submit_dir != self.submitdir:
            print("Updating path...")
            wf.submit_dir = self.submitdir
            wf.db_url = connection.url_by_submitdir(
                self.submitdir, connection.DBType.WORKFLOW
            )
            mdbsession.commit()
        mdbsession.close()
        return

    # Connect to workflow db
    db_url = connection.url_by_submitdir(self.submitdir, connection.DBType.WORKFLOW)
    dbsession = connection.connect(db_url)
    db = WorkflowDatabase(dbsession)

    # Get workflow record
    wf = db.get_workflow(self.wf_uuid)
    if wf is None:
        print("No database record for that workflow exists")
        return

    # Update the workflow record
    wf.submit_dir = self.submitdir
    wf.db_url = db_url

    # Insert workflow record into master db
    mwf = MasterWorkflow()
    mwf.wf_uuid = wf.wf_uuid
    mwf.dax_label = wf.dax_label
    mwf.dax_version = wf.dax_version
    mwf.dax_file = wf.dax_file
    mwf.dag_file_name = wf.dag_file_name
    mwf.timestamp = wf.timestamp
    mwf.submit_hostname = wf.submit_hostname
    mwf.submit_dir = self.submitdir
    mwf.planner_arguments = wf.planner_arguments
    mwf.user = wf.user
    mwf.grid_dn = wf.grid_dn
    mwf.planner_version = wf.planner_version
    mwf.db_url = wf.db_url
    mwf.archived = self.is_archived()
    mdbsession.add(mwf)
    mdbsession.flush()  # We should have the new wf_id after this

    # Query states from workflow database
    states = db.get_workflow_states(wf.wf_id)

    # Insert states into master db
    for s in states:
        ms = MasterWorkflowstate()
        ms.wf_id = mwf.wf_id
        ms.state = s.state
        ms.timestamp = s.timestamp
        ms.restart_count = s.restart_count
        ms.status = s.status
        mdbsession.add(ms)
    mdbsession.flush()

    dbsession.commit()
    dbsession.close()
    mdbsession.commit()
    mdbsession.close()
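# Self-contained sketch of the flush() idiom used above: flushing assigns
# the autoincrement primary key so dependent rows can reference it before
# the final commit. Models are throwaway, not the Pegasus schema; the
# import path assumes SQLAlchemy 1.4+.
def _example_flush_assigns_pk():
    from sqlalchemy import Column, Integer, String, create_engine
    from sqlalchemy.orm import declarative_base, sessionmaker

    Base = declarative_base()

    class Parent(Base):
        __tablename__ = "parent"
        id = Column(Integer, primary_key=True)
        name = Column(String)

    class Child(Base):
        __tablename__ = "child"
        id = Column(Integer, primary_key=True)
        parent_id = Column(Integer)

    engine = create_engine("sqlite://")
    Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()

    p = Parent(name="wf")
    session.add(p)
    session.flush()  # p.id is populated here, before commit
    session.add(Child(parent_id=p.id))
    session.commit()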
def move(self, dest):
    "Move this submit directory to dest"

    dest = os.path.abspath(dest)

    if os.path.isfile(dest):
        raise SubmitDirException("Destination is a file: %s" % dest)

    if os.path.isdir(dest):
        if os.path.exists(os.path.join(dest, "braindump.txt")):
            raise SubmitDirException("Destination is a submit dir: %s" % dest)
        dest = os.path.join(dest, os.path.basename(self.submitdir))

    # Verify that we aren't trying to move a subworkflow
    if self.is_subworkflow():
        raise SubmitDirException(
            "Subworkflows cannot be moved independent of the root workflow"
        )

    # Connect to master database
    mdbsession = connection.connect_by_submitdir(
        self.submitdir, connection.DBType.MASTER
    )
    mdb = MasterDatabase(mdbsession)

    # Get the workflow record from the master db
    db_url = None
    wf = mdb.get_master_workflow(self.wf_uuid)
    if wf is None:
        db_url = connection.url_by_submitdir(
            self.submitdir, connection.DBType.WORKFLOW
        )
    else:
        # We found an mdb record, so we need to update it

        # Save the master db's pointer
        db_url = wf.db_url

        # Update the master db's db_url
        # Note that this will only update the URL if it is an sqlite file
        # located in the submitdir
        log.info("Old master db_url: %s" % wf.db_url)
        wf.db_url = db_url.replace(self.submitdir, dest)
        log.info("New master db_url: %s" % wf.db_url)

        # Change the master db's submit_dir
        log.info("Old master submit_dir: %s" % wf.submit_dir)
        wf.submit_dir = dest
        log.info("New master submit_dir: %s" % wf.submit_dir)

    # Update the ensemble record if one exists
    ew = mdb.get_ensemble_workflow(self.wf_uuid)
    if ew is not None:
        log.info("Old ensemble submit dir: %s", ew.submitdir)
        ew.submitdir = dest
        log.info("New ensemble submit dir: %s", ew.submitdir)

    # Update the workflow database if we found one
    if db_url is not None:
        dbsession = connection.connect(db_url)
        db = WorkflowDatabase(dbsession)
        root_wf = db.get_workflow(self.wf_uuid)
        db.update_submit_dirs(root_wf.wf_id, self.submitdir, dest)
        dbsession.commit()
        dbsession.close()

    # Move all the files
    shutil.move(self.submitdir, dest)

    # Set new paths in the braindump file
    self.braindump["submit_dir"] = dest
    self.braindump["basedir"] = os.path.dirname(dest)
    utils.write_braindump(os.path.join(dest, "braindump.txt"), self.braindump)

    # Note that we do not need to update the properties file even though it
    # might contain DB URLs because it cannot contain a DB URL with the
    # submit dir in it.

    # TODO We might want to update all of the absolute paths in the condor
    # submit files if we plan on moving workflows that could be resubmitted
    # in the future

    # TODO We might want to update the braindump files for subworkflows

    # Update master database
    mdbsession.commit()
    mdbsession.close()

    # Finally, update object
    self.submitdir = dest
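# Self-contained illustration of why the db_url rewrite above only affects
# an sqlite database kept inside the submit dir: str.replace is a no-op
# when the URL does not contain the old path. Paths are placeholders.
def _example_db_url_rewrite():
    old_dir = "/scratch/user/run0001"
    new_dir = "/archive/user/run0001"
    sqlite_url = "sqlite:///%s/workflow.db" % old_dir
    mysql_url = "mysql://user@dbhost/pegasus"

    # An sqlite DB inside the submit dir follows the move...
    assert sqlite_url.replace(old_dir, new_dir) == (
        "sqlite:///%s/workflow.db" % new_dir
    )
    # ...while an external server URL is untouched (no substring match).
    assert mysql_url.replace(old_dir, new_dir) == mysql_url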
def all_workflows_db(
    db, update=True, pegasus_version=None, schema_check=True, force=False
):
    """
    Update/Downgrade all completed workflow databases listed in the
    master_workflow table.
    :param db: DB session object
    :param update: whether to update (True) or downgrade (False) the databases
    :param pegasus_version: version of the Pegasus software (e.g., 4.6.0)
    :param schema_check: whether a sanity check of the schema should be performed
    :param force: whether operations should be performed despite conflicts
    """
    # log files
    file_prefix = "%s-dbadmin" % time.strftime("%Y%m%dT%H%M%S")
    f_out = open("%s.out" % file_prefix, "w")
    f_err = open("%s.err" % file_prefix, "w")

    data = (
        db.query(
            MasterWorkflow.db_url,
            MasterWorkflowstate.state,
            func.max(MasterWorkflowstate.timestamp),
        )
        .join(MasterWorkflowstate)
        .group_by(MasterWorkflow.wf_id)
        .all()
    )

    # Only terminated workflows can be touched; active ones are logged
    db_urls = []
    for d in data:
        if d[1] == "WORKFLOW_TERMINATED":
            db_urls.append(d[0])
        else:
            f_err.write("[ACTIVE] %s\n" % d[0])

    counts = {
        "total": len(data),
        "running": len(data) - len(db_urls),
        "success": 0,
        "failed": 0,
        "unable_to_connect": 0,
    }
    if update:
        msg = ["updating", "Updated"]
    else:
        msg = ["downgrading", "Downgraded"]

    print("")
    print("Verifying and %s workflow databases:" % msg[0])
    i = counts["running"]
    for dburi in db_urls:
        log.debug("{} '{}'...".format(msg[0], dburi))
        i += 1
        sys.stdout.write("\r%d/%d" % (i, counts["total"]))
        sys.stdout.flush()
        try:
            if update:
                con = connection.connect(
                    dburi,
                    pegasus_version=pegasus_version,
                    schema_check=schema_check,
                    create=True,
                    force=force,
                    verbose=False,
                )
            else:
                con = connection.connect(
                    dburi, schema_check=schema_check, create=False, verbose=False
                )
                metadata.clear()
                warnings.simplefilter("ignore")
                metadata.reflect(bind=con.get_bind())
                db_downgrade(
                    con, pegasus_version=pegasus_version, force=force, verbose=False
                )
            con.close()
            f_out.write("[SUCCESS] %s\n" % dburi)
            counts["success"] += 1
        except connection.ConnectionError as e:
            if "unable to open database file" in str(e):
                f_err.write("[UNABLE TO CONNECT] %s\n" % dburi)
                counts["unable_to_connect"] += 1
                log.debug(e)
            else:
                f_err.write("[ERROR] %s\n" % dburi)
                counts["failed"] += 1
                log.debug(e)
        except Exception as e:
            f_err.write("[ERROR] %s\n" % dburi)
            counts["failed"] += 1
            log.debug(e)

    f_out.close()
    f_err.close()

    print("\n\nSummary:")
    print("  Verified/{}: {}/{}".format(msg[1], counts["success"], counts["total"]))
    print("  Failed: {}/{}".format(counts["failed"], counts["total"]))
    print(
        "  Unable to connect: {}/{}".format(
            counts["unable_to_connect"], counts["total"]
        )
    )
    print(
        "  Unable to update (active workflows): %s/%s"
        % (counts["running"], counts["total"])
    )
    print("\nLog files:")
    print("  %s.out (Succeeded operations)" % file_prefix)
    print("  %s.err (Failed operations)" % file_prefix)
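# Hypothetical driver: open the per-user master database and update every
# terminated workflow database it lists. The sqlite path mirrors the one
# used by detach() above and is an assumption, not a documented default.
def _example_update_all_workflow_dbs():
    master = connection.connect(
        "sqlite:///%s/.pegasus/workflow.db" % expanduser("~"),
        db_type=connection.DBType.MASTER,
    )
    try:
        all_workflows_db(master, update=True, schema_check=True)
    finally:
        master.close()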