def set_project_domain(self, project_code, domain_key):
    """Attach a new Domain to an existing Project in the database.

    Looks up the Domain by its key and the Project by its code, then
    persists the new association. Raises HPCStatsRuntimeError if either
    the Project code or the Domain key cannot be found in the database.
    """
    new_domain = Domain(domain_key, None)
    if not new_domain.existing(self.db):
        raise HPCStatsRuntimeError( \
            "unable to find domain %s in database" \
            % (domain_key))

    target = Project(None, project_code, None)
    if not target.find(self.db):
        raise HPCStatsRuntimeError( \
            "unable to find project %s in database" \
            % (project_code))

    # Fully load the project record first so its description is kept
    # intact by the update below.
    target.load(self.db)
    target.domain = new_domain
    logger.info("updating project %s with new domain %s",
                project_code, domain_key)
    target.update(self.db)
def load_cluster(self, cluster):
    """Connect to cluster Slurm database to extract project codes from
    jobs wckeys.

    Wckeys are expected in ``project:business`` format; malformed keys
    are recorded once in ``self.invalid_wckeys`` and logged. Valid
    project codes are appended to ``self.projects`` (deduplicated).

    Raises HPCStatsSourceError in case of error.
    """
    self.log.debug("loading project codes from %s slurm database", cluster)

    self.connect_db(cluster)

    # Hoist the repeated dict lookup out of the clause construction.
    partitions = self.clusters_db[cluster]['partitions']
    if not partitions:
        partitions_clause = ''
    else:
        # One %s placeholder per partition, bound safely as parameters.
        partitions_clause = \
            "WHERE job.partition IN (%s)" % \
            ','.join(['%s'] * len(partitions))

    req = """
            SELECT DISTINCT(wckey)
              FROM %s_job_table job
              %s
          """ % (self.clusters_db[cluster]['prefix'], partitions_clause)
    params = tuple(partitions)

    self.cur.execute(req, params)

    while True:
        row = self.cur.fetchone()
        if row is None:
            break

        wckey = row[0]
        if wckey == '':
            continue

        wckey_items = wckey.split(':')
        if len(wckey_items) != 2:
            # Record and warn only once per distinct malformed wckey to
            # avoid flooding the log.
            if wckey not in self.invalid_wckeys:
                self.invalid_wckeys.append(wckey)
                self.log.warn(Errors.E_P0001,
                              "format of wckey %s is not valid",
                              wckey)
            continue

        project_code = wckey_items[0]
        project = Project(domain=self.default_domain,
                          code=project_code,
                          description=None)
        # check for duplicate project
        if not self.find_project(project):
            self.projects.append(project)
def test_update(self):
    """ProjectImporterCSV.update() works with simple data """
    the_domain = Domain('dom1', 'domain name 1')
    the_project = Project(the_domain, 'code1', 'project description 1')
    # Register the expected save request so the mocked DB layer can
    # answer it with a fake project ID.
    MockPg2.PG_REQS['save_project'].set_assoc(
        params=(the_project.code, the_project.description, the_domain.key),
        result=[[1]],
    )
    self.importer.domains = [the_domain]
    self.importer.projects = [the_project]
    self.importer.update()
def set_project_description(self, project_code, description):
    """Overwrite in DB the description of the Project given in parameter.

    Raises HPCStatsRuntimeError if no Project with this code is found
    in the database.
    """
    found = Project(None, project_code, None)
    if not found.find(self.db):
        raise HPCStatsRuntimeError( \
            "unable to find project %s in database" \
            % (project_code))

    # Load the complete record first so the domain key survives the
    # update below.
    found.load(self.db)
    found.description = description
    logger.info("updating project %s with new description", project_code)
    found.update(self.db)
def get_jobs_after_batchid(self, batchid, window_size=0):
    """Fill the jobs attribute with the list of Jobs found in Slurm DB
    whose id_job is over or equals to the batchid in parameter.

    :param batchid: lowest job_db_inx (inclusive) to load.
    :param window_size: optional LIMIT on the number of rows fetched;
        0 means no limit.
    :returns: the last found batch_id, or -1 if no job matched.
    :raises HPCStatsSourceError: on unparsable TRES, unknown user, or
        (depending on strict_* flags) unknown account/project/business.
    """
    self.jobs = []

    if window_size:
        limit = "LIMIT %d" % (window_size)
    else:
        limit = ''

    last_batch_id = -1

    # Older Slurm schemas store allocated CPUs in a dedicated column;
    # newer ones encode them in a TRES string that must be parsed.
    old_schema = self._is_old_schema()
    if old_schema is True:
        cpu_field = 'cpus_alloc'
    else:
        cpu_field = 'tres_alloc'

    if not len(self.partitions):
        partitions_clause = ''
    else:
        # One %s placeholder per partition, bound as query parameters.
        partitions_clause = "AND job.partition IN (%s)" % \
            ','.join(['%s'] * len(self.partitions))

    req = """
            SELECT job_db_inx,
                   id_job,
                   id_user,
                   id_group,
                   time_submit,
                   time_start,
                   time_end,
                   timelimit,
                   nodes_alloc,
                   %s,
                   job.partition,
                   qos.name AS qos,
                   job.account,
                   state,
                   nodelist,
                   assoc.user,
                   job_name,
                   wckey
              FROM %s_job_table job,
                   %s_assoc_table assoc,
                   qos_table qos
             WHERE job_db_inx >= %%s
                   %s
               AND assoc.id_assoc = job.id_assoc
               AND qos.id = job.id_qos
          ORDER BY job_db_inx
                   %s
          """ % (cpu_field, self.prefix, self.prefix,
                 partitions_clause, limit)
    params = (batchid, ) + tuple(self.partitions)

    self.cur.execute(req, params)

    while (1):
        row = self.cur.fetchone()
        if row == None:
            break

        self.nb_loaded_jobs += 1

        batch_id = last_batch_id = row[0]
        sched_id = row[1]

        # Timestamps of 0 mean "not set" in Slurm DBD.
        submission_t = row[4]
        if submission_t == 0:
            submission = None
        else:
            submission = datetime.fromtimestamp(submission_t)

        start_t = row[5]
        if start_t == 0:
            start = None
        else:
            start = datetime.fromtimestamp(start_t)

        end_t = row[6]
        if end_t == 0:
            end = None
        else:
            end = datetime.fromtimestamp(end_t)

        # Some jobs in Slurm DBD have an end but no start. Typically, this
        # concernes the jobs that have been cancelled before starting. For
        # these jobs, we set the start equal to the end.
        if start is None and end is not None:
            start = end

        wall_t = row[7]
        if wall_t == 0:
            walltime = None
        elif wall_t >= 2147483648:
            # 2**31 and above: presumably Slurm's "unlimited" timelimit
            # sentinel — mapped to "-1" here. TODO confirm against the
            # Slurm version in use.
            walltime = "-1"
        else:
            walltime = str(wall_t)

        name = row[16]

        if old_schema is True:
            nbcpu = row[9]
        else:
            nbcpu = extract_tres_cpu(row[9])
            if nbcpu == -1:
                raise HPCStatsSourceError( \
                    "unable to extract cpus_alloc from job tres")

        state = JobImporterSlurm.get_job_state_from_slurm_state(row[13])

        # Normalize the "no node assigned" markers to None.
        nodelist = row[14]
        if nodelist == "(null)" or nodelist == "None assigned":
            nodelist = None

        partition = self.job_partition(sched_id, row[10], nodelist)
        qos = row[11]
        queue = "%s-%s" % (partition, qos)
        job_acct = row[12]

        login = row[15]

        # Resolve the job's account among the accounts already loaded by
        # the application; behavior on miss depends on the strict flag.
        searched_user = User(login, None, None, None)
        searched_account = Account(searched_user, self.cluster,
                                   None, None, None, None)
        account = self.app.users.find_account(searched_account)
        if account is None:
            msg = "account %s not found in loaded accounts" \
                  % (login)
            if self.strict_job_account_binding == True:
                raise HPCStatsSourceError(msg)
            elif login not in self.unknown_accounts:
                self.unknown_accounts.append(login)
                self.log.warn(Errors.E_J0001, msg)
            # Job without a resolvable account is skipped entirely.
            self.nb_excluded_jobs += 1
            continue
        user = self.app.users.find_user(searched_user)
        if user is None:
            msg = "user %s not found in loaded users" % (login)
            raise HPCStatsSourceError(msg)
        job_department = user.department

        wckey = row[17]

        # empty wckey must be considered as None
        if wckey == '':
            wckey = None

        if wckey is None:
            project = None
            business = None
        else:
            # Wckey format is "project_code:business_code".
            wckey_items = wckey.split(':')
            if len(wckey_items) != 2:
                msg = "format of wckey %s is not valid" % (wckey)
                if self.strict_job_wckey_format == True:
                    raise HPCStatsSourceError(msg)
                elif wckey not in self.invalid_wckeys:
                    self.invalid_wckeys.append(wckey)
                    self.log.warn(Errors.E_J0002, msg)
                project = None
                business = None
            else:
                project_code = wckey_items[0]
                searched_project = Project(None, project_code, None)
                project = self.app.projects.find_project(searched_project)
                if project is None:
                    msg = "project %s not found in loaded projects" \
                          % (project_code)
                    if self.strict_job_project_binding == True:
                        raise HPCStatsSourceError(msg)
                    elif project_code not in self.unknown_projects:
                        self.unknown_projects.append(project_code)
                        self.log.warn(Errors.E_J0003, msg)

                business_code = wckey_items[1]
                searched_business = Business(business_code, None)
                business = self.app.business.find(searched_business)
                if business is None:
                    msg = "business code %s not found in loaded " \
                          "business codes" % (business_code)
                    if self.strict_job_businesscode_binding == True:
                        raise HPCStatsSourceError(msg)
                    elif business_code not in self.unknown_businesses:
                        self.unknown_businesses.append(business_code)
                        self.log.warn(Errors.E_J0004, msg)

        job = Job(account, project, business, sched_id, str(batch_id),
                  name, nbcpu, state, queue, job_acct, job_department,
                  submission, start, end, walltime)

        self.jobs.append(job)

        if nodelist is not None:
            self.create_runs(nodelist, job)

    return last_batch_id
def __init__(self, db, config, cluster_name):
    """Import user/project/business context entries from the context CSV file.

    Reads the cluster's context file (comma-separated, '|' quotechar),
    wipes existing context entries from the database, then inserts one
    Context row per (user, project) and per (user, business) association
    found on each line: projects in column 6 and businesses in column 7,
    both '|'-separated. Per-row errors are rolled back to a savepoint and
    logged; the whole import is committed once at the end.

    :raises RuntimeError: if the configured context file does not exist.
    """
    self._db = db
    self._cluster_name = cluster_name
    context_section = self._cluster_name + "/context"
    self._context_file = config.get(context_section, "file")

    if not os.path.isfile(self._context_file):
        logging.error("context file %s does not exist", self._context_file)
        raise RuntimeError

    # delete all contexts entries in databases
    logging.debug("Delete all context entries in db")
    delete_contexts(self._db)
    self._db.commit()

    cur = db.get_cur()

    def save_context(context, row):
        """Insert one Context row; roll back to the savepoint on DB error."""
        try:
            context.save(self._db)
            logging.debug("add context : %s", context)
            cur.execute("SAVEPOINT my_savepoint;")
        except psycopg2.DataError:
            logging.error("impossible to add CONTEXT entry in database : "
                          "(%s), due to encoding error", row)
            cur.execute("ROLLBACK TO SAVEPOINT my_savepoint;")
        except psycopg2.IntegrityError:
            logging.error("impossible to add CONTEXT entry in database : "
                          "(%s), due to relations error", row)
            cur.execute("ROLLBACK TO SAVEPOINT my_savepoint;")

    # savepoint is used to handle exceptions per-row and commit in
    # database only at the end
    cur.execute("SAVEPOINT my_savepoint;")

    with open(self._context_file, 'r') as csvfile:
        file_reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row in file_reader:
            logging.debug("update projects and business codes for user : %s",
                          row[0].lower())
            # A new context is set in database for every attached project
            # AND every attached business; each line references projects
            # and/or businesses.
            if row[6]:
                for pareo in re.split(r'\|', row[6]):
                    project = Project()
                    try:
                        project.project_from_pareo(self._db, pareo)
                    except Exception:  # narrowed from a bare except
                        logging.error("context rejected. Project %s does "
                                      "not exist", pareo)
                        cur.execute("ROLLBACK TO SAVEPOINT my_savepoint;")
                        continue
                    save_context(Context(login=row[0].lower(),
                                         job=None,
                                         project=project.get_id(),
                                         business=None), row)
            if row[7]:
                for code in re.split(r'\|', row[7]):
                    business = Business()
                    try:
                        business.business_from_key(self._db, code)
                    except Exception:  # narrowed from a bare except
                        logging.error("context rejected. Business %s does "
                                      "not exist", code)
                        cur.execute("ROLLBACK TO SAVEPOINT my_savepoint;")
                        continue
                    save_context(Context(login=row[0].lower(),
                                         job=None,
                                         project=None,
                                         business=business.get_id()), row)
            if not row[6] and not row[7]:
                logging.error("line : %s rejected - not code or project "
                              "associate", row)

    self._db.commit()
def __init__(self, db, config, cluster_name):
    """Import domains, sectors and projects from the cluster pareo CSV file.

    Wipes existing domain/sector/project entries (and their dependent
    contexts), then parses the ';'-separated pareo file:

      - column 0: project (pareo) code
      - column 1: project description
      - column 2: domain formatted as ``[key]description``
      - column 3: sector formatted as ``[id]description``

    Rows failing on encoding or relation errors are rolled back to a
    savepoint and logged; the whole import is committed once at the end.

    :raises RuntimeError: if the configured pareo file does not exist.
    """
    self._db = db
    self._cluster_name = cluster_name
    pareo_section = self._cluster_name + "/pareo"
    self._pareo_file = config.get(pareo_section, "file")

    if not os.path.isfile(self._pareo_file):
        logging.error("pareo file %s does not exist", self._pareo_file)
        raise RuntimeError

    # delete all entries in domains and sectors tables and
    # its dependances in contexts and projects table
    logging.debug("Delete all pareo entries in db")
    delete_contexts_with_pareo(self._db)
    delete_projects(self._db)
    delete_sectors(self._db)
    delete_domains(self._db)
    self._db.commit()

    cur = db.get_cur()
    # savepoint is used to handle exceptions per-row and commit in
    # database only at the end.
    cur.execute("SAVEPOINT my_savepoint;")

    # Delimiters to split "[id]description" values in domain and sector
    # columns. Raw string: '\[' is a regex escape, not a string escape.
    delimiters = r"\[|]"

    with open(self._pareo_file, "r") as csvfile:
        file_reader = csv.reader(csvfile, delimiter=";", quotechar="|")
        for row in file_reader:
            # update domains table with third column of the file, only if
            # sector exist in forth column
            if row[2]:
                id_domain = re.split(delimiters, row[2])[1]
                description_domain = re.split(delimiters, row[2])[2]
                domain = Domain(id=id_domain,
                                description=description_domain)
                try:
                    if not domain.already_exist(self._db):
                        domain.save(self._db)
                        if not domain.get_description():
                            logging.debug("add domain : %s, without "
                                          "description", domain.get_id())
                        else:
                            logging.debug(
                                "add domain : %s, with description : %s",
                                domain.get_id(), domain.get_description()
                            )
                        cur.execute("SAVEPOINT my_savepoint;")
                except psycopg2.DataError:
                    logging.error("impossible to add DOMAIN entry in "
                                  "database : (%s), due to encoding error",
                                  row[2])
                    cur.execute("ROLLBACK TO SAVEPOINT my_savepoint;")
            else:
                id_domain = None

            # update sector table with forth column of the file
            if id_domain:
                if row[3] and row[3] != "[]":
                    id_sector = int(re.sub("[^0-9]", "",
                                           re.split(delimiters, row[3])[1]))
                    description_sector = re.split(delimiters, row[3])[2]
                if not row[3] or row[3] == "[]":
                    # No sector given: attach a default sector 0 to the
                    # domain so the project FK constraints can be met.
                    id_sector = 0
                    description_sector = \
                        "default value for domain " + domain.get_id()
                sector = Sector(id=id_sector,
                                domain=id_domain,
                                description=description_sector)
                try:
                    if not sector.already_exist(self._db):
                        sector.save(self._db)
                        if not sector.get_description():
                            logging.debug(
                                "add sector : %s, from domain : %s, "
                                "without description",
                                sector.get_id(),
                                sector.get_domain(),
                            )
                        else:
                            logging.debug(
                                "add sector : %s, from domain : %s, "
                                "with description : %s",
                                sector.get_id(),
                                sector.get_domain(),
                                sector.get_description(),
                            )
                        cur.execute("SAVEPOINT my_savepoint;")
                except psycopg2.DataError:
                    logging.error("impossible to add SECTOR entry in "
                                  "database : (%s) due to encoding error",
                                  row[3])
                    cur.execute("ROLLBACK TO SAVEPOINT my_savepoint;")
                except psycopg2.IntegrityError:
                    logging.error(
                        "impossible to add SECTOR entry in database : "
                        "(%s), due to relations error", row[3]
                    )
                    cur.execute("ROLLBACK TO SAVEPOINT my_savepoint;")
            else:
                id_sector = None

            # update Project table with first and seconds columns of the
            # file. Because of constrains of database, it is impossible to
            # add project with domain reference and no sector reference:
            # need both or any. When the domain exists but the sector does
            # not, None is set for both (see first if condition).
            if row[0]:
                project = Project(sector=id_sector,
                                  domain=id_domain,
                                  description=row[1],
                                  pareo=row[0])
                try:
                    if not project.already_exist(self._db):
                        project.save(self._db)
                        if not project.get_description():
                            logging.debug(
                                "add project : %s, from domain : %s, "
                                "without description",
                                project.get_pareo(),
                                project.get_domain(),
                            )
                        else:
                            logging.debug(
                                "add project : %s, from domain : %s, and "
                                "sector : %s with description : %s",
                                project.get_pareo(),
                                project.get_domain(),
                                project.get_sector(),
                                project.get_description(),
                            )
                        cur.execute("SAVEPOINT my_savepoint;")
                except psycopg2.DataError:
                    logging.error(
                        "impossible to add PAREO entry in database : "
                        "(%s - %s), due to encoding error",
                        row[0], row[1],
                    )
                    cur.execute("ROLLBACK TO SAVEPOINT my_savepoint;")
                except psycopg2.IntegrityError:
                    logging.error(
                        "impossible to add PAREO entry in database : "
                        "(%s - %s), due to relations error",
                        row[0], row[1],
                    )
                    cur.execute("ROLLBACK TO SAVEPOINT my_savepoint;")

    self._db.commit()
def load(self):
    """Open CSV file and load project out of it.

    Raises Exceptions if error is found in the file. Returns the list
    of Projects with their Domains.
    """
    self.check()

    self.domains = []
    self.projects = []

    with open(self.csv_file, 'r') as csvfile:
        for row in csv.reader(csvfile, delimiter=';', quotechar='|'):
            project_code = row[0]
            project_name = row[1]

            # Parse the domain column, formatted as "[key] name".
            matched = re.match(r"\[(.*)\](.*)", row[2])
            if not matched:
                raise HPCStatsSourceError( \
                    "Project CSV %s domain format is invalid" \
                    % (project_code))

            domain_key = matched.group(1).strip()
            domain_name = matched.group(2).strip()
            if not domain_key:
                raise HPCStatsSourceError( \
                    "Project CSV %s domain key is empty" \
                    % (project_code))
            if not domain_name:
                raise HPCStatsSourceError( \
                    "Project CSV %s domain name is empty" \
                    % (project_code))

            # Reuse an already-registered Domain when one matches,
            # otherwise record this new one.
            candidate = Domain(key=domain_key, name=domain_name)
            domain = self.find_domain(candidate)
            if domain is None:
                domain = candidate
                self.domains.append(domain)

            project = Project(domain=domain,
                              code=project_code,
                              description=project_name)
            # A project code seen twice is a fatal input error.
            if self.find_project(project):
                raise HPCStatsSourceError( \
                    "duplicated project code %s in CSV file" \
                    % (project_code))
            self.projects.append(project)

    return self.projects