def main(args=sys.argv): # Command line argument parser usage = "%prog [options] command" parser = StatsOptionParser(usage) (options, args) = parser.parse_args(args[1:]) # validate options parser.validate(options) # configure logging logging_level = logging.INFO if options.debug: logging_level = logging.DEBUG logging.basicConfig(format = '%(levelname)s: %(filename)s: %(message)s', level = logging_level, stream = sys.stdout) # Config file argument parser config = HPCStatsConfig(options) # dump entire config file for section in config.sections(): logging.debug(section) for option in config.options(section): logging.debug(" %s = %s", option, config.get(section, option)) # Instantiate connexion to db db_section = "hpcstatsdb" dbhostname = config.get(db_section,"hostname") dbport = config.get(db_section,"port") dbname = config.get(db_section,"dbname") dbuser = config.get(db_section,"user") dbpass = config.get(db_section,"password") db = HPCStatsdb(dbhostname, dbport, dbname, dbuser, dbpass) db.bind() logging.debug("db information %s %s %s %s %s" % db.infos()) cluster_finder = ClusterFinder(db) cluster = cluster_finder.find(options.clustername) if (options.arch): logging.info("=> Updating architecture for cluster %s" % (options.clustername)) try: arch_importer = ArchitectureImporterFactory().factory(db, config, cluster.get_name()) arch_importer.update_architecture() db.commit() except RuntimeError: logging.error("error occured on architecture update.") if (options.events): logging.info("=> Updating events for cluster %s" % (options.clustername)) try: event_importer = EventImporterFactory().factory(db, config, cluster.get_name()) event_importer.update_events() db.commit() except RuntimeError: logging.error("error occured on events update.") if (options.users): logging.info("=> Updating users for cluster %s" % (options.clustername)) try: user_importer = UserImporterFactory().factory(db, config, cluster.get_name()) user_importer.update_users() db.commit() except RuntimeError: logging.error("error occured on users update.") if (options.jobs): logging.info("=> Update of jobs for cluster %s" % (options.clustername)) try: job_importer = JobImporterFactory().factory(db, config, cluster.get_name()) # The last updated job in hpcstatsdb for this cluster last_updated_id = job_importer.get_last_job_id() # The unfinished jobs in hpcstatsdb for this cluster ids = job_importer.get_unfinished_job_id() jobs_to_update = ['not_empty'] new_jobs = ['not_empty'] nb_theads = 4 offset = 0 max_jobs = 100000 logging.debug("Get jobs to update") jobs_to_update = job_importer.get_job_information_from_dbid_job_list(ids) for job in jobs_to_update: offset = offset + 1 if not offset % 10: logging.debug("update job push %d" % offset) job.update(db) offset = 0 while new_jobs: logging.debug("Get %d new jobs starting at offset %d" % (max_jobs, offset)) new_jobs = job_importer.get_job_for_id_above(last_updated_id, offset, max_jobs) for job in new_jobs: offset = offset + 1 if not offset % 10000: logging.debug("create job push %d" % offset) job.save(db) db.commit() except RuntimeError: logging.error("error occured on jobs update.") db.unbind()
def main(args=sys.argv): # Command line argument parser usage = "usage: %prog [options] (-c|--cluster) <cluster> [-i,--interval (day|week|month)] [-t,--template <template>]" parser = ReportOptionParser(usage) (options, args) = parser.parse_args(args[1:]) if not options.cluster: parser.error("<cluster> should be specified") # Config file argument parser config = HPCStatsConfig(options) # locale to format numbers locale.setlocale(locale.LC_ALL, 'fr_FR') # TODO: load locale output of the environment if (options.debug): # dump entire config file for section in config.sections(): print section for option in config.options(section): print " ", option, "=", config.get(section, option) # Instantiate connexion to db db_section = "hpcstatsdb" dbhostname = config.get(db_section,"hostname") dbport = config.get(db_section,"port") dbname = config.get(db_section,"dbname") dbuser = config.get(db_section,"user") dbpass = config.get(db_section,"password") db = HPCStatsdb(dbhostname, dbport, dbname, dbuser, dbpass) if (options.debug): print "db information %s %s %s %s %s" % db.infos() if (options.debug): print "→ running on cluster %s with interval %s" % (options.cluster, options.interval) # check if specified template is available #check_template(options.template) # get parameters step = options.interval template = options.template db.bind() cluster_finder = ClusterFinder(db) cluster = cluster_finder.find(options.cluster) # check if cluster really exists if not cluster.exists_in_db(db): sys.exit("error: cluster %s does not exist in database. Available clusters are: %s." % (cluster, ",".join(available_clusters)) ) if options.debug: print "main: getting nb cpus on cluster %s" % (cluster.get_name()) # get the total number of cpus inside the cluster nb_cpus_cluster = cluster.get_nb_cpus(db) results = [] # get datetime of the first job min_datetime = cluster.get_min_datetime(db) #min_datetime = datetime(2011,5,1,0,0,0) max_datetime = datetime.now() tmp_datetime = min_datetime db.unbind() userstats_global = {} groupstats_global = {} processes_args = [] # construct intervals with process information mapping while tmp_datetime < max_datetime: # get the exacts beginning and end of the step sized interval # around the tmp datetime (begin,end) = get_interval_begin_end(step,tmp_datetime) # construct an array of args for each process/interval process_info = [] #process_info.append(options) #process_info.append(config) #process_info.append(cluster) #process_info.append(begin) #process_info.append(end) process_info.append(options) process_info.append(config) process_info.append(begin) process_info.append(end) process_info.append(cluster) # finally appends this append to the global array processes_args.append(process_info) # going to next interval interval = get_interval_timedelta(step) tmp_datetime += interval if options.debug: print processes_args # launch processes with their corresponding arguments parallel = True processes_results = [] if parallel: pool = Pool(4) processes_results = pool.map(run_interval, processes_args) else: for process_info in processes_args: process_results = run_interval(process_info) processes_results.append(process_results) # then get results for result in processes_results: str_date = result[0] groupstats = result.pop() userstats = result.pop() userstats_global[str_date] = userstats groupstats_global[str_date] = groupstats results.append(result) if options.debug: print "debug: usersstats", userstats_global print "debug: groupsstats", groupstats_global # print results using template mytemplate 
    mytemplate = Template(filename=get_template_filename(template),
                          input_encoding='utf-8',
                          output_encoding='utf-8',
                          default_filters=['decode.utf8'],
                          encoding_errors='ignore')
    print mytemplate.render(cluster=cluster,
                            step=step,
                            results=results,
                            userstats_global=userstats_global,
                            groupstats_global=groupstats_global)
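
# A hypothetical sketch, not the real HPCStats code: it documents the contract
# that the reporting main() above expects from run_interval(). The function
# receives a single argument (as required by Pool.map), the
# [options, config, begin, end, cluster] list built for each interval, and
# returns a list whose first item is the formatted interval date and whose
# last two items are the per-user and per-group statistics. The date format
# and the empty statistics dicts are assumptions made for the example.
def run_interval(process_info):
    (options, config, begin, end, cluster) = process_info
    str_date = begin.strftime("%Y-%m-%d")
    userstats = {}   # computed from the database for [begin, end) in the real code
    groupstats = {}  # idem, aggregated by group
    return [str_date, userstats, groupstats]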