# NOTE: imports below are inferred from usage in this section; project-local
# helpers (db_access, execute_with_retry, ALLOWED_PROCESSING_STRINGS, CONFIG,
# PLOTNAMEPATTERN, RUNPATTERN, ROOTFILES, fetch_run, fetch_runs, extract_mes,
# calculate_trends, get_all_me_names, remove_old_versions,
# section_to_config_object, add_oms_info_to_result, ForkPool) are assumed to
# be defined elsewhere in the repository. The section appears to collect
# functions from more than one script (note the two fetch() definitions).
import errno
import os
import timeit
from collections import defaultdict
from configparser import RawConfigParser
from glob import glob
from multiprocessing import Pool

from flask import jsonify, redirect, request


def plot_selection():
  db_access.setup_db()
  session = db_access.get_session()
  try:
    obj = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    rows = execute_with_retry(
        session, '''
        SELECT selection_params.id, selection_params.subsystem, selection_params.pd, selection_params.processing_string, last_calculated_configs.name
        FROM selection_params
        JOIN last_calculated_configs ON config_id = last_calculated_configs.id
        ORDER BY selection_params.subsystem, selection_params.pd, selection_params.processing_string, last_calculated_configs.name
        ;
        ''')
    rows = list(rows)

    for row in rows:
      if row['processing_string'] in ALLOWED_PROCESSING_STRINGS:
        obj[row['subsystem']][row['pd']][row['processing_string']].append({
            'name': row['name'],
            'id': row['id']
        })

    return jsonify(obj)
  finally:
    session.close()
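# `execute_with_retry` is used throughout this section but is not defined in
# it. The function below is a minimal sketch of what such a helper could look
# like; the retry count and the assumption that a failed session can be
# rolled back and reused are illustrative, not the actual implementation.
def execute_with_retry_sketch(session, sql, params=None, retries=3):
  """Execute `sql` on `session`, retrying on transient DB errors."""
  for attempt in range(retries):
    try:
      return session.execute(sql, params)
    except Exception:
      # Roll back so the session is usable again, then retry.
      session.rollback()
      if attempt == retries - 1:
        raise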
def fetch(update, nproc):
  db_access.setup_db()

  all_runs = []
  extracted_runs = []
  session = db_access.get_session()
  try:
    print('Getting missing runs...')
    all_runs = list(
        session.execute('SELECT DISTINCT(run) FROM historic_data_points;'))
    all_runs = [x[0] for x in all_runs]
    extracted_runs = list(
        session.execute('SELECT DISTINCT(run) FROM oms_data_cache;'))
    extracted_runs = [x[0] for x in extracted_runs]
  finally:
    session.close()

  # If we are updating, refetch every run; otherwise fetch only the runs
  # that are not yet in the OMS data cache.
  if update:
    diff = all_runs
  else:
    diff = [x for x in all_runs if x not in extracted_runs]

  print('Number of runs to be fetched: %s' % len(diff))
  db_access.dispose_engine()
  pool = Pool(nproc)
  pool.map(fetch_run, diff)
  print('Done.')
def runs():
  db_access.setup_db()
  session = db_access.get_session()
  runs = [
      h.run for h in session.query(db_access.HistoricDataPoint.run).distinct()
      .order_by(db_access.HistoricDataPoint.run.asc())
  ]
  session.close()
  return jsonify(runs)
def expand_url():
  valid_url_types = [
      'main_gui_url', 'main_image_url', 'optional1_gui_url',
      'optional1_image_url', 'optional2_gui_url', 'optional2_image_url',
      'reference_gui_url', 'reference_image_url'
  ]
  data_point_id = request.args.get('data_point_id', type=int)
  url_type = request.args.get('url_type')

  if data_point_id is None:
    return jsonify(
        {'message': 'Please provide a data_point_id parameter.'}), 400
  if url_type not in valid_url_types:
    return jsonify({
        'message':
        'Please provide a valid url_type parameter. Accepted values are: %s'
        % ','.join(valid_url_types)
    }), 400

  db_access.setup_db()
  session = db_access.get_session()
  try:
    # Interpolating url_type into the query is safe here because it was
    # validated against the whitelist above.
    sql = '''
    SELECT %s FROM historic_data_points
    WHERE id = :id
    ;
    ''' % url_type
    rows = list(execute_with_retry(session, sql, {'id': data_point_id}))
    url = rows[0][url_type]
    if url:
      url = url.replace('+', '%2B')
      return redirect(url, code=302)
    else:
      return jsonify({'message': 'Requested URL type is not found.'}), 404
  except Exception as e:
    print(e)
  finally:
    session.close()

  return jsonify({'message': 'Error getting the url from the DB.'}), 500
def selection():
  db_access.setup_db()
  session = db_access.get_session()
  try:
    obj = defaultdict(lambda: defaultdict(list))
    rows = execute_with_retry(
        session,
        'SELECT DISTINCT subsystem, pd, processing_string FROM selection_params ORDER BY subsystem, pd, processing_string;'
    )
    rows = list(rows)
    for row in rows:
      if row['processing_string'] in ALLOWED_PROCESSING_STRINGS:
        obj[row['subsystem']][row['pd']].append(row['processing_string'])
    return jsonify(obj)
  finally:
    session.close()
def fetch(update):
  db_access.setup_db()

  min_run = 0
  max_run = 0
  session = db_access.get_session()
  try:
    print('Getting min and max runs...')
    minmax = list(
        session.execute('SELECT MIN(run), MAX(run) FROM oms_data_cache;'))
    min_run = minmax[0][0]
    max_run = minmax[0][1]
  finally:
    session.close()

  print('Run range: %s-%s' % (min_run, max_run))
  fetch_runs(min_run, max_run)
def data():
  subsystem = request.args.get('subsystem')
  pd = request.args.get('pd')
  processing_string = request.args.get('processing_string')
  from_run = request.args.get('from_run', type=int)
  to_run = request.args.get('to_run', type=int)
  runs = request.args.get('runs')
  latest = request.args.get('latest', type=int)
  series = request.args.get('series')
  series_id = request.args.get('series_id', type=int)

  if series_id is None:
    if subsystem is None:
      return jsonify({'message': 'Please provide a subsystem parameter.'}), 400
    if pd is None:
      return jsonify({'message': 'Please provide a pd parameter.'}), 400
    if processing_string is None:
      return jsonify(
          {'message': 'Please provide a processing_string parameter.'}), 400

  # from_run/to_run, latest and runs are mutually exclusive ways of selecting
  # runs: at most one of them can be used at a time.
  modes = 0
  if from_run is not None and to_run is not None:
    modes += 1
  if latest is not None:
    modes += 1
  if runs is not None:
    modes += 1

  if modes > 1:
    return jsonify({
        'message': 'The combination of parameters you provided is invalid.'
    }), 400

  if runs is not None:
    try:
      runs = [int(x) for x in runs.split(',')]
    except ValueError:
      return jsonify({
          'message':
          'runs parameter is not valid. It has to be a comma separated list of integers.'
      }), 400

  if series and series_id:
    return jsonify({
        'message': 'series and series_id cannot be defined at the same time.'
    }), 400

  db_access.setup_db()
  session = db_access.get_session()

  # Get series data by series_id
  if series_id:
    sql = '''
    SELECT selection_params.subsystem, selection_params.pd, selection_params.processing_string, last_calculated_configs.name
    FROM selection_params
    JOIN last_calculated_configs ON config_id = last_calculated_configs.id
    WHERE selection_params.id = :id
    ;
    '''
    rows = execute_with_retry(session, sql, {'id': series_id})
    rows = list(rows)
    subsystem = rows[0]['subsystem']
    pd = rows[0]['pd']
    processing_string = rows[0]['processing_string']
    series = rows[0]['name']

  if runs is None:
    # Will need filtering
    runs_filter = ''
    latest_filter = ''
    if from_run is not None and to_run is not None:
      runs_filter = 'AND run >= %s AND run <= %s' % (from_run, to_run)
    else:
      if latest is None:
        latest = 50
      latest_filter = 'LIMIT %s' % latest

    # run_class_like is passed as a bind parameter, so a single % is enough
    # for the SQL LIKE wildcard (no Python %-formatting is applied to it).
    run_class_like = '%collision%'
    if 'cosmic' in pd.lower():
      run_class_like = '%cosmic%'

    # UL2018 processing produced two processing strings: 12Nov2019_UL2018 and
    # 12Nov2019_UL2018_rsb. The _rsb version is a resubmission, because some
    # runs were not processed (crashed?) in the initial processing. Some runs
    # might exist under both processing strings, some under just one of them!
    # A union of both of these processing strings contains all runs of UL2018.
    # So in HDQM, when 12Nov2019_UL2018 is requested, we need to include
    # 12Nov2019_UL2018_rsb as well! This is a special case and is not used on
    # any other occasion.
    processing_string_sql = 'AND processing_string=:processing_string'
    if processing_string == '12Nov2019_UL2018':
      processing_string_sql = 'AND (processing_string=:processing_string OR processing_string=:processing_string_rsb)'

    sql = '''
    SELECT DISTINCT run FROM oms_data_cache
    WHERE run IN
    (
      SELECT run FROM historic_data_points
      WHERE subsystem=:subsystem
      AND pd=:pd
      %s
    )
    AND oms_data_cache.run_class %s :run_class
    AND oms_data_cache.significant=%s
    AND oms_data_cache.is_dcs=%s
    %s
    ORDER BY run DESC
    %s
    ;
    ''' % (processing_string_sql, db_access.ilike_crossdb(),
           db_access.true_crossdb(), db_access.true_crossdb(), runs_filter,
           latest_filter)

    print('Getting the list of runs...')
    start = timeit.default_timer()
    rows = execute_with_retry(
        session, sql, {
            'subsystem': subsystem,
            'pd': pd,
            'processing_string': processing_string,
            'processing_string_rsb': processing_string + '_rsb',
            'run_class': run_class_like
        })
    rows = list(rows)
    stop = timeit.default_timer()
    print('Runs retrieved in: ', stop - start)
    runs = [x[0] for x in rows]

  # Construct the data query
  query_params = {
      'subsystem': subsystem,
      'pd': pd,
      'processing_string': processing_string,
      'processing_string_rsb': processing_string + '_rsb'
  }

  run_selection_sql = 'AND historic_data_points.run BETWEEN :from_run AND :to_run'
  if runs is not None:
    run_selection_sql = 'AND historic_data_points.run IN (%s)' % ', '.join(
        str(x) for x in runs)
    query_params['runs'] = runs
  else:
    query_params['from_run'] = from_run
    query_params['to_run'] = to_run

  series_filter_sql = ''
  if series is not None:
    series_filter_sql = 'AND historic_data_points.name IN ('
    series = series.split(',')
    for i in range(len(series)):
      key = 'series_%i' % i
      series_filter_sql += ':%s,' % key
      query_params[key] = series[i]
    series_filter_sql = series_filter_sql.rstrip(',') + ')'

  processing_string_sql = 'AND historic_data_points.processing_string=:processing_string'
  if processing_string == '12Nov2019_UL2018':
    processing_string_sql = 'AND (historic_data_points.processing_string=:processing_string OR historic_data_points.processing_string=:processing_string_rsb)'

  sql = '''
  SELECT historic_data_points.id, historic_data_points.run, historic_data_points.value, historic_data_points.error,
  historic_data_points.name, historic_data_points.plot_title, historic_data_points.y_title
  FROM historic_data_points
  WHERE historic_data_points.subsystem=:subsystem
  AND historic_data_points.pd=:pd
  %s
  %s
  %s
  ORDER BY historic_data_points.run ASC
  ;
  ''' % (processing_string_sql, run_selection_sql, series_filter_sql)

  print('Getting the data...')
  start = timeit.default_timer()
  rows = execute_with_retry(session, sql, query_params)
  rows = list(rows)
  session.close()
  stop = timeit.default_timer()
  print('Data retrieved in: ', stop - start)

  result = {}
  for row in rows:
    # Names are unique within the subsystem
    key = '%s_%s' % (row['name'], subsystem)
    if key not in result:
      result[key] = {
          'metadata': {
              'y_title': row['y_title'],
              'plot_title': row['plot_title'],
              'name': row['name'],
              'subsystem': subsystem,
              'pd': pd,
              'processing_string': processing_string,
          },
          'trends': []
      }
    result[key]['trends'].append({
        'run': row['run'],
        'value': row['value'],
        'error': row['error'],
        'id': row['id'],
        'oms_info': {},
    })

  # Transform the result dict into an array sorted by key
  result = [result[key] for key in sorted(result.keys())]
  result = add_oms_info_to_result(result)
  return jsonify(result)
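# Illustrative client-side usage of the data() endpoint above. The host and
# the route are assumptions (they depend on how the Flask app is mounted);
# the query parameters and their mutual exclusivity mirror the request.args
# handling in data(). The subsystem/pd values are examples, not a fixed list.
def _example_data_request():  # hypothetical helper, not part of the API
  import requests
  r = requests.get(
      'https://hdqm.example.cern.ch/api/data',
      params={
          'subsystem': 'PixelPhase1',
          'pd': 'SingleMuon',
          'processing_string': '12Nov2019_UL2018',
          'latest': 50,  # mutually exclusive with from_run/to_run and runs
      })
  return r.json()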
def extract_all_mes(cfg_files, runs, nprocs, all_files):
  print('Processing %d configuration files...' % len(cfg_files))
  mes_set = set()
  good_files = 0
  for cfg_file in cfg_files:
    try:
      parser = RawConfigParser()
      parser.read(cfg_file)
      for section in parser:
        if not section.startswith('plot:'):
          if section != 'DEFAULT':
            print('Invalid configuration section: %s:%s, skipping.' %
                  (cfg_file, section))
          continue
        # str.lstrip strips a *set of characters*, not a prefix, so it would
        # also eat leading p, l, o, t characters from the plot name itself.
        # Slice the 'plot:' prefix off instead.
        plot_name = section[len('plot:'):]
        if not PLOTNAMEPATTERN.match(plot_name):
          print(
              "Invalid plot name: '%s:%s' Plot names can contain only: [a-zA-Z0-9_+-]"
              % (cfg_file, plot_name))
          continue
        mes_set.update(get_all_me_names(parser[section]['relativePath']))
        if 'histo1Path' in parser[section]:
          mes_set.update(get_all_me_names(parser[section]['histo1Path']))
        if 'histo2Path' in parser[section]:
          mes_set.update(get_all_me_names(parser[section]['histo2Path']))
        if 'reference' in parser[section]:
          mes_set.update(get_all_me_names(parser[section]['reference']))
      good_files += 1
    except Exception:
      print('Could not read %s, skipping...' % cfg_file)

  print('Read %d configuration files.' % good_files)
  print('Read %d distinct ME paths.' % len(mes_set))

  if not all_files:
    print('Listing files on EOS, this can take a while...')
    all_files = glob(ROOTFILES)
    if len(all_files) == 0:
      print('GLOB returned 0 files, probably EOS is down. Aborting.')
      return
    print('Done.')
  else:
    print('Using provided DQM files: %s' % len(all_files))

  # Filter on the runs that were passed by the user
  if runs:
    filtered = []
    for file in all_files:
      run_match = RUNPATTERN.findall(file)
      if len(run_match) != 0:
        run = run_match[0]
        if int(run) in runs:
          filtered.append(file)
    all_files = filtered

  # Keep only the newest version of each file
  print('Removing old versions of files...')
  all_files = remove_old_versions(all_files)

  print('Found %s files in EOS' % len(all_files))
  print('Gathering information about MEs to be extracted...')

  db_access.setup_db()

  # Get lists of existing MEs and EOS files.
  # Existing means that the ME was extracted or is in the extraction queue.
  session = db_access.get_session()
  existing_me_paths = set(
      x.me_path
      for x in session.query(db_access.TrackedMEPathForMEExtraction).all())
  existing_eos_paths = set(
      x.eos_path
      for x in session.query(db_access.TrackedEOSPathForMEExtraction).all())
  session.close()

  # Single session (transaction) for queue manipulations
  session = db_access.get_session()

  # -------------------- Update the ME paths in the extraction queue -------------------- #
  new_mes = mes_set.difference(existing_me_paths)
  deleted_mes = existing_me_paths.difference(mes_set)
  print('New MEs: %s, deleted MEs: %s' % (len(new_mes), len(deleted_mes)))

  # Remove deleted MEs from the extraction queue
  if len(deleted_mes) > 0:
    sql = 'DELETE FROM queue_to_extract WHERE me_path = :me_path;'
    session.execute(sql, [{'me_path': x} for x in deleted_mes])
    sql = 'DELETE FROM tracked_me_paths_for_me_extraction WHERE me_path = :me_path;'
    session.execute(sql, [{'me_path': x} for x in deleted_mes])

  # Refresh the new MEs table
  sql = 'DELETE FROM new_me_paths_for_me_extraction;'
  session.execute(sql)

  # Insert new ME paths
  if len(new_mes) > 0:
    sql = 'INSERT INTO new_me_paths_for_me_extraction (me_path) VALUES (:me_path);'
    session.execute(sql, [{'me_path': x} for x in new_mes])

  # New MEs will have to be extracted from every existing file
  sql_update_queue = '''
  INSERT INTO queue_to_extract (eos_path, me_path)
  SELECT eos_path, me_path
  FROM tracked_eos_paths_for_me_extraction, new_me_paths_for_me_extraction
  ;
  '''
  sql_update_existing = '''
  INSERT INTO tracked_me_paths_for_me_extraction (me_path)
  SELECT me_path
  FROM new_me_paths_for_me_extraction
  ;
  '''
  session.execute(sql_update_queue)
  session.execute(sql_update_existing)

  # -------------------- Update the EOS paths in the extraction queue -------------------- #
  files_set = set(all_files)
  new_files = files_set.difference(existing_eos_paths)
  deleted_files = existing_eos_paths.difference(files_set)
  print('New files: %s, deleted files: %s' %
        (len(new_files), len(deleted_files)))

  # Remove deleted files from the extraction queue
  if len(deleted_files) > 0:
    sql = 'DELETE FROM queue_to_extract WHERE eos_path = :eos_path;'
    session.execute(sql, [{'eos_path': x} for x in deleted_files])
    sql = 'DELETE FROM tracked_eos_paths_for_me_extraction WHERE eos_path = :eos_path;'
    session.execute(sql, [{'eos_path': x} for x in deleted_files])

  # Refresh the new files table
  sql = 'DELETE FROM new_eos_paths_for_me_extraction;'
  session.execute(sql)

  # Insert new EOS paths
  if len(new_files) > 0:
    sql = 'INSERT INTO new_eos_paths_for_me_extraction (eos_path) VALUES (:eos_path);'
    session.execute(sql, [{'eos_path': x} for x in new_files])

  # All existing MEs will have to be extracted from the newly added files
  sql_update_queue = '''
  INSERT INTO queue_to_extract (eos_path, me_path)
  SELECT eos_path, me_path
  FROM new_eos_paths_for_me_extraction, tracked_me_paths_for_me_extraction
  ;
  '''
  sql_update_existing = '''
  INSERT INTO tracked_eos_paths_for_me_extraction (eos_path)
  SELECT eos_path
  FROM new_eos_paths_for_me_extraction
  ;
  '''
  session.execute(sql_update_queue)
  session.execute(sql_update_existing)

  session.commit()
  session.close()
  print('Done.')

  print('Extracting missing MEs...')

  # ------------------- Start extracting MEs from the extraction queue ------------------- #
  sql = 'SELECT id, eos_path, me_path FROM queue_to_extract LIMIT 100000;'
  pool = Pool(nprocs)

  while True:
    db_access.dispose_engine()
    session = db_access.get_session()
    try:
      print('Fetching not processed MEs from DB...')
      rows = session.execute(sql)
      rows = list(rows)
      session.close()
      print('Fetched: %s' % len(rows))
      if len(rows) == 0:
        break
      pool.map(extract_mes, batch_iterable(rows, chunksize=2000))
    except OSError as e:
      if e.errno != errno.EINTR:
        raise
      else:
        print('[Errno 4] occurred. Continuing.')
    except Exception as e:
      print(e)
      session.close()

  print('Done.')
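# `batch_iterable` is used with Pool.map above (and in calculate_all_trends
# below) but is not defined in this section. The function below is a minimal
# sketch of such a chunking helper, assuming it only needs to yield
# fixed-size lists of rows to each worker.
def batch_iterable_sketch(iterable, chunksize):
  """Yield successive lists of at most `chunksize` items from `iterable`."""
  batch = []
  for item in iterable:
    batch.append(item)
    if len(batch) == chunksize:
      yield batch
      batch = []
  if batch:
    yield batch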
def calculate_all_trends(cfg_files, runs, nprocs):
  print('Processing %d configuration files...' % len(cfg_files))
  db_access.setup_db()

  trend_count = 0
  for cfg_file in cfg_files:
    subsystem = os.path.basename(os.path.dirname(cfg_file))
    if not subsystem:
      subsystem = 'Unknown'

    parser = RawConfigParser()
    parser.read(cfg_file)

    for section in parser:
      if not section.startswith('plot:'):
        if section != 'DEFAULT':
          print('Invalid configuration section: %s:%s, skipping.' %
                (cfg_file, section))
        continue
      # Slice off the 'plot:' prefix instead of lstrip, which strips a set
      # of characters and would mangle names starting with p, l, o or t.
      plot_name = section[len('plot:'):]
      if not PLOTNAMEPATTERN.match(plot_name):
        print(
            "Invalid plot name: '%s:%s' Plot names can contain only: [a-zA-Z0-9_+-]"
            % (cfg_file, plot_name))
        continue

      if 'metric' not in parser[section] or \
          'relativePath' not in parser[section] or \
          'yTitle' not in parser[section]:
        print('Plot missing required attributes: %s:%s, skipping.' %
              (cfg_file, section))
        print('Required parameters: metric, relativePath, yTitle')
        continue

      parser[section]['subsystem'] = subsystem
      parser[section]['name'] = section.split(':')[1]
      CONFIG.append(parser[section])
      trend_count += 1

  print('Starting to process %s trends.' % trend_count)
  print('Updating configuration...')

  # Find out new and changed configuration
  last_config = []
  session = db_access.get_session()
  try:
    last_config = list(
        session.execute('SELECT * FROM last_calculated_configs;'))
  except Exception as e:
    print('Exception getting config from the DB: %s' % e)
    return
  finally:
    session.close()

  new_configs = []

  for current in CONFIG:
    # Find by subsystem and name of trend
    last = next((x for x in last_config
                 if current['subsystem'] == x['subsystem'] and
                 current['name'] == x['name']), None)

    if last:
      obj = section_to_config_object(current)
      # Compute the threshold separately: in the original one-line comparison
      # the trailing "if obj.threshold else None" applied to the whole chain
      # of comparisons due to operator precedence, not just to the threshold.
      threshold = int(obj.threshold) if obj.threshold else None
      if last['metric'] != obj.metric or \
          last['plot_title'] != obj.plot_title or \
          last['y_title'] != obj.y_title or \
          last['relative_path'] != obj.relative_path or \
          last['histo1_path'] != obj.histo1_path or \
          last['histo2_path'] != obj.histo2_path or \
          last['reference_path'] != obj.reference_path or \
          last['threshold'] != threshold:
        # Changed!
        new_configs.append(obj)
    else:
      new_configs.append(section_to_config_object(current))

  # Add new configs
  session = db_access.get_session()
  try:
    for new in new_configs:
      session.add(new)
    session.commit()
  except Exception as e:
    print('Exception adding new configs to the DB: %s' % e)
    session.rollback()
    return
  finally:
    session.close()

  # Recalculate everything if the configuration changed
  if len(new_configs) > 0:
    print('Configuration changed, resetting the calculation queue...')
    session = db_access.get_session()
    try:
      session.execute('DELETE FROM queue_to_calculate;')
      session.execute('DELETE FROM queue_to_calculate_later;')
      session.execute(
          'INSERT INTO queue_to_calculate (me_id) SELECT id FROM monitor_elements;'
      )
      session.commit()
    except Exception as e:
      print('Exception resetting the calculation queue in the DB: %s' % e)
      session.rollback()
      return
    finally:
      session.close()
    print('Calculation queue is ready.')
  else:
    # Move things from queue_to_calculate_later back to queue_to_calculate
    print('Moving items from second queue to the main one...')
    session = db_access.get_session()
    try:
      session.execute(
          'INSERT INTO queue_to_calculate (me_id) SELECT me_id FROM queue_to_calculate_later;'
      )
      session.execute('DELETE FROM queue_to_calculate_later;')
      session.commit()
    except Exception as e:
      print(
          'Exception moving items from the second calculation queue to the first: %s'
          % e)
      session.rollback()
    finally:
      session.close()
    print('Calculation queue is ready.')

  print('Configuration updated.')

  # Start calculating trends
  if runs is None:
    runs_filter = ''
  else:
    runs_filter = 'WHERE monitor_elements.run IN (%s)' % ', '.join(
        str(x) for x in runs)

  limit = 10000
  sql = '''
  SELECT queue_to_calculate.id, monitor_elements.id as me_id, monitor_elements.run, monitor_elements.lumi, monitor_elements.eos_path, monitor_elements.me_path, monitor_elements.dataset
  FROM monitor_elements
  JOIN queue_to_calculate ON monitor_elements.id=queue_to_calculate.me_id
  %s
  LIMIT %s;
  ''' % (runs_filter, limit)

  # pool = Pool(nprocs)
  pool = ForkPool(nprocs)

  while True:
    db_access.dispose_engine()
    session = db_access.get_session()
    try:
      print('Fetching not processed data points from DB...')
      rows = session.execute(sql)
      rows = list(rows)
      print('Fetched: %s' % len(rows))
      if len(rows) == 0:
        print('Queue to calculate is empty. Exiting.')
        break
      pool.map(calculate_trends, batch_iterable(rows, chunksize=400))
      print('Finished calculating a batch of trends.')
    except OSError as e:
      if e.errno != errno.EINTR:
        raise
      else:
        print('[Errno 4] occurred. Continuing.')
    except Exception as e:
      print('Exception fetching elements from the calculation queue: %s' % e)
      raise
    finally:
      session.close()