Example #1
def plot_selection():
    db_access.setup_db()

    session = db_access.get_session()
    try:
        obj = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

        rows = execute_with_retry(
            session, '''
    SELECT selection_params.id, selection_params.subsystem, selection_params.pd, selection_params.processing_string, last_calculated_configs.name 
    FROM selection_params 
    JOIN last_calculated_configs ON config_id = last_calculated_configs.id 
    ORDER BY selection_params.subsystem, selection_params.pd, selection_params.processing_string, last_calculated_configs.name
    ;
    ''')
        rows = list(rows)

        for row in rows:
            if row['processing_string'] in ALLOWED_PROCESSING_STRINGS:
                obj[row['subsystem']][row['pd']][
                    row['processing_string']].append({
                        'name': row['name'],
                        'id': row['id']
                    })

        return jsonify(obj)
    finally:
        session.close()
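execute_with_retry is a project helper that is not shown in these examples. A minimal sketch of what such a wrapper might look like (the retry count, back-off, and rollback behavior are assumptions, not the project's actual implementation):

import time

def execute_with_retry(session, sql, params=None, retries=3, delay=1):
    # Hypothetical sketch: retry transient DB errors a few times with a
    # fixed back-off before giving up. The real helper may differ.
    for attempt in range(retries):
        try:
            return session.execute(sql, params)
        except Exception:
            if attempt == retries - 1:
                raise
            session.rollback()
            time.sleep(delay)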
Example #2
def fetch(update, nproc):
    db_access.setup_db()

    all_runs = []
    extracted_runs = []
    session = db_access.get_session()
    try:
        print("Getting missing runs...")
        all_runs = list(
            session.execute("SELECT DISTINCT(run) FROM historic_data_points;"))
        all_runs = [x[0] for x in all_runs]
        extracted_runs = list(
            session.execute("SELECT DISTINCT(run) FROM oms_data_cache;"))
        extracted_runs = [x[0] for x in extracted_runs]
    finally:
        session.close()

    # Work out which runs still need to be fetched.
    # With update=True, all runs are refetched.
    if update:
        diff = all_runs
    else:
        extracted = set(extracted_runs)
        diff = [run for run in all_runs if run not in extracted]

    print("Number of runs to be fetched: %s" % len(diff))

    # Dispose of the engine before forking so that the workers
    # don't inherit open DB connections
    db_access.dispose_engine()
    pool = Pool(nproc)
    pool.map(fetch_run, diff)
    print("Done.")
Example #3
def runs():
    db_access.setup_db()
    session = db_access.get_session()

    try:
        runs = [
            h.run for h in session.query(db_access.HistoricDataPoint.run).
            distinct().order_by(db_access.HistoricDataPoint.run.asc())
        ]
    finally:
        session.close()
    return jsonify(runs)
Example #4
def expand_url():
    valid_url_types = [
        'main_gui_url', 'main_image_url', 'optional1_gui_url',
        'optional1_image_url', 'optional2_gui_url', 'optional2_image_url',
        'reference_gui_url', 'reference_image_url'
    ]

    data_point_id = request.args.get('data_point_id', type=int)
    url_type = request.args.get('url_type')

    if data_point_id is None:
        return jsonify(
            {'message': 'Please provide a data_point_id parameter.'}), 400

    if url_type not in valid_url_types:
        return jsonify({
            'message':
            'Please provide a valid url_type parameter. Accepted values are: %s'
            % ','.join(valid_url_types)
        }), 400

    db_access.setup_db()
    session = db_access.get_session()

    try:
        # url_type is validated against the whitelist above, so it is
        # safe to interpolate it into the query
        sql = '''
    SELECT %s
    FROM historic_data_points
    WHERE id = :id
    ;
    ''' % url_type

        rows = list(execute_with_retry(session, sql, {'id': data_point_id}))
        if not rows:
            return jsonify({'message': 'Data point not found.'}), 404
        url = rows[0][url_type]

        if url:
            url = url.replace('+', '%2B')
            return redirect(url, code=302)
        else:
            return jsonify({'message':
                            'Requested URL type is not found.'}), 404
    except Exception as e:
        print(e)
    finally:
        session.close()

    return jsonify({'message': 'Error getting the url from the DB.'}), 500
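The replace('+', '%2B') call exists because a literal '+' in a URL query string is decoded as a space on the receiving end, while the percent-encoded form survives. A quick illustration (not part of the project code):

from urllib.parse import parse_qs

parse_qs('tag=a+b')    # {'tag': ['a b']}  -- '+' decoded as a space
parse_qs('tag=a%2Bb')  # {'tag': ['a+b']}  -- '+' preserved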
Example #5
def selection():
    db_access.setup_db()

    session = db_access.get_session()
    try:
        obj = defaultdict(lambda: defaultdict(list))
        rows = execute_with_retry(
            session,
            'SELECT DISTINCT subsystem, pd, processing_string FROM selection_params ORDER BY subsystem, pd, processing_string;'
        )
        rows = list(rows)
        for row in rows:
            if row['processing_string'] in ALLOWED_PROCESSING_STRINGS:
                obj[row['subsystem']][row['pd']].append(
                    row['processing_string'])

        return jsonify(obj)
    finally:
        session.close()
Example #6
def fetch(update):
    db_access.setup_db()

    min_run = 0
    max_run = 0

    session = db_access.get_session()
    try:
        print('Getting min and max runs...')
        minmax = list(
            session.execute('SELECT MIN(run), MAX(run) FROM oms_data_cache;'))
        min_run = minmax[0][0]
        max_run = minmax[0][1]
    finally:
        session.close()

    print('Run range: %s-%s' % (min_run, max_run))

    fetch_runs(min_run, max_run)
Example #7
def data():
    subsystem = request.args.get('subsystem')
    pd = request.args.get('pd')
    processing_string = request.args.get('processing_string')
    from_run = request.args.get('from_run', type=int)
    to_run = request.args.get('to_run', type=int)
    runs = request.args.get('runs')
    latest = request.args.get('latest', type=int)
    series = request.args.get('series')
    series_id = request.args.get('series_id', type=int)

    if series_id is None:
        if subsystem is None:
            return jsonify(
                {'message': 'Please provide a subsystem parameter.'}), 400

        if pd is None:
            return jsonify({'message': 'Please provide a pd parameter.'}), 400

        if processing_string is None:
            return jsonify(
                {'message':
                 'Please provide a processing_string parameter.'}), 400

    modes = 0
    if from_run is not None and to_run is not None:
        modes += 1
    if latest is not None:
        modes += 1
    if runs is not None:
        modes += 1

    if modes > 1:
        return jsonify({
            'message':
            'The combination of parameters you provided is invalid.'
        }), 400

    if runs is not None:
        try:
            runs = [int(x) for x in runs.split(',')]
        except ValueError:
            return jsonify({
                'message':
                'runs parameter is not valid. It has to be a comma separated list of integers.'
            }), 400

    if series and series_id:
        return jsonify({
            'message':
            'series and series_id cannot be defined at the same time.'
        }), 400

    db_access.setup_db()
    session = db_access.get_session()

    # Get series data by series_id
    if series_id:
        sql = '''
    SELECT selection_params.subsystem, selection_params.pd, selection_params.processing_string, last_calculated_configs.name
    FROM selection_params
    JOIN last_calculated_configs ON config_id = last_calculated_configs.id
    WHERE selection_params.id = :id
    ;
    '''

        rows = execute_with_retry(session, sql, {'id': series_id})
        rows = list(rows)

        subsystem = rows[0]['subsystem']
        pd = rows[0]['pd']
        processing_string = rows[0]['processing_string']
        series = rows[0]['name']

    if runs is None:
        # Will need filtering
        runs_filter = ''
        latest_filter = ''
        if from_run is not None and to_run is not None:
            runs_filter = 'AND run >= %s AND run <= %s' % (from_run, to_run)
        else:
            if latest is None:
                latest = 50
            latest_filter = 'LIMIT %s' % latest

        # Bound as a query parameter (never %-formatted), so single '%'
        # LIKE wildcards suffice
        run_class_like = '%collision%'
        if 'cosmic' in pd.lower():
            run_class_like = '%cosmic%'

        # The UL2018 processing produced two processing strings: 12Nov2019_UL2018 and 12Nov2019_UL2018_rsb.
        # The _rsb version is a resubmission, because some runs were not processed (crashed?) in the initial processing.
        # Some runs might exist under both processing strings, some under just one of them!
        # A union of both processing strings contains all runs of UL2018.
        # So in HDQM, when 12Nov2019_UL2018 is requested, we need to include 12Nov2019_UL2018_rsb as well.
        # This is a special case and is not used on any other occasion.

        processing_string_sql = 'AND processing_string=:processing_string'
        if processing_string == '12Nov2019_UL2018':
            processing_string_sql = 'AND (processing_string=:processing_string OR processing_string=:processing_string_rsb)'

        sql = '''
    SELECT DISTINCT run FROM oms_data_cache
    WHERE run IN 
    (
      SELECT run FROM historic_data_points
      WHERE subsystem=:subsystem
      AND pd=:pd
      %s
    )
    AND oms_data_cache.run_class %s :run_class
    AND oms_data_cache.significant=%s
    AND oms_data_cache.is_dcs=%s
    %s
    ORDER BY run DESC
    %s
    ;
    ''' % (processing_string_sql, db_access.ilike_crossdb(),
           db_access.true_crossdb(), db_access.true_crossdb(), runs_filter,
           latest_filter)

        print('Getting the list of runs...')
        start = timeit.default_timer()

        rows = execute_with_retry(
            session, sql, {
                'subsystem': subsystem,
                'pd': pd,
                'processing_string': processing_string,
                'processing_string_rsb': processing_string + '_rsb',
                'run_class': run_class_like
            })
        rows = list(rows)

        stop = timeit.default_timer()
        print('Runs retrieved in: ', stop - start)

        runs = [x[0] for x in rows]

    # Construct SQL query
    query_params = {
        'subsystem': subsystem,
        'pd': pd,
        'processing_string': processing_string,
        'processing_string_rsb': processing_string + '_rsb'
    }

    run_selection_sql = 'AND historic_data_points.run BETWEEN :from_run AND :to_run'
    if runs is not None:
        # The values were already validated to be integers, so inlining
        # them into the query is safe
        run_selection_sql = 'AND historic_data_points.run IN (%s)' % ', '.join(
            str(x) for x in runs)
    else:
        query_params['from_run'] = from_run
        query_params['to_run'] = to_run

    series_filter_sql = ''
    if series is not None:
        series = series.split(',')
        placeholders = []
        for i, name in enumerate(series):
            key = 'series_%i' % i
            query_params[key] = name
            placeholders.append(':' + key)
        series_filter_sql = 'AND historic_data_points.name IN (%s)' % ', '.join(
            placeholders)

    processing_string_sql = 'AND historic_data_points.processing_string=:processing_string'
    if processing_string == '12Nov2019_UL2018':
        processing_string_sql = 'AND (historic_data_points.processing_string=:processing_string OR historic_data_points.processing_string=:processing_string_rsb)'

    sql = '''
  SELECT
  historic_data_points.id,
  historic_data_points.run,
  historic_data_points.value,
  historic_data_points.error,
  historic_data_points.name,
  historic_data_points.plot_title,
  historic_data_points.y_title
  FROM historic_data_points
  WHERE historic_data_points.subsystem=:subsystem
  AND historic_data_points.pd=:pd
  %s
  %s
  %s
  ORDER BY historic_data_points.run ASC
  ;
  ''' % (processing_string_sql, run_selection_sql, series_filter_sql)

    print('Getting the data...')
    start = timeit.default_timer()

    rows = execute_with_retry(session, sql, query_params)
    rows = list(rows)
    session.close()

    stop = timeit.default_timer()
    print('Data retrieved in: ', stop - start)

    result = {}
    for row in rows:
        # Names are unique within the subsystem
        key = '%s_%s' % (row['name'], subsystem)
        if key not in result:
            result[key] = {
                'metadata': {
                    'y_title': row['y_title'],
                    'plot_title': row['plot_title'],
                    'name': row['name'],
                    'subsystem': subsystem,
                    'pd': pd,
                    'processing_string': processing_string,
                },
                'trends': []
            }

        result[key]['trends'].append({
            'run': row['run'],
            'value': row['value'],
            'error': row['error'],
            'id': row['id'],
            'oms_info': {},
        })

    # Transform result to array
    result = [result[key] for key in sorted(result.keys())]
    result = add_oms_info_to_result(result)

    return jsonify(result)
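db_access.ilike_crossdb() and db_access.true_crossdb() are not shown; from their usage they appear to return SQL fragments that paper over dialect differences between PostgreSQL and SQLite. A plausible sketch, assuming a module-level DIALECT string (the real helpers may be implemented differently):

DIALECT = 'postgresql'  # assumption: derived from the engine URL; or 'sqlite'

def ilike_crossdb():
    # PostgreSQL supports ILIKE; SQLite's LIKE is case-insensitive already
    return 'ILIKE' if DIALECT == 'postgresql' else 'LIKE'

def true_crossdb():
    # PostgreSQL has TRUE/FALSE literals; SQLite stores booleans as 1/0
    return 'TRUE' if DIALECT == 'postgresql' else '1'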
Example #8
def extract_all_mes(cfg_files, runs, nprocs, all_files):
  print('Processing %d configuration files...' % len(cfg_files))
  mes_set = set()
  good_files = 0
  for cfg_file in cfg_files:
    try:
      parser = RawConfigParser()
      parser.read(str(cfg_file))
      for section in parser:
        if not section.startswith('plot:'):
          if section != 'DEFAULT':
            print('Invalid configuration section: %s:%s, skipping.' % (cfg_file, section))
          continue
        # Slice off the 'plot:' prefix; lstrip() would also strip leading
        # 'p', 'l', 'o', 't' and ':' characters from the name itself
        plot_name = section[len('plot:'):]
        if not PLOTNAMEPATTERN.match(plot_name):
          print("Invalid plot name: '%s:%s' Plot names can contain only: [a-zA-Z0-9_+-]" % (cfg_file, plot_name))
          continue

        mes_set.update(get_all_me_names(parser[section]['relativePath']))
        if 'histo1Path' in parser[section]:
          mes_set.update(get_all_me_names(parser[section]['histo1Path']))
        if 'histo2Path' in parser[section]:
          mes_set.update(get_all_me_names(parser[section]['histo2Path']))
        if 'reference' in parser[section]:
          mes_set.update(get_all_me_names(parser[section]['reference']))
      good_files += 1
    except Exception as e:
      print('Could not read %s, skipping... (%s)' % (cfg_file, e))

  print('Read %d configuration files.' % good_files)
  print('Read %d distinct ME paths.' % len(mes_set))

  if not all_files:
    print('Listing files on EOS, this can take a while...')
    all_files = glob(ROOTFILES)
    if len(all_files) == 0:
      print('GLOB returned 0 files, probably EOS is down. Aborting.')
      return
    print('Done.')
  else:
    print('Using provided DQM files: %s' % len(all_files))

  # Filter on the runs that were passed by the user
  if runs:
    filtered = []
    for file in all_files:
      run_match = RUNPATTERN.findall(file)
      if run_match:
        run = run_match[0]
        if int(run) in runs:
          filtered.append(file)
    all_files = filtered

  # Keep only the newest version of each file
  print('Removing old versions of files...')
  all_files = remove_old_versions(all_files)

  print('Found %s files in EOS' % len(all_files))

  print('Gathering information about MEs to be extracted...')

  db_access.setup_db()

  # Get lists of existing mes and eos files.
  # Existing means that ME was extracted or is in the extraction queue.
  session = db_access.get_session()
  existing_me_paths = set(x.me_path for x in session.query(db_access.TrackedMEPathForMEExtraction).all())
  existing_eos_paths = set(x.eos_path for x in session.query(db_access.TrackedEOSPathForMEExtraction).all())
  session.close()

  # Single session (transaction) for queue manipulations
  session = db_access.get_session()

  # -------------------- Update the ME paths in the extraction queue -------------------- #
  new_mes = mes_set.difference(existing_me_paths)
  deleted_mes = existing_me_paths.difference(mes_set)

  print('New MEs: %s, deleted MEs: %s' % (len(new_mes), len(deleted_mes)))

  # Remove deleted MEs from the extraction queue
  if len(deleted_mes) > 0:
    sql = 'DELETE FROM queue_to_extract WHERE me_path = :me_path;'
    session.execute(sql, [{'me_path': x} for x in deleted_mes])

    sql = 'DELETE FROM tracked_me_paths_for_me_extraction WHERE me_path = :me_path;'
    session.execute(sql, [{'me_path': x} for x in deleted_mes])

  # Refresh new MEs table
  sql = 'DELETE FROM new_me_paths_for_me_extraction;'
  session.execute(sql)

  # Insert new ME paths
  if len(new_mes) > 0:
    sql = 'INSERT INTO new_me_paths_for_me_extraction (me_path) VALUES (:me_path);'
    session.execute(sql, [{'me_path': x} for x in new_mes])

  # Will have to extract new MEs for every existing file
  sql_update_queue = '''
  INSERT INTO queue_to_extract (eos_path, me_path)
  SELECT eos_path, me_path
  FROM tracked_eos_paths_for_me_extraction, new_me_paths_for_me_extraction
  ;
  '''

  sql_update_existing = '''
  INSERT INTO tracked_me_paths_for_me_extraction (me_path)
  SELECT me_path
  FROM new_me_paths_for_me_extraction
  ;
  '''

  session.execute(sql_update_queue)
  session.execute(sql_update_existing)

  # -------------------- Update the eos paths in the extraction queue -------------------- #
  files_set = set(all_files)
  new_files = files_set.difference(existing_eos_paths)
  deleted_files = existing_eos_paths.difference(files_set)

  print('New files: %s, deleted files: %s' % (len(new_files), len(deleted_files)))

  # Remove deleted files from the extraction queue
  if len(deleted_files) > 0:
    sql = 'DELETE FROM queue_to_extract WHERE eos_path = :eos_path;'
    session.execute(sql, [{'eos_path': x} for x in deleted_files])

    sql = 'DELETE FROM tracked_eos_paths_for_me_extraction WHERE eos_path = :eos_path;'
    session.execute(sql, [{'eos_path': x} for x in deleted_files])

  # Refresh new files table
  sql = 'DELETE FROM new_eos_paths_for_me_extraction;'
  session.execute(sql)

  # Insert new eos paths
  if len(new_files) > 0:
    sql = 'INSERT INTO new_eos_paths_for_me_extraction (eos_path) VALUES (:eos_path);'
    session.execute(sql, [{'eos_path': x} for x in new_files])

  # Will have to extract all existing MEs for newly added files
  sql_update_queue = '''
  INSERT INTO queue_to_extract (eos_path, me_path)
  SELECT eos_path, me_path
  FROM new_eos_paths_for_me_extraction, tracked_me_paths_for_me_extraction
  ;
  '''

  sql_update_existing = '''
  INSERT INTO tracked_eos_paths_for_me_extraction (eos_path)
  SELECT eos_path
  FROM new_eos_paths_for_me_extraction
  ;
  '''

  session.execute(sql_update_queue)
  session.execute(sql_update_existing)

  session.commit()
  session.close()

  print('Done.')
  print('Extracting missing MEs...')

  # ------------------- Start extracting MEs from the extraction queue ------------------- #

  sql = 'SELECT id, eos_path, me_path FROM queue_to_extract LIMIT 100000;'
  pool = Pool(nprocs)

  while True:
    db_access.dispose_engine()
    session = db_access.get_session()
    try:
      print('Fetching not processed MEs from DB...')
      rows = session.execute(sql)
      rows = list(rows)
      session.close()
      print('Fetched: %s' % len(rows))
      if len(rows) == 0:
        break

      pool.map(extract_mes, batch_iterable(rows, chunksize=2000))
    except OSError as e:
      if e.errno != errno.EINTR:
        raise
      else:
        print('[Errno 4] occurred. Continuing.')
    except Exception as e:
      print(e)
      session.close()

  print('Done.')
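batch_iterable is another helper that is not shown. A minimal sketch of a chunking generator with the signature used above (purely illustrative):

def batch_iterable(iterable, chunksize):
    # Yield successive lists of at most `chunksize` items so that each
    # pool task processes a batch of rows instead of a single one
    batch = []
    for item in iterable:
        batch.append(item)
        if len(batch) == chunksize:
            yield batch
            batch = []
    if batch:
        yield batch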
Example #9
def calculate_all_trends(cfg_files, runs, nprocs):
    print('Processing %d configuration files...' % len(cfg_files))
    db_access.setup_db()

    trend_count = 0
    for cfg_file in cfg_files:
        subsystem = os.path.basename(os.path.dirname(cfg_file))
        if not subsystem:
            subsystem = 'Unknown'

        parser = RawConfigParser()
        parser.read(str(cfg_file))

        for section in parser:
            if not section.startswith('plot:'):
                if section != 'DEFAULT':
                    print('Invalid configuration section: %s:%s, skipping.' %
                          (cfg_file, section))
                continue

            # Slice off the 'plot:' prefix; lstrip() would also strip
            # leading characters of the name itself
            plot_name = section[len('plot:'):]
            if not PLOTNAMEPATTERN.match(plot_name):
                print(
                    "Invalid plot name: '%s:%s' Plot names can contain only: [a-zA-Z0-9_+-]"
                    % (cfg_file, plot_name))
                continue

            if 'metric' not in parser[section] or\
               'relativePath' not in parser[section] or\
               'yTitle' not in parser[section]:
                print('Plot missing required attributes: %s:%s, skipping.' %
                      (cfg_file, section))
                print('Required parameters: metric, relativePath, yTitle')
                continue

            parser[section]['subsystem'] = subsystem
            parser[section]['name'] = section.split(':')[1]
            CONFIG.append(parser[section])
            trend_count += 1

    print('Starting to process %s trends.' % trend_count)
    print('Updating configuration...')

    # Find out new and changed configuration
    last_config = []
    session = db_access.get_session()
    try:
        last_config = list(
            session.execute('SELECT * FROM last_calculated_configs;'))
    except Exception as e:
        print('Exception getting config from the DB: %s' % e)
        return
    finally:
        session.close()

    new_configs = []

    for current in CONFIG:
        # Find by subsystem and name of trend
        last = next(
            (x for x in last_config if current['subsystem'] == x['subsystem']
             and current['name'] == x['name']), None)
        if last:
            obj = section_to_config_object(current)
            # Parenthesize the threshold expression: without the parentheses
            # the trailing conditional swallows the whole comparison chain,
            # and configs without a threshold never register as changed
            if last['metric'] != obj.metric or\
               last['plot_title'] != obj.plot_title or\
               last['y_title'] != obj.y_title or\
               last['relative_path'] != obj.relative_path or\
               last['histo1_path'] != obj.histo1_path or\
               last['histo2_path'] != obj.histo2_path or\
               last['reference_path'] != obj.reference_path or\
               last['threshold'] != (int(obj.threshold) if obj.threshold else None):
                # Changed!
                new_configs.append(obj)
        else:
            new_configs.append(section_to_config_object(current))

    # Add new configs
    session = db_access.get_session()
    try:
        for new in new_configs:
            session.add(new)
        session.commit()
    except Exception as e:
        print('Exception adding new configs to the DB: %s' % e)
        session.rollback()
        return
    finally:
        session.close()

    # Recalculate everything if the configuration changed
    if len(new_configs) > 0:
        print('Configuration changed, resetting the calculation queue...')
        session = db_access.get_session()
        try:
            session.execute('DELETE FROM queue_to_calculate;')
            session.execute('DELETE FROM queue_to_calculate_later;')
            session.execute(
                'INSERT INTO queue_to_calculate (me_id) SELECT id FROM monitor_elements;'
            )
            session.commit()
        except Exception as e:
            print('Exception resetting the calculation queue in the DB: %s' % e)
            session.rollback()
            return
        finally:
            session.close()
        print('Calculation queue is ready.')
    else:
        # Move things from queue_to_calculate_later back to queue_to_calculate
        print('Moving items from second queue to the main one...')
        session = db_access.get_session()
        try:
            session.execute(
                'INSERT INTO queue_to_calculate (me_id) SELECT me_id FROM queue_to_calculate_later;'
            )
            session.execute('DELETE FROM queue_to_calculate_later;')
            session.commit()
        except Exception as e:
            print(
                'Exception moving items from the second calculation queue to the first: %s'
                % e)
            session.rollback()
        finally:
            session.close()
        print('Calculation queue is ready.')

    print('Configuration updated.')

    # Start calculating trends
    if runs is None:
        runs_filter = ''
    else:
        runs_filter = 'WHERE monitor_elements.run IN (%s)' % ', '.join(
            str(x) for x in runs)

    limit = 10000
    sql = '''
  SELECT queue_to_calculate.id, monitor_elements.id as me_id, monitor_elements.run, monitor_elements.lumi, monitor_elements.eos_path, monitor_elements.me_path, monitor_elements.dataset FROM monitor_elements
  JOIN queue_to_calculate ON monitor_elements.id=queue_to_calculate.me_id
  %s
  LIMIT %s;
  ''' % (runs_filter, limit)

    pool = ForkPool(nprocs)

    while True:
        db_access.dispose_engine()
        session = db_access.get_session()

        try:
            print('Fetching not processed data points from DB...')
            rows = session.execute(sql)
            rows = list(rows)
            print('Fetched: %s' % len(rows))
            if len(rows) == 0:
                print('Queue to calculate is empty. Exiting.')
                break

            pool.map(calculate_trends, batch_iterable(rows, chunksize=400))

            print('Finished calculating a batch of trends.')
        except OSError as e:
            if e.errno != errno.EINTR:
                raise
            else:
                print('[Errno 4] occurred. Continuing.')
        except Exception as e:
            print(
                'Exception fetching elements from the calculation queue: %s' %
                e)
            raise
        finally:
            session.close()
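section_to_config_object maps a parsed configuration section onto a row of last_calculated_configs. Judging from the fields compared above, it would look roughly like this; the ORM class name and the optional-field handling are assumptions:

def section_to_config_object(section):
    # Hypothetical sketch based on the columns compared above;
    # the project's real mapping may differ
    return db_access.LastCalculatedConfig(
        subsystem=section['subsystem'],
        name=section['name'],
        metric=section['metric'],
        plot_title=section.get('plotTitle'),
        y_title=section['yTitle'],
        relative_path=section['relativePath'],
        histo1_path=section.get('histo1Path'),
        histo2_path=section.get('histo2Path'),
        reference_path=section.get('reference'),
        threshold=section.get('threshold'),
    )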