Example no. 1
def histo_features(runh, refh, const_std_features=True):
    parts = []

    try:
        (rund, run_bars), (refd, ref_bars) = map(make_distribution,
                                                 [runh, refh])
    except ValueError:
        return Maybe(error_message=["Integral for histogram equals zero"])

    runf = distribution_features(rund)
    reff = distribution_features(refd)
    distancef = statistical_distance(refd, rund)
    funcf = reff - runf

    flavors = [(runf, 'run'), (distancef, 'distance'), (funcf, 'func')]
    if const_std_features:
        flavors.append((reff, 'ref'))

    # collect each feature flavour under its own name prefix
    for features, flavor in flavors:
        parts.append(rename_features(features, flavor))

    if run_bars is not None:
        parts.append(
            pd.Series({
                ('alarm', 'max_error_ratio'):
                max_error_ratio(run_bars, ref_bars)
            }))

    return Maybe(value=pd.concat(parts))
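
The examples in this listing pass results around in a small Maybe wrapper that is not itself shown. A minimal sketch, assuming it carries either a value or a list of error messages and that the concat helpers merge several results (any error wins), could look like the following; everything beyond the calls visible in the examples is an assumption.

import pandas as pd


class Maybe:
    # Sketch only: the real Maybe class is not part of this listing.
    def __init__(self, value=None, error_message=None):
        self.value = value
        self.error_message = error_message or []

    def is_error(self):
        return bool(self.error_message)

    @staticmethod
    def concat_id(maybes):
        # Merge the errors if any constituent failed, otherwise keep the
        # wrapped values as a plain list.
        errors = [msg for m in maybes for msg in m.error_message]
        if errors:
            return Maybe(error_message=errors)
        return Maybe(value=[m.value for m in maybes])

    @staticmethod
    def concat(maybes):
        # Like concat_id, but the values are pandas Series that are
        # concatenated into a single feature Series.
        collected = Maybe.concat_id(maybes)
        if collected.is_error():
            return collected
        return Maybe(value=pd.concat(collected.value))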
Example no. 2
def process_tfile(tfile, histo_keys, handle):
    contents = {}

    # look up a type-specific handler for each requested histogram and
    # record the outcome (value or error message) under its key
    for histo_key in histo_keys:
        t = try_get_object(tfile, histo_key)
        typ = type(t)

        if typ not in HANDLERS:
            val = Maybe(error_message=[
                'Error while processing .root. No handler available for {} of {}'
                .format(typ, handle)
            ])
        else:
            handled = HANDLERS[typ](t)

            if handled is None:
                val = Maybe(error_message=[
                    'Error while processing .root. No data for {}'.format(
                        handle)
                ])
            else:
                val = Maybe(value=np_histo(handled))

        contents[histo_key] = val

    return contents
Example no. 3
def tprofile_features(runh, refh):
    distance = Maybe(value=rename_features(
        regression_distance(runh['vals'], refh['vals']), 'distance'))

    alarm = Maybe(value=rename_features(
        pd.Series({
            'max_abs': np.max(np.abs(runh['vals'])),
            'max_error_ratio': max_error_ratio(runh, refh),
        }), 'alarm'))

    result = Maybe.concat([
        distance, alarm,
        value_features(runh['entries'], refh['entries']),
        # wrap the per-bin errors as a histogram so histo_features can be reused
        histo_features({'vals': runh['errs']}, {'vals': refh['errs']})
    ])

    return result
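
rename_features is used throughout to prefix feature names before the per-histogram Series are combined, but its implementation is not included. A plausible sketch, under the assumption that the prefix (a string or a tuple) is simply prepended to every index label:

import pandas as pd


def rename_features(features, prefix):
    # Assumed implementation; the real helper is not shown in this listing.
    # Normalise the prefix and each existing label to tuples and join them,
    # e.g. 'mean' renamed under 'run' becomes ('run', 'mean').
    prefix = prefix if isinstance(prefix, tuple) else (prefix,)

    def prefixed(label):
        label = label if isinstance(label, tuple) else (label,)
        return prefix + label

    return pd.Series(features.values,
                     index=[prefixed(label) for label in features.index])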
Example no. 4
    def get_features(self, run_number):
        refh = self.collector.get_reference(run_number)
        runh = self.collector.get_run(run_number)

        present = Maybe.concat_id([runh, refh])
        if present.is_error():
            return FeatureContainer(Flag.BAD, present.error_message)

        result = FeatureContainer()

        for histo_key in self.collector.get_histo_keys():
            histo_present = Maybe.concat_id(
                [a[histo_key] for a in present.value])

            if histo_present.is_error():
                result.add_histo_errors(histo_key, histo_present.error_message)
                result.set_flag(Flag.TAIL)
            else:
                runh, refh = histo_present.value
                ht = self.get_histo_type(histo_key, runh)

                # skip TH2D histograms outright: their handler would only
                # report a 'no handler' error and flag the run as TAIL
                if ht == 'TH2D':
                    continue

                handler = FeatureExtractor.HANDLERS[ht]

                runh, refh = map(partial(self.tune_histo, ht), [runh, refh])

                features = handler(runh, refh)

                if features.is_error():
                    result.add_histo_errors(histo_key, features.error_message)
                    result.set_flag(Flag.TAIL)
                else:
                    renamed = rename_features(features.value,
                                              ('my', ht, histo_key))

                    result.add_features(renamed)

        if not result.has_features():
            result.add_errors(['No histo features at all.'])
            result.set_flag(Flag.BAD)

        return result
Example no. 5
def weird_tprofile_features(runh, refh):
    # keep only the bins that are filled in both the run and the reference
    mask = (runh['vals'] != 0) & (refh['vals'] != 0)

    s = mask.sum()

    if s < 3:
        return Maybe(
            error_message=['Too many zeros in graph to process features'])
    else:
        # restrict every array in both histograms to the surviving bins
        for d in [runh, refh]:
            for key, value in d.items():
                if isinstance(value, np.ndarray):
                    d[key] = value[mask]

        return Maybe.concat([
            Maybe(value=pd.Series({('zeros', 'vals'): s})),
            tprofile_features(runh, refh)
        ])
Example no. 6
def tefficiency_features(runh, refh):
    (run_eff, run_stats), (ref_eff, ref_stats) = map(get_efficiency,
                                                     [runh, refh])

    return Maybe.concat([
        histo_features(run_eff, ref_eff),
        value_features(run_stats, ref_stats),
        value_features(run_eff['vals'], ref_eff['vals'], 'eff')
    ])
Example no. 7
def process_run(arg):
    # a single tuple argument so the function can be dispatched via Pool.map
    self, histo_keys, run_number, handle, reference = arg

    getter = self.external_collector.get_reference_tfile if reference \
        else self.external_collector.get_run_tfile
    writer = partial(self.collector.write_root2py, reference)

    tfile = getter(run_number)

    if tfile is None:
        val = Maybe(error_message=[
            "External collector doesn't give any .root for the handle"
            " {} of run {}".format(handle, run_number)
        ])
    else:
        val = Maybe(value=process_tfile(tfile, histo_keys, handle))

    writer(val, handle)
Example no. 8
def value_features(runv, refv, prefix='stats'):
    runs, refs = runv.sum(), refv.sum()

    features = pd.Series({
        'runs': runs,
        'refs': refs,
        'refs - runs': refs - runs,
    })

    result = pd.concat([features, regression_distance(runv, refv)])

    return Maybe(value=rename_features(result, prefix))
Example no. 9
    def make_result(self, result, error_on_fail):
        return Maybe(
            error_message=[error_on_fail]) if result is None else result
Example no. 10
class FeatureExtractor:
    HANDLERS = {
        'TH1D':
        th1d_features,
        'TH2D':
        lambda runh, refh: Maybe(
            error_message=['No handler for TH2D available']),  # it's sad
        'TEfficiency':
        tefficiency_features,
        'TProfile':
        tprofile_features,
        'WeirdTProfile':
        weird_tprofile_features,
        'WeirdMuTProfile':
        weird_tprofile_features,
        'DecodingErrors':
        decoding_errors_features
    }

    WEIRD_HT = {
        'WeirdTProfile': [
            'RICH/RiLongTrkEff/All/effVChi2PDOF',
            'Velo/VeloTrackMonitor/Pseudoefficiency_per_sensor_vs_sensorID',
        ],
        'WeirdMuTProfile':
        ['MuIDLambdaPlot/pion/Prof_eff', 'MuIDLambdaPlot/proton/Prof_eff'],
        'DecodingErrors': ['RICH/RichDecodingErrors/decodingErrors']
    }

    NUMS = [
        'avHltPhysRate', 'avL0PhysRate', 'avLumi', 'avMu', 'avPhysDeadTime',
        'beamenergy', 'beamgasTrigger', 'betaStar', 'endlumi', 'lumiTrigger',
        'magnetCurrent', 'nobiasTrigger', 'partitionid', 'run_state', 'tck',
        'veloOpening'
    ]
    CATEGORICAL = [
        'LHCState', 'activity', 'magnetState', 'partitionname', 'program',
        'programVersion', 'runtype', 'state', 'triggerConfiguration',
        'veloPosition', 'destination'
    ]
    TIME = ['starttime', 'endtime']

    def __init__(self, collector, njobs):
        self.collector = collector
        self.njobs = njobs

    def tune_histo(self, ht, runh):
        runh = deepcopy(runh)

        # clip negative TH1D bin contents to zero before feature extraction
        if ht == 'TH1D':
            runh['vals'] = np.maximum(0, runh['vals'])

        return runh

    def get_histo_type(self, histo_key, data):
        histo_types = inverse_dict(FeatureExtractor.WEIRD_HT)

        return histo_types.get(histo_key, data['type'])

    def get_features(self, run_number):
        refh = self.collector.get_reference(run_number)
        runh = self.collector.get_run(run_number)

        present = Maybe.concat_id([runh, refh])
        if present.is_error():
            return FeatureContainer(Flag.BAD, present.error_message)

        result = FeatureContainer()

        for histo_key in self.collector.get_histo_keys():
            histo_present = Maybe.concat_id(
                [a[histo_key] for a in present.value])

            if histo_present.is_error():
                result.add_histo_errors(histo_key, histo_present.error_message)
                result.set_flag(Flag.TAIL)
            else:
                runh, refh = histo_present.value
                ht = self.get_histo_type(histo_key, runh)

                # skip TH2D histograms outright: their handler would only
                # report a 'no handler' error and flag the run as TAIL
                if ht == 'TH2D':
                    continue

                handler = FeatureExtractor.HANDLERS[ht]

                runh, refh = map(partial(self.tune_histo, ht), [runh, refh])

                features = handler(runh, refh)

                if features.is_error():
                    result.add_histo_errors(histo_key, features.error_message)
                    result.set_flag(Flag.TAIL)
                else:
                    renamed = rename_features(features.value,
                                              ('my', ht, histo_key))

                    result.add_features(renamed)

        if not result.has_features():
            result.add_errors(['No histo features at all.'])
            result.set_flag(Flag.BAD)

        return result

    def make_features(self, run_numbers):
        args = zip([self] * len(run_numbers), run_numbers)

        # for arg in args:
        #     process_run(arg)

        pool = Pool(self.njobs)
        pool.map(process_run, args)
        pool.close()
        pool.join()

    def get_linear_data(self):
        linear_data = self.collector.get_linear_data()

        df = pd.DataFrame.from_dict(linear_data,
                                    orient='index').drop('rundb_data', axis=1)
        df.index = df.index.astype(np.int64)

        rundb = {
            key: value['rundb_data']
            for key, value in linear_data.items()
        }
        rundf = pd.DataFrame.from_dict(rundb, orient='index')
        rundf.index = rundf.index.astype(np.int64)
        rundf = rundf[FeatureExtractor.NUMS + FeatureExtractor.CATEGORICAL +
                      FeatureExtractor.TIME]

        for col in FeatureExtractor.CATEGORICAL:
            rundf[col] = LabelEncoder().fit_transform(rundf[col])

        for col in FeatureExtractor.NUMS:
            rundf[col] = rundf[col].astype(np.float64)

        for col in FeatureExtractor.TIME:
            rundf[col] = rundf[col].map(get_time)

        rundf['run_length'] = rundf['endtime'] - rundf['starttime']
        rundf.loc[rundf['run_length'] < 0, 'run_length'] = np.nan

        df = rundf.merge(df, left_index=True, right_index=True)

        df['reference'] = pd.Series(df.index, index=df.index).map(
            self.collector.get_data_ref())

        df['switch'] = (df['reference'] != df['reference'].shift(1)).astype(
            np.int64).cumsum()

        df = df.rename(
            columns=lambda col: ('linear', col) if col != 'flag' else 'flag')

        return df
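
get_histo_type above relies on an inverse_dict helper that is not part of the listing. Judging from how WEIRD_HT is consulted, it presumably turns the {histo_type: [histo_key, ...]} mapping into a {histo_key: histo_type} lookup; a sketch under that assumption:

def inverse_dict(d):
    # Assumed behaviour inferred from get_histo_type; not the original helper.
    # {'WeirdTProfile': ['a', 'b']} -> {'a': 'WeirdTProfile', 'b': 'WeirdTProfile'}
    return {key: histo_type for histo_type, keys in d.items() for key in keys}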
Example no. 11
def decoding_errors_features(runh, refh):
    # refh is unused; only the run's total decoding-error count is reported
    return Maybe(value=pd.Series({('decoding', 'errors'): runh['vals'].sum()}))
Example no. 12
def th1d_features(runh, refh):
    return Maybe.concat([
        histo_features(runh, refh),
        value_features(runh['vals'], refh['vals']),
        Maybe(value=pd.Series({('alarm', 'mean'): runh['mean']}))
    ])
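
For orientation, a hypothetical end-to-end use of FeatureExtractor, assuming a concrete collector object is available (its API is outside this listing) and using a placeholder run number:

# Hypothetical usage; 'collector' and the run number are placeholders.
extractor = FeatureExtractor(collector, njobs=4)

# per-run histogram features; errors set Flag.TAIL or Flag.BAD on the container
container = extractor.get_features(run_number=123456)

# run-level metadata assembled into a single DataFrame
linear_df = extractor.get_linear_data()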