Example #1
    def train_model(self,
                    x_train,
                    y_train,
                    x_val,
                    y_val,
                    batch_size,
                    normalize=False,
                    log_stats=True):
        self.train_cnn_model(self.cnn_model,
                             x_train,
                             y_train,
                             x_val,
                             y_val,
                             batch_size,
                             normalize=normalize)
        layer_dict = dict([(layer.name, layer)
                           for layer in self.cnn_model.layers])

        #     x = layer_dict['flatten'].output
        x = self.cnn_model.layers[-2].output

        self.cropped_model = Model(self.cnn_model.input, x)

        cropped_model_train_output = self.cropped_model.predict(x_train)

        for sklearn_model, _ in self.classifiers:
            sklearn_model.fit(cropped_model_train_output, y_train)

        ensemble_model_predictions = self.predict(x_val)
        if log_stats:
            print("Ensemble predicted proba: ", ensemble_model_predictions)

            print("Validation ensemble Model result: ")
            stats(y_val, ensemble_model_predictions, 'Ensemble Method')
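Example #1 calls self.predict(x_val), which is defined elsewhere in the same class. A minimal sketch of what such a method could look like, assuming each entry in self.classifiers is a (sklearn_model, name) pair, that numpy is imported as np, and that the ensemble output is the average positive-class probability (all of these are assumptions, not taken from the snippet):

    def predict(self, x):
        # Hypothetical sketch: run the cropped CNN as a feature extractor,
        # then average the positive-class probabilities of the fitted classifiers.
        features = self.cropped_model.predict(x)
        probas = [clf.predict_proba(features)[:, 1]
                  for clf, _ in self.classifiers]
        return np.mean(probas, axis=0)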
Example #2
def _train(build_model,
           train_model,
           X_train,
           y_train,
           X_val,
           y_val,
           X_test,
           y_test,
           batch_size,
           gray_scale_model,
           normalize=False,
           cnn_file_results_path='./histories/cnn_norm.pickle'):
    model = build_model(gray_scale_model)
    train_model(model, X_train, y_train, X_val, y_val, batch_size, normalize)

    y_pred = model.predict(X_test).ravel()

    path_p = Path(cnn_file_results_path)
    if not path_p.exists():
        print("Initialize log file")
        with open(cnn_file_results_path, 'xb') as file:
            pickle.dump([], file)

    stats(y_test, y_pred, "CNN", cnn_file_results_path)
    return model
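Examples #2, #3 and #13 seed the results file with an empty pickled list before calling stats, which suggests that stats scores the predictions and appends one entry per run to that file. The real implementation is not shown anywhere in these snippets; a minimal sketch under that assumption (the metric choice and entry layout are guesses):

import pickle

import numpy as np
from sklearn.metrics import accuracy_score, roc_auc_score


def stats(y_true, y_pred_proba, model_name, results_path=None):
    # Hypothetical sketch, not the project's actual implementation.
    entry = {
        'model': model_name,
        'auc': roc_auc_score(y_true, y_pred_proba),
        'accuracy': accuracy_score(
            y_true, (np.asarray(y_pred_proba) > 0.5).astype(int)),
    }
    print(entry)
    if results_path is not None:
        # the callers create this file as an empty pickled list on first use
        with open(results_path, 'rb') as file:
            history = pickle.load(file)
        history.append(entry)
        with open(results_path, 'wb') as file:
            pickle.dump(history, file)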
Example #3
def _trainSK(build_model,
             train_model,
             X_train,
             y_train,
             X_val,
             y_val,
             X_test,
             y_test,
             sklearn_model,
             sklearn_model_name,
             batch_size,
             gray_scale_model,
             normalize,
             cnn_file_results_path='./histories/norm_'):
    print("TrainingModelSK")
    model = build_model(gray_scale_model)
    sklearn_model, cropped_model = train_model(model, X_train, y_train, X_val,
                                               y_val, sklearn_model,
                                               sklearn_model_name, batch_size,
                                               normalize)

    y_pred = sklearn_model.predict_proba(cropped_model.predict(X_test))[:, 1]

    path = cnn_file_results_path + sklearn_model_name + ".pickle"
    path_p = Path(path)
    if not path_p.exists():
        print("Initialize log file")
        with open(path, 'xb') as file:
            pickle.dump([], file)

    stats(y_test, y_pred, "CNN + " + sklearn_model_name, path)
Example #4
def analyse_regionsm(region, rd=False):
    wpE_reg = wpE.loc[states_reg[region]].wp.unstack().transpose().sum(axis=1)
    wpE_GWA_reg = wpE_GWA.loc[states_reg[region]].wp.unstack().transpose().sum(
        axis=1)
    wpM_reg = wpM.loc[states_reg[region]].wp.unstack().transpose().sum(axis=1)
    wpM_GWA_reg = wpM_GWA.loc[states_reg[region]].wp.unstack().transpose().sum(
        axis=1)
    prod_regm = prod_USAm[region].dropna()
    # merge data
    wp_reg = pd.concat([wpE_reg, wpE_GWA_reg, wpM_reg, wpM_GWA_reg],
                       axis=1).tz_localize('UTC').tz_convert('US/Central')
    wp_reg.columns = ['ERA5', 'ERA5_GWA', 'MERRA2', 'MERRA2_GWA']
    # aggregate monthly
    wp_regm = wp_reg.resample('M').sum()
    # combine data and calculate capacity factors
    cf_regm = pd.concat([
        wp_regm.div(caps_reg[region], axis=0),
        (prod_regm.resample('M').sum() * 10**6 / caps_reg[region])
    ],
                        axis=1).dropna()
    cf_regm.columns = np.append(wp_regm.columns, 'wp_obs')
    # Analyse
    stats_regm = pd.DataFrame(
        {
            'ERA5': stats(cf_regm.ERA5, cf_regm.wp_obs, rd),
            'ERA5_GWA': stats(cf_regm.ERA5_GWA, cf_regm.wp_obs, rd),
            'MERRA2': stats(cf_regm.MERRA2, cf_regm.wp_obs, rd),
            'MERRA2_GWA': stats(cf_regm.MERRA2_GWA, cf_regm.wp_obs, rd),
            'obs': [np.nan, np.nan, np.nan,
                    cf_regm.wp_obs.mean()]
        },
        index=['cor', 'rmse', 'mbe', 'avg'])
    stats_regm.index = pd.MultiIndex.from_product(
        [[region], stats_regm.index.values], names=['state', 'param'])
    return stats_regm
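The wind-power examples (#4, #5, #14, #17, #18 and #27) all expect stats(sim, obs, rd) to return four values matching the index ['cor', 'rmse', 'mbe', 'avg']: correlation, root-mean-square error, mean bias error and the mean simulated capacity factor, rounded unless rd is False. The implementation itself is not part of these snippets; a plausible sketch under those assumptions:

import numpy as np


def stats(sim, obs, rd=True):
    # Hypothetical sketch of a stats() helper compatible with the calls above.
    cor = np.corrcoef(sim, obs)[0, 1]
    rmse = np.sqrt(np.mean((sim - obs) ** 2))
    mbe = np.mean(sim - obs)
    avg = np.mean(sim)
    if rd:
        return [round(v, 2) for v in (cor, rmse, mbe, avg)]
    return [cor, rmse, mbe, avg]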
Example #5
def analyse_statesm(state, rd=False):
    # merge data
    wp_st = pd.concat([
        wpE.wp.loc[state], wpE_GWA.wp.loc[state], wpM.wp.loc[state],
        wpM_GWA.wp.loc[state]
    ],
                      axis=1).tz_localize('UTC').tz_convert('US/Central')
    wp_st.columns = ['ERA5', 'ERA5_GWA', 'MERRA2', 'MERRA2_GWA']
    # aggregate monthly
    wp_stm = wp_st.resample('M').sum()
    # combine data and calculate capacity factors
    cf_stm = pd.concat([
        wp_stm.div(cap_statesm[state], axis=0),
        (prod_USAm[state].dropna().resample('M').sum() * 10**6 /
         (cap_statesm[state]))
    ],
                       axis=1).replace(np.inf, np.nan).dropna()[1:]
    cf_stm.columns = np.append(wp_stm.columns, 'wp_obs')
    # Analyse
    stats_stm = pd.DataFrame(
        {
            'ERA5': stats(cf_stm.ERA5, cf_stm.wp_obs, rd),
            'ERA5_GWA': stats(cf_stm.ERA5_GWA, cf_stm.wp_obs, rd),
            'MERRA2': stats(cf_stm.MERRA2, cf_stm.wp_obs, rd),
            'MERRA2_GWA': stats(cf_stm.MERRA2_GWA, cf_stm.wp_obs, rd),
            'obs': [np.nan, np.nan, np.nan,
                    cf_stm.wp_obs.mean()]
        },
        index=['cor', 'rmse', 'mbe', 'avg'])
    stats_stm.index = pd.MultiIndex.from_product(
        [[state], stats_stm.index.values], names=['state', 'param'])
    return (stats_stm)
Example #6
def _trainV2(build_model,
             train_model,
             X_train_paths,
             y_train,
             X_val_paths,
             y_val,
             X_test_paths,
             y_test,
             batch_size,
             gray_scale_model,
             normalize=False,
             cnn_file_results_path='./histories/cnn_norm.pickle'):

    dict_path_image = load_images(X_train_paths + X_val_paths + X_test_paths,
                                  y_train + y_val + y_test, gray_scale_model,
                                  normalize)
    X_train_ = [
        value for path, value in dict_path_image.items()
        if path in X_train_paths
    ]
    X_val_ = [
        value for path, value in dict_path_image.items() if path in X_val_paths
    ]
    X_test_ = [
        value for path, value in dict_path_image.items()
        if path in X_test_paths
    ]

    del dict_path_image

    # Materialize the (image, label) pairs into arrays; np.array(map(...))
    # would only wrap the map iterator instead of building an array.
    X_train = np.array([x[0] for x in X_train_])
    y_train = np.array([x[1] for x in X_train_])

    del X_train_

    X_val = np.array([x[0] for x in X_val_])
    y_val = np.array([x[1] for x in X_val_])

    del X_val_

    X_test = np.array([x[0] for x in X_test_])
    y_test = np.array([x[1] for x in X_test_])

    del X_test_

    model = build_model(gray_scale_model)
    train_model(model, X_train, y_train, X_val, y_val, batch_size, normalize)

    y_pred = model.predict(X_test).ravel()

    path_p = Path(cnn_file_results_path)
    if not path_p.exists():
        print("Initialize log file")
        with open(cnn_file_results_path, 'xb') as file:
            pickle.dump([], file)

    stats(y_test, y_pred, "CNN", cnn_file_results_path)
Example #7
    def print_stats(self):
        print('Train stats:')
        utils.stats(tokens=self.train_tokens,
                    lemmas=self.train_lemmas,
                    known=self.preprocessor.known_tokens)
        print('Test stats:')
        utils.stats(tokens=self.test_tokens,
                    lemmas=self.test_lemmas,
                    known=self.preprocessor.known_tokens)
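Here utils.stats is a reporting helper called with the keyword arguments tokens, lemmas and known; the quantities it reports are not visible in the snippet. A hedged sketch of a compatible helper (the reported figures are assumptions):

def stats(tokens, lemmas, known):
    # Hypothetical sketch: corpus size, lemma inventory and known-token coverage.
    n_tokens = len(tokens)
    print(f'  tokens: {n_tokens} ({len(set(tokens))} unique), '
          f'lemmas: {len(set(lemmas))}')
    print(f'  known tokens: {sum(1 for t in tokens if t in known) / n_tokens:.2%}')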
def ratings(name=""):
    name = str(name)
    mydb = db.db()
    q = utils.get_questions()
    me, all = mydb.getAllRatingsFor(name)
    mystats = utils.stats(me)
    allstats = utils.stats(all)
    ans = json.dumps({"questions": q, "me": mystats, "all": allstats})
    return ans
def allratings():
    mydb = db.db()
    users = userdb.get_users()
    emails = users.keys()
    ratings = []
    for e in emails:
        me, all = mydb.getAllRatingsFor(e)
        mystats = utils.stats(me)
        allstats = utils.stats(all)
        ratings.append({"email": e, "last": users[e]["last"], "first": users[e]["first"], "stats": mystats})
    return render_template("allratings.html", ratings=ratings, all=allstats)
Example #10
def run_code(arg, input_data, attachment, lang_id, channel_id):
    code = get_code(arg, attachment)
    print('Running code: ', code.decode())
    submission = api.submission.submit(client, code, lang_id,
        stdin=input_data.encode())
    status = submission.status
    output = submission.stdout
    errors = submission.stderr
    compile_output = submission.compile_output
    if output:
        output = output.decode()
    if errors:
        errors = errors.decode()
    if compile_output:
        compile_output = compile_output.decode()

    message = 'Status: ' + status['description'] + '\n'

    if output:
        message += 'Output: ```\n' + output + '\n```'
    else:
        message += 'No output sent.\n'
    if errors:
        message += 'Errors: ```\n' + errors + '\n```'
    if compile_output:
        message += ('Compiler output: ```\n' + compile_output
            + '\n```\n')

    message += stats(submission.time, submission.memory)
    send_message(channel_id, message)
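In Examples #10 and #19, stats(submission.time, submission.memory) is appended to a chat message, so this variant simply formats resource usage as a string. A minimal sketch under that assumption (the units are guesses):

def stats(time, memory):
    # Hypothetical sketch: format execution time and memory for the reply.
    return 'Time: ' + str(time) + ' s, Memory: ' + str(memory) + ' KB\n'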
Example #11
def buildTree(data):
    if len(data) <= 0: return node()

    currentEnt = entropy(data)
    bestGain = 0.0
    bestCriteria = None
    bestSets = None

    dimension = len(data[0]) - 1

    for feature in range(dimension):
        # collect the distinct values this feature takes in the data
        feature_values = {}
        for item in data:
            feature_values[item[feature]] = 1
        for value in feature_values.keys():
            set1, set2 = split(data, feature, value)
            # p is the fraction of rows that fall into set1
            p = len(set1) / len(data)
            infoGain = currentEnt - p * entropy(set1) - (1 - p) * entropy(set2)
            if infoGain > bestGain and len(set1) > 0 and len(set2) > 0:
                bestGain = infoGain
                bestCriteria = (feature, value)
                bestSets = (set1, set2)

    if bestGain > 0:
        leftBranch = buildTree(bestSets[0])
        rightBranch = buildTree(bestSets[1])
        return node(feature=bestCriteria[0],
                    threshold=bestCriteria[1],
                    left=leftBranch,
                    right=rightBranch)
    else:
        return node(results=stats(data))
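In this decision-tree builder, stats(data) supplies the results stored in a leaf node. In the classic version of this pattern that is a count of class labels taken from the last column of each row; a sketch under that assumption:

def stats(data):
    # Hypothetical sketch: count occurrences of each class label (last column).
    counts = {}
    for row in data:
        counts[row[-1]] = counts.get(row[-1], 0) + 1
    return counts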
Example #12
def container_list():
    res = utils.stats()
    # if res["status"] != "error":
    #     return jsonify({"status":"success", "message":res})
    # else:
    #     return jsonify(res)

    return jsonify(res)
Example #13
def _train_ensemble(classifiers,
                    X_train,
                    y_train,
                    X_val,
                    y_val,
                    X_test,
                    y_test,
                    batch_size,
                    gray_scale_model,
                    normalize=False,
                    mode='single',
                    cnn_file_results_path='./histories/ensemble_norm.pickle'):
    model = EnsembleClassifier(build_model,
                               train_model_with_Keras_ImageDataGenerator,
                               classifiers)
    model.build_model(gray_scale_model)
    model.train_model(X_train,
                      y_train,
                      X_val,
                      y_val,
                      batch_size,
                      normalize=normalize)

    if mode == 'single':
        y_pred = model.predict(X_test)

        path_p = Path(cnn_file_results_path)
        if not path_p.exists():
            print("Initialize log file")
            with open(cnn_file_results_path, 'xb') as file:
                pickle.dump([], file)

        stats(y_test, y_pred, "Ensemble", cnn_file_results_path)
    else:

        for y_pred, model_name in model.get_predictions(X_test):
            results_path = './histories/' + 'norm_' + model_name + '.pickle'
            path_p = Path(results_path)
            if not path_p.exists():
                print("Initialize log file")
                with open(results_path, 'xb') as file:
                    pickle.dump([], file)

            stats(y_test, y_pred, model_name, str(path_p))
Example #14
def analyse_ZAFd(region):
    '''
    analyse daily wind power generation for a region
    '''
    mask = (ZAFh[region].notna()*capdfH[region].notna()).replace(0,np.nan)
    comph = pd.DataFrame({'MERRA2':ZAFm[region].tz_convert('Africa/Johannesburg')*mask,
                         'ERA5':ZAFe[region].tz_convert('Africa/Johannesburg')*mask,
                         'MERRA2_GWA2':ZAFmg2[region].tz_convert('Africa/Johannesburg')*mask,
                         'ERA5_GWA2':ZAFeg2[region].tz_convert('Africa/Johannesburg')*mask,
                         'MERRA2_GWA3':ZAFmg3[region].tz_convert('Africa/Johannesburg')*mask,
                         'ERA5_GWA3':ZAFeg3[region].tz_convert('Africa/Johannesburg')*mask})
    # get capacities and mask
    caph = capdfH[region].tz_convert('Africa/Johannesburg')*mask
    # aggregate daily
    capd = caph.resample('D').sum()
    compd = comph.resample('D').sum()
    # calculate capacity factors
    cfd = compd.div(capd,axis=0)
    # add observed CFs
    cfd['obs'] = cfd.index.map((ZAFh[region]*mask).resample('D').mean())
    # remove capacity factors > 1 and missing data
    cfd = cfd.mask(cfd>1).dropna()
    stat_d = pd.DataFrame({'ERA5':stats(cfd.ERA5,cfd.obs,False),
                           'ERA5_GWA2':stats(cfd.ERA5_GWA2,cfd.obs,False),
                           'ERA5_GWA3':stats(cfd.ERA5_GWA3,cfd.obs,False),
                           'MERRA2':stats(cfd.MERRA2,cfd.obs,False),
                           'MERRA2_GWA2':stats(cfd.MERRA2_GWA2,cfd.obs,False),
                           'MERRA2_GWA3':stats(cfd.MERRA2_GWA3,cfd.obs,False),
                           'obs':[np.nan,np.nan,np.nan,cfd.obs.mean()]},
                          index = ['cor','rmse','mbe','avg']).reset_index().melt(id_vars=['index']).dropna()
    stat_d.columns = ['param','dataset',region]
    return(stat_d.set_index(['param','dataset']).transpose())
def analyse_NZh():
    '''
    analyse hourly wind power generation for NZ
    '''
    mask = (prod_NZh.notna() * capdfH.notna()).replace(0, np.nan)
    comph = pd.DataFrame({
        'MERRA2': (NZm * mask).sum(axis=1),
        'ERA5': (NZe * mask).sum(axis=1),
        'MERRA2_GWA2': (NZmg2 * mask).sum(axis=1),
        'ERA5_GWA2': (NZeg2 * mask).sum(axis=1),
        'MERRA2_GWA3': (NZmg3 * mask).sum(axis=1),
        'ERA5_GWA3': (NZeg3 * mask).sum(axis=1)
    })
    comph['obs'] = comph.index.map((prod_NZh * mask).sum(axis=1)) / 1000
    # get capacities
    caph = (capdfH * mask).sum(axis=1)
    # calculate capacity factors
    cfh = comph.div(caph, axis=0)
    # remove capacity factors > 1
    cfh = cfh.mask(cfh > 1).dropna()
    stat_h = pd.DataFrame(
        {
            'ERA5': stats(cfh.ERA5, cfh.obs, False),
            'ERA5_GWA2': stats(cfh.ERA5_GWA2, cfh.obs, False),
            'ERA5_GWA3': stats(cfh.ERA5_GWA3, cfh.obs, False),
            'MERRA2': stats(cfh.MERRA2, cfh.obs, False),
            'MERRA2_GWA2': stats(cfh.MERRA2_GWA2, cfh.obs, False),
            'MERRA2_GWA3': stats(cfh.MERRA2_GWA3, cfh.obs, False),
            'obs': [np.nan, np.nan, np.nan,
                    cfh.obs.mean()]
        },
        index=['cor', 'rmse', 'mbe',
               'avg']).reset_index().melt(id_vars=['index']).dropna()
    stat_h.columns = ['param', 'dataset', 'NZ']
    return (stat_h.set_index(['param', 'dataset']).transpose())
def analyse_NZparkh(park):
    '''
    analyse hourly wind power generation for one park
    '''
    comph = pd.DataFrame({
        'MERRA2': NZm[park],
        'ERA5': NZe[park],
        'MERRA2_GWA2': NZmg2[park],
        'ERA5_GWA2': NZeg2[park],
        'MERRA2_GWA3': NZmg3[park],
        'ERA5_GWA3': NZeg3[park]
    })
    comph['obs'] = comph.index.map(prod_NZh[park]) / 1000
    # get capacities
    caph = capdfH[park]
    # calculate capacity factors
    cfh = comph.div(caph, axis=0)
    # remove capacity factors > 1
    cfh = cfh.mask(cfh > 1).dropna()
    stat_h = pd.DataFrame(
        {
            'ERA5': stats(cfh.ERA5, cfh.obs, False),
            'ERA5_GWA2': stats(cfh.ERA5_GWA2, cfh.obs, False),
            'ERA5_GWA3': stats(cfh.ERA5_GWA3, cfh.obs, False),
            'MERRA2': stats(cfh.MERRA2, cfh.obs, False),
            'MERRA2_GWA2': stats(cfh.MERRA2_GWA2, cfh.obs, False),
            'MERRA2_GWA3': stats(cfh.MERRA2_GWA3, cfh.obs, False),
            'obs': [np.nan, np.nan, np.nan,
                    cfh.obs.mean()]
        },
        index=['cor', 'rmse', 'mbe',
               'avg']).reset_index().melt(id_vars=['index']).dropna()
    stat_h.columns = ['param', 'dataset', park]
    return (stat_h.set_index(['param', 'dataset']).transpose())
Example #17
def analyse_ZAFm(region):
    '''
    analyse monthly wind power generation for a region
    '''
    # mask for masking simulated data and capacities
    # (to only use timespans where also observed data are available)
    mask = (ZAFh[region].notna()*capdfH[region].notna()).replace(0,np.nan)
    comph = pd.DataFrame({'MERRA2':ZAFm[region].tz_convert('Africa/Johannesburg')*mask,
                         'ERA5':ZAFe[region].tz_convert('Africa/Johannesburg')*mask,
                         'MERRA2_GWA2':ZAFmg2[region].tz_convert('Africa/Johannesburg')*mask,
                         'ERA5_GWA2':ZAFeg2[region].tz_convert('Africa/Johannesburg')*mask,
                         'MERRA2_GWA3':ZAFmg3[region].tz_convert('Africa/Johannesburg')*mask,
                         'ERA5_GWA3':ZAFeg3[region].tz_convert('Africa/Johannesburg')*mask})
    # get capacities and mask
    caph = capdfH[region].tz_convert('Africa/Johannesburg')*mask
    # aggregate monthly
    capm = caph.resample('M').sum()
    compm = comph.resample('M').sum()
    # calculate capacity factors
    cfm = compm.div(capm,axis=0)
    # add observed data
    cfm['obs'] = cfm.index.map((ZAFh[region]*mask).resample('M').mean())
    # remove capacity factors > 1 and missing data
    cfm = cfm.mask(cfm>1).dropna()
    stat_m = pd.DataFrame({'ERA5':stats(cfm.ERA5,cfm.obs,False),
                           'ERA5_GWA2':stats(cfm.ERA5_GWA2,cfm.obs,False),
                           'ERA5_GWA3':stats(cfm.ERA5_GWA3,cfm.obs,False),
                           'MERRA2':stats(cfm.MERRA2,cfm.obs,False),
                           'MERRA2_GWA2':stats(cfm.MERRA2_GWA2,cfm.obs,False),
                           'MERRA2_GWA3':stats(cfm.MERRA2_GWA3,cfm.obs,False),
                           'obs':[np.nan,np.nan,np.nan,cfm.obs.mean()]},
                          index = ['cor','rmse','mbe','avg']).reset_index().melt(id_vars=['index']).dropna()
    stat_m.columns = ['param','dataset',region]
    return(stat_m.set_index(['param','dataset']).transpose())
Example #18
def analyse_ZAFh(region):
    '''
    analyse hourly wind power generation for a region
    '''
    comph = pd.DataFrame({'MERRA2':ZAFm[region],
                         'ERA5':ZAFe[region],
                         'MERRA2_GWA2':ZAFmg2[region],
                         'ERA5_GWA2':ZAFeg2[region],
                         'MERRA2_GWA3':ZAFmg3[region],
                         'ERA5_GWA3':ZAFeg3[region]})
    # get capacities
    caph = capdfH[region]
    # calculate capacity factors
    cfh = comph.div(caph,axis=0).tz_convert('Africa/Johannesburg')
    # add observed data
    cfh['obs'] = cfh.index.map(ZAFh[region])
    # remove capacity factors > 1 and lines with missing data
    cfh = cfh.mask(cfh>1).dropna()
    stat_h = pd.DataFrame({'ERA5':stats(cfh.ERA5,cfh.obs,False),
                           'ERA5_GWA2':stats(cfh.ERA5_GWA2,cfh.obs,False),
                           'ERA5_GWA3':stats(cfh.ERA5_GWA3,cfh.obs,False),
                           'MERRA2':stats(cfh.MERRA2,cfh.obs,False),
                           'MERRA2_GWA2':stats(cfh.MERRA2_GWA2,cfh.obs,False),
                           'MERRA2_GWA3':stats(cfh.MERRA2_GWA3,cfh.obs,False),
                           'obs':[np.nan,np.nan,np.nan,cfh.obs.mean()]},
                          index = ['cor','rmse','mbe','avg']).reset_index().melt(id_vars=['index']).dropna()
    stat_h.columns = ['param','dataset',region]
    return(stat_h.set_index(['param','dataset']).transpose())
Example #19
def test_code(arg, attachment, lang_id, problem_name, channel_id):
    code = get_code(arg, attachment)
    print('Running test code: ', code.decode())
    cases = get_problem(problem_name).cases
    message = ''
    for stdin in cases:
        expected_output = cases[stdin]
        submission = api.submission.submit(client, code, lang_id,
            stdin=stdin.encode(), expected_output=expected_output.encode())
        status = submission.status
        message += 'Status: ' + status['description'] + '\n'
        message += stats(submission.time, submission.memory)
    send_message(channel_id, message)
Example #20
def train_model_sklearn(model: models.Sequential,
                        x_train,
                        y_train,
                        x_val,
                        y_val,
                        sklearn_model,
                        sklearn_model_name,
                        batch_size,
                        normalize=False):

    model = train_model_with_Keras_ImageDataGenerator(model,
                                                      x_train,
                                                      y_train,
                                                      x_val,
                                                      y_val,
                                                      batch_size,
                                                      normalize=normalize)
    layer_dict = dict([(layer.name, layer) for layer in model.layers])

    #     x = layer_dict['flatten'].output
    x = model.layers[-2].output

    from tensorflow.keras.models import Model
    cropped_model = Model(model.input, x)
    del model

    cropped_model_train_output = cropped_model.predict(x_train)
    del x_train

    sklearn_model.fit(cropped_model_train_output, y_train)

    cropped_model_val_output = cropped_model.predict(x_val)
    sklearn_model_predicted = sklearn_model.predict_proba(
        cropped_model_val_output)[:, 1]
    print("SKlearn predicted proba: ", sklearn_model_predicted)

    print("Validation sklearn Model result: ")
    stats(y_val, sklearn_model_predicted, 'CNN + ' + sklearn_model_name)
    return sklearn_model, cropped_model
Example #21
def histogram(metrics, figure_num):
    global min, max

    # Calculate data needed to plot the normal distribution graph
    stats = utils.stats(metrics)
    dist = scipy.stats.norm(stats['mean'], stats['std_dev'])
    minimum = int(min(metrics))
    maximum = int(max(metrics))
    values = [
        value for value in range(minimum, maximum, (maximum - minimum) // 100)
    ]
    probabilities = [dist.pdf(value) for value in values]

    # Plot
    plt.figure(figure_num)
    plt.hist(metrics, bins=50, density=True)
    plt.plot(values, probabilities)
    plt.show()
    def _calculate(self):
        """
        Private method used to calculate the baseline from the base slots. Base slots are not used as the baseline
        because this approach would require additional computation with each is_alerting call.
        """
        bl = [[[] for _ in range(DAY_LEN)] for _ in range(WEEK_LEN)]
        self.baseline = [[None for _ in range(DAY_LEN)]
                         for _ in range(WEEK_LEN)]

        # Build collection slot arrays
        for base_slot in self.base_slots:
            for day in range(WEEK_LEN):
                for hour in range(DAY_LEN):
                    bl[day][hour].append(base_slot.get_item(day, hour))

        # Build baseline object
        for day in range(WEEK_LEN):
            for hour in range(DAY_LEN):
                stats = utils.stats(bl[day][hour])
                self.baseline[day][hour] = CollectionSlot(
                    stats['mean'], stats['std_dev'])
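The histogram and _calculate snippets above index the return value of utils.stats with 'mean' and 'std_dev', so this variant returns a dictionary of summary statistics. A minimal sketch (any keys beyond those two are assumptions):

import statistics


def stats(values):
    # Hypothetical sketch: basic summary statistics as a dictionary.
    return {
        'mean': statistics.mean(values),
        'std_dev': statistics.pstdev(values),
    }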
def analyse_NZparkm(park):
    '''
    analyse monthly wind power generation for one park
    '''
    # mask for masking simulated data and capacities
    # (to only use timespans where also observed data are available)
    mask = (prod_NZh[park].notna() * capdfH[park].notna()).replace(0, np.nan)
    comph = pd.DataFrame({
        'MERRA2': NZm[park] * mask,
        'ERA5': NZe[park] * mask,
        'MERRA2_GWA2': NZmg2[park] * mask,
        'ERA5_GWA2': NZeg2[park] * mask,
        'MERRA2_GWA3': NZmg3[park] * mask,
        'ERA5_GWA3': NZeg3[park] * mask
    })
    comph['obs'] = comph.index.map(prod_NZh[park] * mask) / 1000
    # get capacities and mask
    caph = capdfH[park] * mask
    # aggregate monthly
    capm = caph.resample('M').sum()
    compm = comph.resample('M').sum()
    # calculate capacity factors
    cfm = compm.div(capm, axis=0)
    # remove capacity factors > 1
    cfm = cfm.mask(cfm > 1).dropna()
    stat_m = pd.DataFrame(
        {
            'ERA5': stats(cfm.ERA5, cfm.obs, False),
            'ERA5_GWA2': stats(cfm.ERA5_GWA2, cfm.obs, False),
            'ERA5_GWA3': stats(cfm.ERA5_GWA3, cfm.obs, False),
            'MERRA2': stats(cfm.MERRA2, cfm.obs, False),
            'MERRA2_GWA2': stats(cfm.MERRA2_GWA2, cfm.obs, False),
            'MERRA2_GWA3': stats(cfm.MERRA2_GWA3, cfm.obs, False),
            'obs': [np.nan, np.nan, np.nan,
                    cfm.obs.mean()]
        },
        index=['cor', 'rmse', 'mbe',
               'avg']).reset_index().melt(id_vars=['index']).dropna()
    stat_m.columns = ['param', 'dataset', park]
    return (stat_m.set_index(['param', 'dataset']).transpose())
def analyse_NZm():
    '''
    analyse monthly wind power generation for NZ
    '''
    # mask for masking simulated data and capacities
    # (to only use timespans where also observed data are available)
    mask = (prod_NZh.notna() * capdfH.notna()).replace(0, np.nan)
    # mask and aggregate simulated data
    comph = pd.DataFrame({
        'MERRA2': (NZm * mask).sum(axis=1),
        'ERA5': (NZe * mask).sum(axis=1),
        'MERRA2_GWA2': (NZmg2 * mask).sum(axis=1),
        'ERA5_GWA2': (NZeg2 * mask).sum(axis=1),
        'MERRA2_GWA3': (NZmg3 * mask).sum(axis=1),
        'ERA5_GWA3': (NZeg3 * mask).sum(axis=1)
    })
    comph['obs'] = comph.index.map(prod_NZh.sum(axis=1)) / 1000
    # mask and aggregate capacities
    caph = (capdfH * mask).sum(axis=1)
    # aggregate monthly
    compm = comph.resample('M').sum()
    capm = caph.resample('M').sum()
    # calculate capacity factors
    cfmu = compm.div(capm, axis=0).dropna()
    cfm = cfmu.mask(cfmu > 1).dropna()
    stat_m = pd.DataFrame(
        {
            'ERA5': stats(cfm.ERA5, cfm.obs, False),
            'ERA5_GWA2': stats(cfm.ERA5_GWA2, cfm.obs, False),
            'ERA5_GWA3': stats(cfm.ERA5_GWA3, cfm.obs, False),
            'MERRA2': stats(cfm.MERRA2, cfm.obs, False),
            'MERRA2_GWA2': stats(cfm.MERRA2_GWA2, cfm.obs, False),
            'MERRA2_GWA3': stats(cfm.MERRA2_GWA3, cfm.obs, False),
            'obs': [np.nan, np.nan, np.nan,
                    cfm.obs.mean()]
        },
        index=['cor', 'rmse', 'mbe',
               'avg']).reset_index().melt(id_vars=['index']).dropna()
    stat_m.columns = ['param', 'dataset', 'NZ']
    return (stat_m.set_index(['param', 'dataset']).transpose())
def analyse_NZparkd(park):
    '''
    analyse daily wind power generation for one park
    '''
    mask = (prod_NZh[park].notna() * capdfH[park].notna()).replace(0, np.nan)
    comph = pd.DataFrame({
        'MERRA2': NZm[park] * mask,
        'ERA5': NZe[park] * mask,
        'MERRA2_GWA2': NZmg2[park] * mask,
        'ERA5_GWA2': NZeg2[park] * mask,
        'MERRA2_GWA3': NZmg3[park] * mask,
        'ERA5_GWA3': NZeg3[park] * mask
    })
    comph['obs'] = comph.index.map(prod_NZh[park] * mask) / 1000
    # get capacities and mask
    caph = capdfH[park] * mask
    # aggregate daily
    capd = caph.resample('D').sum()
    compd = comph.resample('D').sum()
    # calculate capacity factors
    cfd = compd.div(capd, axis=0)
    # remove capacity factors > 1
    cfd = cfd.mask(cfd > 1).dropna()
    stat_d = pd.DataFrame(
        {
            'ERA5': stats(cfd.ERA5, cfd.obs, False),
            'ERA5_GWA2': stats(cfd.ERA5_GWA2, cfd.obs, False),
            'ERA5_GWA3': stats(cfd.ERA5_GWA3, cfd.obs, False),
            'MERRA2': stats(cfd.MERRA2, cfd.obs, False),
            'MERRA2_GWA2': stats(cfd.MERRA2_GWA2, cfd.obs, False),
            'MERRA2_GWA3': stats(cfd.MERRA2_GWA3, cfd.obs, False),
            'obs': [np.nan, np.nan, np.nan,
                    cfd.obs.mean()]
        },
        index=['cor', 'rmse', 'mbe',
               'avg']).reset_index().melt(id_vars=['index']).dropna()
    stat_d.columns = ['param', 'dataset', park]
    return (stat_d.set_index(['param', 'dataset']).transpose())
Example #26
    env.run()

    #test_cache(jobList,hierarchy)

    logger.info("Simulation Ends")
    logger.info("Collecting Statistics...")
    print "Simulation Ends"
    print "Collecting Statistics..."

    #utils.cacheinfo2(hierarchy)
    stats = {}
    stats["rn"] = sim_req_num
    stats["clist"] = sim_req_comp
    stats["sim_end"] = sim_end

    res_file = config.get('Simulation', 'res_file')
    fd = open(res_file, "a")
    fd.write("\n\n")
    fd.write("-------------RESULTS---------------------\n")
    fd.write("Date:" + time + "\n")
    utils.stats(hierarchy, config, fd, stats)
    utils.missCost(hierarchy, config, fd)

    fd.close()

#	shadow.set_cache_size(hierarchy,env)
#	shadow.reset_counters(hierarchy,nodeNum)

#	shadow.set_cache_size(hierarchy,env)
Example #27
                   axis=1).tz_localize('UTC').tz_convert('US/Central')
wp_USA.columns = ['ERA5', 'ERA5_GWA', 'MERRA2', 'MERRA2_GWA']
# aggregate monthly
wp_USAm = wp_USA.resample('M').sum()
# combine data and calculate capacity factors
cf_USAm = pd.concat([
    wp_USAm.div(cap_usam, axis=0),
    (prod_USAm['USA'].dropna().resample('M').sum() * 10**6 /
     (cap_usaIm * 10**3))
],
                    axis=1).dropna()
cf_USAm.columns = np.append(wp_USAm.columns, 'wp_obs')
# Analyse
stats_USAm = pd.DataFrame(
    {
        'ERA5': stats(cf_USAm.ERA5, cf_USAm.wp_obs, False),
        'ERA5_GWA': stats(cf_USAm.ERA5_GWA, cf_USAm.wp_obs, False),
        'MERRA2': stats(cf_USAm.MERRA2, cf_USAm.wp_obs, False),
        'MERRA2_GWA': stats(cf_USAm.MERRA2_GWA, cf_USAm.wp_obs, False),
        'obs': [np.nan, np.nan, np.nan,
                cf_USAm.wp_obs.mean()]
    },
    index=['cor', 'rmse', 'mbe', 'avg'])
stats_USAm_r = pd.DataFrame(
    {
        'ERA5': stats(cf_USAm.ERA5, cf_USAm.wp_obs),
        'ERA5_GWA': stats(cf_USAm.ERA5_GWA, cf_USAm.wp_obs),
        'MERRA2': stats(cf_USAm.MERRA2, cf_USAm.wp_obs),
        'MERRA2_GWA': stats(cf_USAm.MERRA2_GWA, cf_USAm.wp_obs),
        'obs': [np.nan, np.nan, np.nan,
                round(cf_USAm.wp_obs.mean(), 2)]
    },
    index=['cor', 'rmse', 'mbe', 'avg'])

        params = dict(read_csv(opts['-p']))
        params = {k: v[0] for k, v in params.items()}

    n_cols, weeks, y, X = load_data(filename=opts['-i'])

    try:
        model.pca
    except AttributeError:
        pass
    else:
        pca = PCA(n_components=params['n_components'])
        del params['n_components']
        X = pca.fit(X).transform(X)

    model = model(**params)
    scores, mean, std_dev = stats(X, y, model)

    if '--predict' not in opts:
        print(mean)
    else:
        modelname = opts['--model']
        predict = opts['--predict']

        print_stats(scores, mean, std_dev, 'Stats of ' + modelname)

        results_filename = predict + '/pred-' + modelname + '.csv'
        results_plot_filename = predict + '/pred-' + modelname + '.eps'

        model.fit(X, y)
        y_true, y_pred = y, model.predict(X)
        save_data(results_filename, weeks, y_true, y_pred)
Example #29
ITERS = 100
ALPHA = 0.01
EPSI = 0.1

CW_ITERS = 100
BIN_STEPS = 20
NORM = 'l2'  #l0/l2/linf

(train_features, train_labels), (test_features,
                                 test_labels) = cifar10.load_data()
(test_features, test_labels) = agu(test_features, test_labels)
train_features = train_features.astype('float32')
test_features = test_features.astype('float32')
train_features /= 255
test_features /= 255
test_labels = np_utils.to_categorical(test_labels, 10)

if len(sys.argv) < 2 or sys.argv[1] == "fgsm_it":
    perturbed_accuracy = fgsm_it(test_features[:100], ITERS, EPSI, ALPHA,
                                 build_network, loss_func, evaluation,
                                 './tmp/original_cifar_model-8')
    print([s[0] for s in perturbed_accuracy])
    stats([s[1] for s in perturbed_accuracy], test_labels[:1000])
    heat([s[1] for s in perturbed_accuracy], test_labels[:1000],
         "cifar10_fgsm_")
else:
    perturbed_norms, was = cw(test_features[:100], test_labels[:100], CW_ITERS,
                              BIN_STEPS, build_network, loss_func, evaluation,
                              NORM, './tmp/original_cifar_model-8')
    stats_cari(perturbed_norms, was)
Example #30
def stream_stats():
    data = dict()
    data["stream"] = True
    res = utils.stats(**data)
    # return jsonify({"message":"success", "containers":res})
    return render_template('index.html', **locals())
def multilevel_uniform(
      prop,
      x_sample,
      sigma=1.,
      rho=0.1,
      count_particles=1000,
      count_mh_steps=100,
      debug=True, stats=False):

  # Calculate the mean of the normal distribution in logit space
  # We transform the input from [x_min, x_max] to [epsilon, 1 - epsilon], then to [logit(epsilon), logit(1 - epsilon)]
  # Then we can do the sampling on (-inf, inf)
  prior = dist.Uniform(low=torch.max(x_sample-sigma*(x_max-x_min).view(3,1,1), x_min.view(3,1,1)), high=torch.min(x_sample+sigma*(x_max-x_min).view(3,1,1), x_max.view(3,1,1)))

  #print((x_sample-sigma).size())
  #raise Exception()

  # Parameters
  if CUDA:
    width_proposal = sigma*torch.ones(count_particles).cuda()/30
  else:
    width_proposal = sigma*torch.ones(count_particles)/30
  count_max_levels = 500
  target_acc_ratio = 0.9
  max_width_proposal = 0.1
  min_width_proposal = 1e-8
  width_inc = 1.02
  width_dec = 0.5

  # Sample the initial particles
  # Implements parallel batched accept-reject sampling.
  x = prior.sample(torch.Size([count_particles]))

  #print(x.size())
  #raise Exception()

  L_prev = -math.inf
  L = -math.inf
  l_inf_min = math.inf
  lg_p = 0
  levels = []

  #print('Inside valid bounds', x_min, x_max)
  #utils.stats(x[0])
  #print((x >= x_min).all(dim=1) & (x <= x_max).all(dim=1))
  #raise Exception()

  # Loop over levels
  for level_idx in range(count_max_levels):
    if CUDA:
      acc_ratio = torch.zeros(count_particles).cuda()
    else:
      acc_ratio = torch.zeros(count_particles)

    if L >= 0:
      break

    # Calculate current level
    s_x = prop(x).squeeze(-1)
    s_sorted, s_idx = torch.sort(s_x)
    L = min(s_sorted[math.floor((1-rho)*count_particles)].item(), 0)
    if L == L_prev:
      L = 0
    levels.append(L)
    where_keep = s_x >= L
    where_kill = s_x < L
    count_kill = (where_kill).sum()
    count_keep = count_particles - count_kill

    # Print level
    if debug:
      print(f'Level {level_idx+1} = {L}')

    # Terminate if change in level is below some threshold
    if count_keep == 0:
      return -math.inf, None, x, levels

    lg_p += torch.log(count_keep.float()).item() - math.log(count_particles)
    
    # Early termination
    if lg_p < -90:
      return -90., None, x, levels

    # If the level is 0 then don't do last MH steps (speeds things up!)
    if L >= 0:
      break

    # Uniformly resample killed particles below the level
    new_idx = torch.randint(low=0, high=count_keep, size=(count_kill,), dtype=torch.long)
    x = x[where_keep]
    x = torch.cat((x, x[new_idx]), dim=0)
    width_proposal = width_proposal[where_keep]
    width_proposal = torch.cat((width_proposal, width_proposal[new_idx]), dim=0)
    
    #acc_ratio = torch.zeros(count_kill).cuda()
    #x_temp = x
    #while acc_ratio.mean() < 0.2:
    #  x = x_temp
    if CUDA:
      acc_ratio = torch.zeros(count_particles).cuda()
    else:
      acc_ratio = torch.zeros(count_particles)

    for mh_idx in range(count_mh_steps):
      # Propose new sample
      g_bottom = dist.Uniform(low=torch.max(x - width_proposal.view(-1,1,1,1)*(x_max-x_min).view(3,1,1), prior.low), high=torch.min(x + width_proposal.view(-1,1,1,1)*(x_max-x_min).view(3,1,1), prior.high))

      x_maybe = g_bottom.sample()
      s_x = prop(x_maybe).squeeze(-1)

      # Calculate log-acceptance ratio
      g_top = dist.Uniform(low=torch.max(x_maybe - width_proposal.view(-1,1,1,1)*(x_max-x_min).view(3,1,1), prior.low), high=torch.min(x_maybe + width_proposal.view(-1,1,1,1)*(x_max-x_min).view(3,1,1), prior.high))
      lg_alpha = (prior.log_prob(x_maybe) + g_top.log_prob(x) - prior.log_prob(x) - g_bottom.log_prob(x_maybe)).view(count_particles,-1).sum(dim=1)
      acceptance = torch.min(lg_alpha, torch.zeros_like(lg_alpha))

      # Work out which ones to accept
      log_u = torch.log(torch.rand_like(acceptance))
      acc_idx = (log_u <= acceptance) & (s_x >= L)
      acc_ratio += acc_idx.float()
      x = torch.where(acc_idx.view(-1,1,1,1), x_maybe, x)
        
    # Adapt the width proposal *for each chain individually*
    acc_ratio /= count_mh_steps

    # DEBUG: See what acceptance ratios are doing
    if stats:
      utils.stats(acc_ratio)
    #input()

    #print(acc_ratio.size())
    width_proposal = torch.where(acc_ratio > 0.124, width_proposal*width_inc, width_proposal)
    width_proposal = torch.where(acc_ratio < 0.124, width_proposal*width_dec, width_proposal)

    L_prev = L
    #input()

  return lg_p, None, x, levels
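In this sampler, utils.stats(acc_ratio) is only used for debugging the per-chain acceptance ratios, so it presumably just prints summary statistics of a tensor. A hedged sketch of such a helper:

import torch


def stats(t):
    # Hypothetical sketch: print a one-line summary of a tensor for debugging.
    print(f'min={t.min().item():.4f} mean={t.mean().item():.4f} '
          f'max={t.max().item():.4f} std={t.std().item():.4f}')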
Example #32
def subtest():
    ''' test net using patches of slide.
        compare outputs of net and targets and print result.

    '''

    os.path.isdir('checkpoint')
    checkpoint = torch.load('./checkpoint/ckpt.t7')
    net = checkpoint['net']
    threshold = checkpoint['threshold']
    net.eval()
    outputs_list = np.array([])
    targets_list = np.array([])
    test_loss = 0
    total = 0
    correct = 0

    for batch_idx, (inputs, targets) in enumerate(subtestloader):
        if USE_CUDA:
            inputs = inputs.cuda()
            targets = torch.FloatTensor(np.array(targets).astype(float)).cuda()

        batch_size = targets.shape[0]
        inputs, targets = Variable(inputs, volatile=True), Variable(targets)
        outputs = net(inputs)
        total += targets.size(0)
        outputs = torch.squeeze(outputs)
        loss = criterion(outputs, targets)
        test_loss += loss.data[0]

        _outputs = np.array(outputs.data).astype(float)
        _targets = np.array(targets.data).astype(float)
        outputs_list = np.append(outputs_list, _outputs)
        targets_list = np.append(targets_list, _targets)

        outputs += Variable((torch.ones(batch_size) * (1 - threshold)).cuda())
        outputs = torch.floor(outputs)
        correct += int(outputs.eq(targets).cpu().sum())

        progress_bar(
            batch_idx, len(subtestloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (test_loss /
             (batch_idx + 1), 100. * correct / total, correct, total))

    correct, tp, tn, fp, fn, recall, precision, specificity, f1_score, auc, threshold = stats(
        outputs_list, targets_list)
    acc = correct / total
    print(
        'Acc: %.3f, Recall: %.3f, Prec: %.3f, Spec: %.3f, F1: %.3f, Thres: %.3f, AUC: %.3f'
        % (acc, recall, precision, specificity, f1_score, threshold, auc))
    print('%17s %12s\n%-11s %-8d    %-8d\n%-11s %-8d    %-8d' %
          ('Tumor', 'Normal', 'pos', tp, fp, 'neg', fn, tn))
    print("lr: ", args.lr * (0.5**(LR_DECAY)), " chance:", LR_CHANCE)
Example #33
def valid(epoch):
    ''' valid net using patches of slide.
        Save checkpoint if AUC score is higher than saved checkpoint's.
    
    Args: 
        epoch (int): current epoch
    '''

    global BEST_AUC
    global THRESHOLD
    global LR_CHANCE
    global CK_CHANCE
    global LR_DECAY

    net.eval()
    valid_loss = 0
    total = 0
    correct = 0

    outputs_list = np.array([])
    targets_list = np.array([])

    for batch_idx, (inputs, targets) in enumerate(valloader):
        if USE_CUDA:
            inputs = inputs.cuda()
            targets = torch.FloatTensor(np.array(targets).astype(float)).cuda()

        batch_size = targets.shape[0]
        inputs, targets = Variable(inputs, volatile=True), Variable(targets)
        outputs = net(inputs)
        total += targets.size(0)
        outputs = torch.squeeze(outputs)
        loss = criterion(outputs, targets)
        valid_loss += loss.data[0]

        _outputs = np.array(outputs.data).astype(float)
        _targets = np.array(targets.data).astype(float)
        outputs_list = np.append(outputs_list, _outputs)
        targets_list = np.append(targets_list, _targets)

        outputs += Variable((torch.ones(batch_size) * (1 - THRESHOLD)).cuda())
        outputs = torch.floor(outputs)
        correct += int(outputs.eq(targets).cpu().sum())

        progress_bar(
            batch_idx, len(valloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (valid_loss /
             (batch_idx + 1), 100. * correct / total, correct, total))

    correct, tp, tn, fp, fn, recall, precision, specificity, f1_score, auc, threshold = stats(
        outputs_list, targets_list)
    acc = correct / total
    THRESHOLD = threshold

    print(
        'Acc: %.3f, Recall: %.3f, Prec: %.3f, Spec: %.3f, F1: %.3f, Thres: %.3f, AUC: %.3f'
        % (acc, recall, precision, specificity, f1_score, threshold, auc))
    print('%17s %12s\n%-11s %-8d    %-8d\n%-11s %-8d    %-8d' %
          ('Tumor', 'Normal', 'pos', tp, fp, 'neg', fn, tn))
    print("lr: ", args.lr * (0.5**(LR_DECAY)), "lr chance:", LR_CHANCE)

    # plot data
    CUR_EPOCH.append(epoch)
    CUR_VAL_ACC.append(acc)
    CUR_LOSS.append(valid_loss / (batch_idx + 1))
    CUR_LR.append(args.lr * (0.5**(LR_DECAY)))

    # Save checkpoint.
    if auc > BEST_AUC:
        print('saving...')
        BEST_AUC = auc
        state = {
            'net': net if USE_CUDA else net,
            'acc': acc,
            'loss': valid_loss,
            'recall': recall,
            'specificity': specificity,
            'precision': precision,
            'f1_score': f1_score,
            'auc': auc,
            'epoch': epoch,
            'lr': args.lr * (0.5**(LR_DECAY)),
            'threshold': threshold
        }
        torch.save(state, './checkpoint/ckpt.t7')
Example #34
    def print_stats(self):
        print('Train stats:')
        utils.stats(tokens=self.train_tokens,
                    lemmas=self.train_lemmas,
                    known=self.preprocessor.known_tokens)
        print('Test stats:')
        utils.stats(tokens=self.test_tokens,
                    lemmas=self.test_lemmas,
                    known=self.preprocessor.known_tokens)