def test_plot_metrics(monkeypatch):
    """Test the basic plotting functionality."""
    class_names = ('A', 'B')
    X_test = np.zeros((500, 50, 4))
    y_test = np.zeros((500, 50, len(class_names) + 1))
    fig_dir = 'some/dir'
    timesX_test = np.ones((500, 50))
    num_ex_vs_time = 10
    orig_lc_test = 10 * [MagicMock()]
    objids_test = 500 * ['obj_id']
    passbands = ('first_band', 'second_band')
    init_day_since_trigger = 20

    model = Mock()
    model.predict = Mock(return_value=np.ones((500, 50, len(class_names) + 1)))
    model.evaluate = Mock(return_value=np.ones(len(class_names)))

    mock_rc = Mock()
    mock_legend = Mock()
    monkeypatch.setattr('matplotlib.rc', mock_rc)
    monkeypatch.setattr('matplotlib.axes.Axes.legend', mock_legend)
    monkeypatch.setattr('numpy.concatenate', Mock(return_value=np.ones(5)))

    with pytest.raises(ValueError, match=re.escape("min() arg is an empty sequence")):
        plot_metrics(class_names, model, X_test, y_test, fig_dir, timesX_test,
                     orig_lc_test, objids_test, passbands, num_ex_vs_time,
                     init_day_since_trigger)
    mock_legend.assert_called()
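# The test above swaps out matplotlib and numpy attributes with pytest's
# monkeypatch fixture so that plot_metrics can run headless and fail fast.
# A minimal self-contained sketch of the same pattern (the test name and
# stubbed return value here are hypothetical, not part of the original suite):
def test_monkeypatch_pattern_sketch(monkeypatch):
    # monkeypatch.setattr accepts a dotted-path string target and restores the
    # real attribute automatically when the test finishes.
    monkeypatch.setattr('numpy.concatenate', Mock(return_value=np.ones(5)))
    assert np.concatenate([np.zeros(2)]).shape == (5,)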
def main():
    passbands = ('g', 'r')
    contextual_info = (0,)
    aggregate_classes = True
    reread_hdf5_data = False
    retrain_rnn = False
    train_epochs = 50
    otherchange = ''
    nchunks = 10000

    # Train + Test cuts
    zcut = 0.5
    bcut = True
    variablescut = True

    training_set_dir = 'training_set_files'
    os.makedirs(training_set_dir, exist_ok=True)

    data_release = 'ZTF_20190512'
    field = 'MSIP'
    savename = 'firsttry'
    fpath = os.path.join(training_set_dir,
                         'saved_lc_{}_{}_{}.hdf5'.format(field, data_release, savename))
    fig_dir = os.path.join(training_set_dir, 'Figures', 'classify',
                           'ZTF_{}_epochs{}_ag{}_ci{}_fp{}_zcut{}_bcut{}_varcut{}'.format(
                               otherchange, train_epochs, aggregate_classes, contextual_info,
                               os.path.basename(fpath), zcut, bcut, variablescut))
    for dirname in [fig_dir,
                    fig_dir + '/cf_since_trigger', fig_dir + '/cf_since_t0',
                    fig_dir + '/roc_since_trigger', fig_dir + '/lc_pred',
                    fig_dir + '/pr_since_trigger', fig_dir + '/truth_table_since_trigger']:
        os.makedirs(dirname, exist_ok=True)

    preparearrays = PrepareTrainingSetArrays(passbands, contextual_info, reread_hdf5_data,
                                             aggregate_classes, bcut, zcut, variablescut,
                                             nchunks=nchunks)
    X_train, X_test, y_train, y_test, labels_train, labels_test, class_names, class_weights, \
        sample_weights, timesX_train, timesX_test, orig_lc_train, orig_lc_test, \
        objids_train, objids_test = preparearrays.prepare_training_set_arrays(fpath, otherchange)

    model = train_model(X_train, X_test, y_train, y_test, sample_weights=sample_weights,
                        fig_dir=fig_dir, retrain=retrain_rnn, epochs=train_epochs)

    plot_metrics(class_names, model, X_test, y_test, fig_dir, timesX_test=timesX_test,
                 orig_lc_test=orig_lc_test, objids_test=objids_test, passbands=passbands)
def create_custom_classifier(get_data_func, data_dir, class_nums=(1, 2), class_name_map=None,
                             reread_data=False, train_size=0.6, contextual_info=('redshift',),
                             passbands=('g', 'r'), nobs=50, mintime=-70, maxtime=80, timestep=3.0,
                             retrain_network=False, train_epochs=50, dropout_rate=0,
                             train_batch_size=64, nunits=100, zcut=0.5, bcut=True,
                             ignore_classes=(), nprocesses=1, nchunks=1000, otherchange='',
                             training_set_dir='data/training_set_files',
                             save_dir='data/saved_light_curves', fig_dir='Figures', plot=True,
                             num_ex_vs_time=100, init_day_since_trigger=-25, augment_data=False,
                             redo_processing=False):
    """
    Create a classifier with your own data and your own training parameters.

    Parameters
    ----------
    get_data_func : func
        Function that reads your data and returns a light_curves object. It takes at least
        the arguments: class_num, data_dir, save_dir, passbands, known_redshift.
        See get_custom_data.py for an example template.
        E.g. get_data_func = get_data_custom(class_num, data_dir, save_dir, passbands)
    data_dir : str
        Directory where data is stored. E.g. data_dir='data/ZTF_20190512/'
    class_nums : tuple
        Class numbers (or names) to train on. E.g. class_nums=(1, 5, 6, 12, 41, 43, 51)
    class_name_map : dict or None
        Maps the class_nums onto class names.
        E.g. class_name_map = {1: 'SNIa-norm', 5: 'SNIbc', 6: 'SNII', 12: 'SNII',
        41: 'SNIa-91bg', 43: 'SNIa-x', 51: 'Kilonova'}.
        You may use the same value for different keys if you want the classifier to join
        those class_nums under the same label. If this is None, the default mapping listed
        in get_sntypes in helpers.py is used.
    reread_data : bool
        If True, reread your data and resave the processed files; otherwise check whether
        the data has already been read, processed, and saved.
    train_size : float
        Fraction of the data to use for training; the remainder is used for
        testing/validation. Usually a value between 0.5 and 1.0, depending on how much
        data you have.
    contextual_info : tuple of str
        Contextual information to use while training. Any string is allowed in this tuple
        provided it is stored as a keyword in the metadata of the light_curve Table object
        returned by `get_data_func`.
    passbands : tuple of str
        Passbands to use. E.g. passbands=('g', 'r')
    nobs : int
        Number of points to use when interpolating the light curve between mintime and
        maxtime.
    mintime : int
        Minimum days from trigger to extract from the light curve.
    maxtime : int
        Maximum days from trigger to extract from the light curve.
    timestep : float
        Time-step between interpolated points in the light curve.
    retrain_network : bool
        Whether to retrain the neural network or use the saved network model.
    train_epochs : int
        Number of epochs to train the neural network, i.e. the number of times the network
        sees each datum in the training set. More epochs help the classifier converge to a
        minimum, but too many can overfit the training set and generalise poorly to new
        data.
    dropout_rate : float
        Value between 0.0 and 1.0 indicating the fraction used for dropout regularisation.
    train_batch_size : int
        Number of objects to use per step of gradient descent.
    nunits : int
        Number of LSTM units.
    zcut : float
        Do not train on objects with redshifts higher than zcut.
    bcut : bool
        If True, do not train on objects within 15 degrees of the Galactic plane.
    ignore_classes : tuple
        Classes listed in this tuple will not be trained or tested on.
    nprocesses : int or None
        Number of computer processes to use while processing the data.
        If None, all available processors from os.cpu_count() are used.
    nchunks : int
        Number of chunks to split the data set into before multiprocessing. This should be
        a small fraction of the total number of objects.
    otherchange : str
        A change to this string signals that one of these training parameters has changed
        and that the data should be resaved and the model retrained.
    training_set_dir : str
        Name of the directory in which to save the data that will be read by the neural
        network.
    save_dir : str
        Name of the directory in which to save the processed data files.
    fig_dir : str
        Name of the directory in which to save the classifier metric figures, such as
        confusion matrices.
    plot : bool
        Whether to plot classifier metrics such as confusion matrices after training.
    num_ex_vs_time : int
        Number of example vs-time light curves to plot.
    init_day_since_trigger : int
        Day since trigger from which to start plotting in vs-time figures. Use a negative
        value for a day before trigger.
    augment_data : bool
        Whether to perform Gaussian process augmentation of the data.
    redo_processing : bool
        Whether to redo processing AFTER reading the data, saving the GP fits, and
        computing t0.
    """
    for dirname in [training_set_dir, data_dir, save_dir]:
        os.makedirs(dirname, exist_ok=True)
    for dirname in [fig_dir,
                    fig_dir + '/cf_since_trigger', fig_dir + '/cf_since_t0',
                    fig_dir + '/roc_since_trigger', fig_dir + '/lc_pred',
                    fig_dir + '/pr_since_trigger', fig_dir + '/truth_table_since_trigger']:
        os.makedirs(dirname, exist_ok=True)

    # Prepare the training set, read the data files, and save processed files
    preparearrays = PrepareTrainingSetArrays(passbands, contextual_info, nobs, mintime, maxtime,
                                             timestep, reread_data, bcut, zcut, ignore_classes,
                                             class_name_map=class_name_map, nchunks=nchunks,
                                             training_set_dir=training_set_dir, data_dir=data_dir,
                                             save_dir=save_dir, get_data_func=get_data_func,
                                             augment_data=augment_data,
                                             redo_processing=redo_processing)
    X_train, X_test, y_train, y_test, labels_train, labels_test, class_names, class_weights, \
        sample_weights, timesX_train, timesX_test, orig_lc_train, orig_lc_test, \
        objids_train, objids_test = \
        preparearrays.prepare_training_set_arrays(otherchange, class_nums, nprocesses, train_size)

    # Train the neural network model on the saved files
    model = train_model(X_train, X_test, y_train, y_test, sample_weights=sample_weights,
                        fig_dir=fig_dir, retrain=retrain_network, epochs=train_epochs,
                        plot_loss=plot, dropout_rate=dropout_rate, batch_size=train_batch_size,
                        nunits=nunits)

    # Plot classification metrics such as confusion matrices
    if plot:
        plot_metrics(class_names, model, X_test, y_test, fig_dir, timesX_test=timesX_test,
                     orig_lc_test=orig_lc_test, objids_test=objids_test, passbands=passbands,
                     num_ex_vs_time=num_ex_vs_time,
                     init_day_since_trigger=init_day_since_trigger)
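# A hedged usage sketch of create_custom_classifier. The reader function, paths,
# and chosen class numbers below are hypothetical; the required signature and
# defaults follow the docstring above (see get_custom_data.py for a full
# reader template).
def read_my_survey_data(class_num, data_dir, save_dir, passbands, known_redshift=True):
    """Hypothetical reader: load the light curves for one class and return a
    light_curves object as described in the docstring above."""
    raise NotImplementedError


def example_create_custom_classifier():
    """Hypothetical driver showing one way to call create_custom_classifier."""
    create_custom_classifier(
        get_data_func=read_my_survey_data,
        data_dir='data/my_survey/',      # assumed location of your raw data
        class_nums=(1, 51),              # e.g. SNIa-norm and Kilonova under the default map
        contextual_info=('redshift',),   # must be a key in each light_curve Table's metadata
        passbands=('g', 'r'),
        retrain_network=True,
        train_epochs=50,
    )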
def main(): """ Train Neural Network classifier """ passbands = ('g', 'r') contextual_info = (0, ) aggregate_classes = True reread_hdf5_data = False retrain_rnn = False train_epochs = 50 otherchange = '' # nonuniformtime' #'withdropout'##'no_dc_and_late_start_lcs_with_colour' nchunks = 10000 # Train + Test cuts zcut = 0.5 bcut = True variablescut = True class_nums = (1, 2, 12, 14, 3, 13, 41, 43, 51, 60, 61, 62, 63, 64, 70, 80, 81, 83, 90, 91, 92) nprocesses = None # None means os.cpu_count() otherwise use integer SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) data_dir = os.path.join(SCRIPT_DIR, '..', 'data/ZTF_20190512') training_set_dir = os.path.join(SCRIPT_DIR, '..', 'training_set_files') save_dir = os.path.join(SCRIPT_DIR, '..', 'data/saved_light_curves') for dirname in [training_set_dir, data_dir, save_dir]: if not os.path.exists(dirname): os.makedirs(dirname) fig_dir = os.path.join( training_set_dir, 'Figures', 'classify', 'ZTF_{}_noAGN_batch500_unnormalised_epochs{}_ag{}_ci{}_zcut{}_bcut{}_varcut{}' .format(otherchange, train_epochs, aggregate_classes, contextual_info, zcut, bcut, variablescut)) for dirname in [ fig_dir, fig_dir + '/cf_since_trigger', fig_dir + '/cf_since_t0', fig_dir + '/roc_since_trigger', fig_dir + '/lc_pred', fig_dir + '/pr_since_trigger', fig_dir + '/truth_table_since_trigger' ]: if not os.path.exists(dirname): os.makedirs(dirname) preparearrays = PrepareTrainingSetArrays(passbands, contextual_info, reread_hdf5_data, aggregate_classes, bcut, zcut, variablescut, nchunks=nchunks, training_set_dir=training_set_dir, data_dir=data_dir, save_dir=save_dir) X_train, X_test, y_train, y_test, labels_train, labels_test, class_names, class_weights, sample_weights, timesX_train, timesX_test, orig_lc_train, orig_lc_test, objids_train, objids_test = preparearrays.prepare_training_set_arrays( otherchange, class_nums, nprocesses) model = train_model(X_train, X_test, y_train, y_test, sample_weights=sample_weights, fig_dir=fig_dir, retrain=retrain_rnn, epochs=train_epochs) plot_metrics(class_names, model, X_test, y_test, fig_dir, timesX_test=timesX_test, orig_lc_test=orig_lc_test, objids_test=objids_test, passbands=passbands)