Code Example #1
 def save(self, prepath):
     makedirs_process_safe(prepath)
     save_path = self.get_save_path(prepath)
     np.savez(save_path,
              valid=self.valid,
              is_disruptive=self.is_disruptive,
              signals_dict=self.signals_dict,
              ttd=self.ttd)
     print('...saved shot {}'.format(self.number))
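Every example on this page calls makedirs_process_safe before writing into the target directory. The helper itself is not shown here; the sketch below is an assumption of what such a race-tolerant directory creator typically looks like, using only the standard library (equivalent in effect, on Python 3, to os.makedirs(dirpath, exist_ok=True)).

import errno
import os


def makedirs_process_safe(dirpath):
    # Create dirpath (and any missing parents). If another process creates
    # the directory first, os.makedirs raises EEXIST; swallow that specific
    # error so both processes can proceed, and re-raise anything else.
    try:
        os.makedirs(dirpath)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise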
Code Example #2
    def process(self, shot):
        save_prepath = self.get_save_prepath()
        save_path = shot.get_save_path(save_prepath)
        if not os.path.exists(save_prepath):
            makedirs_process_safe(save_prepath)
        prepath = self.loader.conf['paths']['processed_prepath']
        assert shot.valid
        shot.restore(prepath)
        self.loader.set_inference_mode(True)  # make sure shots aren't cut
        if self.loader.normalizer is not None:
            self.loader.normalizer.apply(shot)
        else:
            print('Warning, no normalization. ',
                  'Training data may be poorly conditioned')
        self.loader.set_inference_mode(False)
        # sig, res = self.get_signal_result_from_shot(shot)
        disr = 1 if shot.is_disruptive else 0

        if not os.path.isfile(save_path):
            X = self.get_X(shot)
            np.savez(save_path, X=X)  # , Y=Y, disr=disr
            # print(X.shape, Y.shape)
        else:
            try:
                dat = np.load(save_path, allow_pickle=False)
                # X, Y, disr = dat["X"], dat["Y"], dat["disr"][()]
                X = dat["X"]
            except BaseException:
                # data was there but corrupted, save it again
                X = self.get_X(shot)
                np.savez(save_path, X=X)

        Y = self.get_Y(shot)

        shot.make_light()

        return X, Y, disr
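The if/else branch in process() above is a compute-or-load cache: the per-shot feature matrix X is written to an .npz file the first time the shot is processed, reloaded on later calls, and recomputed whenever the cached file cannot be read. A stripped-down sketch of that pattern, with a hypothetical compute_features callable standing in for self.get_X(shot):

import numpy as np


def load_or_compute(save_path, compute_features):
    # Return cached features if the .npz file exists and is readable;
    # otherwise (re)compute them and rewrite the cache, mirroring the
    # corrupted-file fallback in process() above.
    try:
        dat = np.load(save_path, allow_pickle=False)
        return dat["X"]
    except Exception:
        X = compute_features()
        np.savez(save_path, X=X)
        return X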
Code Example #3
    def process(self, shot):
        save_prepath = self.get_save_prepath()
        save_path = shot.get_save_path(save_prepath)
        if not os.path.exists(save_prepath):
            makedirs_process_safe(save_prepath)
        prepath = self.loader.conf['paths']['processed_prepath']
        assert shot.valid
        shot.restore(prepath)
        self.loader.set_inference_mode(True)  # make sure shots aren't cut
        if self.loader.normalizer is not None:
            self.loader.normalizer.apply(shot)
        else:
            print('Warning, no normalization. '
                  'Training data may be poorly conditioned')
        self.loader.set_inference_mode(False)
        # sig, res = self.get_signal_result_from_shot(shot)
        disr = 1 if shot.is_disruptive else 0

        if not os.path.isfile(save_path):
            X = self.get_X(shot)
            np.savez(save_path, X=X)  # , Y=Y, disr=disr
            # print(X.shape, Y.shape)
        else:
            try:
                dat = np.load(save_path)
                # X, Y, disr = dat["X"], dat["Y"], dat["disr"][()]
                X = dat["X"]
            except Exception:
                # data was there but corrupted, save it again
                X = self.get_X(shot)
                np.savez(save_path, X=X)

        Y = self.get_Y(shot)

        shot.make_light()

        return X, Y, disr
Code Example #4
def train(conf,
          shot_list_train,
          shot_list_validate,
          loader,
          shot_list_test=None):
    np.random.seed(1)
    print_shot_list_sizes(shot_list_train, shot_list_validate)
    print('training: {} shots, {} disruptive'.format(
        len(shot_list_train), shot_list_train.num_disruptive()))
    print('validate: {} shots, {} disruptive'.format(
        len(shot_list_validate), shot_list_validate.num_disruptive()))

    num_samples = conf['model']['shallow_model']['num_samples']
    feature_extractor = FeatureExtractor(loader)
    shot_list_train = shot_list_train.random_sublist(debug_use_shots)
    X, Y, _ = feature_extractor.load_shots(shot_list_train,
                                           num_samples=num_samples)
    Xv, Yv, _ = feature_extractor.load_shots(shot_list_validate,
                                             num_samples=num_samples)
    X = np.concatenate(X, axis=0)
    Y = np.concatenate(Y, axis=0)
    Xv = np.concatenate(Xv, axis=0)
    Yv = np.concatenate(Yv, axis=0)

    # max_samples = 100000
    # num_samples = min(max_samples, len(Y))
    # indices = np.random.choice(np.array(range(len(Y))), num_samples,
    #          replace=False)
    # X = X[indices]
    # Y = Y[indices]

    print("fitting on {} samples, {} positive".format(len(X), np.sum(Y > 0)))
    callbacks = build_callbacks(conf)
    callback_metrics = conf['callbacks']['metrics']
    callbacks.set_params({
        'metrics': callback_metrics,
    })
    callbacks.on_train_begin()
    callbacks.on_epoch_begin(0)

    # save_prepath = feature_extractor.get_save_prepath()
    model_path = (conf['paths']['model_save_path']
                  + model_filename)  # save_prepath + model_filename
    makedirs_process_safe(conf['paths']['model_save_path'])
    model_conf = conf['model']['shallow_model']
    if not model_conf['skip_train'] or not os.path.isfile(model_path):

        start_time = time.time()
        if model_conf["scale_pos_weight"] != 1:
            scale_pos_weight_dict = {
                np.min(Y): 1,
                np.max(Y): model_conf["scale_pos_weight"]
            }
        else:
            scale_pos_weight_dict = None
        if model_conf['type'] == "svm":
            model = svm.SVC(probability=True,
                            C=model_conf["C"],
                            kernel=model_conf["kernel"],
                            class_weight=scale_pos_weight_dict)
        elif model_conf['type'] == "random_forest":
            model = RandomForestClassifier(
                n_estimators=model_conf["n_estimators"],
                max_depth=model_conf["max_depth"],
                class_weight=scale_pos_weight_dict,
                n_jobs=-1)
        elif model_conf['type'] == "xgboost":
            max_depth = model_conf["max_depth"]
            if max_depth is None:
                max_depth = 0
            model = XGBClassifier(
                max_depth=max_depth,
                learning_rate=model_conf['learning_rate'],
                n_estimators=model_conf["n_estimators"],
                scale_pos_weight=model_conf["scale_pos_weight"])
        elif model_conf['type'] == 'mlp':
            hidden_layer_sizes = tuple(
                reversed([
                    model_conf['final_hidden_layer_size'] * 2**x
                    for x in range(model_conf['num_hidden_layers'])
                ]))
            model = MLPClassifier(
                hidden_layer_sizes=hidden_layer_sizes,
                learning_rate_init=model_conf['learning_rate_mlp'],
                alpha=model_conf['mlp_regularization'])
        else:
            print("Unknown model type, exiting.")
            exit(1)
        model.fit(X, Y)
        joblib.dump(model, model_path)
        print("Fit model in {} seconds".format(time.time() - start_time))
    else:
        model = joblib.load(model_path)
        print("model exists.")

    Y_pred = model.predict(X)
    print("Train")
    print(classification_report(Y, Y_pred))
    Y_predv = model.predict(Xv)
    print("Validate")
    print(classification_report(Yv, Y_predv))
    if ('monitor_test' in conf['callbacks'].keys()
            and conf['callbacks']['monitor_test']):
        times = conf['callbacks']['monitor_times']
        roc_areas, losses = make_predictions_and_evaluate_multiple_times(
            conf, shot_list_validate, loader, times)
        for roc, t in zip(roc_areas, times):
            print('val_roc_{} = {}'.format(t, roc))
        if shot_list_test is not None:
            roc_areas, losses = make_predictions_and_evaluate_multiple_times(
                conf, shot_list_test, loader, times)
            for roc, t in zip(roc_areas, times):
                print('test_roc_{} = {}'.format(t, roc))
    # print(confusion_matrix(Y,Y_pred))
    _, _, _, roc_area, loss = make_predictions_and_evaluate_gpu(
        conf, shot_list_validate, loader)
    # _, _, _, roc_area_train, loss_train = make_predictions_and_evaluate_gpu(
    #          conf, shot_list_train, loader)

    print('Validation Loss: {:.3e}'.format(loss))
    print('Validation ROC: {:.4f}'.format(roc_area))
    epoch_logs = {}
    epoch_logs['val_roc'] = roc_area
    epoch_logs['val_loss'] = loss
    # epoch_logs['train_roc'] = roc_area_train
    # epoch_logs['train_loss'] = loss_train
    callbacks.on_epoch_end(0, epoch_logs)

    print('...done')
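Two configuration-driven constructions in the training function above are easy to misread: the MLP branch builds layer widths that double going backwards from final_hidden_layer_size, and the class-weight dict keyed by the label values assigns the scale_pos_weight factor to the larger (disruptive) label. The snippet below reproduces both with made-up config values, purely for illustration:

import numpy as np

model_conf = {'final_hidden_layer_size': 8, 'num_hidden_layers': 3,
              'scale_pos_weight': 10.0}   # illustrative values only

hidden_layer_sizes = tuple(
    reversed([model_conf['final_hidden_layer_size'] * 2**x
              for x in range(model_conf['num_hidden_layers'])]))
print(hidden_layer_sizes)  # (32, 16, 8): widths halve toward the output layer

Y = np.array([0, 0, 0, 1])  # toy labels: 0 = non-disruptive, 1 = disruptive
scale_pos_weight_dict = {np.min(Y): 1,
                         np.max(Y): model_conf['scale_pos_weight']}
# weight 1 for label 0, weight 10.0 for label 1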
Code Example #5
File: torch_runner.py  Project: iMurfyD/plasma-python
def train(conf, shot_list_train, shot_list_validate, loader):
    np.random.seed(1)
    # data_gen = ProcessGenerator(partial(
    # loader.training_batch_generator_full_shot_partial_reset,shot_list
    # = shot_list_train)())
    data_gen = partial(loader.training_batch_generator_full_shot_partial_reset,
                       shot_list=shot_list_train)()
    print_shot_list_sizes(shot_list_train, shot_list_validate)
    loader.set_inference_mode(False)

    train_model = build_torch_model(conf)

    # load the latest epoch we did. Returns -1 if none exist yet
    # e = specific_builder.load_model_weights(train_model)

    num_epochs = conf['training']['num_epochs']
    patience = conf['callbacks']['patience']
    lr_decay = conf['model']['lr_decay']
    # batch_size = conf['training']['batch_size']
    lr = conf['model']['lr']
    # clipnorm = conf['model']['clipnorm']
    e = 0
    # warmup_steps = conf['model']['warmup_steps']
    # num_batches_minimum = conf['training']['num_batches_minimum']

    # if 'adam' in conf['model']['optimizer']:
    #     optimizer = MPIAdam(lr=lr)
    # elif conf['model']['optimizer'] == 'sgd' or conf['model']['optimizer'] ==
    #     'tf_sgd':
    #
    #     optimizer = MPISGD(lr=lr)
    # elif 'momentum_sgd' in conf['model']['optimizer']:
    #     optimizer = MPIMomentumSGD(lr=lr)
    # else:
    #     print("Optimizer not implemented yet")
    #     exit(1)

    print('{} epochs left to go'.format(num_epochs - 1 - e))

    if conf['callbacks']['mode'] == 'max':
        best_so_far = -np.inf
        cmp_fn = max
    else:
        best_so_far = np.inf
        cmp_fn = min
    optimizer = opt.Adam(train_model.parameters(), lr=lr)
    scheduler = opt.lr_scheduler.ExponentialLR(optimizer, lr_decay)
    train_model.train()
    not_updated = 0
    # total_loss = 0
    # count = 0
    loss_fn = nn.MSELoss(size_average=True)
    model_path = get_model_path(conf)
    makedirs_process_safe(os.path.dirname(model_path))
    while e < num_epochs - 1:
        scheduler.step()
        print('\nEpoch {}/{}'.format(e, num_epochs))
        (step, ave_loss, curr_loss, num_so_far,
         effective_epochs) = train_epoch(train_model, data_gen, optimizer,
                                         loss_fn)
        e = effective_epochs
        loader.verbose = False  # True during the first iteration
        # if task_index == 0:
        # specific_builder.save_model_weights(train_model,int(round(e)))
        torch.save(train_model.state_dict(), model_path)
        _, _, _, roc_area, loss = make_predictions_and_evaluate_gpu(
            conf, shot_list_validate, loader)

        best_so_far = cmp_fn(roc_area, best_so_far)

        # stop_training = False
        print('=========Summary======== for epoch {}'.format(step))
        print('Training Loss numpy: {:.3e}'.format(ave_loss))
        print('Validation Loss: {:.3e}'.format(loss))
        print('Validation ROC: {:.4f}'.format(roc_area))

        # the weights were already saved above; here we only track whether
        # the monitored quantity improved this epoch
        if best_so_far != roc_area:
            print("No improvement, still saving model")
            not_updated += 1
        else:
            print("Saving model")
            # specific_builder.delete_model_weights(train_model,int(round(e)))
        if not_updated > patience:
            print("Stopping training due to early stopping")
            break
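The bookkeeping at the bottom of the epoch loop is a minimal early-stopping scheme: cmp_fn keeps the best validation metric seen so far (max for ROC, min for loss), not_updated counts epochs without improvement, and training stops once that counter exceeds patience. A self-contained sketch of the same logic, with a hypothetical evaluate_epoch callable standing in for the per-epoch train/evaluate step:

import numpy as np


def run_with_early_stopping(evaluate_epoch, num_epochs, patience, mode='max'):
    # evaluate_epoch(e) returns the tracked metric (e.g. validation ROC area).
    best_so_far = -np.inf if mode == 'max' else np.inf
    cmp_fn = max if mode == 'max' else min
    not_updated = 0
    for e in range(num_epochs):
        metric = evaluate_epoch(e)
        best_so_far = cmp_fn(metric, best_so_far)
        if best_so_far != metric:
            not_updated += 1   # no improvement this epoch
        else:
            not_updated = 0    # note: the original loop above never resets this
        if not_updated > patience:
            print("Stopping training due to early stopping")
            break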
Code Example #6
File: builder.py  Project: Sprinterzzj/plasma-python
 def ensure_save_directory(self):
     prepath = self.conf['paths']['model_save_path']
     makedirs_process_safe(prepath)
Code Example #7
File: shots.py  Project: Sprinterzzj/plasma-python
 def save(self, prepath):
     makedirs_process_safe(prepath)
     save_path = self.get_save_path(prepath)
     np.savez(save_path, valid=self.valid, is_disruptive=self.is_disruptive,
              signals_dict=self.signals_dict, ttd=self.ttd)
     print('...saved shot {}'.format(self.number))
Code Example #8
File: builder.py  Project: ge-dong/plasma-python
 def ensure_save_directory(self):
     prepath = self.conf['paths']['model_save_path']
     makedirs_process_safe(prepath)
Code Example #9
def train(conf, shot_list_train, shot_list_validate, loader):

    np.random.seed(1)

    print('validate: {} shots, {} disruptive'.format(
        len(shot_list_validate), shot_list_validate.num_disruptive()))
    print('training: {} shots, {} disruptive'.format(
        len(shot_list_train), shot_list_train.num_disruptive()))

    num_samples = conf['model']['shallow_model']['num_samples']
    feature_extractor = FeatureExtractor(loader)
    shot_list_train = shot_list_train.random_sublist(debug_use_shots)
    X, Y, _ = feature_extractor.load_shots(shot_list_train,
                                           num_samples=num_samples)
    Xv, Yv, _ = feature_extractor.load_shots(shot_list_validate,
                                             num_samples=num_samples)
    X = np.concatenate(X, axis=0)
    Y = np.concatenate(Y, axis=0)
    Xv = np.concatenate(Xv, axis=0)
    Yv = np.concatenate(Yv, axis=0)

    # max_samples = 100000
    # num_samples = min(max_samples, len(Y))
    # indices = np.random.choice(np.array(range(len(Y))), num_samples,
    #                            replace=False)
    # X = X[indices]
    # Y = Y[indices]

    print("fitting on {} samples, {} positive".format(len(X), np.sum(Y > 0)))
    callbacks = build_callbacks(conf)
    callback_metrics = conf['callbacks']['metrics']
    callbacks.set_params({
        'metrics': callback_metrics,
    })
    callbacks.on_train_begin()
    callbacks.on_epoch_begin(0)

    save_prepath = feature_extractor.get_save_prepath()
    model_path = (conf['paths']['model_save_path']
                  + model_filename)  # save_prepath + model_filename
    makedirs_process_safe(conf['paths']['model_save_path'])
    model_conf = conf['model']['shallow_model']
    if not model_conf['skip_train'] or not os.path.isfile(model_path):

        start_time = time.time()
        if model_conf["scale_pos_weight"] != 1:
            scale_pos_weight_dict = {
                np.min(Y): 1,
                np.max(Y): model_conf["scale_pos_weight"]
            }
        else:
            scale_pos_weight_dict = None
        if model_conf['type'] == "svm":
            model = svm.SVC(probability=True,
                            C=model_conf["C"],
                            kernel=model_conf["kernel"],
                            class_weight=scale_pos_weight_dict)
        elif model_conf['type'] == "random_forest":
            model = RandomForestClassifier(
                n_estimators=model_conf["n_estimators"],
                max_depth=model_conf["max_depth"],
                class_weight=scale_pos_weight_dict,
                n_jobs=-1)
        elif model_conf['type'] == "xgboost":
            max_depth = model_conf["max_depth"]
            if max_depth is None:
                max_depth = 0
            model = XGBClassifier(
                max_depth=max_depth,
                learning_rate=model_conf['learning_rate'],
                n_estimators=model_conf["n_estimators"],
                scale_pos_weight=model_conf["scale_pos_weight"])
        elif model_conf['type'] == 'mlp':
            hidden_layer_sizes = tuple(
                reversed([
                    model_conf['final_hidden_layer_size'] * 2**x
                    for x in range(model_conf['num_hidden_layers'])
                ]))
            model = MLPClassifier(
                hidden_layer_sizes=hidden_layer_sizes,
                learning_rate_init=model_conf['learning_rate_mlp'],
                alpha=model_conf['mlp_regularization'])
        else:
            print("Unknown model type, exiting.")
            exit(1)
        model.fit(X, Y)
        joblib.dump(model, model_path)
        print("Fit model in {} seconds".format(time.time() - start_time))
    else:
        model = joblib.load(model_path)
        print("model exists.")

    Y_pred = model.predict(X)
    print("Train")
    print(classification_report(Y, Y_pred))
    Y_predv = model.predict(Xv)
    print("Validate")
    print(classification_report(Yv, Y_predv))
    # print(confusion_matrix(Y, Y_pred))
    _, _, _, roc_area, loss = make_predictions_and_evaluate_gpu(
        conf, shot_list_validate, loader)
    # _, _, _, roc_area_train, loss_train = make_predictions_and_evaluate_gpu(
    #     conf, shot_list_train, loader)

    print('Validation Loss: {:.3e}'.format(loss))
    print('Validation ROC: {:.4f}'.format(roc_area))
    epoch_logs = {}
    epoch_logs['val_roc'] = roc_area
    epoch_logs['val_loss'] = loss
    # epoch_logs['train_roc'] = roc_area_train
    # epoch_logs['train_loss'] = loss_train
    callbacks.on_epoch_end(0, epoch_logs)

    print('...done')