Example #1
def main(argv):

    path = argv[1]
    features = argv[2]

    data = load_data(path, features)

    men_vs_women(data)
    married_women_vs_unmarried(data)
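A minimal entry point for the snippet above, assuming `load_data` and the two comparison helpers live in the same module, would forward `sys.argv` into `main`:

import sys

if __name__ == '__main__':
    # argv[1] is the data path, argv[2] the feature specification, as read by main() above.
    main(sys.argv)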
Example #2
def __init__(self):
    print(config_params)
    self.CLM = CLM_worker(lrate=1.,
                          optimizer='adadelta',
                          batch_size=config_params.minibatch,
                          saveto='model.npz',
                          validFreq=2000,
                          dispFreq=100,
                          dropout_input=config_params.CLM_drop_in,
                          dropout_output=config_params.CLM_drop_out,
                          reload_model=config_params.model_dir + '/' + config_params.model_L2S,
                          reload_option=None,
                          log='log1')
    self.classifier = Classifier(
        lrate=1.,  # Learning rate for sgd (not used for adadelta and rmsprop)
        optimizer='adadelta',
        saveto='model.npz',  # The best model will be saved there
        dispFreq=50,  # Display the training progress after this number of updates
        validFreq=2000,  # Compute the validation error after this number of updates
        batch_size=config_params.minibatch,  # The batch size during training
        batch_len_threshold=None,  # Use dynamic batch size if sequence lengths exceed this threshold
        valid_batch_size=config_params.minibatch,  # The batch size used for the validation/test set
        lastHiddenLayer=None,
        dropout_output=config_params.classifier_drop_out,
        dropout_input=config_params.classifier_drop_in,
        reload_options=None,  # Path to saved model options we want to start from
        reload_model=config_params.model_dir + '/' + config_params.model_S2L,
        embedding=None,  # Path to the word embedding file (otherwise randomized)
        warm_LM=None,
        logFile='log2')  # Path to log file
    self.trainSet, self.validSet, self.testSet = load_data(
        path=config_params.data_dir, n_words=10000, maxlen=None,
        sort_by_len=True, fixed_valid=True)
    self.LMscore = numpy.load(config_params.LMScoreFile)
    self.LMscore = self.LMscore[self.LMscore.files[0]]
    self.build()
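The constructor above reads every setting from a module-level `config_params` object. A minimal stand-in with hypothetical values (only the attributes referenced above) could be built with `types.SimpleNamespace`:

from types import SimpleNamespace

# Hypothetical configuration; the field names mirror the attributes read in __init__ above.
config_params = SimpleNamespace(
    minibatch=32,
    CLM_drop_in=0.2, CLM_drop_out=0.5,
    classifier_drop_in=0.2, classifier_drop_out=0.5,
    model_dir='./models', model_L2S='model_L2S.npz', model_S2L='model_S2L.npz',
    data_dir='./data', LMScoreFile='lm_scores.npz')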
Example #3
    print(pred.shape)

    residual = pred - data.Y

    loss = np.linalg.norm(residual, axis=0, ord=2)

    loss = loss * loss / 0.5

    plt.figure()

    plt.plot(L1, loss, label='Residual term')

    plt.legend(loc='upper right')

    plt.xlabel('L1 norm of beta')
    plt.ylabel('Reconstruction Loss')
    plt.show()


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('--dir_path', type=str, default='./SGDBeta-V3')
    args = parser.parse_args()

    B, L1 = read_beta(args.dir_path)

    data = load_data()
    ShowLambda(B, alpha=0.85)
    ShowL1(B, L1)
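The plots produced by this script decompose a lasso-style objective into its reconstruction and L1 parts. A small NumPy sketch of that decomposition, assuming the conventional ½‖Xβ − y‖² form of the residual term and a matrix `B` whose columns are the fitted betas for different regularization strengths (a hypothetical helper, not part of the original script):

import numpy as np

def objective_terms(X, Y, B):
    # Columns of B are fitted betas for different lambda values.
    residuals = X @ B - Y[:, None]                         # shape (n_samples, n_betas)
    residual_term = 0.5 * np.sum(residuals ** 2, axis=0)   # 1/2 * ||X beta - y||^2 per beta
    l1_term = np.sum(np.abs(B), axis=0)                    # ||beta||_1 per beta
    return residual_term, l1_term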
Example #4
def train_model(data_path=data_path, imgs=imgs, msks=msks, tstimgs="", tstmsks="",
                model_name="model", save_path="models", num_folds=5, batch_size=32,
                learning_rate=1e-5, nr_epochs=50, verbosity=1, up=False, start_ch=32,
                depth=4, inc_rate=2, kernel_size=(3, 3), activation='relu',
                normalization=None, dropout=0, elastic_deform=None, low_pass=None,
                high_pass=None, prwt=False, lr_decay=False, lr_schedule=None,
                model_net=Unet, final_test=False, monitor="val_loss"):
    '''
    DESCRIPTION: Function to load the data, load the model, fit the model and evaluate the model
    -------
    INPUTS:
    data_path:      string, directory of the folder containing the images
    imgs:           string, name of the npy file containing the images
    msks:           string, name of the npy file containing the masks
    tstimgs:        string, name of the npy file containing the test images
    tstmsks:        string, name of the npy file containing the test masks
    model_name:     string, name to identify the model
    save_path:      string, directory of the folder to which to save the results of the callbacks to
    num_folds:      int, number of folds to use for K-fold cross validation
    batch_size:     int, batch size to use in fitting the model
    learning_rate:  float, learning rate to use in compiling the model
    nr_epochs:      int, number of epochs to use in fitting the model
    verbosity:      int, verbosity level passed to model.fit (0 = silent, 1 = progress bar, 2 = one line per epoch)
    up:             boolean, True for using upsampling, False for using Transposed convolution
    start_ch:       int, the number of filters for the first convolutional layers
    depth:          int, the number of convolutional layers
    inc_rate:       number, the factor with which the number of filters is incremented per convolutional layer
    kernel_size:    int or tuple of 2 integers, the kernel size to be used in the convolution layers  
    activation:     string, which activation function to use in the convolution layers
    normalization:  function, normalization function. In case of Groupnormalization a tuple of the function and the desired group size
    dropout:        float between 0-1, the dropout rate to be used
    elastic_deform: None or tuple of 2 numbers, (alpha, sigma) with alpha being the scaling factor for the transformation and sigma being the standard deviation for the gaussian convolution
    low_pass:       None or int, giving the standard deviation used for the gaussian low-pass filter applied to the images
    high_pass:      None or int, giving the standard deviation used for the gaussian high-pass filter applied to the images
    prwt:           boolean, whether to apply a prewitt filter to the images or not
    lr_decay:       boolean, whether to use a scheduled learning rate using the schedule from 'Model.py' or not
    lr_schedule:    function, the schedule to be used for the learning rate
    model_net:      function, which model architecture to use (from Model.py: Unet or Mnet)
    final_test:     boolean, stating whether the model should be optimized (k-fold with 5 folds, a validation split is used, no test data is needed) or whether the model performance should be tested (10 folds, training on all data and testing on the test data)
    monitor:        string, which output of the model to monitor in the callbacks; in case final_test=True it will automatically be set to "loss"
    -------
    OUTPUTS:
    A .h5 file per fold containing the model with its trained weights
    A .out file per fold containing the log of the training process in CSV format
    A .csv file where each row contains the DSC and train time per fold of the model, if there is already a file present the results are appended
    A .csv file where each row contains the mean and standard deviation of the DSCs and times of all folds of a model, if there is already a file present the results are appended
    '''
    
    ##### load data and optional test data #####
    images, masks = load_data(data_path, imgs, msks, low_pass=low_pass, high_pass=high_pass, prwt=prwt)
    
    if final_test: 
        print_func('Test Data:')
        test_images, test_masks = load_data(data_path, tstimgs, tstmsks, low_pass=low_pass, high_pass=high_pass, prwt=prwt)
        if model_net == Mnet: test_masks = downsample_image(test_masks, depth-1)
        #monitor = "loss"
        num_folds = 10
    
    ##### save arguments for the model to a dictionary #####
    arg_dict_model = {"start_ch": start_ch, "depth": depth, "inc_rate": inc_rate,
                      "kernel_size": kernel_size, "activation": activation,
                      "normalization": normalization, "dropout": dropout,
                      "learning_rate": learning_rate, "up": up}
    
    ##### prepare for k-fold cross validation #####
    kfold = KFold(n_splits=num_folds, shuffle=True)
    fold_no = 1
    dice_per_fold, time_per_fold = [], []

    for train, val in kfold.split(images, masks):
        print_func(f'Training for fold {fold_no} (of {num_folds}) ... \nModel name: {model_name}')
        
        train_im, train_msk, val_im, val_msk = images[train], masks[train], images[val], masks[val]  
        
        if elastic_deform is not None: train_im, train_msk = image_transformation(train_im, train_msk, elastic_deform)
        
        if model_net == Mnet:  # M-net has multiple outputs; the train and validation masks are downsampled to match the model outputs
            print_func("prepare data for Mnet")
            train_msk = downsample_image(train_msk, depth-1)
            val_msk = downsample_image(val_msk, depth-1)

        
        ##### load model with random initialized weights ######
        model = model_net(**arg_dict_model)
        
        ##### load callbacks #####
        save_dir = save_path + '/' + model_name + " K_" + str(fold_no)
        callbacks_list = []
        callbacks_list.append(ModelCheckpoint(save_dir + ' weights.h5', monitor=monitor, save_best_only=True))
        callbacks_list.append(CSVLogger(os.path.join(save_dir + ' log.out'), append=True, separator=';'))
        callbacks_list.append(EarlyStopping(monitor = monitor, verbose = 1, min_delta = 0.0001, patience = 5, mode = 'auto', restore_best_weights = True))
        
        
        if lr_decay:
            lr_sched = LearningRateScheduler(schedule, verbose = 1) if lr_schedule is None else LearningRateScheduler(lr_schedule, verbose = 1) 
            callbacks_list.append(lr_sched)
        
        ##### fit model #####
        arg_dict_fit = {"x": train_im, "y": train_msk, "validation_data": (val_im, val_msk),
                        "batch_size": batch_size, "epochs": nr_epochs,
                        "verbose": verbosity, "shuffle": True}
        
        start_time = time()
        model.fit(callbacks=callbacks_list, **arg_dict_fit)
        train_time = int(time()-start_time)
        
        if final_test: val_im, val_msk = test_images, test_masks        
        ##### evaluate model #####
        if model_net == Mnet:
            scores = eval_Mnet(val_im, val_msk['o1'], model, verbose=1)
        else:
            scores = model.evaluate(val_im, val_msk, verbose=0)
        
        ##### save scores of fold #####
        print_func(f"Scores \nDice: {scores[1]} \nTime: {train_time}")
        dice_per_fold.append(scores[1])
        time_per_fold.append(train_time)
        save_results(model_name + f' K_{fold_no}', scores[1], train_time)
        
        fold_no += 1 
    
    ##### save scores of model #####
    save_results(model_name, dice_per_fold, time_per_fold, False)  
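A hedged example of how `train_model` might be invoked for a baseline 5-fold run; all paths, file names, and hyperparameter values below are hypothetical, and `Unet` is the architecture imported from Model.py:

# Hypothetical call: 5-fold cross-validation of a small U-Net baseline.
train_model(data_path="./data",
            imgs="train_images.npy", msks="train_masks.npy",
            model_name="unet_baseline", save_path="models",
            num_folds=5, batch_size=16, learning_rate=1e-4, nr_epochs=50,
            start_ch=32, depth=4, dropout=0.2,
            model_net=Unet, final_test=False, monitor="val_loss")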
Example #5
def post_process(model_file, weights_path, data_path, imgs, msks, model_name="",
                 n=None, m=False, low_pass=None, high_pass=None, threshold=0.95,
                 disk_size=5, smooth_sigma=1, smooth_trsh=0.5):
    '''
    DESCRIPTION: Function to test and apply post-processing techniques on trained models
    -------
    INPUTS:
    model_file:     string, directory of the .json file containing the model architecture
    weights_path:   string, directory of the .h5 files containing the trained weights of the model
    data_path:      string, directory of the folder containing the images
    imgs:           string, name of the npy file containing the images
    msks:           string, name of the npy file containing the masks
    model_name:     string, name to identify the model
    n:              None or int, the number of images to predict and test. If None all images given are tested
    m:              boolean, whether the data should be prepared for an M-net architecture or not
    low_pass:       None or int, giving the standard deviation used for the gaussian low-pass filter applied to the images
    high_pass:      None or int, giving the standard deviation used for the gaussian high-pass filter applied to the images
    threshold:      float between 0 and 1, the threshold to apply to the masks
    disk_size:      int, the radius of the disk shaped structuring element used in opening by reconstruction
    smooth_sigma:   int, the standard deviation used for the gaussian low-pass filter for smoothing edges
    smooth_trsh:    float between 0 and 1, the threshold to apply after low-pass filtering 
    -------
    OUTPUTS:
    A .csv file where each row contains the mean and standard deviation of the DSCs of all weights of a model per post-processing option, if there is already a file present the results are appended
    '''
    images, masks = load_data(data_path, imgs, msks, low_pass=low_pass, high_pass=high_pass)
    if n is None: n = len(images)
    
    print_func("load model")
    json_file = open(model_file, 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    model = model_from_json(loaded_model_json)
    
    weights = os.listdir(weights_path)
    weights = [weight for weight in weights if weight.find(".h5")>-1]
    
    scores = pd.DataFrame()    
    
    for weight in weights:
        print_func(f"load weigths: {weight}")
        model.load_weights(os.path.join(weights_path, weight))

        predictions = model.predict(images[:n], verbose=1)
        if m: predictions = sum(predictions)/len(predictions)
        sc0 = np.mean(list(map(dice_coef_pred, masks[:n], predictions)))
        
        if threshold is not None:
            pp1, sc1 = [], []
            if type(threshold) != list: threshold = [threshold]
            for trsh in threshold: pp1.append(post_process_thresholding(copy.copy(predictions), trsh))
            for res in pp1: sc1.append(np.mean(list(map(dice_coef_pred, masks[:n], res))))
            max_idx1 = sc1.index(max(sc1))
        else:
            pp1, sc1, threshold, max_idx1 = [predictions], [], [None], 0
        
        if disk_size is not None:
            pp2, sc2 = [], []
            if type(disk_size) != list: disk_size = [disk_size]
            for ds in disk_size: pp2.append(post_process_openingbyreconstruction(copy.copy(pp1[max_idx1]), ds))
            for res in pp2: sc2.append(np.mean(list(map(dice_coef_pred, masks[:n], res))))
            max_idx2 = sc2.index(max(sc2))
        else: 
            pp2, sc2, disk_size, max_idx2 = pp1, [], [None], 0
        
        if smooth_sigma is not None:
            pp3, sc3 = [], []
            if type(smooth_sigma) != list: smooth_sigma = [smooth_sigma]
            if type(smooth_trsh) != list: smooth_trsh = [smooth_trsh]
            for ss in smooth_sigma: 
                for st in smooth_trsh: pp3.append(post_process_smoothingedges(copy.copy(pp2[max_idx2]), ss, st))
            for res in pp3: sc3.append(np.mean(list(map(dice_coef_pred, masks[:n], res))))   
        else: 
            sc3 = []
        
        scores = scores.append([[sc0, *sc1, *sc2, *sc3]], ignore_index=True)        

    for key, value in scores.iteritems():
        m_name = model_name + '.'
        if key>0 and key<=len(threshold): m_name = m_name + "trsh" + str(threshold[key-1])
        elif key>len(threshold) and key<=len(threshold)+len(disk_size): m_name = m_name + "disk" + str(disk_size[key-1-len(threshold)])
        elif key>len(threshold)+len(disk_size): m_name = m_name + "smooth_sigma" + str(key-1-len(threshold)-len(disk_size))
        save_results(m_name, value, 0, False)
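A hedged usage sketch of `post_process`, sweeping a couple of threshold and disk-size values for one trained model; all paths and file names below are hypothetical:

# Hypothetical call: compare thresholds and structuring-element sizes
# for weights produced by train_model above.
post_process(model_file="models/unet_baseline.json",
             weights_path="models",
             data_path="./data", imgs="test_images.npy", msks="test_masks.npy",
             model_name="unet_baseline",
             threshold=[0.90, 0.95], disk_size=[3, 5],
             smooth_sigma=1, smooth_trsh=0.5)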
Example #6
def mask_parse(mask):
    # Stack the single-channel mask into an (H, W, 3) image for visualization.
    mask = np.squeeze(mask)
    mask = [mask, mask, mask]
    mask = np.transpose(mask, (1, 2, 0))
    return mask


if __name__ == "__main__":
    np.random.seed(42)
    tf.random.set_seed(42)

    PATH = "../../Dataset/"
    BATCH = 8

    (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = load_data(PATH)

    test_dataset = tf_dataset(test_x, test_y, batch=BATCH)
    test_steps = len(test_x) // BATCH

    if len(test_x) % BATCH != 0:
        test_steps += 1

    with CustomObjectScope({'iou': iou}):
        model = tf.keras.models.load_model("../Models/model_transversal.h5")
    model.evaluate(test_dataset, steps=test_steps)

    for i, (x, y) in tqdm(enumerate(zip(test_x, test_y)), total=len(test_x)):
        x = read_image(x)
        y = read_mask(y)
        y_pred = model.predict(np.expand_dims(x, axis=0))
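The loop above computes `y_pred`, but the rest of the body is not shown. A hedged sketch of how `mask_parse` might be used on one prediction inside that loop (the 0.5 threshold, the `results/` directory, and the `plt.imsave` call are assumptions, not part of the original snippet):

import matplotlib.pyplot as plt

# Hypothetical continuation of the loop body above.
pred_mask = mask_parse((y_pred[0] > 0.5).astype(np.float32))  # binarize and expand to 3 channels
true_mask = mask_parse(y)
plt.imsave(f"results/{i}.png", np.hstack([true_mask, pred_mask]))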
Example #7
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--device',
        '-d',
        type=int,
        default=0,
        help='Which GPU to use (one of 0-7; default: 0)')
    parser.add_argument('--lamabda', type=float, default=1.0)
    parser.add_argument('--save_path', type=str, default='./SGDBeta-V2/')
    parser.add_argument('--continue_beta',
                        type=str,
                        default='./lambda.2000.0.txt')
    args = parser.parse_args()

    DEVICE = torch.device('cuda', args.device)
    save_path = args.save_path

    TOTAL_ITER = 200000
    InitiaLearningRate = 1e-5 / (1 + args.lamabda / 20)

    affine = Affine()

    if args.continue_beta is not None and os.path.isfile(args.continue_beta):
        print('Warm starting from {}'.format(args.continue_beta))
        beta = np.loadtxt(args.continue_beta).astype(np.float32)[np.newaxis, :]

        affine.fc.weight = torch.nn.Parameter(torch.tensor(beta))
        TOTAL_ITER = TOTAL_ITER // 10
        InitiaLearningRate = InitiaLearningRate / 2

    GetLearningRate = LinearLearningRatePolicy(TOTAL_ITER, InitiaLearningRate)

    affine = affine.to(DEVICE)

    criterion = nn.MSELoss(reduction='sum').to(DEVICE)

    optimizer = optim.SGD(affine.parameters(),
                          lr=GetLearningRate(0),
                          momentum=0.8)

    numpy_data = load_data()

    X = torch.from_numpy(numpy_data.X).type(torch.FloatTensor).to(DEVICE)
    Y = torch.tensor(numpy_data.Y).type(torch.FloatTensor).to(DEVICE)
    beta = torch.tensor(numpy_data.beta).type(torch.FloatTensor).to(DEVICE)
    data = Data(X, Y, beta)
    pbar = tqdm(range(TOTAL_ITER))

    print('Lambda:{}  -- Total Iters:{} -- Initial Lr:{}'.format(
        args.lamabda, TOTAL_ITER, InitiaLearningRate))
    for cur_iter in pbar:
        residual_loss, l1 = train_one_iter(data, criterion, args.lamabda,
                                           optimizer, affine)

        pbar.set_postfix(residual_loss='{:.2f}'.format(residual_loss),
                         l1='{:.2f}'.format(l1))

    beta = affine.fc.weight.detach().cpu().numpy()

    np.savetxt(save_path, beta)
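`LinearLearningRatePolicy`, `Affine`, `Data`, and `train_one_iter` are defined elsewhere in this project. As a rough illustration, a linear decay policy like the one instantiated above might look like the following sketch (an assumption, not the project's actual implementation):

class LinearLearningRatePolicy:
    # Hypothetical sketch: decay the learning rate linearly from initial_lr to 0
    # over total_iters iterations.
    def __init__(self, total_iters, initial_lr):
        self.total_iters = total_iters
        self.initial_lr = initial_lr

    def __call__(self, cur_iter):
        return self.initial_lr * max(0.0, 1.0 - cur_iter / self.total_iters)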