コード例 #1
0
def ranging(model_path, x, y, win_len, col="k", model_pred=None):
    """
    Plot a scattergram of the F1 score for each patient.

    :param model_path: path handed to ``load_model``; only used when
        ``model_pred`` is None
    :param x: model inputs, one record per entry along the first axis
    :param y: ground-truth annotation, indexed as (record, sample, class)
    :param win_len: length of the evaluated window, centred in the record
    :param col: colour passed to ``plt.scatter``
    :param model_pred: optional precomputed predictions with the same layout
        as ``y``; when given, no model is loaded
    :return: tuple of F1 scores ordered by record index (empty tuple when
        ``x`` is empty)
    """
    # NOTE(review): 5000 is hard-coded as the record length in samples --
    # confirm against the dataset.
    offsets = (5000 - win_len) // 2
    window = slice(offsets, 5000 - offsets)
    Y = y[:, window, :]

    # ``is None`` rather than ``== None``: ``==`` on a NumPy array is
    # elementwise, and the resulting array cannot be used as an ``if``
    # condition (ValueError: truth value is ambiguous).
    if model_pred is None:
        model = load_model(model_path)
        prediction = np.array(model.predict(x))
    else:
        prediction = model_pred
    prediction = prediction[:, window, :]

    scores = []
    for i in range(len(x)):
        # ``statistics`` is fed batches, so each record is re-wrapped as a
        # batch of one.
        stat = statistics(np.expand_dims(Y[i, :, :], axis=0),
                          np.expand_dims(prediction[i, :, :], axis=0))
        scores.append(F_score(stat))
    scores = tuple(scores)

    plt.scatter(list(range(len(scores))), scores, c=col, alpha=0.3)
    plt.show()
    return scores
コード例 #2
0
def draw_one(model_path, x, y, pacient, win_len):
    """
    Plot one record with its ground-truth and predicted annotation, and print
    the statistics and F score for that record.

    :param model_path: path handed to ``load_model``
    :param x: input signals, indexed as (record, sample, channel); channel 0
        is plotted
    :param y: ground-truth annotation, indexed as (record, sample, class)
    :param pacient: index of the record to draw
    :param win_len: length of the drawn window, centred in the record
    """
    # NOTE(review): 5000 is hard-coded as the record length and 500 as the
    # divisor that turns sample indices into x-axis values (presumably the
    # sampling rate in Hz) -- confirm against the dataset.
    offsets = (5000 - win_len) // 2
    window = slice(offsets, 5000 - offsets)

    model = load_model(model_path)
    X = np.expand_dims(x[pacient, :, :], axis=0)
    Y = np.expand_dims(y[pacient, window, :], axis=0)

    prediction = np.array(model.predict(X))
    prediction = prediction[:, window, :]

    # Integer arange divided afterwards: a float step in np.arange can
    # produce one element too many or too few because of rounding.
    x_axis = np.arange(offsets, offsets + win_len) / 500
    plt.figure(figsize=(20, 5))
    plt.plot(x_axis, x[pacient, window, 0], 'k')

    # The batch holds a single record, so its row index is always 0.
    predict_rounded = np.argmax(prediction, axis=2)[0]
    # Size the one-hot table by the model's class count, not by the largest
    # class that happens to be predicted; otherwise a missing class makes the
    # column lookups below raise IndexError.
    n_classes = prediction.shape[2]
    one_hot = np.zeros((predict_rounded.size, n_classes))
    one_hot[np.arange(predict_rounded.size), predict_rounded] = 1

    class_colors = ((1, 'r'), (2, 'g'), (0, 'b'))
    # Ground truth is drawn as bands below the signal (baseline -50) ...
    for cls, color in class_colors:
        plt.fill_between(x_axis, Y[0, :win_len, cls] * 40 + -50, -50,
                         color=color, alpha=0.3)
    # ... and the prediction as bands on the zero baseline.
    for cls, color in class_colors:
        plt.fill_between(x_axis, one_hot[:win_len, cls] * 40, 0,
                         color=color, alpha=0.3)

    stat = statistics(Y, prediction)
    F = F_score(stat)
    print(stat)
    print(F)
    plt.show()
コード例 #3
0
def trim(model, xtrain, ytrain, name, threshold, path_to_data, win_len):
    """
    Remove from xtrain, ytrain the records on which the model reaches an
    F score of at least ``threshold`` and pickle the remainder.

    :param model: object with a ``predict`` method taking ``xtrain``
    :param xtrain: training inputs, one record along the first axis
    :param ytrain: ground-truth annotation, indexed as (record, sample, class)
    :param name: suffix of the output file, which is named ``trim_<name>.pkl``
    :param threshold: records with F >= threshold are dropped
    :param path_to_data: path to the folder where the trimmed dataset will be
        saved; created if missing
    :param win_len: controls the evaluated sample range (see note below)
    :return: trimmed dataset as ``(xtrain_new, ytrain_new)``
    """
    pred_train = np.array(model.predict(xtrain))

    # NOTE(review): this evaluates samples win_len//2 .. 5000 - win_len//2,
    # i.e. about 5000 - win_len samples rather than win_len samples --
    # confirm that this is the intended window.
    lo, hi = win_len // 2, 5000 - win_len // 2

    drop = []
    for i in range(len(xtrain)):
        stat = statistics(np.expand_dims(ytrain[i, lo:hi, :], axis=0),
                          np.expand_dims(pred_train[i, lo:hi, :], axis=0))
        if F_score(stat) >= threshold:
            drop.append(i)

    # One np.delete per array instead of one per removed record: each call
    # copies the whole array, so deleting inside the loop was quadratic.
    xtrain_new = np.delete(xtrain, drop, axis=0)
    ytrain_new = np.delete(ytrain, drop, axis=0)

    os.makedirs(path_to_data, exist_ok=True)
    # os.path.join instead of a literal "\\" so the file also lands inside
    # path_to_data on non-Windows systems; ``with`` closes the file even if
    # pickling fails.
    out_path = os.path.join(path_to_data, "trim_" + name + ".pkl")
    with open(out_path, 'wb') as outfile:
        pkl.dump({"x": xtrain_new, "y": ytrain_new}, outfile)
    return xtrain_new, ytrain_new
コード例 #4
0
    def loss_function(weights: np.ndarray) -> float:
        """Return the negated F2 score of the weighted blend of level-1 predictions.

        For every level-1 model, its per-fold predictions are scaled by the
        model's weight and written into the rows selected by
        ``fold_num == fold``; the per-model arrays are then summed and scored
        against ``all_labels`` with ``F_score(beta=2, threshold=0)``.
        The score and the weights are printed on every call.
        """
        blended = np.zeros_like(all_labels)

        for per_fold_predicts, weight in zip(level1_train_predicts, weights):
            weighted = np.zeros_like(all_labels)
            for fold_idx, fold_predict in enumerate(per_fold_predicts):
                weighted[fold_num == fold_idx] = fold_predict * weight
            blended += weighted

        score = F_score(blended, all_labels, beta=2, threshold=0)
        print('score', score, 'weights', weights)

        # Negated, presumably so that a minimiser maximises the score.
        return -score
コード例 #5
0
def histogram(model_paths_list, x, y, win_len, threshold=0.99):
    """
    Count, for every model, the records of x whose F score is >= threshold.

    :param model_paths_list: paths to saved models; each file name is parsed
        as "ens_model_<number>" followed by a three-character suffix
    :param x: model inputs, one record along the first axis
    :param y: ground-truth annotation, indexed as (record, sample, class)
    :param win_len: controls the evaluated sample range
        (win_len//2 .. 5000 - win_len//2)
    :param threshold: minimal F score for a record to be counted
    :return: dictionary {model number: number of counted records}
    """
    prefix_len = len("ens_model_")
    counts = {}
    for model_path in model_paths_list:
        filename = split(model_path)[1]
        model_num = int(filename[prefix_len:-3])
        counts[model_num] = 0

        predictions = np.array(load_model(model_path).predict(x))
        for idx in range(len(x)):
            pred_window = predictions[idx, win_len // 2:5000 - win_len // 2, :]
            true_window = y[idx, win_len // 2:5000 - win_len // 2, :]
            # Each record is scored on its own, wrapped as a batch of one.
            stat = statistics(np.expand_dims(true_window, axis=0),
                              np.expand_dims(pred_window, axis=0))
            if F_score(stat) >= threshold:
                counts[model_num] += 1

    return counts
コード例 #6
0
def validate(data_loader: Any, model: Any) -> float:
    ''' Performs validation, returns validation score.

    Runs `model` over `data_loader` without gradients, applies a sigmoid to
    the outputs and searches 100 thresholds in [0.05, 0.25] for the best
    `F_score(..., beta=2)`.

    When `data_loader.dataset.num_ttas != 1` every batch is expected to be
    (batch, crops, c, h, w); the crops are run through the model as one flat
    batch and merged per sample with `config.test.tta_combine_func`
    ('max' or 'mean').

    Raises ValueError if TTA is enabled and the combine function is neither
    'max' nor 'mean'.
    Returns the best score (0.0 when no threshold scores above zero). '''
    model.eval()

    sigmoid = nn.Sigmoid()
    predicts_list, targets_list = [], []

    use_tta = data_loader.dataset.num_ttas != 1
    combine_func = None
    if use_tta:
        combine_func = config.test.tta_combine_func
        # Checked up front with a real exception: the former `assert False`
        # disappears under `python -O`, which would leave the per-crop
        # outputs uncombined.
        if combine_func not in ('max', 'mean'):
            raise ValueError(f'unsupported tta_combine_func: {combine_func!r}')

    with torch.no_grad():
        for input_data in tqdm(data_loader):
            if data_loader.dataset.mode != 'test':
                input_, target = input_data
            else:
                # NOTE(review): with target=None the torch.cat over
                # targets_list below cannot succeed, so this function looks
                # unusable in 'test' mode -- confirm whether this branch is
                # still needed.
                input_, target = input_data, None

            if use_tta:
                bs, ncrops, c, h, w = input_.size()
                # fold the crops into the batch dimension for one forward pass
                input_ = input_.view(-1, c, h, w)

            # Both paths now move the input to the GPU; previously only the
            # non-TTA path did.
            output = sigmoid(model(input_.cuda()))

            if use_tta:
                output = output.view(bs, ncrops, -1)
                if combine_func == 'max':
                    output = output.max(1)[0]
                else:
                    output = output.mean(1)

            predicts_list.append(output.detach().cpu())
            targets_list.append(target)

    predicts, targets = torch.cat(predicts_list), torch.cat(targets_list)
    best_score, best_thresh = 0.0, 0.0

    for threshold in tqdm(np.linspace(0.05, 0.25, 100)):
        score = F_score(predicts, targets, beta=2, threshold=threshold)
        if score > best_score:
            best_score, best_thresh = score, threshold.item()

    print(f'F2 {best_score:.4f} threshold {best_thresh:.4f}')
    return best_score
コード例 #7
0
ファイル: train.py プロジェクト: gittigxuy/imet
def validate(val_loader: Any, model: Any, epoch: int) -> Tuple[float, float, np.ndarray]:
    ''' Scores the model on the validation loader.

    Predictions and targets come from `inference(val_loader, model)`; 100
    thresholds evenly spaced over [0.05, 0.25] are tried and the first one
    with the highest `F_score(..., beta=2)` wins.

    Returns (best score, best threshold, predictions as a numpy array). '''
    logger.info('validate()')

    raw_predicts, raw_targets = inference(val_loader, model)
    predicts = torch.tensor(raw_predicts)
    targets = torch.tensor(raw_targets)

    best_score = 0.0
    best_thresh = 0.0
    candidates = np.linspace(0.05, 0.25, 100)

    for threshold in tqdm(candidates, disable=IN_KERNEL):
        score = F_score(predicts, targets, beta=2, threshold=threshold)
        improved = score > best_score
        if improved:
            best_score = score
            best_thresh = threshold.item()

    logger.info(f'{epoch} F2 {best_score:.4f} threshold {best_thresh:.4f}')
    logger.info(f' * F2 on validation {best_score:.4f}')
    return best_score, best_thresh, predicts.numpy()
コード例 #8
0
def draw_all(model_path, x, y, win_len, model2=None):
    """
    Save one annotated plot per record as ``ill<i>.png`` in the working
    directory and print the dataset statistics and F score for each record.

    :param model_path: path handed to ``load_model``
    :param x: input signals, indexed as (record, sample, channel); channel 0
        is plotted
    :param y: ground-truth annotation, indexed as (record, sample, class)
    :param win_len: length of the drawn window, centred in the record
    :param model2: optional path to a second model whose prediction is drawn
        as an extra row of bands (baseline 50)
    """
    # NOTE(review): 5000 is hard-coded as the record length and 500 as the
    # divisor that turns sample indices into x-axis values -- confirm.
    offsets = (5000 - win_len) // 2
    window = slice(offsets, 5000 - offsets)

    model = load_model(model_path)
    Y = y[:, window, :]
    prediction = np.array(model.predict(x))[:, window, :]
    labels = np.argmax(prediction, axis=2)

    labels2 = None
    if model2 is not None:
        prediction2 = np.array(load_model(model2).predict(x))[:, window, :]
        labels2 = np.argmax(prediction2, axis=2)

    # Computed once: these are statistics over the whole dataset and do not
    # change between records. They are still printed per record, as before.
    # NOTE(review): per-record statistics may have been intended -- confirm.
    stat = statistics(Y, prediction)
    F = F_score(stat)

    # Integer arange divided afterwards: a float step in np.arange can
    # produce one element too many or too few because of rounding.
    x_axis = np.arange(offsets, offsets + win_len) / 500
    class_colors = ((1, 'r'), (2, 'g'), (0, 'b'))

    for i in range(len(x)):
        fig = plt.figure(figsize=(20, 5))
        plt.plot(x_axis, x[i, window, 0], 'k')

        # One-hot table sized by the model's class count, so a class that is
        # never predicted for this record does not cause an IndexError.
        one_hot = np.zeros((labels[i].size, prediction.shape[2]))
        one_hot[np.arange(labels[i].size), labels[i]] = 1

        for cls, color in class_colors:
            plt.fill_between(x_axis, Y[i, :win_len, cls] * 40 + -50, -50,
                             color=color, alpha=0.3)
        for cls, color in class_colors:
            plt.fill_between(x_axis, one_hot[:win_len, cls] * 40, 0,
                             color=color, alpha=0.3)

        if labels2 is not None:
            one_hot2 = np.zeros((labels2[i].size, prediction2.shape[2]))
            one_hot2[np.arange(labels2[i].size), labels2[i]] = 1
            for cls, color in class_colors:
                plt.fill_between(x_axis, one_hot2[:win_len, cls] * 40 + 50, 50,
                                 color=color, alpha=0.3)

        print(stat)
        print(F)
        plt.savefig("ill" + str(i) + ".png")
        # Close instead of only clearing: every iteration opens a new figure,
        # and cleared-but-open figures accumulate in memory.
        plt.close(fig)
コード例 #9
0
def trim(model, xtrain, ytrain, data_name, threshold, path_to_data, win_len):
    """
    Drop the records on which the model reaches an F score of at least
    ``threshold`` and pickle the remaining dataset.

    :param model: object with a ``predict`` method taking ``xtrain``
    :param xtrain: training inputs, one record along the first axis
    :param ytrain: ground-truth annotation, indexed as (record, sample, class)
    :param data_name: suffix of the output file ``trim_<data_name>.pkl``
    :param threshold: records with F >= threshold are removed
    :param path_to_data: folder the pickle is written to; created if missing
    :param win_len: controls the evaluated sample range (see note below)
    :return: ``(xtrain_new, ytrain_new)`` without the removed records
    """
    import os  # local import: this block did not previously depend on os

    pred_train = np.array(model.predict(xtrain))

    # NOTE(review): the evaluated range is win_len//2 .. 5000 - win_len//2,
    # i.e. about 5000 - win_len samples, not win_len -- confirm the intent.
    start, stop = win_len // 2, 5000 - win_len // 2

    removed = []
    for i in range(len(xtrain)):
        stat = statistics(np.expand_dims(ytrain[i, start:stop, :], axis=0),
                          np.expand_dims(pred_train[i, start:stop, :], axis=0))
        if F_score(stat) >= threshold:
            removed.append(i)

    # A single np.delete per array; deleting record by record copied the
    # whole dataset on every removal.
    xtrain_new = np.delete(xtrain, removed, axis=0)
    ytrain_new = np.delete(ytrain, removed, axis=0)

    if path_to_data:
        os.makedirs(path_to_data, exist_ok=True)
    # os.path.join keeps the Windows result unchanged and makes the path
    # valid elsewhere; ``with`` guarantees the file is closed on error.
    out_path = os.path.join(path_to_data, "trim_" + data_name + ".pkl")
    with open(out_path, 'wb') as outfile:
        pkl.dump({"x": xtrain_new, "y": ytrain_new}, outfile)
    return xtrain_new, ytrain_new
コード例 #10
0
def ranging(model_path, x, y, win_len, col= "k", is_path = True):
    """
    Scatter-plot the per-record F score and return the scores.

    :param model_path: path handed to ``load_model`` when ``is_path`` is
        true; otherwise the ready-made prediction array itself
    :param x: model inputs, one record along the first axis
    :param y: ground-truth annotation, indexed as (record, sample, class)
    :param win_len: length of the evaluated window, centred in the record
    :param col: colour passed to ``plt.scatter``
    :param is_path: selects how ``model_path`` is interpreted
    :return: tuple of F scores ordered by record index

    The figure is not shown here; the caller is expected to do that.
    """
    offsets = (5000 - win_len) // 2
    stop = 5000 - offsets
    Y = y[:, offsets:stop, :]

    prediction = np.array(load_model(model_path).predict(x)) if is_path else model_path
    prediction = prediction[:, offsets:stop, :]

    score_by_record = {}
    for idx in range(len(x)):
        # each record is scored on its own, wrapped as a batch of one
        truth = np.expand_dims(Y[idx, :, :], axis=0)
        guess = np.expand_dims(prediction[idx, :, :], axis=0)
        score_by_record[idx] = F_score(statistics(truth, guess))

    ordered = sorted(score_by_record.items())
    record_ids, f_scores = zip(*ordered)
    plt.scatter(record_ids, f_scores, c=col, alpha=0.3)
    return f_scores
コード例 #11
0
def histogram(model_paths_list, x, y, win_len, threshold=0.99):
    """
    Build {model number: number of records from x with F score >= threshold}.

    :param model_paths_list: list of paths to the saved models; the model
        number is read from a file name of the form "ens_model_<number>"
        followed by a three-character suffix
    :param x: dataset
    :param y: GT annotation
    :param win_len: controls the evaluated sample range
        (win_len//2 .. 5000 - win_len//2)
    :param threshold: minimal F score for a record to be counted
    """

    def reaches_threshold(predict, idx):
        # Score a single record, wrapped as a batch of one.
        pred = predict[idx, win_len // 2:5000 - win_len // 2, :]
        y_i = y[idx, win_len // 2:5000 - win_len // 2, :]
        stat = statistics(np.expand_dims(y_i, axis=0),
                          np.expand_dims(pred, axis=0))
        return F_score(stat) >= threshold

    result = {}
    for path in model_paths_list:
        model_num = int(split(path)[1][len("ens_model_"):-3])
        result[model_num] = 0
        model = load_model(path)
        predict = np.array(model.predict(x))
        result[model_num] += sum(
            1 for idx in range(len(x)) if reaches_threshold(predict, idx)
        )

    return result
コード例 #12
0
ファイル: train.py プロジェクト: gittigxuy/imet
def train_epoch(train_loader: Any, model: Any, criterion: Any, optimizer: Any,
                epoch: int, lr_scheduler: Any, lr_scheduler2: Any,
                max_steps: Optional[int]) -> None:
    ''' Runs one training epoch with gradient accumulation and logs progress.

    At most `max_steps` batches are used (all batches when `max_steps` is
    falsy), rounded down to a multiple of `config.train.accum_batches_num`.
    Gradients are accumulated over that many batches between optimizer steps.
    Loss and a running F2 (`F_score` with beta=2) go to `logger`; nothing is
    returned. '''
    logger.info(f'epoch: {epoch}')
    logger.info(f'learning rate: {get_lr(optimizer)}')

    batch_time = AverageMeter()
    losses = AverageMeter()
    avg_score = AverageMeter()

    model.train()
    optimizer.zero_grad()

    num_steps = len(train_loader)
    if max_steps:
        num_steps = min(max_steps, num_steps)
    # drop the trailing batches that would not fill a whole accumulation group
    num_steps -= num_steps % config.train.accum_batches_num

    logger.info(f'total batches: {num_steps}')
    end = time.time()
    lr_str = ''

    for i, (input_, target) in enumerate(train_loader):
        if i >= num_steps:
            break

        input_ = input_.cuda()

        if config.train.mixup.enable:
            input_, target = mixup(input_, target)

        output = model(input_)
        loss = criterion(output, target.cuda())

        # Binarise the model output at 0.1 for the running F2 only; the loss
        # uses the untouched output.
        # NOTE(review): no sigmoid is applied before thresholding here --
        # confirm whether `output` holds logits or probabilities.
        predict = (output.detach() > 0.1).type(torch.FloatTensor)
        avg_score.update(F_score(predict, target, beta=2))

        losses.update(loss.data.item(), input_.size(0))
        loss.backward()

        # step the optimizer only once per full accumulation group
        if (i + 1) % config.train.accum_batches_num == 0:
            optimizer.step()
            optimizer.zero_grad()

        # Step at most one scheduler per batch: the first one that
        # `is_scheduler_continuous` accepts, `lr_scheduler` taking precedence.
        # This happens on every batch, not only on optimizer steps.
        if is_scheduler_continuous(lr_scheduler):
            lr_scheduler.step()
            lr_str = f'\tlr {get_lr(optimizer):.02e}'
        elif is_scheduler_continuous(lr_scheduler2):
            lr_scheduler2.step()
            lr_str = f'\tlr {get_lr(optimizer):.08f}'

        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.train.log_freq == 0:
            logger.info(f'{epoch} [{i}/{num_steps}]\t'
                        f'time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        f'loss {losses.val:.4f} ({losses.avg:.4f})\t'
                        f'F2 {avg_score.val:.4f} ({avg_score.avg:.4f})'
                        + lr_str)

    logger.info(f' * average F2 on train {avg_score.avg:.4f}')
コード例 #13
0
ファイル: train.py プロジェクト: gittigxuy/imet
def lr_finder(train_loader: Any, model: Any, criterion: Any, optimizer: Any) -> None:
    ''' Sweeps the learning rate to find a usable LR range.

    The LR grows geometrically from `config.train.lr_finder.init_value` to
    `final_value` over at most `num_steps` batches while the model is
    trained; the sweep stops early once the smoothed loss exceeds four times
    the best smoothed loss seen so far.

    Side effects: trains `model` and changes the optimizer's LR, saves the
    recorded (log10 lr, smoothed loss) pairs as `lr_finder_<version>.npz` and
    a plot as `lr_finder_plot.png` in `config.experiment_dir`, and logs the
    suggested range. Only the steps actually run are saved and analysed.

    Raises ValueError when there are no batches to run. '''
    logger.info('lr_finder called')

    batch_time = AverageMeter()
    num_steps = min(len(train_loader), config.train.lr_finder.num_steps)
    logger.info(f'total batches: {num_steps}')
    if num_steps < 1:
        raise ValueError('lr_finder needs at least one batch')

    end = time.time()
    lr_str = ''
    model.train()

    init_value = config.train.lr_finder.init_value
    final_value = config.train.lr_finder.final_value
    beta = config.train.lr_finder.beta

    # With a single step there is nothing to sweep; this also avoids the
    # division by zero in the exponent.
    mult = (final_value / init_value) ** (1 / (num_steps - 1)) if num_steps > 1 else 1.0
    lr = init_value

    avg_loss = best_loss = 0.0
    losses = np.zeros(num_steps)
    logs = np.zeros(num_steps)
    recorded = 0  # number of leading entries of losses/logs that were filled

    for i, (input_, target) in enumerate(train_loader):
        if i >= num_steps:
            break

        set_lr(optimizer, lr)

        output = model(input_.cuda())
        loss = criterion(output, target.cuda())
        loss_val = loss.data.item()

        # running F2 for the log line only
        predict = (output.detach() > 0.1).type(torch.FloatTensor)
        f2 = F_score(predict, target, beta=2)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        lr_str = f'\tlr {lr:.08f}'

        # compute the smoothed loss (exponential moving average with bias
        # correction)
        avg_loss = beta * avg_loss + (1 - beta) * loss_val
        smoothed_loss = avg_loss / (1 - beta ** (i + 1))

        # stop if the loss is exploding
        if i > 0 and smoothed_loss > 4 * best_loss:
            break

        # record the best loss
        if smoothed_loss < best_loss or i == 0:
            best_loss = smoothed_loss

        # store the values
        losses[i] = smoothed_loss
        logs[i] = math.log10(lr)
        recorded = i + 1

        # update the lr for the next step
        lr *= mult

        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.train.log_freq == 0:
            logger.info(f'lr_finder [{i}/{num_steps}]\t'
                        f'time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        f'loss {loss_val:.4f} ({smoothed_loss:.4f})\t'
                        f'F2 {f2:.4f} {lr_str}')

    # Keep only the steps that were recorded. After an early stop the tail of
    # both arrays is still zero; a zero "loss" would win every argmin below
    # and its zero log-lr would report a highest LR of 10**0.
    losses, logs = losses[:recorded], logs[:recorded]

    np.savez(os.path.join(config.experiment_dir, f'lr_finder_{config.version}'),
             logs=logs, losses=losses)

    # d1 is the step-to-step change of the smoothed loss; its minimum marks
    # the steepest descent.
    d1 = np.zeros_like(losses)
    d1[1:] = losses[1:] - losses[:-1]
    first, last = np.argmin(d1), np.argmin(losses)

    MAGIC_COEFF = 4

    highest_lr = 10 ** logs[last]
    best_high_lr = highest_lr / MAGIC_COEFF
    best_low_lr = 10 ** logs[first]
    logger.info(f'best_low_lr={best_low_lr} best_high_lr={best_high_lr} '
                f'highest_lr={highest_lr}')

    def find_nearest(array: np.ndarray, value: float) -> int:
        return (np.abs(array - value)).argmin()

    last = find_nearest(logs, math.log10(best_high_lr))
    logger.info(f'first={first} last={last}')

    import matplotlib.pyplot as plt
    plt.plot(logs, losses, '-D', markevery=[first, last])
    plt.savefig(os.path.join(config.experiment_dir, 'lr_finder_plot.png'))
コード例 #14
0
        join(path_to_ensemble_models, f)
        for f in listdir(path_to_ensemble_models)
        if isfile(join(path_to_ensemble_models, f))
    ]

    xy = load_dataset()

    X = xy["x"]
    Y = xy["y"]
    offsets = (5000 - win_len) // 2
    xtrain, xtest, ytrain, ytest = train_test_split(X,
                                                    Y,
                                                    test_size=0.33,
                                                    random_state=42)
    model = load_model(path_to_ensemble_models + "\\ens_model_1.h5")
    pred_e = ensemble_predict(model_paths_list, xtest)
    pred_ = model.predict(xtest)

    stat = statistics(ytest[:, win_len // 2:5000 - win_len // 2, :],
                      pred_e[:, win_len // 2:5000 - win_len // 2, :])
    print(F_score(stat))

    #stat.to_csv("stats_one_test.csv", sep = ';')
    ranging(pred_e, xtest, ytest, win_len, col="k", is_path=False)
    plt.show()

    dict = histogram(model_paths_list, xtrain, ytrain, win_len, threshold=0.99)
    plt.bar(list(dict.keys()), dict.values(), color='g', alpha=0.5)
    plt.show()

    plot_two_prediction(pred_e, pred_, xtest, ytest, win_len, [5])
コード例 #15
0
def draw_one(model_path, x, y, patients, win_len):
    """
    For every requested patient, print the statistics and F1 score and plot
    the ECG with the network annotation and the ground truth.

    :param model_path: path to the trained model
    :param x: array of ECG, indexed as (record, sample, channel); channel 0
        is plotted
    :param y: array of annotation, indexed as (record, sample, class)
    :param patients: list of patient numbers to be plotted
    :param win_len: length of the drawn window, centred in the record
    """
    patients = list(patients)
    if not patients:
        return

    # NOTE(review): 5000 is hard-coded as the record length and 500 as the
    # divisor that turns sample indices into x-axis values -- confirm.
    offsets = (5000 - win_len) // 2
    window = slice(offsets, 5000 - offsets)
    # Integer arange divided afterwards: a float step in np.arange can
    # produce one element too many or too few because of rounding.
    x_axis = np.arange(offsets, offsets + win_len) / 500
    class_colors = ((1, 'r'), (2, 'g'), (0, 'b'))

    # Loaded once; the model does not depend on the patient.
    model = load_model(model_path)

    for pacient in patients:
        X = np.expand_dims(x[pacient, :, :], axis=0)
        Y = np.expand_dims(y[pacient, window, :], axis=0)

        prediction = np.array(model.predict(X))
        prediction = prediction[:, window, :]

        plt.figure(figsize=(20, 5))
        plt.plot(x_axis, x[pacient, window, 0], 'k')

        # The batch holds exactly one record, so its row is 0; indexing it
        # with the patient number raised IndexError for every patient but 0.
        predict_rounded = np.argmax(prediction, axis=2)[0]
        # Size the one-hot table by the model's class count so a class that
        # is never predicted does not break the column lookups below.
        one_hot = np.zeros((predict_rounded.size, prediction.shape[2]))
        one_hot[np.arange(predict_rounded.size), predict_rounded] = 1

        # Ground truth below the signal (baseline -50) ...
        for cls, color in class_colors:
            plt.fill_between(x_axis,
                             Y[0, :win_len, cls] * 40 + -50,
                             -50,
                             color=color,
                             alpha=0.3)
        # ... prediction on the zero baseline.
        for cls, color in class_colors:
            plt.fill_between(x_axis,
                             one_hot[:win_len, cls] * 40,
                             0,
                             color=color,
                             alpha=0.3)

        stat = statistics(Y, prediction)
        F = F_score(stat)
        print(stat)
        print(F)
        plt.show()