Example No. 1
def main():
    # Get input arguments
    args = get_command_line_args()
    use_gpu = torch.cuda.is_available() and args.gpu
    print("Input file: {}".format(args.input))
    print("Checkpoint file: {}".format(args.checkpoint))
    if args.top_k:
        print("Returning {} most likely classes".format(args.top_k))
    if args.category_names:
        print("Category names file: {}".format(args.category_names))
    if use_gpu:
        print("Using GPU.")
    else:
        print("Using CPU.")
    
    # Load the checkpoint
    model = predict_utils.load_checkpoint(args.checkpoint)
    print("Checkpoint loaded.")
    
    # Move tensors to GPU
    if use_gpu:
        model.cuda()
    
    # Load categories file
    if args.category_names:
        with open(args.category_names, 'r') as f:
            categories = json.load(f)
            print("Category names loaded")
    
    results_to_show = args.top_k if args.top_k else 1
    
    # Predict
    print("Processing image")
    probabilities, classes = predict_utils.predict(args.input, model, use_gpu, results_to_show, args.top_k)
    
    # Show the results
    # Print results
    if results_to_show > 1:
        print("Top {} Classes for '{}':".format(len(classes), args.input))

        if args.category_names:
            print("{:<30} {}".format("Flower", "Probability"))
            print("------------------------------------------")
        else:
            print("{:<10} {}".format("Class", "Probability"))
            print("----------------------")

        for i in range(0, len(classes)):
            if args.category_names:
                print("{:<30} {:.2f}".format(categories[classes[i]], probabilities[i]))
            else:
                print("{:<10} {:.2f}".format(classes[i], probabilities[i]))
    else:
        print("The most likely class is '{}': probability: {:.2f}" \
              .format(categories[classes[0]] if args.category_names else classes[0], probabilities[0]))
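The predict_utils module used above is not shown on this page, and its real signatures differ slightly between the examples. Purely as a hedged sketch of what such a predict() helper could look like for a PyTorch image classifier (assumptions: a log-softmax model output, a class_to_idx attribute attached to the model when the checkpoint was saved, and a hypothetical process_image() helper):

import numpy as np
import torch
from PIL import Image


def process_image(image_path):
    """Resize, center-crop and normalize an image into a (3, 224, 224) float tensor."""
    img = Image.open(image_path).convert('RGB').resize((256, 256))
    left = (256 - 224) // 2
    img = img.crop((left, left, left + 224, left + 224))
    arr = np.asarray(img, dtype=np.float32) / 255.0
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    arr = (arr - mean) / std
    return torch.from_numpy(np.ascontiguousarray(arr.transpose(2, 0, 1)))


def predict(image_path, model, use_gpu, top_k):
    """Return the top_k probabilities and class labels for a single image."""
    device = torch.device('cuda' if use_gpu else 'cpu')
    model.to(device).eval()
    image = process_image(image_path).unsqueeze(0).to(device)
    with torch.no_grad():
        log_probs = model(image)  # assumes the model outputs log-softmax scores
        probs, indices = torch.exp(log_probs).topk(top_k, dim=1)
    # assumes class_to_idx was stored on the model along with the checkpoint
    idx_to_class = {v: k for k, v in model.class_to_idx.items()}
    classes = [idx_to_class[i] for i in indices[0].tolist()]
    return probs[0].tolist(), classes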
Example No. 2
    suffixes = config.data_suffixes
    if args.is_single_series:
        test_ids = ['']
        suffixes = ['t1.nii.gz', 't2.nii.gz', 't1ce.nii.gz', 'flair.nii.gz']

    if not os.path.exists(args.out_data_dir):
        os.makedirs(args.out_data_dir)

    for idx in test_ids:
        data, data_crop, affine, brain_bbox = predict_utils.read_image(
            idx,
            suffixes=suffixes,
            test_data_path=args.test_data_dir,
            separate_folder=(not args.is_single_series))

        label = predict_utils.predict(m, data, data_crop, brain_bbox)

        label = label.reshape(data.shape[2:]).astype(np.float32)

        for key, value in config.dataset_transform_dict.items():  # .iteritems() is Python 2 only
            label[label == value] = key

        connected_regions = morphology.label(label > 0)

        clusters = predict_utils.reject_small_regions(connected_regions, 0.1)
        label[clusters == 0] = 0

        pred_nii = nib.Nifti1Image(label, affine)

        print(idx, np.mean(label > 0), np.unique(label))
Example No. 3
import argparse
import predict_utils

parser = argparse.ArgumentParser(description='Predict the class of an image using a trained model checkpoint')

parser.add_argument('--image_path', dest='image_path', action='store', default='./flowers/test/9/image_06410.jpg')
parser.add_argument('--checkpoint_path', dest='checkpoint_path', action='store', default='checkpoint.pth')
parser.add_argument('--top_k', dest='top_k', action='store', default=5, type=int)
parser.add_argument('--gpu', dest="mode", action="store", default="gpu")

args = parser.parse_args()

checkpoint_model = predict_utils.load_checkpoint(args.checkpoint_path)

probs, classes = predict_utils.predict(args.image_path, checkpoint_model, args.top_k)

for i in range(args.top_k):
    print("Probability - {} \t Class - {}".format(probs[i], classes[i]))
Example No. 4
parser.add_argument('--topk',
                    help="Number of classes and probabilities to display",
                    default=1,
                    type=int)
parser.add_argument('--category_names',
                    help="File with name mappings for classes")
parser.add_argument(
    '--gpu',
    help="Flag to use GPU for training data, recommended if GPU available.",
    action='store_true')

A = parser.parse_args()

#process the image
im = put.process_image(A.image_path)

#load the checkpoint into the model
model = put.load_model(A.checkpoint_path)

#predict the flower class
probabilities, classes = put.predict(model, im, A.topk, A.gpu)

# print the results
print(f"Probabilities: {probabilities}")
if A.category_names is not None:
    with open(A.category_names, 'r') as f:
        cat_to_name = json.load(f)
    names = [cat_to_name[key] for key in classes]
    print(f"Names: {names}")
else:
    print(f"Classes: {classes}")
Example No. 5
                    action="store",
                    dest="category_names",
                    default='cat_to_name')

args = vars(parser.parse_args())

# inputs
image_path = args['image']
checkpoint = args['input']
topk = int(args['top_k'])
device = 'cuda' if args['device'] == 'gpu' else 'cpu'

# load the model
model, learning_rate, hidden_units, class_to_idx = load_checkpoint(checkpoint)

# prediction
probs, classes = predict(image_path, device, model, topk)

# print results
cat_to_name_file = args['category_names'] + '.json'
with open(cat_to_name_file, 'r') as f:
    cat_to_name = json.load(f)

top_labels = [cat_to_name[cat] for cat in classes]

res = "\n".join("{} {}".format(x, y) for x, y in zip(probs, top_labels))

print(res)

Example No. 6
import predict_utils
import json

in_arg = predict_utils.get_cmd_args()  # get cmd args

# load model from checkpoint
model, _, _ = predict_utils.load_model(in_arg.checkpoint,
                                       "cuda:0" if in_arg.gpu else "cpu")

# get topk probabilities
topk_prob_array, topk_classes = predict_utils.predict(in_arg.input, model,
                                                      in_arg.topk)

print("Results:")
if in_arg.category_names is not None:  # a class-to-category-name mapping is available

    with open(in_arg.category_names, 'r') as f:
        cat_to_name = json.load(f)

    cat_to_name_dict = dict()
    for key in cat_to_name:  # JSON keys are strings; convert them to int class ids
        cat_to_name_dict[int(key)] = cat_to_name[key]

    topk_classes = [int(item) for item in topk_classes]
    flower_name = [cat_to_name_dict[i] for i in topk_classes]

    for i in range(len(topk_prob_array)):
        print("{}. {}\t{}%".format(i + 1, flower_name[i].title(),
                                   topk_prob_array[i] * 100))
else:
    for i in range(len(topk_prob_array)):
        print("{}. {}\t{}%".format(i + 1, topk_classes[i],
                                   topk_prob_array[i] * 100))
Example No. 7
def main(args):
    """ Train and evaluate with a model in a NON TIME SERIES form
    """
    if args.train_on_country is None:
        raise ValueError("select a specific country with --train_on_country")

    # ==========================================================================
    # Get the dataset
    # ==========================================================================
    X, Y, dates = data.getDataset(
        init_population=args.population,
        type=args.model.upper(),
        country=args.train_on_country,
        return_time_series=False,
    )

    if np.any(X > 1):
        raise ValueError("Data is expected to be normalized in [0, 1]. "
                         "The selected population is not large enough.")

    # Select the model
    if args.model.upper() == 'SIRC':
        model = SIRC()
    elif args.model.upper() == 'SEIR':
        model = SEIR()
    else:
        raise ValueError('--model not supported')

    # Select the loss
    if args.loss.upper() == 'MSE':
        loss = mse_loss
    elif args.loss.upper() == 'MAE':
        loss = mae_loss
    else:
        raise ValueError('--loss not supported')

    # cache file
    if os.path.isfile(args.cache_file):
        print('Loading from cache')
        with open(args.cache_file, 'rb') as f:
            optimal_params = pkl.load(f)
    else:
        optimal_params = {
            'beta_mu': [],
            'beta_rho': [],
            'gamma_mu': [],
            'gamma_rho': [],
            'delta_mu': [],
            'delta_rho': [],
            'predicted_dates': []
        }

    # ==========================================================================
    # Estimate parameters day by day
    # ==========================================================================

    def compute_loss(params={}, X=[], y_true=[], timesteps=1):
        # Set params (random if value is not set)
        model.set_params(**params)

        # Determine the loss with those params
        l = 0
        for _ in range(LOSS_CV_ROUNDS):
            y_pred = model.predict(X[:, 0], X[:, 1], X[:, 2],
                                   X[:, 3], timesteps)
            y_pred = np.squeeze(y_pred, axis=1)  # remove timestep dimension
            l += loss(y_true, y_pred)  # use the loss selected via --loss
        l /= LOSS_CV_ROUNDS
        return l

    print("{:^20} {:>20} {:>20} {:>20} {:>20}".format('date', 'beta', 'gamma',
                                                      'delta', 'loss'))
    for t in range(args.days_to_compute_params, len(X)):
        date = dates[t - args.days_to_compute_params // 2].strftime("%d/%m/%Y")

        # If element in cache file, skip it
        if date in optimal_params['predicted_dates']:
            i = optimal_params['predicted_dates'].index(date)
            print(
                "{:^20} {: 15.2E}±{:.2E} {: 15.2E}±{:.2E} {: 15.2E}±{:.2E} {}".
                format(optimal_params['predicted_dates'][i],
                       optimal_params['beta_mu'][i],
                       optimal_params['beta_rho'][i],
                       optimal_params['gamma_mu'][i],
                       optimal_params['gamma_rho'][i],
                       optimal_params['delta_mu'][i],
                       optimal_params['delta_rho'][i], '[from cache]'))
            continue

        # Initialize optimal params
        for param_name in optimal_params:
            optimal_params[param_name].append(None)
        optimal_params['predicted_dates'][-1] = date
        optimal_loss = np.inf

        # Data as the data of those days
        X_step = X[t - args.days_to_compute_params:t]
        Y_step = Y[t - args.days_to_compute_params:t]
        timesteps = 1

        for it in range(args.train_iters):
            l = compute_loss(X=X_step, y_true=Y_step, timesteps=1)
            params = model.get_params()

            # TODO: implement Occam's razor for solutions: if (opt_loss - loss) < epsilon,
            # choose the solution with higher variance
            if l < optimal_loss:
                # Set last optimal param as the value current param
                for k, v in params.items():
                    optimal_params[k][-1] = v
                optimal_loss = l

        # Report the best params found for this date, matching the cached output above
        print(
            "{:^20} {: 15.2E}±{:.2E} {: 15.2E}±{:.2E} {: 15.2E}±{:.2E} {: 15.2E}"
            .format(optimal_params['predicted_dates'][-1],
                    optimal_params['beta_mu'][-1], optimal_params['beta_rho'][-1],
                    optimal_params['gamma_mu'][-1], optimal_params['gamma_rho'][-1],
                    optimal_params['delta_mu'][-1], optimal_params['delta_rho'][-1],
                    optimal_loss))

    # save cache file
    with open(args.cache_file, 'wb') as f:
        pkl.dump(optimal_params, f)

    # ==========================================================================
    # Fit linear regression on model parameters and predict what's next
    # ==========================================================================

    optimal_params = {k: np.array(v) for k, v in optimal_params.items()}
    predicted_dates = optimal_params['predicted_dates'].copy()
    del optimal_params['predicted_dates']

    # smooth average
    def smooth(x):
        window = 3
        last = x[-window + 1:]
        averaged = np.convolve(x, np.ones((window, )) / window, mode='same')
        averaged[-window + 1:] = last  # do not average the last samples (avoids edge distortion)
        return averaged

    optimal_params = {k: smooth(v) for k, v in optimal_params.items()}

    # fit Beta (that changes according to social distancing etc)
    # linear regression on the last computed params
    DAYS_TO_INFER_PARAMS = 20
    inferred_coeffs = np.polyfit(
        x=np.arange(0, DAYS_TO_INFER_PARAMS),
        y=optimal_params['beta_mu'][-DAYS_TO_INFER_PARAMS:],
        deg=1)

    # Get the values for the next days
    x = np.arange(DAYS_TO_INFER_PARAMS,
                  DAYS_TO_INFER_PARAMS + args.predicted_days)
    beta_mu = x * inferred_coeffs[0] + inferred_coeffs[1]
    beta_mu = np.clip(beta_mu, 0, np.inf)

    # The other params are considered stable, so we use their mean
    beta_rho = np.mean(optimal_params['beta_rho'])
    gamma_mu = np.mean(optimal_params['gamma_mu'])
    gamma_rho = np.mean(optimal_params['gamma_rho'])
    delta_mu = np.mean(optimal_params['delta_mu'])
    delta_rho = np.mean(optimal_params['delta_rho'])

    beta_rho = np.array([beta_rho] * args.predicted_days)
    gamma_mu = np.array([gamma_mu] * args.predicted_days)
    gamma_rho = np.array([gamma_rho] * args.predicted_days)
    delta_mu = np.array([delta_mu] * args.predicted_days)
    delta_rho = np.array([delta_rho] * args.predicted_days)

    params_next_days = []
    for i in range(args.predicted_days):
        params_next_days.append({
            'beta_mu': beta_mu[i],
            'beta_rho': beta_rho[i],
            'gamma_mu': gamma_mu[i],
            'gamma_rho': gamma_rho[i],
            'delta_mu': delta_mu[i],
            'delta_rho': delta_rho[i]
        })

    predict(model,
            args.train_on_country,
            args.population,
            args.predicted_days,
            params_next_days,
            std_dev=0.1)

    # ==========================================================================
    # Compute R0 and show params
    # ==========================================================================

    # add predicted future params
    current = datetime.strptime(predicted_dates[-1], '%d/%m/%Y')
    dates = []
    for i in range(args.predicted_days):
        dates.append(current + timedelta(days=i + 1))
    dates = np.array([d.strftime('%d/%m/%Y') for d in dates])
    predicted_dates = np.concatenate((predicted_dates, dates))

    optimal_params['beta_mu'] = np.concatenate(
        (optimal_params['beta_mu'], beta_mu))
    optimal_params['beta_rho'] = np.concatenate(
        (optimal_params['beta_rho'], beta_rho))
    optimal_params['gamma_mu'] = np.concatenate(
        (optimal_params['gamma_mu'], gamma_mu))
    optimal_params['gamma_rho'] = np.concatenate(
        (optimal_params['gamma_rho'], gamma_rho))
    optimal_params['delta_mu'] = np.concatenate(
        (optimal_params['delta_mu'], delta_mu))
    optimal_params['delta_rho'] = np.concatenate(
        (optimal_params['delta_rho'], delta_rho))

    # compute R0
    optimal_params[
        'R0_mu'] = optimal_params['beta_mu'] * optimal_params['gamma_mu']
    optimal_params[
        'R0_rho'] = optimal_params['beta_rho'] * optimal_params['gamma_rho']

    fig, axs = plt.subplots(4, sharex=True, figsize=(15, 15))
    fig.suptitle(args.train_on_country + ' - Params')

    # Plot average
    axs[0].plot(optimal_params['R0_mu'], label='R0', color='b')
    axs[1].plot(optimal_params['beta_mu'], label='Beta', color='r')
    axs[2].plot(optimal_params['gamma_mu'], label='Gamma', color='orange')
    axs[3].plot(optimal_params['delta_mu'], label='Delta', color='g')

    # TODO: Set delta to NaN if no deaths / recoveries in that period

    # "Confidence interval"
    x = np.arange(0, len(predicted_dates))
    axs[0].fill_between(x,
                        optimal_params['R0_mu'] + optimal_params['R0_rho'],
                        optimal_params['R0_mu'] - optimal_params['R0_rho'],
                        color='b',
                        alpha=0.2)
    axs[1].fill_between(x,
                        optimal_params['beta_mu'] + optimal_params['beta_rho'],
                        optimal_params['beta_mu'] - optimal_params['beta_rho'],
                        color='r',
                        alpha=0.2)
    axs[2].fill_between(
        x,
        optimal_params['gamma_mu'] + optimal_params['gamma_rho'],
        optimal_params['gamma_mu'] - optimal_params['gamma_rho'],
        color='orange',
        alpha=0.2)
    axs[3].fill_between(
        x,
        optimal_params['delta_mu'] + optimal_params['delta_rho'],
        optimal_params['delta_mu'] - optimal_params['delta_rho'],
        color='g',
        alpha=0.2)

    plt.xticks(x[::5], predicted_dates[::5], rotation=45)
    for ax in axs:
        ax.grid(True)
        ax.legend()
    axs[0].set_ylim(0, 10)
    axs[1].set_ylim(0, 4)
    axs[2].set_ylim(5, 40)
    axs[3].set_ylim(0, 1)
    plt.plot()
    plt.savefig(args.train_on_country + "_params.png")
    plt.show()
Example No. 8
def main(args):
    """ Train and evaluate with a model in a NON TIME SERIES form
    """

    # ==========================================================================
    # Get the dataset
    # ==========================================================================
    X, Y, dates = data.getDataset(
        init_population=args.population,
        type=args.model.upper(),
        country=args.train_on_country,
        return_time_series=False,
    )

    if np.any(X > 1):
        raise ValueError("Data is expected to be normalized in [0, 1]. "
                         "The selected population is not large enough.")

    # Select the model
    if args.model.upper() == 'SIRC':
        model = SIRC()
    elif args.model.upper() == 'SEIR':
        model = SEIR()
    else:
        raise ValueError('--model not supported')

    # Select the loss
    if args.loss.upper() == 'MSE':
        loss = mse_loss
    elif args.loss.upper() == 'MAE':
        loss = mae_loss
    else:
        raise ValueError('--loss not supported')

    print("{:>15} {:>15} {:>15} {:>15} {:>15} {:>15}  {:>15}  {:>15}".format(
        'iter', 'beta_mu', 'beta_rho', 'gamma_mu', 'gamma_rho', 'delta_mu',
        'delta_rho', 'loss'))

    # Start from last checkpoint if exists
    old_data = os.path.isfile(args.cache_file)
    f = open(args.cache_file, 'a')
    if not old_data:
        f.write(
            "iter,beta_mu,beta_rho,gamma_mu,gamma_rho,delta_mu,delta_rho,loss\n"
        )
        optimal_params = None
        optimal_loss = np.inf
    else:
        with open(args.cache_file, 'r') as log_f:
            logs = log_f.read().splitlines()
        last_log = logs[-1]
        last_log = last_log.split(',')
        optimal_params = {
            'beta_mu': float(last_log[1]),
            'beta_rho': float(last_log[2]),
            'gamma_mu': float(last_log[3]),
            'gamma_rho': float(last_log[4]),
            'delta_mu': float(last_log[5]),
            'delta_rho': float(last_log[6]),
        }
        optimal_loss = float(last_log[7])

        print(
            "{: 15d} {: 15.2E} {: 15.2E} {: 15.2E} {: 15.2E} {: 15.2E} {: 15.2E} {: 15.2E}"
            .format(-1, optimal_params['beta_mu'], optimal_params['beta_rho'],
                    optimal_params['gamma_mu'], optimal_params['gamma_rho'],
                    optimal_params['delta_mu'], optimal_params['delta_rho'],
                    optimal_loss))

    # ==========================================================================
    # Train by random search
    # ==========================================================================

    y_true = Y
    timesteps = 1

    def compute_loss(params={}, X=[], y_true=[], timesteps=1):
        # Set params (random if value is not set)
        model.set_params(**params)

        # Determine the loss with those params
        l = 0
        for _ in range(LOSS_CV_ROUNDS):
            y_pred = model.predict(X[:, 0], X[:, 1], X[:, 2], X[:, 3],
                                   timesteps)
            y_pred = np.squeeze(y_pred, axis=1)  # remove timestep dimension

            l += loss(y_true, y_pred)  # use the loss selected via --loss
        l /= LOSS_CV_ROUNDS
        return l

    for it in range(args.train_iters):
        if it % 1000 == 0:
            print('Iter:', it)

        l = compute_loss(X=X, y_true=y_true, timesteps=1)

        params = model.get_params()
        # TODO: implement Occam's razor for solutions: if (opt_loss - loss) < epsilon,
        # choose the solution with higher variance
        if l < optimal_loss:
            optimal_params = params
            optimal_loss = l

            mess = "{},{},{},{},{},{},{},{}".format(it, params['beta_mu'],
                                                    params['beta_rho'],
                                                    params['gamma_mu'],
                                                    params['gamma_rho'],
                                                    params['delta_mu'],
                                                    params['delta_rho'], l)
            f.write(mess + "\n")
            print(
                "{: 15d} {: 15.2E} {: 15.2E} {: 15.2E} {: 15.2E} {: 15.2E} {: 15.2E} {: 15.2E}"
                .format(it, params['beta_mu'], params['beta_rho'],
                        params['gamma_mu'], params['gamma_rho'],
                        params['delta_mu'], params['delta_rho'], l))

    f.close()

    # ==========================================================================
    # Train by scipy minimize
    # ==========================================================================

    # from scipy import optimize

    # def compute_loss(params, X = [], y_true = [], timesteps = 1):
    #     # Set params (random if value is not set)
    #     params = {'beta_mu' : params[0],
    #             'beta_rho' : params[1],
    #             'gamma_mu' : params[2],
    #             'gamma_rho' : params[3],
    #             'delta_mu' : params[4],
    #             'delta_rho' : params[5],}

    #     model.set_params(**params)

    #     # Determine the loss with those params
    #     l = 0
    #     for _ in range(LOSS_CV_ROUNDS):
    #         y_pred = model.predict(X[:,0], X[:,1], X[:,2], X[:,3], timesteps)
    #         y_pred = np.squeeze(y_pred, axis=1) # remove timestep dimension

    #         l += mae_loss(y_true, y_pred)
    #     l /= LOSS_CV_ROUNDS
    #     return l

    # res =optimize.minimize(fun = compute_loss,
    #                     #    x0 = np.array([0.38, 0.02, 30., 7., 0.17, 0.02]),
    #                        x0 = np.array([0.8, 0.1, 30., 1., 0.1, 0.01]),
    #                        args = (X, y_true, 1),
    #                     #    method = "L-BFGS-B",
    #                        method = "SLSQP",
    #                        # Min / max bound for beta_mu, beta_rho, ....
    #                        bounds = [(0, 5), (0, 5), (0, 40), (0, 10), (0,1), (0,1)],
    #                        tol = 1e-6,
    #                        options = {'maxiter':100000, 'disp':True}
    #                        )

    # ==========================================================================
    # Evaluate on Italy
    # ==========================================================================

    model.set_params(beta_mu=optimal_params['beta_mu'],
                     gamma_mu=optimal_params['gamma_mu'],
                     beta_rho=optimal_params['beta_rho'],
                     gamma_rho=optimal_params['gamma_rho'])

    COUNTRY = 'Italy'
    POPULATION = args.population
    predict(model, COUNTRY, POPULATION, args.predicted_days)
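Examples 7 and 8 rely on SIRC/SEIR model classes and a predict() routine that are not shown on this page. The following is only a rough, assumed sketch of the set_params() / get_params() / predict() interface the random-search loop depends on: set_params() re-draws any parameter that is not passed, and predict() advances an SIR-like system with rates sampled from Normal(mu, rho). The state layout (S, I, R plus one extra compartment C) and the sampling ranges are guesses, not the project's actual implementation.

import numpy as np


class RandomSearchSIR:
    """Toy stand-in for the SIRC/SEIR classes used above (assumed interface)."""

    PARAM_NAMES = ['beta_mu', 'beta_rho', 'gamma_mu', 'gamma_rho',
                   'delta_mu', 'delta_rho']

    def __init__(self):
        self.params = {}

    def set_params(self, **kwargs):
        # Any parameter not given explicitly is re-drawn at random; this is
        # what lets the random-search training loop explore new candidates.
        for name in self.PARAM_NAMES:
            self.params[name] = kwargs.get(name, np.random.uniform(0.0, 1.0))

    def get_params(self):
        return dict(self.params)

    def predict(self, S, I, R, C, timesteps):
        # One Euler step per timestep; the rates are sampled from Normal(mu, rho),
        # so repeated calls give a crude Monte-Carlo estimate of the loss.
        beta = np.random.normal(self.params['beta_mu'], abs(self.params['beta_rho']))
        gamma = 1.0 / max(np.random.normal(self.params['gamma_mu'],
                                           abs(self.params['gamma_rho'])), 1e-6)
        delta = np.random.normal(self.params['delta_mu'], abs(self.params['delta_rho']))
        S, I, R, C = map(np.asarray, (S, I, R, C))
        steps = []
        for _ in range(timesteps):
            new_inf = beta * S * I
            new_rem = gamma * I
            S = S - new_inf
            I = I + new_inf - new_rem
            R = R + (1 - delta) * new_rem
            C = C + delta * new_rem
            steps.append(np.stack([S, I, R, C], axis=-1))
        # shape (n_samples, timesteps, 4); timesteps=1 matches the squeeze(axis=1) above
        return np.stack(steps, axis=1)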
Example No. 9
import csv

import numpy as np


def load_dataset(filename):
    # text mode (Python 3) instead of the old Python 2 'rb' mode
    with open(filename, 'r') as f:
        dataset = list(csv.reader(f))
    for i in range(len(dataset)):
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset


def data_split(dataset, ratio=0.7):
    train_size = int(len(dataset) * ratio)
    dataset = list(dataset)
    train = dataset[:train_size]
    test = dataset[train_size:]
    return train, test


if __name__ == "__main__":
    # load dataset and prepare data
    filename = "../data/data.csv"
    dataset = load_dataset(filename)
    train, test = data_split(dataset, ratio=0.7)

    # extract features
    features = extract_feature(train)

    # make prediction and evaluation
    predictions = predict(test, features)
    test = np.array(test)
    acc = accuracy(predictions[:, 0], test[:, -1])  # avoid shadowing the accuracy() function
    print("accuracy:", acc)