コード例 #1
0
def data_analysis():
    """Show a hand-picked set of CIFAR-10 test images with their colour cubes.

    For the first configured model only, the stored predictions are loaded
    and rated against the true labels; five fixed test images are then passed
    to ``plotting.show_imgs`` under the title 'cdc entropy examples'.
    """
    # All three splits are fetched, although only the test split is read below.
    train_split = dt.get_data('cifar10', (0, 20000))
    validation_split = dt.get_data('cifar10', (40000, 50000))
    test_data = dt.get_data('cifar10', (50000, 60000))

    for arch in models[:1]:
        # Predictions are read back from the CSV log instead of retraining.
        weights_name = mt.weight_file_name(arch, 'cifar10-2-5', 50, False)
        predictions = t_log.load_predictions(weights_name, file_path=csv_path)

        # Labels are converted with int() (an earlier argmax attempt was
        # marked as wrong by the original author).
        labels = list(map(int, test_data[1]))
        ratings = metrics.prediction_ratings(predictions, labels)  # not used further

        # The per-image entropy pass is currently disabled, so the ranking
        # below is computed over an empty list.
        entropy_per_image = []
        entropy_ranking = np.argsort(entropy_per_image)

        showcased_ids = [21, 3767, 9176, 730, 5905]
        plotting.show_imgs(showcased_ids, 'cdc entropy examples', test_data[0],
                           showColorCube=True)
コード例 #2
0
def mt_noise_test():
    """Apply multiplicative per-channel noise to one training image and show it.

    The noise level sweeps 5, 15, ..., 195 (percent).  At each level the image
    at index 1 is scaled by ``1 + (u - 0.5) * level / 100`` with ``u`` uniform
    in [0, 1), clipped to [0, 255], written back in place and displayed.  The
    model names are then printed.
    """
    np.random.seed(0)
    tr_data = dt.get_data('cifar10', (0, 40000))
    val_data = dt.get_data('cifar10', (40000, 50000))  # loaded but not read here

    target_ids = [1]  # only a single image is perturbed for now
    for noise_level in xrange(5, 200, 10):
        for img_id in target_ids:
            # Three independent 32x32 planes, drawn in order, then moved so the
            # plane axis comes last.
            planes = [np.random.random((32, 32)) for _ in (0, 1, 2)]
            noise_mat = np.swapaxes(planes, 0, 2)

            print(tr_data[0][img_id].shape)
            print(noise_mat.shape)

            gain = 1 + (noise_mat - 0.5) * noise_level / 100
            widened = tr_data[0][img_id].astype('uint16')
            tr_data[0][img_id] = np.clip(widened * gain, 0, 255).astype('uint8')
            plotting.imshow(tr_data[0][img_id])

        for arch in models:
            print('Training', arch)
コード例 #3
0
def colorcube_analysis():
    """Plot colour density cubes for the best- and worst-rated test images.

    For every configured model, the stored predictions are rated against the
    true labels.  A resolution-4 ``ColorDensityCube`` is built from the 2000
    highest-rated images and another from the 2000 lowest-rated ones; both are
    plotted, followed by their normalised difference (which is also saved).
    """

    def build_cube(images, ids):
        # Feed the selected images into a fresh cube and normalise it.
        cube = metrics_color.ColorDensityCube(resolution=4)
        for one_id in ids:
            cube.feed(images[one_id])
        cube.normalize()
        return cube

    for arch in models:
        test_data = dt.get_data('cifar10', (50000, 60000))
        top_n = 2000
        weights_name = mt.weight_file_name(arch, 'cifar10-2-5', 50, False)
        # The model is loaded as before, although the ratings below come from
        # the logged predictions rather than from a fresh forward pass.
        model = mt.load_by_name(weights_name, test_data[0].shape[1:], h5_path+weights_name)
        predictions = t_log.load_predictions(weights_name, file_path=csv_path)
        labels = [int(label) for label in test_data[1]]
        ranking = np.argsort(metrics.prediction_ratings(predictions, labels))

        cc_high = build_cube(test_data[0], ranking[-top_n:])
        cc_high.plot_cube()

        cc_low = build_cube(test_data[0], ranking[:top_n])

        # The difference is taken before the low cube is plotted, as before.
        cc_diff = cc_high.substract(cc_low, 'value')
        cc_low.plot_cube()

        cc_diff.normalize()
        diff_title = 'Color cube analysis difference (' + str(top_n) + ' images/series)'
        cc_diff.plot_cube(title=diff_title, normalize=True, save=True)
コード例 #4
0
def color_domain_test():
    """Print how many images fall into each cell of the max-colour domain cube.

    The cube returned by ``dt.cifar10_maxcolor_domains`` is walked cell by
    cell; the per-cell sizes and the overall total are printed at the end.
    """
    all_data_orig = dt.get_data('cifar10', (0, 20000))
    g = 4
    n_images = 5
    images_cube = dt.cifar10_maxcolor_domains(granularity=g)
    images_cube_sizes = np.zeros((g, g, g))
    total = 0

    # np.ndindex visits (x, y, z) in the same order as three nested loops.
    for x, y, z in np.ndindex(g, g, g):
        cell = images_cube[x][y][z]
        cell_size = len(cell)
        images_cube_sizes[x][y][z] = cell_size
        total += cell_size

        id_list = cell[:n_images]
        # NOTE(review): id_list holds at most n_images entries, so this
        # threshold is never reached and the preview below never runs.
        if len(id_list) <= 10000:
            continue

        print(id_list)
        fig, axes = plt.subplots(1,
                                 n_images,
                                 figsize=(n_images, 4),
                                 subplot_kw={
                                     'xticks': (),
                                     'yticks': ()
                                 })
        for slot, img_id in enumerate(id_list):
            ax = axes[slot]
            ax.imshow(all_data_orig[0][img_id], vmin=0, vmax=1)
            ax.set_title("id#" + str(img_id))
        plt.show()

    print(images_cube_sizes)
    print('total', total)
コード例 #5
0
def colorfulness_analysis(model='densenet121', top_n=2500):
    """
    Experiment analysing the relevance of the colorfulness attribute.
    See metrics_color.colorfulness() for details on the attribute itself.
    :param model: name of the model whose logged predictions are rated
    :param top_n: number of images in each of the two plotted series
    :return: None (a box plot is produced through plotting.box_plot)
    """

    # Logged predictions and ground truth for the test split
    test_data = dt.get_data('cifar10', (50000, 60000))
    weights_name = mt.weight_file_name(model, 'cifar10-2-5', 50, False)
    predictions = t_log.load_predictions(weights_name, file_path=csv_path)
    labels = [int(label) for label in test_data[1]]

    # Image ids ordered from lowest to highest prediction rating
    ranking = np.argsort(metrics.prediction_ratings(predictions, labels))
    print(len(ranking))

    # Walk inwards from both ends of the ranking at the same time
    images = test_data[0]
    best_rated = []
    worst_rated = []
    for offset in xrange(0, top_n):
        best_id = ranking[-offset-1]
        worst_id = ranking[offset]
        best_rated.append(metrics_color.colorfulness(images[best_id]))
        worst_rated.append(metrics_color.colorfulness(images[worst_id]))

    # Box plot comparing the two series
    plot_title = 'Colorfulness analysis (' + str(top_n) + ' images/series)'
    plotting.box_plot(best_rated, worst_rated, name_s1='high prediction scores',
                      name_s2='low prediction scores', y_label='Colorfulness',
                      title=plot_title)
コード例 #6
0
def show_ids():
    """Display two fixed groups of CIFAR-10 test images, 'easy' ones first."""
    test_data = dt.get_data('cifar10', (50000, 60000))
    hard = [9746, 9840, 9853, 9901, 9910, 9923, 9924, 9926, 9960, 9982]
    easy = [9929, 9935, 9939, 9945, 9952, 9966, 9971, 9992, 9997, 9999]

    # Same display order as two consecutive loops: easy ids, then hard ids.
    for img_id in easy + hard:
        plotting.imshow(test_data[0][img_id])

    print('done')
コード例 #7
0
def check_acc():
    """Print the densenet121 test accuracy recomputed from logged predictions.

    The first ten predicted classes are printed as a quick sanity check,
    followed by the accuracy returned by ``metrics.accuracy``.
    """
    arch = 'densenet121'
    test_data = dt.get_data('cifar10', (50000, 60000))

    weights_name = mt.weight_file_name(arch, 'cifar10-2-5', 50, False)
    predictions = t_log.load_predictions(weights_name, file_path=csv_path)

    # Class with the largest predicted value on each row
    predicted_classes = np.argmax(predictions, axis=1)
    print(predicted_classes[:10])

    labels = list(map(int, test_data[1]))
    print(metrics.accuracy(predicted_classes, labels))
コード例 #8
0
def epochs_accuracy_test():
    """Track which test images are classified correctly across epoch budgets.

    The first configured model is trained once per entry of ``epochs``.  For
    every test image a 0/1 history of correct predictions is recorded, then
    summarised: how many images were right n times, the 20 most frequent
    histories, and the last ten ids that were always / never right.
    """
    tr_data = dt.get_data('cifar10', (0, 40000))
    val_data = dt.get_data('cifar10', (40000, 50000))
    test_data = dt.get_data('cifar10', (50000, 60000))
    m = models[0]
    epochs = [1, 2, 3, 4, 5, 6, 7, 10, 20, 40, 200]

    # One correctness history per test image
    correctness = [[] for _ in test_data[0]]
    for n_epochs in epochs:
        print('###->', n_epochs, 'epochs')
        model0, model_name0 = mt.train2(m, tr_data, val_data, n_epochs, False,
                                        'cifar10_0445_epochsacc-5_', path=h5_path)
        acc, predicted_classes, _ = dt.predict_and_acc(model0, test_data)
        for img_id, history in enumerate(correctness):
            history.append(1 if predicted_classes[img_id] == test_data[1][img_id] else 0)

        print('Test accuracy = ', acc)

    # Number of runs in which each image was classified correctly
    correctness_tot = [np.sum(history) for history in correctness]
    easy_imgs = [img_id for img_id, hits in enumerate(correctness_tot) if hits == len(epochs)]
    hard_imgs = [img_id for img_id, hits in enumerate(correctness_tot) if hits == 0]

    values, occurrences = np.unique(correctness_tot, return_counts=True)
    n_correct = dict(zip(values, occurrences))

    # Frequency of each distinct history, keyed by its string form
    history_keys = [str(history) for history in correctness]
    values, occurrences = np.unique(history_keys, return_counts=True)
    history_counts = dict(zip(values, occurrences))
    sorted_cs = sorted(history_counts.items(), key=lambda item: item[1])
    print(n_correct)
    print(sorted_cs[-20:])

    # A [-10:] slice already yields the whole list when it has fewer than ten entries.
    print('Easy images ids: ', easy_imgs[-10:])
    print('Hard images ids: ', hard_imgs[-10:])
コード例 #9
0
def cifar_color_domains_test():
    """Train each configured model and print its per-colour-domain accuracy cube.

    After training, ``metrics_color.color_domains_accuracy`` is evaluated on
    the model and both the resulting cube and ``dt.cube_cardinals`` of that
    cube are printed.
    """
    for arch in models:
        train_split = dt.get_data('cifar10', (0, 20000))
        validation_split = dt.get_data('cifar10', (20000, 30000))
        held_out = dt.get_data('cifar10', (30000, 60000))
        # The formatted copy is built as before; nothing below reads it.
        formatted_held_out = dt.format_data(held_out, 10)

        trained_model, _ = mt.train2(arch, train_split, validation_split, 50, False,
                                     'cifar10-2-5', path=h5_path)

        cube = metrics_color.color_domains_accuracy(trained_model)
        print('cube', cube)
        print('Sizes', dt.cube_cardinals(cube))
コード例 #10
0
ファイル: models.py プロジェクト: ychervonyi/college_value
def train_student_model(features_all, features_model, model_name, path, batch=50, n_epochs=300,
                learning_rate=1.0, model_type='sklearn', save=False, normalize=True):
    """Load the dataset, build the model inputs and hand them to ``train_model``.

    ``features_all`` and ``path`` select what ``get_data`` reads;
    ``features_model`` and ``normalize`` are forwarded to ``process_data``.
    Every remaining argument is passed to ``train_model`` unchanged.
    Nothing is returned.
    """
    # Merge data over multiple years
    print("Reading data...")
    merged = get_data(features_all, path=path)

    # The processed frame itself is not needed here, only the arrays and names.
    _, inputs, targets, column_names = process_data(
        dataset=merged, features_model=features_model, normalize=normalize)

    train_model(
        x=inputs,
        y=targets,
        model_name=model_name,
        feature_names=column_names,
        batch=batch,
        n_epochs=n_epochs,
        learning_rate=learning_rate,
        model_type=model_type,
        save=save,
    )
コード例 #11
0
def car_example():
    """Show the first two listed test images, each followed by its colour cube."""
    test_data = dt.get_data('cifar10', (50000, 60000))
    cars = [6983, 3678, 3170, 1591]

    # Only the first two ids are displayed; each gets a fresh resolution-4 cube.
    for car_id in cars[:2]:
        image = test_data[0][car_id]
        cube = metrics_color.ColorDensityCube(resolution=4)
        cube.feed(image)
        plotting.imshow(image)
        cube.plot_cube()
コード例 #12
0
def histogram_analysis():
    """Plot histograms of the best- and worst-rated densenet121 test images.

    The 2000 highest-rated and 2000 lowest-rated images (by
    ``metrics.prediction_ratings``) are passed to ``plotting.plot_hists``.
    """
    arch = 'densenet121'
    test_data = dt.get_data('cifar10', (50000, 60000))
    top_n = 2000

    weights_name = mt.weight_file_name(arch, 'cifar10-2-5', 50, False)
    predictions = t_log.load_predictions(weights_name, file_path=csv_path)
    labels = list(map(int, test_data[1]))

    # Image ids ordered from lowest to highest rating
    ranking = np.argsort(metrics.prediction_ratings(predictions, labels))

    images = test_data[0]
    best_rated = [images[ranking[-offset-1]] for offset in xrange(0, top_n)]
    worst_rated = [images[ranking[offset]] for offset in xrange(0, top_n)]

    plotting.plot_hists(best_rated, 'high scores', worst_rated, 'low scores',
                        plotting.cs_bgr, title=' ')
コード例 #13
0
ファイル: models.py プロジェクト: ychervonyi/college_value
def train_college_model(features_all, features_student, features_model, model_name, path, batch=100, n_epochs=1000,
                learning_rate=0.000005, model_type='sklearn', save=False, normalize=True):
    """Train a model whose targets are the computed college scores.

    The dataset is processed twice: once with ``features_student`` to obtain
    the inputs from which ``compute_college_scores`` derives the targets, and
    once with ``features_model`` to obtain the inputs actually trained on.
    Nothing is returned.
    """
    # Merge data over multiple years
    print("Reading data...")
    merged = get_data(features_all, path=path)

    # Pass 1: student-level features -> college scores used as targets
    _, student_x, student_y, _ = process_data(
        dataset=merged, features_model=features_student, normalize=normalize)
    college_targets = compute_college_scores(model_type, student_x, student_y)

    # Pass 2: model features -> training inputs and their column names
    _, model_x, _, column_names = process_data(
        dataset=merged, features_model=features_model, normalize=normalize)

    train_model(
        x=model_x,
        y=college_targets,
        model_name=model_name,
        feature_names=column_names,
        batch=batch,
        n_epochs=n_epochs,
        learning_rate=learning_rate,
        model_type=model_type,
        save=save,
    )
コード例 #14
0
def check_rgb():
    """Compare test image 9960 with a copy whose channels 1 and 2 are set to one.

    The copy is printed and displayed, then the histograms of the original
    and the modified image are plotted side by side.
    """
    test_data = dt.get_data('cifar10', (50000, 60000))
    reference = test_data[0][9960]

    # Work on a copy so the loaded data stays untouched.
    img_test = np.array(reference)
    for channel in (1, 2):
        img_test[:, :, channel] = np.ones((32, 32))

    print(np.array(reference).shape)
    print(img_test)
    plotting.imshow(img_test)
    plotting.plot_hists([reference], 'normal', [img_test], 'red', plotting.cs_bgr)
コード例 #15
0
def confusion(model='densenet121'):
    """Print the confusion matrix and the mean prediction rating per predicted class.

    :param model: name of the model whose logged predictions are analysed
    :return: None; everything is printed

    Output, in order: the confusion matrix over the whole test split, the
    number of test images assigned to each of the 10 classes, and the mean
    prediction rating of the images assigned to each class (``nan`` for a
    class that was never predicted).
    """
    # Load test data and model results
    test_data = dt.get_data('cifar10', (50000, 60000))
    model_name0 = mt.weight_file_name(model, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    predicted_classes = np.argmax(y_predicted, axis=1)
    true_classes = [int(k) for k in test_data[1]]

    print('Confusion Matrix for Total Test Data')
    print(sk_metrics.confusion_matrix(true_classes, predicted_classes))

    scores = metrics.prediction_ratings(y_predicted, true_classes)

    # One independent, initially EMPTY list per class.  Seeding each list with
    # a 0.0 placeholder would add a spurious sample and drag every class mean
    # towards zero.
    n_classes = 10
    prediction_scores = [[] for _ in range(n_classes)]
    for k, predicted_class in enumerate(predicted_classes):
        prediction_scores[predicted_class].append(scores[k])

    # The per-class lists have different lengths, so report their sizes
    # instead of converting the ragged structure to an array.
    print([len(class_scores) for class_scores in prediction_scores])

    for class_scores in prediction_scores:
        if class_scores:
            print(float(np.mean(class_scores)))
        else:
            # No image was assigned to this class: the mean is undefined.
            print(float('nan'))
コード例 #16
0
def check_entropy():
    """Show test images sampled along the colour-entropy ranking.

    Every test image is scored with ``metrics_color.entropy_cc``; the images
    at fixed positions of the ascending ranking are displayed, each followed
    by a print of its entropy and label.
    """
    test_data = dt.get_data('cifar10', (50000, 60000))
    entropies = [metrics_color.entropy_cc(img) for img in test_data[0]]

    # Image ids ordered from lowest to highest entropy
    sorted_args = np.argsort(entropies)

    for rank in (0, 100, 1000, 9000, 9900, 9999):
        img_id = sorted_args[rank]
        plotting.imshow(test_data[0][img_id])
        print(entropies[img_id], test_data[1][img_id])
コード例 #17
0
def check_pr():
    """Print prediction ratings for two fixed groups of densenet121 test images.

    First the rating and raw prediction at ranks 2500 and 2501 of the
    ascending rating order are printed.  Then, for each id in the 'easy' and
    'hard' groups, the individual rating and whether the arg-max prediction
    matches the true class are printed.
    """
    arch = 'densenet121'
    weights_name = mt.weight_file_name(arch, 'cifar10-2-5', 50, False)
    predictions = t_log.load_predictions(weights_name, file_path=csv_path)

    test_data = dt.get_data('cifar10', (50000, 60000))
    groups = (
        ('easy', [9929, 9935, 9939, 9945, 9952, 9966, 9971, 9992, 9997, 9999]),
        ('hard', [9746, 9840, 9853, 9901, 9910, 9923, 9924, 9926, 9960, 9982]),
    )
    labels = list(map(int, test_data[1]))

    ratings = metrics.prediction_ratings(predictions, labels)
    ranking = np.argsort(ratings)

    # Two neighbouring entries from the lower part of the ranking
    for rank in (2500, 2501):
        ranked_id = ranking[rank]
        print(ratings[ranked_id], predictions[ranked_id])

    for group_name, group_ids in groups:
        print(group_name)
        for img_id in group_ids:
            rating = metrics.prediction_rating(predictions[img_id], labels[img_id])
            is_correct = np.argmax(predictions[img_id]) == labels[img_id]
            print(img_id, '- pr:', rating, ' - correct?: ', is_correct)
コード例 #18
0
def entropy_cc_analysis():
    """Box-plot the colour entropy of the best- and worst-rated test images.

    The 2000 highest-rated and 2000 lowest-rated densenet121 test images are
    scored with ``metrics_color.entropy_cc(image, 8)`` and the two series are
    compared in a box plot.
    """
    arch = 'densenet121'
    test_data = dt.get_data('cifar10', (50000, 60000))
    top_n = 2000

    weights_name = mt.weight_file_name(arch, 'cifar10-2-5', 50, False)
    predictions = t_log.load_predictions(weights_name, file_path=csv_path)
    labels = list(map(int, test_data[1]))

    # Image ids ordered from lowest to highest rating
    ranking = np.argsort(metrics.prediction_ratings(predictions, labels))
    print(len(ranking))

    images = test_data[0]
    best_rated = []
    worst_rated = []
    for offset in xrange(0, top_n):
        best_id = ranking[-offset-1]
        worst_id = ranking[offset]
        best_rated.append(metrics_color.entropy_cc(images[best_id], 8))
        worst_rated.append(metrics_color.entropy_cc(images[worst_id], 8))

    plot_title = 'Color entropy analysis (' + str(top_n) + ' images/series)'
    plotting.box_plot(best_rated, worst_rated, name_s1='high prediction scores',
                      name_s2='low prediction scores', y_label='Color entropy',
                      title=plot_title)
コード例 #19
0
def color_domains_accuracy(model, granularity=4, n=1, data_range=(50000, 60000)):
    """Return a cube holding the model accuracy on each colour domain.

    :param model: object exposing ``predict`` on an array of formatted inputs
    :param granularity: number of cells along each axis of the cube
    :param n: forwarded to ``dt.cifar10_nth_maxcolor_domains``
    :param data_range: index range of the CIFAR-10 images to evaluate on
    :return: float array of shape (granularity, granularity, granularity);
        cells containing fewer than two images hold NaN
    """
    g = granularity
    images_cube = dt.cifar10_nth_maxcolor_domains(granularity=g, n=n, data_range=data_range)
    scores_cube = np.zeros((g, g, g))
    data = dt.get_data('cifar10', data_range)
    xf, yf = dt.format_data(data, 10)

    # np.ndindex visits (x, y, z) in the same order as three nested loops.
    for x, y, z in np.ndindex(g, g, g):
        cell_ids = images_cube[x][y][z]
        if len(cell_ids) <= 1:
            # Too few images to score: stored as NaN, which is what assigning
            # None into a float array produces as well.
            scores_cube[x][y][z] = np.nan
            continue

        cell_inputs = [xf[k] for k in cell_ids]
        cell_targets = [yf[k] for k in cell_ids]
        y_predicted = model.predict(np.array(cell_inputs))
        predicted_classes = np.argmax(y_predicted, axis=1)
        true_classes = np.argmax(cell_targets, axis=1)
        scores_cube[x][y][z] = metrics.accuracy(predicted_classes, true_classes)

    return scores_cube
コード例 #20
0
ファイル: models.py プロジェクト: ychervonyi/college_value
def generate_ranking(features_all, features_model, path, model_type='sklearn', normalize=True):
    """Compute one averaged score per college and write the ranking to 'scores.csv'.

    :param features_all: forwarded to ``get_data`` to select what is read
    :param features_model: forwarded to ``process_data``
    :param path: forwarded to ``get_data``
    :param model_type: forwarded to ``compute_college_scores``
    :param normalize: forwarded to ``process_data``
    :return: None; the ranking is written to 'scores.csv' in the working
        directory, one ``name,score`` row per college, best score first
    """
    # Merge data over multiple years
    print("Reading data...")
    dataset = get_data(features_all, path=path)

    df, x, y, feature_names = process_data(dataset=dataset,
                                           features_model=features_model,
                                           normalize=normalize)

    # College score
    college_score = compute_college_scores(model_type, x, y)

    college_name = df['INSTNM'].values
    college_name = college_name.reshape((-1, 1))

    # Concatenate college scores and names: column 0 = name, column 1 = score
    scores = np.concatenate((college_name, college_score), axis=1)

    # The same college can appear once per year, so collect every score under
    # the college NAME and average afterwards.  The key must be the name:
    # testing the score value for membership never matches, which would reset
    # the list on every row and keep only the last year's score.
    scores_by_college = {}
    for i in range(scores.shape[0]):
        name = scores[i, 0]
        scores_by_college.setdefault(name, []).append(float(scores[i, 1]))
    scores_dict = {name: np.mean(values) for name, values in scores_by_college.items()}

    # Write scores into a file, highest average first
    scores_sorted = sorted(scores_dict.items(), key=lambda item: item[1], reverse=True)
    scores_file = 'scores.csv'
    with open(scores_file, 'w') as csv_file:
        writer = csv.writer(csv_file)
        for row in scores_sorted:
            writer.writerow(row)
    print("Scores are saved in %s" % scores_file)
コード例 #21
0
def main():
    """Cross-validate every method in ``--method_compare`` and write the results.

    Steps:
      1. Parse the command-line options.
      2. Load the training data and the word embeddings.
      3. For each fold and each method: train a ``models.NeuralnetModel``,
         predict on the train / test (and optionally generalized) sets,
         evaluate, and append the measures to the per-method result lists.
      4. After every method, rewrite the CSV summaries in the results folder.

    Results are stored per method, so each entry of ``--method_compare`` gets
    its own performance lists, prediction files and weight files.
    """

    def none_or_str(value):
        # The help texts list "None" as an option; map that literal to the
        # real None so the `is not None` checks below behave as documented.
        return None if value == 'None' else value

    parser = argparse.ArgumentParser()

    # General arguments:
    parser.add_argument('--relations', default='impl', help='Relationship type. OPTIONS: impl, expl')
    # `choices` rejects unknown model types up front instead of failing later
    # on an undefined list of performance measures.
    parser.add_argument('--model_type', default='PIX', choices=['REG', 'PIX'], help='OPTIONS: REG, PIX')
    parser.add_argument('--n_side_pixl', default=15, type=int, help='Number of pixels as output of PIX')
    # nargs='+' keeps the value a list when given on the command line
    # (a bare string would otherwise be iterated character by character).
    parser.add_argument('--method_compare', default=['emb','rnd','onehot','ctrl'], nargs='+',
                        help='Methods to compare. OPTIONS: init, rnd, onehot, ctrl')
    parser.add_argument('--n_folds', default=10, type=int, help='Number of cross-validation folds')
    parser.add_argument('--eval_generalized_set', default= 'words', type=none_or_str,
                        help='Whether we evaluate in a generalized set or not. If so, instances are left out for training. '
                             'OPTIONS: None, triplets, words')
    parser.add_argument('--eval_clean_set', default= None, type=none_or_str,
                        help='Whether we evaluate in a clean set (equal to the generalized ones, but without keeping the model from'
                             'seeing these words/triplets during training. BE CAREFUL! Do not use the same list as in generalized above'
                             '(because you will not find any tuple for the clean set if you have removed them first!) '
                             'OPTIONS: None, triplets, words')
    parser.add_argument('--save_indiv_predictions', default=False, type=bool_str, help='To store model predictions (they can be heavy, especially in PIX). '
                                                                        'Useful to visualize them afterwards.')
    parser.add_argument('--save_model', default=False, type=bool_str, help='To store the models (e.g., to explore weights afterwards)')

    args = parser.parse_args()

    if args.model_type == 'REG':
        perf_measures = ['R2', 'acc_y', 'F1_y', 'Pear_x', 'Pear_y', 'IoU_t']
    else:  # 'PIX' -- guaranteed by `choices` above
        perf_measures = ['acc_y', 'F1_y', 'Pear_x', 'Pear_y', 'max_acc_px']

    # Create folder for results
    saveFolder = wd.get_folder_name(args)

    # Get default params
    par_learning = pt.get_default_params(args.model_type)

    # --- Read data --- #
    TRAIN = rd.load_training_data('../training_data/TRAINING_DATA-' + args.relations + '.csv')
    TRAIN['subj_ctr_x'], TRAIN['obj_ctr_x'] = dt.mirror_x(TRAIN['subj_ctr_x'], TRAIN['obj_ctr_x'])
    words, EMB = rd.readDATA( '../embeddings/glove_words.csv')

    # --- GENERALIZED and CLEAN triplets or words --- #
    enforce_gen, clean_eval = {}, {}
    enforce_gen['eval'], clean_eval['eval'] = args.eval_generalized_set, args.eval_clean_set
    enforce_gen['triplets'], enforce_gen['words'] = dt.get_enforce_gen(enforce_gen['eval'])
    clean_eval['triplets'], clean_eval['words'] = dt.get_enforce_gen(clean_eval['eval'])

    print('Getting training data...')
    X, X_extra, y, y_pixl, X_extra_enf_gen, X_enf_gen, y_enf_gen, y_enf_gen_pixl, rel_ids, OBJ_ctr_sd, OBJ_ctr_sd_enf_gen, \
    EMBEDDINGS, TRAIN_relevant = dt.get_data(args.model_type, TRAIN, words, EMB, enforce_gen, args.n_side_pixl)

    # Get folds
    kf = dt.get_folds(X['subj'].shape[0], args.n_folds)

    # --- INITIALIZE performance measures --- #
    # One list per (split, method, measure); a value is appended per fold.
    PERF, PERF_clean, PERF_enf_gen = {},{},{}
    PERF['train'], PERF['test'], PERF_clean['train'], PERF_clean['test'] = {},{},{},{}
    for method in args.method_compare:
        PERF['train'][method], PERF['test'][method] = {},{}
        PERF_clean['train'][method], PERF_clean['test'][method] = {},{}
        PERF_enf_gen[method] = {}
        for meas in perf_measures:
            PERF['train'][method][meas], PERF['test'][method][meas] = [],[]
            PERF_clean['train'][method][meas], PERF_clean['test'][method][meas] = [],[]
            PERF_enf_gen[method][meas] = []

    idx_clean_train, idx_clean_test = [],[]
    for fold_count, (train_idx, test_idx) in enumerate(kf): # FOLDS loop

        # --- TRAIN and TEST data (splits) --- #
        # This aux function isn't elegant, but we don't want to triplicate y_pixl with train and test splits. Takes too much memory
        X_train, X_test, X_extra_train, X_extra_test, y_train, y_test, OBJ_ctr_sd_train, \
        OBJ_ctr_sd_test = dt.aux_get_train_test_splits(X, X_extra, y, OBJ_ctr_sd, train_idx, test_idx)
        # y_pixl is indexed with the fold indices for PIX and with 0 otherwise.
        aux_train_idx = train_idx if args.model_type == 'PIX' else 0
        aux_test_idx = test_idx if args.model_type == 'PIX' else 0

        # --- get CLEAN_train and CLEAN_test INDICES --- #
        if clean_eval['eval'] is not None:
            idx_clean_train, idx_clean_test = dt.get_CLEAN_train_test_idx(TRAIN_relevant, train_idx, test_idx, clean_eval)

        # Clean evaluation only runs when requested AND both index sets are
        # non-empty.  len() works for lists and arrays alike, whereas
        # `idx != []` is ambiguous for arrays.
        has_clean = (clean_eval['eval'] is not None
                     and len(idx_clean_test) > 0
                     and len(idx_clean_train) > 0)

        for method in args.method_compare: # METHODS LOOP

            print('=========================================')
            print('=======>> ' + method + ' <<=======')
            print('=========================================')

            # Initialize model object
            model = models.NeuralnetModel(args, method, par_learning)

            # --- LEARN the model --- #
            model.method_learn(X_train, X_extra_train, y_train, y_pixl[aux_train_idx], EMBEDDINGS)

            # --- PREDICT --- #
            # NOTE(review): y_train is passed as the third argument for every
            # prediction set, including test -- confirm against model_predict
            # that this is intended.
            y_pred_train = model.model_predict(X_train, X_extra_train, y_train)
            y_pred_test = model.model_predict(X_test, X_extra_test, y_train)
            if enforce_gen['eval'] is not None:
                y_pred_enf_gen = model.model_predict(X_enf_gen, X_extra_enf_gen, y_train)

            # --- EVALUATE performance --- #
            PERF_DICT_test = et.evaluate_perf(y_test, y_pixl[aux_test_idx], y_pred_test, OBJ_ctr_sd_test, perf_measures, args.model_type)
            PERF_DICT_train = et.evaluate_perf(y_train, y_pixl[aux_train_idx], y_pred_train, OBJ_ctr_sd_train, perf_measures, args.model_type)

            if has_clean:
                aux_idx_clean_train = idx_clean_train if args.model_type == 'PIX' else 0
                aux_idx_clean_test = idx_clean_test if args.model_type == 'PIX' else 0
                PERF_DICT_clean_train = et.evaluate_perf(y_train[idx_clean_train], y_pixl[aux_train_idx][aux_idx_clean_train], y_pred_train[idx_clean_train], OBJ_ctr_sd_train[idx_clean_train], perf_measures, args.model_type)
                PERF_DICT_clean_test = et.evaluate_perf(y_test[idx_clean_test], y_pixl[aux_test_idx][aux_idx_clean_test], y_pred_test[idx_clean_test], OBJ_ctr_sd_test[idx_clean_test], perf_measures, args.model_type)
            if enforce_gen['eval'] is not None:
                PERF_DICT_enf_gen = et.evaluate_perf(y_enf_gen, y_enf_gen_pixl, y_pred_enf_gen, OBJ_ctr_sd_enf_gen, perf_measures, args.model_type)

            # --- append --- #
            # Results are filed under the method being evaluated in THIS
            # iteration, so the methods do not overwrite each other.
            for meas in perf_measures:
                PERF['train'][method][meas].append(PERF_DICT_train[meas])
                PERF['test'][method][meas].append(PERF_DICT_test[meas])
                if has_clean:
                    PERF_clean['train'][method][meas].append(PERF_DICT_clean_train[meas])
                    PERF_clean['test'][method][meas].append(PERF_DICT_clean_test[meas])
                    print('method==> ' + method + ' || ' + meas + '_CLEAN_ts= ' + str(PERF_DICT_clean_test[meas]) + ' | ' + meas + '_CLEAN_tr= ' + str(PERF_DICT_clean_train[meas]))
                if enforce_gen['eval'] is not None:
                    PERF_enf_gen[method][meas].append(PERF_DICT_enf_gen[meas])
                    print('method==> ' + method + ' || ' + meas + '_GEN= ' + str(PERF_DICT_enf_gen[meas]))
                print('method==> ' + method + ' || ' + meas + '_test= ' + str(PERF_DICT_test[meas]) + ' | ' + meas + '_train= ' + str(PERF_DICT_train[meas]))

            # -- write individual predictions -- #
            if args.save_indiv_predictions == True:
                indiv_predDir = saveFolder + '/INDIV_' + method + '_fld_' + str(fold_count + 1) + '.csv'
                wd.write_indiv_predictions(y_pred_test, OBJ_ctr_sd_test, args.model_type, 0.1, indiv_predDir)
                if enforce_gen['eval'] is not None:
                    wd.write_indiv_predictions(y_pred_enf_gen, OBJ_ctr_sd_enf_gen, args.model_type, 0.1, indiv_predDir.replace('INDIV', 'INDIV-GEN'))
                if has_clean:
                    wd.write_indiv_predictions(y_pred_test[idx_clean_test], OBJ_ctr_sd_test[idx_clean_test], args.model_type, 0.1, indiv_predDir.replace('INDIV', 'INDIV-CLEAN_TST'))

            # -- store model weights -- #
            # String comparison must use !=; `is not` tests object identity.
            if (args.save_model == True) and (method != 'ctrl'):
                import h5py  # imported here so a missing h5py only matters when weights are saved
                model.keras_model.save_weights(saveFolder + '/MODEL_' + method + '_fld_' + str(fold_count + 1) + '.h5')

            # --- write results --- #
            # Rewritten after every method so partial results survive an interrupted run.
            wd.write_results_all(PERF, args.method_compare, perf_measures, saveFolder + '/TRAIN-TEST.csv')
            if enforce_gen['eval'] is not None:
                wd.write_results_enf_gen(PERF_enf_gen, args.method_compare, perf_measures, saveFolder + '/GEN.csv')
            if has_clean:
                wd.write_results_all(PERF_clean, args.method_compare, perf_measures, saveFolder + '/CLEAN.csv')
コード例 #22
0
# Age bounds for the analysis.  Only `minage` is applied in this section
# (rows below it are dropped in the loop further down).
minage = 8
maxage = 20

# Additional variables to use when imputing puberty, keyed by the `female`
# flag used in the loop below (0 = male, 1 = female).
# TODO: include Bamako, subscap for boys, men_age for girls
others = {0: ["HT", "WT", "BMI"], 1: ["HT", "WT"]}

# Load and prepare the data separately for males (0) and females (1).
# NOTE(review): the original comment here said a Gaussian process model is fit
# per sex; no fitting happens in this section, so that presumably occurs in
# later code -- confirm.
for female in 0, 1:

    # Skip the variable/sex combinations that this script excludes:
    # "log2T_use_Z" is not processed for females, and "Breast_Stage_Use_Z" /
    # "Menarche" are not processed for males.
    if female == 1 and impvar in ("log2T_use_Z", ):
        continue
    if female == 0 and impvar in ("Breast_Stage_Use_Z", "Menarche"):
        continue

    # Fetch the target variable together with "datecomb" and the sex-specific
    # auxiliary variables, then log the size of what was loaded.
    dx[female] = get_data(female, impvar, others=["datecomb"] + others[female])
    outf.write("Loaded %d x %d values\n" % tuple(dx[female].shape))
    outf.write("%d distinct people in initial data\n\n" %
               dx[female].ID.unique().size)

    # Keep only the rows at or above the minimum age and log what remains.
    dx[female] = dx[female].loc[dx[female].Age >= minage, :]
    outf.write("Retained %d x %d values at or above age %d\n" %
               (tuple(dx[female].shape) + (minage, )))
    outf.write("%d distinct people after requiring age at or above %d\n\n" %
               (dx[female].ID.unique().size, minage))

    # Parse the datecomb column into pandas datetimes.
    # NOTE(review): the original note mentions values "stored as seconds" that
    # should be converted to days; this line only calls pd.to_datetime, so any
    # unit conversion presumably happens elsewhere (e.g. in `xform`) -- confirm.
    dx[female].datecomb = pd.to_datetime(dx[female].datecomb)

    # Apply `xform` (defined outside this section) to each person's rows.
    dx[female] = dx[female].groupby("ID").apply(xform)
コード例 #23
0
def color_region_finetuning():
    """Compare region-targeted fine-tuning against reference fine-tuning runs.

    For every architecture in ``models`` a base model is obtained through
    ``mt.train2``. Then ``g`` reference fine-tuned models, and one fine-tuned
    model per color region holding more than 100 images, are either loaded
    from ``h5_path`` (when their weight file exists) or trained. For each of
    them the per-region accuracy cube is computed and printed, together with
    its difference to the mean and to the element-wise maximum of the
    reference cubes. Nothing is returned; results are only printed.
    """
    g = 4
    images_cube = dt.cifar10_maxcolor_domains(granularity=g, data_range=(50000, 60000))
    region_sizes = dt.cube_cardinals(images_cube)

    tr_data = dt.get_data('cifar10', (0, 20000))
    val_data = dt.get_data('cifar10', (40000, 50000))
    ft_data = dt.get_data('cifar10', (20000, 40000))
    # NOTE(review): train_data_ref and f_test_data are never read below.
    train_data_ref = dt.get_data('cifar10', (20000, 30000))
    train_data_ref2 = dt.get_data('cifar10', (30000, 40000))
    test_data = dt.get_data('cifar10', (50000, 60000))
    f_test_data = dt.format_data(test_data, 10)

    ft_data_augmentation = True
    ft_epochs = 30

    for m in models:

        # cr = color region, 0-2 for tr data / 4-5 for val data
        model_base, model_name0 = mt.train2(m, tr_data, val_data, 50, False, 'cr_0245', path=h5_path)
        base_weights = model_name0 + '.h5'

        # --- Reference runs: fine-tune on tr_data + train_data_ref2 --- #
        scores_cubes = []
        for ref_idx in xrange(g):
            ref_tag = 'ft_2345_ref' + str(ref_idx + 4)
            ref_name = mt.ft_weight_file_name(model_name0, ft_data_augmentation, ft_epochs, ref_tag)
            ref_weights = h5_path + ref_name + '.h5'
            print('*-> ' + ref_weights)

            if not mt.model_state_exists(ref_weights):
                ref_images = np.concatenate((tr_data[0], train_data_ref2[0]))
                ref_labels = np.concatenate((tr_data[1], train_data_ref2[1]))
                ft_data_selected_ref = [ref_images, ref_labels]
                assert len(ft_data_selected_ref[0]) == 30000
                model2, model_name2 = mt.train2(m, ft_data_selected_ref, val_data, ft_epochs,
                                                ft_data_augmentation, ref_tag, h5_path,
                                                weights_file=base_weights)
            else:
                model2 = mt.load_by_name(model_name0, ft_data[0].shape[1:], ref_weights)
                ref_val = dt.predict_and_acc(model2, val_data)
                print('Val accuracy:', ref_val[0])

            scores_cube2 = metrics_color.color_domains_accuracy(model2, g)
            # Weighted average of the score cube, weights = region sizes.
            ref_weighted = scores_cube2 * np.array(region_sizes) / float(10000)
            print('(Approx) Test accuracy', np.nansum(ref_weighted))
            scores_cubes.append(scores_cube2)

        avg_ref_score_cube = np.nanmean(scores_cubes, axis=0)
        max_ref_score_cube = np.max(scores_cubes, axis=0)

        # --- Experimental runs: one fine-tuning per sufficiently large region --- #
        for x in xrange(g):
            for y in xrange(g):
                for z in xrange(g):
                    n_in_region = region_sizes[x][y][z]
                    if n_in_region <= 100:
                        # Too few images in this region to be worth a run.
                        continue

                    region_id = str(x) + str(y) + str(z)
                    print('#--> Region ' + region_id + ' (' + str(n_in_region) + ' images)')
                    exp_tag = 'ft_2445_r' + region_id + '_cr_1' + 'exp'

                    exp_name = mt.ft_weight_file_name(model_name0, ft_data_augmentation, ft_epochs,
                                                      nametag=exp_tag)
                    exp_weights = h5_path + exp_name + '.h5'

                    if not mt.model_state_exists(exp_weights):
                        ft_data_args = metrics_color.finetune_by_region((x, y, z), ft_data, 10000, g)
                        ft_data_selected = dt.get_finetune_data(tr_data, ft_data, ft_data_args)
                        assert len(ft_data_selected[0]) == 30000
                        model1, model_name1 = mt.train2(m, ft_data_selected, val_data, ft_epochs,
                                                        ft_data_augmentation, exp_tag,
                                                        h5_path, weights_file=base_weights)
                    else:
                        model1 = mt.load_by_name(model_name0, ft_data[0].shape[1:], exp_weights)
                        exp_val = dt.predict_and_acc(model1, val_data)
                        print('Val accuracy:', exp_val[0])

                    scores_cube1 = metrics_color.color_domains_accuracy(model1, g)
                    print('  -  Region accuracy = ' + str(scores_cube1[x][y][z]))
                    # Weighted average of the score cube, weights = region sizes.
                    exp_weighted = scores_cube1 * np.array(region_sizes) / float(10000)
                    print('  -  (Approx) Test accuracy = ', np.nansum(exp_weighted))

                    # Gain of the region-targeted model over the references.
                    cc_avg = np.subtract(scores_cube1, avg_ref_score_cube)
                    print('  -  Region score (avg ref) = ' + str(float(cc_avg[x][y][z])))
                    cc_max = np.subtract(scores_cube1, max_ref_score_cube)
                    print('  -  Region score (max ref) = ' + str(float(cc_max[x][y][z])))
                    print('           ~           ')
コード例 #24
0
def bug_feature_detection():
    """Train a regressor that predicts a classifier's prediction ratings.

    For every architecture in ``models``:

    1. a classifier is obtained with ``mt.train2`` and evaluated on the test
       split, and ``metrics.prediction_ratings`` is computed from its outputs;
    2. a regression model derived from a second classifier
       (``mt.reg_from_``) is trained with ``mt.train_reg`` on the first
       20000 test images against those ratings, the next 10000 serving as
       validation data;
    3. summary statistics of the true ratings, the regressed ratings and
       their L2-normalised version are printed, as well as the absolute
       differences to the true ratings;
    4. for several thresholds, the regressed rating is turned into a
       right/wrong guess and compared with whether the classifier was
       actually right.

    Nothing is returned; all results are printed.
    """

    def _report(header, values):
        # Print a header line followed by mean / std / max / min of values.
        print(header)
        print('Mean', np.mean(values))
        print('Std', np.std(values))
        print('Max', np.max(values))
        print('Min', np.min(values))

    for m in models:
        tr_data = dt.get_data('cifar10', (0, 20000))
        val_data = dt.get_data('cifar10', (20000, 30000))
        test_data = dt.get_data('cifar10', (30000, 60000))

        # Reference classifier and its predictions on the test split.
        model0, model_name0 = mt.train2(m, tr_data, val_data, 50, False, tag='cifar10-2-5', path=h5_path)
        acc, predicted_classes, y_predicted = dt.predict_and_acc(model0, test_data)
        print('acc', acc)

        # Labels are read as plain integers (not one-hot, so no argmax).
        true_classes = [int(label) for label in test_data[1]]
        pr = metrics.prediction_ratings(y_predicted, true_classes)

        # Regression model built from a classifier trained for one epoch.
        model2, model_name2 = mt.train2(m, tr_data, val_data, 1, False, tag='cifar10-0223', path=h5_path)
        model1 = mt.reg_from_(model2, m)
        print('Reg model created')

        # From here on the regressor's train/val data are slices of the test
        # split, with the prediction ratings as targets.
        X_test, y_test = test_data
        tr_data = (X_test[0:20000], pr[0:20000])
        val_data = (X_test[20000:30000], pr[20000:30000])
        model1, model_name1 = mt.train_reg(model1, m, tr_data, val_data, '', 50, False, path=h5_path)

        formatted_test_data = dt.format_data(val_data, 10)
        y_true = pr[20000:30000]
        _report('Ground truth values:', y_true)

        y_predicted1 = model1.predict(formatted_test_data[0])
        n_guesses = len(y_predicted1)
        # First output column of every prediction row.
        y_predicted2 = [y_predicted1[idx][0] for idx in xrange(n_guesses)]
        _report('Prediction values:', y_predicted2)

        y_predicted3 = y_predicted2 / np.linalg.norm(y_predicted2)
        _report('Norm Prediction values:', y_predicted3)

        # Absolute error of raw and normalised predictions w.r.t. y_true.
        n_compared = min(10000, len(y_predicted))
        diff2 = [abs(y_predicted2[idx] - y_true[idx]) for idx in xrange(n_compared)]
        diff3 = [abs(y_predicted3[idx] - y_true[idx]) for idx in xrange(n_compared)]
        print('Difference:')
        print('Mean ', np.mean(diff2))
        print('Max ', max(diff2))
        print('Difference Norm:')
        print('Mean ', np.mean(diff3))
        print('Max ', max(diff3))

        # R/W guess prediction
        opti_thr = float(np.sort(y_predicted2)[int(acc*10000)])
        print('opti_thr', opti_thr)
        thresholds = (0.6, 0.7, 0.777, 0.8, 0.9, opti_thr)

        for thr in thresholds:
            # A guess is right when "rating above threshold" agrees with
            # "classifier predicted the true class" for the same image.
            n_right_guesses = 0
            for idx in xrange(n_guesses):
                guessed_right = y_predicted1[idx][0] > thr
                was_right = (test_data[1][20000 + idx] == predicted_classes[20000 + idx])
                if guessed_right == was_right:
                    n_right_guesses += 1

            print('acc for reg for true/false with thr of ' + str(thr) + ': ' + str(float(n_right_guesses)/n_guesses))

        print('           ~           ')
コード例 #25
0
ファイル: KNN.py プロジェクト: oldsheep2019/EP-FPG
import numpy as np
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

from data_tools import get_data

if __name__ == '__main__':

    # Dataset to evaluate; only the 'WIL' case is handled in this section.
    data_set_name = 'WIL'
    # data_set_name = 'BLE'

    # Number of repeated evaluations averaged for each neighbour count.
    test_num = 3
    for neighbor_num in (1, 3, 5, 11):
        if data_set_name == 'WIL':
            acc_list = []
            for k in range(test_num):
                # Data is reloaded for every repetition.
                (train_data, train_label,
                 test_data, test_label) = get_data(data_set_name)

                neigh = KNeighborsClassifier(n_neighbors=neighbor_num)
                neigh.fit(train_data, train_label)
                predict_label = neigh.predict(test_data)

                # A sample is an error when the argmax of its predicted label
                # differs from the argmax of its true label.
                test_sample_num = test_data.shape[0]
                err_cnt = sum(
                    1
                    for i in range(test_sample_num)
                    if np.argmax(predict_label[i]) != np.argmax(test_label[i])
                )

                acc = 1 - (err_cnt / test_sample_num)
                print('kNN test accuracy = %.1f%%' % (acc * 100))
                acc_list.append(acc)

            print('\nkNN mean test accuracy (for k = %d) = %.2f%%\n' %
                  (neighbor_num, sum(acc_list) / len(acc_list) * 100))
コード例 #26
0
import pandas as pd
import os

# Variable whose imputed trajectories are plotted.
#impvar = "Breast_Stage_Z"
impvar = "log2T_use_Z"

# All figures of this script go into a single PDF named after the variable.
pdf = PdfPages("plot_imputed_%s.pdf" % impvar)

# Plot only a few curves to avoid overplotting
di = []
for j in range(5):
    di.append(pd.read_csv(os.path.join("imputed_data_puberty", "%s_imp_%d.csv" % (impvar, j))))

for female in False, True:

    dx = get_data(female, impvar)

    # IDs as plain Python ints. The builtin `int` is used because the
    # `np.int` alias (which was the very same object) was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    idx = dx.ID.unique().astype(int).tolist()

    jj = 0
    for id0 in idx:

        # NOTE(review): `df` is not defined in this section; presumably it is
        # created earlier in the file (or one of the frames in `di` was
        # meant) - confirm.
        vv = df.loc[df.ID == id0, :]
        v0 = dx.loc[dx.ID == id0, :]

        plt.clf()
        plt.title("ID=%d" % id0)
        plt.grid(True)

        plt.plot(vv.Age, vv[impvar], 'o', color='purple')
コード例 #27
0
ファイル: nn.py プロジェクト: jackyzha0/vybe
# Training hyper-parameters.
epochs = 1200
learning_rate = 0.005
# NOTE(review): the setup note below mentions 26 features while this is 193 -
# confirm which one describes the current input.
num_features = 193
# Hidden layer sizes (the names suggest first / second / third layer).
n_hidden_units_one = 256
n_hidden_units_two = 512
n_hidden_units_three = 1024

# Checkpoint location: a 'ckpt' entry under the current working directory.
savepath = os.getcwd() + '/ckpt'

### NN Setup
"""
Input Dims: 26 (features) x 501 (time length)
Output Dims: (num classes)
"""

# Load the training and test sets; the third value returned for the test set
# is discarded.
db, db_size, occ = data_tools.get_data()
t_db, t_db_size, _ = data_tools.get_data(test=True)
print(t_db_size)

# Shuffled pool of training sample indices 0 .. db_size - 1, consumed by
# get_indices().
index = np.arange(db_size)
np.random.shuffle(index)


def get_indices(batchsize):
    """Return the next ``batchsize`` sample indices from the shuffled pool.

    The module-level ``index`` array is the pool of indices not handed out
    yet. When fewer than ``batchsize`` indices remain, the pool is rebuilt
    as a fresh shuffle of ``0 .. db_size - 1`` (leftover indices are
    dropped). The returned indices are then removed from the pool, so
    consecutive calls never return the same index until the pool is rebuilt.

    Args:
        batchsize: number of indices requested.

    Returns:
        A 1-D NumPy array with the selected indices. It holds fewer than
        ``batchsize`` entries only when ``batchsize`` exceeds ``db_size``.
    """
    global index
    if index.size < batchsize:
        index = np.arange(db_size)
        np.random.shuffle(index)
    ret = index[:batchsize]
    # Remove exactly the indices just handed out. The previous code dropped
    # the *last* ``batchsize`` entries instead, so the same leading indices
    # were returned over and over while the tail was never used.
    index = index[batchsize:].copy()
    return ret
コード例 #28
0
# Ages to impute: every integer age from 1 to maxage inclusive.
imp_ages = np.arange(1, maxage + 1)

# Storage for results, indexed by the `female` flag (0 or 1).
dx = [None, None]
preg = [None, None]
rslt = [None, None]

outf.write("Imputing %s\n\n" % impvar)

# Fit a Gaussian process model separately to females and males.
# NOTE(review): the part of the loop visible here only loads and filters the
# data; the model fitting itself is presumably further down.
for female in 0, 1:

    outf.write("female=%d\n\n" % female)

    # Load the target variable together with SBP_MEAN, which is only used
    # below to decide who is kept, and log the size of what was loaded.
    dx[female] = get_data(female, impvar, others=["SBP_MEAN"])
    outf.write("Loaded %d x %d values\n" % tuple(dx[female].shape))
    outf.write("%d distinct people in initial data\n\n" %
               dx[female].ID.unique().size)

    # Drop people with no SBP data
    # x counts, per ID, the rows that have a non-missing SBP_MEAN.
    x = dx[female][["ID", "SBP_MEAN"]].dropna().groupby("ID").size()
    x = pd.DataFrame(x, columns=["n_SBP_mean"])
    # Attach the per-person count to every row. With the outer join, IDs
    # without any SBP value get a missing n_SBP_mean and are removed by the
    # "> 0" filter below.
    dx[female] = pd.merge(dx[female],
                          x,
                          left_on="ID",
                          right_on="ID",
                          how='outer')
    dx[female] = dx[female].loc[dx[female].n_SBP_mean > 0, :]
    # The SBP columns were only needed for the filter; remove them, then drop
    # every row that still has a missing value.
    dx[female] = dx[female].drop(["SBP_MEAN", "n_SBP_mean"], axis=1)
    dx[female] = dx[female].dropna()
コード例 #29
0
def pr_on_fair_distribution(models=('densenet121',), top_n=100, res=4):
    """Plot per-color prediction scores on a color-balanced image selection.

    A color density cube of resolution ``res`` is computed for every image
    of the CIFAR-10 range (50000, 60000). For each of the ``res**3`` color
    cells, the ``top_n`` images with the highest density in that cell are
    selected (one image may be selected for several cells). For every model
    the mean prediction rating of the selected images is computed per cell,
    printed, and plotted as a cube, together with the accumulated color cube
    of all selected images.

    Args:
        models: iterable of model names whose stored predictions are loaded
            through ``t_log.load_predictions``.
        top_n: number of densest images kept per color cell.
        res: number of cells per color axis of the cubes.

    Returns:
        None. Results are printed and plotted.
    """
    test_data = dt.get_data('cifar10', (50000, 60000))
    images = test_data[0]

    # One density cube per image.
    densities = []
    for img in images:
        cc = metrics_color.ColorDensityCube(res)
        cc.feed(img)
        densities.append(cc.get_cube())

    # Move the image axis last: (n_images, res, res, res) ->
    # (res, res, res, n_images), i.e. one list of densities per color cell.
    densities_lists = np.transpose(np.asarray(densities), (1, 2, 3, 0))

    # For each color cell keep the ids of the top_n densest images.
    densities_cube = np.empty((res, res, res), dtype=object)
    for i, j, k in np.ndindex(res, res, res):
        density_list = densities_lists[i][j][k].tolist()
        densities_cube[i][j][k] = np.argsort(density_list)[-top_n:]

    # Ground-truth labels do not depend on the model: compute them once.
    true_classes = [int(label) for label in test_data[1]]

    # Per model analysis
    for m in models:
        # Load the stored predictions and derive the prediction ratings.
        model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
        y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
        pr = metrics.prediction_ratings(y_predicted, true_classes)

        # Mean rating of the selected images for every color cell, plus the
        # color cube accumulated over all selected images.
        score_cube = np.zeros((res, res, res))
        global_cc = metrics_color.ColorDensityCube(resolution=res)
        for i, j, k in np.ndindex(res, res, res):
            selected_ids = densities_cube[i][j][k].tolist()
            for img_id in selected_ids:
                global_cc.feed(images[img_id])
            cell_score = np.mean([pr[img_id] for img_id in selected_ids])
            score_cube[i][j][k] = cell_score
            print(cell_score)

        global_cc.normalize()
        global_cc.plot_cube(title='Fair distributed dataset ColorCube')

        sc = metrics_color.ColorDensityCube(resolution=res, cube=score_cube)
        sc.normalize()
        sc.plot_cube(title='Scores per color for ' + m)