    # optional second command-line argument: epoch threshold
    if len(sys.argv) > 2:
        threshold_epoch = int(sys.argv[2])
    dict_data_train = {
        'dir_data': DIRPROJECT + 'data/',
        'data_prefix': 'nz',
        'dataset': '20122016',
        'encoding': 'embedding',
        'newfeatures': None,
        'featurereduction': {
            'method': 'FUSION'
        },
        'grouping': 'verylightgrouping'
    }
    dataset_options_train = DatasetOptions(dict_data_train)

    diag_group_names = dataset_options_train.getDiagGroupNames()
    indices_diag_codes = getDiagCodesIndices(diag_group_names)
    main_groups = icd10_chapters.getMainGroups()
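    # one colour per ICD-10 main chapter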

    num_colors = len(main_groups)
    colors = plt.cm.rainbow(np.linspace(0, 1, num_colors))

    num_diags = len(indices_diag_codes)

    filenames_encodings = glob.glob(dir_model + 'basic_encodings_*')
    var_encodings = []
    for l, f in enumerate(sorted(filenames_encodings)):
        print(f)
        # epoch number: last '_'-separated token of the file name, before the extension
        epoch = int(f.split('/')[-1].split('.')[0].split('_')[-1])
        print('epoch: ' + str(epoch))
        basic_encodings = np.load(f)
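
A note on the epoch parsing above: splitting the path on '/' assumes POSIX separators. A minimal, more portable sketch, assuming the saved files are named basic_encodings_<epoch>.npy (the helper name epoch_from_path is hypothetical):

import os

def epoch_from_path(path):
    # 'basic_encodings_17.npy' -> 17
    stem = os.path.splitext(os.path.basename(path))[0]
    return int(stem.rsplit('_', 1)[-1])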
Example #2
def analyze(flags_obj):
    """Run the autoencoder in analysis mode and visualise the learned
    diagnosis-code encodings in 2-D with PCA and t-SNE.

    Args:
        flags_obj: An object containing parsed flag values.
    """
    dict_data_train = {
        'dir_data': DIRPROJECT + 'data/',
        'data_prefix': 'nz',
        'dataset': '20072016',
        'encoding': 'embedding',
        'newfeatures': None,
        'featurereduction': {
            'method': 'FUSION'
        },
        'grouping': 'verylightgrouping'
    }
    dataset_options_train = DatasetOptions(dict_data_train)
    dataset_options_eval = None

    if dict_data_train['data_prefix'] == 'nz':
        feature_columns = FeatureColumnsAutoEncoderNZ(
            dataset_options=dataset_options_train)
    else:
        print('unknown data prefix..exit')
        sys.exit()

    dict_dataset_options = {
        'train': dataset_options_train,
        'eval': dataset_options_eval,
        'test': None
    }

    nn = AutoEncoderModel('analysis', dict_dataset_options, feature_columns,
                          flags_obj)
    basic_encodings = nn.analyze()
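    # basic_encodings is assumed to hold one row per diagnosis code,
    # ordered chapter by chapter (26 chapters, labelled A-Z below)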

    num_colors = 26
    colors = plt.cm.rainbow(np.linspace(0, 1, num_colors))

    pca = PCA(n_components=2)
    weights_2d_pca = pca.fit_transform(basic_encodings)

    tsne = TSNE(n_components=2)
    weights_2d_tsne = tsne.fit_transform(basic_encodings)

    diag_group_names = dataset_options_train.getDiagGroupNames()
    num_diags = len(diag_group_names)

    if dataset_options_train.getGroupingName() == 'verylightgrouping':
        num_subcategories = 100
    elif dataset_options_train.getGroupingName() == 'lightgrouping':
        num_subcategories = 10
    elif dataset_options_train.getGroupingName() == 'grouping':
        num_subcategories = 1
    else:
        print('grouping scheme is unknown...exit')
        sys.exit()

    plt.figure()
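    # one scatter block and legend letter (A-Z) per ICD-10 chapter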
    for k in range(0, num_colors):
        c = colors[k]
        plt.scatter(
            weights_2d_pca[k * num_subcategories:(k * num_subcategories +
                                                  num_subcategories), 0],
            weights_2d_pca[k * num_subcategories:(k * num_subcategories +
                                                  num_subcategories), 1],
            label=string.ascii_uppercase[k],
            alpha=0.5,
            s=100,
            c=c)
    plt.legend()
    plt.title('pca')
    plt.draw()

    plt.figure()
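    # same chapter-wise scatter for the t-SNE projection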
    for k in range(0, num_colors):
        c = colors[k]
        plt.scatter(
            weights_2d_tsne[k * num_subcategories:(k * num_subcategories +
                                                   num_subcategories), 0],
            weights_2d_tsne[k * num_subcategories:(k * num_subcategories +
                                                   num_subcategories), 1],
            label=string.ascii_uppercase[k],
            alpha=0.5,
            s=100,
            c=c)
    plt.legend()
    plt.title('t-sne')
    plt.draw()

    plt.show()
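
For quick experimentation without the project-specific DatasetOptions and AutoEncoderModel classes, the visualisation pattern of analyze() can be reproduced on synthetic data. The sketch below is a self-contained approximation: the 26 chapters, the 100 subcategories per chapter ('verylightgrouping') and the 64-dimensional embeddings are assumptions standing in for the real encoder output.

import string

import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

num_chapters = 26          # ICD-10 chapter letters A..Z
num_subcategories = 100    # 'verylightgrouping': 100 sub-codes per chapter
encoding_dim = 64          # assumed embedding size

rng = np.random.RandomState(0)
# one synthetic cluster of sub-codes per chapter so the projections show structure
encodings = np.concatenate([
    rng.normal(loc=3.0 * k, scale=1.0, size=(num_subcategories, encoding_dim))
    for k in range(num_chapters)
])

colors = plt.cm.rainbow(np.linspace(0, 1, num_chapters))
projections = {
    'pca': PCA(n_components=2).fit_transform(encodings),
    't-sne': TSNE(n_components=2).fit_transform(encodings),
}

for name, points in projections.items():
    plt.figure()
    for k in range(num_chapters):
        block = points[k * num_subcategories:(k + 1) * num_subcategories]
        plt.scatter(block[:, 0], block[:, 1],
                    label=string.ascii_uppercase[k],
                    alpha=0.5, s=100, c=[colors[k]])
    plt.legend()
    plt.title(name)
plt.show()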