Beispiel #1
0
               leaf_font_size=12,
               p=50,
               **kwargs)


# Load the dataset; the 'Biopsy' column is used as the target below.
df = pd.read_csv('data/cervical_arranged_NORM.csv')

gt_labels = ['Biopsy']

# Target vector: the Biopsy column cast to 32-bit integers.
y = df.Biopsy.values.astype(np.int32)
# Remove the target column(s) so only the input features remain.
# `axis` is passed by keyword because recent pandas rejects it positionally.
df = df.drop(gt_labels, axis=1)
# `.values` replaces the deprecated (and later removed) `as_matrix()`.
X = df.values.astype(np.float64)

model = MultitaskEmbedding(embedding='zero',
                           bypass=False,
                           alpha=0.1,
                           width=10,
                           depth=2)
model.fit(X, y)

# Distinct values observed in each feature column of X.
domains = [np.unique(X[:, i]) for i in range(X.shape[1])]

# One row per feature; the 10 columns presumably match the embedding
# width configured above -- TODO confirm.
impact = np.zeros((X.shape[1], 10))

for ft_orig in range(X.shape[1]):
    emb = model.get_hidden_representation(X)
    for value in domains[ft_orig]:
        Xnew = X.copy()
        Xnew[:, ft_orig] = value
        embnew = model.get_hidden_representation(Xnew)
        impact[ft_orig] = np.maximum(impact[ft_orig],
# 10 unshuffled stratified folds.  With shuffle=False the split is already
# deterministic, so random_state is omitted: it had no effect here, and
# recent scikit-learn raises ValueError when it is combined with
# shuffle=False.
skf = StratifiedKFold(10, shuffle=False)
df = pd.read_csv('data/cervical_arranged_NORM.csv')

all_procedures = set(['Hinselmann', 'Schiller', 'Citology'])

for gt_labels in list(findsubsets(all_procedures)):
    gt_labels = list(gt_labels) + ['Biopsy']
    print 'Procedures', ' '.join(sorted(list(all_procedures - set(gt_labels))))

    y = df.Biopsy.values.astype(np.int32)
    X = df.drop(gt_labels, 1).as_matrix().astype(np.float64)

    cv = 3
    models = [('Sup',
               GridSearchCV(
                   MultitaskEmbedding(alpha=1., embedding='raw'),
                   param_grid={
                       'depth': [1, 2, 3],
                       'width': [10, 20],
                   },
                   scoring='average_precision',
                   cv=cv,
               )),
              ('Semi',
               GridSearchCV(
                   MultitaskEmbedding(alpha=0.01),
                   param_grid={
                       'alpha': [0.01, 0.1],
                       'depth': [1, 2, 3],
                       'width': [10, 20],
                       'bypass': [False, True],
    ret = metrics.average_precision_score(a, b[:, 1])
    if np.isnan(ret):
        return -np.inf
    return ret


# Load the CSV named by the first command-line argument as a 2-D array.
# `.values` replaces the deprecated (and later removed) `as_matrix()`.
X = pd.read_csv(os.sys.argv[1]).values
# The last column is the target; every other column is an input feature.
y = X[:, -1]
X = X[:, :-1]


# 10 unshuffled stratified folds.  random_state is omitted because it has
# no effect when shuffle=False, and recent scikit-learn raises ValueError
# for that combination.
skf = StratifiedKFold(10, shuffle=False)

cv = 3
models = [('Sup',
           GridSearchCV(MultitaskEmbedding(alpha=1.,
                                           embedding='raw'),
                        param_grid={'depth': [1, 2, 3],
                                    'width': [10, 20],
                                    },
                        scoring='average_precision',
                        cv=cv,
                        )),
           ('Semi',
            GridSearchCV(MultitaskEmbedding(alpha=0.01),
                        param_grid={'alpha': [0.01, 0.1],
                                    'depth': [1, 2, 3],
                                    'width': [10, 20],
                                    'bypass': [False, True],
                                    },
                        scoring='average_precision',
                        cv=cv,
# Raise the interpreter recursion limit well above the default.
os.sys.setrecursionlimit(100000)

# 10 unshuffled stratified folds.  random_state is omitted because it has
# no effect when shuffle=False, and recent scikit-learn raises ValueError
# for that combination.
skf = StratifiedKFold(10, shuffle=False)
df = pd.read_csv('data/cervical_arranged_NORM.csv')

all_procedures = set(['Hinselmann', 'Schiller', 'Citology'])
gt_labels = ['Biopsy']

# Target vector: the Biopsy column cast to 32-bit integers.
y = df.Biopsy.values.astype(np.int32)
# Feature matrix: every column except the target.  `axis` is passed by
# keyword and `.values` replaces the deprecated `as_matrix()`.
X = df.drop(gt_labels, axis=1).values.astype(np.float64)

cv = 3
models = [
    ('Unsupervised',
     GridSearchCV(MultitaskEmbedding(alpha=0.0, bypass=False),
                  param_grid={
                      'depth': [1, 2, 3],
                      'width': [10, 20],
                  },
                  scoring='average_precision',
                  cv=cv,
                  n_jobs=1)),
    ('Semi',
     GridSearchCV(MultitaskEmbedding(alpha=0.01, bypass=False),
                  param_grid={
                      'alpha': [0.01, 0.1],
                      'depth': [1, 2, 3],
                      'width': [10, 20],
                  },
                  scoring='average_precision',