Example #1
import pdb
import sys

import numpy as np

# read_multilabel_dataset is defined elsewhere in the original script/project.

loss_function = sys.argv[1]  # 'softmax', 'logistic' or 'sparsemax'
num_epochs = int(sys.argv[2])  # e.g. 20
regularization_constant = float(sys.argv[3])

filepath_train = sys.argv[4]
filepath_test = sys.argv[5]
add_bias = True  # set to False to skip the constant bias feature
normalize = bool(int(sys.argv[6]))
has_label_probabilities = False  # True if labels come as probabilities
sparse = True  # assumed: used below but never defined in this excerpt

num_jackknife_partitions = int(sys.argv[7])  # 1 means no jackknifing.

X_train, Y_train, num_features = \
    read_multilabel_dataset(filepath_train, \
                            add_bias=add_bias, \
                            has_label_probabilities=has_label_probabilities, \
                            sparse=True)
if normalize:
    # Densify the sparse feature dicts to compute per-feature statistics.
    X = np.zeros((len(X_train), num_features))
    for i, features in enumerate(X_train):
        for fid, fval in features.items():
            # The assert message is only evaluated on failure, so this
            # drops into the debugger if a feature id is out of range.
            assert fid < num_features, pdb.set_trace()
            X[i, fid] = fval
    x_av = X.mean(axis=0)
    x_std = X.std(axis=0)
    #x_std = np.sqrt(((X-x_av)*(X-x_av)).sum(axis=0) / X.shape[0])
    # Z-score each feature in place, skipping constant features.
    for i, features in enumerate(X_train):
        for fid, fval in features.items():
            if x_std[fid] != 0:
                features[fid] -= x_av[fid]
                features[fid] /= x_std[fid]
    # NOTE: Y and the num_documents_* counts are not defined in this excerpt;
    # the slicing below assumes the dev and test sets were also read and
    # stacked into X and Y before normalization. Those steps are elided.
    offset = 0
    X_train = X[offset:(offset + num_documents_train), :]
    Y_train = Y[offset:(offset + num_documents_train), :]

    offset += num_documents_train
    X_dev = X[offset:(offset + num_documents_dev), :]
    Y_dev = Y[offset:(offset + num_documents_dev), :]

    offset += num_documents_dev
    X_test = X[offset:(offset + num_documents_test), :]
    Y_test = Y[offset:(offset + num_documents_test), :]

else:
    # This branch expects a different argument layout, with the train/dev/test
    # paths in argv[5..7]; it appears to come from a variant of the script.
    filepath_train = sys.argv[5]
    X_train, Y_train, num_features = read_multilabel_dataset(filepath_train,
                                                             sparse=sparse)
    num_labels = Y_train.shape[1]
    #num_features = X_train.shape[1]
    filepath_dev = sys.argv[6]
    X_dev, Y_dev, _ = read_multilabel_dataset(filepath_dev,
                                              num_labels=num_labels,
                                              num_features=num_features,
                                              sparse=sparse)
    filepath_test = sys.argv[7]
    X_test, Y_test, _ = read_multilabel_dataset(filepath_test,
                                                num_labels=num_labels,
                                                num_features=num_features,
                                                sparse=sparse)
    num_words = num_features
    num_classes = num_labels
    if sparse:
        pass  # (the excerpt is cut off at this point)
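
Both examples call a read_multilabel_dataset helper whose definition is not shown on this page. For context, here is a minimal sketch of the interface the snippets rely on, assuming a LIBSVM-style multilabel file ('l1,l2,... fid:fval ...'); the format and every implementation detail below are assumptions, not the project's actual code:

import numpy as np

def read_multilabel_dataset(filepath, num_labels=None, num_features=None,
                            add_bias=False, has_label_probabilities=False,
                            sparse=True):
    """Hypothetical reader: one document per line, formatted as
    'l1,l2,... fid:fval fid:fval ...'.  Returns (X, Y, num_features),
    where X is a list of {feature id: value} dicts and Y is a binary
    document-by-label matrix.  (has_label_probabilities and the dense
    sparse=False path are omitted from this sketch.)"""
    X, label_sets = [], []
    max_fid, max_label = -1, -1
    with open(filepath) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            labels = [int(l) for l in fields[0].split(',')]
            features = {}
            for field in fields[1:]:
                fid, fval = field.split(':')
                features[int(fid)] = float(fval)
            if features:
                max_fid = max(max_fid, max(features))
            max_label = max(max_label, max(labels))
            X.append(features)
            label_sets.append(labels)
    if num_features is None:
        num_features = max_fid + 1
    if add_bias:
        for features in X:
            features[num_features] = 1.  # constant bias feature
        num_features += 1
    if num_labels is None:
        num_labels = max_label + 1
    Y = np.zeros((len(X), num_labels))
    for i, labels in enumerate(label_sets):
        Y[i, labels] = 1.
    return X, Y, num_features

Returning each document as a {feature id: value} dict keeps memory proportional to the number of non-zeros, which is what the normalization loop in Example #1 iterates over when sparse=True.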
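Example #1 also reads num_jackknife_partitions, but the part of the script that uses it is not included above. As a rough sketch of the usual jackknifing scheme (train on all partitions but one, predict the held-out one, rotate), the helper below is illustrative, not the original code:

import numpy as np

def jackknife_partitions(num_documents, num_partitions):
    """Yield (train_indices, heldout_indices) pairs over the partitions.
    With num_partitions == 1 the whole set plays both roles (no jackknifing)."""
    indices = np.arange(num_documents)
    if num_partitions == 1:
        yield indices, indices
        return
    folds = np.array_split(indices, num_partitions)
    for k in range(num_partitions):
        train = np.concatenate(
            [folds[j] for j in range(num_partitions) if j != k])
        yield train, folds[k]
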
Example #5
# (The excerpt opens mid-statement, in code that reports per-label dev-set
#  precision, recall and F1: Pl_dev[k, l], Rl_dev[k, l], F1l_dev[k, l].)

###########################

import sys

# read_multilabel_dataset is defined elsewhere in the original script/project.

loss_function = sys.argv[1]  # 'softmax', 'logistic' or 'sparsemax'
num_epochs = int(sys.argv[2])  # e.g. 20
learning_rate = float(sys.argv[3])  # e.g. 0.001
regularization_constant = float(sys.argv[4])

sparsemax_scales = [1., 1.5, 2., 2.5, 3., 3.5, 4., 4.5, 5.]
softmax_thresholds = [.01, .02, .03, .04, .05, .06, .07, .08, .09, .1]
logistic_thresholds = [.1, .2, .3, .4, .5, .6, .7]

filepath_train = sys.argv[5]
X_train, Y_train, num_features = read_multilabel_dataset(filepath_train,
                                                         sparse=True)
num_labels = Y_train.shape[1]
filepath_dev = sys.argv[6]
filepath_test = sys.argv[7]

num_words = num_features
num_classes = num_labels
num_documents_train = len(X_train)

if loss_function == 'softmax':
    hyperparameter_name = 'softmax_thres'
    hyperparameter_values = softmax_thresholds
elif loss_function == 'sparsemax':
    hyperparameter_name = 'sparsemax_scale'
    hyperparameter_values = sparsemax_scales
elif loss_function == 'logistic':
    # The excerpt is cut off here; by analogy with the branches above it
    # presumably selects the logistic thresholds (the exact name string is
    # an assumption):
    hyperparameter_name = 'logistic_thres'
    hyperparameter_values = logistic_thresholds
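
Both snippets stop right after choosing the hyperparameter grid, and Example #5 opens mid-way through reporting per-label dev scores, so the tuning loop itself is never shown. A generic sketch of how such a grid is commonly consumed, where train_and_evaluate is a placeholder and not the project's API, might look like:

import numpy as np

def train_and_evaluate(X_train, Y_train, filepath_dev, **hyperparameters):
    """Placeholder: fit the model under the given hyperparameter setting and
    return per-label precision, recall and F1 arrays on the dev set."""
    num_labels = Y_train.shape[1]
    scores = np.zeros(num_labels)  # stand-in for real dev-set metrics
    return scores, scores, scores

best_value, best_f1 = None, -1.0
for value in hyperparameter_values:
    Pl, Rl, F1l = train_and_evaluate(X_train, Y_train, filepath_dev,
                                     **{hyperparameter_name: value})
    if F1l.mean() > best_f1:
        best_f1, best_value = F1l.mean(), value
print('best %s = %s (mean dev F1 = %f)' % (hyperparameter_name,
                                           best_value, best_f1))

Selecting the grid value by mean per-label dev F1 is one common choice; the fragment at the top of Example #5 suggests the original script reports Pl_dev, Rl_dev and F1l_dev per grid index k and label l in a similar loop.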