def main():
    rubrics = [
        'Мир', 'Россия', 'Политика', 'Экономика', 'Наука и техника',
        'Украина', 'Госэкономика', 'Спорт', 'Общество', 'Бывший СССР',
        'Культура', 'Медиа', 'Футбол', 'Музыка', 'Наука'
    ]
    lenta = Dataset(use_title=False, rubrics=rubrics, random_state=SEED, subsample=0.3)
    X, y = lenta.get_data()

    models = [
        LogisticRegression(random_state=SEED),
        GaussianNB(),
        SVC(),
        RandomForestClassifier(),
        XGBClassifier()
    ]
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
    transformations = ['elmo']

    with tf.device("/gpu:0"):  # Setup operations
        for trans in transformations:
            for model in models:
                clf = model
                pipeline = lenta.get_transform_pipeline(clf, trans, standardize=False)
                scores = cross_val_score(pipeline, X, y, verbose=10, cv=cv, n_jobs=1)
                mean = np.mean(scores)
                std = np.std(scores)
                res = {'mean': round(mean, 3), 'std': round(std, 3)}
                msg = 'Transformation: {}, Model: {}, accuracy {:.3f} (+- {:.3f})'
                print(msg.format(trans, model, mean, std))
                save_result(type(model).__name__ + '_' + trans, res)
    n_test = 480
    batch_size = 480
    im_shape = (32, 32)
elif data_name == 'MNIST':
    n_train = 10000
    n_test = 5000
    batch_size = 1000
    im_shape = (28, 28)

d_componentsss = [2]
train_RBM = False  # set to True only if the RBMs are not trained yet
train_NN = False
show_plot = True
noisy = False  # whether the data should be noisy
epochs = 50

X, y, X_train, y_train, X_test, y_test = dataset.get_data(
    name=data_name, n_train=n_train, n_test=n_test)

# hyperparameters
thetas = [0.9]  # [0.1, 0.3, 0.5, 0.7, 0.9, 0.99]
c = 0.1

# training RBM
for d_components in d_componentsss:
    file_path_weights = 'Models/weightsRBM/' + data_name + '/TEST' + data_name + str(
        n_train) + 'dim' + str(d_components)
    if data_name == 'MNIST':
        layer_dim = [784, 500, 500, 2000, d_components]
    elif data_name == 'COIL20':
        layer_dim = [1024, 500, 500, 2000, d_components]
# initialization of dataset/model
seed = 0
dataset = Dataset(seed)
model_type = 'reg'  # par/reg/auto/PCA/kernel
data_name = 'COIL20'  # MNIST / COIL20

if data_name == 'COIL20':
    n_train = 960
    n_test = 480
    batch_size = 480
elif data_name == 'MNIST':
    n_train = 10000
    n_test = 5000
    batch_size = 1000

d_componentss = [2, 10, 20]
epochs = 30

X, labels, X_train, labels_train, X_test, labels_test = dataset.get_data(
    name=data_name, n_train=n_train, n_test=n_test)

nsplits = 3
kfold = KFold(n_splits=nsplits)

for d_components in d_componentss:
    RBM_file = 'Models/weightsRBM/' + data_name + '/' + data_name + str(
        n_train) + 'dim' + str(d_components)
    # input dimension depends on the dataset: 28x28 = 784 for MNIST, 32x32 = 1024 for COIL20
    if data_name == 'MNIST':
        layer_dim = [784, 500, 500, 2000, d_components]
    elif data_name == 'COIL20':
        layer_dim = [1024, 500, 500, 2000, d_components]

    lambdas = [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99]
    accuracies = np.zeros(len(lambdas))
    losses = []
    for i, labda in enumerate(lambdas):
        print('Now training for lambda = %.2f' % labda)
        accuracy = 0
print("Gradient descent took %.4f seconds" % (time() - t0)) return Y, cost, grad_value if __name__ == '__main__': ''' main to implement tSNE on the datasets ''' seed = 0 dataset = Dataset(seed) d_components = [2] data_name = 'COIL20' grad_method = 'gains' n_train = 960 X, y, X_train, y_train, X_test, y_test = dataset.get_data( data_name, n_train, 10000) #X, y, X_train, y_train, X_test, y_test = dataset.get_coil20_data() for d in d_components: if grad_method == 'ADAM': model = tsne(random_state=0, initialization='PCA', initial_dims=30, grad_method='ADAM', perplexity=40, max_iter=1000, d_components=d, learning_rate=0.1) elif grad_method == 'gains': model = tsne(random_state=0, initialization='PCA',