# W index
w_index = 0

# Last space
last_space = dict()

# Iterate
for space in param_space:
    # Params
    reservoir_size, w_sparsity, leak_rate, input_scaling, \
        input_sparsity, spectral_radius, feature, aggregation, \
        state_gram, feedbacks_sparsity, lang, embedding, \
        ridge_param, washout = functions.get_params(space)

    # Choose the right transformer
    sfgram_dataset.transform = features.create_transformer(
        feature, embedding, args.embedding_path, lang)

    # Set experience state
    xp.set_state(space)

    # Average sample
    average_sample = np.array([])

    # For each sample
    for n in range(args.n_samples):
        # Set sample
        xp.set_sample_state(n)

        # ESN cell
        esn = etnn.LiESN(input_dim=sfgram_dataset.transform.input_dim,
                         hidden_dim=reservoir_size,
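# A minimal sketch (not EchoTorch's implementation) of the leaky-integrator
# update that etnn.LiESN is built on: the reservoir state blends the previous
# state with a nonlinear drive, weighted by the leak rate a. All names below
# (w_in, w, a, u_t) are illustrative assumptions.
import torch

def li_esn_step(x_prev, u_t, w_in, w, a):
    """One leaky-integrator step: x_t = (1 - a) * x_prev + a * tanh(W_in u_t + W x_prev)."""
    pre_activation = torch.mv(w_in, u_t) + torch.mv(w, x_prev)
    return (1.0 - a) * x_prev + a * torch.tanh(pre_activation)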
# For each fold
for k in range(10):
    # Choose fold
    xp.set_fold_state(k)
    sfgram_loader_train_wv.dataset.set_fold(k)
    sfgram_loader_test_wv.dataset.set_fold(k)
    sfgram_loader_train_c3.dataset.set_fold(k)
    sfgram_loader_test_c3.dataset.set_fold(k)

    # F1 per threshold
    f1_scores = torch.zeros(100)
    thresholds = torch.linspace(0.0, 1.0, 100)

    # Choose the right transformer
    sfgram_dataset_wv.transform = features.create_transformer(
        'wv', embedding, args.embedding_path, lang)
    sfgram_dataset_c3.transform = features.create_transformer(
        'c3', embedding, args.embedding_path, lang)

    # Train WV models
    for i, data in enumerate(sfgram_loader_train_wv):
        # Inputs and labels
        inputs, labels = data

        # To variable
        inputs, labels = Variable(inputs), Variable(labels)
        if use_cuda:
            inputs, labels = inputs.cuda(), labels.cuda()

        # Accumulate xTx and xTy
        esn_wv(inputs, labels)
    # end for
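# A minimal sketch of how the f1_scores / thresholds grid above can be
# filled and reduced to a best decision threshold: score each candidate
# threshold by F1 over held-out predictions and keep the argmax.
# `scores` and `targets` are illustrative placeholders, not script variables.
import torch

def best_threshold(scores, targets, n_points=100):
    thresholds = torch.linspace(0.0, 1.0, n_points)
    f1_scores = torch.zeros(n_points)
    for t in range(n_points):
        predictions = (scores >= thresholds[t]).float()
        tp = (predictions * targets).sum()
        fp = (predictions * (1.0 - targets)).sum()
        fn = ((1.0 - predictions) * targets).sum()
        denominator = 2.0 * tp + fp + fn
        if denominator > 0:
            f1_scores[t] = 2.0 * tp / denominator  # F1 = 2TP / (2TP + FP + FN)
    return thresholds[torch.argmax(f1_scores)], f1_scores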
# Iterate
for space in param_space:
    # Params
    reservoir_size, w_sparsity, leak_rate, input_scaling, \
        input_sparsity, spectral_radius, feature, aggregation, \
        state_gram, feedbacks_sparsity, lang, embedding = functions.get_params(space)

    # Number of layers and per-layer leaky rates
    n_layers = int(space['n_layers'])
    if n_layers == 1:
        leaky_rates = leak_rate
    else:
        leaky_rates = np.linspace(1.0, leak_rate, n_layers)
    # end if

    # Reservoir matrices for the first n_layers layers
    w = base_w[:n_layers]

    # Choose the right transformer
    reutersc50_dataset.transform = features.create_transformer(
        feature, embedding, args.embedding_path, lang)

    # Set experience state
    xp.set_state(space)

    # Average sample
    average_sample = np.array([])

    # For each sample
    for n in range(args.n_samples):
        # Set sample
        xp.set_sample_state(n)

        # Stacked ESN
        esn = etnn.StackedESN(input_dim=reutersc50_dataset.transform.input_dim,
                              hidden_dim=[reservoir_size] * n_layers,
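# A small usage example of the per-layer leaky-rate scheme above: with
# n_layers > 1, leak rates are interpolated from 1.0 (no leak, fast
# dynamics) down to leak_rate for the deepest layer, so each reservoir in
# the stack operates on a different timescale. Values are illustrative.
import numpy as np

for n_layers, leak_rate in [(1, 0.1), (3, 0.1)]:
    rates = leak_rate if n_layers == 1 else np.linspace(1.0, leak_rate, n_layers)
    print(n_layers, rates)  # 1 -> 0.1 ; 3 -> [1.0, 0.55, 0.1]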
# Parse args
args, use_cuda, param_space, xp = argument_parsing.parser_training()

# Last space
last_space = dict()

# Iterate
for space in param_space:
    # Params
    hidden_size, cell_size, feature, lang, dataset_start, window_size, learning_window, embedding_size, rnn_type, \
        num_layers, dropout, output_dropout = functions.get_params(space)

    # Feature transformer
    feature_transformer = features.create_transformer(feature, args.pretrained, args.embedding_path, lang)

    # Load PAN17 dataset
    pan17_dataset, pan17_dataset_per_tweet, pan17_loader_train, pan17_loader_dev, pan17_loader_test = \
        dataset.load_pan17_dataset_per_tweet(
            output_length=settings.output_length[feature],
            output_dim=settings.input_dims[feature],
            batch_size=args.batch_size,
            trained=not args.pretrained,
            load_type=feature,
            transform=feature_transformer
        )

    # Print number of users
    xp.write("Number of users : {}".format(
        len(pan17_dataset_per_tweet.user_tweets)), log_level=0)
# W index
w_index = 0

# Last space
last_space = dict()

# Iterate
for space in param_space:
    # Params
    reservoir_size, w_sparsity, leak_rate, input_scaling, \
        input_sparsity, spectral_radius, feature, aggregation, \
        state_gram, feedbacks_sparsity, lang, embedding, dataset_start = functions.get_params(space)

    # Choose the right transformer
    reutersc50_dataset.transform = features.create_transformer(
        feature, embedding, args.embedding_path, lang)

    # Dataset start
    reutersc50_dataset.set_start(dataset_start)

    # Set experience state
    xp.set_state(space)

    # Average sample
    average_sample = np.array([])

    # New W?
    if len(last_space) > 0 and last_space['reservoir_size'] != space['reservoir_size']:
        w = etnn.ESNCell.generate_w(int(space['reservoir_size']), space['w_sparsity'])
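# A minimal sketch (an assumption, not ESNCell.generate_w's actual code) of
# what generating a sparse reservoir matrix typically involves: draw random
# weights, zero out entries to reach the target sparsity, and rescale so the
# largest absolute eigenvalue matches a desired spectral radius. Here
# w_sparsity is assumed to be the fraction of non-zero connections.
import numpy as np

def generate_sparse_w(reservoir_size, w_sparsity, spectral_radius=0.99):
    w = np.random.randn(reservoir_size, reservoir_size)
    mask = np.random.rand(reservoir_size, reservoir_size) < w_sparsity
    w = w * mask
    # Rescale to the target spectral radius
    rho = np.max(np.abs(np.linalg.eigvals(w)))
    if rho > 0:
        w = w * (spectral_radius / rho)
    return w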
# Iterate
for space in param_space:
    # Params
    hidden_size, cell_size, feature, lang, dataset_start, window_size, learning_window, embedding_size, rnn_type, \
        num_layers, dropout, output_dropout = functions.get_params(space)

    # Load SFGram dataset
    sfgram_dataset, sfgram_loader_train, sfgram_loader_dev, sfgram_loader_test = dataset.load_sfgram_dataset(
        block_length=40,
        batch_size=args.batch_size,
        author='SILVERBERG',
        load_type=feature + ("" if args.pretrained else "T")
    )

    # Print dataset information
    xp.write("Dataset length : {}".format(len(sfgram_dataset)), log_level=0)
    xp.write("Number of texts : {}".format(len(sfgram_dataset.texts)), log_level=0)

    # Choose the right transformer
    sfgram_dataset.transform = features.create_transformer(
        feature, args.pretrained, args.embedding_path, lang
    )

    # Precompute the dataset
    sfgram_dataset.precompute_documents("./sfgram_blocks")
# end for
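# A minimal sketch, with a hypothetical cache layout, of the
# precompute-and-cache pattern that precompute_documents() above suggests:
# run the (expensive) transform once per block and save the result, so
# later passes load tensors instead of recomputing features.
import os
import torch

def precompute_blocks(dataset, cache_dir):
    os.makedirs(cache_dir, exist_ok=True)
    for index in range(len(dataset)):
        cache_path = os.path.join(cache_dir, "block_{}.pt".format(index))
        if not os.path.exists(cache_path):
            torch.save(dataset[index], cache_path)  # transformed sample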
reuters_loader_train.dataset.set_fold(k)
reuters_loader_test.dataset.set_fold(k)

# Model outputs
model_outputs = list()
model_targets = list()
model_local_targets = list()

# For each model
for m in range(n_models):
    # Model feature
    model_feature = feature[m][0]
    print(u"Model {}".format(model_feature))

    # Choose the right transformer
    reutersc50_dataset.transform = features.create_transformer(
        model_feature, embedding, args.embedding_path, lang, k, use_cuda)

    # Outputs
    model_outputs.append(list())

    # ESN cell
    esn = models[m]

    # Get training data for this fold
    for i, data in enumerate(reuters_loader_train):
        # Inputs and labels
        inputs, labels, time_labels = data

        # To variable
        inputs, time_labels = Variable(inputs), Variable(time_labels)
        if use_cuda:
            inputs, time_labels = inputs.cuda(), time_labels.cuda()
# OOV
oov = np.array([])

# For each fold
for k in range(args.k):
    # Choose fold
    xp.set_fold_state(k)
    reuters_loader_train.dataset.set_fold(k)
    reuters_loader_dev.dataset.set_fold(k)
    reuters_loader_test.dataset.set_fold(k)

    # Choose the right transformer
    reutersc50_dataset.transform = features.create_transformer(
        feature,
        args.pretrained and not args.fine_tuning,
        args.embedding_path,
        lang,
        token2index=word2index
    )

    # Create the model
    if args.pretrained and not args.fine_tuning:
        # Create model
        model = KerasRNN.create_rnn_model(
            rnn_type=rnn_type,
            embedding_size=embedding_size,
            hidden_size=hidden_size,
            dense_size=args.n_authors,
            average=False
        )
    elif args.pretrained and args.fine_tuning:
        # Create model
        model = KerasRNN.create_rnn_model_with_pretrained_embedding_layer(
# OOV
oov = np.array([])

# For each fold
for k in range(10):
    # Choose fold
    xp.set_fold_state(k)
    reuters_loader_train.dataset.set_fold(k)
    reuters_loader_dev.dataset.set_fold(k)
    reuters_loader_test.dataset.set_fold(k)

    # Choose the right transformer
    reutersc50_dataset.transform = features.create_transformer(
        feature, args.pretrained, args.embedding_path, lang, token2index=word2index)

    # Create the model
    model = KerasRNN.create_stanford_article_level_model(
        rnn_type=rnn_type,
        embedding_matrix=embedding_matrix,
        sentence_size=learning_window,
        hidden_size=hidden_size,
        dense_size=args.n_authors,
        trainable=False
    )

    # Print model summary (summary() prints directly and returns None)
    if k == 0:
        model.summary(90)
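# A minimal Keras sketch (illustrative, not KerasRNN's actual helper) of the
# pattern behind create_stanford_article_level_model above: a frozen
# pretrained embedding layer feeding an RNN and a softmax over authors.
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

def article_level_model(embedding_matrix, sentence_size, hidden_size, dense_size):
    vocab_size, embedding_dim = embedding_matrix.shape
    model = Sequential()
    model.add(Embedding(vocab_size, embedding_dim,
                        weights=[embedding_matrix],
                        input_length=sentence_size,
                        trainable=False))  # frozen pretrained vectors
    model.add(LSTM(hidden_size))
    model.add(Dense(dense_size, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model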
####################################################
# Main
####################################################

# Arguments
parser = argparse.ArgumentParser(u"Feature selector visualisation")
parser.add_argument("--n-authors", type=int, default=15)
parser.add_argument("--model", type=str, required=True)
parser.add_argument("--sub-type", type=str, required=True)
args = parser.parse_args()

# Load from directory
reutersc50_dataset, reuters_loader_train, reuters_loader_test = dataset.load_dataset()

# Load transformer
if args.model == 'cgfs':
    reutersc50_dataset.transform = features.create_transformer(feature='cgfs', n_gram=args.sub_type, fold=0)
elif args.model == 'ccsaa':
    reutersc50_dataset.transform = features.create_transformer(feature='ccsaa', fold=0)
# end if

# Get training data for this fold
for i, data in enumerate(reuters_loader_train):
    # Inputs and labels
    inputs, labels, time_labels = data

    # Show the first sample as a (features x time) map
    plt.imshow(inputs[0, 0].t().numpy(), cmap='Greys')
    plt.show()
# end for
    args.dataset_size)

# Print authors
xp.write(u"Authors : {}".format(reutersc50_dataset.authors), log_level=0)

# Last space
last_space = dict()

# Iterate
for space in param_space:
    # Params
    hidden_size, cell_size, feature, lang, dataset_start, window_size, learning_window, embedding_size, rnn_type, \
        num_layers, dropout, output_dropout = functions.get_params(space)

    # Choose the right transformer
    reutersc50_dataset.transform = features.create_transformer(
        feature, args.pretrained, args.embedding_path, lang)

    # Dataset start
    reutersc50_dataset.set_start(dataset_start)

    # Set experience state
    xp.set_state(space)

    # Average sample
    average_sample = np.array([])

    # Certainty data
    certainty_data = np.zeros(
        (2, args.n_samples * len(reutersc50_dataset.authors) * 100))
    certainty_index = 0
# 10-CV
for k in np.arange(0, settings.k):
    # Load transformer
    if model_type == 'linear':
        reutersc50_dataset.transform = torchlanguage.transforms.Compose([
            torchlanguage.transforms.GloveVector(model='en_vectors_web_lg'),
            torchlanguage.transforms.ToNGram(n=model_subtype),
            torchlanguage.transforms.Reshape((-1, input_dim))
        ])
    elif model_type == 'cgfs':
        reutersc50_dataset.transform = features.create_transformer(
            feature='cgfs', n_gram=model_subtype, fold=k, use_cuda=args.cuda)
    elif model_type == 'ccsaa':
        reutersc50_dataset.transform = features.create_transformer(
            feature='ccsaa', fold=k, use_cuda=args.cuda)
    # end if

    # Linear classifier
    model = etnn.RRCell(input_dim, settings.n_authors)
    if args.cuda:
        model.cuda()
    # end if

    # Get test data for this fold
    step = 0
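# etnn.RRCell fits its output weights by ridge regression rather than
# gradient descent, accumulating xTx and xTy as seen earlier. A minimal
# sketch of the closed form it is based on, with illustrative names
# (x: collected states, y: one-hot targets):
#   W_out = (X^T X + lambda * I)^{-1} X^T Y
import torch

def ridge_fit(x, y, ridge_param=0.0):
    xtx = x.t().mm(x)                         # (d, d) Gram matrix
    xty = x.t().mm(y)                         # (d, c) cross terms
    reg = ridge_param * torch.eye(x.size(1))  # Tikhonov regularizer
    # A linear solve is preferable numerically; the inverse mirrors the formula.
    return torch.inverse(xtx + reg).mm(xty)   # output weights (d, c)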