def _run(self, tensor_provider: TensorProvider, run_idx):
    """
    Run a single diagnostic forward pass over the samples in `run_idx`.

    USE ONLY FOR DEBUGGING AND VERIFICATION! Re-runs the global variable
    initializer, so any trained weights in the session are discarded.

    :param TensorProvider tensor_provider: Source of input tensors and labels.
    :param run_idx: Keys or indices of the samples to run on.
    :return: List of fetched values: [regularized_cost, truth, ffout_a,
             prediction], as produced by ``session.run()``.
    """
    warnings.warn(
        "Use only this method for debugging! (unless we keep working on it)"
    )

    # Use model's graph and run initializer.
    # NOTE(review): this resets *all* TF variables - trained weights are lost.
    with self._tf_graph.as_default():
        self._sess.run(tf.global_variables_initializer())

    # Recurrent (sequential) input features for the selected samples
    recurrent_input_tensor = tensor_provider.load_concat_input_tensors(
        data_keys_or_idx=run_idx,
        word_embedding=self.use_word_embedding,
        char_embedding=self.use_char_embedding,
        pos_tags=self.use_pos_tags)

    # Sequence lengths (word counts) fed to the recurrent layer
    input_lengths = tensor_provider.load_data_tensors(
        data_keys_or_idx=run_idx,
        word_counts=True)["word_counts"]

    # Static (non-sequential) input tensor; keep a placeholder-compatible
    # dummy when static features are disabled
    static_input_tensor = [[]]
    if self.use_static_features:
        static_input_tensor = tensor_provider.load_concat_input_tensors(
            data_keys_or_idx=run_idx,
            bow=self.use_bow)

    # One-hot encode binary labels: column 0 = class 0, column 1 = class 1
    y = tensor_provider.load_labels(data_keys_or_idx=run_idx)
    truth_one_hot = np.stack([y == 0, y == 1], axis=1) * 1

    # Feeds
    feed_dict = {
        self.recurrent_inputs: recurrent_input_tensor,
        self.input_lengths: input_lengths,
        self.truth: truth_one_hot,
        self.static_inputs: static_input_tensor,
    }

    # Fetches
    fetch = [
        self.regularized_cost,
        self.truth,
        self._ffout_a,
        self.prediction
    ]

    # Run a single diagnostic batch
    res = self._sess.run(fetches=fetch, feed_dict=feed_dict)

    return res
def _run(self, tensor_provider: TensorProvider, run_idx):
    """
    Run a single diagnostic forward pass over the samples in `run_idx`.

    USE ONLY FOR DEBUGGING AND VERIFICATION! Re-runs the global variable
    initializer, so any trained weights in the session are discarded.

    :param TensorProvider tensor_provider: Source of input tensors and labels.
    :param run_idx: Keys or indices of the samples to run on.
    :return: List of fetched values: [cost, truth, ffout_a, prediction],
             as produced by ``session.run()``.
    """
    warnings.warn(
        "Use only this method for debugging! (unless we keep working on it)"
    )

    # Use model's graph and run initializer.
    # NOTE(review): this resets *all* TF variables - trained weights are lost.
    with self._tf_graph.as_default():
        self._sess.run(tf.global_variables_initializer())

    # Input features (BoW and/or embedding-sum) for the selected samples
    input_tensor = tensor_provider.load_concat_input_tensors(
        data_keys_or_idx=run_idx,
        bow=self.use_bow,
        embedding_sum=self.use_embedsum)

    # One-hot encode binary labels: column 0 = class 0, column 1 = class 1
    y = tensor_provider.load_labels(data_keys_or_idx=run_idx)
    truth_one_hot = np.stack([y == 0, y == 1], axis=1) * 1

    # Feeds
    feed_dict = {self.inputs: input_tensor, self.truth: truth_one_hot}

    # Fetches
    fetch = [self.cost, self.truth, self._ffout_a, self.prediction]

    # Run a single diagnostic batch
    res = self._sess.run(fetches=fetch, feed_dict=feed_dict)

    return res
# Convert to single column predictions = predictions[:, 1] # Binary conversion binary_predictions = predictions > 0.5 return predictions, binary_predictions def summary_to_string(self): return self.autosummary_str() if __name__ == "__main__": # Initialize tensor-provider (data-source) the_tensor_provider = TensorProvider(verbose=True) # Create model model = BasicRecurrent(tensor_provider=the_tensor_provider, use_bow=True) model.initialize_model(tensor_provider=the_tensor_provider) print("Settings string: {}".format(model.generate_settings_name())) # Get some random data test_size = 2000 all_keys = np.array(the_tensor_provider.accessible_annotated_keys) all_indices = list(range(len(all_keys))) random_keys = [ tuple(val) for val in all_keys[np.random.choice(all_indices, test_size)] ]
from util.tensor_provider import TensorProvider
from util.sql_utilities import rows2sql_table
from util.utilities import ensure_folder, redirect_stdout_to_file
from project_paths import ProjectPaths

# Output directory for this experiment's results.
# NOTE(review): `Path` is used here but not imported in this chunk -
# presumably imported earlier in the file; confirm.
overfit_like_crazy_directory = Path(ProjectPaths.results, "overfit_like_crazy")

###################################
# Settings

# Test-train parameters
n_test_programs = 2
n_train_programs = 1

# Initialize tensor-provider (data-source)
the_tensor_provider = TensorProvider(verbose=True)

# Initialize model
# (alternative model configurations kept commented out for reference)
# REC_HIDDEN_UNITS = 200
# FC_HIDDEN_UNITS = 400
# ITERS=2000
# BATCH_SIZE=100
# recmodel = BasicRecurrent(the_tensor_provider, units=[REC_HIDDEN_UNITS, FC_HIDDEN_UNITS],
#                           optimizer=tf.train.AdamOptimizer, word_embedding=True,
#                           pos_tags=True, char_embedding=False)
# model = LogisticRegression(the_tensor_provider, use_bow=True, use_embedsum=False,
#                            learning_rate=0.001, training_epochs=100, verbose=False)
# model = MLP(
#     tensor_provider=the_tensor_provider,
#     hidden_units=10,
from pathlib import Path

import numpy as np
import tensorflow as tf

from models.PositiveLearningElkan.pu_learning import PULogisticRegressionSK
from models.baselines import LogisticRegressionSK
from models.recurrent.basic_recurrent import BasicRecurrent
from project_paths import ProjectPaths
from run_files.single_train import single_training
from util.learning_rate_utilities import linear_geometric_curve
from util.tensor_provider import TensorProvider

if __name__ == "__main__":
    # Single-training experiment script.
    # NOTE(review): this chunk is cut off mid-statement at the end.

    # Initialize tensor-provider (data-source)
    the_tensor_provider = TensorProvider(verbose=True)

    # Results path
    used_base_path = Path(ProjectPaths.results, "single_train")

    # Settings
    test_ratio = 0.11

    # Models
    n_batches = 2000
    # Learning-rate schedule: decays from 5e-4 towards 1e-10 over n_batches
    # steps (see linear_geometric_curve for the exact shape)
    learning_rates = linear_geometric_curve(n=n_batches,
                                            starting_value=5e-4,
                                            end_value=1e-10,
                                            geometric_component=3. / 4,
                                            geometric_end=5)

    a_model = BasicRecurrent(tensor_provider=the_tensor_provider,
import tensorflow as tf

from models.dnn import BasicDNN
from models.recurrent.basic_recurrent import BasicRecurrent
from models.baselines import LogisticRegressionSK, MLP
from models.PositiveLearningElkan.pu_learning import PULogisticRegressionSK
from project_paths import ProjectPaths
from run_files.single_train import single_training
from util.learning_rate_utilities import linear_geometric_curve
from util.tensor_provider import TensorProvider
from util.utilities import ensure_folder

if __name__ == "__main__":
    # Final model-comparison experiment script.
    # NOTE(review): `Path` and `shutil` are used below but not imported in
    # this chunk - presumably imported earlier in the file; confirm.

    # Initialize tensor-provider (data-source)
    the_tensor_provider = TensorProvider(verbose=True)

    # Results path (wiped and recreated on every run)
    base_path = Path(ProjectPaths.results, "final_model_comparison")
    shutil.rmtree(str(base_path), ignore_errors=True)
    ensure_folder(base_path)

    # Get program IDs: test programs are the annotated ones that are
    # access-restricted (not in the accessible training set)
    all_program_ids = the_tensor_provider.annotated_program_ids(
        access_restricted_data=True)
    training_programs = the_tensor_provider.accessible_annotated_program_ids
    test_programs = set(all_program_ids).difference(set(training_programs))

    # Settings
    n_runs = 1
import sqlite3
from pathlib import Path

import numpy as np

from models.baselines import LogisticRegressionSK
from project_paths import ProjectPaths
from util.sql_utilities import rows2sql_table
from util.tensor_provider import TensorProvider
from util.utilities import ensure_folder

# Script consuming the results of a PU-learning run to derive negative labels.

# Initialize tensor-provider (data-source)
the_tensor_provider = TensorProvider(verbose=True)
print("")

###########
# Settings

# Path for pu-negatives results
inputs_path = Path(ProjectPaths.results,
                   "pu_learning_LogisticRegressionSKLEARN")

# Negative labels
# None: get labels that have been classified as negative in every single iteration of PU (really f*****g certain)
# float: threshold the final confidence of the pu-model with this value (higher means more certain of negativity)
negative_label_scheme = None

###########
# Get split from PU-learning

# Log on to database