def _run(self, tensor_provider: TensorProvider, run_idx):
        """
        Run a single forward pass over the samples in ``run_idx`` and return
        the raw fetches from the TensorFlow session.

        USE ONLY FOR DEBUGGING AND VERIFICATION!

        :param TensorProvider tensor_provider: Source of input tensors and labels.
        :param run_idx: Data keys or indices of the samples to run.
        :return: List of fetched values:
                 [regularized_cost, truth, ffout_a, prediction].
        """
        warnings.warn(
            "Use only this method for debugging! (unless we keep working on it)"
        )

        # Use model's graph and (re-)initialize all variables.
        # NOTE(review): this resets any trained weights — acceptable only
        # because this method is debug-only.
        with self._tf_graph.as_default():
            self._sess.run(tf.global_variables_initializer())

        # Recurrent (sequential) input features for the requested samples.
        recurrent_input_tensor = tensor_provider.load_concat_input_tensors(
            data_keys_or_idx=run_idx,
            word_embedding=self.use_word_embedding,
            char_embedding=self.use_char_embedding,
            pos_tags=self.use_pos_tags)

        # Sequence lengths (word counts) consumed by the recurrent layer.
        input_lengths = tensor_provider.load_data_tensors(
            data_keys_or_idx=run_idx, word_counts=True)["word_counts"]

        # Optional static (non-sequential) features; the placeholder is fed
        # an empty tensor when static features are disabled.
        static_input_tensor = [[]]
        if self.use_static_features:
            static_input_tensor = tensor_provider.load_concat_input_tensors(
                data_keys_or_idx=run_idx, bow=self.use_bow)

        # Labels, one-hot encoded as integer columns [y == 0, y == 1].
        y = tensor_provider.load_labels(data_keys_or_idx=run_idx)
        c_truth = np.stack([y == 0, y == 1], axis=1) * 1

        # Feeds
        feed_dict = {
            self.recurrent_inputs: recurrent_input_tensor,
            self.input_lengths: input_lengths,
            self.truth: c_truth,
            self.static_inputs: static_input_tensor,
        }

        # Fetches: cost, truth, pre-output activations, and predictions.
        # No optimizer op is fetched, so no training happens here.
        fetch = [
            self.regularized_cost, self.truth, self._ffout_a, self.prediction
        ]

        # Run the single debug pass.
        return self._sess.run(fetches=fetch, feed_dict=feed_dict)
    def _run(self, tensor_provider: TensorProvider, run_idx):
        """
        Run a single forward pass over the samples in ``run_idx`` and return
        the raw fetches from the TensorFlow session.

        USE ONLY FOR DEBUGGING AND VERIFICATION!

        :param TensorProvider tensor_provider: Source of input tensors and labels.
        :param run_idx: Data keys or indices of the samples to run.
        :return: List of fetched values: [cost, truth, ffout_a, prediction].
        """
        warnings.warn(
            "Use only this method for debugging! (unless we keep working on it)"
        )

        # Use model's graph and (re-)initialize all variables.
        # NOTE(review): this resets any trained weights — acceptable only
        # because this method is debug-only.
        with self._tf_graph.as_default():
            self._sess.run(tf.global_variables_initializer())

        # BoW / embedding-sum input features for the requested samples.
        input_tensor = tensor_provider.load_concat_input_tensors(
            data_keys_or_idx=run_idx,
            bow=self.use_bow,
            embedding_sum=self.use_embedsum)

        # Labels, one-hot encoded as integer columns [y == 0, y == 1].
        y = tensor_provider.load_labels(data_keys_or_idx=run_idx)
        c_truth = np.stack([y == 0, y == 1], axis=1) * 1

        # Feeds
        feed_dict = {self.inputs: input_tensor, self.truth: c_truth}

        # Fetches: cost, truth, pre-output activations, and predictions.
        fetch = [self.cost, self.truth, self._ffout_a, self.prediction]

        # Run the single debug pass.
        # (Unreachable statements that followed the return in the original —
        # a prediction-thresholding fragment — have been removed as dead code;
        # they likely belonged to a different, truncated method.)
        return self._sess.run(fetches=fetch, feed_dict=feed_dict)

    def summary_to_string(self):
        """Return the model's summary text, delegating to ``autosummary_str``."""
        summary_text = self.autosummary_str()
        return summary_text


if __name__ == "__main__":
    # Initialize tensor-provider (data-source)
    the_tensor_provider = TensorProvider(verbose=True)

    # Create model and build its graph/variables.
    model = BasicRecurrent(tensor_provider=the_tensor_provider, use_bow=True)
    model.initialize_model(tensor_provider=the_tensor_provider)

    print("Settings string: {}".format(model.generate_settings_name()))

    # Get some random data: sample test_size annotated keys
    # (with replacement — np.random.choice default) and convert each
    # key row back to a tuple.
    test_size = 2000
    all_keys = np.array(the_tensor_provider.accessible_annotated_keys)
    all_indices = list(range(len(all_keys)))
    random_keys = [
        tuple(val)
        for val in all_keys[np.random.choice(all_indices, test_size)]
    ]
    # NOTE(review): this script section appears truncated here — the sampled
    # keys are never used before the next (unrelated) file chunk begins.
from util.tensor_provider import TensorProvider
from util.sql_utilities import rows2sql_table
from util.utilities import ensure_folder, redirect_stdout_to_file
from project_paths import ProjectPaths

# Output directory for this experiment's results.
# NOTE(review): `Path` is not imported in the visible import block above —
# presumably imported earlier in the original file; verify.
overfit_like_crazy_directory = Path(ProjectPaths.results, "overfit_like_crazy")

###################################
# Settings

# Test-train parameters
n_test_programs = 2
n_train_programs = 1

# Initialize tensor-provider (data-source)
the_tensor_provider = TensorProvider(verbose=True)

# Initialize model
# NOTE(review): the commented-out blocks below are alternative model
# configurations kept for reference; consider deleting them.
# REC_HIDDEN_UNITS = 200
# FC_HIDDEN_UNITS = 400
# ITERS=2000
# BATCH_SIZE=100
# recmodel = BasicRecurrent(the_tensor_provider, units=[REC_HIDDEN_UNITS, FC_HIDDEN_UNITS],
#                          optimizer=tf.train.AdamOptimizer, word_embedding=True,
#                           pos_tags=True, char_embedding=False)

# model = LogisticRegression(the_tensor_provider,  use_bow=True, use_embedsum=False,
#                 learning_rate=0.001, training_epochs=100, verbose=False)
# model = MLP(
#     tensor_provider=the_tensor_provider,
#     hidden_units=10,
# Esempio n. 5
# 0
from pathlib import Path

import numpy as np
import tensorflow as tf

from models.PositiveLearningElkan.pu_learning import PULogisticRegressionSK
from models.baselines import LogisticRegressionSK
from models.recurrent.basic_recurrent import BasicRecurrent
from project_paths import ProjectPaths
from run_files.single_train import single_training
from util.learning_rate_utilities import linear_geometric_curve
from util.tensor_provider import TensorProvider

if __name__ == "__main__":
    # Initialize tensor-provider (data-source)
    the_tensor_provider = TensorProvider(verbose=True)

    # Results path
    used_base_path = Path(ProjectPaths.results, "single_train")

    # Settings: fraction of data held out for testing.
    test_ratio = 0.11

    # Models: per-batch learning-rate schedule decaying from 5e-4
    # toward 1e-10 over n_batches steps.
    n_batches = 2000
    learning_rates = linear_geometric_curve(n=n_batches,
                                            starting_value=5e-4,
                                            end_value=1e-10,
                                            geometric_component=3. / 4,
                                            geometric_end=5)
    # NOTE(review): the constructor call below is cut off mid-argument-list —
    # this chunk is truncated here.
    a_model = BasicRecurrent(tensor_provider=the_tensor_provider,
import tensorflow as tf

from models.dnn import BasicDNN
from models.recurrent.basic_recurrent import BasicRecurrent
from models.baselines import LogisticRegressionSK, MLP
from models.PositiveLearningElkan.pu_learning import PULogisticRegressionSK
from project_paths import ProjectPaths
from run_files.single_train import single_training
from util.learning_rate_utilities import linear_geometric_curve
from util.tensor_provider import TensorProvider
from util.utilities import ensure_folder

if __name__ == "__main__":

    # Initialize tensor-provider (data-source)
    the_tensor_provider = TensorProvider(verbose=True)

    # Results path: wipe any previous comparison results and recreate the
    # directory. NOTE(review): `Path` and `shutil` are not imported in the
    # visible import block above — presumably imported earlier; verify.
    base_path = Path(ProjectPaths.results, "final_model_comparison")
    shutil.rmtree(str(base_path), ignore_errors=True)
    ensure_folder(base_path)

    # Get program IDs: train on accessible annotated programs, test on the
    # remaining (access-restricted) annotated programs.
    all_program_ids = the_tensor_provider.annotated_program_ids(
        access_restricted_data=True)
    training_programs = the_tensor_provider.accessible_annotated_program_ids
    test_programs = set(all_program_ids).difference(set(training_programs))

    # Settings
    n_runs = 1
    # NOTE(review): this script section appears truncated here.
import sqlite3
from pathlib import Path

import numpy as np

from models.baselines import LogisticRegressionSK
from project_paths import ProjectPaths
from util.sql_utilities import rows2sql_table
from util.tensor_provider import TensorProvider
from util.utilities import ensure_folder

# Initialize tensor-provider (data-source)
the_tensor_provider = TensorProvider(verbose=True)
print("")

###########
# Settings

# Path for pu-negatives results produced by an earlier PU-learning run.
inputs_path = Path(ProjectPaths.results,
                   "pu_learning_LogisticRegressionSKLEARN")

# Negative labels
# None: get labels that have been classified as negative in every single iteration of PU (really certain)
# float: threshold the final confidence of the pu-model with this value (higher means more certain of negativity)
negative_label_scheme = None

###########
# Get split from PU-learning

# Log on to database
# NOTE(review): this chunk is truncated here — the database connection code follows elsewhere.