def __init__(self, name, config):
    """
    Initializes the component.

    :param name: Component name (read from configuration file).
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructor(s) of parent class(es) - in the right order!
    Component.__init__(self, name, SentenceIndexer, config)
    WordMappings.__init__(self)

    # Set key mappings.
    self.key_inputs = self.stream_keys["inputs"]
    self.key_outputs = self.stream_keys["outputs"]

    # Read mode from the configuration.
    self.mode_reverse = self.config['reverse']

    # Force padding to a fixed length.
    self.fixed_padding = self.config['fixed_padding']

    # Whether to add <EOS> at the end of the sequence.
    self.enable_eos_token = self.config['eos_token']

    if self.mode_reverse:
        # We will need the reverse (index:word) mapping.
        self.ix_to_word = dict((v, k) for k, v in self.word_to_ix.items())

    # Get inputs distributions/indices flag.
    self.use_input_distributions = self.config["use_input_distributions"]
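# A minimal sketch (not part of the original source) of the (index:word)
# inversion used above; the toy word_to_ix dictionary is hypothetical, while in
# the component the mapping comes from the WordMappings parent class.
def _demo_reverse_word_mapping():
    word_to_ix = {"<PAD>": 0, "<EOS>": 1, "cat": 2}
    ix_to_word = dict((v, k) for k, v in word_to_ix.items())
    assert ix_to_word[2] == "cat"
    return ix_to_word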
def __init__(self, name, config):
    """
    Initializes object.

    :param name: Component name.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, AccuracyStatistics, config)

    # Get stream key mappings.
    self.key_targets = self.stream_keys["targets"]
    self.key_predictions = self.stream_keys["predictions"]
    self.key_masks = self.stream_keys["masks"]

    # Get prediction distributions/indices flag.
    self.use_prediction_distributions = self.config["use_prediction_distributions"]

    # Get masking flag.
    self.use_masking = self.config["use_masking"]

    # Get statistics key mappings.
    self.key_accuracy = self.statistics_keys["accuracy"]
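# A hedged sketch of how the targets/predictions/masks streams configured above
# are typically combined into a masked accuracy; this is an assumption for
# illustration, not the component's actual statistics code.
import torch

def _demo_masked_accuracy(predictions, targets, masks):
    # Compare index predictions with targets, counting only unmasked positions.
    correct = (predictions == targets).float() * masks
    return correct.sum() / masks.sum().clamp(min=1)

# Example: the last position is masked out, so accuracy is 2/2 = 1.0.
# _demo_masked_accuracy(torch.tensor([1, 2, 3]), torch.tensor([1, 2, 0]), torch.tensor([1., 1., 0.]))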
def __init__(self, name, config):
    """
    Initializes object. Loads keys and values of variables and adds them to globals.

    :param name: Component name.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, GlobalVariablePublisher, config)

    # Get list of keys of global variables - can be either a list of strings or a single string with comma-separated values.
    keys = self.config["keys"]
    if type(keys) is str:
        keys = keys.replace(" ", "").split(",")

    # Get list of values - must be a single value or a list.
    values = self.config["values"]
    if type(values) is list:
        # Make sure that both are lists of the same length.
        if type(keys) is not list or len(keys) != len(values):
            raise ConfigurationError(
                "Number of parameters indicated by provided 'keys' must be equal to number of provided 'values'")
        # Publish globals one by one.
        for (key, value) in zip(keys, values):
            self.globals[key] = value
    elif len(keys) > 0 and keys[0] != '':
        # Publish a single global.
        self.globals[keys[0]] = values
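# Hypothetical illustration of the key parsing above: a comma-separated string
# of keys is normalized to a list before the values are published one by one.
def _demo_parse_keys():
    keys = "input_size, hidden_size"  # assumed configuration value
    if type(keys) is str:
        keys = keys.replace(" ", "").split(",")
    assert keys == ["input_size", "hidden_size"]
    return keys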
def __init__(self, name, config):
    """
    Initializes the component.

    :param name: Component name (read from configuration file).
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, SentenceTokenizer, config)

    # Read the actual configuration.
    self.mode_detokenize = config['detokenize']

    # Tokenizer.
    self.tokenizer = WhitespaceTokenizer()

    # Set key mappings.
    self.key_inputs = self.stream_keys["inputs"]
    self.key_outputs = self.stream_keys["outputs"]

    if self.mode_detokenize:
        # list of strings -> sentence.
        self.processor = self.detokenize_sample
    else:
        # sentence -> list of strings.
        self.processor = self.tokenize_sample
def __init__(self, name, class_type, config):
    """
    Initializes a Model object.

    :param name: Model name.
    :type name: str

    :param class_type: Class type of the component.

    :param config: Parameters read from configuration file.
    :type config: ``ptp.configuration.ConfigInterface``

    This constructor:

    - calls base class constructors (save config, name, logger, app_state etc.)

    - initializes the best model loss (used to select which model to save) to ``np.inf``:

        >>> self.best_loss = np.inf

    """
    # Call constructors of parent classes.
    Component.__init__(self, name, class_type, config)
    Module.__init__(self)

    # Initialize the best model loss, used when deciding which model to save.
    self.best_loss = np.inf

    # Flag indicating whether the model is frozen or not.
    self.frozen = False
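# A hedged sketch of what the `frozen` flag typically guards in such a model
# class: disabling gradient computation for all parameters. The function name
# and body are assumptions for illustration, not confirmed original code.
def _demo_freeze(model):
    for param in model.parameters():
        param.requires_grad = False
    model.frozen = True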
def __init__(self, name, config):
    """
    Initializes object.

    :param name: Component name.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, PrecisionRecallStatistics, config)

    # Get stream key mappings.
    self.key_targets = self.stream_keys["targets"]
    self.key_predictions = self.stream_keys["predictions"]
    self.key_masks = self.stream_keys["masks"]

    # Get prediction distributions/indices flag.
    self.use_prediction_distributions = self.config["use_prediction_distributions"]

    # Get masking flag.
    self.use_masking = self.config["use_masking"]

    # Get statistics key mappings.
    self.key_precision = self.statistics_keys["precision"]
    self.key_recall = self.statistics_keys["recall"]
    self.key_f1score = self.statistics_keys["f1score"]

    # Get (or create) vocabulary.
    if self.config["use_word_mappings"]:
        # Get labels from word mappings.
        self.labels = []
        self.index_mappings = {}
        # Assume they are ordered, starting from 0.
        for i, (word, index) in enumerate(self.globals["word_mappings"].items()):
            self.labels.append(word)
            self.index_mappings[index] = i
        # Set number of classes by looking at labels.
        self.num_classes = len(self.labels)
    else:
        # Get the number of possible outputs.
        self.num_classes = self.globals["num_classes"]
        self.labels = list(range(self.num_classes))
        self.index_mappings = {i: i for i in range(self.num_classes)}

    # Check display options.
    self.show_confusion_matrix = self.config["show_confusion_matrix"]
    self.show_class_scores = self.config["show_class_scores"]
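# A minimal numpy sketch (an assumption, not the component's code) of deriving
# per-class precision, recall and F1 from a confusion matrix in which rows are
# targets and columns are predictions.
import numpy as np

def _demo_precision_recall_f1(confusion):
    tp = np.diag(confusion).astype(float)
    precision = tp / np.maximum(confusion.sum(axis=0), 1)  # tp / predicted per class
    recall = tp / np.maximum(confusion.sum(axis=1), 1)     # tp / actual per class
    f1 = 2 * precision * recall / np.maximum(precision + recall, 1e-12)
    return precision, recall, f1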
def __init__(self, name, config):
    """
    Initializes the object. Loads keys, word mappings and vocabularies.

    :param name: Name of the component, read from the configuration file.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, JoinMaskedPredictions, config)

    # Get input key mappings.
    # Load list of prediction stream names (keys).
    self.input_prediction_stream_keys = self.config["input_prediction_streams"]
    if type(self.input_prediction_stream_keys) == str:
        self.input_prediction_stream_keys = self.input_prediction_stream_keys.replace(" ", "").split(",")

    # Load list of mask stream names (keys).
    self.input_mask_stream_keys = self.config["input_mask_streams"]
    if type(self.input_mask_stream_keys) == str:
        self.input_mask_stream_keys = self.input_mask_stream_keys.replace(" ", "").split(",")

    # Load list of word mapping names (keys).
    input_word_mappings_keys = self.config["input_word_mappings"]
    if type(input_word_mappings_keys) == str:
        input_word_mappings_keys = input_word_mappings_keys.replace(" ", "").split(",")

    # Retrieve input word mappings from globals.
    self.input_ix_to_word = []
    for wmk in input_word_mappings_keys:
        # Get word mappings.
        word_to_ix = self.globals[wmk]
        # Create inverse transformation.
        ix_to_word = {value: key for (key, value) in word_to_ix.items()}
        self.input_ix_to_word.append(ix_to_word)

    # Get output key mappings.
    self.key_output_indices = self.stream_keys["output_indices"]
    self.key_output_strings = self.stream_keys["output_strings"]

    # Retrieve output word mappings from globals.
    self.output_word_to_ix = self.globals["output_word_mappings"]
def __init__(self, name, config):
    """
    Initializes object.

    :param name: Component name.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, BatchSizeStatistics, config)

    # Set key mappings.
    self.key_indices = self.stream_keys["indices"]
def __init__(self, name, config):
    """
    Initializes the component.

    :param name: Component name (read from configuration file).
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, SentenceTokenizer, config)

    # Read the actual configuration.
    self.mode_detokenize = config['detokenize']

    # Get preprocessing.
    self.preprocessing = get_value_list_from_dictionary(
        "preprocessing", self.config,
        'none | lowercase | remove_punctuation | all'.split(" | "))
    if 'none' in self.preprocessing:
        self.preprocessing = []
    if 'all' in self.preprocessing:
        self.preprocessing = 'lowercase | remove_punctuation'.split(" | ")
    self.logger.info("Applied preprocessing: {}".format(self.preprocessing))

    self.remove_characters = get_value_list_from_dictionary("remove_characters", self.config)
    self.logger.info("Additional characters that will be removed during preprocessing: {}".format(self.remove_characters))

    if 'remove_punctuation' in self.preprocessing:
        self.translator = str.maketrans('', '', string.punctuation)

    # Tokenizer.
    self.tokenizer = nltk.tokenize.WhitespaceTokenizer()

    # Set key mappings.
    self.key_inputs = self.stream_keys["inputs"]
    self.key_outputs = self.stream_keys["outputs"]

    if self.mode_detokenize:
        # list of strings -> sentence.
        self.processor = self.detokenize_sample
    else:
        # sentence -> list of strings.
        self.processor = self.tokenize_sample
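# A hypothetical end-to-end illustration of the preprocessing configured above:
# lowercasing, punctuation removal via str.maketrans, and whitespace tokenization.
import string
import nltk

def _demo_preprocess(sentence):
    sentence = sentence.lower()
    sentence = sentence.translate(str.maketrans('', '', string.punctuation))
    return nltk.tokenize.WhitespaceTokenizer().tokenize(sentence)

# _demo_preprocess("Hello, World!") returns ["hello", "world"].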
def __init__(self, name, config):
    """
    Initializes the component.

    :param name: Component name (read from configuration file).
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructor(s) of parent class(es) - in the right order!
    Component.__init__(self, name, SentenceOneHotEncoder, config)
    WordMappings.__init__(self)

    # Set key mappings.
    self.key_inputs = self.stream_keys["inputs"]
    self.key_outputs = self.stream_keys["outputs"]
def __init__(self, name, config):
    """
    Initializes object. Loads key and word mappings.

    :param name: Component name.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, StringToMask, config)

    # Get key mappings.
    self.key_strings = self.stream_keys["strings"]
    self.key_masks = self.stream_keys["masks"]

    # Retrieve word mappings from globals.
    self.word_to_ix = self.globals["word_mappings"]
def __init__(self, name, class_type, config):
    """
    Initializes loss object.

    :param name: Loss name.
    :type name: str

    :param class_type: Class type of the component.

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, class_type, config)

    # Get key mappings.
    self.key_targets = self.stream_keys["targets"]
    self.key_predictions = self.stream_keys["predictions"]
    self.key_loss = self.stream_keys["loss"]
def __init__(self, name, config):
    """
    Initializes the bag-of-words encoder by creating a dictionary mapping ALL words from training, validation and test sets into unique indices.

    :param name: Component name (read from configuration file).
    :type name: str

    :param config: Dictionary of parameters (read from configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, BOWEncoder, config)

    # Default name mappings for all encoders.
    self.key_inputs = self.stream_keys["inputs"]
    self.key_outputs = self.stream_keys["outputs"]

    # Retrieve bow size from global variables.
    self.bow_size = self.globals["bow_size"]
def __init__(self, name, config):
    """
    Initializes the component.

    :param name: Component name (read from configuration file).
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructor(s) of parent class(es) - in the right order!
    Component.__init__(self, name, WordDecoder, config)
    WordMappings.__init__(self)

    # Construct reverse mapping for faster processing.
    self.ix_to_word = dict((v, k) for k, v in self.word_to_ix.items())

    # Set key mappings.
    self.key_inputs = self.stream_keys["inputs"]
    self.key_outputs = self.stream_keys["outputs"]
def __init__(self, name, config):
    """
    Initializes object.

    :param name: Name of the component loaded from the configuration file.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, ReduceTensor, config)

    # Set key mappings.
    self.key_inputs = self.stream_keys["inputs"]
    self.key_outputs = self.stream_keys["outputs"]

    # Get number of input dimensions from configuration.
    self.num_inputs_dims = self.config["num_inputs_dims"]

    # Get size of a single input item (last dimension) from globals.
    self.input_size = self.globals["input_size"]

    # Get reduction parameters from configuration.
    self.dim = self.config["reduction_dim"]
    self.keepdim = self.config["keepdim"]

    # Set reduction type.
    rt = get_value_from_dictionary(
        "reduction_type", self.config,
        'sum | mean | min | max | argmin | argmax'.split(" | "))
    reduction_types = {
        "sum": torch.sum,
        "mean": torch.mean,
        "min": torch.min,
        "max": torch.max,
        "argmin": torch.argmin,
        "argmax": torch.argmax,
        }
    self.reduction = reduction_types[rt]
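# A small usage sketch (with toy values) of the reduction dispatch above: the
# configured name selects a torch function applied along `dim`. Note that
# torch.min/torch.max called with a `dim` argument return (values, indices)
# named tuples rather than plain tensors.
import torch

def _demo_reduction():
    x = torch.ones(2, 3)
    reduction = torch.sum  # e.g. selected for reduction_type "sum"
    return reduction(x, dim=1, keepdim=False)  # tensor([3., 3.])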
def __init__(self, name, config):
    """
    Initializes object.

    :param name: Component name.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, PrecisionRecallStatistics, config)

    # Set key mappings.
    self.key_targets = self.stream_keys["targets"]
    self.key_predictions = self.stream_keys["predictions"]

    # Get statistic key mappings.
    self.key_precision = self.statistics_keys["precision"]
    self.key_recall = self.statistics_keys["recall"]
    self.key_f1score = self.statistics_keys["f1score"]

    # Get the number of possible outputs.
    self.num_classes = self.globals["num_classes"]

    # Get (or create) vocabulary.
    if self.config["use_word_mappings"]:
        # Get labels from word mappings.
        self.labels = []
        # Assume they are ordered, starting from 0.
        for key in self.globals["word_mappings"].keys():
            self.labels.append(key)
    else:
        self.labels = list(range(self.num_classes))

    # Check display options.
    self.show_confusion_matrix = self.config["show_confusion_matrix"]
    self.show_class_scores = self.config["show_class_scores"]
def __init__(self, name, config):
    """
    Initializes object.

    :param name: Component name.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, ImageViewer, config)

    # Get default key mappings.
    self.key_indices = self.stream_keys["indices"]
    self.key_images = self.stream_keys["images"]
    self.key_labels = self.stream_keys["labels"]
    self.key_answers = self.stream_keys["answers"]

    # Get sample number.
    self.sample_number = self.config["sample_number"]
def __init__(self, name, config):
    """
    Initializes object.

    :param name: Component name.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, BLEUStatistics, config)

    # Get stream key mappings.
    self.key_targets = self.stream_keys["targets"]
    self.key_predictions = self.stream_keys["predictions"]
    self.key_masks = self.stream_keys["masks"]

    # Get prediction distributions/indices flag.
    self.use_prediction_distributions = self.config["use_prediction_distributions"]

    # Get masking flag.
    #self.use_masking = self.config["use_masking"]

    # Get ignored words.
    self.ignored_words = self.config["ignored_words"]

    # Retrieve word mappings from globals.
    word_to_ix = self.globals["word_mappings"]
    # Construct reverse mapping for faster processing.
    self.ix_to_word = dict((v, k) for k, v in word_to_ix.items())

    # Get weights of n-grams used when calculating the score.
    self.weights = self.config["weights"]

    # Get statistics key mappings.
    self.key_bleu = self.statistics_keys["bleu"]
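# A hedged sketch of BLEU scoring with NLTK, matching the `weights` option read
# above; the token lists are toy data.
from nltk.translate.bleu_score import sentence_bleu

def _demo_bleu():
    reference = [["the", "cat", "sat", "on", "the", "mat"]]
    candidate = ["the", "cat", "sat", "on", "the", "mat"]
    # Uniform 4-gram weights, as in the standard BLEU-4 setting.
    return sentence_bleu(reference, candidate, weights=(0.25, 0.25, 0.25, 0.25))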
def __init__(self, name, class_type, config):
    """
    Initializes task object:

        - calls base class constructors.

        - sets key_indices variable (used for storing indices of samples)

            >>> self.key_indices = self.stream_keys["indices"]

        - sets empty curriculum learning configuration

            >>> self.curriculum_config = {}

    :param name: Task name.
    :type name: str

    :param class_type: Class type of the component.

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`

    .. note::

        It is likely to encounter a case where the model needs a parameter value only known when the task has been instantiated, like the size of a vocabulary set or the number of marker bits.
        The user can pass those values in this app_state. All objects will be able to access it later:

        >>> self.app_state["new_global_value"] = 1 # Sets global value.
        >>> val = self.app_state["new_global_value"] # Gets global value.

    """
    # Call constructors of parent classes.
    Component.__init__(self, name, class_type, config)
    Dataset.__init__(self)

    # Get key mappings for indices.
    self.key_indices = self.stream_keys["indices"]

    # Empty curriculum learning config - for now.
    self.curriculum_config = {}
def __init__(self, name, config):
    """
    Initializes object.

    :param name: Component name.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, StreamViewer, config)

    # Get key mappings for indices.
    self.key_indices = self.stream_keys["indices"]

    # Load list of stream names (keys).
    self.input_stream_keys = get_value_list_from_dictionary("input_streams", self.config)

    # Get sample number.
    self.sample_number = self.config["sample_number"]
def __init__(self, name, config):
    """
    Initializes the object, retrieves names of input streams and creates the output file in the experiment directory.

    :param name: Name of the component.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, StreamFileExporter, config)

    # Get key mappings for indices.
    self.key_indices = self.stream_keys["indices"]

    # Load list of stream names (keys).
    self.input_stream_keys = get_value_list_from_dictionary("input_streams", self.config)

    # Get separator.
    self.separator = self.config["separator"]

    # Create file where we will write the results.
    filename = self.config["filename"]
    abs_filename = path.join(self.app_state.log_dir, filename)
    self.file = open(abs_filename, 'w')

    # Export additional line with separator.
    if self.config["export_separator_line_to_csv"]:
        self.file.write("sep={}\n".format(self.separator))

    # Export header - once, when we process the first batch.
    self.export_header = self.config["export_header_to_csv"]

    self.logger.info("Writing values from {} streams to {}".format(self.input_stream_keys, abs_filename))
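# Illustration (with a hypothetical file and streams) of the CSV conventions
# used above: spreadsheet applications such as Excel honour an optional
# "sep=..." first line when detecting the separator, before the header row.
def _demo_csv_layout(filepath, separator=";"):
    with open(filepath, 'w') as f:
        f.write("sep={}\n".format(separator))                       # separator line
        f.write(separator.join(["indices", "predictions"]) + "\n")  # header row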
def __init__(self, name, config):
    """
    Initializes object.

    :param name: Name of the component loaded from the configuration file.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, ListToTensor, config)

    # Set key mappings.
    self.key_inputs = self.stream_keys["inputs"]
    self.key_outputs = self.stream_keys["outputs"]

    # Get number of input dimensions from configuration.
    self.num_inputs_dims = self.config["num_inputs_dims"]

    # Get size of a single input item (last dimension) from globals.
    self.input_size = self.globals["input_size"]
def __init__(self, name, config):
    """
    Initializes object.

    :param name: Component name.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, ReshapeTensor, config)

    # Set key mappings.
    self.key_inputs = self.stream_keys["inputs"]
    self.key_outputs = self.stream_keys["outputs"]

    # Get input and output shapes from configuration.
    self.input_dims = [int(x) for x in self.config["input_dims"]]
    self.output_dims = [int(x) for x in self.config["output_dims"]]

    # Set global variable - all dimensions EXCEPT BATCH.
    self.globals["output_size"] = self.output_dims[1:]
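# A toy sketch (assumed shapes) of reshaping while preserving the batch
# dimension, consistent with exporting output_dims[1:] as "output_size" above.
import torch

def _demo_reshape():
    output_dims = [-1, 2, 8]  # hypothetical configuration
    x = torch.zeros(4, 16)    # batch of 4 flat items
    return x.view(x.shape[0], *output_dims[1:])  # shape [4, 2, 8]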
def __init__(self, name, config):
    """
    Initializes object.

    :param name: Component name.
    :type name: str

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, ConcatenateTensor, config)

    # Get key mappings.
    self.key_outputs = self.stream_keys["outputs"]

    # Load list of stream names (keys).
    self.input_stream_keys = self.config["input_streams"]
    if type(self.input_stream_keys) == str:
        self.input_stream_keys = self.input_stream_keys.replace(" ", "").split(",")

    # Get input shapes from configuration, assuming it is a list of lists.
    self.input_stream_dims = [[int(x) for x in dims] for dims in self.config["input_dims"]]

    # Get output shape from configuration.
    self.output_dims = [int(x) for x in self.config["output_dims"]]

    # Get concatenation dimension.
    self.dim = self.config["dim"]

    # Set global variable - all dimensions EXCEPT BATCH.
    self.globals["output_size"] = self.output_dims[1:]
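# A toy torch.cat sketch (assumed shapes and dim) for the concatenation
# configured above: tensors must agree on all dimensions except the chosen one.
import torch

def _demo_concatenate():
    a = torch.zeros(4, 3)
    b = torch.zeros(4, 5)
    return torch.cat([a, b], dim=1)  # shape [4, 8]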
def __init__(self, name, config):
    # Call constructor of the parent class.
    Component.__init__(self, name, None, config)
def __init__(self, name, class_type, config):
    """
    Initializes the (word:index) mappings. Loads parameters from configuration.

    :param name: Component name (read from configuration file).
    :type name: str

    :param class_type: Class type of the component (derived from this class).

    :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
    :type config: :py:class:`ptp.configuration.ConfigInterface`
    """
    # Call constructors of parent classes.
    Component.__init__(self, name, class_type, config)

    # Read the actual configuration.
    self.data_folder = os.path.expanduser(self.config['data_folder'])

    # Source and resulting (indexed) vocabulary.
    self.source_vocabulary_files = self.config['source_vocabulary_files']
    self.word_mappings_file = self.config['word_mappings_file']

    # Set absolute path to file with word mappings.
    word_mappings_file_path = os.path.join(os.path.expanduser(self.data_folder), self.word_mappings_file)

    # Check if we want to import word mappings from globals.
    if self.config["import_word_mappings_from_globals"]:
        self.word_to_ix = self.globals["word_mappings"]
        assert (len(self.word_to_ix) > 0), "The word mappings imported from global variables are empty!"
        # We could also get vocabulary_size from globals... but what for;)

    elif self.word_mappings_file != "" and os.path.exists(word_mappings_file_path) and not self.config['regenerate']:
        # Try to load the preprocessed word mappings.
        self.word_to_ix = wm.load_word_mappings_from_csv_file(self.logger, self.data_folder, self.word_mappings_file)
        assert (len(self.word_to_ix) > 0), "The word mappings loaded from file are empty!"

    else:
        # Try to generate new word mappings from source files.
        self.word_to_ix = wm.generate_word_mappings_from_source_files(self.logger, self.data_folder, self.source_vocabulary_files)
        assert (len(self.word_to_ix) > 0), "The word mappings generated from sources are empty!"
        # Ok, save mappings, so next time we will simply load them.
        wm.save_word_mappings_to_csv_file(self.logger, self.data_folder, self.word_mappings_file, self.word_to_ix)

    # Check if additional tokens are present.
    self.additional_tokens = self.config["additional_tokens"].split(',')
    for word in self.additional_tokens:
        # If new token.
        if word != '' and word not in self.word_to_ix:
            self.word_to_ix[word] = len(self.word_to_ix)

    self.logger.info("Initialized word mappings with vocabulary of size {}".format(len(self.word_to_ix)))

    # Check if we want to export word mappings to globals.
    if self.config["export_word_mappings_to_globals"]:
        self.globals["word_mappings"] = self.word_to_ix
        # Export vocabulary size to globals.
        self.globals["vocabulary_size"] = len(self.word_to_ix)
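# A simplified, hypothetical sketch of generating (word:index) mappings from a
# single text file, approximating what wm.generate_word_mappings_from_source_files
# does; the real helper additionally handles multiple source files and logging.
def _demo_generate_word_mappings(filepath):
    word_to_ix = {}
    with open(filepath) as f:
        for line in f:
            for word in line.strip().split():
                if word not in word_to_ix:
                    word_to_ix[word] = len(word_to_ix)
    return word_to_ix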
def __init__(self):
    # Call constructor of the parent class with a mockup name and an empty config.
    Component.__init__(self, "MockupComponent", None, ConfigInterface())