Example #1
    def create(self, architecture: Architecture, metadata: Metadata,
               arguments: Configuration) -> Any:
        # create the input layer
        input_layer = self.create_input_layer(architecture, metadata,
                                              arguments)
        # wrap the input layer with the special gain input layer (to receive the mask)
        input_layer = GAINInputLayer(input_layer, metadata.get_num_features())

        # create the hidden layers factory
        hidden_layers_factory = self.create_other(
            "HiddenLayers", architecture, metadata,
            arguments.get("hidden_layers", {}))

        # create the output layer factory
        # this is different from a normal discriminator
        # because the output has the size of the input
        # it predicts if each feature is real or fake
        output_layer_factory = SingleOutputLayerFactory(
            metadata.get_num_features(), activation=Sigmoid())

        # create the discriminator
        return FeedForward(input_layer,
                           hidden_layers_factory,
                           output_layer_factory,
                           default_hidden_activation=Tanh())
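
To make the per-feature output concrete, here is a minimal standalone sketch (plain PyTorch, independent of the factory classes above) of a discriminator whose sigmoid output has one prediction per input feature; the doubled input size assumes the mask is concatenated to the features, which is what GAINInputLayer appears to do:

import torch
from torch.nn import Linear, Sequential, Sigmoid, Tanh

num_features = 8  # hypothetical feature count
discriminator = Sequential(
    Linear(num_features * 2, 32),  # features concatenated with the mask
    Tanh(),
    Linear(32, num_features),      # one real/fake prediction per feature
    Sigmoid(),
)

batch = torch.rand(4, num_features)                    # toy data
mask = torch.randint(0, 2, (4, num_features)).float()  # toy mask
predictions = discriminator(torch.cat([batch, mask], dim=1))
print(predictions.shape)  # torch.Size([4, 8])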
Example #2
    def create_input_layer(self, architecture: Architecture,
                           metadata: Metadata,
                           arguments: Configuration) -> InputLayer:
        # override the input layer size
        return self.create_other(
            "SingleInputLayer", architecture, metadata,
            Configuration({"input_size": metadata.get_num_features()}))
Example #3
    def train_generator_step(self, configuration: Configuration, metadata: Metadata,
                             architecture: Architecture) -> float:
        # clean previous gradients
        architecture.generator_optimizer.zero_grad()

        # conditional
        if "conditional" in architecture.arguments:
            # for now a uniform distribution is used, but this could be controlled differently
            # this works for both binary and categorical dependent variables
            number_of_conditions = metadata.get_dependent_variable().get_size()
            condition = to_gpu_if_available(
                FloatTensor(configuration.batch_size).uniform_(0, number_of_conditions))
        # non-conditional
        else:
            condition = None

        # generate a full batch of fake features
        fake_features = self.sample_fake(architecture, configuration.batch_size, condition=condition)

        # calculate loss
        loss = architecture.generator_loss(architecture, fake_features, condition=condition)

        # calculate gradients
        loss.backward()

        # update the generator weights
        architecture.generator_optimizer.step()

        # return the loss
        return to_cpu_if_was_in_gpu(loss).item()
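
The method follows the standard PyTorch update pattern: zero the gradients, run the forward pass, backpropagate, and step the optimizer. A minimal self-contained sketch of the same pattern, with a toy generator and a placeholder loss standing in for the Architecture components:

import torch
from torch.nn import Linear
from torch.optim import Adam

generator = Linear(16, 8)  # toy generator
optimizer = Adam(generator.parameters(), lr=1e-3)

optimizer.zero_grad()              # clean previous gradients
noise = torch.randn(32, 16)        # a batch of latent codes
fake_features = generator(noise)   # generate a full batch of fake features
loss = -fake_features.mean()       # placeholder generator loss
loss.backward()                    # calculate gradients
optimizer.step()                   # update the generator weights
print(loss.detach().item())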
Example #4
    def generate_sample(self, sampler: Sampler, configuration: Configuration,
                        metadata: Metadata) -> Tensor:
        # generate the samples
        samples = sampler.generate_sample()

        # for each desired variable value
        removed_dimensions = 0
        for variable, keep_value in self.keep_values.items():
            # check that the variable is either categorical or binary
            variable_metadata = metadata.get_independent_variable_by_name(
                variable)
            if not (variable_metadata.is_categorical()
                    or variable_metadata.is_binary()):
                raise Exception(
                    "Cannot reject variable '{}' because it has an invalid type."
                    .format(variable))
            # separate the variable
            index = variable_metadata.get_feature_index() - removed_dimensions
            value = samples[:, index:index + variable_metadata.get_size()]
            # for categorical variables we need to transform one-hot encoding into label encoding
            if variable_metadata.is_categorical():
                value = torch.argmax(value, dim=1)
            # reshape value
            value = value.view(-1)
            # keep only the samples with the desired value for that variable
            samples = samples[value == keep_value, :]
            # remove the variable
            left = samples[:, :index]
            right = samples[:, index + variable_metadata.get_size():]
            samples = torch.cat((left, right), dim=1)
            removed_dimensions += variable_metadata.get_size()

        # recalculate after filtering
        real_batch_size = len(samples)
        self.real_sample_size += real_batch_size

        # if any samples survived the filtering
        if real_batch_size > 0:
            # restart the iteration count
            self.iterations = 0
        # if no samples survived, count another failed attempt
        else:
            # try again
            self.iterations += 1

            if self.iterations >= self.max_iterations:
                raise Exception(
                    "Reached maximum number of iterations with {:d} samples."
                    .format(self.real_sample_size))

        return samples
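
Stripped of the metadata bookkeeping, each pass of the loop slices out a one-hot block, converts it to label encoding with argmax, keeps the matching rows, and drops the block. A standalone sketch with hypothetical sizes:

import torch

samples = torch.tensor([
    [0.1, 1.0, 0.0, 0.0],  # category 0
    [0.2, 0.0, 1.0, 0.0],  # category 1
    [0.3, 0.0, 0.0, 1.0],  # category 2
])
index, size, keep_value = 1, 3, 1  # the one-hot block spans columns 1..3

value = torch.argmax(samples[:, index:index + size], dim=1)  # label encoding
samples = samples[value == keep_value, :]                    # keep matching rows
samples = torch.cat((samples[:, :index],                     # drop the variable
                     samples[:, index + size:]), dim=1)
print(samples)  # tensor([[0.2000]])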
Example #5
    def create_output_layer_factory(
            self, architecture: Architecture, metadata: Metadata,
            arguments: Configuration) -> OutputLayerFactory:
        # override the output layer size
        output_layer_configuration = {"output_size": metadata.get_num_features()}
        # copy activation arguments only if defined
        if "output_layer" in arguments and "activation" in arguments.output_layer:
            output_layer_configuration["activation"] = arguments.output_layer.activation
        # create the output layer factory
        return self.create_other("SingleOutputLayer", architecture, metadata,
                                 Configuration(output_layer_configuration))
Example #6
    def __init__(self, input_size: int, metadata: Metadata,
                 categorical_activation: Module) -> None:
        super(MultiOutputLayer, self).__init__()

        self.layers = ModuleList()

        # accumulate binary or numerical variables into "blocks"
        current_block = None

        for variable_metadata in metadata.get_by_independent_variable():
            # first check if a block needs to be created
            if (current_block is not None
                    and not current_block.matches_type(variable_metadata)):
                # create the block
                self.layers.append(current_block.build(input_size))
                # empty the block
                current_block = None

            # if it is a binary or numerical variable
            if (variable_metadata.is_binary()
                    or variable_metadata.is_numerical()):
                # create a block
                if current_block is None:
                    current_block = BlockBuilder(variable_metadata)
                # or add to the existing block
                else:
                    current_block.add(variable_metadata)

            # if it is a categorical variable
            elif variable_metadata.is_categorical():
                # create the categorical layer
                self.layers.append(
                    Sequential(
                        Linear(input_size, variable_metadata.get_size()),
                        categorical_activation))

            # if it is another type
            else:
                raise Exception(
                    "Unexpected variable type '{}' for variable '{}'.".format(
                        variable_metadata.get_type(),
                        variable_metadata.get_name()))

        # if there is still accumulated data for a block
        if current_block is not None:
            # create the last block
            self.layers.append(current_block.build(input_size))
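
The forward pass is not shown in this snippet, but it would presumably apply each head to the shared input and concatenate the results. A minimal sketch of that idea with two hypothetical heads:

import torch
from torch.nn import Linear, ModuleList, Sequential, Softmax

input_size = 16
layers = ModuleList([
    Linear(input_size, 2),                              # a binary/numerical block
    Sequential(Linear(input_size, 3), Softmax(dim=1)),  # a categorical head
])

hidden = torch.randn(4, input_size)
outputs = torch.cat([layer(hidden) for layer in layers], dim=1)
print(outputs.shape)  # torch.Size([4, 5])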
Example #7
    def create(self, architecture: Architecture, metadata: Metadata,
               arguments: Configuration) -> Any:
        # create the input layer
        if self.code:
            input_size = architecture.arguments.code_size
        else:
            input_size = metadata.get_num_features()
        input_layer_configuration = {"input_size": input_size}

        input_layer = self.create_other(
            "SingleInputLayer", architecture, metadata,
            Configuration(input_layer_configuration))

        # conditional
        if "conditional" in architecture.arguments:
            # wrap the input layer with a conditional layer
            input_layer = ConditionalLayer(
                input_layer, metadata, **architecture.arguments.conditional)

        # mini-batch averaging
        if arguments.get("mini_batch_averaging", False):
            input_layer = MiniBatchAveraging(input_layer)

        # create the hidden layers factory
        hidden_layers_factory = self.create_other(
            "HiddenLayers", architecture, metadata,
            arguments.get("hidden_layers", {}))

        # create the output activation
        if self.critic:
            output_activation = View(-1)
        else:
            output_activation = Sequential(Sigmoid(), View(-1))

        # create the output layer factory
        output_layer_factory = SingleOutputLayerFactory(
            1, activation=output_activation)

        # create the discriminator
        return FeedForward(input_layer,
                           hidden_layers_factory,
                           output_layer_factory,
                           default_hidden_activation=LeakyReLU(0.2))
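
Note the difference between the two output modes: a WGAN-style critic returns an unbounded score, so only View(-1) is applied, while a classic GAN discriminator squashes the score through Sigmoid into a probability. Both reshape the output to a flat vector with one value per sample.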
Example #8
def generate_mask_for(source: Tensor, probability: float,
                      metadata: Metadata) -> Tensor:
    variable_masks = []

    # for each variable
    for variable_metadata in metadata.get_by_independent_variable():
        # ones are generated with the indicated probability
        # zeros are generated with the complement of the indicated probability
        variable_mask = (torch.zeros(len(source), 1).uniform_(0.0, 1.0) <
                         probability).float()

        # repeat across all the features if the variable has more than one feature (e.g. one-hot-encoded)
        if variable_metadata.get_size() > 1:
            variable_mask = variable_mask.repeat(1, variable_metadata.get_size())

        # add the variable mask
        variable_masks.append(variable_mask)

    # return the concatenation of each variable mask
    return torch.cat(variable_masks, dim=1)
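
Because this is a free function, its behavior is easy to demonstrate. A toy sketch of the same masking logic with the Metadata dependency replaced by a plain list of variable sizes (an assumption for illustration):

import torch

source = torch.rand(4, 4)  # 4 samples; the sizes below must sum to 4
variable_sizes = [1, 3]    # e.g. one numerical and one one-hot variable
probability = 0.8

variable_masks = []
for size in variable_sizes:
    # one Bernoulli draw per sample, shared across the variable's features
    mask = (torch.zeros(len(source), 1).uniform_(0.0, 1.0) < probability).float()
    if size > 1:
        mask = mask.repeat(1, size)
    variable_masks.append(mask)

print(torch.cat(variable_masks, dim=1).shape)  # torch.Size([4, 4])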
Example #9
    def create(self, architecture: Architecture, metadata: Metadata,
               arguments: Configuration) -> Any:
        # create the input layer
        input_layer = self.create_input_layer(architecture, metadata,
                                              arguments)
        # wrap the input layer with the special gain input layer (to receive the mask)
        input_layer = GAINInputLayer(input_layer, metadata.get_num_features())

        # create the hidden layers factory
        hidden_layers_factory = self.create_other(
            "HiddenLayers", architecture, metadata,
            arguments.get("hidden_layers", {}))

        # create the output layer factory
        output_layer_factory = self.create_output_layer_factory(
            architecture, metadata, arguments)

        # assemble the feed-forward network
        return FeedForward(input_layer,
                           hidden_layers_factory,
                           output_layer_factory,
                           default_hidden_activation=Tanh())
Example #10
    def __init__(self,
                 input_layer: InputLayer,
                 metadata: Metadata,
                 min_embedding_size: int = 2,
                 max_embedding_size: int = 50) -> None:
        super(ConditionalLayer, self).__init__()

        self.input_layer = input_layer

        dependent_variable = metadata.get_dependent_variable()

        if dependent_variable.is_binary():
            self.output_size = 1
            self.layer = Identity()
        elif dependent_variable.is_categorical():
            variable_size = dependent_variable.get_size()
            self.output_size = compute_embedding_size(variable_size,
                                                      min_embedding_size,
                                                      max_embedding_size)
            self.layer = Embedding(variable_size, self.output_size)
        else:
            raise Exception(
                "Invalid dependent variable type for conditional layer.")