def shape(self):
        """The dictionary shape of the model"""

        shape = {}

        try:
            if self.name == 'MLPClassifier':
                shape['name'] = self.name
                shape['classifier_type'] = 'multilabel'
                shape['classes'] = list(self.model.classes_)
                shape['n_classes'] = len(self.model.classes_)
                shape['n_features'] = len(self.model.coefs_[0])
                shape['hidden_activation'] = self.model.activation
                shape['output_activation'] = self.model.out_activation_

                # coefficients & intercepts of hidden layers
                hl_coeffs = self.model.coefs_[:-1]
                hl_intercepts = self.model.intercepts_[:-1]

                if len(hl_coeffs) != len(hl_intercepts):
                    raise ConfigError(
                        "Hidden coefficients & intercepts are not equally "
                        "sized: {}/{}".format(len(hl_coeffs), len(hl_intercepts)))

                hcoeffs = []
                for layer in hl_coeffs:
                    hcoeffs.append([[float(x) for x in cx] for cx in layer])
                shape['hidden_coeffs'] = hcoeffs

                shape['hidden_intercepts'] = \
                    [[float(x) for x in ix] for ix in hl_intercepts]

                # coefficients & intercepts of output layer
                ocoeffs = self.model.coefs_[-1]
                ocoeffs = [[float(x) for x in ox] for ox in ocoeffs]
                ointercepts = self.model.intercepts_[-1]

                if len(ocoeffs[0]) != len(ointercepts):
                    raise ConfigError(
                        "Output coefficients & intercepts are not equally "
                        "sized: {}/{}".format(len(ocoeffs[0]), len(ointercepts)))

                shape['output_coeffs'] = ocoeffs
                shape['output_intercepts'] = list(ointercepts)
            else:
                self.logger.warning(
                    "Unknown shape for {} classifier (WIP)".format(self.name))
        except Exception as err:
            raise CaughtException(
                "Exception encountered when recovering the {} classifier "
                "model's shape".format(self.name)) from err

        return shape
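For reference, a minimal standalone sketch of the same extraction against a fitted scikit-learn MLPClassifier; the toy dataset and layer sizes below are illustrative, not part of the original code:

import json

from sklearn.datasets import make_classification
from sklearn.neural_network import MLPClassifier

# fit a tiny network so that coefs_ / intercepts_ are populated
X, y = make_classification(n_samples=200, n_features=10, n_classes=3,
                           n_informative=5, random_state=0)
clf = MLPClassifier(hidden_layer_sizes=(8,), max_iter=500,
                    random_state=0).fit(X, y)

shape = {
    'classes': clf.classes_.tolist(),
    'n_classes': len(clf.classes_),
    'n_features': len(clf.coefs_[0]),
    'hidden_activation': clf.activation,
    'output_activation': clf.out_activation_,
    'hidden_coeffs': [c.tolist() for c in clf.coefs_[:-1]],
    'hidden_intercepts': [i.tolist() for i in clf.intercepts_[:-1]],
    'output_coeffs': clf.coefs_[-1].tolist(),
    'output_intercepts': clf.intercepts_[-1].tolist(),
}
print(json.dumps(shape)[:100])  # the dict is JSON-serializable for export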
Example 2
    def __init__(self, model_name, model_file):
        """Load the transformation model"""

        super().__init__()

        model_name = model_name.upper()

        if model_name not in Topics.known_models:
            raise ConfigError("Unknown model name '{}'. Choose from {}".format(
                model_name, Topics.known_models))

        self.model = Topics.known_models[model_name].load(model_file)

        if self.model is None:
            raise ConfigError("Did not load {} model".format(model_name))
Example 3
def check_folder_readable(input_folder):
    """Static method to check folder existance"""

    input_folder = value_checkup(input_folder)

    if not os.path.isdir(input_folder):
        raise ConfigError("Folder '{}' is missing".format(input_folder))
Example 4
def check_file_readable(input_file):
    """Static method to check file existance"""

    input_file = value_checkup(input_file)

    if not os.path.isfile(input_file) or not os.access(input_file, os.R_OK):
        raise ConfigError(
            "File '{}' is missing or not readable".format(input_file))
    def __init__(self, input_file):
        """Initialize with the input filename"""

        super().__init__()

        if not isinstance(input_file, str):
            raise ConfigError(
                "Given parameter {} is not String".format(input_file))

        utils.check_file_readable(input_file)
        self.filename = input_file
    def __iter__(self):
        """
        Iterate through documents:
        yield the bow representation of each document
        """

        if not isinstance(self.input, list):
            raise ConfigError('Input argument is not a List')

        for filename in self.input:  # each file
            with open(filename, 'r') as stream:
                for line in stream:  # each line
                    doc = json.loads(line)
                    yield self.dictionary.doc2bow(doc['content'].split())
    def get_texts(self):
        """
        Iterate through documents:
        yield each token on each document
        """

        if not isinstance(self.input, list):
            raise ConfigError('Input argument is not a List')

        for filename in self.input:  # each file
            with open(filename, 'r') as stream:
                for line in stream:  # each line
                    doc = json.loads(line)
                    yield doc['content'].split()  # split on each word
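The two iterators above stream JSON-lines files (one document per line with a 'content' field); a minimal standalone sketch of the same pattern feeding a gensim Dictionary, where the file name and JSON layout are assumptions:

import json

from gensim.corpora import Dictionary

files = ['data/docs-0.json']  # illustrative

def texts():
    # yield one whitespace-tokenized document per JSON line
    for filename in files:
        with open(filename, 'r') as stream:
            for line in stream:
                yield json.loads(line)['content'].split()

dictionary = Dictionary(texts())                           # id2word mapping
bows = [dictionary.doc2bow(tokens) for tokens in texts()]  # bow per document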
Example 8
    def __init__(self, output_file):
        """Initialize with the input filename"""

        super().__init__()

        if not isinstance(output_file, str):
            raise ConfigError(
                "Given parameter {} is not a String".format(output_file))

        self.logger.info('Initialized empty corpus')
        self.logger.info("Save new corpus in {} file".format(output_file))

        utils.create_path(output_file)

        self.ofstream = open(output_file, 'w')
        self.size = 0
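utils.create_path is not shown; a hypothetical stand-in, assuming it only has to guarantee that the parent directory of the output file exists before the stream is opened:

import os

def create_path(output_file):
    """Hypothetical stand-in for utils.create_path."""
    parent = os.path.dirname(output_file)
    if parent:
        os.makedirs(parent, exist_ok=True)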
Example 9
    def __init__(self, model_name, **kwargs):
        """Initialize the transformation model"""

        super().__init__()

        if model_name.lower() not in self.TRANSFORMERS:
            raise ConfigError("Unknown model name '{}'. Choose from {}".format(
                model_name, self.TRANSFORMERS.keys()))

        self.model = None
        self.name = model_name.lower()

        # define the model's training configuration
        # update default arguments when new provided
        self.kwargs = dict(self.TRANSFORMERS[self.name])
        self.kwargs.update(kwargs)

        self.vsize = self.kwargs['size']

        self.logger.info("Initialize the {} transformation model".format(
            self.name))
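The TRANSFORMERS table itself is not shown; the __init__ above only requires each entry to hold default training kwargs including a 'size' key (read into self.vsize). An illustrative sketch, assuming gensim's pre-4.0 keyword names:

TRANSFORMERS = {
    # illustrative defaults only; merged with user-provided kwargs in __init__
    'word2vec': {'size': 300, 'window': 5, 'min_count': 5},
    'doc2vec': {'size': 300, 'window': 5, 'min_count': 5},
}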
Example 10
    def __init__(self, input_files):
        """Initialize with the list of filenames"""

        super().__init__()

        if not isinstance(input_files, list):
            raise ConfigError('Given parameter is not a List')

        for filename in input_files:
            utils.check_file_readable(filename)

        # count the number of documents in each file
        self.ndocs = [count_file_lines(fn) for fn in input_files]
        self.logger.info("Available data for training: {}".format(self.ndocs))

        # create one generator for each input file
        # => return one document at a time from each input file
        self.generators = [loop_doc(filename) for filename in input_files]

        # where we stopped reading from files
        self.stop_index = 0
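count_file_lines and loop_doc are referenced but not defined here; hedged sketches of what the __init__ above assumes, namely a per-file line counter and a generator that yields one document (line) at a time, rewinding when the file is exhausted:

def count_file_lines(filename):
    # number of documents, assuming one JSON document per line
    with open(filename, 'r') as stream:
        return sum(1 for _ in stream)

def loop_doc(filename):
    # endless generator: one line per next() call, restarting at EOF
    while True:
        with open(filename, 'r') as stream:
            for line in stream:
                yield line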
    def __init__(self, model_name, model_file):
        """Initialize the transformation model"""

        super().__init__()

        if model_name.upper() not in self.TRANSFORMERS:
            raise ConfigError("Unknown model name '{}'. Choose from {}".format(
                model_name, self.TRANSFORMERS.keys()))

        utils.check_file_readable(model_file)

        self.name = model_name.upper()
        self.model = self.TRANSFORMERS[self.name].load(model_file)

        self.ntopics = 0

        if hasattr(self.model, 'num_topics'):
            self.ntopics = self.model.num_topics

        if self.name == "LSI" \
                and self.ntopics != self.model.projection.u[0].size:
            self.ntopics = self.model.projection.u[0].size

        self.logger.info("Loaded {} transformation model".format(self.name))
    def check_model(self):
        """Check if the model was properly loaded"""

        if self.model is None:
            raise ConfigError("Null {} transformation model".format(self.name))
Example 13
    def check_model(self):
        """Check if the model was initialized"""

        if self.model is None:
            raise ConfigError("Failed to build the '{}' model".format(
                self.name))
Example 14
def get_requirements(source):
    if not os.path.exists(source):
        raise ConfigError("Requirements file {} is missing".format(source))

    with open(source, 'r') as f:
        # skip blank lines and comment lines so they don't end up as requirements
        return [req.strip() for req in f
                if req.strip() and not req.lstrip().startswith('#')]
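Typical usage is to feed the parsed list straight into setuptools from setup.py; a short sketch where the package name and requirements path are illustrative:

from setuptools import setup

setup(
    name='example-package',
    install_requires=get_requirements('requirements.txt'),
)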