def shape(self):
    """Return the dictionary shape of the model.

    For an ``MLPClassifier`` model the returned dict exposes the class
    labels, layer counts, activation names and the JSON-serializable
    weights/biases of the hidden and output layers.  Any other
    classifier name only triggers a warning and yields an empty dict.

    Returns:
        dict: the model's shape description (empty for unknown models).

    Raises:
        CaughtException: when any error occurs while reading the model
            attributes (size mismatches included, since the internal
            ConfigError is re-wrapped by the handler below).
    """
    shape = {}
    try:
        if self.name == 'MLPClassifier':
            shape['name'] = self.name
            shape['classifier_type'] = 'multilabel'
            shape['classes'] = list(self.model.classes_)
            shape['n_classes'] = len(self.model.classes_)
            shape['n_features'] = len(self.model.coefs_[0])
            shape['hidden_activation'] = self.model.activation
            shape['output_activation'] = self.model.out_activation_
            # coefficients & intercepts of hidden layers
            hl_coeffs = self.model.coefs_[:-1]
            hl_intercepts = self.model.intercepts_[:-1]
            if len(hl_coeffs) != len(hl_intercepts):
                raise ConfigError(
                    "Hidden coefficients&intercepts not equally sized {}/{}"
                    .format(len(hl_coeffs), len(hl_intercepts)))
            # cast every weight to float so the result is JSON-serializable
            hcoeffs = []
            for layer in hl_coeffs:
                hcoeffs.append([[float(x) for x in cx] for cx in layer])
            shape['hidden_coeffs'] = hcoeffs
            shape['hidden_intercepts'] = \
                [[float(x) for x in ix] for ix in hl_intercepts]
            # coefficients & intercepts of output layer
            ocoeffs = self.model.coefs_[-1]
            ocoeffs = [[float(x) for x in ox] for ox in ocoeffs]
            ointercepts = self.model.intercepts_[-1]
            if len(ocoeffs[0]) != len(ointercepts):
                raise ConfigError(
                    "Output coefficients&intercepts not equally sized {}/{}"
                    .format(len(ocoeffs[0]), len(ointercepts)))
            shape['output_coeffs'] = ocoeffs
            shape['output_intercepts'] = list(ointercepts)
        else:
            self.logger.warning(
                "Unknown shape for {} classifier (WIP)".format(self.name))
    except Exception as exc:
        # narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; chain the cause for debuggability
        raise CaughtException("Exception encountered when recovering "
                              "the {} classifier model's shape".format(
                                  self.name)) from exc
    return shape
def __init__(self, model_name, model_file):
    """Load the transformation model.

    Args:
        model_name: key into ``Topics.known_models`` (case-insensitive).
        model_file: path of the serialized model to load.

    Raises:
        ConfigError: unknown model name, or the model failed to load.
    """
    super().__init__()
    model_name = model_name.upper()
    if model_name not in Topics.known_models:
        raise ConfigError("Unknown model name '{}'. Choose from {}".format(
            model_name, Topics.known_models))
    loader = Topics.known_models[model_name]
    self.model = loader.load(model_file)
    if self.model is None:
        raise ConfigError("Did not load {} model".format(model_name))
def check_folder_readable(input_folder):
    """Static method to check folder existence."""
    input_folder = value_checkup(input_folder)
    if os.path.isdir(input_folder):
        return
    raise ConfigError("Folder '{}' is missing".format(input_folder))
def check_file_readable(input_file):
    """Static method to check that a file exists and is readable.

    Raises:
        ConfigError: when the file is missing or not readable.  The
            original only tested existence although the error message
            also promised a readability check; ``os.access`` with
            ``R_OK`` makes the check match the message.
    """
    input_file = value_checkup(input_file)
    if not (os.path.exists(input_file) and os.access(input_file, os.R_OK)):
        raise ConfigError(
            "File '{}' is missing or not readable".format(input_file))
def __init__(self, input_file):
    """Initialize with the input filename.

    Raises:
        ConfigError: when ``input_file`` is not a string or not readable.
    """
    super().__init__()
    if isinstance(input_file, str):
        utils.check_file_readable(input_file)
        self.filename = input_file
    else:
        raise ConfigError(
            "Given parameter {} is not String".format(input_file))
def __iter__(self):
    """Yield the bag-of-words representation of every document.

    Each input file holds one JSON document per line; the document's
    ``content`` field is whitespace-tokenized and mapped through the
    dictionary.

    Raises:
        ConfigError: when ``self.input`` is not a list of filenames.
    """
    if not isinstance(self.input, list):
        raise ConfigError('Input argument is not a List')
    for filename in self.input:
        # one JSON document per line in each file
        with open(filename, 'r') as stream:
            for line in stream:
                tokens = json.loads(line)['content'].split()
                yield self.dictionary.doc2bow(tokens)
def get_texts(self):
    """Yield the whitespace-split token list of each document.

    Each input file holds one JSON document per line; every document's
    ``content`` field is split into words.

    Raises:
        ConfigError: when ``self.input`` is not a list of filenames.
    """
    if not isinstance(self.input, list):
        raise ConfigError('Input argument is not a List')
    for filename in self.input:
        # one JSON document per line in each file
        with open(filename, 'r') as stream:
            for line in stream:
                yield json.loads(line)['content'].split()
def __init__(self, output_file):
    """Initialize an empty corpus writing to ``output_file``.

    Opens the output stream (kept in ``self.ofstream``) and resets the
    document counter.

    Raises:
        ConfigError: when ``output_file`` is not a string.
    """
    super().__init__()
    if isinstance(output_file, str):
        self.logger.info('Initialized empty corpus')
        self.logger.info("Save new corpus in {} file".format(output_file))
        # make sure the parent folders exist before opening the stream
        utils.create_path(output_file)
        self.ofstream = open(output_file, 'w')
        self.size = 0
    else:
        raise ConfigError(
            "Given parameter {} is not a String".format(output_file))
def __init__(self, model_name, **kwargs):
    """Initialize the transformation model.

    The default training configuration of the chosen transformer is
    copied, then any keyword overrides supplied by the caller are
    applied on top of it.

    Raises:
        ConfigError: unknown model name.
    """
    super().__init__()
    name = model_name.lower()
    if name not in self.TRANSFORMERS:
        raise ConfigError("Unknown model name '{}'. Choose from {}".format(
            model_name, self.TRANSFORMERS.keys()))
    self.model = None
    self.name = name
    # copy the defaults so the class-level configuration is never mutated
    self.kwargs = dict(self.TRANSFORMERS[name])
    self.kwargs.update(kwargs)
    self.vsize = self.kwargs['size']
    self.logger.info("Initialize the {} transformation model".format(
        name))
def __init__(self, input_files):
    """Initialize with the list of training filenames.

    Counts the documents in each file and prepares one looping document
    generator per file.

    Raises:
        ConfigError: when ``input_files`` is not a list.
    """
    super().__init__()
    if not isinstance(input_files, list):
        raise ConfigError('Given parameter is not a List')
    for fname in input_files:
        utils.check_file_readable(fname)
    # count the number of documents in each file
    self.ndocs = [count_file_lines(fname) for fname in input_files]
    self.logger.info("Available data for training: {}".format(self.ndocs))
    # one generator per input file => one document at a time from each
    self.generators = [loop_doc(fname) for fname in input_files]
    # index of the file we stopped reading from
    self.stop_index = 0
def __init__(self, model_name, model_file):
    """Load a previously trained transformation model.

    Recovers the topic count from the model when available; for LSI the
    projection matrix width is trusted over ``num_topics``.

    Raises:
        ConfigError: unknown model name or unreadable model file.
    """
    super().__init__()
    key = model_name.upper()
    if key not in self.TRANSFORMERS:
        raise ConfigError("Unknown model name '{}'. Choose from {}".format(
            model_name, self.TRANSFORMERS.keys()))
    utils.check_file_readable(model_file)
    self.name = key
    self.model = self.TRANSFORMERS[key].load(model_file)
    # getattr with a default replaces the hasattr check
    self.ntopics = getattr(self.model, 'num_topics', 0)
    if self.name == "LSI":
        # presumably the projection width is authoritative when it
        # disagrees with num_topics — kept from the original logic
        width = self.model.projection.u[0].size
        if self.ntopics != width:
            self.ntopics = width
    self.logger.info("Loaded {} transformation model".format(self.name))
def check_model(self):
    """Raise when the transformation model was not properly loaded."""
    if self.model is not None:
        return
    raise ConfigError("Null {} transformation model".format(self.name))
def check_model(self):
    """Raise when the model was never successfully built."""
    if self.model is not None:
        return
    raise ConfigError("Failed to build the '{}' model".format(
        self.name))
def get_requirements(source):
    """Read a pip-style requirements file into a list of entries.

    Args:
        source: path of the requirements file.

    Returns:
        list[str]: the stripped, non-empty lines of the file.

    Raises:
        ConfigError: when the file does not exist.
    """
    if not os.path.exists(source):
        raise ConfigError("Requirements file {} is missing".format(source))
    with open(source, 'r') as f:
        # skip blank lines, which previously showed up as '' entries
        return [line.strip() for line in f if line.strip()]