def get_classifier(self):
    """
    Return the classifier currently held by this object.

    :return: ``self.classifier`` when ``self.check_classifier()`` is truthy;
        otherwise ``None``, after logging a warning that a classifier must
        be trained first.
    """
    # Guard clause: bail out early when there is nothing usable to return.
    if not self.check_classifier():
        logger.warning("Classifier not initialised yet: Train one using "
                       ".train_classifier() first.")
        return None
    return self.classifier
def train_classifier(self, train_set, model=default_model):
    """
    Train a new classifier on ``train_set``, tag it with the model name and
    save it via ``self.save_classifier()``.

    Only the ``"maxent"`` model is recognised.  Training is first attempted
    with the Megam algorithm, pointing NLTK at a platform-specific binary
    under ``./lib``.  If NLTK raises ``LookupError`` (Megam not found), the
    classifier is retrained with NLTK's default algorithm instead.

    :param train_set: Training data, passed straight through to
        ``nltk.classify.MaxentClassifier.train``.
    :param model: Name of the classifier model to train.
    :return: None.  For an unrecognised model a warning is logged and
        nothing is trained.
    :raises oce.exceptions.CustomError: When Megam could not be found.  The
        fallback classifier has already been trained, tagged and saved by
        the time this is raised, so the error is purely informational.
    """
    if model != "maxent":
        logger.warning("'" + model + "' is not a valid classifier model.")
        return

    model_class = nltk.classify.MaxentClassifier
    try:
        # Try to use the precompiled megam binaries
        if sys.platform.startswith("darwin"):
            nltk.config_megam(os.path.join(".", "lib", "megam.opt.darwin"))
        elif sys.platform.startswith("win32"):
            nltk.config_megam(os.path.join(".", "lib", "megam.opt.win32.exe"))
        elif sys.platform.startswith("linux"):
            nltk.config_megam(os.path.join(".", "lib", "megam.opt.linux"))
        self.classifier = model_class.train(train_set, "megam")
    except LookupError as e:
        self.classifier = model_class.train(train_set)
        msg = "Could not find Megam; Trained classifier using default " \
              "algorithm instead. (Much slower)\n"
        logger.warning(msg)
        msg += "\nOriginal LookupError:\n"
        # The fallback classifier is perfectly usable, so tag and persist it
        # exactly like the Megam-trained one *before* notifying the caller.
        # Previously the raise skipped both steps, leaving an untagged,
        # unsaved classifier behind.
        self.classifier.model_name = model
        self.save_classifier()
        raise oce.exceptions.CustomError(str(e).strip(), pre=msg) from e

    self.classifier.model_name = model
    self.save_classifier()
def parse_server_output(self, msg):
    """
    Format a dictionary response from the controller for the client.

    The formatter is looked up by ``msg['command']`` in ``format_table``;
    unknown commands, and table entries naming a method that does not exist
    on this object, fall back to the ``'default'`` entry.  If the formatter
    produces something other than ``None``, it is passed on to
    ``send_to_client()``.

    :param msg: Mapping with at least the keys ``'command'`` and ``'data'``.
    """
    fallback_notice = "Defaulting to showing raw server response."

    table_key = msg['command']
    if table_key not in format_table:
        logger.warning("Telnet command '{}' does not have an associated "
                       "formatting function.".format(table_key))
        logger.warning(fallback_notice)
        table_key = 'default'

    formatter_name = format_table[table_key]
    if not hasattr(self, formatter_name):
        logger.warning(
            "Telnet command '{}' has an entry in the formatting table, "
            "but does not have a corresponding function defined.".format(
                table_key))
        logger.warning(fallback_notice)
        formatter_name = format_table['default']

    formatter = getattr(self, formatter_name)
    rendered = yield from formatter(msg['data'])
    if rendered is None:
        # Nothing to show the client.
        return
    yield from self.send_to_client(rendered)
def __init__(self, model=default_model, trained_file=default_trained_file):
    """
    Set up the language ID module and try to load a saved classifier.

    The result of ``self.load_classifier(trained_file)`` is stored on
    ``self.classifier``.  Warnings are logged when that result is ``None``,
    when the loaded classifier has no ``model_name`` attribute, or when its
    ``model_name`` differs from ``model``.

    :param model: Name of the classifier model the caller expects.
    :param trained_file: Path handed to ``load_classifier``; also remembered
        as ``self.trained_file``.
    """
    overwrite_hint = ("Use .train_classifier() followed by .save_classifier() "
                      "to overwrite it.")

    self.model = model
    self.trained_file = trained_file

    # === Classifier ===
    self.classifier = self.load_classifier(trained_file)
    loaded = self.classifier

    if loaded is None:
        logger.warning("No previously trained classifier found. (" +
                       trained_file + ")")
        logger.warning("Use .train_classifier() and .save_classifier() to "
                       "train and save a new classifier respectively.")
    elif not hasattr(loaded, 'model_name'):
        logger.warning(
            "The loaded classifier does not specify which model it "
            "uses; it could be different from the one expected. " +
            overwrite_hint)
    elif loaded.model_name != model:
        logger.warning("The model used by the loaded classifier (" +
                       loaded.model_name +
                       ") is different from the one requested (" +
                       model + "). " + overwrite_hint)

    logger.info("Language ID module initialised.")
def save_classifier(self, trained_file=None):
    """
    Pickle the current classifier to disk.

    :param trained_file: Destination path.  Defaults to
        ``self.trained_file`` when ``None``.
    :return: None.  If ``self.get_classifier()`` returns ``None`` a warning
        is logged and nothing is written.
    """
    classifier = self.get_classifier()
    if classifier is None:
        logger.warning("Could not save classifier. (None currently "
                       "initialised.)")
        return

    if trained_file is None:
        trained_file = self.trained_file

    # Use a context manager so the file handle is closed even if pickling
    # fails part-way through (the old open()/close() pair leaked it).
    with open(trained_file, 'wb') as f:
        pickle.dump(self.classifier, f)
    logger.info("Saved trained classifier to '" + trained_file + "'.")
def _update_tags(self, new_tags, old_tags):
    """
    Given comma-delimited lists of record tags, update the RecordTags table.

    Tags present only in ``new_tags`` have their ``count`` incremented (a
    row with ``count=1`` is created if the tag is not in the table yet).
    Tags present only in ``old_tags`` have their ``count`` decremented, and
    the row is deleted once the count reaches zero.

    :param new_tags: Comma-delimited string of the record's tags after the
        change (``''`` for none).
    :param old_tags: Comma-delimited string of the record's tags before the
        change (``''`` for none).
    :return: None
    """
    # ''.split(',') yields [''] -- a single empty tag -- which we don't want,
    # so the empty string is mapped to an empty list explicitly.
    new_list = new_tags.split(',') if new_tags != '' else []
    old_list = old_tags.split(',') if old_tags != '' else []

    added = [tag for tag in new_list if tag not in old_list]
    removed = [tag for tag in old_list if tag not in new_list]

    # Tags that were added
    for tag in added:
        matches = self.session.query(RecordTags) \
            .filter(RecordTags.tag == tag).all()
        if matches:
            # Tag is already in the DB -- Update the count.
            matches[0].count += 1
        else:
            # This is a completely new tag.
            self.session.add(RecordTags(tag=tag, count=1))
    self.session.commit()

    # Tags that were removed
    for tag in removed:
        matches = self.session.query(RecordTags) \
            .filter(RecordTags.tag == tag).all()
        if not matches:
            # Woah, this shouldn't have happened.
            logger.warning(("Tried to remove tag that isn't in DB: {0}. "
                            "Ignoring.").format(tag))
            continue
        row = matches[0]
        row.count -= 1
        if row.count == 0:
            # Remove the row entirely.
            self.session.delete(row)
    self.session.commit()
def parse_server_output(self, msg):
    """
    Turn a dictionary response from the controller into client output.

    ``msg['command']`` selects an entry in ``format_table`` naming a
    formatting method on this object.  When the command has no entry, or the
    named method is missing, the ``'default'`` entry is used instead and
    warnings are logged.  A non-``None`` result from the formatter is sent
    on with ``send_to_client()``.

    :param msg: Mapping with at least the keys ``'command'`` and ``'data'``.
    """
    raw_fallback_msg = "Defaulting to showing raw server response."

    lookup = msg['command']
    if lookup not in format_table:
        logger.warning("Telnet command '{}' does not have an associated "
                       "formatting function.".format(lookup))
        logger.warning(raw_fallback_msg)
        lookup = 'default'

    method_name = format_table[lookup]
    if not hasattr(self, method_name):
        logger.warning(
            "Telnet command '{}' has an entry in the formatting table, "
            "but does not have a corresponding function defined.".format(
                lookup))
        logger.warning(raw_fallback_msg)
        method_name = format_table['default']

    format_method = getattr(self, method_name)
    output = yield from format_method(msg['data'])
    if output is None:
        # The formatter had nothing for the client.
        return
    yield from self.send_to_client(output)
def exec_command(self, request):
    """
    Perform a requested command and return the results as an object
    suitable for the client.

    The work is delegated to the method named ``"exec_" + command`` on this
    object, which is called with the whole ``request``.

    :param request: Mapping containing at least the key ``"command"``.
    :return: ``{"command": <command>, "data": <results>}``, where
        ``<results>`` is the handler's return value, or the string
        ``"error"`` (with a warning logged) if no usable handler exists.
    :raises KeyError: If ``request`` has no ``"command"`` entry.
    """
    command = request["command"]

    handler = None
    # "exec_" + "command" resolves to this very method; dispatching to it
    # would recurse without bound, so that name is treated as invalid input.
    if command != "command":
        handler = getattr(self, "exec_" + command, None)

    # Only dispatch to real callables: a plain data attribute that happens
    # to be called "exec_<something>" is not a command handler.
    if callable(handler):
        results = handler(request)
    else:
        logger.warning("Invalid input from client.")
        results = "error"

    return {"command": command, "data": results}
def update_record(self, row_id, field, value):
    """
    Set ``field`` to ``value`` on the record(s) whose ``rowid`` equals
    ``row_id``, then commit and clear the memoisation caches.

    Field-specific processing happens before the value is stored: for
    ``'tag'`` the RecordTags bookkeeping is updated via ``_update_tags``;
    for ``'language'`` the value is passed through
    ``langid_normalise_language``.  Every value is finally passed through
    ``fts_tag``.

    :param row_id: Value compared against ``Records.rowid``.
    :param field: Name of the attribute to update on the row.
    :param value: New value for the field.
    :return: ``'success'``, ``'invalid_field'`` if the row has no attribute
        called ``field``, or ``'error'`` if SQLAlchemy raised.
    """
    try:
        for row in self.session.query(Records) \
                .filter(Records.rowid == row_id):
            if not hasattr(row, field):
                logger.warning(
                    "Invalid field name given by client: '{}'".format(
                        field))
                return 'invalid_field'

            # ====================
            # Field-specific hooks
            # ====================
            # Process record tags to update tweets_tags table
            if field == 'tag':
                old_tags = fts_detag('tag', row.tag)
                self._update_tags(value, old_tags)
            elif field == 'language':
                value = langid_normalise_language(value)
            # ====================

            # Add FTS tags if needed
            value = fts_tag(field, value)

            logger.info("Updating '{0}' on row {1}: {2} -> {3}".format(
                field, row.rowid,
                str(getattr(row, field)).replace('\n', '\\n'),
                str(value).replace('\n', '\\n')))
            setattr(row, field, value)

        self.session.commit()
        # Also clear all memoisation caches, in case the update
        # invalidates their results
        self._clear_caches()
        return 'success'
    except sqlalchemy.exc.SQLAlchemyError as e:
        # Uh oh.
        logger.error(e)
        # A failed flush/commit leaves the session's transaction unusable
        # until it is rolled back; without this, later calls on the same
        # session would keep failing.
        try:
            self.session.rollback()
        except sqlalchemy.exc.SQLAlchemyError as rollback_error:
            # Keep the original best-effort contract: report, don't crash.
            logger.error(rollback_error)
        return 'error'
def parse_client_input(self, fn_table, msg):
    """
    Parse a command sent by the client against the function table provided.

    The first whitespace-separated word of ``msg`` is matched as a prefix
    against the keys of ``fn_table`` in sorted order; the first match wins
    and the remaining words are passed to the matching method as positional
    arguments.  Messages that do not start with ``"!"`` are remembered in
    ``self.last_command``.

    :param fn_table: Mapping of command name -> name of the method on this
        object which implements it.
    :param msg: Raw line received from the client.
    """
    if not msg.startswith("!"):
        self.last_command = msg

    words = msg.split()
    if not words:
        # Blank line: just answer with an empty string.
        yield from self.send_to_client('')
        return

    prefix, *arguments = words
    # '' doubles as the "no match" marker.
    handler_name = next(
        (fn_table[name] for name in sorted(fn_table.keys())
         if name.startswith(prefix)),
        '')

    if handler_name == '':
        # We didn't find a match in the command table
        yield from self.send_to_client("Invalid command.")
        return

    if not hasattr(self, handler_name):
        # There's a problem with the command table
        logger.warning("Telnet command '{}' has an entry in the command "
                       "table, but does not have a corresponding "
                       "function defined.".format(handler_name))
        yield from self.send_to_client("Invalid command.")
        return

    handler = getattr(self, handler_name)
    yield from handler(*arguments)
def wrapped(self, *args, **kwargs):
    """
    Warn that this data provider did not define the API method, then call
    ``func`` with the same arguments and return its result.

    NOTE(review): ``func`` is a free variable taken from an enclosing scope
    that is not visible here -- presumably a decorator's argument; confirm.
    """
    provider_name = self.__class__.__name__
    method_name = func.__name__
    logger.warning(
        "Data provider [{0}] did not define API method: {1}".format(
            provider_name, method_name))
    return func(self, *args, **kwargs)
def wrapped(self, *args, **kwargs):
    """
    Warn that this interface did not define the API method, then call
    ``func`` with the same arguments and return its result.

    NOTE(review): ``func`` is a free variable taken from an enclosing scope
    that is not visible here -- presumably a decorator's argument; confirm.
    """
    interface_name = self.__class__.__name__
    method_name = func.__name__
    logger.warning(
        "Interface [{0}] did not define API method: {1}".format(
            interface_name, method_name))
    return func(self, *args, **kwargs)