예제 #1
0
 def get_classifier(self):
     """
     Return the classifier held by this object, or None if there is none.

     Availability is decided by ``self.check_classifier()``.  When that
     check fails a warning is logged and None is returned.
     """
     if not self.check_classifier():
         logger.warning("Classifier not initialised yet: Train one using "
                        ".train_classifier() first.")
         return None
     return self.classifier
예제 #2
0
    def train_classifier(self, train_set, model=default_model):
        """
        Train a new classifier on ``train_set`` and store it on the instance.

        Only the "maxent" model is recognised; any other value logs a warning
        and returns without touching the current classifier.

        Training is first attempted with the Megam backend, after pointing
        NLTK at the platform-specific binary under ./lib.  If that raises a
        LookupError, the classifier is retrained with NLTK's default
        algorithm and a CustomError describing the fallback is raised.  In
        that case the fallback classifier stays on ``self.classifier`` but is
        not saved.

        :param train_set: training data, passed to MaxentClassifier.train()
        :param model: name of the model to train
        :raises oce.exceptions.CustomError: when the Megam attempt raised a
            LookupError and the default algorithm was used instead
        """
        if model != "maxent":
            # .format() keeps the message identical for strings, and does not
            # blow up when a non-string model is passed by mistake.
            logger.warning(
                "'{}' is not a valid classifier model.".format(model))
            return

        model_class = nltk.classify.MaxentClassifier
        megam_binaries = (
            ("darwin", "megam.opt.darwin"),
            ("win32", "megam.opt.win32.exe"),
            ("linux", "megam.opt.linux"),
        )
        try:
            # Try to use the precompiled megam binaries
            for platform_prefix, binary in megam_binaries:
                if sys.platform.startswith(platform_prefix):
                    nltk.config_megam(os.path.join(".", "lib", binary))
                    break
            self.classifier = model_class.train(train_set, "megam")
        except LookupError as e:
            self.classifier = model_class.train(train_set)
            # Tag the fallback classifier too, so that it still records which
            # model it uses if the caller decides to keep or save it.
            self.classifier.model_name = model
            msg = "Could not find Megam; Trained classifier using default " \
                  "algorithm instead.  (Much slower)\n"
            logger.warning(msg)
            msg += "\nOriginal LookupError:\n"
            custom = oce.exceptions.CustomError(str(e).strip(), pre=msg)
            # Chain explicitly so the original traceback is preserved.
            raise custom from e

        self.classifier.model_name = model
        self.save_classifier()
예제 #3
0
    def parse_server_output(self, msg):
        """
        Turn a Dictionary response from the controller into something
        readable for the client.

        The formatter is looked up by ``msg['command']`` in ``format_table``
        and resolved as a method on this object.  If either lookup fails, the
        'default' entry is used.  Whatever the formatter produces (unless it
        is None) is passed to send_to_client().
        """
        command = msg['command']
        if command not in format_table:
            logger.warning(("Telnet command '{}' does not have an associated "
                            "formatting function.").format(command))
            logger.warning("Defaulting to showing raw server response.")
            command = 'default'

        formatter_name = format_table[command]
        if not hasattr(self, formatter_name):
            complaint = (
                "Telnet command '{}' has an entry in the  formatting table, "
                "but does not have a corresponding function defined.")
            logger.warning(complaint.format(command))
            logger.warning("Defaulting to showing raw server response.")
            formatter_name = format_table['default']

        formatter = getattr(self, formatter_name)
        reply = yield from formatter(msg['data'])
        if reply is None:
            # Nothing to send back to the client
            return
        yield from self.send_to_client(reply)
예제 #4
0
    def __init__(self, model=default_model, trained_file=default_trained_file):
        """
        Remember the requested model and classifier file, then try to load a
        previously trained classifier from that file.

        Warnings are logged when no classifier could be loaded, when the
        loaded classifier carries no ``model_name``, or when its
        ``model_name`` differs from the requested model.

        :param model: name of the model the caller expects to use
        :param trained_file: file to load the trained classifier from
        """
        self.model = model
        self.trained_file = trained_file

        # === Classifier ===
        loaded = self.load_classifier(trained_file)
        self.classifier = loaded

        if loaded is None:
            logger.warning("No previously trained classifier found. (" +
                           trained_file + ")")
            logger.warning("Use .train_classifier() and .save_classifier() to "
                           "train and save a new classifier respectively.")
        elif not hasattr(loaded, 'model_name'):
            logger.warning(
                "The loaded classifier does not specify which model it "
                "uses; it could be different from the one expected.  "
                "Use .train_classifier() followed by .save_classifier() "
                "to overwrite it.")
        elif loaded.model_name != model:
            mismatch = ("The model used by the loaded classifier (" +
                        loaded.model_name +
                        ") is different from the one requested (" +
                        model +
                        ").  "
                        "Use .train_classifier() followed by "
                        ".save_classifier() "
                        "to overwrite it.")
            logger.warning(mismatch)

        logger.info("Language ID module initialised.")
예제 #5
0
    def save_classifier(self, trained_file=None):
        """
        Pickle the current classifier to disk.

        Nothing is written (and a warning is logged) when get_classifier()
        reports that no classifier is available.

        :param trained_file: destination path; falls back to
            ``self.trained_file`` when None
        :return: None
        """
        classifier = self.get_classifier()
        if classifier is None:
            logger.warning("Could not save classifier. (None currently "
                           "initialised.)")
            return

        if trained_file is None:
            trained_file = self.trained_file
        # The context manager guarantees the file handle is closed even if
        # pickling raises part-way through.
        with open(trained_file, 'wb') as f:
            pickle.dump(self.classifier, f)
        logger.info("Saved trained classifier to '{}'.".format(trained_file))
예제 #6
0
    def _update_tags(self, new_tags, old_tags):
        """
        Bring the RecordTags table in line with a change to a record's tags.

        Tags present only in ``new_tags`` get their count incremented (or a
        fresh row with count 1); tags present only in ``old_tags`` get their
        count decremented, and the row is deleted once the count hits 0.

        :param new_tags: comma-delimited tags after the change ('' for none)
        :param old_tags: comma-delimited tags before the change ('' for none)
        :return: None
        """
        # ''.split(',') gives [''] rather than [], hence the special case
        current = [] if new_tags == '' else new_tags.split(',')
        previous = [] if old_tags == '' else old_tags.split(',')

        gained = [tag for tag in current if tag not in previous]
        lost = [tag for tag in previous if tag not in current]

        # Newly attached tags: bump the count, or create the row
        for tag in gained:
            matches = self.session.query(RecordTags) \
                .filter(RecordTags.tag == tag).all()
            if matches:
                # Tag is already in the DB
                matches[0].count += 1
            else:
                # Never seen this tag before
                self.session.add(RecordTags(tag=tag, count=1))
            self.session.commit()

        # Detached tags: drop the count, and the row itself at zero
        for tag in lost:
            matches = self.session.query(RecordTags) \
                .filter(RecordTags.tag == tag).all()
            if not matches:
                # Should not happen; log it and move on without committing
                logger.warning(("Tried to remove tag that isn't in DB: {0}. "
                                "Ignoring.").format(tag))
                continue
            entry = matches[0]
            entry.count -= 1
            if entry.count == 0:
                self.session.delete(entry)
            self.session.commit()
예제 #7
0
    def parse_server_output(self, msg):
        """
        Format a Dictionary response from the controller for the client.

        ``msg['command']`` selects an entry in ``format_table``, which names
        a formatting method on this object.  An unknown command, or a table
        entry without a matching method, falls back to the 'default' entry.
        A non-None formatting result is forwarded via send_to_client().
        """
        name = msg['command']
        known = name in format_table
        if not known:
            logger.warning("Telnet command '{}' does not have an associated "
                           "formatting function.".format(name))
            logger.warning("Defaulting to showing raw server response.")
            name = 'default'

        method_name = format_table[name]
        if not hasattr(self, method_name):
            logger.warning(
                ("Telnet command '{}' has an entry in the  formatting table, "
                 "but does not have a corresponding function defined."
                 ).format(name)
            )
            logger.warning("Defaulting to showing raw server response.")
            method_name = format_table['default']

        render = getattr(self, method_name)
        output = yield from render(msg['data'])
        if output is not None:
            yield from self.send_to_client(output)
예제 #8
0
    def exec_command(self, request):
        """
        Perform a requested command and return the results as an object
        suitable for the client.

        The command named by ``request["command"]`` is delegated to the
        method ``exec_<command>`` on this object, which receives the whole
        request.  If no such method exists, a warning is logged and the
        result is the string "error".

        :param request: mapping with a "command" key
        :return: dict of the form {"command": <name>, "data": <results>}
        """
        command = request["command"]
        handler_name = "exec_" + command

        # The command "command" would resolve to this dispatcher itself and
        # recurse until the interpreter gives up, so treat it as invalid.
        if handler_name != "exec_command" and hasattr(self, handler_name):
            handler = getattr(self, handler_name)
            results = handler(request)
        else:
            logger.warning("Invalid input from client.")
            results = "error"

        return {"command": command, "data": results}
예제 #9
0
    def update_record(self, row_id, field, value):
        """
        Set ``field`` to ``value`` on the Records row(s) whose rowid equals
        ``row_id``, committing after each row.

        :param row_id: rowid to match against Records.rowid
        :param field: name of the attribute to update on the row
        :param value: new value; normalised/tagged by the field hooks below
        :return: 'invalid_field' if the row has no such attribute, 'error' if
            SQLAlchemy raised, otherwise 'success' (this includes the case
            where no row matched, since the loop body then never runs)
        """
        try:
            for row in self.session.query(Records) \
                    .filter(Records.rowid == row_id):

                # NOTE(review): hasattr() accepts any attribute on the row
                # object, not just mapped columns -- confirm that is intended
                # for client-supplied field names.
                if not hasattr(row, field):
                    logger.warning(
                        "Invalid field name given by client: '{}'".format(
                            field))
                    return 'invalid_field'

                # ====================
                # Field-specific hooks
                # ====================
                # Process record tags to update tweets_tags table
                if field == 'tag':
                    old_tags = fts_detag('tag', row.tag)
                    self._update_tags(value, old_tags)
                elif field == 'language':
                    value = langid_normalise_language(value)
                # ====================

                # Add FTS tags if needed
                value = fts_tag(field, value)
                logger.info("Updating '{0}' on row {1}: {2} -> {3}".format(
                    field, row.rowid,
                    str(getattr(row, field)).replace('\n', '\\n'),
                    str(value).replace('\n', '\\n')))
                setattr(row, field, value)

                self.session.commit()

            # Also clear all memoisation caches, in case the update
            # invalidates their results
            self._clear_caches()

            return 'success'
        except sqlalchemy.exc.SQLAlchemyError as e:
            # Uh oh.
            logger.error(e)
            # A failed flush/commit leaves the session's transaction in an
            # invalid state; roll back so the session can be used again.
            self.session.rollback()
            return 'error'
예제 #10
0
    def parse_client_input(self, fn_table, msg):
        """
        Match a line of client input against ``fn_table`` and run the
        corresponding method.

        The first word of ``msg`` is treated as a (possibly abbreviated)
        command: the alphabetically first key of ``fn_table`` that starts
        with it wins, and the remaining words are passed to the method as
        positional arguments.  Input not starting with "!" is remembered in
        ``self.last_command``.
        """
        if not msg.startswith("!"):
            self.last_command = msg

        words = msg.split()
        if not words:
            yield from self.send_to_client('')
            return

        typed, *arguments = words
        handler_name = next(
            (fn_table[key] for key in sorted(fn_table.keys())
             if key.startswith(typed)),
            '')

        if handler_name == '':
            # We didn't find a match in the command table
            yield from self.send_to_client("Invalid command.")
            return

        if not hasattr(self, handler_name):
            # There's a problem with the command table
            logger.warning(("Telnet command '{}' has an entry in the command "
                            "table, but does not have a corresponding "
                            "function defined.").format(handler_name))
            yield from self.send_to_client("Invalid command.")
            return

        handler = getattr(self, handler_name)
        yield from handler(*arguments)
예제 #11
0
    def parse_client_input(self, fn_table, msg):
        """
        Dispatch a client message to the method that ``fn_table`` maps its
        command word to.

        Commands may be abbreviated: keys of ``fn_table`` are tried in sorted
        order and the first one beginning with the typed word is used.  The
        rest of the message is split on whitespace and passed as positional
        arguments.  Messages that do not begin with "!" are stored in
        ``self.last_command``.
        """
        is_bang = msg.startswith("!")
        if not is_bang:
            self.last_command = msg

        pieces = msg.split()
        if len(pieces) < 1:
            yield from self.send_to_client('')
            return

        prefix = pieces[0]
        params = pieces[1:]

        target = ''
        candidates = [key for key in sorted(fn_table.keys())
                      if key.startswith(prefix)]
        if candidates:
            target = fn_table[candidates[0]]

        if target == '':
            # We didn't find a match in the command table
            yield from self.send_to_client("Invalid command.")
            return

        if hasattr(self, target):
            yield from getattr(self, target)(*params)
            return

        # There's a problem with the command table
        logger.warning("Telnet command '{}' has an entry in the command "
                       "table, but does not have a corresponding "
                       "function defined.".format(target))
        yield from self.send_to_client("Invalid command.")
예제 #12
0
 def wrapped(self, *args, **kwargs):
     # Log that this data provider's class did not define the API method,
     # then hand the call over to the wrapped function unchanged.
     provider_name = self.__class__.__name__
     method_name = func.__name__
     logger.warning(
         "Data provider [{0}] did not define API method: {1}".format(
             provider_name, method_name))
     return func(self, *args, **kwargs)
예제 #13
0
 def wrapped(self, *args, **kwargs):
     # Log that this interface's class did not define the API method, then
     # hand the call over to the wrapped function unchanged.
     template = "Interface [{0}] did not define API method: {1}"
     owner = self.__class__.__name__
     logger.warning(template.format(owner, func.__name__))
     return func(self, *args, **kwargs)