Example #1
class BiasRobot:

    def __init__(self):
        abs_dir = os.path.dirname(__file__) # absolute path to here

        self.sent_clf = MiniClassifier(os.path.join(abs_dir, 'robots/bias_sent_level.npz'))
        self.doc_clf = MiniClassifier(os.path.join(abs_dir, 'robots/bias_doc_level.npz'))

        # hashed uni-/bigram features, 2**26 feature buckets
        self.vec = ModularVectorizer(norm=None, non_negative=True, binary=True,
                                     ngram_range=(1, 2), n_features=2**26)

        # risk-of-bias domains scored by this robot
        self.bias_domains = ['Random sequence generation', 'Allocation concealment',
                             'Blinding of participants and personnel',
                             'Blinding of outcome assessment',
                             'Incomplete outcome data', 'Selective reporting']
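The ModularVectorizer used above hashes uni- and bigram features into a fixed 2**26-dimensional space and lets interaction features (text paired with a domain label) be accumulated before a single transform. A rough sketch of the same idea using scikit-learn's HashingVectorizer is shown below; the domain-prefix trick is an assumption for illustration only, not RobotReviewer's actual implementation, and note that recent scikit-learn versions replaced non_negative=True with alternate_sign=False.

# Minimal sketch of hashed uni-/bigram features plus a crude domain interaction,
# approximating what ModularVectorizer's builder_* methods accumulate
# (assumption: the real vectorizer combines its partial matrices differently).
from sklearn.feature_extraction.text import HashingVectorizer

vec = HashingVectorizer(norm=None, alternate_sign=False, binary=True,
                        ngram_range=(1, 2), n_features=2**26)

sentences = ["Participants were randomised using sealed opaque envelopes.",
             "Outcome assessors were blinded to treatment allocation."]
domain = "Random sequence generation"

# plain uni-/bigram features
X_text = vec.transform(sentences)

# interaction features: prefix each token with the domain label so the same
# word hashes to a different bucket for each domain (illustrative only)
tagged = [" ".join(domain.replace(" ", "_") + "~" + tok for tok in s.split())
          for s in sentences]
X_interaction = vec.transform(tagged)

X = X_text + X_interaction   # combined sparse feature matrix, shape (2, 2**26)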
Example #2
import os
import uuid

import numpy as np
from nltk.tokenize import sent_tokenize

# MiniClassifier and ModularVectorizer are RobotReviewer's own model and
# vectorizer helpers; the import paths below are assumed and may differ
# between RobotReviewer versions.
from robotreviewer.ml.classifier import MiniClassifier
from robotreviewer.ml.vectorizer import ModularVectorizer


class BiasRobot:

    def __init__(self):
        abs_dir = os.path.dirname(__file__) # absolute path to here

        self.sent_clf = MiniClassifier(os.path.join(abs_dir, 'robots/bias_sent_level.npz'))
        self.doc_clf = MiniClassifier(os.path.join(abs_dir, 'robots/bias_doc_level.npz'))

        # hashed uni-/bigram features, 2**26 feature buckets
        self.vec = ModularVectorizer(norm=None, non_negative=True, binary=True,
                                     ngram_range=(1, 2), n_features=2**26)

        # risk-of-bias domains scored by this robot
        self.bias_domains = ['Random sequence generation', 'Allocation concealment',
                             'Blinding of participants and personnel',
                             'Blinding of outcome assessment',
                             'Incomplete outcome data', 'Selective reporting']


    def annotate(self, doc_text, top_k=3):

        """
        Annotate full text of clinical trial report
        `top_k` refers to 'top-k recall'.

        top-1 recall will return the single most relevant sentence
        in the document, and top-3 recall the 3 most relevant.

        The validation study assessed the accuracy of top-3 and top-1
        and we suggest top-3 as default
        """



        marginalia = []

        doc_sents = sent_tokenize(doc_text)

        for domain in self.bias_domains:

            # pair each sentence with the current bias domain
            doc_domains = [domain] * len(doc_sents)
            doc_X_i = list(zip(doc_sents, doc_domains))

            #
            # build up sentence feature set
            #
            self.vec.builder_clear()

            # uni-bigrams
            self.vec.builder_add_docs(doc_sents)

            # uni-bigrams/domain interactions
            self.vec.builder_add_docs(doc_X_i)

            doc_sents_X = self.vec.builder_transform()
            doc_sents_preds = self.sent_clf.decision_function(doc_sents_X)

            # indices of the top_k highest-scoring sentences, best first
            high_prob_sent_indices = np.argsort(doc_sents_preds)[:-top_k-1:-1]
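            # worked micro-example (illustrative values only): with scores
            # [0.1, 0.9, 0.4, 0.7] and top_k=3, argsort gives [0, 2, 3, 1] and
            # the [:-4:-1] slice picks [1, 3, 2], i.e. scores 0.9, 0.7, 0.4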

            high_prob_sents = [doc_sents[i] for i in high_prob_sent_indices]

            high_prob_sents_j = " ".join(high_prob_sents)

            # interaction label marking the high-probability sentences for this domain
            sent_domain_interaction = "-s-" + domain

            #
            # build up document feature set
            #
            self.vec.builder_clear()

            # uni-bigrams
            self.vec.builder_add_docs([doc_text])

            # uni-bigrams/domain interaction
            self.vec.builder_add_docs([(doc_text, domain)])

            # uni-bigrams/relevance interaction
            self.vec.builder_add_docs([(high_prob_sents_j, sent_domain_interaction)])

            X = self.vec.builder_transform()

            bias_pred = self.doc_clf.predict(X)
            bias_class = ["high/unclear", "low"][bias_pred[0]]  # map the predicted class to a risk-of-bias label

            marginalia.append({
                "type": "Risk of Bias",
                "title": domain,
                "annotations": [{"content": sent, "uuid": str(uuid.uuid1())} for sent in high_prob_sents],
                "description": "**Overall risk of bias prediction**: " + bias_class
                })


        return {"marginalia": marginalia}