Example #1
import re

import bs4
import MySQLdb
from pattern.vector import KNN, Document, PORTER

# `conn` is assumed to be an existing PostgreSQL connection (e.g. psycopg2)
# created elsewhere in this module.


def setup():
    global pages
    global urlalias
    global revurlalias
    global knn
    pages = dict()        # url -> page title
    urlalias = dict()     # alias -> Drupal source path
    revurlalias = dict()  # Drupal source path -> alias
    knn = KNN()
    db = MySQLdb.connect(host="192.168.200.26",
                         user="******",
                         passwd="xxxsecretxxx",
                         db="pla")
    cur = db.cursor()
    cur.execute("select source, alias from url_alias")
    for row in cur.fetchall():
        urlalias[row[1]] = row[0]
        revurlalias[row[0]] = row[1]
    cur.execute("select tid, name, description, vid from taxonomy_term_data;")
    for row in cur.fetchall():
        url = 'taxonomy/term/' + str(row[0])
        pages[url] = row[1]
        if url in revurlalias:
            pages[revurlalias[url]] = row[1]
            url = revurlalias[url]
        if row[3] == 3:  # vid 3: only terms from this Drupal vocabulary
            soup = bs4.BeautifulSoup(row[2], "html.parser")
            the_text = re.sub(r'[\n\r]+', r'  ', soup.get_text(' ')).lower()
            knn.train(Document(the_text, stemmer=PORTER), url)
            knn.train(Document(row[1].lower()), url)
    cur.execute("""
        select a.tid, c.body_value, d.title
        from taxonomy_term_data as a
        inner join field_data_field_practice_areas as b
            on (a.tid = b.field_practice_areas_tid
                and b.entity_type = 'node'
                and b.bundle != 'professionals'
                and b.deleted = 0)
        inner join field_data_body as c
            on (b.entity_id = c.entity_id and b.entity_type = c.entity_type)
        inner join node as d
            on (c.entity_id = d.nid);
    """)
    for row in cur.fetchall():
        url = 'taxonomy/term/' + str(row[0])
        if url in revurlalias:
            url = revurlalias[url]
        soup = bs4.BeautifulSoup(row[1], "html.parser")
        the_text = re.sub(r'[\n\r]+', r'  ', soup.get_text(' ')).lower()
        knn.train(Document(the_text, stemmer=PORTER), url)
        knn.train(Document(row[2].lower()), url)
    cur.execute("select nid, title from node where status=1;")
    for row in cur.fetchall():
        url = 'node/' + str(row[0])
        pages[url] = row[1]
        if url in revurlalias:
            pages[revurlalias[url]] = row[1]
    db.close()
    pgcur = conn.cursor()
    pgcur.execute(
        "select query, target from website_queries where target is not null group by query, target"
    )
    for row in pgcur.fetchall():
        words = re.split(r'[\n\r,;]+ *', row[1])
        for word in words:
            print("training on " + row[0].lower() + " for " + word)
            knn.train(Document(row[0].lower()), word)
    conn.commit()
    pgcur.close()
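
Once setup() has run, the trained classifier can suggest a page for a
free-text visitor query. A minimal sketch, assuming the globals above
(suggest_page is a hypothetical helper; KNN.classify() returns the label of
the nearest training examples, which here is a URL):

def suggest_page(query):
    # Stem the query the same way the page text was stemmed for training.
    return knn.classify(Document(query.lower(), stemmer=PORTER))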
Example #2
    @classmethod
    def train(cls, train_file, model_file):
        # Each line of the training file is a JSON object with a 'text'
        # field and a 'soft skill' label.
        sents_dic = (json.loads(jsonl)
                     for jsonl in SoftSkills.load(train_file))
        model = KNN()

        for sent in sents_dic:
            text = sent['text']
            # Bag-of-words vector of the sentence, e.g. {'sweet': 1}.
            v = count([word for word, pos in tag(text)])
            if v:
                model.train(v, type=sent['soft skill'])
        model.save(model_file)
        return model
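
The saved model can later be reloaded without retraining. A hedged sketch,
assuming pattern.vector's Classifier.load() classmethod and the same
bag-of-words features as above ('model.knn' is a hypothetical path):

from pattern.vector import Classifier, count, words

model = Classifier.load('model.knn')
# classify() accepts the same dict-of-word-counts used in training.
print(model.classify(count(words('communicates clearly with the team'))))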
Example #3
        s = Sentence(parse(s))  # parse tree with part-of-speech tags
        s = search('JJ', s)  # adjectives in the tweet
        s = [match[0].string for match in s]  # adjectives as a list of strings
        s = " ".join(s)  # adjectives as string
        if len(s) > 0:
            m.append(Document(s, type=p, stemmer=None))

# Train k-Nearest Neighbor on the model.
# Note that this is only a simple example: to build a robust classifier
# you would need a lot more training data (e.g., tens of thousands of tweets).
# The more training data, the more statistically reliable the classifier becomes.
# The only way to really know if your classifier is working correctly
# is to test it with testing data; see the documentation for Classifier.test().
classifier = KNN(baseline=None)  # By default, baseline=MAJORITY
# (classify unknown documents with the most frequent type).
for document in m:
    classifier.train(document)

# These are the adjectives the classifier has learned:
print sorted(classifier.features)
print

# We can now ask it to classify documents containing these words.
# Note that you may get different results than the ones below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
# If None is returned, the word was not recognized,
# and the classifier returned the default value (see above).
print classifier.classify('sweet potato burger')  # yields 'WIN'
print classifier.classify('stupid autocorrect')  # yields 'FAIL'

# "What can I do with it?"
Example #4
        s = Sentence(parse(s))  # parse() annotates the string with part-of-speech tags
        s = search('JJ', s)  # search for adjectives in the tweet (JJ = adjective)
        s = [match[0].string for match in s]
        s = ' '.join(s)
        if len(s) > 0:
            corpus.append(Document(s, type=p))
            corpus.append(Document(s, type=m))

classifier = KNN()  # k-nearest neighbor (k-NN) classifier
objects = []

for document in corpus:  # each document is an unordered bag of adjectives
    classifier.train(document)  # the adjective vectors train the classifier
    objects.append(classifier.classify('awesome'))  # expected: 'WIN'
    objects.append(classifier.classify('cool'))     # expected: 'WIN'
    objects.append(classifier.classify('damn'))     # expected: 'FAIL'
    objects.append(classifier.classify('sucks'))    # expected: 'FAIL'

print objects
wincounter = 0
failcounter = 0
for thing in objects:
    if thing == 'WIN':
        wincounter += 1
    elif thing == 'FAIL':
        failcounter += 1
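
The tally above can be written more compactly with the standard library
(a sketch using collections.Counter; the counts are unchanged):

from collections import Counter

tally = Counter(objects)
print tally['WIN'], tally['FAIL']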
Example #5
        s = search('JJ', s)                  # adjectives in the tweet
        s = [match[0].string for match in s]  # adjectives as a list of strings
        s = " ".join(s)                      # adjectives as string
        if len(s) > 0:
            m.append(Document(s, type=p, stemmer=None))

# Train k-Nearest Neighbor on the model.
# Note that this is only a simple example: to build a robust classifier
# you would need a lot more training data (e.g., tens of thousands of tweets).
# The more training data, the more statistically reliable the classifier becomes.
# The only way to really know if your classifier is working correctly
# is to test it with testing data; see the documentation for Classifier.test().
classifier = KNN(baseline=None)  # By default, baseline=MAJORITY
# (classify unknown documents with the most frequent type).
for document in m:
    classifier.train(document)

# These are the adjectives the classifier has learned:
print(sorted(classifier.features))
print()

# We can now ask it to classify documents containing these words.
# Note that you may get different results than the ones below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
# If None is returned, the word was not recognized,
# and the classifier returned the default value (see above).
print(classifier.classify('sweet potato burger'))  # yields 'WIN'
print(classifier.classify('stupid autocorrect'))  # yields 'FAIL'

# "What can I do with it?"