def buscaCorreo2(x):
    """Show the stored e-mail most similar to the one selected in the GUI.

    Reads every ``Correos/*.txt`` file, skipping the first four header
    lines of each, builds a TF-IDF vector-space model over the bodies,
    and pops a message box naming the closest neighbour of the document
    whose numeric id is currently in the Tk variable ``var``.

    :param x: unused Tkinter callback argument (kept for compatibility
        with the widget binding).
    """
    documents = []
    documap = {}
    for archivo in os.listdir("Correos"):
        if archivo.endswith(".txt"):
            # Context manager guarantees the file is closed even if
            # reading raises (the original leaked the handle on error).
            with open("Correos/" + archivo, "r") as f:
                for _ in range(4):  # skip the 4 header lines
                    f.readline()
                mailbody = f.read()
            docu = Document(mailbody, name=archivo)
            documents.append(docu)
            # Map the numeric part of the filename ("12.txt" -> 12).
            documap[int(archivo[0:-4])] = docu
    model = Model(documents=documents, weight=TFIDF)
    docu = documap[int(var.get())]
    # neighbors() returns a (similarity, document) list; take the closest.
    tupla = model.neighbors(docu, top=1)[0]
    tkMessageBox.showinfo("Tk", "El documento que mas se parece es el "
                          + tupla[1].name[0:-4] + ", con un "
                          + str(tupla[0]) + " de similitud")
# For text, a better metric than Euclidean distance
# is called cosine similarity. This is what a Model uses:
# NOTE: modernized from Python 2 print statements to Python 3 print()
# calls, matching the sibling copy of this demo elsewhere in the file.
d1 = m.document(name="lion")
d2 = m.document(name="tiger")
d3 = m.document(name="dolphin")
d4 = m.document(name="shark")
d5 = m.document(name="parakeet")
print("lion-tiger:", m.similarity(d1, d2))
print("lion-dolphin:", m.similarity(d1, d3))
print("dolphin-shark:", m.similarity(d3, d4))
print("dolphin-parakeet:", m.similarity(d3, d5))
print()
print("Related to tiger:")
print(m.neighbors(d2, top=3))  # Top three most similar.
print()
print("Related to a search query ('water'):")
print(m.search("water", top=10))
# In summary:
# A Document:
# - takes a string of text,
# - counts the words in the text,
# - constructs a vector of words (features) and normalized word count (weight).
# A Model:
# - groups multiple vectors in a matrix,
# - tweaks the weight with TF-IDF to find "unique" words in each document,
# For text, a better metric than Euclidean distance
# is called cosine similarity. This is what a Model uses:
d1, d2, d3, d4, d5 = (m.document(name=animal)
                      for animal in ("lion", "tiger", "dolphin",
                                     "shark", "parakeet"))
# Print each labelled pairwise similarity.
for label, left, right in (("lion-tiger:", d1, d2),
                           ("lion-dolphin:", d1, d3),
                           ("dolphin-shark:", d3, d4),
                           ("dolphin-parakeet:", d3, d5)):
    print(label, m.similarity(left, right))
print()
print("Related to tiger:")
print(m.neighbors(d2, top=3))  # Top three most similar.
print()
print("Related to a search query ('water'):")
print(m.search("water", top=10))
# In summary:
# A Document:
# - takes a string of text,
# - counts the words in the text,
# - constructs a vector of words (features) and normalized word count (weight).
# A Model:
# - groups multiple vectors in a matrix,
# - tweaks the weight with TF-IDF to find "unique" words in each document,
from pattern.vector import Document, Model, IG, TF, TFIDF, BINARY
import sys
import os

# Build one Document per sample file; the name of the directory a file
# lives in ("examples/<language>/<file>") is used as the document type,
# i.e. the classification label.
print("Reading sample code and instantiating documents...")
documents = []
exampleDir = "examples/"
for lang_name in os.listdir(exampleDir):  # renamed: 'file' shadowed a builtin
    lang_path = os.path.join(exampleDir, lang_name)
    if os.path.isdir(lang_path):
        for subfile in os.listdir(lang_path):
            sample_path = os.path.join(lang_path, subfile)
            if os.path.isfile(sample_path):
                with open(sample_path, "r") as langDoc:
                    text = langDoc.read()
                doc = Document(text, type=lang_name)
                documents.append(doc)

print("Creating statistical model...")
m = Model(documents=documents, weight=IG)

# Test with sample Java doc
print("Comparing test document...")
with open("coffee.txt", "r") as myfile:
    testFile = myfile.read()
testDoc = Document(testFile, type='Java')
# neighbors() returns a (similarity, document) list, most similar first.
testSimilarities = m.neighbors(testDoc, top=10)
prediction = testSimilarities[0][1].type
confidence = testSimilarities[0][0]
# Use the already-computed 'prediction' instead of re-reading the tuple.
print("LanguageLearn has predicted " + prediction + " with a "
      + str(round(confidence * 100, 2)) + "% confidence")
def recommend_game(this_game):
    """Recommend up to 25 games similar to *this_game*.

    Each candidate's score is a weighted mix of cosine similarities:
    title (0.15), genre (0.2), publisher (0.2), keywords (0.2), plus a
    summary TF-IDF neighbour bonus (0.25).  The top candidates (at most
    40) are then sampled randomly to add variety.

    :param this_game: the game object to base recommendations on.
    :return: list of recommended game ids.
    """
    games = recommendable_games(this_game)
    total_recommendable = games.count()
    # str() instead of __str__(): same output, idiomatic.
    print('Total recommendable games based on ' + this_game.title
          + ": " + str(total_recommendable))

    document_title = Document(this_game.title)
    document_publisher = Document(this_game.publisher)
    document_summary = Document(this_game.summary, top=None, threshold=0,
                                stemmer=None, exclude=[], stopwords=False,
                                language='en')
    document_keywords = Document(', '.join(
        x['name'] for x in this_game.keywords.all().values("name")))
    document_genres = Document(', '.join(
        x['name'] for x in this_game.genres.all().values("name")))

    # format: {"id": id, "score": SUM(similarity * weight)}
    game_similarities = []
    summary_documents = []
    for game in games:
        game = Game.objects.filter(title=game['title'],
                                   platform=game['platform'])[0]
        # distance() is 1 - cosine similarity, so invert it back.
        title_similarity = 1 - distance(document_title.vector,
                                        Document(game.title).vector)
        publisher_similarity = 1 - distance(document_publisher.vector,
                                            Document(game.publisher).vector)
        genre_similarity = 1 - distance(document_genres.vector, Document(
            ', '.join(x['name'] for x in game.genres.all().values("name"))
        ).vector)
        keywords_similarity = 1 - distance(document_keywords.vector, Document(
            ', '.join(x['name'] for x in game.keywords.all().values("name"))
        ).vector)
        score = (0.15 * title_similarity) + (0.2 * genre_similarity) \
            + (0.2 * publisher_similarity) + (0.20 * keywords_similarity)
        summary_documents.append(Document(game.summary, top=None, threshold=0,
                                          stemmer=None, exclude=[],
                                          stopwords=False, language='en',
                                          name=game.id))
        game_similarities.append({"id": game.id, "score": score})

    to_compare = Document(document_summary)
    model = Model(documents=summary_documents, weight=TFIDF)
    # neighbors() returns (similarity, document) pairs, most similar first.
    neighbours = model.neighbors(to_compare, top=total_recommendable)
    for neighbour in neighbours:
        for rec_game in game_similarities:
            if rec_game['id'] == neighbour[1].name:
                rec_game['score'] = rec_game['score'] + 0.25 * neighbour[0]

    recommended = sorted(game_similarities,
                         key=lambda k: -k['score'])[0:total_recommendable]
    # BUG FIX: random.sample raises ValueError when the population is
    # smaller than the sample size; clamp the sample to what exists.
    pool = recommended[0:40] if len(recommended) >= 40 else recommended
    random_selection = random.sample(pool, min(25, len(pool)))
    recommended_ids = [g['id'] for g in random_selection]
    return recommended_ids