Ejemplo n.º 1
0
# A Model is a matrix (or vector space)
# with features as columns and feature weights as rows.
# We can then do calculations on the matrix,
# for example to compute TF-IDF or similarity between documents.

# Load a model from a folder of text documents:
documents = []
for f in glob.glob(os.path.join(os.path.dirname(__file__), "corpus", "*.txt")):
    text = codecs.open(f, encoding="utf-8").read()
    name = os.path.basename(f)[:-4]
    documents.append(Document(text, name=name))

m = Model(documents, weight=TFIDF)

# We can retrieve documents by name:
d = m.document(name="lion")

print d.keywords(top=10)
print
print d.tf("food")
print d.tfidf(
    "food")  # TF-IDF is less: "food" is also mentioned with the other animals.
print

# We can compare how similar two documents are.
# This is done by calculating the distance between the document vectors
# (i.e., finding those that are near to each other).

# For example, say we have two vectors with features "x" and "y".
# We can calculate the distance between two points (x, y) in 2-D space:
# d = sqrt(pow(x2 - x1, 2) + pow(y2 - y1, 2))
Ejemplo n.º 2
0
# A Model is a matrix (or vector space)
# with features as columns and feature weights as rows.
# We can then do calculations on the matrix,
# for example to compute TF-IDF or similarity between documents.

# Load a model from a folder of text documents:
documents = []
for f in glob.glob(os.path.join(os.path.dirname(__file__), "corpus", "*.txt")):
    text = codecs.open(f, encoding="utf-8").read()
    name = os.path.basename(f)[:-4]
    documents.append(Document(text, name=name))

m = Model(documents, weight=TFIDF)

# We can retrieve documents by name:
d = m.document(name="lion")

print(d.keywords(top=10))
print()
print(d.tf("food"))
# TF-IDF is less: "food" is also mentioned with the other animals.
print(d.tfidf("food"))
print()

# We can compare how similar two documents are.
# This is done by calculating the distance between the document vectors
# (i.e., finding those that are near to each other).

# For example, say we have two vectors with features "x" and "y".
# We can calculate the distance between two points (x, y) in 2-D space:
# d = sqrt(pow(x2 - x1, 2) + pow(y2 - y1, 2))