Beispiel #1
0
# Build the vocabulary: the distinct words seen across the training documents.
# Words are fed straight into a set so memory stays proportional to the number
# of unique words, rather than first concatenating every word occurrence of
# the corpus into one list and deduplicating afterwards.
vocabulary = set(vocabulary)  # keep whatever was collected before this point
for doc in traning_docs:
    vocabulary.update(doc.words)
vocabulary_length = len(vocabulary)

# Topic labels for which a Topic model is built.
topics = ["earn", "acq", "money-fx", "grain", "crude"]

# Maps each topic name to its trained Topic instance.
knowledge = {}

# First pass: build one Topic per label from the training documents that
# carry that label, train it on all features, then register it.
for topic in topics:
    t = Topic(
        name=topic,
        documents=[doc for doc in traning_docs if doc.topic == topic],
        total_n_docs=number_of_docs,
        vocabulary_length=vocabulary_length,
    )
    t.train_all_features()
    knowledge[topic] = t

# Feature selection: every topic selects its features with all trained topics
# as input; the union of the per-topic selections is the feature vocabulary.
# NOTE(review): the literal 50 is presumably the number of features kept per
# topic -- confirm against Topic.select_features.
feature_vocabulary = set()
for topic in knowledge.values():
    topic.select_features(knowledge.values(), 50)
    feature_vocabulary.update(topic.features)

# Number of distinct features selected across all topics.
feature_vocabulary_length = len(feature_vocabulary)

# Second pass: call train_mutual on every topic with the shared feature set.
for topic in knowledge.values():
    topic.train_mutual(feature_vocabulary)