def previous_main():
    DEBUGMODE = 0
    numFeatures = 100

    path_dataset_dav_windows = 'Dati/training_set_text.csv'
    path_class_csv = 'Dati/training_set_features.csv'
    path_model_file = 'Dati/model.dat'

    cleaner = TweetsCleaner.TweetsCleaner()
    loader = DatasetLoader.DatasetLoader()
    model = VectorModel.VectorModel()
    classificator = BayesanClassificator.BayesanClassificator()
    evaluator = ClassifierEvaluation.ClassifierEvaluation()

    tweets_dataset = loader.LoadTweets(path_dataset_dav_windows)
    tweets_cleaned = cleaner.ProcessDatasetDict(tweets_dataset)
    features_dataset = loader.LoadFeatures(path_class_csv, 400)
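    # NOTE: only the first 400 feature rows are loaded, matching the [:400] phrase slice below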
    """
        Trasforma il vettore delle features in un dizionario con chiave IdDoc e valore la classe corrispondente
        (1 : neutra, 2: positiva, 3: negativa, 4: mista
    """
    classes_dataset = loader.createClasses(features_dataset)
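    # illustrative shape (hypothetical ids): {101: 1, 102: 3, ...}, i.e. doc id -> class label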
    """
        Genero il Modello TF-IDF
    """
    all_phrases = list(tweets_cleaned.values())[:400]

    # pair each phrase with a sequential document id
    phrases_tuples = list(enumerate(all_phrases))

    # rebuild and cache the TF-IDF model unless debug mode is on and a cached copy exists
    if not DEBUGMODE or not os.path.exists(path_model_file):
        tfidf = model.get_tfidf(phrases_tuples)
        model.persist_tfidf(tfidf, path_model_file)
    else:
        tfidf = model.deserialize_tfidf(path_model_file)

    doc_index = model.get_doc_index(tfidf)
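    # doc_index presumably maps each document to its TF-IDF vector; it feeds the document-term table below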

    # take the class labels as the gold-standard solution
    labels = numpy.array(list(classes_dataset.values()))

    # apply LSA to reduce the document-term matrix to numFeatures dimensions
    reduced = model.LSA(model.get_doc_index_table(doc_index), numFeatures)
    # rescale values to [0, 1]
    reduced = loader.NormalizeDataset(reduced)
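    # reduced is now an (n_docs x numFeatures) matrix in [0, 1], the input for the Bayes test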

    BayesTest(reduced, labels)
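
# BayesTest is not shown in this example. A minimal sketch of what it might do,
# assuming scikit-learn (GaussianNB and the 80/20 split are assumptions, not the
# project's BayesanClassificator):
#
# from sklearn.model_selection import train_test_split
# from sklearn.naive_bayes import GaussianNB
# from sklearn.metrics import accuracy_score
#
# def BayesTest(reduced, labels):
#     X_train, X_test, y_train, y_test = train_test_split(
#         reduced, labels, test_size=0.2, random_state=0)
#     clf = GaussianNB().fit(X_train, y_train)
#     print("accuracy:", accuracy_score(y_test, clf.predict(X_test)))
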
import os
import pickle
import numpy
import TweetsCleaner
import DatasetLoader
import VectorModel
import BayesanClassificator
import ClassifierEvaluation

if __name__ == "__main__":

    DEBUGMODE = 1

    path_dataset_dav_windows = 'Dati/training_set_text.csv'
    path_class_csv = 'Dati/training_set_features.csv'
    path_model_file = 'Dati/model.dat'

    cleaner = TweetsCleaner.TweetsCleaner()
    loader = DatasetLoader.DatasetLoader()
    model = VectorModel.VectorModel()
    evaluator = ClassifierEvaluation.ClassifierEvaluation()

    tweets_dataset = loader.LoadTweets(path_dataset_dav_windows)
    tweets_cleaned = cleaner.ProcessDatasetDict(tweets_dataset)
    features_dataset = loader.LoadFeatures(path_class_csv)
    """
        Trasforma il vettore delle features in un dizionario con chiave IdDoc e valore la classe corrispondente
        (1 : neutra, 2: positiva, 3: negativa, 4: mista
    """
    classes_dataset = loader.createClasses(features_dataset)
    """
        Genero il Modello TF-IDF
    """
    all_phrases = list(tweets_cleaned.values())
Example #3
#!/usr/bin/env python
__author__ = "Tom Kocmi"

import logging
import VectorModel
import Cons
import GenerateRules
import new_fixes
import time

logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.INFO)
start = time.time()  # for counting the time

model = VectorModel.getVectorModel()  # this loads the existing model
# to generate a new model instead, pass True as the argument
# generate prefixes and suffixes from the vocabulary of the model;
# if the fixes already exist, they are loaded from file instead of being regenerated.
# To force regeneration, pass True as the second parameter.
prefixes, suffixes = new_fixes.generateFixes(model.vocab.keys())
rules = GenerateRules.generate(prefixes, suffixes, model)
print rules

# experiments with the model, the words must be in the dictionary
# print model.most_similar(positive=['winston', 'love'], negative=['war'])
# print model.doesnt_match("winston julia brother goldstein".split())
# print model.similarity("winston", "julia")

print "Time: " + str(time.time() - start)
Example #4
#!/usr/bin/env python
# -*- coding:utf-8 -*-
__author__ = "Tom Kocmi"

import logging
import VectorModel
import Cons, Fixes, GenerateRules
import time
import pickle
import Queue

start = time.time()  # for counting the time
logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.INFO)

model = VectorModel.getVectorModel()
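# loads the existing model; as in Example #3, pass True to regenerate it instead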

# rebuild and persist a downsampled vocabulary (uncomment to regenerate it):
# vocabulary = Fixes.downsampleVocabulary(model, Cons.MAXWORDS4AFFIXES)
# with open("models/vocabulary2.data", 'w') as f:
#     pickle.dump(vocabulary, f)

# load the previously persisted vocabulary so that generateFixes has its input
with open("models/vocabulary.data", 'r') as f:
    vocabulary = pickle.load(f)

prefixes, suffixes = Fixes.generateFixes(vocabulary)

# rules = GenerateRules.generate(prefixes, suffixes, model, vocabulary)
# with open("models/rules6.data", 'w') as f:
#     pickle.dump(rules, f)