Esempio n. 1
0
def main(argv):
    """Train the "twss"/"general" naive Bayes classifier and save it.

    Two CSV files are read and the first column of every non-empty row is
    taken as a sample. Rows of the general corpus are labelled "general",
    rows of the twss corpus are labelled "twss". Each sample goes through
    ``process_string``; the combined, shuffled samples are converted with
    ``word_features`` and used to train ``nltk.NaiveBayesClassifier``. The
    trained classifier is handed to ``SaveClassifier`` together with the
    name "twss_classifier.pkl".

    Args:
        argv: Sequence of positional arguments. ``argv[0]``, when present,
            replaces the default general corpus path ('general.csv') and
            ``argv[1]``, when present, replaces the default twss corpus
            path ('twss.csv'). Presumably this is ``sys.argv[1:]`` --
            confirm against the caller.
    """
    general_file = 'general.csv'
    twss_file = 'twss.csv'

    # Each argument overrides its own default, so supplying only the
    # general corpus path no longer fails with IndexError on argv[1].
    if len(argv) > 0:
        general_file = argv[0]
    if len(argv) > 1:
        twss_file = argv[1]

    general_file = os.path.abspath(general_file)
    twss_file = os.path.abspath(twss_file)

    # 'rb' is the mode the Python 2 csv module expects for its input file.
    # The with-blocks make sure both handles are closed after loading, and
    # "if row" skips blank lines, which csv.reader yields as empty lists.
    print("Loading general corpus...")
    with open(general_file, 'rb') as handle:
        general = [(process_string(row[0]), "general")
                   for row in csv.reader(handle) if row]

    print("Loading twss corpus...")
    with open(twss_file, 'rb') as handle:
        twss = [(process_string(row[0]), "twss")
                for row in csv.reader(handle) if row]

    print("Compiling corpus...")
    corpus = twss + general
    # Shuffle so the two labels are interleaved in the training set.
    random.shuffle(corpus)

    feature_set = [(word_features(text), label) for (text, label) in corpus]

    print("Building classifier...")
    classifier = nltk.NaiveBayesClassifier.train(feature_set)

    print("Saving classifier...")
    SaveClassifier(classifier, "twss_classifier.pkl")
Esempio n. 2
0
def main(argv):
    """Build and persist the "twss" versus "general" text classifier.

    The first column of every non-empty row in the general corpus CSV is
    labelled "general" and the first column of every non-empty row in the
    twss corpus CSV is labelled "twss". Samples are normalised with
    ``process_string``, merged and shuffled, turned into feature sets with
    ``word_features`` and fed to ``nltk.NaiveBayesClassifier.train``. The
    resulting classifier is passed to ``SaveClassifier`` with the name
    "twss_classifier.pkl".

    Args:
        argv: Sequence of positional arguments. The first entry, if any,
            is the general corpus path (default 'general.csv'); the second
            entry, if any, is the twss corpus path (default 'twss.csv').
            Presumably the command line without the program name --
            confirm against the caller.
    """
    general_file = 'general.csv'
    twss_file = 'twss.csv'

    # Override the defaults one at a time: a single argument used to crash
    # with IndexError because argv[1] was read unconditionally.
    if len(argv) > 0:
        general_file = argv[0]
    if len(argv) > 1:
        twss_file = argv[1]

    general_file = os.path.abspath(general_file)
    twss_file = os.path.abspath(twss_file)

    # Files are opened in 'rb' as the Python 2 csv module requires, inside
    # with-blocks so they are closed once read. Empty rows (blank lines)
    # are skipped instead of failing on row[0].
    print("Loading general corpus...")
    with open(general_file, 'rb') as source:
        general = [(process_string(row[0]), "general")
                   for row in csv.reader(source) if row]

    print("Loading twss corpus...")
    with open(twss_file, 'rb') as source:
        twss = [(process_string(row[0]), "twss")
                for row in csv.reader(source) if row]

    print("Compiling corpus...")
    corpus = twss + general
    # Mix the two labelled corpora before feature extraction.
    random.shuffle(corpus)

    feature_set = [(word_features(text), label) for (text, label) in corpus]

    print("Building classifier...")
    classifier = nltk.NaiveBayesClassifier.train(feature_set)

    print("Saving classifier...")
    SaveClassifier(classifier, "twss_classifier.pkl")
Esempio n. 3
0
 def on_status(self, status):
     """Print a status, classify its text and optionally record the result.

     The text of ``status`` is printed, then run through ``process_string``
     and ``word_features`` and classified by ``self.classifier``. When
     ``self.c`` is truthy, the row ``[label, text, text]`` is written via
     ``self.c.writerow``. Any exception raised along the way is printed
     and not propagated.
     """
     try:
         text = status.text
         print(text)
         # Look up the classifier first, as the original expression did.
         classify = self.classifier.classify
         label = classify(word_features(process_string(text)))
         if self.c:
             # The text is deliberately written in two columns, as before.
             row = [label, text, text]
             self.c.writerow(row)
     except Exception as err:
         print(err)
Esempio n. 4
0
 def on_status(self, status):
     """Handle one incoming status: echo it, classify it, maybe log it.

     Prints ``status.text``, computes its label by passing the text through
     ``process_string`` and ``word_features`` into
     ``self.classifier.classify``, and, if ``self.c`` is truthy, writes
     ``[label, text, text]`` with ``self.c.writerow``. Exceptions are
     caught, printed and otherwise ignored.
     """
     try:
         message = status.text
         print(message)
         classifier = self.classifier
         label = classifier.classify(word_features(process_string(message)))
         writer = self.c
         if not writer:
             # No writer configured: nothing to record for this status.
             return
         writer.writerow([label, message, message])
     except Exception as error:
         print(error)