def test_register(self):
    """Check that ``formats.register`` adds a format class to the registry.

    ``CustomFormat`` must be absent from the registry before the call and
    present afterwards, with ``'trt'`` among the registry's keys.
    """
    known_formats = formats.get_registry()
    present_before = CustomFormat in known_formats.values()
    assert_false(present_before)

    formats.register('trt', CustomFormat)

    # The registry object fetched above is inspected again rather than
    # re-fetched, so the test relies on register() updating it in place.
    present_after = CustomFormat in known_formats.values()
    assert_true(present_after)
    key_present = 'trt' in known_formats.keys()
    assert_true(key_present)
def test_init_with_custom_format(self):
    """Build a classifier from a custom format registered as ``'redis'``.

    A throwaway format class is registered, a mock object is passed as the
    data source together with an extra ``port`` keyword, and the resulting
    classifier's ``train_set`` must equal the data the format returns.
    """
    liked = ('I like turtles', 'pos')
    hated = ('I hate turtles', 'neg')
    training_pairs = [liked, hated]

    class MockRedisFormat(formats.BaseFormat):
        # Stand-in format: stores its constructor arguments and always
        # hands back the fixed training pairs defined above.

        def __init__(self, client, port):
            self.client, self.port = client, port

        @classmethod
        def detect(cls, stream):
            return True

        def to_iterable(self):
            return training_pairs

    formats.register('redis', MockRedisFormat)

    fake_client = mock.Mock()
    classifier = NaiveBayesClassifier(fake_client, format='redis', port=1234)
    assert_equal(classifier.train_set, training_pairs)
import traceback
from textblob.classifiers import NaiveBayesClassifier
from textblob import formats
from xml.etree import ElementTree
import json
import sys
import random
import facebook
import requests
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20
# (its contents moved to sklearn.model_selection) -- confirm the pinned version.
from sklearn.cross_validation import train_test_split


class PipeDelimitedFormat(formats.DelimitedFormat):
    """Subclass of ``formats.DelimitedFormat`` with ``delimiter`` set to ``'|'``."""

    delimiter = '|'


# Register the pipe-delimited format with textblob under the name 'psv'.
formats.register('psv', PipeDelimitedFormat)

# Opened in write mode at import time, so an existing Error.csv is truncated.
# No matching close() is visible in this excerpt.
errorFile = open('Error.csv','w')
separator="|"
processed_data="processed_data.psv"
processed_age_data = "processed_age_data.psv"
# reload() is a Python 2 builtin; the setdefaultencoding() call it is usually
# paired with is commented out on the next line.
reload(sys)
#sys.setdefaultencoding("utf-8")
import csv


def get_fb_token(app_id, app_secret):
    """POST the app credentials to the Facebook Graph OAuth token endpoint.

    The request uses the ``client_credentials`` grant type, with ``app_id``
    sent as ``client_id`` and ``app_secret`` as ``client_secret``.

    NOTE(review): the rest of this function is not visible in this excerpt,
    so how the response is parsed and what is returned cannot be confirmed.
    """
    payload = {'grant_type': 'client_credentials', 'client_id': app_id, 'client_secret': app_secret}
    # NOTE(review): `file` shadows the Python 2 builtin of the same name.
    file = requests.post('https://graph.facebook.com/oauth/access_token?', params = payload)
    #print file.text #to test what the FB api responded with
import logging
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
import nltk
from textblob.classifiers import NaiveBayesClassifier
from textblob import formats
from xml.etree import ElementTree
import pickle
import csv
import json
import sys, getopt


class PipeDelimitedFormat(formats.DelimitedFormat):
    """Subclass of ``formats.DelimitedFormat`` with ``delimiter`` set to ``'|'``."""

    delimiter = '|'


# Register the pipe-delimited format with textblob under the name 'psv'.
formats.register('psv', PipeDelimitedFormat)
separator="|"
processed_data="processed_data.psv"
# reload() is a Python 2 builtin; reloading sys makes setdefaultencoding()
# available again so the process-wide default encoding can be set to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")
import csv


def preprocess(training_data_dir):
    """
    training_data_dir : path to training dir. This dir should contain profile and text dir.