Beispiel #1
0
from DatumBox import DatumBox
# API key handed to the DatumBox client below.
API_KEY = "2a13913dda346761765020c1f66e34f8"

datum_box = DatumBox(API_KEY)
# Run keyword extraction on a sample sentence and print the result.
keywords = datum_box.keyword_extract("I hate my cat and love my dog")
print(keywords)

Beispiel #2
0
from DatumBox import DatumBox
# Module-level DatumBox client, created with a hard-coded key string.
# NOTE(review): the key is committed in source; consider loading it from
# configuration or the environment instead.
datum_box = DatumBox("2a13913dda346761765020c1f66e34f8")
#import networkx as nx
#import matplotlib.pyplot as plt
import urllib2
import re
import nltk, string
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem.snowball import SnowballStemmer
from nltk.corpus import stopwords
# Matches one markup tag: "<", one or more characters other than ">", then ">".
TAG_RE = re.compile(r"<[^>]+>")

# Requests NLTK's 'punkt' resource; this runs every time the module is loaded.
nltk.download('punkt')
stemmer = nltk.stem.porter.PorterStemmer()
# Maps the code point of every punctuation character to None, which is the
# shape of a translate() table that deletes those characters.
remove_punctuation_map = {ord(symbol): None for symbol in string.punctuation}


def fetch_page(siteURL):
    # create a variable which will hold our desired web page as a string
    site = siteURL
    # create the appropriate headers for our http request so that we won't run
    # into any 403 forbidden errors. All of this will be available at the tutorial
    # page that I will link to in the description below
    hdr = {
        'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'none',
        'Accept-Language': 'en-US,en;q=0.8',
Beispiel #3
0
import sys
import requests
import re

from DatumBox import DatumBox


# Compiled once at import time instead of on every call.
# DOTALL lets "." cross newlines, so a multi-line <script>/<style> element is
# removed together with its contents and tags whose attributes span several
# lines are matched too. IGNORECASE covers upper-case markup such as <SCRIPT>.
_SCRIPT_RE = re.compile(r'<script.*?/script>', re.DOTALL | re.IGNORECASE)
_STYLE_RE = re.compile(r'<style.*?/style>', re.DOTALL | re.IGNORECASE)
_ANY_TAG_RE = re.compile(r'<.*?>', re.DOTALL)


def stripTags(text):
    """Return ``text`` with HTML markup replaced by single spaces.

    Whole ``<script>...</script>`` and ``<style>...</style>`` elements are
    removed first (including their contents), then every remaining tag.
    Each removed span is replaced by one space so that words separated
    only by markup do not get glued together.

    Args:
        text: The markup to clean; an empty string is returned unchanged.

    Returns:
        The text with scripts, styles and tags replaced by spaces.
    """
    # Order matters: the script/style elements must go before the generic
    # tag pattern, otherwise only their tags would be stripped and the
    # JavaScript/CSS source would remain in the output.
    for pattern in (_SCRIPT_RE, _STYLE_RE, _ANY_TAG_RE):
        text = pattern.sub(' ', text)
    return text


# Download the page whose URL is given as the first command-line argument.
request = requests.get(sys.argv[1])
# Continue with the UTF-8 encoded response body.
content = request.text.encode('utf-8')
print(content)

# Replace scripts, styles and tags with spaces before analysis.
raw_content = stripTags(content)
print(raw_content)

# Pass the stripped text to DatumBox's twitter_sentiment_analysis and print the result.
db = DatumBox('9eb37f7399b4d074c5b83358f24ba626')
res = db.twitter_sentiment_analysis(raw_content)
print(res)
Beispiel #4
0
            </div>
        </div>
        <div id="about">
            <h2> About Us</h2>
            <p> Something blah blah doo doo doo oop booop boopity boopoop blah blah doo doo doo oop booop boopity boopoop blah blah doo doo doo oop booop boopity boopoop blah blah doo doo doo oop booop boopity boopoop blah blah doo doo doo oop booop boopity boopoop blah blah doo doo doo oop booop boopity boopoop</p>        
        </div>
    </div>

  </body>

</html>
"""


# Begin tests
# Module-level clients shared by the test cases below: one built from
# API_KEY, and one built from a deliberately invalid key that the
# bad-API-key test expects to raise DatumBoxError.
datum_box = DatumBox(API_KEY)
bad_datum_box = DatumBox("This-API-key-is-not-valid-(hopefully)")
    

class TestSentimentAnalysis(unittest.TestCase):
    """Exercise ``sentiment_analysis`` on the module-level DatumBox clients."""

    def test_positive_review(self):
        # The positive sample review must be labelled "positive".
        label = datum_box.sentiment_analysis(positive_review)
        self.assertEqual(label, "positive")

    def test_negative_review(self):
        # The negative sample review must be labelled "negative".
        label = datum_box.sentiment_analysis(negative_review)
        self.assertEqual(label, "negative")

    def test_bad_api_key(self):
        # The client created with an invalid key must raise DatumBoxError.
        with self.assertRaises(DatumBoxError):
            bad_datum_box.sentiment_analysis(positive_review)
        
class TestTwitterSentimentAnalysis(unittest.TestCase):
Beispiel #5
0
from DatumBox import DatumBox
datum_box = DatumBox("4e9f0a6e14a83e38d9d9fac895c84e68")
# Run topic classification on a short sample text and print the result.
sample_text = "datumbox api wrapper text python code commit sign"
print(datum_box.topic_classification(sample_text))
Beispiel #6
0
def get_topic_category(api_key, text):
    """Return the topic classification of ``text``.

    A new DatumBox client is created from ``api_key`` on every call, and
    the value of its ``topic_classification(text)`` call is returned as is.
    """
    return DatumBox(api_key).topic_classification(text)