Example no. 1
    def populateTweets(self):
        self.lat, self.long, self.radius = self.findCoordinatesRadius()
        geo = str(self.lat) + "," + str(self.long) + "," + str(
            self.radius) + "km"
        tweets = api.search(q=self.search, lang='en', geocode=geo, rpp=100)

        showcase = tweets[0:5]
        self.showcase = []
        for tweet in showcase:
            self.showcase.append([tweet.text, tweet.user.screen_name])

        hashtagsRaw = [tweet.entities['hashtags'] for tweet in tweets]
        hashtagsList = list(itertools.chain.from_iterable(hashtagsRaw))
        hashtags = [h['text'] for h in hashtagsList]
        # Counter handles the frequency counting and top-5 selection
        self.popularHashtags = dict(Counter(hashtags).most_common(5)).keys()

        texts = [tweet.text for tweet in tweets]
        self.sentiment = 0.0
        alchemyapi = AlchemyAPI()
        for text in texts:
            response = alchemyapi.sentiment_targeted('text', text.lower(),
                                                     self.search.lower())
            if response['status'] != 'ERROR' and response['docSentiment'][
                    'type'] != 'neutral':
                numeric = float(response['docSentiment']['score'])
                self.sentiment = self.sentiment + (
                    numeric / len(texts))  #computes average sentiment
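
A quick sketch of the geocode string the Twitter search API expects, matching the concatenation above; the coordinates here are made up:

lat, lon, radius = 40.7128, -74.0060, 10
geo = "%s,%s,%skm" % (lat, lon, radius)  # -> "40.7128,-74.006,10km"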
Example no. 2
def sentiment():

    twitter = Twython(passwords.Live.twitter_app_key,
                      passwords.Live.twitter_app_secret,
                      oauth_version=2)
    access_token = twitter.obtain_access_token()
    twitter = Twython(passwords.Live.twitter_app_key,
                      access_token=access_token)

    search_results = None
    try:
        search_results = twitter.search(q='$' + request.args.get('symbol'),
                                        result_type='popular')
    except TwythonError as e:
        print e

    twitter_corpus = ""
    for tweets in search_results['statuses']:
        twitter_corpus += tweets['text'].encode('utf-8')

    #Create the AlchemyAPI Object
    alchemyapi = AlchemyAPI()
    response = alchemyapi.sentiment('text', twitter_corpus)

    sentiment = None
    if response['status'] == 'OK':
        sentiment = {"sentiment": response['docSentiment']['type']}

    if request.args.get('output') == "jsonp":
        return Response('callback(' + json.dumps(sentiment) + ')',
                        content_type='application/javascript')
    else:
        return jsonify(sentiment)
Example no. 3
def run_sentiment_analysis(tweets, text_key):
    def print_error(response):
        # This should be replaced with better logging
        print('Error with AlchemyAPI response:')
        print(response, '\n')

    alchemyapi = AlchemyAPI()
    results = []
    for item in tweets:
        if text_key not in item:
            # Assume it's a bad tweet and continue
            print(text_key, 'not found in tweet')
            continue
        sentiment = alchemyapi.sentiment('text', item[text_key])
        try:
            if sentiment['status'].lower() == 'error':
                # Unrecognized language, emoji only, etc...
                print_error(sentiment)
                continue
            # Make a deep copy (since it's a nested dictionary)
            new_item = copy.deepcopy(item)
            sentiment_type = sentiment['docSentiment']['type']
            new_item['sentiment_type'] = sentiment_type
            if sentiment_type == 'neutral':
                new_item['sentiment_score'] = 0
            else:
                new_item['sentiment_score'] = sentiment['docSentiment'][
                    'score']
            results.append(new_item)
        except Exception as ex:
            print(type(ex).__name__)
            print_error(sentiment)

    return results
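
A minimal call-site sketch for run_sentiment_analysis, assuming the AlchemyAPI SDK is set up with a valid key; the sample tweets and the 'text' key are illustrative (the function itself relies on the copy module):

import copy

sample_tweets = [
    {'text': 'I love this new phone, the camera is great'},
    {'text': 'worst customer service ever'},
]
for item in run_sentiment_analysis(sample_tweets, 'text'):
    print('%s -> %s (%s)' % (item['text'], item['sentiment_type'],
                             item['sentiment_score']))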
Example no. 4
def main():

    alchemyapi = AlchemyAPI()

    user_tweets = read_tweet_text_per_user(f_in)
    cnt = 0
    LAST_USR = "******"
    UNPROCESSED = False
    with codecs.open(f_out, 'a', encoding='utf8') as output_file:
        for user1 in user_tweets.iterkeys():
            cnt += 1
            if not UNPROCESSED:
                if user1 != LAST_USR:
                    continue
                else:
                    UNPROCESSED = True
                    print("Found", LAST_USR, cnt)
            if cnt % 100 == 0:
                print(cnt, user1)
            tweets1 = user_tweets[user1]
            BREAK, taxonomy_result1 = alchemy_on_tweets(
                alchemyapi, user1, tweets1)
            # there is the API daily limit so we check when exceeded and continue tomorrow from
            # the last processed users
            if BREAK:
                print("Last processed user: ", user1)
                return
            output_file.write(
                unicode(json.dumps(taxonomy_result1, ensure_ascii=False)) +
                '\n')
Example no. 5
def nlp_process(ids, ids_hash):
    #instantiate an elasticsearch client
    es = Elasticsearch()

    #instantiate an alchemy client
    alchemyapi = AlchemyAPI()

    for item in ids:
        data = ' '.join(ids_hash[item])
        lowers = data.lower()
        alchem_data = []

        response = alchemyapi.keywords('text', lowers, {'sentiment': 1})

        if response['status'] == 'OK':
            print('#Success#')
            for keyword in response['keywords']:
                al_temp = defaultdict()

                al_temp['text'] = keyword['text'].encode('utf-8')
                al_temp['relevance'] = keyword['relevance']
                al_temp['sentiment'] = keyword['sentiment']['type']

                if 'score' in keyword['sentiment']:
                    al_temp['score'] = keyword['sentiment']['score']

                alchem_data.append(al_temp)
        else:
            print('Error in keyword extraction call: ', response['statusInfo'])
        print(len(alchem_data))
        # prepare body for insertion and index the document
        doc = {"business_id": item, "word_freq": alchem_data}
        res = es.index(index="alchem", doc_type='doc', body=doc)
Example no. 6
def convert_to_clean_titles(infile, outfile):
    alchemyapi = AlchemyAPI()
    f = open(infile, "r")
    f2 = codecs.open(outfile, "w+", "utf-8")
    f3 = codecs.open("Entities.txt", "w+", "utf-8")
    count = 1
    for line in f:
        line = line.decode("utf-8")
        response = alchemyapi.entities('text', line, {
            'sentiment': 1,
            'disambiguate': 1
        })
        if response['status'] == 'OK':
            for entity in response['entities']:
                if "type" in entity.keys:
                    if entity['type'] in [
                            'Country', 'Holiday', 'Movie', 'MusicGroup',
                            'Organization', 'Person', 'PrintMedia', 'Region',
                            'StateOrCountry', 'TelevisionShow',
                            'TelevisionStation', 'Money', 'Company',
                            'GeographicFeature'
                    ]:
                        line = line.replace(entity['text'],
                                            entity['text'].title())
                    print >> f3, entity['text'], entity['type'], entity[
                        'sentiment']
            print >> f2, line,
        else:
            print >> f2, line,
        print count, line
        count += 1
Example no. 7
    def handle(self, *args, **options):
        es = elasticsearch.Elasticsearch(es_url)

        alchemyapi = AlchemyAPI()

        query = {
           "query": {
               "and": [
                   { "missing": { "field": "entities" } },
                   { "terms": { "language": ['en', 'de', 'fr', 'it', 'es', 'pt'] } },
                   { "match": { "_all": "merkel" }}
                   #{ "range": { "published": { "gte" : "now-1d" } } }
               ]
           },
           "size": 500
        }

        res = es.search(index="rss", doc_type="posting", body=query)
        logger.info("%d documents found" % res['hits']['total'])

        for p in res['hits']['hits']:
            #logger.info('Extracting entities for - %s' % p['_id'])
            
            analyzed_text = p['_source']['title'] + ' ' + p['_source']['description']

            try:
                response = alchemyapi.entities("text", analyzed_text)
                entities = [ x['text'] for x in response["entities"] ]

                #logger.info("Entities: " + entities)

                es.update(index=p['_index'], doc_type=p['_type'], id=p['_id'],
                          body={"doc": {"entities": entities}})
            except KeyError:
                logger.exception("Problem getting sentiment :( %s" % response)
Example no. 8
def get_sentiment(places):
    twitter_api = get_twitter_api()
    alchemy_api = AlchemyAPI()

    sentiments = dict()
    for place in places:
        r = twitter_api.GetSearch(term=place, count=10)
        for tw in r:
            txt = tw.GetText()
            response = alchemy_api.sentiment('text', txt)
            if response['status'] == 'OK':
                sentiments[txt] = str(response['docSentiment']['type'])

    ret_list = []
    for t, s in sentiments.iteritems():
        ret_json = dict()
        ret_json["tweet"] = t
        ret_json["sentiment"] = s
        ret_list.append(ret_json)

    list_len = 16

    if len(ret_list) > list_len:
        ret_list = random.sample(ret_list, list_len)
    else:
        for i in xrange(len(ret_list), list_len):
            ret_list.append({"No Tweet": "neutral"})
    print ret_list
    return ret_list
Example no. 9
 def sentiment_analysis(text):
     alchemy_api = AlchemyAPI()
     response = alchemy_api.sentiment("text", text)
     try:
         return float(response["docSentiment"]['score'])
     except (KeyError, ValueError):
         # neutral or error responses carry no numeric score
         return None
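
A possible call, assuming a configured API key; AlchemyAPI document-sentiment scores fall roughly in [-1, 1]:

score = sentiment_analysis("the service was slow but the food was great")
if score is not None:
    print("positive" if score > 0 else "negative")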
Example no. 10
def updateCounter():
  global use
  global alchemyapi
  print use
  use += 1
  if use >= 15:
    use = 0
  alchemyapi = AlchemyAPI(use = use)
Example no. 11
def get_sentiment(text):
    alchemyapi = AlchemyAPI()
    for key in utils.get_random_alchemy_credentials():
        alchemyapi.apikey = key
        response = alchemyapi.sentiment("text", text)
        if 'docSentiment' not in response:
            continue  # bad response or exhausted key; try the next one
        return response['docSentiment'].get('score', '0')
    # all candidate keys failed
    return None
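
utils.get_random_alchemy_credentials is not shown; a hypothetical stand-in, assuming one API key per line in a local file, might look like:

import random

def get_random_alchemy_credentials():
    # shuffle so repeated calls spread load across the available keys
    with open('alchemy_keys.txt') as f:
        keys = [line.strip() for line in f if line.strip()]
    random.shuffle(keys)
    return keys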
Example no. 12
def retrieveReviewSentiment(text):
    alchemyapi = AlchemyAPI()
    response = alchemyapi.sentiment("text", text)
    status = response["status"]
    if status == 'OK':
        return response["docSentiment"]["type"]
    else:
        return response['statusInfo']
Example no. 13
def main(data):
    args = argv('@', data['recv'])
    # look for URL
    link = geturl(data['recv'])
    if link and link != "" and not modeCheck('b', data):
        link = link[0]
        # look for title
        badext = ('.cgi', '.pdf')
        imgext = ('.jpg', '.png', '.gif', '.bmp')
        if not link[-4:].lower() in badext:
            if not link[-4:].lower() in imgext:
                title = gettitle(link)
                if title:
                    title = html_decode(title)
                    # encode unicode object to byte string
                    if type(title) == unicode:
                        title = title.encode('utf-8', "ignore")
                    title = title.replace('\n', ' ')
                    title = title.replace('\r', ' ')
                    title = title.strip()
                    if len(title) >= 150:
                        title = title[:150]
                    if len(link) > int(
                            data['config']['settings']['maxLinkLen']):
                        # post title + tiny
                        data['api'].say(
                            args['channel'],
                            '^ ' + title + ' ' + maketiny(link) + ' ^')
                        return
                    else:
                        # post title only
                        data['api'].say(args['channel'], '^ ' + title + ' ^')
                        return
            else:
                # We've got an image URL.
                from alchemyapi import AlchemyAPI
                alchemyapi = AlchemyAPI()
                response = alchemyapi.imageTagging('url', link)
                if response['status'] == 'OK' and response['imageKeywords'][0][
                        'text'] != 'NO_TAGS':
                    retme = "^ Image of: "
                    for keyword in response['imageKeywords']:
                        retme += "%s(%s%%) " % (
                            keyword['text'], int(
                                float(keyword['score']) * 100))
                    if len(link) > int(
                            data['config']['settings']['maxLinkLen']):
                        retme += maketiny(link) + " "
                    retme += "^"
                    data['api'].say(args['channel'], retme)
                    return
        if len(link) > int(data['config']['settings']['maxLinkLen']):
            # post tiny only
            data['api'].say(args['channel'], '^ ' + maketiny(link) + ' ^')
            return
        else:
            # nothing
            return False
Example no. 14
def connect_alchemy(url):
    # to connect with alchemy and tag the content
    from alchemyapi import AlchemyAPI
    alchemyapi = AlchemyAPI()

    resp = alchemyapi.text('url', url)

    response = alchemyapi.keywords("text", resp['text'])

    keywords = response["keywords"]
    return keywords
Example no. 15
def extract_entities(text, lang):
    entities = {}
    alchemyapi = AlchemyAPI()
    response = alchemyapi.entities('text', text, {'sentiment': 1})
    if response['status'] == 'OK':
        for entity in response['entities']:
            key = entity['text'].encode('utf-8')
            value = entity['type']
            entities[key] = convert_label(value)
    return entities
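
convert_label is not defined in this snippet; a hypothetical stand-in that maps AlchemyAPI entity types onto coarse NER-style tags could be:

# assumed mapping; the real convert_label may differ
LABEL_MAP = {'Person': 'PER', 'Organization': 'ORG',
             'City': 'LOC', 'Country': 'LOC'}

def convert_label(alchemy_type):
    return LABEL_MAP.get(alchemy_type, 'MISC')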
Example no. 16
def GetAlchemyAPIObject():
    with open("api_key.txt","r") as aFile:
        for line in aFile.read().split("\n"):
            if line != "":
                api = AlchemyAPI(line)
                result = api.sentiment("text","test")
                if result["status"] != "ERROR":
                    return api
    print "Could not initialize valid, usable AlchemyAPI object. Consider requesting another API key."
    exit()
    return None
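
A possible call site, assuming api_key.txt holds one candidate key per line:

api = GetAlchemyAPIObject()
result = api.sentiment("text", "this snippet validates keys up front")
print(result["docSentiment"]["type"])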
Example no. 17
def update_entities(incremental=True):
    alchemy = AlchemyAPI()
    
    story_criteria = {}
    if incremental:
        story_criteria = { 'entities' : { '$exists' : False } }
        
    for story in _db.stories.find(story_criteria):
        entities = alchemy.analyze_url(story['unescapedUrl'])['entities']
        logging.debug('%s, %s entities' % (story['title'], len(entities)))
        story['entities'] = entities
        _db.stories.save(story)
Example no. 18
	def __init__(self, aws_id, aws_key, es, aws_region='us-west-2', sqs_name='new-tweet-notifs'):
		try:
			#connect with sqs
			self.sqs = boto.sqs.connect_to_region(aws_region, aws_access_key_id=aws_id, aws_secret_access_key=aws_key)
			self.sqs_queue = self.sqs.get_queue(sqs_name)
			self.alc = AlchemyAPI()
			self.sns = boto.sns.connect_to_region(aws_region)
			self.es = es
			self.thread_pool = ThreadPoolExecutor(max_workers=4)
		except Exception as e:
			print('Could not connect')
			print(e)
		else:
			# only report success when the connections were actually made
			print('Connected to AWS SQS: ' + str(self.sqs))
Example no. 19
def pos_with_entity_replaced_common_words(infile, outfile):
    alchemyapi = AlchemyAPI()
    common_word_pos = open("common_word_pos.txt", "r")
    title_data = open(infile, "r+")
    f2 = codecs.open(outfile, "w+", "utf-8")
    # iterate both files in lockstep; str.replace returns a new string,
    # so its result must be reassigned
    for line1, line2 in zip(title_data, common_word_pos):
        response = alchemyapi.entities('text', line1, {
            'sentiment': 1,
            'disambiguate': 1
        })
        if response['status'] == 'OK':
            for entity in response['entities']:
                line2 = line2.replace(entity['text'], entity['type'])
            print >> f2, line2,
Example no. 20
def alchemy_keywords(title):
    response = AlchemyAPI().keywords('text', title.encode('utf8'))
    keywords = []  # roots of categories
    # categorize the paragraph
    if response['status'] == 'OK':
        for keyword in response['keywords']:
            #print keyword
            label, score = keyword['text'], keyword['relevance'][:4]
            keywords.append(str(label))
    else:
        #print 'Error in concept tagging call: ', response['statusInfo']
        pass
    # roots = list(set(roots)) # remove duplicates
    #print 'Keywords: ', keywords
    return keywords
Example no. 21
def getSoup():
    sock = urllib.urlopen('https://en.wikipedia.org/wiki/Motocross')
    sockRaw = sock.read()
    soup = BeautifulSoup(sockRaw, "html.parser")
    soupText = soup.get_text()

    # use the alchemyAPI to find the keyword/phrases from the texts
    alchemyapi = AlchemyAPI()
    response = alchemyapi.keywords('text', soupText, {'maxRetrieve': 10})
    if response['status'] == 'OK':
        print "\nThe Keywords are:"
        for i in response['keywords']:
            print "Word: " + i["text"] + ", Relevance: " + i["relevance"]
    else:
        print "Something went wrong with Alchemy."
Example no. 22
def checkDailyQuotaAndRunAlchemy(commentDb, cruiseLines):
    with open('data/Alchemy_response_keywords.json', 'rb') as fp:
        returned_keywords = json.load(fp)
    with open('data/Alchemy_response_relations.json', 'rb') as fp:
        returned_relations = json.load(fp)
    alchemyapi = AlchemyAPI()
    test = "test if finished Alchemy daily quota"
    response = alchemyapi.keywords('text', test, {'sentiment': 0})
    if response['status'] == 'OK':
        returned_keywords, returned_relations = runAlchemyApi(
            cruiseLines, commentDb, returned_keywords, returned_relations,
            alchemyapi)
    else:
        print 'Error in keyword extraction call: ', response['statusInfo']
    return returned_keywords, returned_relations
Example no. 23
    def handle(self, *args, **options):
        es = elasticsearch.Elasticsearch(es_url)

        alchemyapi = AlchemyAPI()

        query = {
            "query": {
                "and": [{
                    "missing": {
                        "field": "sentiment"
                    }
                }, {
                    "terms": {
                        "language": ['en', 'de', 'fr', 'it', 'es', 'pt']
                    }
                }, {
                    "range": {
                        "published": {
                            "gte": "now-1d"
                        }
                    }
                }]
            },
            "size": 100
        }

        res = es.search(index="rss-*", doc_type="posting", body=query)
        logger.info("%d documents found" % res['hits']['total'])

        for p in res['hits']['hits']:
            logger.info('Checking sentiment for - %s' % p['_id'])

            analyzed_text = p['_source']['title'] + ' ' + p['_source'][
                'description']

            try:
                response = alchemyapi.sentiment("text", analyzed_text)
                logger.info("Sentiment: " + response["docSentiment"]["type"])
                sentiment = response["docSentiment"]["type"]

                es.update(index=p['_index'],
                          doc_type=p['_type'],
                          id=p['_id'],
                          body={"doc": {
                              "sentiment": sentiment
                          }})
            except KeyError:
                logger.exception("Problem getting sentiment :( %s" % response)
Example no. 24
 def taxonomy(self, para):
     response = AlchemyAPI().taxonomy('text', para)
     roots = []  # roots of categories
     # categorize the paragraph
     if response['status'] == 'OK':
         for category in response['taxonomy']:
             label, score = category['label'], category['score'][:4]
             root_label = label.split('/')[1]
             roots.append(str(root_label))
             if self.debug:
                 print "Root: %s \t Score: %s" % (label.ljust(40), score)
     else:
         print 'Error in concept tagging call: ', response['statusInfo']
     roots = list(set(roots))  # remove duplicates
     if self.debug: print 'Category roots', roots
     return roots
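
For reference, taxonomy labels are slash-delimited paths, so splitting on '/' and taking index 1 yields the root category; the label below is illustrative:

label = "/sports/motorcycle sports"
print(label.split('/')[1])  # -> sports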
Example no. 25
 def __init__(self, filename, language):
     self.al = AlchemyAPI('bf18ed72384724d86425c8674204039f87352870')
     self.filename = filename
     self.language = language
     self.filterKeys = {
         'created_at': u'[\'created_at\']',
         'id': u'[\'id\']',
         'lang': u'[\'lang\']',
         'tweet_urls': u'[\'entities\'][\'urls\']',
         'tweet_hashtags': u'[\'entities\'][\'hashtags\']',
         'user_location': u'[\'user\'][\'location\']',
         'keywords': u'[\'alchemy\'][\'keywords\']',
         'user_screen_name': u'[\'user\'][\'screen_name\']',
         'text': u'[\'text\']',
         'text_' + self.language: u'[\'text\']'
     }
Example no. 26
def createTweets(source, num): #num is number of tweets to create
    words = createDict(source, 2) #state size of 2 allows for more combinations as tweets are small
    tweets = []
    alchemyAPI = AlchemyAPI()
    for x in range(0, num): #at most 50% chance of using a hashtag
        if randint(0,1) == 0:
            tweet = generateText(words, 2, choice(range(100,140)))
            tweets.append(tweet)
        else:
            tweet = generateText(words, 2, choice(range(80,120)))
            response = alchemyAPI.concepts('text', tweet)
            if response['status'] == 'OK':
                hashtag = " #" + response['concepts'][0]['text'].replace(" ", "")
                if len(hashtag) <= 140 - len(tweet):
                    tweet = tweet + hashtag
            tweets.append(tweet)
    return tweets
Example no. 27
def analysecontent(content):
    """
    Process/Analyse the extracted contents with Alchemy API
    Assumption: api_key.txt with a valid key is available from where this program is getting executed.
    """
    print('Processing extracted text with AlchemyAPI...')
    alchemyapi = AlchemyAPI()
    response = alchemyapi.keywords('text', content, {'maxRetrieve': 10})
    if response['status'] == 'OK':
        print('---------------------------------')
        print('## Keywords      ## Relevance')
        for keyword in response['keywords']:
            print("{0}: {1}".format(keyword['text'].encode('utf-8'),
                                    keyword['relevance']))
        print('---------------------------------')
    else:
        print('Error in keyword extraction call: ', response['statusInfo'])
Example no. 28
def process(in_queue, out_queue):

    #INPUT:
    #query -> the query string that was used in the Twitter API search (i.e. "Denver Broncos")
    #in_queue -> the shared input queue that is filled with the found tweets.
    #out_queue -> the shared output queue that is filled with the analyzed tweets.

    #OUTPUT:
    #None

    #Create the alchemy api object
    alchemyapi = AlchemyAPI()

    while True:
        #Grab a tweet from the queue
        tweet = in_queue.get()
        #Initialise the tweet's sentiment container
        tweet['sentiment'] = {}

        try:
            #Calculate the sentiment for the entire tweet
            response = alchemyapi.sentiment('text', tweet['text'])

            #Add the score if its not returned neutral
            if response['status'] == 'OK':
                tweet['sentiment']['doc'] = {}
                tweet['sentiment']['doc']['type'] = response['docSentiment'][
                    'type']

                if 'score' in response['docSentiment']:
                    tweet['sentiment']['doc']['score'] = response[
                        'docSentiment']['score']
                else:
                    tweet['sentiment']['doc']['score'] = 0

            #Add the result to the output queue
            out_queue.put(tweet)

        except Exception as e:
            #If there's an error, just move on to the next item in the queue
            print 'Error ', e
            pass

        #Signal that the task is finished
        in_queue.task_done()
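
A sketch of how this worker might be wired up (Python 2, to match the snippet); the single worker thread and the one-tweet input are arbitrary choices:

from Queue import Queue
from threading import Thread

in_queue, out_queue = Queue(), Queue()
worker = Thread(target=process, args=(in_queue, out_queue))
worker.daemon = True  # don't block interpreter exit
worker.start()

in_queue.put({'text': 'loving the new stadium'})
in_queue.join()  # block until every queued tweet is analyzed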
Example no. 29
def get_alch_keys(url):
  #Create the AlchemyAPI Object
  print('-----Create AlchemyAPI')
  alch = AlchemyAPI()

  alch_keys = set()
  print('-----get concepts')
  alch_keys |= get_concepts(url, alch)
  print('-----get keywords')
  alch_keys |= get_keywords(url, alch)
  print('-----get entities')
  alch_keys |= get_entities(url, alch)

  good_alch_keys = set()
  for i in alch_keys:
    if verify_companies_with_crunchbase(i):
      good_alch_keys.add(i)
  return good_alch_keys
Example no. 30
 def __init__(self):
     self.KEYWORDS = {
         "firefox": "browser",
         "mozilla": "org",
         "google": "org",
         "chrome": "browser",
         "internet explorer": "browser",
         "microsoft": "org",
         "safari": "browser",
         "apple": "org"
     }
     self.API_KEY = self.read_api_key(".twitterapikey")
     self.API_SECRET = self.read_api_key(".twitterapisecret")
     self.ACCESS_TOKEN = self.read_api_key(".twitteraccesstoken")
     self.ACCESS_TOKEN_SECRET = self.read_api_key(
         ".twitteraccesstokensecret")
     self.f_out = open("results.csv", "w")
     self.alchemy = AlchemyAPI()
     self.sentiment_results = []