def AnalyzeSentiment(searchTerm):
    """Classify stored tweets for *searchTerm* as positive/negative/neutral.

    Uses the cached sentiment on each document when present, calling
    AlchemyAPI only for unscored documents and caching the API result back
    into the collection.  Tweets whose language cannot be analyzed are
    removed from the collection.

    Returns (pos, neg, neu, tweets) where tweets is a list of annotated
    tweet texts.
    """
    analysisAPI = AlchemyAPI()
    pos, neg, neu = 0, 0, 0
    dataCollection = database_connection(searchTerm)
    tweets = []
    for document in dataCollection.find():
        try:
            if document.get("sentiment") is None:
                # Not scored yet: ask AlchemyAPI and cache the result.
                analysisResponse = analysisAPI.sentiment("text", document["text"])
                documentSentiment = analysisResponse["docSentiment"]["type"]
                dataCollection.update_one(
                    {"_id": document["_id"]},
                    {"$set": {"sentiment": analysisResponse["docSentiment"]}})
            else:
                documentSentiment = document["sentiment"]["type"]
        except KeyError:
            # BUG FIX: the original bare `except:` swallowed *every* error
            # (even counting/update failures) and deleted the tweet.  Only a
            # missing "docSentiment" (unsupported language) should do that.
            print("Unable to parse a Tweet as the language is not understood\n")
            dataCollection.delete_one({'text': document['text']})
            continue
        if documentSentiment == "positive":
            pos += 1
        elif documentSentiment == "negative":
            neg += 1
        else:
            neu += 1
        tweets.append(document["text"].strip()
                      + "\n\n***Tweet-Sentiment: " + documentSentiment + "***\n"
                      + "-" * 70)
    return pos, neg, neu, tweets
def extractConceptFromUrl(self, url):
    """method for extracting concepts from given url"""
    # creating AlchemyAPI object
    alchemyapi = AlchemyAPI()
    # requesting json response from AlchemyAPI server
    response = alchemyapi.concepts('url', url)
    if response['status'] == 'OK':
        for conceptData in response['concepts']:
            # BUG FIX: the original reused the loop variable name for the
            # result object, so conceptData['text'] was looked up on the
            # freshly created (empty) Concept object and failed at runtime.
            # (Compare the correct sibling extractConceptFromUrl elsewhere
            # in this file.)
            conceptObj = AlchemyStructure.Concept()
            # extracting the concept name
            conceptObj.setText(conceptData['text'])
            # extracting the relevance of the concept
            conceptObj.setRelevance(conceptData['relevance'])
            # append the concept into the list of retrieved concepts
            self.conceptsFromText.append(conceptObj)
    else:
        print('Error in concept tagging call: ', response['statusInfo'])
def extractKeywordsFromUrl(self, url):
    """Extract keywords for the document at *url* via AlchemyAPI and
    append them to self.keywordsFromUrl."""
    api = AlchemyAPI()
    response = api.keywords('url', url)
    if response['status'] != 'OK':
        print('Error in keyword tagging call: ', response['statusInfo'])
        return
    for entry in response['keywords']:
        # Wrap each raw keyword dict in the project's Keyword structure.
        keywordObj = AlchemyStructure.Keyword()
        keywordObj.setText(entry['text'])
        keywordObj.setRelevance(entry['relevance'])
        self.keywordsFromUrl.append(keywordObj)
def nlp_process(ids, ids_hash):
    """Extract sentiment-scored keywords for each id's text and index the
    result into the 'alchem' Elasticsearch index."""
    # instantiate an elasticsearch client
    es = Elasticsearch()
    # instantiate an alchemy client
    alchemyapi = AlchemyAPI()
    for item in ids:
        lowers = ' '.join(ids_hash[item]).lower()
        alchem_data = []
        response = alchemyapi.keywords('text', lowers, {'sentiment': 1})
        if response['status'] == 'OK':
            print('#Success#')
            for keyword in response['keywords']:
                al_temp = defaultdict()
                al_temp['text'] = keyword['text'].encode('utf-8')
                al_temp['relevance'] = keyword['relevance']
                al_temp['sentiment'] = keyword['sentiment']['type']
                # Neutral keywords come back without a score.
                if 'score' in keyword['sentiment']:
                    al_temp['score'] = keyword['sentiment']['score']
                alchem_data.append(al_temp)
        else:
            print('Error in keyword extaction call: ', response['statusInfo'])
        print(len(alchem_data))
        # prepare body for insertion
        doc = {"business_id": item, "word_freq": alchem_data}
        # BUG FIX: a stray exit() here aborted the whole process after the
        # first item and made the indexing call below unreachable; the
        # unused bulk `template` dict was also removed.
        es.index(index="alchem", doc_type='doc', body=doc)
def convert_to_clean_titles(infile, outfile): alchemyapi = AlchemyAPI() f = open(infile, "r") f2 = codecs.open(outfile, "w+", "utf-8") f3 = codecs.open("Entities.txt", "w+", "utf-8") count = 1 for line in f: line = line.decode("utf-8") response = alchemyapi.entities('text', line, { 'sentiment': 1, 'disambiguate': 1 }) if response['status'] == 'OK': for entity in response['entities']: if "type" in entity.keys: if entity['type'] in [ 'Country', 'Holiday', 'Movie', 'MusicGroup', 'Organization', 'Person', 'PrintMedia', 'Region', 'StateOrCountry', 'TelevisionShow', 'TelevisionStation', 'Money', 'Company', 'GeographicFeature' ]: line = line.replace(entity['text'], entity['text'].title()) print >> f3, entity['text'], entity['type'], entity[ 'sentiment'] print >> f2, line, else: print >> f2, line, print count, line count += 1
def performSA(pname, text):
    """Run AlchemyAPI document sentiment on *text*.

    Neutral results carry no score from the API, so they are normalized to
    a score of '0'.  *pname* is accepted for interface compatibility.
    """
    api = AlchemyAPI()
    result = api.sentiment('text', text)['docSentiment']
    if result['type'] == 'neutral':
        result['score'] = '0'
    return result
def function(): string = sys.stdin.readline() alchemyapi = AlchemyAPI() myText = "I'm excited to get started with AlchemyAPI!" response = alchemyapi.sentiment("text", myText) string = "Sentiment: " + response["docSentiment"]["type"] print string
def extractSentimentFromUrl(self, url):
    """Fetch document-level sentiment for *url* and store it on the
    instance as an AlchemyStructure.Sentiment."""
    api = AlchemyAPI()
    response = api.sentiment("url", url)
    if response["status"] != "OK":
        print("Error in sentiment analysis call: ", response["statusInfo"])
        return
    kind = response["docSentiment"]["type"]
    # Neutral responses carry no score field; treat them as 0.
    score = 0 if kind == "neutral" else response["docSentiment"]["score"]
    self.sentimentFromUrl = AlchemyStructure.Sentiment()
    self.sentimentFromUrl.setType(kind)
    self.sentimentFromUrl.setScore(score)
def writeResult(arr, outFile, search): alchemyapi = AlchemyAPI(apiKey) #alchemyapi = AlchemyAPI("89e395ea07490a40a55ccf241612724f80827956") #alchemyapi = AlchemyAPI("f7e81de9b04fcb1eadc9469800a86a15bffd8ec3") #alchemyapi = AlchemyAPI("d3547d0e12ac5425b57cf1d2e05280525224a109") #alchemyapi = AlchemyAPI("6d03602e012eca8b7ab3ac92e37327950b1caa78") #print "In write ",len(arr) fi = open(outFile,'wb') SEARCHTERM = search.lower() writer = csv.writer(fi) neg, pos, neu = 0,0,0 results = [] print "Starting AlchemyAPI" for item in zip(*arr)[0]: #print item response = alchemyapi.sentiment_targeted('text', item, SEARCHTERM) #print "maine response",response try: respType = response['docSentiment']['type'] #print "Response ",respType except Exception, e: #print e continue if respType == 'neutral': neu += 1 elif respType == 'positive': pos += 1 elif respType == 'negative': neg += 1 lst = [item,respType] writer.writerow(lst) results.append(lst)
def Entity_Extraction(self): print " ----------------------------" print "# STARTING ENTITY EXTRACTION:" print " ----------------------------" count = 0 os.system("python templates/alchemyapi.py 32449e7b4f6b65f9ef5cfd84b7128a46440a9402") startTime = datetime.now() # Create the AlchemyAPI Object alchemyapi = AlchemyAPI() for paragraph in self.targeted_paragraphs: response = alchemyapi.entities('text', paragraph, {'sentiment': 1}) if response['status'] == 'OK': print "DOCUMENT-LEVEL RESULTS: " print "ARTICLE TITLE: " , self.article_title[len(self.article_title) - len(self.article_title) + count] print 'ARTICLE URL: ' , self.article_url[len(self.article_url) - len(self.article_url) + count] print "DATA FRAME: " count = count + 1 for entity in response['entities']: entity_text = entity['text'] entity_type = entity['type'] entity_relevance = entity['relevance'] entity_sentiment_type = entity['sentiment']['type'] if 'score' in entity['sentiment']: entity_sentiment_score = entity['sentiment']['score'] df_entity_extraction = pd.DataFrame(data = {'text': [entity_text], 'type': [entity_type], 'relevance': [entity_relevance], 'sentiment': [entity_sentiment_type], 'sentiment_score': [entity_sentiment_score]}) print "***** ENTITY EXTRACTION RESULTS: *****" print df_entity_extraction.T df_transpose = df_entity_extraction.T entity_json_results = df_transpose.to_dict() ####### self.result_entity_extraction.append(entity_json_results) else: pass else: print 'Error in entity extraction call: ', response['statusInfo'] print "----------- Entity Extraction is completed. ---------------" print "Time Elapsed: " , datetime.now() - startTime execution_time = datetime.now() - startTime self.list_time_elapsed.append(execution_time)
def sentiment(): twitter = Twython(passwords.Live.twitter_app_key, passwords.Live.twitter_app_secret, oauth_version=2) access_token = twitter.obtain_access_token() twitter = Twython(passwords.Live.twitter_app_key, access_token=access_token) search_results = None try: search_results = twitter.search(q='$' + request.args.get('symbol'), result_type='popular') except TwythonError as e: print e twitter_corpus = "" for tweets in search_results['statuses']: twitter_corpus += tweets['text'].encode('utf-8') #Create the AlchemyAPI Object alchemyapi = AlchemyAPI() response = alchemyapi.sentiment('text', twitter_corpus) sentiment = None if response['status'] == 'OK': sentiment = {"sentiment": response['docSentiment']['type']} if request.args.get('output') == "jsonp": return Response('callback(' + json.dumps(sentiment) + ')', content_type='application/javascript') else: return jsonify(sentiment)
def sentiment_alchemy(url):
    """Print the document sentiment, author, and sentiment-scored keywords
    AlchemyAPI reports for *url*."""
    api = AlchemyAPI()
    # --- document sentiment / author ---
    response = api.sentiment('url', url)
    response['usage'] = None
    if response['status'] == 'OK':
        print('## Response Object ##')
        print(json.dumps(response, indent=4))
        print('')
        print('## Author ##')
        print('author: ', response.get('author', ''))
        print('')
    else:
        print('Error in author extraction call: ', response['statusInfo'])
    # --- keywords with per-keyword sentiment ---
    response = api.keywords('url', url)
    del (response['usage'])
    if response['status'] != 'OK':
        print('Error in keyword extaction call: ', response.get('statusInfo', ''))
        return
    print('## Response Object ##')
    print(json.dumps(response, indent=4))
    print('')
    print('## Keywords ##')
    for entry in response['keywords']:
        print('text: ', entry['text'].encode('utf-8'))
        print('relevance: ', entry['relevance'])
        print('sentiment: ', entry.get('sentiment', {}).get('type', ''))
        if 'score' in entry.get('sentiment', {}):
            print('sentiment score: ' + entry['sentiment']['score'])
        print('')
def main(): alchemyapi = AlchemyAPI() try: filename = sys.argv[1] except IndexError: print "Give a filename as the second argument!" sys.exit(1) text = pdf_to_str(filename) if len(text) >= LENGTH_LIMIT: print "PDF content is longer ({} characters) than the maximum \ of {}, skipping remainder".format(len(text), LENGTH_LIMIT) text = text[:LENGTH_LIMIT] print "KEYWORDS" response = alchemyapi.keywords('text', text) for keyword in response['keywords']: print ' - {}'.format(keyword['text']) print print "CONCEPTS" response = alchemyapi.concepts('text', text) for concept in response['concepts']: print ' - {}'.format(concept['text'])
def getCategory(demo_text):
    """Return a list of category labels for the first entity AlchemyAPI
    finds in *demo_text*; ["Undefined"] when nothing is found (Python 2)."""
    alchemyapi = AlchemyAPI()
    demo_text = unicode(demo_text)
    demo_text = demo_text.encode("ascii", 'ignore')
    response = alchemyapi.entities('text', demo_text)
    if response['status'] == 'OK':
        if not len(response['entities']):
            return ["Undefined"]
        entity = response['entities'][0]
        # Idiom fix: dict.has_key() is deprecated (and removed in
        # Python 3); use the `in` operator instead.
        if 'disambiguated' in entity and 'subType' in entity['disambiguated']:
            category = entity['disambiguated']['subType']
        else:
            category = [entity['type']]
    else:
        category = ["Undefined"]
    return category
def populateTweets(self):
    """Search tweets near the stored location, keep a 5-tweet showcase,
    the top 5 hashtags, and an average targeted sentiment for the search
    term."""
    self.lat, self.long, self.radius = self.findCoordinatesRadius()
    geo = str(self.lat) + "," + str(self.long) + "," + str(self.radius) + "km"
    tweets = api.search(q=self.search, lang='en', geocode=geo, rpp=100)
    self.showcase = [[tweet.text, tweet.user.screen_name]
                     for tweet in tweets[0:5]]
    hashtagsRaw = [tweet.entities['hashtags'] for tweet in tweets]
    hashtagsList = list(itertools.chain.from_iterable(hashtagsRaw))
    hashtags = [hash['text'] for hash in hashtagsList]
    # Dead-code fix: the original also built a quadratic `frequency` dict
    # via list.count() that was never read; Counter covers the need.
    self.popularHashtags = dict(Counter(hashtags).most_common(5)).keys()
    texts = [tweet.text for tweet in tweets]
    self.sentiment = 0.0
    alchemyapi = AlchemyAPI()
    for text in texts:
        response = alchemyapi.sentiment_targeted('text', text.lower(),
                                                 self.search.lower())
        if (response['status'] != 'ERROR'
                and response['docSentiment']['type'] != 'neutral'):
            numeric = float(response['docSentiment']['score'])
            self.sentiment = self.sentiment + (
                numeric / len(texts))  # computes average sentiment
def handle(self, *args, **options):
    """Backfill the missing 'entities' field on recent multilingual
    postings using AlchemyAPI entity extraction."""
    es = elasticsearch.Elasticsearch(es_url)
    alchemyapi = AlchemyAPI()
    query = {
        "query": {
            "and": [
                {"missing": {"field": "entities"}},
                {"terms": {"language": ['en', 'de', 'fr', 'it', 'es', 'pt']}},
                {"match": {"_all": "merkel"}}
                #{ "range": { "published": { "gte" : "now-1d" } } }
            ]
        },
        "size": 500
    }
    res = es.search(index="rss", doc_type="posting", body=query)
    logger.info("%d documents found" % res['hits']['total'])
    for posting in res['hits']['hits']:
        source = posting['_source']
        analyzed_text = source['title'] + ' ' + source['description']
        try:
            response = alchemyapi.entities("text", analyzed_text)
            entities = [entry['text'] for entry in response["entities"]]
            es.update(index=posting['_index'], doc_type=posting['_type'],
                      id=posting['_id'], body={"doc": {"entities": entities}})
        except KeyError:
            # Error responses have no "entities" key.
            logger.exception("Problem getting sentiment :( %s" % response)
def render_article(request):
    """Render a stored article, fetching and caching its content, title,
    and author via AlchemyAPI on first access."""
    article_id = request.POST['articleData']
    article = Article.objects.filter(id=article_id)[0]
    print(article_id.encode('utf-8'))
    print(article.content.encode('utf-8'))
    if article.content:
        # Content already cached: render it as-is.
        return render_to_response('article.html',
                                  {'id': article.id,
                                   'data': article.content,
                                   'titleText': article.title})
    source_url = article.url
    api = AlchemyAPI()
    text_response = api.text('url', source_url)
    title_response = api.title('url', source_url)
    author_response = api.author('url', source_url)
    article.content = text_response['text'].encode('utf-8')
    article.title = title_response['title'].encode('utf-8')
    article.save()
    return render_to_response(
        'article.html',
        {'id': article.id,
         'data': text_response['text'].encode('utf-8'),
         'titleText': title_response['title'].encode('utf-8')})
def run_sentiment_analysis(tweets, text_key):
    """Annotate each tweet dict with AlchemyAPI sentiment type and score.

    Tweets missing *text_key* are skipped; analysis failures are logged
    and the tweet is dropped from the results.
    """
    def print_error(response):
        # This should be replaced with better logging
        print('Error with AlchemyAPI response:')
        # BUG FIX: the original printed the enclosing `sentiment` variable
        # and ignored the response passed in.
        print(response, '\n')
    alchemyapi = AlchemyAPI()
    results = []
    for item in tweets:
        if text_key not in item:
            # Assume it's a bad tweet and continue
            print(text_key, 'not found in tweet')
            continue
        sentiment = alchemyapi.sentiment('text', item['words'])
        try:
            if sentiment['status'].lower() == 'error':
                # Unrecognized language, emoji only, etc...
                print_error(sentiment)
            # Make a deep copy (since it's a nested dictionary)
            new_item = copy.deepcopy(item)
            sentiment_type = sentiment['docSentiment']['type']
            new_item['sentiment_type'] = sentiment_type
            # Neutral results carry no score.
            if sentiment_type == 'neutral':
                new_item['sentiment_score'] = 0
            else:
                new_item['sentiment_score'] = sentiment['docSentiment']['score']
            results.append(new_item)
        except Exception as ex:
            print(type(ex).__name__)
            print_error(sentiment)
    return results
def extractConceptFromUrl(self, url):
    """Extract concepts for the document at *url* via AlchemyAPI and
    append them to self.conceptsFromUrl."""
    api = AlchemyAPI()
    response = api.concepts("url", url)
    if response["status"] != "OK":
        print("Error in concept tagging call: ", response["statusInfo"])
        return
    for entry in response["concepts"]:
        # Wrap each raw concept dict in the project's Concept structure.
        conceptObj = AlchemyStructure.Concept()
        conceptObj.setText(entry["text"])
        conceptObj.setRelevance(entry["relevance"])
        self.conceptsFromUrl.append(conceptObj)
def do_analysis(names, texts):
    """Write targeted-sentiment results per speaker for the debate's top
    three topics to a per-debate output file."""
    alchemyapi = AlchemyAPI()
    base = os.path.basename('transcripts/Democrats/dem-2-4-2016.txt')
    debate_name = os.path.splitext(base)[0]
    file_name = ('data/targeted sentiments/Democrat/' + str(debate_name)
                 + '_sentiment.txt')
    filepath = "data/topics/Democratic Debates_Top Three Topics.csv"
    # BUG FIX: both files were opened without ever being closed; use
    # with-blocks so the handles are released even if an API call raises.
    # (The unused dt_3 intermediate was also removed.)
    with open(filepath, "r") as topics_file:
        all_topics = csv.DictReader(topics_file)
        dt_1 = [row[debate_name] for row in all_topics]
    dt_2 = [x.split('\'') for x in dt_1]
    with open(file_name, 'w') as f:
        for i in range(len(names)):
            for j in range(3):
                topic = [y for x in dt_2[j] for y in x.split(',')]
                response = alchemyapi.sentiment_targeted(
                    'text', texts[i], topic[:(len(topic) - 1)])
                if response['status'] == 'OK':
                    f.write(topic[len(topic) - 1] + ':\n')
                    f.write(names[i] + ' Sentiment \n')
                    f.write('type: ' + str(response['docSentiment']['type'])
                            + '\n')
                    if 'score' in response['docSentiment']:
                        f.write('score: '
                                + str(response['docSentiment']['score'])
                                + '\n \n')
                else:
                    print('Error in sentiment analysis call: ',
                          response['statusInfo'])
def getKeywordPerPost(): reader = open('output_sony_posts.txt') all_json_posts = reader.read().splitlines() alchemyapi = AlchemyAPI() counter = 0 for p in all_json_posts: print str(counter) if counter < 1000: counter = counter + 1 continue #elif counter > 2000: # break else: counter = counter + 1 content = json.loads(p)["cleanContent"] response = alchemyapi.keywords('text',content.encode("UTF-8")) if response['status'] == 'OK': keywords = [] posts = "" for keyword in response['keywords']: keywords.append(keyword['text']) posts = posts + keyword['text'] + "," posts = posts[:-1] + "\n" if posts <> "\n": with codecs.open("keyPerPost.txt", "a") as f: f.write(posts.encode("UTF-8")) else: print "error" + str(counter)
def getKeywords(uID, inputText): alchemyapi = AlchemyAPI() #alchemyapi.loadAPIKey("api_key.txt") response = alchemyapi.keywords('text',inputText) print inputText if response['status'] == 'OK': #print('## Response Object ##') #print(json.dumps(response, indent=4)) #print('') #print('## Keywords ##') keywords = [] posts = uID + " : " for keyword in response['keywords']: keywords.append(keyword['text']) posts = posts + keyword['text'] + "|" userKeywords[uID] = keywords posts = posts + "\n" with codecs.open("outNew.txt", "a") as f: f.write(posts.encode("UTF-8")) return True else: print('idError: ', uID) with codecs.open("keywordOut2.txt", "a") as f: text = uID + "\n" f.write(text.encode("UTF-8")) return False
def get_sentiment(company_id, text): alchemyapi = AlchemyAPI() key_phrases = [] for apikey in engine.get_random_alchemy_credentials(): alchemyapi.apikey = apikey response = alchemyapi.keywords('text', text, {'sentiment': 1}) if response['status'] == 'OK': if len(response['keywords']) == 0: return 0 # related_words = models.RelatedWord.query.filter_by(company_id=company_id).all() for keyword in response["keywords"]: if 'sentiment' in keyword: if keyword['sentiment'].has_key('score'): key_phrases.append(float(keyword['sentiment']['score'])) elif keyword['sentiment']['type'] == 'neutral': key_phrases.append(0) if len(key_phrases) == 0: return 0 else: return float("{0:.2f}".format(sum(key_phrases)/len(key_phrases))) elif response['status'] == 'ERROR' and response['statusInfo'] != 'unsupported-text-language': print "ERROR: getting sentiment " + response['statusInfo'] # Skip onto the next api key continue else: print "None of the above " + response['statusInfo'] return 0 #Return none when all api keys are exhausted return None
def run_sentiment_analysis(tweets, text_key):
    """Annotate each tweet dict with AlchemyAPI sentiment type and score.

    Tweets missing *text_key* are skipped; analysis failures are logged
    and the tweet is dropped from the results.
    """
    def print_error(response):
        # This should be replaced with better logging
        print('Error with AlchemyAPI response:')
        # BUG FIX: the original printed the enclosing `sentiment` variable
        # and ignored the response passed in.
        print(response, '\n')
    alchemyapi = AlchemyAPI()
    results = []
    for item in tweets:
        if text_key not in item:
            # Assume it's a bad tweet and continue
            print(text_key, 'not found in tweet')
            continue
        sentiment = alchemyapi.sentiment('text', item['words'])
        try:
            if sentiment['status'].lower() == 'error':
                # Unrecognized language, emoji only, etc...
                print_error(sentiment)
            # Make a deep copy (since it's a nested dictionary)
            new_item = copy.deepcopy(item)
            sentiment_type = sentiment['docSentiment']['type']
            new_item['sentiment_type'] = sentiment_type
            # Neutral results carry no score.
            if sentiment_type == 'neutral':
                new_item['sentiment_score'] = 0
            else:
                new_item['sentiment_score'] = sentiment['docSentiment']['score']
            results.append(new_item)
        except Exception as ex:
            print(type(ex).__name__)
            print_error(sentiment)
    return results
def get_sentiment(places): twitter_api = get_twitter_api() alchemy_api = AlchemyAPI() sentiments = dict() for place in places: r = twitter_api.GetSearch(term=place, count=10) for tw in r: txt = tw.GetText() response = alchemy_api.sentiment('text', txt) if response['status'] == 'OK': sentiments[txt] = str(response['docSentiment']['type']) ret_list = [] for t, s in sentiments.iteritems(): ret_json = dict() ret_json["tweet"] = t ret_json["sentiment"] = s ret_list.append(ret_json) list_len = 16 if len(ret_list) > list_len: ret_list = random.sample(ret_list, 16) else: for i in xrange(len(ret_list), list_len): ret_list.append({"No Tweet": "neutral"}) print ret_list return ret_list
def _extract_content_alchemy(self, url):
    """Return the UTF-8 page text of *url* via AlchemyAPI, or '' on failure."""
    api = AlchemyAPI()
    result = api.text('url', url)
    if result['status'] != 'OK':
        return ''
    return result['text'].encode('utf8')
def grouped_category(texts):
    """
    get category and corresponding statistic for multiple texts
    INPUT: texts -> text array to be analyzed
    OUTPUT: statistic -> dict with the frequency of each category,
                         like {"soccer": 12, "football": 24}
            category -> array of the category each text belongs to, in order
    """
    alchemyapi = AlchemyAPI()
    statistic = {"null": 0}
    category = []
    for text in texts:
        response = alchemyapi.category('text', text)
        if response['status'] == 'OK':
            # Simplification: the response is already a dict; the original
            # round-tripped it through json.dumps/JSONDecoder for nothing.
            category.append(response.get("category"))
            if statistic.get(category[-1]) is not None:
                statistic[category[-1]] = statistic.get(category[-1]) + 1
            else:
                statistic[category[-1]] = 1
        else:
            statistic["null"] = statistic.get("null") + 1
            category.append("null")
    return statistic, category
def populateTweets(self):
    """Search tweets near the stored location, keep a 5-tweet showcase,
    the top 5 hashtags, and an average targeted sentiment for the search
    term."""
    self.lat, self.long, self.radius = self.findCoordinatesRadius()
    geo = str(self.lat) + "," + str(self.long) + "," + str(self.radius) + "km"
    tweets = api.search(q=self.search, lang='en', geocode=geo, rpp=100)
    self.showcase = [[tweet.text, tweet.user.screen_name]
                     for tweet in tweets[0:5]]
    hashtagsRaw = [tweet.entities['hashtags'] for tweet in tweets]
    hashtagsList = list(itertools.chain.from_iterable(hashtagsRaw))
    hashtags = [hash['text'] for hash in hashtagsList]
    # Dead-code fix: the original also built a quadratic `frequency` dict
    # via list.count() that was never read; Counter already covers it.
    self.popularHashtags = dict(Counter(hashtags).most_common(5)).keys()
    texts = [tweet.text for tweet in tweets]
    self.sentiment = 0.0
    alchemyapi = AlchemyAPI()
    for text in texts:
        response = alchemyapi.sentiment_targeted('text', text.lower(),
                                                 self.search.lower())
        if (response['status'] != 'ERROR'
                and response['docSentiment']['type'] != 'neutral'):
            numeric = float(response['docSentiment']['score'])
            self.sentiment = self.sentiment + (
                numeric / len(texts))  # computes average sentiment
def user_list_sentiments(request):
    """
    List all users with a sentiment score computed over their sent messages.
    :param request: GET request from front end
    :return: JSON list of {user_name, user_sentiment}
    """
    if request.method == 'GET':
        users = []
        user = User.objects.all()
        for u in user:
            messages = []
            message = Message.objects.filter(user_send=u.user_name)
            for m in message:
                messages.append(m.message_text)
            text = ",".join(messages)
            alchemyapi = AlchemyAPI()
            response = alchemyapi.sentiment('text', text)
            # BUG FIX: default the score so a non-OK response no longer
            # raises NameError (or silently reuses the previous user's
            # score).
            docSentimentscore = 0
            if response["status"] == "OK":
                if response["docSentiment"]["type"] == "neutral":
                    docSentimentscore = 0
                else:
                    docSentimentscore = response["docSentiment"]["score"]
            usr = {'user_name': u.user_name,
                   'user_sentiment': docSentimentscore}
            users.append(usr)
        print(json.dumps(users))
        return HttpResponse(json.dumps(users), content_type="application/json")
def use_api(self, input):
    """Return the list of UTF-8-encoded keyword strings AlchemyAPI finds
    in *input*."""
    keyword_array = []
    alchemyapi = AlchemyAPI()
    response = alchemyapi.keywords("text", input)
    for keyword in response["keywords"]:
        # Typo fix: the codec name was written 'utf=8'; it only worked by
        # accident of Python's codec-name normalization.
        keyword_array.append(keyword["text"].encode('utf-8'))
    return keyword_array
def retrieveReviewSentiment(text):
    """Return the document sentiment type for *text*, or the API's
    statusInfo string when the call fails."""
    api = AlchemyAPI()
    response = api.sentiment("text", text)
    if response["status"] == 'OK':
        return response["docSentiment"]["type"]
    return response['statusInfo']
def sentiment_analysis(text):
    """Return the document sentiment score of *text* as a float, or None
    when no score can be obtained."""
    alchemy_api = AlchemyAPI()
    response = alchemy_api.sentiment("text", text)
    try:
        # BUG FIX: error responses lack 'docSentiment' and previously
        # raised an uncaught KeyError/TypeError — only ValueError was
        # handled.  Also convert once instead of twice.
        return float(response["docSentiment"]['score'])
    except (KeyError, TypeError, ValueError):
        return None
def sentiment(demo_html):
    """Score the sentiment of an HTML document; when the API returns no
    score (e.g. neutral), fall back to the default 0.12."""
    api = AlchemyAPI()
    response = api.sentiment('html', demo_html)
    if response['status'] == 'OK':
        doc = response['docSentiment']
        return doc['score'] if 'score' in doc else 0.12
    # Implicitly returns None on a non-OK response, as before.
def get_sentiment(text):
    """Try each API key in turn and return the document sentiment score
    ('0' when the response has no score); None when every key fails."""
    api = AlchemyAPI()
    for key in utils.get_random_alchemy_credentials():
        api.apikey = key
        response = api.sentiment("text", text)
        if 'docSentiment' in response:
            return response['docSentiment'].get('score', '0')
    # Falls through to None when all keys are exhausted.
def main(data): args = argv('@', data['recv']) # look for URL link = geturl(data['recv']) if link and link != "" and not modeCheck('b', data): link = link[0] # look for title badext = ('.cgi', '.pdf') imgext = ('.jpg', '.png', '.gif', '.bmp') if not link[-4:].lower() in badext: if not link[-4:].lower() in imgext: title = gettitle(link) if title: title = html_decode(title) # encode unicode object to byte string if type(title) == unicode: title = title.encode('utf-8', "ignore") title = title.replace('\n', ' ') title = title.replace('\r', ' ') title = title.strip() if len(title) >= 150: title = title[:150] if len(link) > int( data['config']['settings']['maxLinkLen']): # post title + tiny data['api'].say( args['channel'], '^ ' + title + ' ' + maketiny(link) + ' ^') return else: # post title only data['api'].say(args['channel'], '^ ' + title + ' ^') return else: # We've got an image URL. from alchemyapi import AlchemyAPI alchemyapi = AlchemyAPI() response = alchemyapi.imageTagging('url', link) if response['status'] == 'OK' and response['imageKeywords'][0][ 'text'] != 'NO_TAGS': retme = "^ Image of: " for keyword in response['imageKeywords']: retme += "%s(%s%%) " % ( keyword['text'], int( float(keyword['score']) * 100)) if len(link) > int( data['config']['settings']['maxLinkLen']): retme += maketiny(link) + " " retme += "^" data['api'].say(args['channel'], retme) return if len(link) > int(data['config']['settings']['maxLinkLen']): # post tiny only data['api'].say(args['channel'], '^ ' + maketiny(link) + ' ^') return else: # nothing return False
def extract_entities(text):
    """
    Find the entities in *text* and map each to one of the coarse labels
    entertainment / sports / politics / technology.

    Returns a list of {text, relevance, type, count} dicts; [] when the
    AlchemyAPI call fails (the status is printed in that case).
    """
    alchemyapi = AlchemyAPI()
    decoder = json.JSONDecoder()
    response = alchemyapi.entities('text', text, {'sentiment': 0})
    if response['status'] != 'OK':
        print(response['status'])
        return []
    analysizedData = decoder.decode(json.dumps(response))
    entities = []
    for result in analysizedData.get("entities"):
        if result.get("type") in types.combinedTypes:
            # Ambiguous entity type: fall back to whole-text categorization.
            label = get_category(text)
            if label in ('arts_entertainment', 'gaming', 'recreation'):
                label = "entertainment"
            elif label == 'sports':
                label = "sports"
            elif label in ('computers_internet', 'health',
                           'science_technology'):
                label = "technology"
            elif label == 'culture_politics':
                label = "politics"
            else:
                continue
        else:
            label = result.get("type")
            if label in types.entertainmentTypes:
                label = "entertainment"
            elif label in types.sportsTypes:
                label = "sports"
            elif label in types.politicsTypes:
                label = "politics"
            elif label in types.technologyTypes:
                label = "technology"
            else:
                continue
        entities.append({
            "text": result["text"],
            "relevance": result["relevance"],
            "type": label,
            "count": int(result["count"]),
        })
    return entities
def store_concepts(tweets):
    """Append AlchemyAPI concept texts for the joined tweet corpus to the
    module-level `concepts` list."""
    api = AlchemyAPI()
    api.apikey = get_random_alchemy_credentials()
    response = api.concepts('text', ' '.join(tweets))
    if response['status'] == 'OK':
        for concept in response['concepts']:
            concepts.append(concept['text'])
def getScore(text):
    """Return the HTML sentiment score as a float, or the -10 sentinel
    when the response carries no score."""
    api = AlchemyAPI()
    result = api.sentiment('html', text)
    doc = result.get('docSentiment', {})
    if 'score' in doc:
        return float(doc['score'])
    return -10
def connect_alchemy(url):
    """Fetch the page text of *url* via AlchemyAPI and return its keywords.

    BUG FIX: the original stored the result in a misspelled local
    ('keywors') and returned nothing, so the call was useless to callers;
    the keyword list is now returned.
    """
    # to connect with alchemy and tag the content
    from alchemyapi import AlchemyAPI
    alchemyapi = AlchemyAPI()
    resp = alchemyapi.text('url', url)
    response = alchemyapi.keywords("text", resp['text'])
    return response["keywords"]
def extract_entities(text, lang):
    """Map each entity's UTF-8 surface text to its converted type label.

    *lang* is accepted for interface compatibility but is not used here.
    """
    api = AlchemyAPI()
    response = api.entities('text', text, {'sentiment': 1})
    entities = {}
    if response['status'] == 'OK':
        for entity in response['entities']:
            entities[entity['text'].encode('utf-8')] = \
                convert_label(entity['type'])
    return entities
def get_sentiment_score(text):
    """Return the sentiment score of *text*: 0 for neutral, the API score
    otherwise, and the -1000 sentinel for empty input or failed analysis."""
    if len(text) == 0:
        return -1000
    alchemyapi = AlchemyAPI()
    sentiment_object = alchemyapi.sentiment('text', text)
    # Robustness: error responses have no 'docSentiment' and previously
    # crashed with a KeyError; reuse the existing -1000 "no score" sentinel.
    if "docSentiment" not in sentiment_object:
        return -1000
    if sentiment_object["docSentiment"]["type"] == "neutral":
        return 0
    return sentiment_object["docSentiment"]["score"]
def GetAlchemyAPIObject(): with open("api_key.txt","r") as aFile: for line in aFile.read().split("\n"): if line != "": api = AlchemyAPI(line) result = api.sentiment("text","test") if result["status"] != "ERROR": return api print "Could not initialize valid, usable AlchemyAPI object. Consider requesting another API key." exit() return None
def update_entities(incremental=True):
    """Populate the 'entities' field on stored stories via Alchemy.

    In incremental mode only stories without entities are touched;
    otherwise every story is re-analyzed.
    """
    alchemy = AlchemyAPI()
    criteria = {'entities': {'$exists': False}} if incremental else {}
    for story in _db.stories.find(criteria):
        entities = alchemy.analyze_url(story['unescapedUrl'])['entities']
        logging.debug('%s, %s entities' % (story['title'], len(entities)))
        story['entities'] = entities
        _db.stories.save(story)
class App:
    """Fetch a page's text via AlchemyAPI, tag its concepts/keywords, and
    print DuckDuckGo zero-click definitions for them."""

    def __init__(self):
        self.alchemyapi = AlchemyAPI()
        self.raw_text = ''
        self.concepts = None
        self.keywords = None

    def parse_url(self, url=None):
        """Fetch the page text for *url* into self.raw_text."""
        reply = self.alchemyapi.text('url', url)
        if reply['status'] == 'OK':
            self.raw_text = reply['text'].encode('utf-8')
        else:
            print('Error in text extraction call: ', reply['statusInfo'])

    def extract_concepts(self):
        """Tag concepts in the previously fetched text."""
        reply = self.alchemyapi.concepts('text', self.raw_text)
        if reply['status'] == 'OK':
            self.concepts = reply['concepts']
        else:
            print('Error in concept tagging call: ', reply['statusInfo'])

    def extract_keywords(self):
        """Extract sentiment-annotated keywords from the fetched text."""
        reply = self.alchemyapi.keywords('text', self.raw_text,
                                         {'sentiment': 1})
        if reply['status'] == 'OK':
            self.keywords = reply['keywords']
        else:
            print('Error in keyword extraction call: ', reply['statusInfo'])

    def define_concepts(self):
        """Print a DuckDuckGo zero-click definition for every concept."""
        for concept in self.concepts:
            definition = duckduckgo.get_zci(concept['text'])
            print('%s -> %s' % (concept['text'], definition))
            print('')

    def define_keywords(self):
        """Print a DuckDuckGo zero-click definition for every keyword."""
        for keyword in self.keywords:
            definition = duckduckgo.get_zci(keyword['text'])
            print('%s -> %s' % (keyword['text'], definition))
            print('')
def __init__(self, aws_id, aws_key, es, aws_region='us-west-2', sqs_name='new-tweet-notifs'):
    """Connect to AWS SQS/SNS, build an AlchemyAPI client and a worker pool.

    Parameters:
        aws_id / aws_key: AWS credentials.
        es: an already-constructed Elasticsearch client.
        aws_region: AWS region for both SQS and SNS.
        sqs_name: name of the queue that delivers new-tweet notifications.

    Fix: the success message is printed only when setup actually succeeded.
    The original printed it unconditionally, so a failed connection raised
    AttributeError (self.sqs never set) immediately after reporting the
    real error.
    """
    try:
        # connect with sqs
        self.sqs = boto.sqs.connect_to_region(
            aws_region,
            aws_access_key_id=aws_id,
            aws_secret_access_key=aws_key)
        self.sqs_queue = self.sqs.get_queue(sqs_name)
        self.alc = AlchemyAPI()
        self.sns = boto.sns.connect_to_region(aws_region)
        self.es = es
        self.thread_pool = ThreadPoolExecutor(max_workers=4)
    except Exception as e:
        print('Could not connect')
        print(e)
    else:
        print('Connected to AWS SQS: ' + str(self.sqs))
def pos_with_entity_replaced_common_words(infile, outfile):
    """For each title line, replace recognized entity mentions in the matching
    POS line from common_word_pos.txt with the entity's type, writing the
    result to *outfile* (UTF-8).

    Fixes vs. the original:
      * the two files are iterated in lockstep with zip() — the original
        iterated the 2-tuple of file objects, which never pairs lines;
      * the result of str.replace is kept (strings are immutable; the
        original discarded it, so no substitution ever happened);
      * all three files are closed deterministically via context managers.
    """
    alchemyapi = AlchemyAPI()
    with open("common_word_pos.txt", "r") as common_word_pos, \
         open(infile, "r+") as title_data, \
         codecs.open(outfile, "w+", "utf-8") as f2:
        for line1, line2 in zip(title_data, common_word_pos):
            response = alchemyapi.entities('text', line1,
                                           {'sentiment': 1, 'disambiguate': 1})
            if response['status'] == 'OK':
                for entity in response['entities']:
                    # substitute each mention with its entity type
                    line2 = line2.replace(entity['text'], entity['type'])
                # line2 keeps its trailing newline from file iteration
                f2.write(line2)
def checkDailyQuotaAndRunAlchemy(commentDb, cruiseLines): with open('data/Alchemy_response_keywords.json', 'rb') as fp: returned_keywords = json.load(fp) with open('data/Alchemy_response_relations.json', 'rb') as fp: returned_relations = json.load(fp) alchemyapi = AlchemyAPI() test = "test if finished Alchemy daily quota" response = alchemyapi.keywords('text', test, {'sentiment': 0}) if response['status'] == 'OK': returned_keywords, returned_relations = runAlchemyApi( cruiseLines, commentDb, returned_keywords, returned_relations, alchemyapi) else: print 'Error in keyword extraction call: ', response['statusInfo'] return returned_keywords, returned_relations
def getSoup(): sock = urllib.urlopen('https://en.wikipedia.org/wiki/Motocross') sockRaw = sock.read() soup = BeautifulSoup(sockRaw, "html.parser") soupText = soup.get_text() # use the alchemyAPI to find the keyword/phrases from the texts alchemyapi = AlchemyAPI() response = alchemyapi.keywords('text', soupText, {'maxRetrieve': 10}) if response['status'] == 'OK': print "\nThe Keywords are:" for i in response['keywords']: print "Word: " + i["text"] + ", Relevance: " + i["relevance"] else: print "Something went wrong with Alchemy."
def handle(self, *args, **options):
    """Attach AlchemyAPI sentiment to recent, still-unscored RSS postings.

    Selects up to 100 postings from the last day, in a supported language,
    that lack a 'sentiment' field, scores each title+description, and writes
    the result back into Elasticsearch.
    """
    es = elasticsearch.Elasticsearch(es_url)
    alchemyapi = AlchemyAPI()
    query = {
        "query": {
            "and": [
                {"missing": {"field": "sentiment"}},
                {"terms": {"language": ['en', 'de', 'fr', 'it', 'es', 'pt']}},
                {"range": {"published": {"gte": "now-1d"}}},
            ]
        },
        "size": 100,
    }
    res = es.search(index="rss-*", doc_type="posting", body=query)
    logger.info("%d documents found" % res['hits']['total'])

    for hit in res['hits']['hits']:
        logger.info('Checking sentiment for - %s' % hit['_id'])
        analyzed_text = hit['_source']['title'] + ' ' + hit['_source']['description']
        try:
            response = alchemyapi.sentiment("text", analyzed_text)
            logger.info("Sentiment: " + response["docSentiment"]["type"])
            sentiment = response["docSentiment"]["type"]
            es.update(index=hit['_index'],
                      doc_type=hit['_type'],
                      id=hit['_id'],
                      body={"doc": {"sentiment": sentiment}})
        except KeyError:
            # API returned no docSentiment (error/unsupported text) — log and continue
            logger.exception("Problem getting sentiment :( %s" % response)
def __init__(self, filename, language):
    """Hold tweet-export settings: an AlchemyAPI client, the source file,
    the target language, and a map from output field names to bracket-path
    accessor strings into the raw tweet dict.
    """
    # SECURITY NOTE(review): hard-coded AlchemyAPI credential — move to a
    # config file or environment variable.
    self.al = AlchemyAPI('bf18ed72384724d86425c8674204039f87352870')
    self.filename = filename
    self.language = language
    # Output field -> bracket path into the tweet JSON (e.g. "['user']['location']").
    # Presumably evaluated elsewhere to pull nested values — TODO confirm against caller.
    self.filterKeys = {
        'created_at': u'[\'created_at\']',
        'id': u'[\'id\']',
        'lang': u'[\'lang\']',
        'tweet_urls': u'[\'entities\'][\'urls\']',
        'tweet_hashtags': u'[\'entities\'][\'hashtags\']',
        'user_location': u'[\'user\'][\'location\']',
        'keywords': u'[\'alchemy\'][\'keywords\']',
        'user_screen_name': u'[\'user\'][\'screen_name\']',
        'text': u'[\'text\']',
        # language-specific copy of the text field, keyed per configured language
        'text_' + self.language: u'[\'text\']'
    }
def main():
    """Run Alchemy taxonomy analysis per user over tweets read from f_in,
    appending one JSON line per user to f_out.

    Supports resuming after the daily API limit: LAST_USR is the last user
    processed in the previous run; all users up to and including it are
    skipped, then processing continues (Python 2: iterkeys/unicode).
    """
    alchemyapi = AlchemyAPI()
    user_tweets = read_tweet_text_per_user(f_in)
    cnt = 0
    # Marker for the last user processed before the quota ran out; "******"
    # as placeholder is replaced manually between runs — TODO confirm workflow.
    LAST_USR = "******"
    UNPROCESSED = False
    with codecs.open(f_out, 'a', encoding='utf8') as output_file:
        for user1 in user_tweets.iterkeys():
            cnt += 1
            # Skip-until logic: fast-forward past users handled in a prior run.
            if not UNPROCESSED:
                if user1 != LAST_USR:
                    continue
                else:
                    UNPROCESSED = True
                    print("Found", LAST_USR, cnt)
            if cnt % 100 == 0:
                # progress heartbeat
                print(cnt, user1)
            tweets1 = user_tweets[user1]
            BREAK, taxonomy_result1 = alchemy_on_tweets(
                alchemyapi, user1, tweets1)
            # there is the API daily limit so we check when exceeded and
            # continue tomorrow from the last processed user
            if BREAK:
                print("Last processed user: ", user1)
                return
            output_file.write(
                unicode(json.dumps(taxonomy_result1, ensure_ascii=False)) + '\n')
    return
def updateCounter(): global use global alchemyapi print use use += 1 if use >= 15: use = 0 alchemyapi = AlchemyAPI(use = use)
def analysecontent(content):
    """Process/Analyse the extracted contents with Alchemy API.

    Prints the top 10 keywords with their relevance scores.
    Assumption: api_key.txt with a valid key is available from where this
    program is getting executed.
    """
    print('Processing extracted text with AlchemyAPI...')
    alchemyapi = AlchemyAPI()
    response = alchemyapi.keywords('text', content, {'maxRetrieve': 10})
    if response['status'] != 'OK':
        # guard clause: report the API failure and stop
        print('Error in keyword extraction call: ', response['statusInfo'])
        return
    rule = '---------------------------------'
    print(rule)
    print('## Keywords ## Relevance')
    for kw in response['keywords']:
        print("{0}: {1}".format(kw['text'].encode('utf-8'), kw['relevance']))
    print(rule)
def createTweets(source, num):
    """Generate *num* Markov-chain tweets from *source*; roughly half get a
    concept-derived hashtag appended when it fits in 140 characters.

    Fix: the original appended the tweet only inside the API-success branch,
    so any tweet whose concepts call failed was silently dropped and fewer
    than *num* tweets were returned. The untagged tweet is now always kept.
    """
    # state size of 2 allows for more combinations as tweets are small
    words = createDict(source, 2)
    tweets = []
    alchemyAPI = AlchemyAPI()
    for _ in range(num):
        # coin flip: at most 50% chance of using a hashtag
        if randint(0, 1) == 0:
            tweets.append(generateText(words, 2, choice(range(100, 140))))
        else:
            # shorter body to leave room for the hashtag
            tweet = generateText(words, 2, choice(range(80, 120)))
            response = alchemyAPI.concepts('text', tweet)
            if response['status'] == 'OK':
                hashtag = " #" + response['concepts'][0]['text'].replace(" ", "")
                # attach only if the combined text still fits in 140 chars
                if len(hashtag) <= 140 - len(tweet):
                    tweet = tweet + hashtag
            tweets.append(tweet)
    return tweets
def process(in_queue, out_queue): #INPUT: #query -> the query string that was used in the Twitter API search (i.e. "Denver Broncos") #in_queue -> the shared input queue that is filled with the found tweets. #out_queue -> the shared output queue that is filled with the analyzed tweets. #OUTPUT: #None #Create the alchemy api object alchemyapi = AlchemyAPI() while True: #Grab a tweet from the queue tweet = in_queue.get() #Initilise tweet['sentiment'] = {} try: #Calculate the sentiment for the entire tweet response = alchemyapi.sentiment('text', tweet['text']) #Add the score if its not returned neutral if response['status'] == 'OK': tweet['sentiment']['doc'] = {} tweet['sentiment']['doc']['type'] = response['docSentiment'][ 'type'] if 'score' in response['docSentiment']: tweet['sentiment']['doc']['score'] = response[ 'docSentiment']['score'] else: tweet['sentiment']['doc']['score'] = 0 #Add the result to the output queue out_queue.put(tweet) except Exception as e: #If there's an error, just move on to the next item in the queue print 'Error ', e pass #Signal that the task is finished in_queue.task_done()
def __init__(self):
    """Prepare the keyword->category map, load Twitter credentials from
    dot-files, open the CSV output and build the AlchemyAPI client."""
    # topic keywords and the category each one belongs to
    self.KEYWORDS = {
        "firefox": "browser",
        "mozilla": "org",
        "google": "org",
        "chrome": "browser",
        "internet explorer": "browser",
        "microsoft": "org",
        "safari": "browser",
        "apple": "org"
    }
    # Twitter API credentials, one dot-file each
    for attr, keyfile in (("API_KEY", ".twitterapikey"),
                          ("API_SECRET", ".twitterapisecret"),
                          ("ACCESS_TOKEN", ".twitteraccesstoken"),
                          ("ACCESS_TOKEN_SECRET", ".twitteraccesstokensecret")):
        setattr(self, attr, self.read_api_key(keyfile))
    # NOTE(review): handle is never closed here — presumably closed elsewhere; verify.
    self.f_out = open("results.csv", "w")
    self.alchemy = AlchemyAPI()
    self.sentiment_results = []
def comment_store(request):
    """Store a comment with AlchemyAPI sentiment and refresh the target
    document's average-positivity rating.

    Expects request.data keys: 'com_from', 'com_to', 'com'.
    Returns 201 with the new/updated AverageComRate on success,
    400 with serializer errors otherwise.
    """
    avgcom = 0
    "method is for analyzing and storing comments"
    alchemyapi = AlchemyAPI()
    # classify the comment text (positive / negative / neutral)
    response = alchemyapi.sentiment("text", request.data["com"])
    dict1 = {
        'com_from': request.data["com_from"],
        'com_to': request.data["com_to"],
        'com': request.data["com"],
        'com_type': response["docSentiment"]["type"]
    }
    qdict = QueryDict('', mutable=True)
    qdict.update(dict1)
    serializer = CommentSerializer(data=qdict)
    if serializer.is_valid():
        serializer.save()
        # rating = 10 * (positive comments / all comments) for this document;
        # the just-saved comment guarantees at least one row, so no div-by-zero
        doccomrate = Comment.objects.filter(com_to=request.data["com_to"])
        doccomratepos = Comment.objects.filter(com_to=request.data["com_to"],
                                               com_type='positive')
        avgcom = (len(doccomratepos) / float(len(doccomrate))) * 10
        try:
            temp = AverageComRate.objects.get(doc_id=request.data["com_to"])
        except AverageComRate.DoesNotExist:
            # first rating for this document: create the row and return it
            dict2 = {
                'doc_id': request.data["com_to"],
                'rate': avgcom,
            }
            qdict = QueryDict('', mutable=True)
            qdict.update(dict2)
            serializer2 = AverageComRateSerializer(data=dict2)
            if serializer2.is_valid():
                serializer2.save()
            return Response(serializer2.data, status=status.HTTP_201_CREATED)
        # existing row: update in place and return the refreshed record
        avgcomupdate = AverageComRate.objects.filter(
            doc_id=request.data["com_to"]).update(rate=avgcom)
        temp2 = AverageComRate.objects.get(doc_id=request.data["com_to"])
        serializer3 = AverageComRateSerializer(temp2)
        return Response(serializer3.data, status=status.HTTP_201_CREATED)
    else:
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
class NotificationManager():
    """Consumes tweet notifications from SQS, scores them with AlchemyAPI
    sentiment, indexes them into Elasticsearch and republishes via SNS."""

    def __init__(self, aws_id, aws_key, es, aws_region='us-west-2',
                 sqs_name='new-tweet-notifs'):
        """Connect to SQS/SNS, build the AlchemyAPI client and a 4-worker pool.

        Fix: the success message is printed only when setup succeeded; the
        original printed it unconditionally, so a failed connection raised
        AttributeError (self.sqs unset) right after reporting the real error.
        """
        try:
            # connect with sqs
            self.sqs = boto.sqs.connect_to_region(
                aws_region,
                aws_access_key_id=aws_id,
                aws_secret_access_key=aws_key)
            self.sqs_queue = self.sqs.get_queue(sqs_name)
            self.alc = AlchemyAPI()
            self.sns = boto.sns.connect_to_region(aws_region)
            self.es = es
            self.thread_pool = ThreadPoolExecutor(max_workers=4)
        except Exception as e:
            print('Could not connect')
            print(e)
        else:
            print('Connected to AWS SQS: ' + str(self.sqs))

    def worker_task(self, m):
        """Process one SQS message: sentiment-tag the tweet, index it,
        publish it to SNS, then delete the message from the queue."""
        error = False
        print('Opening notification')
        body = m.get_body()
        # NOTE(review): literal_eval is safe for Python literals, but if
        # producers send JSON, json.loads would be the cleaner parse.
        tweet = ast.literal_eval(body)
        # do something with the tweet
        print(tweet['text'])
        response = self.alc.sentiment("text", tweet['text'])
        if (response['status'] == 'ERROR'):
            print('ERROR')
            error = True
        if not error:
            tweet['sentiment'] = response["docSentiment"]["type"]
            print("Sentiment: " + tweet['sentiment'])
            # add to Elasticsearch
            try:
                self.es.index(index="tweets", doc_type="twitter_twp", body=tweet)
            except Exception as e:
                print('Elasticserch indexing failed')
                print(e)
            json_string = json.dumps(tweet)
            # send processed tweet to SNS
            self.sns.publish(arn, json_string, subject='Sub')
            # delete notification when done
            self.sqs_queue.delete_message(m)
            print('Done')
        # NOTE(review): on an API error the message is NOT deleted and will be
        # redelivered indefinitely — consider a dead-letter queue. Confirm intent.

    def openNotifications(self):
        """Poll the queue forever, dispatching each message to the pool."""
        # poll for new notifs every second
        # NOTE(review): despite the comment, this loop busy-waits (no sleep);
        # consider time.sleep(1) or SQS long polling.
        while True:
            rs = self.sqs_queue.get_messages()  # result set
            if len(rs) > 0:
                for m in rs:
                    self.thread_pool.submit(self.worker_task, m)