def extractSentimentFromUrl(self, url):
    """method for extracting the sentiment associated with the url of a document"""
    # creating AlchemyAPI object
    alchemyapi = AlchemyAPI()

    # requesting json response from AlchemyAPI server
    response = alchemyapi.sentiment("url", url)

    if response["status"] == "OK":
        # getting the sentiment type from the response
        sentimentType = response["docSentiment"]["type"]

        # checking the sentiment type
        if sentimentType == "neutral":
            sentimentScore = 0
        else:
            sentimentScore = response["docSentiment"]["score"]

        # instantiating sentiment object
        self.sentimentFromUrl = AlchemyStructure.Sentiment()

        # set the value for sentiment type
        self.sentimentFromUrl.setType(sentimentType)

        # set the value for sentiment score
        self.sentimentFromUrl.setScore(sentimentScore)
    else:
        print("Error in sentiment analysis call: ", response["statusInfo"])
def sentiment():
    twitter = Twython(passwords.Live.twitter_app_key,
                      passwords.Live.twitter_app_secret,
                      oauth_version=2)
    access_token = twitter.obtain_access_token()
    twitter = Twython(passwords.Live.twitter_app_key, access_token=access_token)

    search_results = None
    try:
        search_results = twitter.search(q='$' + request.args.get('symbol'),
                                        result_type='popular')
    except TwythonError as e:
        print e

    # Concatenate the tweet texts into a single corpus; skip this when the
    # search failed so we never index into None below.
    twitter_corpus = ""
    if search_results:
        for tweets in search_results['statuses']:
            twitter_corpus += tweets['text'].encode('utf-8')

    # Create the AlchemyAPI Object
    alchemyapi = AlchemyAPI()
    response = alchemyapi.sentiment('text', twitter_corpus)

    sentiment = None
    if response['status'] == 'OK':
        sentiment = {"sentiment": response['docSentiment']['type']}

    if request.args.get('output') == "jsonp":
        return Response('callback(' + json.dumps(sentiment) + ')',
                        content_type='application/javascript')
    else:
        return jsonify(sentiment)
def get_sentiment(places):
    twitter_api = get_twitter_api()
    alchemy_api = AlchemyAPI()
    sentiments = dict()
    for place in places:
        r = twitter_api.GetSearch(term=place, count=10)
        for tw in r:
            txt = tw.GetText()
            response = alchemy_api.sentiment('text', txt)
            if response['status'] == 'OK':
                sentiments[txt] = str(response['docSentiment']['type'])

    ret_list = []
    for t, s in sentiments.iteritems():
        ret_json = dict()
        ret_json["tweet"] = t
        ret_json["sentiment"] = s
        ret_list.append(ret_json)

    # Trim or pad the result to exactly list_len entries
    list_len = 16
    if len(ret_list) > list_len:
        ret_list = random.sample(ret_list, list_len)
    else:
        for i in xrange(len(ret_list), list_len):
            ret_list.append({"No Tweet": "neutral"})
    print ret_list
    return ret_list
def user_list_sentiments(request):
    """
    Lists all users together with the sentiment score of their messages
    :param request: GET request from front end
    :return: list of all users and their sentiment scores
    """
    if request.method == 'GET':
        users = []
        user = User.objects.all()
        for u in user:
            messages = []
            message = Message.objects.filter(user_send=u.user_name)
            for m in message:
                messages.append(m.message_text)
            text = ",".join(messages)
            alchemyapi = AlchemyAPI()
            response = alchemyapi.sentiment('text', text)
            # default to 0 so the score is defined even when the call fails
            docSentimentscore = 0
            if response["status"] == "OK":
                if response["docSentiment"]["type"] == "neutral":
                    docSentimentscore = 0
                else:
                    docSentimentscore = response["docSentiment"]["score"]
            usr = {'user_name': u.user_name, 'user_sentiment': docSentimentscore}
            users.append(usr)
        print(json.dumps(users))
        return HttpResponse(json.dumps(users), content_type="application/json")
def sentiment_alchemy(url):
    alchemyapi = AlchemyAPI()

    response = alchemyapi.sentiment('url', url)
    response['usage'] = None
    if response['status'] == 'OK':
        print('## Response Object ##')
        print(json.dumps(response, indent=4))
        print('')
        print('## Document Sentiment ##')
        print('type: ', response['docSentiment'].get('type', ''))
        print('')
    else:
        print('Error in sentiment analysis call: ', response['statusInfo'])

    response = alchemyapi.keywords('url', url)
    del (response['usage'])
    if response['status'] == 'OK':
        print('## Response Object ##')
        print(json.dumps(response, indent=4))
        print('')
        print('## Keywords ##')
        for keyword in response['keywords']:
            print('text: ', keyword['text'].encode('utf-8'))
            print('relevance: ', keyword['relevance'])
            print('sentiment: ', keyword.get('sentiment', {}).get('type', ''))
            if 'score' in keyword.get('sentiment', {}):
                print('sentiment score: ' + keyword['sentiment']['score'])
            print('')
    else:
        print('Error in keyword extraction call: ', response.get('statusInfo', ''))
def function():
    # read the text to analyze from stdin
    myText = sys.stdin.readline()
    alchemyapi = AlchemyAPI()
    response = alchemyapi.sentiment("text", myText)
    if response["status"] == "OK":
        print "Sentiment: " + response["docSentiment"]["type"]
    else:
        print "Error in sentiment analysis call: " + response["statusInfo"]
def run_sentiment_analysis(tweets, text_key):
    def print_error(response):
        # This should be replaced with better logging
        print('Error with AlchemyAPI response:')
        print(response, '\n')

    alchemyapi = AlchemyAPI()
    results = []
    for item in tweets:
        if text_key not in item:
            # Assume it's a bad tweet and continue
            print(text_key, 'not found in tweet')
            continue
        sentiment = alchemyapi.sentiment('text', item[text_key])
        try:
            if sentiment['status'].lower() == 'error':
                # Unrecognized language, emoji only, etc...
                print_error(sentiment)
                continue
            # Make a deep copy (since it's a nested dictionary)
            new_item = copy.deepcopy(item)
            sentiment_type = sentiment['docSentiment']['type']
            new_item['sentiment_type'] = sentiment_type
            # Neutral documents carry no score, so default to 0
            if sentiment_type == 'neutral':
                new_item['sentiment_score'] = 0
            else:
                new_item['sentiment_score'] = sentiment['docSentiment']['score']
            results.append(new_item)
        except Exception as ex:
            print(type(ex).__name__)
            print_error(sentiment)
    return results
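# A minimal usage sketch for run_sentiment_analysis above. The tweet dicts
# and the 'words' text key are illustrative assumptions, not part of any
# real Twitter payload.
sample_tweets = [
    {"id": 1, "words": "I love this new phone, the camera is fantastic"},
    {"id": 2, "words": "Worst customer service I have ever experienced"},
]
analyzed = run_sentiment_analysis(sample_tweets, text_key="words")
for t in analyzed:
    print(t["sentiment_type"], t["sentiment_score"])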
def AnalyzeSentiment(searchTerm):
    analysisAPI = AlchemyAPI()
    pos, neg, neu = (0, 0, 0)
    dataCollection = database_connection(searchTerm)
    dataDocuments = dataCollection.find()
    tweets = []
    sentimentByCountry = {}
    tweetLocation = ""
    for document in dataDocuments:
        try:
            if document.get("sentiment", None) is None:
                analysisResponse = analysisAPI.sentiment("text", document["text"])
                documentSentiment = analysisResponse["docSentiment"]["type"]
                dataCollection.update_one(
                    {"_id": document["_id"]},
                    {"$set": {"sentiment": analysisResponse["docSentiment"]}})
            else:
                documentSentiment = document["sentiment"]["type"]
            if documentSentiment == "positive":
                pos = pos + 1
            elif documentSentiment == "negative":
                neg = neg + 1
            else:
                neu = neu + 1
            tweets.append(document["text"].strip() + "\n\n***Tweet-Sentiment: " +
                          documentSentiment + "***\n" + "-" * 70)
        except Exception:
            print("Unable to parse a Tweet as the language is not understood\n")
            dataCollection.delete_one({'text': document['text']})
    return pos, neg, neu, tweets
def performSA(pname, text):
    alchemyapi = AlchemyAPI()
    response = alchemyapi.sentiment('text', text)
    if response['status'] != 'OK':
        return None
    sentiment = response['docSentiment']
    # neutral documents carry no score, so default it to '0'
    if sentiment['type'] == 'neutral':
        sentiment['score'] = '0'
    return sentiment
def sentiment_analysis(text):
    alchemy_api = AlchemyAPI()
    response = alchemy_api.sentiment("text", text)
    try:
        return float(response["docSentiment"]["score"])
    except (KeyError, ValueError):
        # no docSentiment in the response, or a non-numeric score
        return None
def get_sentiment(text):
    alchemyapi = AlchemyAPI()
    for key in utils.get_random_alchemy_credentials():
        alchemyapi.apikey = key
        response = alchemyapi.sentiment("text", text)
        if 'docSentiment' not in response:
            continue
        return response['docSentiment'].get('score', '0')
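# get_random_alchemy_credentials above is an external helper in a utils
# module that is not shown. A plausible sketch is given here under the
# assumption that it shuffles a pool of API keys; the key values and the
# ALCHEMY_KEYS name are placeholders, not real credentials.
import random

ALCHEMY_KEYS = ["key-1", "key-2", "key-3"]  # hypothetical key pool

def get_random_alchemy_credentials():
    """Yield the pool of AlchemyAPI keys in random order."""
    keys = ALCHEMY_KEYS[:]
    random.shuffle(keys)
    for key in keys:
        yield key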
def retrieveReviewSentiment(text):
    alchemyapi = AlchemyAPI()
    response = alchemyapi.sentiment("text", text)
    status = response["status"]
    if status == 'OK':
        return response["docSentiment"]["type"]
    else:
        return response['statusInfo']
def sentiment(demo_html):
    alchemyapi = AlchemyAPI()
    response = alchemyapi.sentiment('html', demo_html)
    if response['status'] == 'OK':
        if 'score' in response['docSentiment']:
            return response['docSentiment']['score']
        else:
            return 0.12
def getScore(text):
    alchemyapi = AlchemyAPI()
    score = -10
    response = alchemyapi.sentiment('html', text)
    if 'docSentiment' in response:
        if 'score' in response['docSentiment']:
            score = float(response['docSentiment']['score'])
    return score
def get_sentiment_score(text):
    if len(text) == 0:
        return -1000
    alchemyapi = AlchemyAPI()
    sentiment_object = alchemyapi.sentiment('text', text)
    # guard against error responses, which carry no docSentiment
    if "docSentiment" not in sentiment_object:
        return -1000
    if sentiment_object["docSentiment"]["type"] == "neutral":
        return 0
    return sentiment_object["docSentiment"]["score"]
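# Many of the snippets in this collection repeat the same guard logic:
# check response['status'], treat 'neutral' documents (which carry no
# score) as 0, and fall back to a default when 'docSentiment' is absent.
# A minimal consolidated sketch of that pattern; the function name and
# default value are assumptions, not part of any snippet above.
def safe_sentiment_score(response, default=0.0):
    """Return the document sentiment score, or `default` when unavailable."""
    if response.get("status") != "OK":
        return default
    doc = response.get("docSentiment", {})
    if doc.get("type") == "neutral":
        return 0.0
    try:
        return float(doc["score"])
    except (KeyError, ValueError):
        return default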
def GetAlchemyAPIObject():
    with open("api_key.txt", "r") as aFile:
        for line in aFile.read().split("\n"):
            if line != "":
                api = AlchemyAPI(line)
                result = api.sentiment("text", "test")
                if result["status"] != "ERROR":
                    return api
    print "Could not initialize valid, usable AlchemyAPI object. Consider requesting another API key."
    exit()
class NotificationManager():
    def __init__(self, aws_id, aws_key, es, aws_region='us-west-2', sqs_name='new-tweet-notifs'):
        try:
            # connect to SQS
            self.sqs = boto.sqs.connect_to_region(aws_region,
                                                  aws_access_key_id=aws_id,
                                                  aws_secret_access_key=aws_key)
            self.sqs_queue = self.sqs.get_queue(sqs_name)
            self.alc = AlchemyAPI()
            self.sns = boto.sns.connect_to_region(aws_region)
            self.es = es
            self.thread_pool = ThreadPoolExecutor(max_workers=4)
        except Exception as e:
            print('Could not connect')
            print(e)
        print('Connected to AWS SQS: ' + str(self.sqs))

    def worker_task(self, m):
        error = False
        print('Opening notification')
        body = m.get_body()
        tweet = ast.literal_eval(body)
        # do something with the tweet
        print(tweet['text'])
        response = self.alc.sentiment("text", tweet['text'])
        if response['status'] == 'ERROR':
            print('ERROR')
            error = True
        if not error:
            tweet['sentiment'] = response["docSentiment"]["type"]
            print("Sentiment: " + tweet['sentiment'])
            # add to Elasticsearch
            try:
                self.es.index(index="tweets", doc_type="twitter_twp", body=tweet)
            except Exception as e:
                print('Elasticsearch indexing failed')
                print(e)
            json_string = json.dumps(tweet)
            # send the processed tweet to SNS
            self.sns.publish(arn, json_string, subject='Sub')
        # delete the notification when done
        self.sqs_queue.delete_message(m)
        print('Done')

    def openNotifications(self):
        while True:
            # poll for new notifications
            rs = self.sqs_queue.get_messages()  # result set
            if len(rs) > 0:
                for m in rs:
                    self.thread_pool.submit(self.worker_task, m)
class SentimentAnalyzer:
    def __init__(self):
        self.alchemyapi = AlchemyAPI()

    def get_sentiment(self, text):
        response = self.alchemyapi.sentiment("text", text)
        if response['status'] == 'OK':
            return response["docSentiment"]["type"]
        else:
            return 'none'
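# A short usage sketch for the SentimentAnalyzer class above; the sample
# text is illustrative.
analyzer = SentimentAnalyzer()
print(analyzer.get_sentiment("The keynote was inspiring and well paced"))
# -> 'positive' (or 'none' if the API call fails)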
def getAlcData(arrOfObj):
    alchemyapi = AlchemyAPI()
    #for x in range(0, len(arrOfObj)):
    for x in range(0, 100):
        asc = unicodedata.normalize('NFKD', arrOfObj[x].text).encode('ascii', 'ignore')
        print x
        print asc
        arrOfObj[x].responseEntities = alchemyapi.entities('text', asc, {'sentiment': 1})
        arrOfObj[x].responseKeywords = alchemyapi.keywords('text', asc, {'sentiment': 1})
        arrOfObj[x].responseSentiment = alchemyapi.sentiment('text', asc)
def __init__(self, query, maxTweet, resultType, nowDate, tweetOrGraph):
    # authenticate and create an instance of the Twitter search API
    self.api = twitter.Api(
        consumer_key='CVtJtQ4GNybpv0v9JVpQs7TS3',
        consumer_secret='xE5uyc1fZjSfvdImaotwO79oiq2DWImeIZnVHtXcDCby0APqo4',
        access_token_key='273450148-azU5GtOidHPiE9ejrPuvFE7fGztl4l58kVYg5jEh',
        access_token_secret='QJf6FORBROH5s7Zr5pxBrJVMZYz1ceq6EMx4LDkphlFYx'
    )
    # connect to database
    self.database = Database()
    # pass through arguments
    self.query = query
    self.maxTweet = maxTweet
    self.resultType = resultType
    self.nowDate = nowDate
    self.tweetOrGraph = tweetOrGraph
    # search for tweets and save to self.search
    self.search = self.api.GetSearch(term=self.query, lang='en',
                                     result_type=resultType,
                                     count=self.maxTweet, max_id='',
                                     until=self.nowDate)
    # for each tweet
    for t in self.search:
        # create AlchemyAPI object
        alchemyapi = AlchemyAPI()
        # find sentiment type
        response = alchemyapi.sentiment("text", t.text)
        sentiment = response["docSentiment"]["type"]
        # find sentiment score; 'neutral' returns none, so catch and assign 0
        try:
            scoreString = response["docSentiment"]["score"]
            score = float(scoreString)
        except:
            score = 0
        # if it's for the tweet table
        if tweetOrGraph == "tweet":
            dictionaryToDatabase = {"text": t.text,
                                    "lang": t.lang,
                                    "screen_name": t.user.screen_name,
                                    "name": t.user.name,
                                    "image": t.user.profile_image_url,
                                    "sentiment": sentiment,
                                    "score": score,
                                    "created_at": t.created_at[:10]}
            # populate tweet table
            self.database.popTable(dictionaryToDatabase)
        # if it's for the graph table
        else:
            dictionaryToDatabase = {"score": score,
                                    "created_at": t.created_at[3:10]}
            # populate graph table
            self.database.popTableGraph(dictionaryToDatabase)
def main():
    os.chdir(r'/Users/jmyeluri/Desktop/FinalMapAlchemy')
    file = open("states.txt")
    states = file.readlines()
    file.close()
    alchemyapi = AlchemyAPI()
    val1 = 0
    val2 = 0
    for x in range(0, len(states)):
        states[x] = states[x].strip()
        #info.append({'state': states[x], 'dem': val1, 'rep': val2 })
    info = updateJsonFile()
    print(info)
    sentiment = 0
    # democrat
    for z in range(0, 50):
        file = open(states[z] + "_democrat_tweets.txt")
        statesData = file.readlines()
        file.close()
        sentiment = 0
        print(states[z])
        for i in xrange(15):
            response = alchemyapi.sentiment('text', statesData[i])
            print("")
            if response['status'] == 'OK':
                response['usage'] = ''
                if 'score' in response['docSentiment']:
                    if float(response['docSentiment']['score']) < 0:
                        sentiment -= 1
                    else:
                        sentiment += 1
            else:
                print('Error in sentiment analysis call: ', response['statusInfo'])
        info[z]['dem'] = sentiment
        #print('positive sentiment score: ', response['docSentiment']['score'])
    jsonFile = open("info.json", "w+")
    jsonFile.write(json.dumps(info))
    jsonFile.close()
def handle(self, *args, **options):
    es = elasticsearch.Elasticsearch(es_url)
    alchemyapi = AlchemyAPI()
    query = {
        "query": {
            "and": [{
                "missing": {
                    "field": "sentiment"
                }
            }, {
                "terms": {
                    "language": ['en', 'de', 'fr', 'it', 'es', 'pt']
                }
            }, {
                "range": {
                    "published": {
                        "gte": "now-1d"
                    }
                }
            }]
        },
        "size": 100
    }
    res = es.search(index="rss-*", doc_type="posting", body=query)
    logger.info("%d documents found" % res['hits']['total'])
    for p in res['hits']['hits']:
        logger.info('Checking sentiment for - %s' % p['_id'])
        analyzed_text = p['_source']['title'] + ' ' + p['_source']['description']
        try:
            response = alchemyapi.sentiment("text", analyzed_text)
            logger.info("Sentiment: " + response["docSentiment"]["type"])
            sentiment = response["docSentiment"]["type"]
            es.update(index=p['_index'],
                      doc_type=p['_type'],
                      id=p['_id'],
                      body={"doc": {
                          "sentiment": sentiment
                      }})
        except KeyError:
            logger.exception("Problem getting sentiment :( %s" % response)
def main(): os.chdir(r"/Users/jmyeluri/Desktop/FinalMapAlchemy") file = open("states.txt") states = file.readlines() file.close() alchemyapi = AlchemyAPI() val1 = 0 val2 = 0 for x in range(0, len(states)): states[x] = states[x].strip() # info.append({'state': states[x], 'dem': val1, 'rep': val2 }) info = updateJsonFile() print(info) sentiment = 0 # republican for z in range(0, 50): file = open(states[z] + "_republican_tweets.txt") statesData = file.readlines() file.close() sentiment = 0 print(states[z]) for i in xrange(15): response = alchemyapi.sentiment("text", statesData[i]) print("") if response["status"] == "OK": response["usage"] = "" if "score" in response["docSentiment"]: if float(response["docSentiment"]["score"]) < 0: sentiment -= 1 else: sentiment += 1 else: print("Error in sentiment analysis call: ", response["statusInfo"]) info[z]["rep"] = sentiment print(info[z]["rep"]) # print('positive sentiment score: ', response['docSentiment']['score']) jsonFile = open("info.json", "w+") jsonFile.write(json.dumps(info)) jsonFile.close()
def process(in_queue, out_queue):
    # INPUT:
    # in_queue -> the shared input queue that is filled with the found tweets.
    # out_queue -> the shared output queue that is filled with the analyzed tweets.
    # OUTPUT:
    # None

    # Create the alchemy api object
    alchemyapi = AlchemyAPI()

    while True:
        # Grab a tweet from the queue
        tweet = in_queue.get()

        # Initialise
        tweet['sentiment'] = {}

        try:
            # Calculate the sentiment for the entire tweet
            response = alchemyapi.sentiment('text', tweet['text'])

            # Add the score; neutral tweets come back without one, so default to 0
            if response['status'] == 'OK':
                tweet['sentiment']['doc'] = {}
                tweet['sentiment']['doc']['type'] = response['docSentiment']['type']
                if 'score' in response['docSentiment']:
                    tweet['sentiment']['doc']['score'] = response['docSentiment']['score']
                else:
                    tweet['sentiment']['doc']['score'] = 0

            # Add the result to the output queue
            out_queue.put(tweet)
        except Exception as e:
            # If there's an error, just move on to the next item in the queue
            print 'Error ', e
            pass

        # Signal that the task is finished
        in_queue.task_done()
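# A minimal sketch of how the process() worker above might be wired up with
# shared queues. The thread count and the sample tweet are assumptions; the
# Queue import matches the Python 2 style of the snippet above.
from Queue import Queue
from threading import Thread

in_queue, out_queue = Queue(), Queue()
for _ in range(4):
    worker = Thread(target=process, args=(in_queue, out_queue))
    worker.daemon = True  # let the program exit even though workers loop forever
    worker.start()

for tweet in [{'text': 'Great game tonight!'}]:
    in_queue.put(tweet)
in_queue.join()  # block until every queued tweet has been analyzed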
def sentimentanalysis(self, text_):
    """ Does sentiment analysis using the AlchemyAPI """
    alchemyapi = AlchemyAPI()
    if "" in text_.keys() and len(text_) < 2:
        print "No tweets to analyse were found!!"
    else:
        response = alchemyapi.sentiment("text", text_)
        sentrep = response["docSentiment"]["type"]
        lst = [sentrep]
        prettytable = PrettyTable(['Sentiment Type'])
        t = prettytable.add_row(lst)
        print prettytable
def comment_store(request):
    """Analyze and store comments."""
    avgcom = 0
    alchemyapi = AlchemyAPI()
    response = alchemyapi.sentiment("text", request.data["com"])
    dict1 = {
        'com_from': request.data["com_from"],
        'com_to': request.data["com_to"],
        'com': request.data["com"],
        'com_type': response["docSentiment"]["type"]
    }
    qdict = QueryDict('', mutable=True)
    qdict.update(dict1)
    serializer = CommentSerializer(data=qdict)
    if serializer.is_valid():
        serializer.save()
        doccomrate = Comment.objects.filter(com_to=request.data["com_to"])
        doccomratepos = Comment.objects.filter(com_to=request.data["com_to"],
                                               com_type='positive')
        avgcom = (len(doccomratepos) / float(len(doccomrate))) * 10
        try:
            temp = AverageComRate.objects.get(doc_id=request.data["com_to"])
        except AverageComRate.DoesNotExist:
            dict2 = {
                'doc_id': request.data["com_to"],
                'rate': avgcom,
            }
            qdict = QueryDict('', mutable=True)
            qdict.update(dict2)
            serializer2 = AverageComRateSerializer(data=dict2)
            if serializer2.is_valid():
                serializer2.save()
                return Response(serializer2.data, status=status.HTTP_201_CREATED)
        avgcomupdate = AverageComRate.objects.filter(
            doc_id=request.data["com_to"]).update(rate=avgcom)
        temp2 = AverageComRate.objects.get(doc_id=request.data["com_to"])
        serializer3 = AverageComRateSerializer(temp2)
        return Response(serializer3.data, status=status.HTTP_201_CREATED)
    else:
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
def aggregate_news_sentiment(news_urls):
    alchemyapi = AlchemyAPI()
    agg_sentiment = 0.0
    sscores = []
    for link in news_urls:
        print("Processing, " + link)
        response = alchemyapi.sentiment('url', link)
        del (response['usage'])
        if response['status'] == 'OK' and response.get('docSentiment', {}).get('type', '') != 'neutral':
            sscores += [float(response.get('docSentiment', {}).get('score'))]
    agg_sentiment = np.sum(sscores)
    print(agg_sentiment)
    return agg_sentiment
def display(self, term, limit=10):
    print('>>Reading...\n'),
    sys.stdout.flush()
    time.sleep(1)
    data_file = open('data.json', 'r')
    data = json.load(data_file)
    data_file.close()
    data = [word.strip().encode('utf-8') for word in data]
    # remove file after retrieving data
    os.remove('data.json')
    pt = PrettyTable(field_names=["Status Text", 'Count'])
    c = Counter(data)
    try:
        limit = int(input(">>How many records do you want to view? Press enter for default(10)\n\n"))
    except:
        pass
    table = [pt.add_row(row) for row in c.most_common()[:limit]]
    pt.add_column("Rank", [i + 1 for i in range(len(table))])
    pt.align["Status Text"], pt.align['Count'] = 'l', 'r'
    print("\n \nShowing top %s prominent terms in search for \"%s\" \n \n" % (limit, term))
    print(pt)
    if str(input(">>Get sentiments? Y to continue and anything else to exit \t")).lower() == 'y':
        print('>>Getting general feel of results...'),
        sys.stdout.flush()
        from alchemyapi import AlchemyAPI
        sentiment_data = [key for key, value in c.most_common()][:limit]  # collect top ranked list
        alchemyapi = AlchemyAPI()
        Text = "".join([word.decode() for word in sentiment_data])
        if len(Text) <= 1:
            print("No text to analyse")
        else:
            try:
                response = alchemyapi.sentiment("text", Text)
                print("\n\n>>Sentiment: ", response["docSentiment"]["type"])
            except:
                print("Can't test sentiments now, try later")
def getNews(query):
    query = str(query)
    f = open('test.html', 'w')
    r = requests.get('https://api.datamarket.azure.com/Data.ashx/Bing/Search/News?Query=%27' +
                     query + '%27maryland%27&$format=json',
                     auth=('Ql9qEoqZut7Uy3i7mTtiX8Dv1SciVZf1Qwcz07BUx5k',
                           'Ql9qEoqZut7Uy3i7mTtiX8Dv1SciVZf1Qwcz07BUx5k'))
    rrr = r.json()
    rr = rrr['d']['results']
    length = len(rr)
    alchemyapi = AlchemyAPI()
    lurl = []
    linfo = []
    lscore = []
    ltitle = []
    ldesc = []
    for i in range(0, length - 1):
        lurl.append(rr[i]['Url'])
        ltitle.append(rr[i]['Title'])
        ldesc.append(rr[i]['Description'])
    total = 0.0
    for i in range(0, length - 1):
        try:
            linfo.append(alchemyapi.sentiment('text', rr[i]['Title'])['docSentiment'])
            if linfo[i]['type'] != 'neutral':
                total += float(linfo[i]['score'])
        except:
            continue
    avg = total / length
    html = '<html> <body>'
    for i in range(0, length - 1):
        try:
            html += '<div class="grid-50 mobile-grid-95" width="50%%"> <a href="' + str(lurl[i]) + '">' + str(ltitle[i]) + '</a><br>' + str(ldesc[i]) + '<br></div><br>'
        except:
            continue
    html += "</body></html>"
    return avg, html
class MyAlchemyApi():
    def __init__(self):
        self.ap = AlchemyAPI()

    def analyze_single_tweet(self, tweet):
        response = self.ap.sentiment('text', tweet['text'])
        ret = {
            'text': tweet['text'],
            'language': None,
            'type': None,
            'score': None,
            'error': None
        }
        if response['status'] == 'OK':
            ret['type'] = response['docSentiment']['type']
            if 'language' in response:
                ret['language'] = response['language']
            if 'score' in response['docSentiment']:
                ret['score'] = response['docSentiment']['score']
            ret['error'] = None
        else:
            if 'statusInfo' in response:
                ret['error'] = response['statusInfo']
            else:
                ret['error'] = 'error status=' + response['status']
        return ret

    def _filter_unsupported_text_language(self, results):
        # filter language detection errors
        results = [x for x in results if x['error'] != 'unsupported-text-language']
        return results

    def analyze_multiple_tweet(self, tweets):
        ret = []
        for t in tweets:
            ret.append(self.analyze_single_tweet(t))
        ret = self._filter_unsupported_text_language(ret)
        return ret
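# A brief usage sketch for MyAlchemyApi above; the tweet dicts are
# illustrative.
api = MyAlchemyApi()
tweets = [{'text': 'Loving the new update!'},
          {'text': 'This app keeps crashing'}]
for result in api.analyze_multiple_tweet(tweets):
    print(result['type'], result['score'], result['error'])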
def calculate_sentiment(tweet_collection, count_collection):
    counter = 0
    score = 0.0
    alchemyapi = AlchemyAPI()
    print count_collection
    print
    print
    for i in range(len(tweet_collection)):
        response = alchemyapi.sentiment("html", tweet_collection[i])
        if response["status"] == "OK":
            response["usage"] = ""
            if "score" in response["docSentiment"]:
                score += float(response["docSentiment"]["score"]) * math.log10(count_collection[i] + 1)
                counter += 1
        else:
            print "Error in sentiment analysis call: ", response["statusInfo"]
    # Average of the weighted scores across the scored replies
    if counter != 0:
        score /= float(counter)
    else:
        score = 1
    return score
alchemyapi = AlchemyAPI()

print('############################################')
print('#   Image Extraction Example               #')
print('############################################')
print('Processing url: ', image_url)

#### IMAGE TAGGING
response = alchemyapi.imageTagging('url', image_url)
if response['status'] == 'OK':
    print('## Keywords ##')
    for keyword in response['imageKeywords']:
        print(keyword['text'], ' : ', keyword['score'])
    print('')
else:
    print('Error in image tagging call: ', response['statusInfo'])

# Sentiment
print('Checking sentiment . . . ')
# response = alchemyapi.sentiment('text', test_text)
response = alchemyapi.sentiment('url', test_url)
if response['status'] == 'OK':
    # print(json.dumps(response))
    print(response['docSentiment']['type'])
    # print('')
# response = alchemyapi.sentiment('html', test_html)
# assert(response['status'] == 'OK')
# response = alchemyapi.sentiment('url', test_url)
def Sentiment_Analysis(self):
    print " ----------------------------"
    print "# STARTING SENTIMENT ANALYSIS:"
    print " ----------------------------"
    df_sentiment_combined = []
    list_data_sentiment = []
    startTime = datetime.now()
    count = 0
    # Create the AlchemyAPI Object
    alchemyapi = AlchemyAPI()
    for paragraph in self.targeted_paragraphs:
        response = alchemyapi.sentiment('text', paragraph)
        if response['status'] == 'OK':
            print '---------------------------------------'
            print "***** DOCUMENT SENTIMENT RESULTS: *****"
            print "DOCUMENT-LEVEL RESULTS: "
            print "ARTICLE TITLE: ", self.article_title[count]
            print 'ARTICLE URL: ', self.article_url[count]
            print "DATA FRAME: "
            try:
                data_sentiment_formatted = {'article_title': self.article_title[count],
                                            'type': response['docSentiment']['type'],
                                            'score': response['docSentiment']['score']}
                list_data_sentiment.append(data_sentiment_formatted)
            except KeyError:
                # neutral documents carry no score, so skip the formatted record
                pass
            data_sentiment = {'article_title': self.article_title[count],
                              'type': [response['docSentiment']['type']],
                              'score': [response['docSentiment']['score']]}
            count = count + 1
            df_sentiment_analysis = pd.DataFrame(data=data_sentiment).T
            print df_sentiment_analysis
            sentiment_json_results = df_sentiment_analysis.to_dict()
            df_sentiment_combined.append(df_sentiment_analysis)
        else:
            print('Error in sentiment analysis call: ', response['statusInfo'])
    self.result_sentiment_analysis["sentiment_analysis_result"] = list_data_sentiment
    try:
        print "***** AVERAGE DOCUMENT SENTIMENT RESULTS: *****"
        df_sentiment_avg = pd.concat(df_sentiment_combined).T
        self.list_sentiment.append(df_sentiment_avg)
        sentiment_avg_json_results = df_sentiment_avg.to_dict()
    except Exception as e:
        print "Error during average sentiment analysis calculation." + str(e)
    try:
        sentiment_score = np.array(df_sentiment_avg)
        sentiment_score_even = sentiment_score[:, 0::2]
        sentiment_score_calculation = [float(i) for i in sentiment_score_even[0]]
        avg_sentiment_score = sum(sentiment_score_calculation) / len(sentiment_score_calculation)
        print "AVERAGE SENTIMENT SCORE = ", avg_sentiment_score
        average_sentiment_score = {"average_sentiment_score": avg_sentiment_score}
        df_sentiment_average = pd.DataFrame(average_sentiment_score)
        self.list_sentiment.append(df_sentiment_average)
        self.result_sentiment_analysis["average_sentiment_score"] = average_sentiment_score
    except:
        pass
    print "----------- Sentiment Analysis is completed. ---------------"
    print "Time Elapsed: ", datetime.now() - startTime
    execution_time = datetime.now() - startTime
    self.list_time_elapsed.append(execution_time)
    print self.result_sentiment_analysis
else:
    print('Error in entity extraction call: ', response['statusInfo'])

print('')
print('')
print('')
print('############################################')
print('#   Sentiment Analysis Example             #')
print('############################################')
print('')
print('')
print('Processing html: ', demo_html)
print('')

response = alchemyapi.sentiment('html', demo_html)

if response['status'] == 'OK':
    print('## Response Object ##')
    print(json.dumps(response, indent=4))
    print('')
    print('## Document Sentiment ##')
    print('type: ', response['docSentiment']['type'])
    print('score: ', response['docSentiment']['score'])
else:
    print('Error in sentiment analysis call: ', response['statusInfo'])

print('')
print('')
print('')
class BrowserWars:
    """A class to fetch sentiment scores for different keywords."""

    def __init__(self):
        self.KEYWORDS = {
            "firefox": "browser",
            "mozilla": "org",
            "google": "org",
            "chrome": "browser",
            "internet explorer": "browser",
            "microsoft": "org",
            "safari": "browser",
            "apple": "org"
        }
        self.API_KEY = self.read_api_key(".twitterapikey")
        self.API_SECRET = self.read_api_key(".twitterapisecret")
        self.ACCESS_TOKEN = self.read_api_key(".twitteraccesstoken")
        self.ACCESS_TOKEN_SECRET = self.read_api_key(".twitteraccesstokensecret")
        self.f_out = open("results.csv", "w")
        self.alchemy = AlchemyAPI()
        self.sentiment_results = []

    def read_api_key(self, filename):
        f = open(filename, "r")
        key = f.read().strip()
        f.close()
        return key

    def start(self):
        self.start_time = time.time()
        for i in self.KEYWORDS:
            self.analyze_tweets(i)

    def get_tweets(self, query):
        url = ("https://api.twitter.com/1.1/search/tweets.json?"
               "q=%s&lang=en&locale=en-US&result_type=recent&count=20" % query)
        consumer = oauth.Consumer(key=self.API_KEY, secret=self.API_SECRET)
        token = oauth.Token(key=self.ACCESS_TOKEN, secret=self.ACCESS_TOKEN_SECRET)
        client = oauth.Client(consumer, token)
        response, content = client.request(url, method="GET")
        if int(response["status"]) != 200:
            sys.exit("Didn't get results")
        return content

    def analyze_tweets(self, query):
        """Returns the JSON string with the sentiment analysis results."""
        query = urllib2.quote(query)
        twitter_response = self.get_tweets(query)
        self.parse_tweets(twitter_response, query)
        positive, neutral, negative, length = self.calculate_sentiment(query)
        self.f_out.write("%s,%d,%f,%f,%f,%d\n" %
                         (query, self.start_time, positive, neutral, negative, length))

    def parse_tweets(self, twitter_response, query):
        """Parses JSON search results and returns an array of sentiment results"""
        query = urllib2.quote(query)
        twitter_result = json.loads(twitter_response)
        f_results = open("twitter_response_%s.json" % query, "w")
        f_results.write(twitter_response)
        f_results.close()
        if "statuses" not in twitter_result:
            sys.exit("Unknown twitter response %s" % twitter_response)
        statuses = twitter_result["statuses"]
        for s in statuses:
            if "text" not in s:
                sys.exit("Response not ok for query %s %s" % (query, twitter_result))
            escaped = urllib2.quote(s["text"].encode("utf-8"))
            response = self.alchemy.sentiment("text", escaped)
            if response["status"] != "OK":
                sys.exit("Response not ok for query %s %s" % (escaped, response))
            result_obj = {
                "query": query,
                "tweet": escaped,
                "type": response["docSentiment"]["type"]
            }
            if "score" in response["docSentiment"]:
                result_obj["score"] = response["docSentiment"]["score"]
            self.sentiment_results.append(result_obj)

    def calculate_sentiment(self, query):
        # keep only this query's results, then count each sentiment type
        m = [x for x in self.sentiment_results if x["query"] == query]
        num_positives = sum(1 for x in m if x["type"] == "positive")
        num_negatives = sum(1 for x in m if x["type"] == "negative")
        num_neutrals = sum(1 for x in m if x["type"] == "neutral")
        return (float(num_positives) / len(m), float(num_neutrals) / len(m),
                float(num_negatives) / len(m), len(m))
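# A short usage sketch for the BrowserWars class above; it assumes the four
# .twitter* credential files exist in the working directory.
if __name__ == "__main__":
    wars = BrowserWars()
    wars.start()
    wars.f_out.close()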
print('')
print('')
print('')
print('############################################')
print('#   Sentiment Analysis Example             #')
print('############################################')
print('')
print('')
print('Processing html: ', demo_html)
print('')

response = alchemyapi.sentiment('html', demo_html)

if response['status'] == 'OK':
    print('## Response Object ##')
    print(json.dumps(response, indent=4))
    print('')
    print('## Document Sentiment ##')
    print('type: ', response['docSentiment']['type'])
    if 'score' in response['docSentiment']:
        print('score: ', response['docSentiment']['score'])
else:
    print('Error in sentiment analysis call: ', response['statusInfo'])
clean1.append(tmp)

# In[67]:

from __future__ import print_function
from alchemyapi import AlchemyAPI
import json

alchemyapi = AlchemyAPI()

# In[68]:

sentiment_score = []
for i in clean:
    response = alchemyapi.sentiment('text', i)
    if response['status'] == 'OK' and 'score' in response['docSentiment'].keys():
        flag = response['docSentiment']['score']
        sentiment_score.append(flag)
    else:
        sentiment_score.append(0)

# In[70]:

sentiment_score1 = []
for i in clean1:
    response = alchemyapi.sentiment('text', i)
    if response['status'] == 'OK' and 'score' in response['docSentiment'].keys():
        flag = response['docSentiment']['score']
        sentiment_score1.append(flag)
    else:
        sentiment_score1.append(0)
import json
import newspaper
import sys
from alchemyapi import AlchemyAPI

alchemyapi = AlchemyAPI()

#paper = newspaper.build('http://yourstory.com/2015/05/dilbole/')
#article = paper.articles[0]
#print (article.url)

db = open('story.txt', 'r')
out = open('output.txt', 'w')
demo_text = db.read()
response = alchemyapi.keywords('text', demo_text, {'sentiment': 1})
sentiment = alchemyapi.sentiment('text', demo_text)

# check the document-level sentiment from the dedicated sentiment call,
# not from the keywords response
if sentiment['status'] == 'OK' and sentiment["docSentiment"]["type"] == "positive":
    if response['status'] == 'OK':
        print('## Response Object ##')
        print(json.dumps(response, indent=4))
        print('')
        print('## Keywords ##')
        for keyword in response['keywords']:
            print('text: ', keyword['text'].encode('utf-8'))
            out.write(keyword['text'].encode('utf-8'))
            out.write("$")
            print('relevance: ', keyword['relevance'])
            out.write(keyword['relevance'])
ckey = d['ckey']
csecret = d['csecret']
atoken = d['atoken']
asecret = d['asecret']

api = TwitterAPI(ckey, csecret, atoken, asecret)

SEARCH_TERM = 'donald trump'

r = api.request('search/tweets', {'q': SEARCH_TERM, 'count': 10})

for item in r:
    if 'text' in item and 'user' in item:
        tweet = item['text']
        username = item['user']['name']
        response = alchemyapi.sentiment('html', tweet)
        if response['status'] == 'OK':
            results = json.dumps(response, indent=4)
            type = response['docSentiment']['type']
            if 'score' in response['docSentiment']:
                score = response['docSentiment']['score']
            else:
                score = 0
        else:
            # skip this tweet so score and type are never used undefined
            print('Error in sentiment analysis call: ', response['statusInfo'])
            continue
        print(username)
        print('=================Output========================')
        print(score)
        print(type)
        print(tweet)
"color": "" }, "parent": "", ## "mentions" is an array of the caliper IDs from the user_mentions objects array "user_mentions": [], ## "hashtags" is an array of the hashtag texts included in the tweet entities "hashtags": [] } } # Set the re-usable variables tweet_text = tweet['text'] ## AlchemyAPI Sentiment Analysis tweet_sentiment = '' response = alchemyapi.sentiment('text', tweet_text) if 'docSentiment' in response.keys(): if 'score' in response['docSentiment']: tweet_sentiment_score = response['docSentiment']['score'] tweet_sentiment_score = float(tweet_sentiment_score) tweet_sentiment_score = round(tweet_sentiment_score, 2) else: tweet_sentiment_score = 0 tweet_sentiment_type = response['docSentiment']['type'] tweet_sentiment_score_a = abs(tweet_sentiment_score) if (tweet_sentiment_score) > 0: tweet_sentiment_color = "rgba(0,255,0," + str(tweet_sentiment_score_a) + ")" else: tweet_sentiment_color = "rgba(255,0,0," + str(tweet_sentiment_score_a) + ")" elif force == True: print 'force option set to True. The tweet_sentiment object will be set with API Limit Exceeded values.'
def determineSubject(text):
    alchemyapi = AlchemyAPI()
    response = alchemyapi.sentiment('text', text)
    if response['status'] == 'OK':
        return response['docSentiment']['score']
    'q': Team2,
    'count': count2,
    'geocode': geocode2,
    'since': since,
    'until': until
})

for item in r:
    team2_list.append(TweetInfo(item['text'], item['created_at']))
    #print(item['text'])

alchemyapi = AlchemyAPI()

sentiment1 = [0.0, 0.0]
sentiment2 = [0.0, 0.0]
counter1 = 0
counter2 = 0

for i in xrange(len(team1_list)):
    response = alchemyapi.sentiment('html', team1_list[i].tweetString)
    if response['status'] == 'OK':
        response['usage'] = ''
        if 'score' in response['docSentiment']:
            if float(response['docSentiment']['score']) < 0:
                sentiment1[0] += 1
            else:
                sentiment1[1] += 1
            #print('positive sentiment score: ', response['docSentiment']['score'])
        counter1 += 1
    else:
        print('Error in sentiment analysis call: ', response['statusInfo'])
message_date_abb = nlong_date_obj.strftime("%Y-%m-%d")
print message_date_abb
message_date_abb_obj = datetime.strptime(message_date_abb, '%Y-%m-%d')

#cursor.execute('UPDATE {0} SET Most_Recent_Message = {1} WHERE Date="{2:%Y}-{2:%m}-{2:%d}"'.format(summary_data_table,all_stocktwits[x][1],message_date_abb_obj))

# Save to DB, I like to save my work
cursor.execute('COMMIT')

print "The date of message: ", message_date_abb_obj
print "the most recent message", all_stocktwits[x][1]

# isolate the actual text
tweet = stocktwit_obj.json()['messages'][message_num]['body']

# get the sentiment of the tweet
print "Getting sentiment analysis from alchemyapi"
sentiment_resp = alchemyapi.sentiment('text', tweet)
#print "sentiment_resp: ", sentiment_resp
#print "tweet: ", tweet

# Need to find a way to encode sentences with strange links and characters
# so they can be read by the alchemy api
if sentiment_resp['status'] == "OK":
    sentiment_type = sentiment_resp['docSentiment']['type']
else:
    sentiment_type = "neutral"

if sentiment_type == "neutral":
    sentiment_value = 0
else:
    sentiment_value = float(sentiment_resp['docSentiment']['score'])
response = alchemyapi.concepts('text', test_text)
assert(response['status'] == 'OK')
response = alchemyapi.concepts('html', test_html)
assert(response['status'] == 'OK')
response = alchemyapi.concepts('url', test_url)
assert(response['status'] == 'OK')
response = alchemyapi.concepts('random', test_url)
assert(response['status'] == 'ERROR')  # invalid flavor
print('Concept tests complete!')
print('')

# Sentiment
print('Checking sentiment . . . ')
response = alchemyapi.sentiment('text', test_text)
assert(response['status'] == 'OK')
response = alchemyapi.sentiment('html', test_html)
assert(response['status'] == 'OK')
response = alchemyapi.sentiment('url', test_url)
assert(response['status'] == 'OK')
response = alchemyapi.sentiment('random', test_url)
assert(response['status'] == 'ERROR')  # invalid flavor
print('Sentiment tests complete!')
print('')

# Targeted Sentiment
print('Checking targeted sentiment . . . ')
response = alchemyapi.sentiment_targeted('text', test_text, 'heart')
def collect_tweets(keyword, count, force=False):
    from TwitterSearch import TwitterSearch
    from TwitterSearch import TwitterSearchOrder
    import pymongo
    from dateutil.parser import parse
    from alchemyapi import AlchemyAPI
    import ConfigParser

    # try:
    #     keyword = sys.argv[1]
    #     count = int(sys.argv[2])
    # except IndexError:
    #     e_too_few_args = "You did not enter enough arguments. Two are required: keyword, and count"
    #     raise Exception(e_too_few_args)
    # try:
    #     if sys.argv[3] == '-f':
    #         force = True
    #     else:
    #         e_invalid_argument = "The only option available is -f. It is used to force the script to continue when the Alchemy API limit is exceeded."
    #         raise Exception(e_invalid_argument)
    # except IndexError:
    #     force = False

    # Read the config file for config variables
    config = ConfigParser.RawConfigParser()
    config.read('config.cfg')
    mongo_url = config.get('Mongo', 'db_url')

    # Connect to the Mongo database using MongoClient
    client = pymongo.MongoClient(mongo_url)
    db = client.get_default_database()

    # Access/create the collection based on the command line argument
    tweets = db[keyword]

    # Generate the alchemyapi variable
    alchemyapi = AlchemyAPI()

    # To accommodate hashtags, the user can substitute a . for the # on the
    # command line; the lines below return it to a hashtag for the search.
    if keyword[0] == ".":
        keyword = keyword.replace('.', '#')

    # Ensure that the query is not doing duplicate work.
    # First, count how many documents exist in the collection.
    db_count = tweets.count()

    # If there are documents in the collection, query it, sort the tweet
    # objects by date, and retrieve the tweet_id of the most recent tweet,
    # which is later set as the "since_id".
    if db_count != 0:
        latest_id = tweets.find({}, {'object.tweet_id': 1}).sort("startedAtTime").limit(1)
        latest_id_str = latest_id[db_count - 1]['object']['tweet_id']
        latest_id_int = int(latest_id_str)
        print 'Count of documents in the ' + keyword + ' collection is not 0. It is ' + str(db_count) + '. Mongo is now identifying the latest tweet ID to append as a parameter to the API call.'
    # If there are no documents in the collection, no queries are done, and
    # the since_id is left out of the API call.
    else:
        print 'The Mongo collection ' + keyword + ' is empty. The script will now collect all tweets.'

    # create a TwitterSearchOrder object
    tso = TwitterSearchOrder()

    # let's define all words we would like to have a look for
    tso.set_keywords([keyword])

    # Select language
    tso.set_language('en')

    # Include Entity information
    tso.set_include_entities(True)

    if db_count != 0:
        tso.set_since_id(latest_id_int)
        print 'Since the document count in the ' + keyword + ' collection is above 0, the since_id uses the parameter of the latest tweet so that only new tweets are collected.'
    else:
        print 'No documents exist in the ' + keyword + ' collection right now so the since_id parameter will be empty and all tweets will be collected.'
    # Create a TwitterSearch object with our secret tokens
    ts = TwitterSearch(
        consumer_key=config.get('Twitter', 'consumer_key'),
        consumer_secret=config.get('Twitter', 'consumer_secret'),
        access_token=config.get('Twitter', 'access_token'),
        access_token_secret=config.get('Twitter', 'access_token_secret')
    )

    # Perform the search
    twitter_search = ts.search_tweets_iterable(tso)

    # Start the insert count variable
    db_inserts = 0

    # this is where the fun actually starts :)
    try:
        for tweet in twitter_search:
            if db_inserts < count:
                mentions_list = []
                hashtags_list = []
                # Create the caliper_tweet object
                caliper_tweet = {
                    "context": "http://purl.imsglobal.org/ctx/caliper/v1/MessagingEvent",
                    "type": "MessagingEvent",
                    "startedAtTime": "",
                    ## Can be used to query Twitter API for user information
                    "actor": "",
                    "verb": "tweetSent",
                    "object": {
                        "type": "MessagingEvent",
                        "tweet_id": "",
                        "tweet_uri": "",
                        "subtype": "tweet",
                        ## "to" should be calculated by checking whether in_reply_to_user_id_str is null.
                        ## If it is not null, it should be concatenated to "uri:twitter/user/" and stored in "object"['to']
                        "to": "",
                        "author": {
                            "author_uri": "",
                            "author_alias": "",
                            "author_name": "",
                        },
                        "text": "",
                        "sentiment": {
                            "type": "",
                            "score": "",
                            "color": ""
                        },
                        "parent": "",
                        ## "mentions" is an array of the caliper IDs from the user_mentions objects array
                        "user_mentions": [],
                        ## "hashtags" is an array of the hashtag texts included in the tweet entities
                        "hashtags": []
                    }
                }

                # Set the re-usable variables
                tweet_text = tweet['text']

                ## AlchemyAPI Sentiment Analysis
                tweet_sentiment = ''
                response = alchemyapi.sentiment('text', tweet_text)
                if 'docSentiment' in response.keys():
                    if 'score' in response['docSentiment']:
                        tweet_sentiment_score = response['docSentiment']['score']
                        tweet_sentiment_score = float(tweet_sentiment_score)
                        tweet_sentiment_score = round(tweet_sentiment_score, 2)
                    else:
                        tweet_sentiment_score = 0
                    tweet_sentiment_type = response['docSentiment']['type']
                    tweet_sentiment_score_a = abs(tweet_sentiment_score)
                    if tweet_sentiment_score > 0:
                        tweet_sentiment_color = "rgba(0,255,0," + str(tweet_sentiment_score_a) + ")"
                    else:
                        tweet_sentiment_color = "rgba(255,0,0," + str(tweet_sentiment_score_a) + ")"
                elif force == True:
                    print 'Force option set to true. The tweet_sentiment object will be set with API Limit Exceeded values.'
                    tweet_sentiment_type = 'API Limit Exceeded'
                    tweet_sentiment_score = 0
                    tweet_sentiment_color = 'rgba(0,0,0,0)'
                else:
                    e_alchemy_api_limit = 'Alchemy API daily limit exceeded. Retry search with force=True to continue'
                    raise Exception(e_alchemy_api_limit)

                ds = tweet['created_at']
                tweet_date = parse(ds)
                caliper_tweet['startedAtTime'] = tweet_date
                caliper_tweet['actor'] = 'student:' + tweet['user']['screen_name']
                caliper_tweet['object']['tweet_uri'] = 'https://twitter.com/' + tweet['user']['screen_name'] + '/status/' + tweet['id_str']
                caliper_tweet['object']['tweet_id'] = tweet['id_str']
                if tweet['in_reply_to_user_id_str'] is None:
                    caliper_tweet['object']['to'] = 'NoReply'
                    caliper_tweet['object']['parent'] = 'NoReply'
                else:
                    caliper_tweet['object']['to'] = 'https://twitter.com/intent/user?user_id=' + tweet['in_reply_to_user_id_str']
                    if tweet['in_reply_to_status_id_str'] is None:
                        caliper_tweet['object']['parent'] = 'None'
                    else:
                        caliper_tweet['object']['parent'] = 'https://twitter.com/' + tweet['user']['screen_name'] + '/status/' + tweet['in_reply_to_status_id_str']
                caliper_tweet['object']['author']['author_uri'] = 'https://twitter.com/intent/user?user_id=' + tweet['user']['id_str']
                caliper_tweet['object']['author']['author_alias'] = tweet['user']['screen_name']
                caliper_tweet['object']['author']['author_name'] = tweet['user']['name']
                caliper_tweet['object']['text'] = unicode(tweet['text'])
                caliper_tweet['object']['sentiment']['type'] = tweet_sentiment_type
                caliper_tweet['object']['sentiment']['score'] = tweet_sentiment_score
                caliper_tweet['object']['sentiment']['color'] = tweet_sentiment_color
                for x in list(tweet['entities']['hashtags']):
                    hashtag = x['text']
                    hashtags_list.append(hashtag)
                for x in list(tweet['entities']['user_mentions']):
                    mention = x['id_str']
                    mentions_list.append(mention)
                caliper_tweet['object']['user_mentions'] = mentions_list
                caliper_tweet['object']['hashtags'] = hashtags_list
                tweets.insert(caliper_tweet)
                db_inserts = db_inserts + 1
            else:
                raise StopIteration
    except StopIteration:
        print str(db_inserts) + " inserts made in the " + keyword + " collection."
__author__ = 'jingyiyuan'
import os
from alchemyapi import AlchemyAPI

os.chdir("/Users/jingyiyuan/Desktop/Adv Big Data/hw2/datas")

companies = [
    "Bank of America", "CitiGroup", "IBM", "apple", "McDonald's", "Nike",
    "twitter", "tesla"
]

for i in range(len(companies)):
    filename = companies[i] + '.txt'
    # read the file contents so a string, not a file object, is sent to the API
    with open(filename, 'r') as f:
        myText = f.read()
    alchemyapi = AlchemyAPI()
    response = alchemyapi.sentiment("text", myText)
    print response
def scrapeCall(self, url_segment, target):
    def GetFinancialRegression(ticker, date):
        print "Creating financial regression..."
        date_match = re.search(r'(\w+)\s(\d+),\s*(\d{4})', date, flags=re.S | re.I | re.M)
        if not date_match:
            raise Exception("Could not find date_match!")
        else:
            print "Established date match!"
        month = datetime.datetime.strptime(date_match.group(1), "%B").month
        day = date_match.group(2)
        year = date_match.group(3)
        if month >= 9:
            start_month = month - 3
            start_year = year
            next_month = month - 9
            next_year = int(year) + 1
        elif month <= 3:
            ## Go back to finish!
            pass
        else:
            next_month = month + 2
            next_year = year
        print "Creating URL..."
        historical_url = "http://ichart.yahoo.com/table.csv?s=%s&a=%s&b=%s&c=%s&d=%s&e=%s&f=%s" % (
            ticker, month, day, year, next_month, day, next_year)
        try:
            historical_data = urllib2.urlopen(historical_url).read()
        except:
            return
        print "read page!"
        days = re.findall(r'\n([^\n]*?)\n', historical_data)
        if not days:
            raise Exception('Could not find days!')
        closing_prices = []
        percentage_changes = []
        average_percentage_change = 0
        for i in range(0, len(days)):
            closing_price_match = re.search(r',\d+,(\d+\.\d{2})', days[i])
            if not closing_price_match:
                raise Exception('Could not find closing_price_match!')
            else:
                closing_price = float(closing_price_match.group(1))
                closing_prices.append(closing_price)
        return np.std(closing_prices)

    print "Scraping transcript text..."
    url = "http://seekingalpha.com%s&page=single" % url_segment[0]
    title = url_segment[1]
    print "-----------------------------"
    print title
    print "-----------------------------"
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'none',
        'Accept-Language': 'en-US,en;q=0.8',
        'Connection': 'keep-alive'
    }
    request = urllib2.Request(url=url, headers=headers)
    try:
        page_raw = urllib2.urlopen(request).read()
    except:
        raise Exception('Could not open page_raw!')
    page_clean = page_raw.decode('UTF-8')
    page_clean = page_clean.replace('<p>', '').replace('</p>', '').replace(
        '<strong>', '').replace('</strong>', '').replace('\n', '')
    print "Finding transcript_text_match..."
    transcript_text_match = re.search(
        r'<div id="article_body" itemprop="articleBody">(.*?)<div id="article_disclaimer" class="content_part hid">',
        page_raw, flags=re.S | re.I | re.M)
    if not transcript_text_match:
        breakpoint(page_raw)
        pdb.set_trace()
        raise Exception('Could not find transcript_text_match!')
    transcript_text = transcript_text_match.group(1)
    print "Finding Q&A match..."
    qa_match = re.search(
        r'<div id="article_qanda" class="content_part hid">(.*?)</div>',
        transcript_text, flags=re.S | re.I | re.M)
    if not qa_match:
        pdb.set_trace()
        raise Exception('Could not find qa_match!')
    else:
        qa_text = qa_match.group(1)
    print "finding call_date_match..."
    call_date_match = re.search(
        r'(\w+\s+\d+,\s+\d{4},?\s+\d+:\d\d\s+[AP]M\s[EPCG]M?T)',
        transcript_text, flags=re.S | re.I | re.M)
    if not call_date_match:
        return
        breakpoint(transcript_text)
        raise Exception('Could not find call_date_match!')
    call_date = call_date_match.group(1)
    print "Creating AlchemyAPI instance..."
    alchemyapi = AlchemyAPI()
    print "Sending AlchemyAPI request..."
    response = alchemyapi.sentiment(flavor='url', data=url)
    if response['status'] == 'OK':
        print "returned AlchemyAPI object!"
    else:
        print('Error in sentiment analysis call: ', response['statusInfo'])
    beta = GetFinancialRegression(target, call_date)
    print beta
    try:
        transcript = {
            'target': target,
            'text': transcript_text,
            'collection_date': datetime.datetime.now(),
            'call_date': call_date,
            'source': url,
            'html': page_raw,
            'transcript_text': transcript_text,
            'title': title,
            'qa_text': qa_text,
            'sentiment': response['docSentiment'],
            'beta': beta
        }
    except:
        return
    print "Returning transcript..."
    return transcript
from alchemyapi import AlchemyAPI
import sys

# Create the AlchemyAPI Object
alchemyapi = AlchemyAPI()

# read from file
tweets_file = open(sys.argv[1], 'rb')

en = 0
es = 0
fr = 0
cat = 0
other = 0

for line in tweets_file:
    response = alchemyapi.sentiment("text", line)
    # error responses carry no language field, so count those as "other"
    lang = response.get("language", "")
    if lang == 'catalan':
        cat = cat + 1
    elif lang == 'spanish':
        es = es + 1
    elif lang == 'english':
        en = en + 1
    elif lang == 'french':
        fr = fr + 1
    else:
        other = other + 1

print "Total = " + str(en + es + fr + cat + other)
url = "http://quora-api.herokuapp.com/users/" + sys.argv[1] + "/activity" data = requests.get(url).json() data = data['activity'] payload = {} #count=0 #getDocCount() for activities in data: title = activities['title'] summary = activities['summary'] print title document['title'] = title document['summary'] = summary labels = al.taxonomy("text", title) entities = al.entities("html", summary) keywords = al.keywords("html", summary) sentiment = al.sentiment("html", summary) #print labels['taxonomy'] #count+=1 payload['entities'] = {} payload['keywords'] = [] payload['sentiment'] = {} docNode = createDocNode(document) try: print "Yo" labels = labels['taxonomy'][0]['label'] print "Yo1" print labels labels = func(labels) print labels entities = entities['entities'] print "Sub Classification"