wait = raw_input('press enter to continue')

print('')
print('')
print('')
print('############################################')
print('#   Combined Example                       #')
print('############################################')
print('')
print('')
print('Processing text: ', demo_text)
print('')

response = alchemyapi.combined('text', demo_text)

if response['status'] == 'OK':
    print('## Response Object ##')
    print(json.dumps(response, indent=4))

    print('')
    print('## Keywords ##')
    for keyword in response['keywords']:
        print(keyword['text'], ' : ', keyword['relevance'])

    print('')
    print('## Concepts ##')
    for concept in response['concepts']:
        print(concept['text'], ' : ', concept['relevance'])
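# The combined() call also accepts an options dict; the 'extract' option
# (used the same way in a later snippet in this collection) limits the
# response to the named features, which keeps the payload small.
# A minimal sketch, assuming the same alchemyapi instance and demo_text:
response = alchemyapi.combined('text', demo_text,
                               options={'extract': 'keyword, concept'})
if response['status'] == 'OK':
    for keyword in response.get('keywords', []):
        print(keyword['text'], ' : ', keyword['relevance'])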
print('Checking imagetagging . . . ')
response = alchemyapi.imageTagging('text', test_text)
assert(response['status'] == 'ERROR')
response = alchemyapi.imageTagging('html', test_html)
assert(response['status'] == 'ERROR')
response = alchemyapi.imageTagging('url', test_url)
assert(response['status'] == 'OK')
response = alchemyapi.imageTagging('image', test_jpg)
assert(response['status'] == 'OK')
print('Image tagging tests complete!')
print('')
print('')

# combined
print('Checking combined . . . ')
response = alchemyapi.combined('text', test_text)
assert(response['status'] == 'OK')
response = alchemyapi.combined('html', test_html)
assert(response['status'] == 'ERROR')
response = alchemyapi.combined('url', test_url)
assert(response['status'] == 'OK')
print('Combined tests complete!')
print('')
print('')

# taxonomy
print('Checking taxonomy . . . ')
response = alchemyapi.taxonomy('text', test_text)
assert(response['status'] == 'OK')
response = alchemyapi.taxonomy('html', test_html, {'url': 'test'})
assert(response['status'] == 'OK')
# (enclosing taxonomy call restored; the original fragment began mid-loop)
response = alchemyapi.taxonomy('text', demo_text)
if response['status'] == 'OK':
    for category in response['taxonomy']:
        print(category['label'], ' : ', category['score'])
    print('')
else:
    print('Error in taxonomy call: ', response['statusInfo'])

print('')
print('')
print('# Combined Example #')

response = alchemyapi.combined('text', demo_text)
if response['status'] == 'OK':
    print('## Response Object ##')
    # dump the full response to disk for later inspection
    with open('combined.json', 'w') as outfile:
        json.dump(response, outfile, indent=4)

    print('')
    print('## Keywords ##')
    for keyword in response['keywords']:
        print(keyword['text'], ' : ', keyword['relevance'])

    print('')
    print('## Concepts ##')
    for concept in response['concepts']:
        print(concept['text'], ' : ', concept['relevance'])
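# Since the combined response is persisted above, reading it back is a
# one-liner with json.load; a small sketch against the combined.json file
# written by this demo:
import json

with open('combined.json') as infile:
    saved = json.load(infile)

# Re-rank the saved keywords numerically (relevance comes back as a string).
for keyword in sorted(saved.get('keywords', []),
                      key=lambda k: float(k['relevance']), reverse=True)[:5]:
    print(keyword['text'], ' : ', keyword['relevance'])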
print("") print("") print("") print("") print("############################################") print("# Combined Example #") print("############################################") print("") print("") print("Processing text: ", demo_text) print("") response = alchemyapi.combined("text", demo_text) if response["status"] == "OK": print("## Response Object ##") print(json.dumps(response, indent=4)) print("") print("## Keywords ##") for keyword in response["keywords"]: print(keyword["text"], " : ", keyword["relevance"]) print("") print("## Concepts ##") for concept in response["concepts"]: print(concept["text"], " : ", concept["relevance"])
# Python 2 module. Required imports (AlchemyAPI and translate come from the
# project's own modules):
import codecs
import cPickle
import json
import time

from alchemyapi import AlchemyAPI   # local SDK module
# from <project translation helper> import translate


class FileStore:

    def __init__(self, filename, language):
        self.al = AlchemyAPI('bf18ed72384724d86425c8674204039f87352870')
        self.filename = filename
        self.language = language
        # Map of output keys to the (eval'd) index paths into a raw tweet dict.
        self.filterKeys = {
            'created_at': u"['created_at']",
            'id': u"['id']",
            'lang': u"['lang']",
            'tweet_urls': u"['entities']['urls']",
            'tweet_hashtags': u"['entities']['hashtags']",
            'user_location': u"['user']['location']",
            'keywords': u"['alchemy']['keywords']",
            'user_screen_name': u"['user']['screen_name']",
            'text': u"['text']",
            'text_' + self.language: u"['text']",
        }

    def extract_entity(self, rawTweet, filteredTweet):
        # Collect entity mentions of the types we care about.
        entities = ['City', 'Country', 'Person', 'Organization', 'PrintMedia']
        try:
            for i in rawTweet['alchemy']['entities']:
                if i['type'] in entities:
                    if i['type'] not in filteredTweet:
                        filteredTweet[i['type']] = []
                    filteredTweet[i['type']].append(i['text'])
        except:
            pass

    def extract_concepts(self, rawTweet, filteredTweet):
        # Concepts keep both the surface text and the DBpedia link.
        try:
            filteredTweet['concepts'] = []
            for i in rawTweet['alchemy']['concepts']:
                filteredTweet['concepts'].append(i['text'])
                filteredTweet['concepts'].append(i['dbpedia'])
        except:
            pass

    def simplify(self, tweetObj, simplifyKeys):
        # Replace a list of dicts with a list of one chosen field per dict.
        for key, field in simplifyKeys:
            if key in tweetObj:
                tempObjList = tweetObj[key]
                tweetObj[key] = [el[field] for el in tempObjList]

    def dateConvert(self, tweetObj):
        # Twitter's created_at format -> ISO 8601 timestamp.
        created_at = tweetObj['created_at']
        tweetObj['created_at'] = time.strftime(
            '%Y-%m-%dT%H:%M:%SZ',
            time.strptime(created_at, '%a %b %d %H:%M:%S +0000 %Y'))

    def translate(self, tweetObj):
        # Fill in text_<lang> for every target language, retrying entries
        # that previously failed with a TranslateApiException. Note the
        # inner translate() call resolves to the module-level helper,
        # not this method.
        lang = ['ar', 'de', 'en', 'fr', 'ru']
        lang.remove(tweetObj['lang'])
        tweet_lang = tweetObj['lang']
        tweetObj['text_' + tweet_lang] = tweetObj['text']
        for i in lang:
            if 'text_' + i not in tweetObj:
                text_to_translate = ' '.join(
                    tweetObj['text_' + tweet_lang].split('\n'))
                tweetObj['text_' + i] = translate(text_to_translate,
                                                  tweet_lang, i)
            elif tweetObj['text_' + i].find('TranslateApiException:') != -1:
                text_to_translate = ' '.join(
                    tweetObj['text_' + tweet_lang].split('\n'))
                tweetObj['text_' + i] = translate(text_to_translate,
                                                  tweet_lang, i)

    def filter(self, tweet):
        # Project the raw tweet down to the fields in filterKeys, then
        # enrich with entities/concepts and normalise dates and text.
        filteredTweet = {}
        lang = ['ar', 'de', 'en', 'fr', 'ru']
        for key in self.filterKeys:
            try:
                filteredTweet[key] = eval('tweet' + self.filterKeys[key])
            except:
                pass
        self.extract_entity(tweet, filteredTweet)
        self.extract_concepts(tweet, filteredTweet)
        self.simplify(filteredTweet, [('tweet_urls', 'expanded_url'),
                                      ('tweet_hashtags', 'text'),
                                      ('keywords', 'text')])
        try:
            self.dateConvert(filteredTweet)
        except:
            pass
        filteredTweet['date'] = filteredTweet['created_at'][:10]
        if 'text_' + filteredTweet['lang'] in filteredTweet:
            filteredTweet['text'] = filteredTweet['text_' + filteredTweet['lang']]
        for i in lang:
            if 'text_' + i in filteredTweet:
                filteredTweet['text_' + i] = \
                    filteredTweet['text_' + i].replace('\\', '')
        return filteredTweet

    def storeFilteredData(self, data):
        with codecs.open(self.filename + '_tweets_filtered.json', 'w',
                         encoding='utf8') as fileHandle:
            json.dump([self.filter(tweetData) for tweetData in data],
                      fileHandle)

    def storeTweet(self, data):
        with codecs.open(self.filename + '_tweets.txt', 'w',
                         encoding='utf8') as fileHandle:
            for tweet in data:
                fileHandle.write(str(tweet['id']) + '--->' + tweet['text'] + '\n')

    def jsonLoad(self):
        data = []
        try:
            with codecs.open(self.filename + '.json', 'r',
                             encoding='ISO-8859-1') as fileHandle:
                try:
                    data = json.load(fileHandle)
                except:
                    pass
        except:
            pass
        return data

    def jsonStore(self, data):
        # Merge new tweets with the stored ones, dropping duplicates.
        storedJson = self.jsonLoad()
        uniqueTweets = self.deduplicate(data + storedJson)
        with codecs.open(self.filename + '.json', 'w',
                         encoding='ISO-8859-1') as fileHandle:
            json.dump(uniqueTweets, fileHandle)
        self.storeTweet(uniqueTweets)
        self.storeFilteredData(uniqueTweets)

    def store(self, data):
        self.jsonStore(data)

    def tag(self, tweet):
        # Run AlchemyAPI's combined call once per tweet; on a previous
        # ERROR, retry with the original (untranslated) text.
        if 'alchemy' not in tweet:
            if tweet['lang'] == 'de':
                tweet_text = tweet['text_de']
            else:
                tweet_text = tweet['text_en']
            tweet['alchemy'] = self.al.combined('text', tweet_text)
        elif tweet['alchemy']['status'] == 'ERROR':
            tweet_text = tweet['text']
            tweet['alchemy'] = self.al.combined('text', tweet_text)

    def deduplicate(self, data):
        # Keep one tweet per id; translate and tag each tweet the first
        # time it is seen.
        uniqueTweets = {}
        count = 0
        try:
            for tweet in data:
                if tweet['id'] not in uniqueTweets:
                    print "translation and tagging: " + str(count)
                    count = count + 1
                    self.translate(tweet)
                    self.tag(tweet)
                    uniqueTweets[tweet['id']] = tweet
            print(len(uniqueTweets.values()))
        except:
            pass
        return uniqueTweets.values()

    def load(self):
        # Read back pickled tweets, one object per cPickle.load call,
        # until the stream is exhausted.
        data = []
        try:
            with codecs.open(self.filename + '.dat', 'rb',
                             encoding='ISO-8859-1') as fileHandle:
                while True:
                    try:
                        data.append(cPickle.load(fileHandle))
                    except:
                        break
        except:
            pass
        return data
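# Minimal usage sketch for FileStore. The filename prefix, language code,
# and fetch_tweets_somehow() are all hypothetical; the class expects raw
# Twitter API dicts with at least 'id', 'text', 'lang', and 'created_at'.
store = FileStore('syria_corpus', 'en')
tweets = fetch_tweets_somehow()   # list of raw tweet dicts (assumed)
store.store(tweets)               # dedupes, translates, tags, then writes
                                  # syria_corpus.json, syria_corpus_tweets.txt,
                                  # and syria_corpus_tweets_filtered.json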
args.corpusloc += '/'
topn = -args.nTOpropose
#stpwrds = loadStopwords("stopwords.french.list")
start = time.time()
#vectorizer = CountVectorizer(ngram_range=(1, args.ngram), lowercase=True,
#                             stop_words=stpwrds)

[txtCorpus, tagsw] = readXMLCorpusFrom(args.corpusloc)

tgsugtop = []
alchemyapi = AlchemyAPI()
for docX in txtCorpus:
    # Collect [text, relevance] pairs from keywords and entities.
    tagsug = []
    response = alchemyapi.combined('text', docX)
    if response['status'] == 'OK':
        for keyword in response['keywords']:
            tagsug.append([keyword['text'], keyword['relevance']])
        for entity in response['entities']:
            tagsug.append([entity['text'], entity['relevance']])
    # NB: relevance is a string here, so this sorts lexicographically in
    # ascending order; the first ten entries are the lowest-relevance ones.
    for tgsg in sorted(tagsug, key=lambda x: x[1])[:10]:
        tgsugtop.append(tgsg[0])
        print tgsg[0]

#--------------------------------------
#f = open('../../results/tfidfsuggestion/' + args.resultfile + "_"
#         + str(args.nTOpropose) + ".res", 'wb')
f = open("../../../results/tooleval/alchemyAPItagger/" + args.resultfile
         + "_" + str(args.nTOpropose) + ".res", 'wb')
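# The snippet above stops right after opening the results file. A hedged
# sketch of how the collected suggestions might be flushed to it; the
# one-tag-per-line format is an assumption, not taken from the original:
for tag in tgsugtop:
    f.write(tag.encode('utf-8') + '\n')   # 'wb' mode wants byte strings
f.close()
print "tagging took " + str(time.time() - start) + "s"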
def user_analysis_sentiments(request):
    if request.method == 'GET':
        user = request.GET.get('user', '')
        print user

        # Concatenate every message this user sent into one text blob.
        messages = []
        message = Message.objects.filter(user_send=user.decode("utf8"))
        for m in message:
            messages.append(m.message_text)
        text = ",".join(messages)

        alchemyapi = AlchemyAPI()

        # keywords (with sentiment)
        response = alchemyapi.keywords('text', text, {'sentiment': 1})
        if response['status'] == 'OK':
            keywords = []
            for keyword in response['keywords']:
                keywords.append({
                    'keyword_text': keyword['text'].encode('utf-8'),
                    'keyword_relevance': keyword['relevance'],
                    'keyword_sentiment': keyword['sentiment']['type'],
                })
        else:
            print('Error in keyword extraction call: ', response['statusInfo'])

        # concepts
        response = alchemyapi.concepts('text', text)
        if response['status'] == 'OK':
            concepts = []
            for concept in response['concepts']:
                concepts.append({
                    'concept_text': concept['text'],
                    'concept_relevance': concept['relevance'],
                })
        else:
            print('Error in concept tagging call: ', response['statusInfo'])

        # language detection
        response = alchemyapi.language('text', text)
        if response['status'] == 'OK':
            language_id = {
                'language': response['language'],
                'iso_639_1': response['iso-639-1'],
                'native_speakers': response['native-speakers'],
                'wikipedia': response['wikipedia'],
            }
        else:
            print('Error in language detection call: ', response['statusInfo'])

        # relations
        response = alchemyapi.relations('text', text)
        if response['status'] == 'OK':
            relations = []
            for relation in response['relations']:
                # NB: if a relation lacks a part, the corresponding
                # relation_*_text value carries over from the previous
                # relation (and is unbound on the very first one).
                if 'subject' in relation:
                    relation_subject_text = relation['subject']['text'].encode('utf-8')
                if 'action' in relation:
                    relation_action_text = relation['action']['text'].encode('utf-8')
                if 'object' in relation:
                    relation_object_text = relation['object']['text'].encode('utf-8')
                relations.append({
                    'relation_subject_text': relation_subject_text,
                    'relation_action_text': relation_action_text,
                    'relation_object_text': relation_object_text,
                })
        else:
            print('Error in relation extraction call: ', response['statusInfo'])

        # category
        response = alchemyapi.category('text', text)
        if response['status'] == 'OK':
            category = response['category']
            score = response['score']
            print('category: ', category)
            print('score: ', score)
            categories = {'category': category, 'score': score}
        else:
            print('Error in text categorization call: ', response['statusInfo'])

        # taxonomy
        response = alchemyapi.taxonomy('text', text)
        if response['status'] == 'OK':
            taxonomies = []
            for category in response['taxonomy']:
                taxonomies.append({
                    'taxonomy_label': category['label'],
                    'taxonomy_score': category['score'],
                })
        else:
            print('Error in taxonomy call: ', response['statusInfo'])

        # combined
        response = alchemyapi.combined('text', text)
        if response['status'] == 'OK':
            print('## Response Object ##')
            print(json.dumps(response, indent=4))
            print('')

        user = {'user_name': 'LOL',
                'keywords': keywords,
                'concepts': concepts,
                'language_id': language_id,
                'relations': relations,
                'categories': categories,
                'taxonomies': taxonomies}
        return HttpResponse(json.dumps(user), content_type="application/json")
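# A quick way to exercise this view is Django's test client; the URL path
# below is a placeholder, since the project's urls.py isn't shown here.
from django.test import Client

client = Client()
resp = client.get('/user_analysis_sentiments/', {'user': 'some_user'})
print resp.status_code     # expect 200 on success
print resp.content[:200]   # start of the JSON blob built by the view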
#d = []
#wiki_terms = pd.DataFrame(data=None, columns=['Title', 'URL', 'Entities',
#                          'Keywords', 'Taxonomy', 'Concepts', 'Language',
#                          'Author'])
#for i in range(len(wiki_db)):
#    d = [wiki_db[i]['title'], wiki_db[i]['url'], wiki_db[i]['entities'],
#         wiki_db[i]['keywords'], wiki_db[i]['taxonomy'],
#         wiki_db[i]['concepts'], wiki_db[i]['language'],
#         wiki_db[i]['author']]
#    wiki_terms.loc[i] = d
#path = '/Users/michael/PycharmProjects/Startup Weekend/'
#wiki_terms.to_pickle('wiki_terms.pk1')
#wiki_terms.to_csv('wiki_terms2.csv')

wiki_terms = pd.read_pickle('wiki_terms.pk1')
#wiki_terms2 = pd.read_csv('wiki_terms2.csv')

# One combined call pulls entities, keywords, and concepts for the article.
news = alchemyapi.combined(
    'url', news_urls,
    options={'extract':
             'page-image, entity, keyword, title, author, taxonomy, concept'})

# Pool every extracted term into one list, then count occurrences.
news_keywords = []
for i in range(len(news['keywords'])):
    news_keywords.append(news['keywords'][i]['text'])
for i in range(len(news['entities'])):
    news_keywords.append(news['entities'][i]['text'])
for i in range(len(news['concepts'])):
    news_keywords.append(news['concepts'][i]['text'])

news_list = Counter(news_keywords)
wiki_list1 = []
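# news_list is a collections.Counter, so ranking the pooled terms takes one
# call; a small sketch:
for term, count in news_list.most_common(10):
    print(term, ':', count)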
data_dir = os.path.join(upper_dir, 'data')
filename = os.path.join(data_dir, 'Allmovie_2.csv.csv')

alchemy_language = AlchemyLanguageV1(
    api_key='7a454d32972b85fd2599536ba9063e4d1530014b')
alchemyapi = AlchemyAPI()

f = open(filename, 'rb')
reader = csv.DictReader(f)

result = open('result_of_alchemyapi_overview_to_keywords.csv', 'w')
fieldnames = ['movie overview', 'movie keywords']
writer = csv.DictWriter(result, fieldnames=fieldnames)
writer.writeheader()

# Skip the first 2500 rows (presumably handled in an earlier run) and keep
# only keywords above a 0.5 relevance threshold.
counter = 0
for line in reader:
    counter = counter + 1
    if counter >= 2501:
        response = alchemyapi.combined('text', line['overview'])
        if response['status'] == 'OK':
            keywords = []
            for keyword in response['keywords']:
                if float(keyword['relevance']) > 0.5:
                    keywords.append(keyword['text'].encode('utf-8'))
                    print(keyword['text'], ' : ', keyword['relevance'])
            writer.writerow({
                'movie overview': line['overview'],
                'movie keywords': keywords,
            })

f.close()
result.close()
def __init__(self):
    ##### Input file, can either be modified manually or with another program.
    fo = open("answer.txt", "r")
    text = open("answer.txt").read().splitlines()
    index = len(text) - 1
    ##### Because I was working with a file that was updated very frequently,
    ##### I grab the most recent expression in the file.
    text = text[index]
    fo.close()

    #### API instance creation
    alchemyapi = AlchemyAPI()
    print(text)
    print('')

    isCity = False
    isWeather = False
    wasIn = False

    #### If there is no city name in our input, the program asks for a city
    #### name until you specify one.
    #### NOTICE: enter the city name between quotation marks, e.g. "Paris"
    #### (Python 2's input() evaluates what you type).
    while (not isCity) or (not isWeather):
        ##### This is not the best solution ever; will update when I come up
        ##### with a new one :P
        if wasIn:
            test = input("WHERE?")
            print(test)
            text = text + " " + test

        # JSON result which has the intents we're looking for.
        response = alchemyapi.combined('text', text)
        # Uncomment the line below for the JSON result.
        #print(json.dumps(response, indent=4))

        ##### If the sentence is not weather-related, an exception may be
        ##### thrown here (the 'taxonomy' key can be missing).
        if response['taxonomy']:
            for x in response['taxonomy']:
                if "weather" in x['label']:
                    isWeather = True
                    break

        # A temporary solution for simple cases.
        if "FORECAST" in text:
            isWeather = True
        elif "SUNNY" in text:
            isWeather = True
        elif "RAINY" in text:
            isWeather = True

        ##### Distinguishing between cities and countries in order to
        ##### prevent troubles.
        if response['entities']:
            for x in response['entities']:
                if "City" in x['type']:
                    isCity = True
                    break

        wasIn = True

    ##### Variables below are passed to weather.py for further usage.
    try:
        if isCity:
            self.city = response['entities'][0]['text']  # city name
    except:
        self.city = ""
    try:
        self.total_days = response['entities'][1]['text']
    except:
        self.total_days = "TODAY"
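# Only __init__ is shown above, so the class name is unknown; assuming a
# hypothetical name like IntentParser, the attributes it sets would be
# consumed roughly like this (weather.py is named in the original comments):
parser = IntentParser()          # hypothetical class name
print(parser.city)               # e.g. "Paris"
print(parser.total_days)         # e.g. "TODAY"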