def get_keywords(documents, text_accessor=lambda x: x):
    """Collect the keyword texts AlchemyAPI reports for each document.

    Args:
        documents: Iterable of documents to analyse.
        text_accessor: Callable applied to every document to obtain the text
            that is sent to the API. Defaults to the identity function.

    Returns:
        A list with one set per document, in input order. Each set holds the
        ``'text'`` value of every keyword in that document's response; a
        document whose response status is not ``'OK'`` contributes an empty
        set.
    """
    client = AlchemyAPI()
    keyword_sets = []
    for doc in documents:
        result = client.keywords('text', text_accessor(doc), {'sentiment': 1})
        if result['status'] != 'OK':
            # Still append a placeholder so the output stays aligned with
            # the input documents.
            keyword_sets.append(set())
            print('Error in keyword extraction call')
            continue
        keyword_sets.append({entry['text'] for entry in result['keywords']})
    return keyword_sets
def main():
    """Extract keywords from one fixed article and print the first ten.

    The article text is obtained through ``Webpage``, sent to the AlchemyAPI
    keyword endpoint, parsed with ``parse_keyword_output`` and sorted in
    descending order before the leading ten entries are printed.
    """
    from alchemyapi.alchemyapi import AlchemyAPI

    # Article whose text is analysed.
    url = 'http://www.bloombergview.com/articles/2014-09-26/the-secret-goldman-sachs-tapes'
    page_text = Webpage(url).get_text()

    # Keyword extraction, then descending sort of the parsed result.
    ranked = parse_keyword_output(AlchemyAPI().keywords('text', page_text))
    ranked.sort(reverse=True)

    # Console report: the URL first, then the leading ten entries.
    print('\nThe URL used for this assignment is:\n\n%s\n' % url)
    print_keywords(ranked[:10])
class AlchemyApiService(object):
    """Small wrapper around an ``AlchemyAPI`` client for keyword extraction."""

    def __init__(self):
        self.alchemy_api = AlchemyAPI()

    def get_keywords(self, text):
        """Return the keywords the API extracts from ``text``.

        Args:
            text: The text passed to the keyword endpoint.

        Returns:
            The ``'keywords'`` entry of the response when its status is
            ``'OK'``. In every other case (non-OK status, or any exception
            raised while calling the client or reading the response) the
            problem is printed and an empty list is returned.
        """
        # TODO(simplyfaisal): Refine error handling logic.
        try:
            response = self.alchemy_api.keywords('text', text, {'sentiment': 1})
            if response['status'] == 'OK':
                return response['keywords']
            # Single-argument print(...) calls produce the same output on
            # Python 2 and Python 3, unlike the bare print statement.
            print('Error in keyword extraction')
            print(response)
        except Exception as e:
            # Deliberate best-effort boundary: report the failure and fall
            # through to the empty result instead of propagating.
            print(e)
        return []
# NOTE(review): the two debug prints below reference `divText`, which is not
# defined at this point in the visible code; they look like the tail of a
# block that starts outside this view. They are kept verbatim -- confirm
# their intended scope and indentation.
print(str(divText.div.strong.prettify) + " AUTHOR NAME TEST")
print(str(divText.div.blockquote.prettify) + " QUOTE TEXT TEST")


def build_quotes(TextSoup):
    """Collect the quote paragraphs found under the ``main_body`` div.

    Args:
        TextSoup: Parsed document to search.

    Returns:
        A list with one ``findAll('p', text = True)`` result per child of the
        div that contains a ``blockquote``.
    """
    # NOTE(review): in Beautiful Soup's find(name, attrs, recursive, ...) the
    # third positional argument is `recursive`, so {"class": "right"} is not
    # applied as an attribute filter here -- confirm the intended selector.
    divText = TextSoup.find("div", {"id": "main_body"} , {"class": "right"})
    authors = []
    quotes = []
    for tag in divText:
        # Both -1 and None are treated as "not found"; presumably plain-text
        # children answer .find() with str.find's -1 -- verify.
        author = tag.find('strong')
        if author not in (-1, None):
            authors.append(author.contents)
        blockquote = tag.find('blockquote')
        if blockquote not in (-1, None):
            quotes.append(blockquote.findAll('p', text = True))
    # Debug output only; skipped when nothing matched so that an empty page
    # does not raise IndexError.
    if authors and quotes:
        print(str(authors[0]) + str(quotes[0]) + "FIRST QUOTE - RESULTS TEST")
    return quotes


Allquotes = build_quotes(soup)
alchemyapi = AlchemyAPI()
# NOTE(review): Allquotes is a list of findAll() results rather than a single
# string -- confirm this is what the 'text' endpoint should receive.
response = alchemyapi.keywords('text', Allquotes)
# Read the keyword list by key: indexing response.values() depends on dict
# ordering and is not possible on Python 3, where values() is a view.
if response['status'] == 'OK':
    keywords = response['keywords']
else:
    print('Error in keyword extraction call')
    keywords = []
if keywords:
    print(str(keywords[0]) + " KEYWORD SLICE CHECK")
df = pd.DataFrame(keywords)
# Shift the index so the printed rows are numbered from 1.
df.index = df.index + 1
print(df[0:10])
from alchemyapi.alchemyapi import AlchemyAPI
from json import load, dump
import time

# Build an inverted index from keyword text to the songs it was extracted
# from: read lyrics.json, query the keyword endpoint once per song, and write
# the resulting mapping to keywords.json.

# Context managers make sure both files are closed (and the output flushed)
# even if a request or the serialisation fails.
with open('lyrics.json') as lyrics_file:
    songs = load(lyrics_file)

alchemyapi = AlchemyAPI()
keywords = {}
for song in songs:
    response = alchemyapi.keywords('text', songs[song])
    if response.get('status') != 'OK':
        # A failed call carries no 'keywords' entry; report it and keep
        # going so one bad song does not discard the rest of the index.
        print('Error in keyword extraction for %r' % song)
        continue
    for keyword in [c['text'] for c in response['keywords']]:
        keywords.setdefault(keyword, []).append(song)

with open('keywords.json', 'w') as keywords_file:
    dump(keywords, keywords_file)
# Tally, per song, how many feed-entry keywords map to it through the
# `keywords` lookup, while showing a percentage progress indicator.
alchemyapi = AlchemyAPI()
songs = Counter()
# Single-argument print(...) calls behave identically on Python 2 and 3.
print(' Analysing: %s ' % (d['feed']['subtitle']))
count = len(d['entries'])
i = 0
stdout.write(' Working: 0%')
for i, t in enumerate(d['entries'], 1):
    title = t['title']
    url = t['link']
    # Entries whose title is tagged as video or audio are not analysed.
    if 'VIDEO:' not in title and 'AUDIO:' not in title:
        response = alchemyapi.keywords('url', url)
        if response['status'] != 'OK':
            print("\n%s: %s" % (response['status'], response['statusInfo']))
            # SystemExit is what exit() raises; raising it directly does not
            # depend on the interactive helpers installed by the site module.
            raise SystemExit(1)
        for w in response['keywords']:
            w = w['text']
            if w in keywords and w != 'page':
                for s in keywords[w]:
                    songs[s] += 1
    # Back up over the previous four-character percentage and overwrite it.
    # Integer arithmetic avoids float truncation (e.g. 29/100 showing 28%).
    stdout.write('\b'*4 + '%3d%%' % (i * 100 // count))
    stdout.flush()