from alchemyapi.alchemyapi import AlchemyAPI


def get_keywords(documents, text_accessor=lambda x: x):
    """Return one set of keyword strings per document, empty on API errors."""
    alchemy_api = AlchemyAPI()
    keywords = []
    for document in documents:
        response = alchemy_api.keywords('text', text_accessor(document), {'sentiment': 1})
        if response['status'] == 'OK':
            keywords.append(set(k['text'] for k in response['keywords']))
        else:
            keywords.append(set())
            print('Error in keyword extraction call')
    return keywords
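# A minimal usage sketch for get_keywords, assuming the documents are dicts
# with a 'body' field (the field name and sample text are hypothetical):
if __name__ == '__main__':
    docs = [{'body': 'Gold prices rallied as the dollar weakened.'}]
    keyword_sets = get_keywords(docs, text_accessor=lambda d: d['body'])
    print(keyword_sets)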
Example #2
def main():

    from alchemyapi.alchemyapi import AlchemyAPI
    # Extract text from a webpage for analysis
    url = 'http://www.bloombergview.com/articles/2014-09-26/the-secret-goldman-sachs-tapes'
    article = Webpage(url)
    text = article.get_text()

    # Extract keywords
    alchemyapi = AlchemyAPI()
    output = alchemyapi.keywords('text', text)
    keywords = parse_keyword_output(output)
    keywords.sort(reverse=True)

    # Print to console
    print('\nThe URL used for this assignment is:\n\n%s\n' % url)
    print_keywords(keywords[:10])
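# Webpage, parse_keyword_output and print_keywords are defined elsewhere in
# that project; a plausible sketch of the two keyword helpers, assuming the
# usual AlchemyAPI response shape (each keyword entry has 'text' and
# 'relevance') and that (relevance, text) pairs are returned so the reverse
# sort above ranks keywords by relevance:
def parse_keyword_output(output):
    if output.get('status') != 'OK':
        return []
    return [(float(k['relevance']), k['text']) for k in output['keywords']]

def print_keywords(keywords):
    for relevance, text in keywords:
        print('%.4f  %s' % (relevance, text))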
class AlchemyApiService(object):

    def __init__(self):
        self.alchemy_api = AlchemyAPI()

    def get_keywords(self, text):
        # TODO(simplyfaisal): Refine error handling logic.
        try:
            response = self.alchemy_api.keywords('text', text, {'sentiment': 1})
            if response['status'] == 'OK':
                return response['keywords']
            else:
                print('Error in keyword extraction')
                print(response)
        except Exception as e:
            print(e)
        return []
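# A minimal usage sketch for the wrapper; the sample text is made up, and with
# the sentiment option each keyword entry should also carry a 'sentiment' dict:
service = AlchemyApiService()
for keyword in service.get_keywords('The secret Goldman Sachs tapes raised questions about regulators.'):
    print(keyword['text'], keyword.get('sentiment', {}).get('type'))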
# Debug prints inspecting the scraped markup (divText is the container located in build_quotes below).
print(str(divText.div.strong.prettify()) + " AUTHOR NAME TEST")
print(str(divText.div.blockquote.prettify()) + " QUOTE TEXT TEST")

def build_quotes(TextSoup):
    divText = TextSoup.find("div", {"id": "main_body"}, {"class": "right"})
    Allauthors = []
    Allquotes = []
    for tag in divText:
        # Children include plain strings, whose str.find() returns -1 instead of a tag.
        author = tag.find('strong')
        if author not in (-1, None):
            Allauthors.append(author.contents)
        quotes = tag.find('blockquote')
        if quotes not in (-1, None):
            quote = quotes.findAll('p', text=True)
            Allquotes.append(quote)
    print(str(Allauthors[0]) + str(Allquotes[0]) + " FIRST QUOTE - RESULTS TEST")
    return Allquotes

import pandas as pd

Allquotes = build_quotes(soup)
alchemyapi = AlchemyAPI()
response = alchemyapi.keywords('text', Allquotes)
# Read the keyword list by key rather than relying on the order of dict values.
keywords = response['keywords']

print(str(keywords[0]) + " KEYWORD SLICE CHECK")

df = pd.DataFrame(keywords)
df.index = df.index + 1
print(df[0:10])
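# Passing the nested list from build_quotes straight to the API is fragile; a
# hedged alternative (not necessarily what the original script did) is to
# flatten the quotes into one text blob first:
quote_text = ' '.join(str(p) for quote in Allquotes for p in quote)
response = alchemyapi.keywords('text', quote_text)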

from alchemyapi.alchemyapi import AlchemyAPI
from json import load, dump
import time

songs = load(open('lyrics.json'))

alchemyapi = AlchemyAPI()

# Invert the mapping: keyword -> list of songs whose lyrics mention it.
keywords = {}

for song in songs:
    response = alchemyapi.keywords('text', songs[song])

    if response['status'] != 'OK':
        print('Keyword call failed for %s: %s' % (song, response.get('statusInfo')))
        continue

    for k in [c['text'] for c in response['keywords']]:
        if k not in keywords:
            keywords[k] = []
        keywords[k].append(song)

dump(keywords, open('keywords.json', 'w'))
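# A quick check of the generated index (the file name is taken from the dump above):
index = load(open('keywords.json'))
for keyword, matched_songs in sorted(index.items(), key=lambda kv: len(kv[1]), reverse=True)[:10]:
    print('%-30s %d songs' % (keyword, len(matched_songs)))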
from collections import Counter
from sys import stdout, exit

alchemyapi = AlchemyAPI()

# Tally how often each indexed song's keywords show up in the feed's articles.
songs = Counter()

# d is the parsed feed (e.g. a feedparser result) built earlier.
print('  Analysing: %s ' % (d['feed']['subtitle']))
count = len(d['entries'])
i = 0.0

stdout.write('    Working:   0%')
for t in d['entries']:
    i += 1
    title = t['title']
    url = t['link']

    if 'VIDEO:' not in title and 'AUDIO:' not in title:
        response = alchemyapi.keywords('url', url)

        if response['status'] == 'OK':
            for w in response['keywords']:

                w = w['text']
                if w in keywords and w != 'page':
                    for s in keywords[w]:
                        songs[s] += 1
        else:
            print "\n%s: %s" % (response['status'], response['statusInfo'])
            exit(1)

    stdout.write('\b'*4 + '%3d%%' % ((i / count) * 100))
    stdout.flush()
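# The snippet ends mid-progress-loop; presumably the run finishes by reporting
# the Counter. A hedged completion (the top-ten cut-off is an assumption):
stdout.write('\n')
for song, hits in songs.most_common(10):
    print('%4d  %s' % (hits, song))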