Example #1
    def Entity_Extraction(self):

        print " ----------------------------"
        print "# STARTING ENTITY EXTRACTION:"
        print " ----------------------------"

        count = 0

        os.system("python templates/alchemyapi.py 32449e7b4f6b65f9ef5cfd84b7128a46440a9402")

        startTime = datetime.now()
        # Create the AlchemyAPI Object
        alchemyapi = AlchemyAPI()
        for paragraph in self.targeted_paragraphs:
            response = alchemyapi.entities('text', paragraph, {'sentiment': 1})

            if response['status'] == 'OK':

                print "DOCUMENT-LEVEL RESULTS:  "

                print "ARTICLE TITLE: " , self.article_title[len(self.article_title) - len(self.article_title) + count]
                print 'ARTICLE URL: ' , self.article_url[len(self.article_url) - len(self.article_url) + count]
                print "DATA FRAME: "
                count = count + 1

                for entity in response['entities']:

                    entity_text = entity['text']
                    entity_type = entity['type']
                    entity_relevance = entity['relevance']
                    entity_sentiment_type = entity['sentiment']['type']

                    if 'score' in entity['sentiment']:
                        entity_sentiment_score = entity['sentiment']['score']
                    else:
                        entity_sentiment_score = None  # 'score' is absent for neutral sentiment

                    df_entity_extraction = pd.DataFrame(data = {'text': [entity_text],
                                                         'type': [entity_type],
                                                         'relevance': [entity_relevance],
                                                         'sentiment': [entity_sentiment_type],
                                                         'sentiment_score': [entity_sentiment_score]})

                    print "***** ENTITY EXTRACTION RESULTS: *****"
                    print df_entity_extraction.T
                    df_transpose = df_entity_extraction.T


                    entity_json_results = df_transpose.to_dict() #######
                    self.result_entity_extraction.append(entity_json_results)


            else:
                print 'Error in entity extraction call: ', response['statusInfo']

        print "----------- Entity Extraction is completed. ---------------"

        print "Time Elapsed: " , datetime.now() - startTime
        execution_time = datetime.now() - startTime
        self.list_time_elapsed.append(execution_time)
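For reference, a minimal self-contained sketch of the per-entity DataFrame construction above, run against a mocked response dict in the shape the AlchemyAPI SDK returns (all sample values are hypothetical):

import pandas as pd

# Hypothetical response shaped like the AlchemyAPI SDK's parsed JSON.
mock_response = {
    'status': 'OK',
    'entities': [
        {'text': 'IBM', 'type': 'Company', 'relevance': '0.9',
         'sentiment': {'type': 'positive', 'score': '0.5'}},
        {'text': 'Paris', 'type': 'City', 'relevance': '0.4',
         'sentiment': {'type': 'neutral'}},  # no 'score' key for neutral
    ],
}

for entity in mock_response['entities']:
    df = pd.DataFrame(data={'text': [entity['text']],
                            'type': [entity['type']],
                            'relevance': [entity['relevance']],
                            'sentiment': [entity['sentiment']['type']],
                            # .get() avoids the unbound-name bug when 'score' is absent
                            'sentiment_score': [entity['sentiment'].get('score')]})
    print df.T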
Example #2
    def handle(self, *args, **options):
        es = elasticsearch.Elasticsearch(es_url)

        alchemyapi = AlchemyAPI()

        query = {
           "query": {
               "and": [
                   { "missing": { "field": "entities" } },
                   { "terms": { "language": ['en', 'de', 'fr', 'it', 'es', 'pt'] } },
                   { "match": { "_all": "merkel" }}
                   #{ "range": { "published": { "gte" : "now-1d" } } }
               ]
           },
           "size": 500
        }

        res = es.search(index="rss", doc_type="posting", body=query)
        logger.info("%d documents found" % res['hits']['total'])

        for p in res['hits']['hits']:
            #logger.info('Extracting entities for - %s' % p['_id'])
            
            analyzed_text = p['_source']['title'] + ' ' + p['_source']['description']

            try:
                response = alchemyapi.entities("text", analyzed_text)
                entities = [ x['text'] for x in response["entities"] ]

                #logger.info("Entities: " + entities)

                es.update(index=p['_index'], doc_type=p['_type'], id=p['_id'],
                          body={"doc": {"entities": entities}})
            except KeyError:
                logger.exception("Problem getting sentiment :( %s" % response)
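Note that the "and" filter in the query above is pre-2.x Elasticsearch syntax; on newer clusters the same constraints would be expressed with a bool query, roughly like this sketch (not tied to a specific ES version):

query = {
    "query": {
        "bool": {
            "must": [
                {"terms": {"language": ["en", "de", "fr", "it", "es", "pt"]}},
                {"match": {"_all": "merkel"}},
            ],
            # "must_not exists" replaces the old "missing" filter
            "must_not": [
                {"exists": {"field": "entities"}},
            ],
        }
    },
    "size": 500,
}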
Example #3
def convert_to_clean_titles(infile, outfile):
    alchemyapi = AlchemyAPI()
    f = open(infile, "r")
    f2 = codecs.open(outfile, "w+", "utf-8")
    f3 = codecs.open("Entities.txt", "w+", "utf-8")
    count = 1
    for line in f:
        line = line.decode("utf-8")
        response = alchemyapi.entities('text', line, {
            'sentiment': 1,
            'disambiguate': 1
        })
        if response['status'] == 'OK':
            for entity in response['entities']:
                if "type" in entity.keys:
                    if entity['type'] in [
                            'Country', 'Holiday', 'Movie', 'MusicGroup',
                            'Organization', 'Person', 'PrintMedia', 'Region',
                            'StateOrCountry', 'TelevisionShow',
                            'TelevisionStation', 'Money', 'Company',
                            'GeographicFeature'
                    ]:
                        line = line.replace(entity['text'],
                                            entity['text'].title())
                    print >> f3, entity['text'], entity['type'], entity[
                        'sentiment']
            print >> f2, line,
        else:
            print >> f2, line,
        print count, line
        count += 1
Example #4
def getCategory(demo_text):
	alchemyapi = AlchemyAPI()
	demo_text = unicode(demo_text);
	demo_text = demo_text.encode("ascii",'ignore');
	response = alchemyapi.entities('text', demo_text)

	if response['status'] == 'OK':
		#print(json.dumps(response, indent=4))
		if (not len(response['entities'])):
			category = []
			category.append("Undefined")
			return category

		entity = response['entities'][0]
		#print('text: ', entity['text'].encode('utf-8'))
		#print('type: ', entity['type'])
		#print('relevance: ', entity['relevance'])

		if 'disambiguated' in entity and 'subType' in entity['disambiguated']:
			category = entity['disambiguated']['subType']
		else:
			category = []
			category.append(entity['type'])
	else:
		category = []
		category.append("Undefined")
	return category
Example #5
def extract_entities(text):
	"""
	find the category that input text belongs to

	INPUT:
	test -> input text that need to be analyze

	OUTPUT:
	category string that input belongs to. "null" means alchemyapi fails somehow
	"""
	
	alchemyapi = AlchemyAPI()
	decoder = json.JSONDecoder()
	entities = []
	type = ""

	response = alchemyapi.entities('text',text, {'sentiment': 0})

	if response['status'] == 'OK':
		analyzedData = decoder.decode(json.dumps(response))
		results = analyzedData.get("entities")
		for result in results:
			if result.get("type") in types.combinedTypes:
				type = get_category(text);
				if type == 'arts_entertainment' or type == 'gaming' or type == 'recreation':
					type = "entertainment";
				elif type == 'sports':
					type = "sports";
				elif type == 'computers_internet' or type == 'health' or type == 'science_technology':
					type = "technology";
				elif type == 'culture_politics':
					type = "politics";
				else:
					continue;
			else:
				type = result.get("type");
				if type in types.entertainmentTypes:
					type = "entertainment";
				elif type in types.sportsTypes:
					type = "sports";
				elif type in types.politicsTypes:
					type = "politics";
				elif type in types.technologyTypes:
					type = "technology";
				else:
					continue;
			entity = {
				"text": result["text"],
				"relevance": result["relevance"],
				"type": type,
				"count": int(result["count"])
			}
			entities.append(entity);
		return entities
	else:
		print(response['status']);
		return []
Example #6
def performEE(url):
    alchemyapi = AlchemyAPI()
    response = alchemyapi.entities('url', url)
    relatedEntities = {}
    if response['status'] == 'OK':
        entities = response['entities']
        for entity in entities:
            if (float(entity['relevance'])>0.1):
                relatedEntities[entity["type"]]=entity["text"]
    return relatedEntities
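A short usage sketch for performEE (the URL is only illustrative). Note that because relatedEntities is keyed by entity type, a later entity of the same type overwrites an earlier one; keying by entity text, or collecting a list per type, would keep all of them:

if __name__ == '__main__':
    # Hypothetical article URL; any public page works as input.
    related = performEE('http://www.example.com/some-article')
    for entity_type, entity_text in related.items():
        print('%s -> %s' % (entity_type, entity_text))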
Example #7
def extract_entities(text, lang):
	entities = {}
	alchemyapi = AlchemyAPI()
	response = alchemyapi.entities('text', text, {'sentiment': 1})
	if response['status'] == 'OK':
		for entity in response['entities']:
			key = entity['text'].encode('utf-8')
			value = entity['type']
			entities[key] = convert_label(value)
	return entities
Example #8
def extract_entities(text, lang):
    entities = {}
    alchemyapi = AlchemyAPI()
    response = alchemyapi.entities('text', text, {'sentiment': 1})
    if response['status'] == 'OK':
        for entity in response['entities']:
            key = entity['text'].encode('utf-8')
            value = entity['type']
            entities[key] = convert_label(value)
    return entities
Example #9
def getAlcData(arrOfObj):
	
	alchemyapi = AlchemyAPI()	

	
	#for x in range(0,len(arrOfObj)):
	for x in range(0, 10):
		asc = unicodedata.normalize('NFKD', arrOfObj[x].text).encode('ascii','ignore')
		print x		
		print asc 
		arrOfObj[x].responseEntities = alchemyapi.entities('text',asc, { 'sentiment':1 })
		arrOfObj[x].responseKeywords = alchemyapi.keywords('text',asc, { 'sentiment':1 })	
Example #10
def performEE(url):
    alchemyapi = AlchemyAPI()
    response = alchemyapi.entities('url', url, {'disambiguate': 0})
    relatedEntities = {}
    if response['status'] == 'OK':
        entities = response['entities']
        print(entities)
        for entity in entities:
            print(entity["relevance"]+" "+entity["text"])
            if (float(entity['relevance'])>0.1):
                relatedEntities[entity["type"]]=entity["text"]
    return relatedEntities
Example #11
def ExtractEntity(text):
    # Create the AlchemyAPI Object
    alchemyapi = AlchemyAPI()

    response = alchemyapi.entities('text', text, {'sentiment': 1})

    if response['status'] == 'OK':
        for entity in response['entities']:
            print('text: ', entity['text'].encode('utf-8'))
            print('type: ', entity['type'])
            print('relevance: ', entity['relevance'])
    else:
        print('Error in entity extraction call: ', response['statusInfo'])
Example #12
def main():
    alchemyapi = AlchemyAPI()
    text = "I'm wondering if it will find a city name, like Abu Dhabi, but I hope so!"
    nytimes = "http://www.nytimes.com/"
    nytimes2 = 'http://www.nytimes.com/2014/02/14/world/asia/on-indian-tea-plantations-low-wages-and-crumbling-homes.html?ref=world'
    good_types = ['StateOrCounty', 'City', 'Country']
    response = alchemyapi.entities('url', nytimes2)
    #print response
    #print response['entities']
    for e in response['entities']:
        if e['type'] in good_types:
            print e['text'], e['type']
            find_geocode(e['text'])
Example #13
    def extractNamedEntitiesFromText(self,txt):
        """method for extracting named entities from given text"""
        
        #creating AlchemyAPI object
        alchemyapi = AlchemyAPI()
        
        #requesting json response from AlchemyAPI server
        response = alchemyapi.entities('text',txt, { 'sentiment':1 })
        
        
        if response['status'] == 'OK':
                
                for entity in response['entities']:
                    #entity object for storing the properties of an entity
                    entityObj = AlchemyStructure.Entity()
                    
                    #sentiment object for storing the sentiment properties related to an entity
                    sentimentObj = AlchemyStructure.Sentiment()
                    
                    #extracting the name of the entity
                    entityObj.setName(entity['text']) 
                    
                    #extracting the type of the entity, for example Organization, Person, FieldTerminology etc.
                    entityObj.setType(entity['type'])
                    
                    #extracting the relevance of the entity for the particular type
                    entityObj.setRelevance(entity['relevance'])
                    
                    
                    
                    #extracting the type of the sentiment associated with the entity -> positive, negative or neutral
                    sentimentObj.setType(entity['sentiment']['type'])
                    
                    #extracting the score of the sentiment associated with the entity
                    if entity['sentiment']['type'] == "neutral":
                        sentimentObj.setScore("0")
                    else:
                        sentimentObj.setScore(entity["sentiment"]["score"])
                        
                        
                    #extracting the frequency of occurrence of the entity in the given text
                    entityObj.setFreq(entity['count'])
                    
                    #setting the sentiment attached with the entity
                    entityObj.setSentiment(sentimentObj)
                    
                    #insert the entity into the list of retrieved entities
                    self.entitiesFromText.append(entityObj)

        else:
            print('Error in entity extraction call: ', response['statusInfo'])
Example #14
def pos_with_entity_replaced_common_words(infile, outfile):
    alchemyapi = AlchemyAPI()
    common_word_pos = open("common_word_pos.txt", "r")
    title_data = open(infile, "r+")
    f2 = codecs.open(outfile, "w+", "utf-8")
    for line1, line2 in zip(title_data, common_word_pos):
        response = alchemyapi.entities('text', line1, {
            'sentiment': 1,
            'disambiguate': 1
        })
        if response['status'] == 'OK':
            for entity in response['entities']:
                line2 = line2.replace(entity['text'], entity['type'])
            print >> f2, line2,
Example #15
    def extractNamedEntitiesFromUrl(self, url):
        """method for extracting named entities from given url"""

        # creating AlchemyAPI object
        alchemyapi = AlchemyAPI()

        # requesting json response from AlchemyAPI server
        response = alchemyapi.entities("url", url, {"sentiment": 1})

        if response["status"] == "OK":

            for entity in response["entities"]:
                # entity object for storing the properties of an entity
                entityObj = AlchemyStructure.Entity()

                # sentiment object for storing the sentiment properties related to an entity
                sentimentObj = AlchemyStructure.Sentiment()

                # extracting the name of the entity
                entityObj.setName(entity["text"])

                # extracting the type of the entity, for example Organization, Person, FieldTerminology etc.
                entityObj.setType(entity["type"])

                # extracting the relevance of the entity for the particular type
                entityObj.setRelevance(entity["relevance"])

                # extracting the score of the sentiment associated with the entity
                if entity["sentiment"]["type"] == "neutral":
                    sentimentObj.setScore("0")
                else:
                    sentimentObj.setScore(entity["sentiment"]["score"])

                # extracting the type of the sentiment associated with the entity -> positive, negative or neutral
                sentimentObj.setType(entity["sentiment"]["type"])

                # extracting the frequency of occurrence of the entity in the given text of the url
                entityObj.setFreq(entity["count"])

                # setting the sentiment attached with the entity
                entityObj.setSentiment(sentimentObj)

                # insert the entity into the list of retrieved entities
                self.entitiesFromUrl.append(entityObj)

        else:
            print("Error in entity extraction call: ", response["statusInfo"])
Example #16
def generate_which(inputsentence,sent):
    which=[]
    typeoftext={}
    alchemyapi = AlchemyAPI()
    response = alchemyapi.entities('text', inputsentence)
    if response['status'] == 'OK':
        for entity in response['entities']:
            typeoftext[entity['text'].encode('utf-8')]=entity['type']
    else:
        print >> log, 'Error in entity extraction call: ', response['statusInfo']
    for key in typeoftext:
        which_question=[]
        which_answer=[]
        for (i,row) in enumerate(sent):
            if row[11]=="ROOT":
                if row[3]=="be":
                    which_question.insert(0,row[1])
                    if sent[i+1][5]=="VBG":
                        which_question.append(sent[i+1][1])
                        i+=1
                else:         
                    if row[5]=="VBZ":
                        which_question.insert(0,"does")
                    elif row[5]=="VBD":
                        which_question.insert(0,"did")
                    elif row[5]=="VBG":
                        if which_question:
                            top=which_question.pop()
                            which_question.insert(0,top)
                            which_question.append(row[1])
                            flag=True
                            continue
                    else:
                        which_question.insert(0,"do")
                    which_question.append(row[2])
            elif row[1] == key:
                which_question.insert(0,typeoftext[row[1]])
                which_question.insert(0,"what")
                which_answer=[row[1]]
                which.append({'Q':which_question,'A':which_answer})
                break
            else:
                which_question.append(row[1]) 
    return which
Example #17
class Alchemy(object):
  'Calls the API to read Atom/RSS feeds'

  def __init__(self):
    #AlchemyAPI client
    self.alchemy_api = AlchemyAPI()

  def processa_html(self, link):

    #Returns the clean text extracted from a URL
    return self.alchemy_api.text('url', link)['text']

  def obtem_titulo(self, link):

    #Returns the title extracted from a URL
    return self.alchemy_api.title('url', link)['title']

  def obtem_entidades(self, texto):

    #Returns the entities found in the text
    return self.alchemy_api.entities('text', texto, {'sentiment': 1})
Example #18
    def entity_topic_extraction(self, myText):

        alchemyApi = AlchemyAPI()

        # put all entities in a list
        entity_list = []
        response = alchemyApi.entities('text', myText, {'sentiment': 1})
        if response['status'] == 'OK':
            for entity in response['entities']:
                #entity_list.append((entity['text'].encode('utf-8'),entity['type']))
                entity_list.append((entity['text'].encode('utf-8'), entity['type'].encode('utf-8')))


        # put all taxonomy in a list
        response = alchemyApi.taxonomy('text', myText)
        taxonomy_list = []
        if response['status'] == 'OK':
            for category in response['taxonomy']:
                taxonomy_list.append(category['label'].encode('utf-8'))

        return entity_list, taxonomy_list
Example #19
txt_name = []
for(dirpath, dirnames,filenames) in walk(out_txt_path):
    txt_name.extend(filenames)
    break

json_data = {}
entity_list = []
keywords_list = []
concept_list = []
for f in txt_name:
    if f[-3:] == "txt":
        full_text_path = out_txt_path + f
        with open(full_text_path, 'r') as current_txt_file:
            txt_data = current_txt_file.read().replace('\n','')
            response_entities = alchemyapi.entities('text', txt_data)
            response_keywords = alchemyapi.keywords('text', txt_data)
            response_concepts = alchemyapi.concepts('text', txt_data)
            if response_entities['status'] == 'OK' and response_keywords['status'] == 'OK':
                print "status OK"
                for entity in response_entities["entities"]:
                    dict_temp = {'entity': entity['text'],
                                 'type': entity['type'],
                                 'relevance': entity['relevance']}
                    entity_list.append(dict_temp)
                for keyword in response_keywords["keywords"]:
                    dict_temp = {'keyword': keyword['text'],
                                 'relevance': keyword['relevance']}
                    keywords_list.append(dict_temp)
                for concept in response_concepts['concepts']:
                    dict_temp = {'concept': concept['text'],
Example #20
#!/usr/bin/python
# -*- coding: utf-8 -*-

# cef5dcb639f382ce8db12ddd1a38a44311bf4d96
from alchemyapi import AlchemyAPI
import json
import os
from os.path import join, getsize

alchemyapi = AlchemyAPI()

for root, dirs, files in os.walk('data/GOT5'):
    for name in files:
        print name
        with open(join(root, name), 'r') as infile:
            text = infile.read()
            response = alchemyapi.entities('text', text, {'sentiment': 0})

            if response['status'] == 'OK':

                with open(join(root, name)[:-4] + '-entities.json', 'w') as outfile:
                    json.dump(response, outfile)
            else:
                print 'problem with ', name
Example #21
    print "[*] Parsing %s" % pdf_file
    
    pdf_obj = pyPdf.PdfFileReader(open(pdf_file,"rb"))

    full_text = ""
    
    # extract all of the text from each page
    for page in pdf_obj.pages:
        
        full_text += page.extractText()

    # let the Alchemy API extract entities
    print "[*] Sending %d bytes to the Alchemy API" % len(full_text)
    
    
    response = alchemyapi.entities('text', full_text, {'sentiment': 0})
    
    
    if response['status'] == 'OK':
        
        # loop through the list of entities
        for entity in response['entities']:
            
            # add each entity to our master list
            if entities.has_key(entity['text']):
                entities[entity['text']] += int(entity['count'])
            else:
                entities[entity['text']] = int(entity['count'])
                
        print "[*] Retrieved %d entities from %s" % (len(entities),pdf_file)    
        
Example #22
class WatsonMagic:
    SOCIAL_TONES = ['Openness', 'Conscientiousness', 'Extraversion', 'Agreeableness', 'Emotional Range']
    ENTITY_MAPPING = {'City':'cities', 'Person':'people', 'JobTitle':'jobs', 'Organization':'organizations', 'Company':'companies', 'Sport':'sports', 'PrintMedia':'media', 'Country':'countries', 'FieldTerminology':'terms', 'StateOrCounty':'places', 'Holiday':'holidays'}

    def __init__(self):
        self.tone_analyzer = ToneAnalyzerV3Beta(
            username='******',
            password='******',
            version='2016-02-11')
        self.alchemyapi = AlchemyAPI()
        self.entities = {}


    def extract_type_entities_from_alchemy(self, text):
        types = {}
        response = self.alchemyapi.entities('text', text, {'sentiment': 0})

        if response['status'] == 'OK':
            for entity in response['entities']:
                if entity['type'] in types:
                    types[entity['type']].append((entity['text'],entity))
                else:
                    types[entity['type']] = [(entity['text'],entity)]
            print("[*] Retrieved {} entities from {}".format(len(self.entities), text))
        else:
            print("[!] Error receiving Alchemy response: %s" % response['statusInfo'])
        time.sleep(1)
        # now accumulate our most common terms and print them out
        sorted_type_keys = sorted(types, key=lambda x: len(types[x]),reverse=True)
        print(sorted_type_keys)
        sorted_types = []
        for k in sorted_type_keys:
            sorted_types.append(types[k])
        return sorted_types


        # types_counter = Counter(types)
        # top_types = types_counter.most_common()
        # print(top_types)
        # return top_types[0:5]

    def extract_most_popular_entities_from_alchemy(self, text):
        response = self.alchemyapi.entities('text', text, {'sentiment': 0})

        if response['status'] == 'OK':

            # loop through the list of entities
            for entity in response['entities']:

                # add each entity to our master list
                if entity['text'] in self.entities:
                    self.entities[entity['text']] += int(entity['count'])
                else:
                    self.entities[entity['text']] = int(entity['count'])
            print("[*] Retrieved {} entities from {}".format(len(self.entities), text))
        else:
            print("[!] Error receiving Alchemy response: %s" % response['statusInfo'])

        time.sleep(1)
        # now accumulate our most common terms and print them out
        entity_counter = Counter(self.entities)

        top_entities = entity_counter.most_common()

        # let's take the top 10 entities UBL mentions
        for top_entity in top_entities[0:10]:
            # most_common returns a tuple (entity,count)
            print("%s => %d" % (top_entity[0], top_entity[1]))

    def get_tone_category_elements(self,category,text):
        t = self.tone_analyzer.tone(text=text, tones=category)['document_tone']['tone_categories'][0]['tones']

        return t
Example #23
demo_url = 'http://www.npr.org/2013/11/26/247336038/dont-stuff-the-turkey-and-other-tips-from-americas-test-kitchen'
demo_html = '<html><head><title>Python Demo | AlchemyAPI</title></head><body><h1>Did you know that AlchemyAPI works on HTML?</h1><p>Well, you do now.</p></body></html>'
image_url = 'http://demo1.alchemyapi.com/images/vision/football.jpg'
reddit_url = 'http://www.reddit.com/r/worldnews'


#Create the AlchemyAPI Object
alchemyapi = AlchemyAPI()

locations = []

relevance = []

master_locations = []

response = alchemyapi.entities('url', reddit_url, {'sourceText':'xpath', 'xpath':'//*[contains(@class,"title may-blank")]' }) 

if response['status'] == 'OK':

	for entity in response['entities']:

		if entity['type'] in ('Country', 'Region', 'City', 'StateOrCountry', 'Continent'):
			currentRelevance = float(entity['relevance'])

			if entity.get('disambiguated'):

				locations.append(entity['disambiguated']['name'])

				relevance.append(currentRelevance)

			else:
Example #24
def main():

    tmpdir = "/tmp/pagekicker"

    #personal api key saved as api_key.txt
    parser = argparse.ArgumentParser()
    parser.add_argument('path', help="target file or directory for NER")
    parser.add_argument('output', help="target file for output")
    parser.add_argument('uuid', help="uuid")
    args = parser.parse_args()

    in_file = args.path
    out_file = args.output
    uuid = args.uuid
    folder = os.path.join(tmpdir, uuid)
    print(folder)
    cwd = os.getcwd()
    apikey_location = os.path.join(cwd, "api_key.txt")

    with open(in_file) as f:
        text = f.read()

    alchemyapi = AlchemyAPI()
    # alchemyapi = alchemyapi(api_key='b887e176b6a650093c3d4ca635cd1b470be6584e')
    # result = alchemyapi.TextGetRankedNamedEntities(text,json)
    result = alchemyapi.entities('text', text, {'sentiment': 1})

    place_list = ['City', 'Continent', 'Country', 'Facility', 'GeographicFeature',\
    'Region', 'StateOrCounty']
    People = {}
    Places = {}
    Other = {}

    # the SDK returns parsed JSON, so iterate the entity dicts directly
    for entity in result.get('entities', []):
        info = [entity['relevance'], entity['count']]
        if entity['type'] == 'Person':
            People[entity['text']] = info
        elif entity['type'] in place_list:
            Places[entity['text']] = info
        else:
            Other[entity['text']] = info

    #print lists ordered by relevance
    Places_s = sorted(Places, key=Places.get, reverse=True)
    People_s = sorted(People, key=People.get, reverse=True)
    Other_s = sorted(Other, key=Other.get, reverse=True)

    with codecs.open(out_file, mode='w', encoding='utf-8') as o:
        listwrite(o, People_s)
        listwrite(o, Places_s)
        listwrite(o, Other_s)
#
    out_file = os.path.join(folder, 'People')
    with codecs.open(out_file, mode='w', encoding='utf-8') as o:
        listwrite(o, People_s)
    out_file = os.path.join(folder, 'Places')
    with codecs.open(out_file, mode='w', encoding='utf-8') as o:
        listwrite(o, Places_s)
    out_file = os.path.join(folder, 'Other')
    with codecs.open(out_file, mode='w', encoding='utf-8') as o:
        listwrite(o, Other_s)

url = "http://quora-api.herokuapp.com/users/" + sys.argv[1] + "/activity"
data = requests.get(url).json()
data = data["activity"]
payload = {}
# count=0
# getDocCount()
for activities in data:
    title = activities["title"]
    summary = activities["summary"]
    print title
    document["title"] = title
    document["summary"] = summary
    labels = al.taxonomy("text", title)
    entities = al.entities("html", summary)
    keywords = al.keywords("html", summary)
    sentiment = al.sentiment("html", summary)
    # print labels['taxonomy']
    # count+=1
    payload["entities"] = {}
    payload["keywords"] = []
    payload["sentiment"] = {}
    docNode = createDocNode(document)
    try:
        print "Yo"
        labels = labels["taxonomy"][0]["label"]
        print "Yo1"
        print labels
        labels = func(labels)
        print labels
Example #26

url = "http://quora-api.herokuapp.com/users/" + sys.argv[1] + "/activity"
data = requests.get(url).json()
data = data['activity']
payload = {}
#count=0
#getDocCount()
for activities in data:
    title = activities['title']
    summary = activities['summary']
    print title
    document['title'] = title
    document['summary'] = summary
    labels = al.taxonomy("text", title)
    entities = al.entities("html", summary)
    keywords = al.keywords("html", summary)
    sentiment = al.sentiment("html", summary)
    #print labels['taxonomy']
    #count+=1
    payload['entities'] = {}
    payload['keywords'] = []
    payload['sentiment'] = {}
    docNode = createDocNode(document)
    try:
        print "Yo"
        labels = labels['taxonomy'][0]['label']
        print "Yo1"
        print labels
        labels = func(labels)
        print labels
Example #27
class AlchemyPost:

    def __init__(self, post_tumblr, post_id, consumer_key, consumer_secret, oauth_token, oauth_secret):
        self.post_tumblr = post_tumblr
        self.post_id = post_id
        self._init_tumblr(consumer_key, consumer_secret, oauth_token, oauth_secret)
        self._init_alchemy()

    def _init_tumblr(self, consumer_key, consumer_secret, oauth_token, oauth_secret):
        self._client = pytumblr.TumblrRestClient(consumer_key, consumer_secret, oauth_token, oauth_secret)    

    def _init_alchemy(self):
        self.alchemyapi = AlchemyAPI()
        self.content = {}

    def analyze_post(self):
        self.post = self._get_content_post()
        self._alchemy_entities()
        self._alchemy_keywords()
        self._alchemy_concepts()
        self._alchemy_sentiment()
        self._alchemy_relations()
        self._alchemy_category()
        self._alchemy_feeds()
        self._alchemy_taxonomy()

    def print_content(self):
        print(json.dumps(self.content, indent=4))

    def _get_content_post(self):
        print "*",
        infos = self._get_infos_post() 
        self.title = ''
        self.tags = []
        if 'tags' in infos:
            self.tags = infos['tags']
        
        if infos['type'] == 'text':
            return self._get_content_text(infos)
        if infos['type'] == 'quote':
            return self._get_content_quote(infos)
        return ''

    def _get_infos_post(self):
         infos = self._client.posts(self.post_tumblr, id=self.post_id)
         if 'posts' in infos and len(infos['posts'])>0:
            return infos['posts'][0]
         return {}

    def _get_content_text(self, infos):
        content = "<h1>" + str(infos['title']) + "</h1>"
        content += " <br>" + str(infos['body'])
        content += " <br>" + " ".join(infos['tags'])
        return content

    def _get_content_quote(self, infos):
        content = str(infos['text'])
        content += " <br>" + str(infos['source'])
        content += " <br>" + " ".join(infos['tags'])
        return content

    def _alchemy_entities(self):
        print ".",
        response = self.alchemyapi.entities('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['entities'] = response['entities']
        return True

    def _alchemy_keywords(self):
        print ".",
        response = self.alchemyapi.keywords('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['keywords'] = response['keywords']
        return True

    def _alchemy_concepts(self):
        print ".",
        response = self.alchemyapi.concepts('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['concepts'] = response['concepts']
        return True

    def _alchemy_sentiment(self):
        print ".",
        response = self.alchemyapi.sentiment('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['sentiment'] = response['docSentiment']
        return True

    def _alchemy_relations(self):
        print ".",
        response = self.alchemyapi.relations('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['relations'] = response['relations'] 
        return True

    def _alchemy_category(self):
        print ".",
        response = self.alchemyapi.category('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['category'] = response['category'] 
        self.content['score'] = response['score'] 
        return True

    def _alchemy_feeds(self):
        print ".",
        response = self.alchemyapi.feeds('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['feeds'] = response['feeds'] 
        return True

    def _alchemy_taxonomy(self):
        print ".",
        response = self.alchemyapi.taxonomy('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['taxonomy'] = response['taxonomy'] 
        return True
Example #28
def key_entity():

	print "storing keywords"
	# Create the AlchemyAPI Object
	alchemyapi = AlchemyAPI()

	art_keywords = {}
	art_entities = {}
	
	count = 0 

	graph = Graph("http://*****:*****@localhost:7474/db/data/")

	article_query =  """MATCH (n:article) 
						Return distinct n.url as url"""

	result = graph.cypher.execute(article_query)

	keyword_count = 0
	entity_count = 0
	art_count = 0
	for arti in result:

		if count >= 1000:
			print "Alchemy limit exceeds"
			exit()


		art = arti['url']

		article_node = """ MATCH (article:article{url:'"""+art+"""'})
		SET article.processed = 'yes'
		Return article;
		"""
		article = graph.cypher.execute(article_node)

		if art not in art_keywords.keys():
			art_keywords[art] = []
			response = alchemyapi.keywords('url', art, {'sentiment': 1})
			count = count + 1
			art_count = art_count + 1
			if response['status'] == 'OK':
				for keyword in response['keywords']:
					# print('text: ', keyword['text'].encode('utf-8'))
					key = str(keyword['text'].encode('utf-8')).replace("'","")
					art_keywords[art].append(key)

					rel_dict = {}

					rel_dict['relevance'] = keyword['relevance']
					rel_dict['sentiment'] = keyword['sentiment']['type']
					if 'score' in keyword['sentiment']:
						rel_dict['sentiment_score'] = keyword['sentiment']['score']

					keyword_node = """ MERGE (keyword:Keywords{text:'"""+key+"""'})
					Return keyword;
					"""
					at_keywords = graph.cypher.execute(keyword_node)

					if len(list(graph.match(start_node=article.one,end_node=at_keywords.one, rel_type=("has_keyword",rel_dict)))) == 0:
						pth = Path(article.one,("has_keyword",rel_dict),at_keywords.one)
						graph.create(pth)
						keyword_count = keyword_count + 1


		if count >= 1000:
			print "Alchemy limit exceeds"
			exit()


		if art not in art_entities.keys():
			art_entities[art] = []
			response = alchemyapi.entities('url', art, {'sentiment': 1})
			count = count + 1
			if response['status'] == 'OK':
				for entities in response['entities']:
					# print('text: ', entities['text'].encode('utf-8'))
					key = str(entities['text'].encode('utf-8')).replace("'","")
					art_entities[art].append(key)

					rel_dict = {}

					rel_dict['type'] = entities['type']
					rel_dict['relevance'] = entities['relevance']
					rel_dict['sentiment'] = entities['sentiment']['type']
					if 'score' in entities['sentiment']:
						rel_dict['sentiment_score'] = entities['sentiment']['score']
					
					entities_node = """ MERGE (entities:Entities{text:'"""+key+"""'})
					Return entities;
					"""
					at_entities = graph.cypher.execute(entities_node)
					if len(list(graph.match(start_node=article.one, end_node=at_entities.one, rel_type=("has_entity", rel_dict)))) == 0:
						pth = Path(article.one, ("has_entity", rel_dict), at_entities.one)
						graph.create(pth)

						entity_count = entity_count + 1

	return {'articles':str(art_count),'keywords':str(keyword_count),'entities':str(entity_count)}
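A side note on the Cypher strings above: building queries by concatenation (and stripping quotes out of key) is fragile. A safer sketch of the keyword MERGE, assuming py2neo 2.x and its parameter support:

from py2neo import Graph  # assuming py2neo 2.x

graph = Graph("http://localhost:7474/db/data/")
key = "O'Reilly"  # quotes are safe when passed as a parameter

keyword_node = "MERGE (keyword:Keywords {text: {text}}) RETURN keyword;"
at_keywords = graph.cypher.execute(keyword_node, text=key)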
Example #29
    data = file.read()
    myText = data

    print "ANALYSIS FOR " + f + " \n"

    # extract keywords
    # kr_0 = alchemyapi.keywords('text', test_text)
    # print "KEYWORDS: \n"
    # pp.pprint(kr_0)

    # extract taxonomy
    # tr_0 = alchemyapi.taxonomy('text', myText)
    # pp.pprint(tr_0)

    # extract entities
    er_0 = alchemyapi.entities('text', myText)
    print "ENTITIES: \n"
    pp.pprint(er_0['entities'])
    for entity in er_0['entities'][:50]:
      print(entity['text'])
      print(entity['relevance'])
      print(entity['type'])
      print(entity['count'])
      print '\n'

    # extract categories
    # cr_0 = alchemyapi.category('text', myText)
    # pp.pprint(cr_0)


    # extract concepts
Example #30
#Create the AlchemyAPI Object
alchemyapi = AlchemyAPI()

print('')
print('')
print('############################################')
print('#   Entity Extraction Example              #')
print('############################################')
print('')  
print('')

print('Processing text: ', demo_text)
print('')

response = alchemyapi.entities('text',demo_text, { 'sentiment':1 })

if response['status'] == 'OK':
	print('## Response Object ##')
	print(json.dumps(response, indent=4))


	print('')
	print('## Entities ##')
	for entity in response['entities']:
		print('text: ', entity['text'].encode('utf-8'))
		print('type: ', entity['type'])
		print('relevance: ', entity['relevance'])
		print('sentiment: ', entity['sentiment']['type'])
		if 'score' in entity['sentiment']:
			print('sentiment score: ' + entity['sentiment']['score'])
Example #31
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

from __future__ import print_function
from alchemyapi import AlchemyAPI

test_text = 'Bob broke my heart, and then made up this silly sentence to test the PHP SDK'
test_html = '<html><head><title>The best SDK Test | AlchemyAPI</title></head><body><h1>Hello World!</h1><p>My favorite language is PHP</p></body></html>'
test_url = 'http://www.nytimes.com/2013/07/13/us/politics/a-day-of-friction-notable-even-for-a-fractious-congress.html?_r=0'

alchemyapi = AlchemyAPI()

#Entities
print('Checking entities . . . ')
response = alchemyapi.entities('text', test_text)
assert (response['status'] == 'OK')
response = alchemyapi.entities('html', test_html)
assert (response['status'] == 'OK')
response = alchemyapi.entities('url', test_url)
assert (response['status'] == 'OK')
response = alchemyapi.entities('random', test_url)
assert (response['status'] == 'ERROR')  #invalid flavor
print('Entity tests complete!')
print('')

#Keywords
print('Checking keywords . . . ')
response = alchemyapi.keywords('text', test_text)
assert (response['status'] == 'OK')
response = alchemyapi.keywords('html', test_html)
Example #32
# Create the AlchemyAPI Object
alchemyapi = AlchemyAPI()

print("")
print("")
print("############################################")
print("#   Entity Extraction Example              #")
print("############################################")
print("")
print("")

print("Processing text: ", demo_text)
print("")

response = alchemyapi.entities("text", demo_text, {"sentiment": 1})

if response["status"] == "OK":
    print("## Response Object ##")
    print(json.dumps(response, indent=4))

    print("")
    print("## Entities ##")
    for entity in response["entities"]:
        print("text: ", entity["text"].encode("utf-8"))
        print("type: ", entity["type"])
        print("relevance: ", entity["relevance"])
        print("sentiment: ", entity["sentiment"]["type"])
        if "score" in entity["sentiment"]:
            print("sentiment score: " + entity["sentiment"]["score"])
        print("")
Example #33
from alchemyapi import AlchemyAPI
alchemyapi = AlchemyAPI()
entities = {}
with open('mytext.txt', 'r') as f:
    mytext = f.read()


response1 = alchemyapi.entities("text",mytext)
response = alchemyapi.keywords("text",mytext)
response3 = alchemyapi.relations("text",mytext)


en1 = response1['entities'][0]['text']
en2 = response1['entities'][1]['text']

key1 = response['keywords'][0]['text']
key2 = response['keywords'][1]['text']
key3 = response['keywords'][2]['text']
key4 = response['keywords'][3]['text']
key5 = response['keywords'][4]['text']
key6 = response['keywords'][5]['text']





Example #34
negative = positive = neutral = 0
counter = 0
while(counter != 4):

	url = ('https://ajax.googleapis.com/ajax/services/search/web'
	       '?v=1.0&q=Google%20wokrers&start='+str(counter))

	request = urllib2.Request(url, None, {})
	response = urllib2.urlopen(request)

	# Process the JSON string.
	results = simplejson.load(response)
	i = 0
	for i in range(0, len(results.get("responseData").get("results")), 1):
		title = results.get("responseData").get("results")[i].get("title")
		print title
		response = alchemyapi.entities('text', title, {'sentiment': 1})

		if response['status'] == 'OK':
		    for entity in response['entities']:
		    	if(entity['sentiment']['type'] == 'negative'):
		    		negative += 1
		    	elif(entity['sentiment']['type'] == 'positive'):
		       		positive += 1
		       	else:
		       		neutral +=1
		else:
		    print('Error in entity extraction call: ', response['statusInfo'])
	counter += 4

print negative
print positive
Example #35
        data = file.read()
        myText = data

        print "ANALYSIS FOR " + f + " \n"

        # extract keywords
        # kr_0 = alchemyapi.keywords('text', test_text)
        # print "KEYWORDS: \n"
        # pp.pprint(kr_0)

        # extract taxonomy
        # tr_0 = alchemyapi.taxonomy('text', myText)
        # pp.pprint(tr_0)

        # extract entities
        er_0 = alchemyapi.entities('text', myText)
        print "ENTITIES: \n"
        pp.pprint(er_0['entities'])
        for entity in er_0['entities'][:50]:
            print(entity['text'])
            print(entity['relevance'])
            print(entity['type'])
            print(entity['count'])
            print '\n'

        # extract categories
        # cr_0 = alchemyapi.category('text', myText)
        # pp.pprint(cr_0)

        # extract concepts
        # cr_0 = alchemyapi.concepts('text', myText)
Example #36

alchemyapi = AlchemyAPI()

print('')
print('')
print('############################################')
print('#   Entity Extraction Example              #')
print('############################################')
print('')
print('')

print('Processing text: ', demo_text)
print('')

response = alchemyapi.entities('text', demo_text, {'sentiment': 1})

if response['status'] == 'OK':
    print('## Response Object ##')
    print(json.dumps(response, indent=4))

    print('')
    print('## Entities ##')
    for entity in response['entities']:
        print('text: ', entity['text'])
        print('type: ', entity['type'])
        print('relevance: ', entity['relevance'])
        if 'score' in entity['sentiment']:
            print(
                'sentiment: ', entity['sentiment']['type'] + ' (' +
                entity['sentiment']['score'] + ')')
        else:
            print('sentiment: ', entity['sentiment']['type'])
        print('')
Example #37
def process(query, in_queue, out_queue):
	"""
	The worker thread to grab a found Tweet off the queue and 
	calculate the sentiment via AlchemyAPI. 

	It calculates the document-level sentiment for the entire tweet, and
	it will also attempt to calculate entity-level sentiment if the query
	string is identified as an entity. If the query string is not 
	identified as an entity for the tweet, no entity level sentiment
	will be returned.
	
	INPUT:
	query -> the query string that was used in the Twitter API search (i.e. "Denver Broncos")
	in_queue -> the shared input queue that is filled with the found tweets.
	out_queue -> the shared output queue that is filled with the analyzed tweets.

	OUTPUT:
	None	
	"""

	#Create the AlchemyAPI object
	alchemyapi = AlchemyAPI()
	
	while True:
		#grab a tweet from the queue
		tweet = in_queue.get()	
		
		#init
		tweet['sentiment'] = {}

		try:
			#calculate the sentiment for the entity
			response = alchemyapi.entities('text',tweet['text'], { 'sentiment': 1 })
			if response['status'] == 'OK':
				for entity in response['entities']:
					#Check if we've found an entity that matches our query
					if entity['text'] == query:
						tweet['sentiment']['entity'] = {}
						tweet['sentiment']['entity']['type'] = entity['sentiment']['type']
						
						#Add the score (it's not returned if type=neutral)
						if 'score' in entity['sentiment']:
							tweet['sentiment']['entity']['score'] = entity['sentiment']['score']
						else:
							tweet['sentiment']['entity']['score'] = 0  
						
						#Only 1 entity can possibly match the query, so exit the loop
						break;

			#calculate the sentiment for the entire tweet
			response = alchemyapi.sentiment('text',tweet['text'])

			if response['status'] == 'OK':
				tweet['sentiment']['doc'] = {}
				tweet['sentiment']['doc']['type'] = response['docSentiment']['type']
				
				#Add the score (it's not returned if type=neutral)
				if 'score' in response['docSentiment']:
					tweet['sentiment']['doc']['score'] = response['docSentiment']['score']
				else:
					tweet['sentiment']['doc']['score'] = 0  
			
			#add the result to the output queue
			out_queue.put(tweet)
		
		except Exception as e:
			#if there's an error, just move on to the next item in the queue
			print 'Uh oh, this just happened: ', e
			pass
			
		#signal that the task is complete
		in_queue.task_done()
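A minimal harness for the worker above, sketched under the assumption that it runs on Python 2's Queue module (the query string and tweet are hypothetical):

import Queue
import threading

in_queue = Queue.Queue()
out_queue = Queue.Queue()

# process() loops forever, so run it as a daemon thread
worker = threading.Thread(target=process, args=('Denver Broncos', in_queue, out_queue))
worker.daemon = True
worker.start()

in_queue.put({'text': 'The Denver Broncos played a great game!'})
in_queue.join()        # blocks until the worker calls task_done()
print out_queue.get()  # the analyzed tweet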
Example #38
    uuid = args.uuid
    folder = os.path.join(tmpdir, uuid)
    print(folder)
    cwd = os.getcwd()
    apikey_location = os.path.join(cwd, "api_key.txt")
    print(in_file)
    with open(in_file) as f:
        filetext = f.read()
    return filetext


filetext = main()

alchemyapi = AlchemyAPI()

response = alchemyapi.entities('text', filetext, {'sentiment': 1})

if response['status'] == 'OK':

    print(json.dumps(response, indent=4))

    for entity in response['entities']:
        print('text: ', entity['text'].encode('utf-8'))
        print('type: ', entity['type'])
        print('relevance: ', entity['relevance'])
        print('sentiment: ', entity['sentiment']['type'])
        if 'score' in entity['sentiment']:
            print('sentiment score: ' + entity['sentiment']['score'])
        print('')
else:
    print('Error in entity extraction call: ', response['statusInfo'])
Example #39
def createGraph(ac_results):
	alchemyapi = AlchemyAPI()
	g=nx.Graph()
	total = 0.0
	i = 0.0

	for key in sorted(ac_results.iterkeys()):
		total += len(ac_results[key])

	for key in sorted(ac_results.iterkeys()):
		print "\nAlchemyAPI is now intepreting all of the "+key+" queries...\n"	
		for item in ac_results[key]:
			i +=1.0
			percent_complete = round((i/total)*100.0, 0)
			# print str(i) +" / "+str(total)+" - "+item
			print str(int(i)) +" / "+str(int(total))+"   "+str(percent_complete) +"%  " + item
			response_relations = alchemyapi.relations('text',item, {'entities':1, 'sentiment':1})
			response_entities = alchemyapi.entities('text',item, { 'sentiment':0 })

			if response_relations['status'] == 'OK':
				for relation in response_relations['relations']:
					# red.publish('chat', "found relation!")
					if 'subject' in relation:
						subject = relation['subject']['text']
						g.add_node(subject, query=key)

						if 'entities' in relation['subject']:
							g.node[subject]['type'] = relation['subject']['entities'][0]['type']

						if 'sentimentFromObject' in relation['subject']:
							# print relation['subject']['sentimentFromObject']['score']
							g.node[subject]['sentiment'] = float(relation['subject']['sentimentFromObject']['score'])

						if 'sentiment' in relation['subject']:
							# print relation['subject']['sentiment']['score']
							g.node[subject]['sentiment'] = float(relation['subject']['sentiment']['score'])
					
					if 'object' in relation:		
						object_ = relation['object']['text']
						g.add_node(object_, query=key)
						
						if 'entities' in relation['object']:
							g.node[object_]['type'] = relation['object']['entities'][0]['type']
							
						if 'sentimentFromSubject' in relation['object']:
							# print relation['object']['sentimentFromSubject']['score']
							g.node[object_]['sentiment'] = float(relation['object']['sentimentFromSubject']['score'])

						if 'sentiment' in relation['object']:
							# print relation['object']['sentiment']['score']
							g.node[object_]['sentiment'] = float(relation['object']['sentiment']['score'])

					try:
						if all(x in ['subject', 'action', 'object'] for x in relation):
							n1 = relation['subject']['text']
							a =  relation['action']['text']
							n2 = relation['object']['text']
							if g.has_edge(n1,n2):
								g[n1][n2]['weight'] += 1
							else:
								g.add_edge(n1,n2, weight=1, relation=a)
					except:
						pass

				try:
					for entity in response_entities['entities']:
						g.add_node(entity['text'], type=entity['type'], query=key)
				except:
					continue

				nx.write_gexf(g, topic+".gexf")

			else:
				print "AlchemyAPI is not responding."
	return g
Example #40
 #raw_input()
 tokens = WhitespaceTokenizer().tokenize(at_less.lower())
 stopped_tokens = [i for i in tokens if not i in en_stop]
 stopped_tokens2 = [i for i in stopped_tokens if not i in gen_stop]
 stemmed_tokens = [p_stemmer.stem(i) for i in stopped_tokens2]
 topic.append(stemmed_tokens)
 dictionary = corpora.Dictionary(topic)
 corpus = [dictionary.doc2bow(text1) for text1 in topic]
 ldamodel = gensim.models.ldamodel.LdaModel(corpus, num_topics=2, id2word = dictionary, passes=20)
 tweet_data['topic'] = ldamodel.print_topics(num_topics=1, num_words=1)
 print (ldamodel.print_topics(num_topics=1, num_words=1))
 #Alchemy Stuff:
 print cnt
 cnt=cnt+1
 if cnt>0:
     response = json.loads(json.dumps(alchemyapi.entities('text', trans_text , {'sentiment': 1})))
     print 'heya'
     # size=len(response['entities'])
     flag=0
     ent=[]
     ent_rele=[]
     ent_type=[]
     if response['status'] == 'OK':
         flag=1
         for entity in response['entities']:
             ent.append(entity['text'])
             ent_rele.append(entity['relevance'])
             ent_type.append(entity['type'])
     else:
         print('Error in entity extraction call: ', response['statusInfo'])
     if flag==1:
Example #41
class GoldenGlobeAnalyzer:
	def __init__(self, jsonFile, category_list = None, debug = False):
		'''Initialize a GGA object and load the tweets contained in the json file'''
		self.debug = debug
		self._entity_count_cutoff = 80

		self.tweets = []
		with open(jsonFile, 'r') as f:
			self.tweets = map(json.loads, f)

		print "-- read tweets\n"

		self.awards = []

		for c in category_list:
			self.awards.append(Award(c[0],c[1],c[2]))

		self.hosts = [None,None]
		self.alchemyapi = AlchemyAPI()

	def _get_permutations_internal(self,lst):
		permutations = {}
		for i in range(len(lst)):
			for j in range(len(lst)):
				if i != j:
					name = lst[i] + " " + lst[j]
					permutations[name] = 0
		return permutations

	def find_tweets_by_user(self, username):
		'''Find all the tweets by a given user whose user name is :param username'''
		ret = []
		for t in self.tweets:
			if (t['user']['screen_name'].lower() == username.lower()):
				ret.append(t)
		return ret

	def find_tweets_containing(self, keyword):
		ret = []
		for t in self.tweets:
			if keyword in t['text']:
				ret.append(t)
		return ret

	def get_sentiment_of_tweets(self,tweet_lump):
		response = self.alchemyapi.sentiment('text',tweet_lump.encode('ascii','ignore'))
		if response['status'] != u'ERROR':
			if 'sentiment' in response:
				return response['sentiment']
			elif 'docSentiment' in response:
				return response['docSentiment']
		return 0.0

	def get_entities(self,tweet_text):
		tweet_text = tweet_text.encode('utf-8')
		tokens = nltk.word_tokenize(tweet_text)
		tagged = nltk.pos_tag(tokens)
		chunks = nltk.chunk.ne_chunk(tagged)
		
		entity_names = []
		for word_tuple in chunks.pos():
			if word_tuple[1] == 'PERSON' or word_tuple[1] == 'ORGANIZATION':
				if word_tuple[0][0].lower() != "golden" and word_tuple[0][0].lower() != "globes" and word_tuple[0][0].lower() != "goldenglobes" and word_tuple[0][0].lower() != "best":
					entity_names.append(word_tuple[0])
		return entity_names

	def find_presenters(self):

		blacklist = ""
		for award in self.awards:
			blacklist += award.winner.lower()

		for award in self.awards:
			relevant = []
			cont = True

			for t in self.tweets:
				if re.findall(r"[pP]resent*",t["text"]) and re.findall(award.re,t["text"]):
					relevant.append(t)

			ent_dict = {}

			for tweet in relevant:
				ents = self.get_entities(tweet["text"])
				for e in ents:
					entity = e[0]
					if (entity.lower() not in award.long_name.lower()) and (entity.lower() not in blacklist):
						if entity in ent_dict:
							ent_dict[entity] += 1
							#print ent_dict[e]
						else:
							ent_dict[entity] = 1
						if ent_dict[entity] > 30: # cutoff to improve performance
							cont = False
				if not cont:
					break

			sorted_ents = sorted(ent_dict.iteritems(), key=operator.itemgetter(1), reverse = True)
			name_parts = []
			for i in range(min(len(sorted_ents),8)):
				name_parts.append(sorted_ents[i][0])

			presenter_idx = 0
			possible_name_combos = self._get_permutations_internal(name_parts)
			cont = True
			for tweet in relevant:
				for name in possible_name_combos:
					if name in tweet["text"]:
						possible_name_combos[name] += 1
						if possible_name_combos[name] > 0:
							award.presenters[presenter_idx] = name
							presenter_idx += 1
							if presenter_idx > 1:
								cont = False
								break
							possible_name_combos[name] -= 1000 # ugly hack but it works
				if not cont:
					break

			if award.presenters[0] == "None":
				for t in self.tweets:
					if re.findall(award.re,t["text"]):
						relevant.append(t)
				ent_dict2 = {}
				relevant_text = ""
				for r in relevant:
					relevant_text += r["text"] + "\n"

				response = self.alchemyapi.entities('text',relevant_text.encode('ascii','ignore'))
				if response['status'] == 'OK':
					for entity in response['entities']:
						ent_txt = entity["text"].encode('ascii','ignore')
						if ent_txt.lower() != "goldenglobes" and ent_txt.lower() != "golden globes":
							if ent_txt in ent_dict2:
								ent_dict2[ent_txt] += 1
							else:
								ent_dict2[ent_txt] = 1

				sorted_ents = sorted(ent_dict2.iteritems(), key=operator.itemgetter(1), reverse = True)
				for s in range(min(len(sorted_ents),2)):
					award.presenters[s] = sorted_ents[s][0]
					blacklist += " " + sorted_ents[s][0].lower()

			print "-- " + award.presenters[0] + ((" and " + award.presenters[1] ) if award.presenters[1] != "None" else "") + " presented " + award.long_name

		return

	def print_presenters(self):
		for award in self.awards:
			print award.presenters[0] + ((" and " + award.presenters[1] ) if award.presenters[1] != "None" else "") + " presented " + award.long_name

	def find_hosts(self):

		relevant = []
		new_tweets = []
		cont = True

		for t in self.tweets:
			if len(re.findall("host*",t["text"])) > 0:
				relevant.append(t)
		#	else:
		#		new_tweets.append(t)

		#self.tweets = new_tweets

		ent_dict = {}
		for tweet in relevant:
			ents = self.get_entities(tweet["text"])
			for e in ents:
				entity = e[0]
				if entity in ent_dict:
					ent_dict[entity] += 1
				else:
					ent_dict[entity] = 1
				if ent_dict[entity] > self._entity_count_cutoff: # cutoff to improve performance
					cont = False
			if not cont:
				break

		sorted_ents = sorted(ent_dict.iteritems(), key=operator.itemgetter(1), reverse = True)

		## take the top four name parts (typically two first names and two last
		## names); now we just need to figure out which of them belong together
		name_parts = []
		for i in range(min(len(sorted_ents), 4)):
			name_parts.append(sorted_ents[i][0])

		host_idx = 0
		possible_name_combos = self._get_permutations_internal(name_parts)
		for tweet in relevant:
			for name in possible_name_combos:
				if name in tweet["text"]:
					possible_name_combos[name] += 1
					if possible_name_combos[name] > 10:
						print "-- " + name + " was a host"
						self.hosts[host_idx] = name
						host_idx += 1
						if host_idx > 1:
							return
						possible_name_combos[name] -= 1000 # push the count far below zero so the same name is never picked twice
		return
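
	# A plausible sketch (an assumption; the helper is not shown in this snippet)
	# of _get_permutations_internal as used by find_hosts and find_presenters:
	# build every ordered "First Last" pairing of the extracted name parts, each
	# starting at a count of zero so the tweet loops above can tally matches.
	def _get_permutations_internal(self, name_parts):
		import itertools
		combos = {}
		for first, last in itertools.permutations(name_parts, 2):
			combos[first + " " + last] = 0
		return combos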

	def print_hosts(self):
		for h in self.hosts:
			print h + " was a host\n"

	def find_winners(self):

		tweets_lst = []
		new_tweets = []

		for tweet in self.tweets:
			if ("wins" in tweet["text"] or "won" in tweet["text"]) and "should" not in tweet["text"]:
				info = (tweet["text"].partition("http")[0])
				info = info.partition(":")[2]
				if "@" in info:
					info = info.partition("@")[2]
				info=info.replace("#","")
				info = info.replace('"','')
				tweets_lst.append(info)
			else:
				new_tweets.append(tweet)
		tweets_lst = list(set(tweets_lst))
		self.tweets = new_tweets

		for i in tweets_lst:
			if "wins" in i:
				i = i.partition("wins")
			else:
				i = i.partition("won")
			winner = i[0]
			category = i[2]

			for award in self.awards:
				if re.findall(award.re,category):
					award.winner_candidates.append(winner)
					break

		for award in self.awards:
			award.extract_most_likely_winner() # a plausible sketch of this helper appears at the end of this example
			print award.winner + " won " + award.long_name

		return

	def print_winners(self):
		for award in self.awards:
			print award.winner + " won " + award.long_name
			print "\n"

	def find_nominees(self):

		blacklist = ""
		for award in self.awards:
			blacklist += award.winner.lower()

		for award in self.awards:
			relevant = []
			cont = True

			for t in self.tweets:
				if re.findall(r"([nN]ominated.*[fF]or)|([nN]ominee)",t["text"]) and re.findall(award.re,t["text"]) and "should" not in t["text"] and "wasn't" not in t["text"]:
					relevant.append(t)
				elif re.findall(r"\b[sS]hould.*[wW]on\b",t["text"]) and re.findall(award.re,t["text"]):
					relevant.append(t)

			ent_dict = {}
			relevant_text = ""
			for r in relevant:
				relevant_text += r["text"] + "\n"

			response = self.alchemyapi.entities('text',relevant_text.encode('ascii','ignore'))
			if response['status'] == 'OK':

				for entity in response['entities']:
					ent_txt = entity["text"].encode('ascii','ignore')
					if ent_txt.lower() != "goldenglobes" and ent_txt.lower() != "golden globes":
						if ent_txt in ent_dict:
							ent_dict[ent_txt] += 1
						else:
							ent_dict[ent_txt] = 1

			sorted_ents = sorted(ent_dict.iteritems(), key=operator.itemgetter(1), reverse = True)
			for s in range(min(len(sorted_ents),5)):
				award.nominees[s] = sorted_ents[s][0]
			print "The nominees for " + award.long_name + " are " + award.nominees[0] + ", " + award.nominees[1] + ", " + award.nominees[2] + ", " + award.nominees[3] + " and " + award.nominees[4]

	def find_popularity_of_winners(self):
		for award in self.awards:
			if award.winner != "None":
				relevant_text = ""
				for t in self.tweets:
					if award.winner in t["text"]:
						relevant_text += t["text"] + '\n' # accumulate the tweet text itself, not just the winner's name
				sentiment = self.get_sentiment_of_tweets(relevant_text)
				# normalize to a float so the popularity comparisons below are meaningful
				award.popularity = float(sentiment["score"]) if type(sentiment) is not float else 0.0

		most_popular = self.awards[0]
		least_popular = self.awards[0]
		for award in self.awards:
			if award.popularity > most_popular.popularity:
				most_popular = award
			elif award.popularity < least_popular.popularity:
				least_popular = award

		print "-- The most popular winner was " + most_popular.winner + " and the least popular winner was " + least_popular.winner
		return

	def find_dress_opinions(self):
		_all_positive = []
		_all_negative = []
		people = []
		for award in self.awards:
			people.append(award.winner)
		for host in self.hosts:
			people.append(host.encode('ascii','ignore'))

		for person in people:
			if person != "None":
				relevant_txt = ""
				for t in self.tweets:
					if person in t["text"] and re.findall(r"\b[dD]ress\b",t["text"]):
						relevant_txt += t["text"]
				sentiment_score = self.get_sentiment_of_tweets(relevant_txt)
				if type(sentiment_score) is not float:
					# the API returns scores as strings; compare and sort them numerically
					score = float(sentiment_score["score"])
					if score > 0.15:
						_all_positive.append([score, person])
					elif score != 0.0:
						_all_negative.append([score, person])
		
		_all_positive = sorted(_all_positive, key = lambda tup : tup[0], reverse = True) # best-liked first
		_all_negative = sorted(_all_negative, key = lambda tup : tup[0]) # most disliked first

		print "People liked these people's dresses (in descending order)"
		for a in _all_positive:
			print " - " + a[1]

		print "People didn't like these people's dresses very much (in descending order)"
		for a in _all_negative:
			print " - " + a[1]
Example no. 42
from __future__ import print_function
from alchemyapi import AlchemyAPI


test_text = 'Bob broke my heart, and then made up this silly sentence to test the PHP SDK'  
test_html = '<html><head><title>The best SDK Test | AlchemyAPI</title></head><body><h1>Hello World!</h1><p>My favorite language is PHP</p></body></html>'
test_url = 'http://www.nytimes.com/2013/07/13/us/politics/a-day-of-friction-notable-even-for-a-fractious-congress.html?_r=0'



alchemyapi = AlchemyAPI()


#Entities
print('Checking entities . . . ')
response = alchemyapi.entities('text', test_text)
assert(response['status'] == 'OK')
response = alchemyapi.entities('html', test_html)
assert(response['status'] == 'OK')
response = alchemyapi.entities('url', test_url)
assert(response['status'] == 'OK')
response = alchemyapi.entities('random', test_url)
assert(response['status'] == 'ERROR') 	#invalid flavor
print('Entity tests complete!')
print('')


#Keywords
print('Checking keywords . . . ')
response = alchemyapi.keywords('text', test_text)
assert(response['status'] == 'OK')
Example no. 43
from alchemyapi import AlchemyAPI

def process(query, in_queue, out_queue):
	"""
	The worker thread to grab a found Tweet off the queue and 
	calculate the sentiment via AlchemyAPI. 

	It calculates the document-level sentiment for the entire tweet, and
	it will also attempt to calculate entity-level sentiment if the query
	string is identified as an entity. If the query string is not 
	identified as an entity for the tweet, no entity level sentiment
	will be returned.
	
	INPUT:
	query -> the query string that was used in the Twitter API search (i.e. "Denver Broncos")
	in_queue -> the shared input queue that is filled with the found tweets.
	out_queue -> the shared output queue that is filled with the analyzed tweets.

	OUTPUT:
	None	
	"""

	#Create the AlchemyAPI object
	alchemyapi = AlchemyAPI()
	
	while True:
		#grab a tweet from the queue
		tweet = in_queue.get()	
		
		#init
		tweet['sentiment'] = {}

		try:
			#calculate the sentiment for the entity
			response = alchemyapi.entities('text',tweet['text'], { 'sentiment': 1 })
			if response['status'] == 'OK':
				for entity in response['entities']:
					#Check if we've found an entity that matches our query
					if entity['text'] == query:
						tweet['sentiment']['entity'] = {}
						tweet['sentiment']['entity']['type'] = entity['sentiment']['type']
						
						#Add the score (it's not returned if type=neutral)
						if 'score' in entity['sentiment']:
							tweet['sentiment']['entity']['score'] = entity['sentiment']['score']
						else:
							tweet['sentiment']['entity']['score'] = 0  
						
						#Only 1 entity can possibly match the query, so exit the loop
						break

			#calculate the sentiment for the entire tweet
			response = alchemyapi.sentiment('text',tweet['text'])

			if response['status'] == 'OK':
				tweet['sentiment']['doc'] = {}
				tweet['sentiment']['doc']['type'] = response['docSentiment']['type']
				
				#Add the score (it's not returned if type=neutral)
				if 'score' in response['docSentiment']:
					tweet['sentiment']['doc']['score'] = response['docSentiment']['score']
				else:
					tweet['sentiment']['doc']['score'] = 0  
			
			#add the result to the output queue
			out_queue.put(tweet)
		
		except Exception as e:
			#if there's an error, just log it and move on to the next item in the queue
			print 'Uh oh, this just happened: ', e
			
		#signal that the task is complete
		in_queue.task_done()
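
A minimal usage sketch for the worker above (an assumption about the driver code, which is not part of this example): fill the input queue with tweets, start a few daemon threads running process(), and join the queue.

import threading
from Queue import Queue  # Python 2; use queue.Queue on Python 3

in_queue, out_queue = Queue(), Queue()
for _ in range(4):
	worker = threading.Thread(target=process, args=('Denver Broncos', in_queue, out_queue))
	worker.daemon = True  # let the program exit even though the workers loop forever
	worker.start()

in_queue.put({'text': 'The Denver Broncos played a great game'})

in_queue.join()  # blocks until every queued tweet has been marked task_done
while not out_queue.empty():
	print out_queue.get()['sentiment']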
Example no. 44
    print("**********")
    response = alchemyapi.concepts('text', text, {'maxRetrieve': 10})

    if response['status'] == "OK":
        #print(json.dumps(response, indent = 4))
        keywords = [(concept['text'].encode('ascii', 'ignore'),
                     float(concept['relevance']))
                    for concept in response['concepts']]

        for concept in response['concepts']:
            print('Concept text:', concept['text'].encode('utf-8'))
            print('Concept relevance:', concept['relevance'])

        print(keywords)

    print("**********")
    response = alchemyapi.entities('text', text, {'maxRetrieve': 200})
    if response['status'] == "OK":
        print(json.dumps(response, indent=4))

        for entity in response['entities']:
            print("Entity text", entity['text'])
            print("Entity type", entity['type'])

        persons = [
            ent['text'] for ent in response['entities']
            if ent['type'] == 'Person'
        ]

        print("All persons:", persons)
Example no. 45
    out_file = args.outfile
    uuid = args.uuid
    folder = os.path.join(tmpdir, uuid)
    print(folder)
    cwd = os.getcwd()
    apikey_location = os.path.join(cwd, "api_key.txt")
    print(in_file)
    with open(in_file) as f:
        filetext = f.read()
    return filetext

filetext = main()

alchemyapi = AlchemyAPI()
       
response = alchemyapi.entities('text', filetext, {'sentiment': 1})

if response['status'] == 'OK':

    print(json.dumps(response, indent=4))

    for entity in response['entities']:
        print('text: ', entity['text'].encode('utf-8'))
        print('type: ', entity['type'])
        print('relevance: ', entity['relevance'])
        print('sentiment: ', entity['sentiment']['type'])
        if 'score' in entity['sentiment']:
            print('sentiment score: ' + entity['sentiment']['score'])
        print('')
else:
    print('Error in entity extraction call: ', response['statusInfo'])
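
The fragment above begins mid-script: args, in_file and tmpdir are defined somewhere earlier in the original file. A plausible preamble (an assumption, not the original code) would look like this:

import argparse
import json
import os
import tempfile
from alchemyapi import AlchemyAPI

tmpdir = tempfile.gettempdir()  # assumption: a scratch directory for per-run folders

parser = argparse.ArgumentParser()
parser.add_argument('infile')
parser.add_argument('outfile')
parser.add_argument('uuid')
args = parser.parse_args()
in_file = args.infile  # hypothetical name; the fragment only shows in_file being read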