def grouped_category(texts):
    """
    Get the category of every text plus the per-category frequencies.

    INPUT:
    texts -> iterable of texts to be analyzed

    OUTPUT:
    statistic -> dict with the frequency of each category, like
                 {"soccer": 12, "football": 24}; calls that did not report
                 status 'OK' are counted under "null", which is always present
    category -> list with the category of each text, in input order
                ("null" for a text whose call failed)
    """
    alchemyapi = AlchemyAPI()

    statistic = {"null": 0}
    category = []

    for text in texts:
        response = alchemyapi.category('text', text)

        # The response is already a decoded dict, so it is read directly
        # instead of being serialized to JSON and parsed again.
        if response['status'] == 'OK':
            label = response.get("category")
        else:
            label = "null"

        category.append(label)
        statistic[label] = statistic.get(label, 0) + 1

    return statistic, category
Exemple #2
0
    def extractCategoryFromUrl(self, url):
        """Categorize the page behind *url* and record the result.

        On success the category and the confidence score of the AlchemyAPI
        response are stored through ``self.categoryFromUrl``. On failure the
        API's status message is printed and nothing is stored.
        """
        alchemyapi = AlchemyAPI()

        # The "url" flavor marks the payload as an address; with "text" the
        # URL string itself would be treated as the document to categorize.
        response = alchemyapi.category("url", url)

        if response["status"] != "OK":
            print("Error in text categorization call: ", response["statusInfo"])
            return

        self.categoryFromUrl.setCategory(response["category"])
        self.categoryFromUrl.setScore(response["score"])
def category(text):
    """
    Find the category that the input text belongs to.

    INPUT:
    text -> input text that needs to be analyzed

    OUTPUT:
    category string that the input belongs to; "null" means the AlchemyAPI
    call did not report status 'OK'
    """
    response = AlchemyAPI().category('text', text)

    if response['status'] != 'OK':
        return "null"

    # The response is already a decoded dict, so no JSON round trip is
    # needed before reading the field.
    return response.get("category")
Exemple #4
0
# Relations: the three supported flavors must succeed, an unknown one must fail.
response = alchemyapi.relations('text', test_text)
assert response['status'] == 'OK'
response = alchemyapi.relations('html', test_html)
assert response['status'] == 'OK'
response = alchemyapi.relations('url', test_url)
assert response['status'] == 'OK'
response = alchemyapi.relations('random', test_url)
assert response['status'] == 'ERROR'  # invalid flavor
print('Relation tests complete!')
print('')


# Category: same flavor matrix; the html call also passes an options dict.
print('Checking category . . . ')
response = alchemyapi.category('text', test_text)
assert response['status'] == 'OK'
response = alchemyapi.category('html', test_html, {'url':'test'})
assert response['status'] == 'OK'
response = alchemyapi.category('url', test_url)
assert response['status'] == 'OK'
response = alchemyapi.category('random', test_url)
assert response['status'] == 'ERROR'  # invalid flavor
print('Category tests complete!')
print('')


# Feeds
print('Checking feeds . . . ')
response = alchemyapi.feeds('text', test_text)
Exemple #5
0
else:
    print('Error in relation extaction call: ', response['statusInfo'])

# Banner for the text categorization part of the demo.
print('')
print('')
print('')
print('############################################')
print('#   Text Categorization Example            #')
print('############################################')
print('')
print('')

print('Processing text: ', demo_text)
print('')

response = alchemyapi.category('text', demo_text)

# Handle the failure case first, then dump the successful response.
if response['status'] != 'OK':
    print('Error in text categorization call: ', response['statusInfo'])
else:
    print('## Response Object ##')
    print(json.dumps(response, indent=4))

    print('')
    print('## Category ##')
    print('text: ', response['category'])
    print('score: ', response['score'])
    print('')

print('')
print('')
Exemple #6
0
def index(request):
    """Render the main page with the most retweeted cached tweets.

    The pickled tweet cache in ``filename`` is loaded and, when
    GETNEWTWEETS is set, fresh home-timeline tweets are fetched. Unless
    ONTHEFLY is set, fetched tweets that are not cached yet are categorized
    through AlchemyAPI (the category is stored in the tweet's ``lang``
    attribute), appended to the cache, and the cache is written back to
    ``filename``. The cache is ordered by retweet count, highest first, and
    at most MAXTWEETS tweets with more than MIN_RETWEETS retweets are handed
    to the ``main/index.html`` template as ``tweet_list``.
    """
    alchemyapi = AlchemyAPI()
    template = loader.get_template('main/index.html')

    api = twitter.Api(consumer_key='CCC', consumer_secret='SSS',
                      access_token_key='AAA', access_token_secret='TTT')

    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # `filename` must only ever point at a trusted, application-owned file.
    with open(filename, 'r') as infile:
        statuses = pickle.load(infile)

    statuses2 = []
    if GETNEWTWEETS:
        statuses2 = api.GetHomeTimeline(NUM_STATUSES)

    if not ONTHEFLY:
        # Set lookup replaces a scan of the whole cache for every new tweet.
        known_ids = set(status.id for status in statuses)
        for item in statuses2:
            if item.id in known_ids:
                continue

            response = alchemyapi.category('text', clean(item.text))
            category = "unknown"
            if response['status'] == 'OK':
                category = response['category']
            else:
                print('Error in text categorization call: ', response['statusInfo'])

            # The category travels in the tweet's `lang` attribute.
            item.lang = category
            statuses.append(item)
            known_ids.add(item.id)

    # Most retweeted first; the sort is stable, so ties keep their cache order.
    statuses.sort(key=lambda status: status.retweet_count, reverse=True)

    tweet_list = []
    for status in statuses:
        if len(tweet_list) >= MAXTWEETS:
            break
        if status.retweet_count > MIN_RETWEETS:
            tweet_list.append(status)

    if not ONTHEFLY:
        # The context manager guarantees the cache is flushed and closed.
        with open(filename, 'w') as outfile:
            pickle.dump(statuses, outfile)

    context = RequestContext(request, {'tweet_list': tweet_list})
    return HttpResponse(template.render(context))
def findCategory(text):
    """Return the raw AlchemyAPI category response for *text*."""
    return AlchemyAPI().category('text', text)
Exemple #8
0
# Relations: the three supported flavors must succeed, an unknown one must fail.
print('Checking relations . . . ')
response = alchemyapi.relations('text', test_text)
assert response['status'] == 'OK'
response = alchemyapi.relations('html', test_html)
assert response['status'] == 'OK'
response = alchemyapi.relations('url', test_url)
assert response['status'] == 'OK'
response = alchemyapi.relations('random', test_url)
assert response['status'] == 'ERROR'  # invalid flavor
print('Relation tests complete!')
print('')

# Category: same flavor matrix; the html call also passes an options dict.
print('Checking category . . . ')
response = alchemyapi.category('text', test_text)
assert response['status'] == 'OK'
response = alchemyapi.category('html', test_html, {'url': 'test'})
assert response['status'] == 'OK'
response = alchemyapi.category('url', test_url)
assert response['status'] == 'OK'
response = alchemyapi.category('random', test_url)
assert response['status'] == 'ERROR'  # invalid flavor
print('Category tests complete!')
print('')

# Feeds
print('Checking feeds . . . ')
response = alchemyapi.feeds('text', test_text)
assert response['status'] == 'ERROR'  # only works for html and url content
response = alchemyapi.feeds('html', test_html, {'url': 'test'})
    print("Error in relation extaction call: ", response["statusInfo"])


# Banner for the text categorization part of the demo.
print("")
print("")
print("")
print("############################################")
print("#   Text Categorization Example            #")
print("############################################")
print("")
print("")

print("Processing text: ", demo_text)
print("")

response = alchemyapi.category("text", demo_text)

# Handle the failure case first, then dump the successful response.
if response["status"] != "OK":
    print("Error in text categorization call: ", response["statusInfo"])
else:
    print("## Response Object ##")
    print(json.dumps(response, indent=4))

    print("")
    print("## Category ##")
    print("text: ", response["category"])
    print("score: ", response["score"])
    print("")


print("")
# Read the whole CSV into memory and split it into whitespace-separated tokens.
myText = ""
with open("cg_entities_and_descr.csv", "r") as file:
  data = file.read()
  myText = data.split()
  # Debug output: the type of myText, then the tokens themselves.
  print type(myText)
  print myText

  # NOTE(review): `lis` is not defined anywhere in the visible source; unless
  # it is created earlier in the script this loop raises NameError - confirm.
  for i,x in enumerate(lis):              # prints one blank line per item of lis
    print 



    # #print "line{0} = {1}".format(i,x)
    # #print "{1}".format(i,x)
    # names = x.split(',', 2)[0]
    # entities.append(x)
# Categorize entities in Civic Graph based on their descriptions (data = 'text')
  # NOTE(review): `entities` only appears in the commented-out code above, so
  # `entities[i]` also depends on a definition that is not visible - confirm.
  for i in range(0, len(lis)):
    categ_result = alchemyapi.category('text', entities[i]);
    pp.pprint(categ_result)
    i += 1  # no effect on the iteration: the for loop rebinds i on every pass

# NOTE(review): the payload is a URL but the 'text' flavor is used, so the URL
# string itself is presumably what gets categorized - confirm the intent.
categ_result = alchemyapi.category('text', "http://techpresident.com/news/25496/first-post-data-driven");
pp.pprint(categ_result['category'])






Exemple #11
0
        # Add each entity's count to our master list. dict.get replaces the
        # has_key() membership test, which only exists on Python 2.
        for entity in response['entities']:
            entities[entity['text']] = entities.get(entity['text'], 0) + int(entity['count'])

        # Single parenthesized argument: prints the same on Python 2 and 3.
        print("[*] Retrieved %d entities from %s" % (len(entities), pdf_file))

    else:
        print "[!] Error receiving Alchemy response: %s" % response[
            'statusInfo']

    # get the category
    response = alchemyapi.category('text', full_text)

    if response['status'] == 'OK':

        # Count how often each category has been seen. dict.get replaces the
        # has_key() membership test, which only exists on Python 2.
        categories[response['category']] = categories.get(response['category'], 0) + 1

        # Single parenthesized argument: prints the same on Python 2 and 3.
        print("[*] Categorized %s as %s" % (pdf_file, response['category']))

    # grab the concepts
    response = alchemyapi.concepts('text', full_text)

    if response['status'] == 'OK':
Exemple #12
0
class AlchemyPost:
    """Fetch one Tumblr post and run the AlchemyAPI analyses on it.

    The result of every analysis that succeeds is collected in
    ``self.content``; ``print_content`` dumps that dict as JSON.
    """

    def __init__(self, post_tumblr, post_id, consumer_key, consumer_secret, oauth_token, oauth_secret):
        """Remember which post to analyze and create both API clients.

        post_tumblr -> blog identifier handed to the Tumblr client's posts()
        post_id     -> id of the post to fetch
        The remaining arguments are the Tumblr OAuth credentials.
        """
        self.post_tumblr = post_tumblr
        self.post_id = post_id
        self._init_tumblr(consumer_key, consumer_secret, oauth_token, oauth_secret)
        self._init_alchemy()

    def _init_tumblr(self, consumer_key, consumer_secret, oauth_token, oauth_secret):
        """Create the Tumblr REST client used to fetch the post."""
        self._client = pytumblr.TumblrRestClient(consumer_key, consumer_secret, oauth_token, oauth_secret)

    def _init_alchemy(self):
        """Create the AlchemyAPI client and the empty result dict."""
        self.alchemyapi = AlchemyAPI()
        self.content = {}

    def analyze_post(self):
        """Fetch the post and run every analysis on it.

        An analysis that fails is skipped silently; its key is then simply
        missing from ``self.content``.
        """
        self.post = self._get_content_post()
        self._alchemy_entities()
        self._alchemy_keywords()
        self._alchemy_concepts()
        self._alchemy_sentiment()
        self._alchemy_relations()
        self._alchemy_category()
        self._alchemy_feeds()
        self._alchemy_taxonomy()

    def print_content(self):
        """Print the collected analysis results as indented JSON."""
        print(json.dumps(self.content, indent=4))

    def _progress(self, mark):
        """Write a progress marker to stdout without ending the line."""
        # Local import keeps the class self-contained; sys.stdout.write works
        # on Python 2 and 3, unlike the `print x,` statement.
        import sys
        sys.stdout.write(mark + " ")

    def _get_content_post(self):
        """Return the post as an HTML snippet, or '' if it cannot be used.

        Resets ``self.title`` and sets ``self.tags`` from the post. Only
        'text' and 'quote' posts are converted; any other type, or a post
        that could not be fetched, yields ''.
        """
        self._progress("*")
        infos = self._get_infos_post()
        self.title = ''
        self.tags = infos.get('tags', [])

        # .get(): _get_infos_post returns {} when no post was found, and
        # indexing 'type' on that would raise KeyError.
        post_type = infos.get('type')
        if post_type == 'text':
            return self._get_content_text(infos)
        if post_type == 'quote':
            return self._get_content_quote(infos)
        return ''

    def _get_infos_post(self):
        """Return the first post of the Tumblr response, or {} if none."""
        infos = self._client.posts(self.post_tumblr, id=self.post_id)
        if 'posts' in infos and len(infos['posts']) > 0:
            return infos['posts'][0]
        return {}

    def _get_content_text(self, infos):
        """Build the HTML snippet for a 'text' post: title, body, tags."""
        # str() coerces non-string values so the concatenation cannot fail.
        content = "<h1>" + str(infos['title']) + "</h1>"
        content += " <br>" + str(infos['body'])
        content += " <br>" + " ".join(infos.get('tags', []))
        return content

    def _get_content_quote(self, infos):
        """Build the HTML snippet for a 'quote' post: text, source, tags."""
        content = str(infos['text'])
        content += " <br>" + str(infos['source'])
        content += " <br>" + " ".join(infos.get('tags', []))
        return content

    def _run_analysis(self, analysis, fields):
        """Call one AlchemyAPI analysis on the post and keep its results.

        analysis -> name of the AlchemyAPI method to call, e.g. 'entities'
        fields   -> (content key, response key) pairs copied on success

        Returns True when the response status is 'OK'; otherwise returns
        False and leaves ``self.content`` untouched.
        """
        self._progress(".")
        response = getattr(self.alchemyapi, analysis)('html', self.post)
        if response['status'] != 'OK':
            return False
        for content_key, response_key in fields:
            self.content[content_key] = response[response_key]
        return True

    def _alchemy_entities(self):
        """Store the entities of the post; return True on success."""
        return self._run_analysis('entities', [('entities', 'entities')])

    def _alchemy_keywords(self):
        """Store the keywords of the post; return True on success."""
        return self._run_analysis('keywords', [('keywords', 'keywords')])

    def _alchemy_concepts(self):
        """Store the concepts of the post; return True on success."""
        return self._run_analysis('concepts', [('concepts', 'concepts')])

    def _alchemy_sentiment(self):
        """Store the document sentiment under 'sentiment'; True on success."""
        return self._run_analysis('sentiment', [('sentiment', 'docSentiment')])

    def _alchemy_relations(self):
        """Store the relations of the post; return True on success."""
        return self._run_analysis('relations', [('relations', 'relations')])

    def _alchemy_category(self):
        """Store the category and its score; return True on success."""
        return self._run_analysis('category', [('category', 'category'), ('score', 'score')])

    def _alchemy_feeds(self):
        """Store the feeds found in the post; return True on success."""
        return self._run_analysis('feeds', [('feeds', 'feeds')])

    def _alchemy_taxonomy(self):
        """Store the taxonomy of the post; return True on success."""
        return self._run_analysis('taxonomy', [('taxonomy', 'taxonomy')])
Exemple #13
0
def user_analysis_sentiments(request):
    """Return the AlchemyAPI analyses of one user's messages as JSON.

    The ``user`` GET parameter selects the messages (``Message.user_send``).
    Their texts are joined with commas and sent to the keyword, concept,
    language, relation, category and taxonomy calls. An analysis whose call
    fails is reported on stdout and stays empty in the JSON payload.
    Requests that are not GET are answered with HTTP 405.
    """
    # A view must always return a response; anything but GET is refused.
    if request.method != 'GET':
        return HttpResponse(status=405)

    user = request.GET.get('user', '')
    print(user)
    # Only byte strings need decoding; text strings are used as they are.
    if isinstance(user, bytes):
        user = user.decode("utf8")

    messages = [m.message_text for m in Message.objects.filter(user_send=user)]
    text = ",".join(messages)
    alchemyapi = AlchemyAPI()

    # Defaults so the payload can be built even when a call fails.
    keywords = []
    concepts = []
    language_id = {}
    relations = []
    categories = {}
    taxonomies = []

    # keywords
    response = alchemyapi.keywords('text', text, {'sentiment': 1})
    if response['status'] == 'OK':
        for keyword in response['keywords']:
            # Values are kept as text; json.dumps escapes non-ASCII itself.
            keywords.append({'keyword_text': keyword['text'],
                             'keyword_relevance': keyword['relevance'],
                             'keyword_sentiment': keyword['sentiment']['type']})
    else:
        print('Error in keyword extaction call: ', response['statusInfo'])

    # concepts
    response = alchemyapi.concepts('text', text)
    if response['status'] == 'OK':
        for concept in response['concepts']:
            concepts.append({'concept_text': concept['text'],
                             'concept_relevance': concept['relevance']})
    else:
        print('Error in concept tagging call: ', response['statusInfo'])

    # language
    response = alchemyapi.language('text', text)
    if response['status'] == 'OK':
        print(response['wikipedia'])
        language_id = {'language': response['language'],
                       'iso_639_1': response['iso-639-1'],
                       'native_speakers': response['native-speakers'],
                       'wikipedia': response['wikipedia']}
    else:
        print('Error in language detection call: ', response['statusInfo'])

    # relations
    response = alchemyapi.relations('text', text)
    if response['status'] == 'OK':
        for relation in response['relations']:
            # A relation may lack any of its parts; a missing part becomes ''
            # instead of reusing the value of an earlier relation.
            relations.append({
                'relation_subject_text': relation.get('subject', {}).get('text', ''),
                'relation_action_text': relation.get('action', {}).get('text', ''),
                'relation_object_text': relation.get('object', {}).get('text', ''),
            })
    else:
        print('Error in relation extaction call: ', response['statusInfo'])

    # category
    response = alchemyapi.category('text', text)
    if response['status'] == 'OK':
        print('text: ', response['category'])
        print('score: ', response['score'])
        categories = {'category': response['category'], 'score': response['score']}
    else:
        print('Error in text categorization call: ', response['statusInfo'])

    # taxonomy
    response = alchemyapi.taxonomy('text', text)
    if response['status'] == 'OK':
        for entry in response['taxonomy']:
            taxonomies.append({'taxonomy_label': entry['label'],
                               'taxonomy_score': entry['score']})
    else:
        print('Error in taxonomy call: ', response['statusInfo'])

    # The combined response is only dumped to stdout, not returned.
    response = alchemyapi.combined('text', text)
    if response['status'] == 'OK':
        print('## Response Object ##')
        print(json.dumps(response, indent=4))
        print('')

    payload = {'user_name': 'LOL', 'keywords': keywords, 'concepts': concepts, 'language_id': language_id,
               'relations': relations, 'categories': categories, 'taxonomies': taxonomies}
    return HttpResponse(json.dumps(payload), content_type="application/json")
# The API key is deliberately not recorded in this file: credentials do not
# belong in source control, and a key that has been committed should be revoked.
# NOTE(review): AlchemyAPI() is presumably loading the key itself - confirm.
alchemyapi = AlchemyAPI()


pp = pprint.PrettyPrinter(indent=4)

#Read in data
with open("cg_entities_and_descr.csv", "r") as csv_file:
  data = csv_file.read()

# Whitespace-separated tokens of the file, echoed for debugging.
myText = data.split()
print(type(myText))
print(myText)

# Every non-empty line of the CSV is treated as one entity record.
# NOTE(review): assumes one entity (name plus description) per line - confirm
# against the real file.
entities = [line for line in data.splitlines() if line.strip()]

#categorize entities in Civic Graph based on their descriptions (data = 'text')
for entity in entities:
  categ_result = alchemyapi.category('text', entity)
  pp.pprint(categ_result)

#Test API call
# categ_result = alchemyapi.category('text', "http://techpresident.com/news/25496/first-post-data-driven");
# pp.pprint(categ_result['category'])






Exemple #15
0
     # Add each entity's count to our master list. dict.get replaces the
     # has_key() membership test, which only exists on Python 2.
     for entity in response['entities']:
         entities[entity['text']] = entities.get(entity['text'], 0) + int(entity['count'])

     # Single parenthesized argument: prints the same on Python 2 and 3.
     print("[*] Retrieved %d entities from %s" % (len(entities), pdf_file))
     
 else:
     print "[!] Error receiving Alchemy response: %s" % response['statusInfo']
     
 
 # get the category
 response = alchemyapi.category('text', full_text)

 if response['status'] == 'OK':

     # Count how often each category has been seen. dict.get replaces the
     # has_key() membership test, which only exists on Python 2.
     categories[response['category']] = categories.get(response['category'], 0) + 1

     # Single parenthesized argument: prints the same on Python 2 and 3.
     print("[*] Categorized %s as %s" % (pdf_file, response['category']))
     
     
 # grab the concepts
 response = alchemyapi.concepts('text',full_text)
 
 if response['status'] == 'OK':
Exemple #16
0


# Banner for the text categorization part of the demo.
print('')
print('')
print('')
print('############################################')
print('#   Text Categorization Example            #')
print('############################################')
print('')
print('')

print('Processing text: ', demo_text)
print('')

response = alchemyapi.category('text', demo_text)

# Handle the failure case first, then dump the successful response.
if response['status'] != 'OK':
    print('Error in text categorization call: ', response['statusInfo'])
else:
    print('## Response Object ##')
    print(json.dumps(response, indent=4))

    print('')
    print('## Category ##')
    print('text: ', response['category'])
    print('score: ', response['score'])
    print('')

Exemple #17
0
from alchemyapi import AlchemyAPI
import json
import pprint

#Reads in file containing names of entities in Civic Graph and their associated descriptions
#Attempts to categorize entities in Civic Graph based on descriptions using AlchemyAPI's "category" function

# The API key is deliberately not recorded in this file: credentials do not
# belong in source control, and a key that has been committed should be revoked.
# NOTE(review): AlchemyAPI() is presumably loading the key itself - confirm.
alchemyapi = AlchemyAPI()

pp = pprint.PrettyPrinter(indent=4)

#Read in data
with open("cg_entities_and_descr.csv", "r") as csv_file:
    data = csv_file.read()

# Whitespace-separated tokens of the file, echoed for debugging.
myText = data.split()
print(type(myText))
print(myText)

# Every non-empty line of the CSV is treated as one entity record.
# NOTE(review): assumes one entity (name plus description) per line - confirm
# against the real file.
entities = [line for line in data.splitlines() if line.strip()]

#categorize entities in Civic Graph based on their descriptions (data = 'text')
for entity in entities:
    categ_result = alchemyapi.category('text', entity)
    pp.pprint(categ_result)

#Test API call
# categ_result = alchemyapi.category('text', "http://techpresident.com/news/25496/first-post-data-driven");
# pp.pprint(categ_result['category'])