예제 #1
0
def render_article(request):
	"""Render the article page, extracting its text via AlchemyAPI on first use.

	The article id is read from ``request.POST['articleData']``. When the
	stored article already has content it is rendered as-is. Otherwise the
	text and title are extracted from ``article.url`` with AlchemyAPI, saved
	on the article, and then rendered.

	Raises:
		KeyError: if 'articleData' is missing from the POST data, or an
			AlchemyAPI response lacks the 'text' / 'title' key.
		IndexError: if no Article matches the given id.
	"""
	article_id = request.POST['articleData']
	article = Article.objects.filter(id = article_id)[0]

	# Content already stored: render it without contacting AlchemyAPI.
	# (No debug print of article.content before this check -- the content may
	# be None for a not-yet-processed article and .encode() would raise.)
	if article.content:
		return render_to_response('article.html', {'id' : article.id, 'data' : article.content, 'titleText' : article.title})

	# Nothing stored yet: extract text and title from the article URL.
	source_url = article.url
	alchemyapi = AlchemyAPI()
	text_response = alchemyapi.text('url', source_url)
	title_response = alchemyapi.title('url', source_url)

	# Encode once and reuse the same values for persistence and rendering.
	content = text_response['text'].encode('utf-8')
	title = title_response['title'].encode('utf-8')

	article.content = content
	article.title = title
	article.save()

	return render_to_response('article.html', {'id' : article.id, 'data' : content, 'titleText' : title})
예제 #2
0
class Alchemy(object):
  """Wrapper around an AlchemyAPI client (original note: used when reading Atom/RSS feeds)."""

  def __init__(self):
    # AlchemyAPI client that every method below delegates to
    self.alchemy_api = AlchemyAPI()

  def processa_html(self, link):
    """Return the 'text' entry of the AlchemyAPI text response for the URL `link`."""
    resposta = self.alchemy_api.text('url', link)
    return resposta['text']

  def obtem_titulo(self, link):
    """Return the 'title' entry of the AlchemyAPI title response for the URL `link`."""
    resposta = self.alchemy_api.title('url', link)
    return resposta['title']

  def obtem_entidades(self, texto):
    """Return the AlchemyAPI entities response for `texto`, requested with sentiment enabled."""
    opcoes = {'sentiment': 1}
    return self.alchemy_api.entities('text', texto, opcoes)
예제 #3
0


# ---- Title extraction demo: banner ----
print('')
print('')
print('')
print('############################################')
print('#   Title Extraction Example               #')
print('############################################')
print('')
print('')

print('Processing url: ', demo_url)
print('')

# Ask the service for the title of the page behind demo_url
response = alchemyapi.title('url',demo_url)

if response['status'] != 'OK':
	# Failure path: report the reason supplied in the response
	print('Error in title extraction call: ', response['statusInfo'])
else:
	# Success path: dump the raw response, then the extracted title
	print('## Response Object ##')
	print(json.dumps(response, indent=4))


	print('')
	print('## Title ##')
	print('title: ', response['title'].encode('utf-8'))
	print('')


예제 #4
0
class Extraction:
    """Extracts the text, author and title of an article URL via AlchemyAPI.

    Construct with the article URL, then call processText() to populate the
    ``sentences``, ``author`` and ``title`` attributes.
    """

    def __init__(self, url):
        self.alchemyAPI = AlchemyAPI()
        self.alchemyAPI.outputMode = 'json'
        self.url = url
        # must call extraction after initialization

    def processText(self):
        """Run all URL processing routines for the constructor-specified URL.

        Sets ``self.sentences`` (quote-free sentence list), ``self.author``
        and ``self.title``.
        """
        text = self.__extractText(self.url)
        self.sentences = self.__sbdText(text)
        self.author    = self.__extractAuthor(self.url)
        self.title     = self.__extractTitle(self.url)

    def __fetchField(self, apiCall, url, key):
        """Call one AlchemyAPI URL endpoint and return response[key] as UTF-8.

        Shared implementation of the text/author/title extractors.

        Raises InputException when `url` is None or empty. A non-OK status is
        reported through warn(); the field is then usually absent from the
        response, so an empty string is used instead of failing with a
        KeyError right after the warning.
        """
        if url is None or url == "":
            raise InputException("Invalid URL")

        response = apiCall('url', url)
        if response['status'] != 'OK':
            warn(response['statusInfo'])

        return response.get(key, '').encode('utf-8')

    def __extractText(self, url):
        """Call AlchemyAPI to extract the text of the given article."""
        return self.__fetchField(self.alchemyAPI.text, url, 'text')

    def __extractAuthor(self, url):
        """Call AlchemyAPI to extract the author of the article."""
        return self.__fetchField(self.alchemyAPI.author, url, 'author')

    def __extractTitle(self, url):
        """Call AlchemyAPI to extract the title of the article."""
        return self.__fetchField(self.alchemyAPI.title, url, 'title')

    def __sbdText(self, extractedText):
        """Split the extracted article text into sentences and drop quotes.

        Applies a sentence boundary disambiguation regex to the text, then
        removes every sentence that opens with a double quote (ASCII or
        slanted), so later sentiment analysis sees the writer's additions
        only. Quotes inside a sentence are left alone, because that
        paraphrasing/choice is still somewhat indicative of possible bias.

        Returns the list of remaining sentences in their original order.
        """
        import re
        sentenceEnders = re.compile(r"""
            # Split sentences on whitespace between them.
            (?:               # Group for two positive lookbehinds.
              (?<=[.!?])      # Either an end of sentence punct,
            | (?<=[.!?]['"])  # or end of sentence punct and quote.
            )                 # End group of two positive lookbehinds.
            (?<!  Mr\.   )    # Don't end sentence on "Mr."
            (?<!  Mrs\.  )    # Don't end sentence on "Mrs."
            (?<!  Jr\.   )    # Don't end sentence on "Jr."
            (?<!  Dr\.   )    # Don't end sentence on "Dr."
            (?<!  Prof\. )    # Don't end sentence on "Prof."
            (?<!  Sr\.   )    # Don't end sentence on "Sr."
            \s+               # Split on whitespace between sentences.
            """,
        re.IGNORECASE | re.VERBOSE)
        sentenceList = sentenceEnders.split(extractedText)

        # startswith() compares against the full length of each prefix, so the
        # slanted quote is recognised whether it is one character (text
        # string) or several bytes (UTF-8 encoded string).
        quoteOpeners = ("“", '"')
        return [sentence for sentence in sentenceList
                if not sentence.startswith(quoteOpeners)]
예제 #5
0
# Language detection: the three supported flavors succeed, an unknown one fails
response = alchemyapi.language('text', test_text)
assert response['status'] == 'OK'
response = alchemyapi.language('html', test_html)
assert response['status'] == 'OK'
response = alchemyapi.language('url', test_url)
assert response['status'] == 'OK'
response = alchemyapi.language('random', test_url)
assert response['status'] == 'ERROR'  #invalid flavor
print('Language tests complete!')
print('')



#Title
print('Checking title . . . ')
response = alchemyapi.title('text', test_text)
assert response['status'] == 'ERROR'  #only works for html and url content
response = alchemyapi.title('html', test_html)
assert response['status'] == 'OK'
response = alchemyapi.title('url', test_url)
assert response['status'] == 'OK'
print('Title tests complete!')
print('')



#Relations
print('Checking relations . . . ')
response = alchemyapi.relations('text', test_text)
assert response['status'] == 'OK'
response = alchemyapi.relations('html', test_html)
예제 #6
0
# Pause until the user confirms before starting the next demo section
wait = raw_input('press enter to continue')

# ---- Title extraction demo: banner ----
print('')
print('')
print('')
print('############################################')
print('#   Title Extraction Example               #')
print('############################################')
print('')
print('')

print('Processing url: ', demo_url)
print('')

# Ask the service for the title of the page behind demo_url
response = alchemyapi.title('url', demo_url)

if response['status'] != 'OK':
    # Failure path: report the reason supplied in the response
    print('Error in title extraction call: ', response['statusInfo'])
else:
    # Success path: dump the raw response, then the extracted title
    print('## Response Object ##')
    print(json.dumps(response, indent=4))

    print('')
    print('## Title ##')
    print('title: ', response['title'].encode('utf-8'))
    print('')

# Pause again before the following section
wait = raw_input('press enter to continue')

print('')
예제 #7
0
#Language
print('Checking language . . . ')
# Each entry: (flavor, payload, expected status); 'random' is an invalid flavor
for _flavor, _payload, _expected in (
    ('text', test_text, 'OK'),
    ('html', test_html, 'OK'),
    ('url', test_url, 'OK'),
    ('random', test_url, 'ERROR'),
):
    response = alchemyapi.language(_flavor, _payload)
    assert response['status'] == _expected
print('Language tests complete!')
print('')

#Title
print('Checking title . . . ')
# Title extraction only works for html and url content, so 'text' must fail
for _flavor, _payload, _expected in (
    ('text', test_text, 'ERROR'),
    ('html', test_html, 'OK'),
    ('url', test_url, 'OK'),
):
    response = alchemyapi.title(_flavor, _payload)
    assert response['status'] == _expected
print('Title tests complete!')
print('')

#Relations
print('Checking relations . . . ')
for _flavor, _payload in (('text', test_text), ('html', test_html)):
    response = alchemyapi.relations(_flavor, _payload)
    assert response['status'] == 'OK'
response = alchemyapi.relations('url', test_url)
예제 #8
0
    print("Error in language detection call: ", response["statusInfo"])


# ---- Title extraction demo: banner ----
print("")
print("")
print("")
print("############################################")
print("#   Title Extraction Example               #")
print("############################################")
print("")
print("")

print("Processing url: ", demo_url)
print("")

# Ask the service for the title of the page behind demo_url
response = alchemyapi.title("url", demo_url)

if response["status"] != "OK":
    # Failure path: report the reason supplied in the response
    print("Error in title extraction call: ", response["statusInfo"])
else:
    # Success path: dump the raw response, then the extracted title
    print("## Response Object ##")
    print(json.dumps(response, indent=4))

    print("")
    print("## Title ##")
    print("title: ", response["title"].encode("utf-8"))
    print("")


print("")
print("")