Example #1
def addToDict():
    # One topic label per index in masterList.
    topics = ['general', 'technology', 'sports', 'business', 'entertainment', 'science']
    # masterList holds one list per topic; each entry is (title, url, author).
    masterList = api.generateResponse()
    for x in range(len(masterList)):
        for y in range(len(masterList[x])):
            title = masterList[x][y][0]
            url = masterList[x][y][1]
            author = masterList[x][y][2]
            try:
                # Pull location mentions out of the article page.
                locations = nlp.HTMLParser(url)
                for z in range(len(locations)):
                    location = str(getAddress(str(locations[z])))
                    # Group articles under every location they mention.
                    if location not in aggregatedDict:
                        aggregatedDict[location] = []
                    tempDict = {}
                    tempDict['url'] = url
                    tempDict['author'] = author
                    tempDict['title'] = title
                    tempDict['credRating'] = svm.compute(url)
                    if x < len(topics):
                        tempDict['topic'] = topics[x]
                    aggregatedDict[location].append(tempDict)
            except Exception:
                # Ignore articles whose pages cannot be fetched or parsed.
                pass
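
A minimal sketch of how the aggregatedDict populated above might be read back. The module-level aggregatedDict initialisation and the printArticlesFor helper are illustrative assumptions, not part of the original module; the keys on each entry follow from addToDict() itself.

# Assumed module-level store: location string -> list of article dicts.
aggregatedDict = {}

def printArticlesFor(location):
    # Every entry written by addToDict() carries url, author, title,
    # credRating and (usually) topic keys.
    for article in aggregatedDict.get(location, []):
        print(article['title'], article.get('topic'), article['credRating'], article['url'])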
Example #2
def getNewsAPI():
    # Fetch the list of sources that NewsAPI knows about.
    r = requests.get("https://newsapi.org/v1/sources")
    articles = r.json()['sources']
    for x in range(len(articles)):
        articleid = articles[x]['id']
        topic = articles[x]['category']
        # Collapse NewsAPI categories onto the topic labels used elsewhere.
        if topic in ('entertainment', 'gaming', 'music'):
            topic = 'entertainment'
        if topic == 'science-and-nature':
            topic = 'science'
        payload = {
            'source': articleid,
            'apiKey': '6f62a98cbb734492abbdba50a4bdff86',
            'sortBy': 'top'
        }
        # Pull the top articles for this source.
        r = requests.get('https://newsapi.org/v1/articles', params=payload)
        if str(r.json()['status']) == 'ok':
            presentArticles = r.json()['articles']
            for y in range(len(presentArticles)):
                jsonReceived = presentArticles[y]
                title = jsonReceived['title']
                author = jsonReceived['author']
                url = jsonReceived['url']
                try:
                    # Extract location mentions from the article page.
                    locations = nlp.HTMLParser(url)
                    for z in range(len(locations)):
                        location = str(getAddress(str(locations[z])))
                        if location not in aggregatedDict:
                            aggregatedDict[location] = []
                        tempDict = {}
                        tempDict['url'] = url
                        tempDict['author'] = author
                        tempDict['title'] = title
                        tempDict['credRating'] = svm.compute(url)
                        tempDict['topic'] = topic
                        aggregatedDict[location].append(tempDict)
                except Exception:
                    # Ignore articles whose pages cannot be fetched or parsed.
                    pass
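
The category remapping above could just as well be written as a lookup table; a small sketch, where TOPIC_ALIASES and normaliseTopic are illustrative names rather than part of the original code:

TOPIC_ALIASES = {
    'gaming': 'entertainment',
    'music': 'entertainment',
    'science-and-nature': 'science',
}

def normaliseTopic(topic):
    # Fall back to the NewsAPI category when no alias is defined.
    return TOPIC_ALIASES.get(topic, topic)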
Example #3
def manualSearch(query):
    url = 'https://api.cognitive.microsoft.com/bing/v5.0/news/search'
    # query string parameters
    payload = {'q': query, 'freshness': 'Week'}
    # custom headers
    headers = {'Ocp-Apim-Subscription-Key': '22207001cbdc4c2487ad91d1cec1bdf2'}
    r = requests.get(url, params=payload, headers=headers)
    links = []
    descriptions = []
    print(r.json())
    try:
        listOfArticles = r.json()['value']
    except (KeyError, ValueError):
        # Bing returned no results or an error payload.
        return []
    remaining = 5
    for article in listOfArticles:
        if 'clusteredArticles' in article:
            information = article['clusteredArticles']
        else:
            information = article
        if remaining == 0:
            break
        remaining -= 1
        if isinstance(information, dict):
            links.append(information['url'])
            # Strip non-ASCII characters from the description.
            descriptions.append(
                information['description'].encode('ascii', 'ignore').decode('ascii'))
    fin = []
    for i, link in enumerate(links):
        thisDict = {}
        thisDict['id'] = str(i + 1)
        thisDict['description'] = descriptions[i]
        thisDict['url'] = link
        # Credibility score for the article behind this link.
        thisDict['score'] = str(svm.compute(link))
        fin.append(thisDict)

    return json.dumps(fin)
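
A hedged usage sketch for manualSearch; the query string is illustrative, and the result shape (a JSON array of objects with id, description, url and score keys) follows from the function body:

import json

results = json.loads(manualSearch('renewable energy'))
for result in results:
    print(result['id'], result['score'], result['url'])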
Example #4
def processURL(url):
    toReturn = {}

    # Credibility score for the page.
    score = svm.compute(url)

    # Grab the page title from the raw HTML.
    t = lxml.html.parse(url)
    title = t.find(".//title").text

    # Extract the article body with Goose, then pull named entities from it.
    response = requests.get(url)
    extractor = Goose()
    article = extractor.extract(raw_html=response.content)
    text = article.cleaned_text

    keywords = nlp.generateEntity(text)

    toReturn['title'] = title
    toReturn['score'] = score
    toReturn['keywords'] = keywords
    toReturn['url'] = url

    return json.dumps(toReturn)
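
A minimal usage sketch for processURL, assuming the module-level svm and nlp helpers are available; the URL is purely illustrative:

import json

summary = json.loads(processURL('https://example.com/some-article'))
print(summary['title'], summary['score'])
print(summary['keywords'])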