def _cleanBuzzfeedTitle(rawTitle):
    """Turn the repr()-escaped anchor text of a headline into a plain title.

    The text is taken from ``repr()`` of a tag, so line breaks, tabs and
    non-ASCII characters appear as literal escape text (a backslash followed
    by ``n``, ``xa0``, ``u201c`` and so on) rather than as real characters.
    The replacements below operate on that literal text.
    """
    # Order matters and mirrors the original sequence exactly.
    literalReplacements = (
        ("\\n\\t\\t\\n\\t\\t\\t", ""),  # leading layout whitespace
        ("\\xa0", " "),                 # non-breaking space
        ('u201c', ' '),                 # left curly quote
        ('u201d', ' '),                 # right curly quote
        ('u2026', ' '),                 # ellipsis
        ("\\n\\t\\t", ""),              # trailing layout whitespace
    )
    title = rawTitle
    for old, new in literalReplacements:
        title = title.replace(old, new)
    # Collapse every run of non-word characters (including the backslashes
    # left over from the escapes above) into a single space.
    title = re.sub(r'\W+', ' ', title)
    # Done after the \W+ collapse so the restored apostrophe is not stripped.
    return title.replace("u2019", "'")


def getBuzzfeed(word):
    """Return the sentiment for up to three BuzzFeed headlines tagged `word`.

    Downloads ``http://www.buzzfeed.com/tag/<word>``, scans the tags returned
    by ``findAll`` for headline anchors, keeps the first three that match,
    and serializes them (most recently found first) as::

        { "results": [ {"title": " <title>", "url": "<url>" }, ...  ]}

    That string is handed to ``getBuzzfeedPost`` and its result, together
    with `word`, to ``getSentiment``; the value of ``getSentiment`` is
    returned unchanged.

    When no headline matches, an empty ``results`` array is sent on instead
    of failing while walking the list.
    NOTE(review): confirm getBuzzfeedPost/getSentiment accept empty results.
    NOTE(review): `word` is concatenated into the URL without URL-encoding.
    """
    # Placeholder node with an empty URL; LinkedList is constructed from it
    # and real headlines are inserted in front of it.
    firstNode = LinkedListNode("")
    firstNode.setNext(None)
    LLlist = LinkedList(firstNode)

    tagUrl = "http://www.buzzfeed.com/tag/" + word
    response = urllib.urlopen(tagUrl)
    try:
        html = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()
    htmlObject = BeautifulSoup(html, features="lxml")

    # Compiled once, outside the loop. Group 1 is the site-relative href,
    # group 2 the anchor text.
    linkPattern = re.compile("<a href=\"(.*)\" rel:gt_act=\"post/titl.*>(.*)</a>", re.S)

    count = 0
    for item in htmlObject.findAll(re.compile("h2", re.S)):
        if count >= 3:
            # Only the first three matching headlines are used.
            break
        # Matching is done against repr() of the anchor, so the captured
        # text contains literal escape sequences (see _cleanBuzzfeedTitle).
        match = linkPattern.match(repr(item.a))
        if match is None:
            continue
        href, rawTitle = match.groups()
        node = LinkedListNode("http://www.buzzfeed.com" + href)
        node.setTitle(_cleanBuzzfeedTitle(rawTitle))
        LLlist.insertFirst(node)
        count += 1

    # Remove exactly the nodes inserted above. This yields the same entries
    # in the same order as walking until the placeholder is reached, but it
    # never dereferences the next pointer of a removed node, so it is safe
    # when nothing was inserted.
    entries = []
    for _ in range(count):
        node = LLlist.deleteFirst()
        entries.append(
            ' {"title": " ' + node.getTitle() + '", "url": "' + node.getURL() + '" }'
        )

    jsonStr = '{ "results": [' + ", ".join(entries) + ' ' + ' ]}'
    return getSentiment(getBuzzfeedPost(jsonStr), word)