def extractStatDataFromScript(self, script):
     """Pull the date and value series out of a chart script and filter them.

     The script is scanned line by line. A line containing 'categories: ['
     supplies the dates (each item is converted with IMVDBDateStringToDate);
     the first line containing 'data: [' supplies the values (each item is
     passed through cleanUnicode and int) and ends the scan. The two series
     are zipped into (date, value) pairs and handed to self.filterDataByWeek,
     whose result is returned unchanged.

     script -- text of the script, lines separated by '\n'.
     """
     def itemsAfter(line, marker):
         # Comma-separated items of the list opened by `marker` on this line,
         # or None when the marker does not occur on the line.
         markerPos = line.find(marker)
         if markerPos == -1:
             return None
         # Anchor on the marker's own '[' instead of the first '[' in the line.
         start = markerPos + len(marker)
         end = line.find(']', start)
         if end == -1:
             # No closing bracket on this line: use the rest of the line.
             end = len(line)
         # The slice end is already exclusive, so nothing is subtracted from
         # `end`; subtracting one would cut off the last character of the
         # final item. A trailing comma or trailing whitespace is dropped.
         content = line[start:end].rstrip(', \t\r')
         # An empty list yields no items rather than a single '' item.
         return content.split(',') if content else []

     dateList = []
     dataList = []
     for line in script.split('\n'):
         dateItems = itemsAfter(line, 'categories: [')
         if dateItems is not None:
             dateList = [IMVDBDateStringToDate(dateString) for dateString in dateItems]
             continue
         dataItems = itemsAfter(line, 'data: [')
         if dataItems is not None:
             dataList = [int(cleanUnicode(dataValue)) for dataValue in dataItems]
             # Only the first data series is used.
             break
     rawDataList = zip(dateList, dataList)
     return self.filterDataByWeek(rawDataList)
 def extractDetailStatData(self, tables, URL):
     """Build the detail-stat dictionary for one URL from its stat tables.

     The result always holds 'week' (dateToSaturday of the current datetime)
     and 'URL'. For each table, the cleaned text is checked for 'Views':
     if present, the view and comment counts are read from it; otherwise the
     Facebook, Twitter and Google Plus counts are read from it. Every value
     comes from self.getDetailStatTableData(tableText, label).

     tables -- iterable of objects exposing a `.text` attribute.
     URL    -- stored as-is under the 'URL' key.
     """
     # (result key, label passed to getDetailStatTableData), in lookup order.
     viewFields = (
         ('MVViewCount', 'Views'),
         ('MVCommentCount', 'Comments'),
     )
     socialFields = (
         ('FBLikeCount', 'Facebook Like Count'),
         ('FBShareCount', 'Facebook Share Count'),
         ('FBCommentCount', 'Facebook Comment Count'),
         ('TwitterCount', 'Twitter'),
         ('GooglePlusCount', 'GooglePlusOne'),
     )

     stats = {'week' : dateToSaturday(datetime.today()), 'URL' : URL}
     for table in tables:
         cleaned = cleanUnicode(table.text)
         # Any table that does not mention 'Views' is read as the social table.
         fields = viewFields if 'Views' in cleaned else socialFields
         for key, label in fields:
             stats[key] = self.getDetailStatTableData(cleaned, label)
     return stats
 def extractContent(self, textDict):
     """Download a page and return the cleaned text of its body paragraphs.

     The page at textDict["URL"] is fetched and parsed with BeautifulSoup.
     The body element is the one with class "article-body" when
     textDict["type"] is "article", otherwise the one with class "entry".
     The text of every direct child <p> tag of that element is concatenated
     and returned through cleanUnicode.

     This is best-effort: any exception (bad URL, missing key, body element
     not found, ...) is printed and the cleaned empty string is returned.
     """
     text = ""
     try:
         page = urllib2.urlopen(textDict["URL"])
         try:
             soup = BeautifulSoup(page.read())
         finally:
             # Always release the HTTP response, even if reading/parsing fails.
             page.close()
         bodyClass = "article-body" if textDict["type"] == "article" else "entry"
         # If no such element exists, `body` is None and the attribute access
         # below raises; that is handled by the except clause like any other
         # failure.
         body = soup.find(attrs={"class": bodyClass})
         # Keep only direct children that are tag objects named <p>; the
         # type-name check skips plain strings and other non-tag nodes.
         text = "".join(
             content.text
             for content in body.contents
             if "Tag" in type(content).__name__ and content.name == "p"
         )
     except Exception as e:
         # the URL link may be invalid
         print(e)
         text = ""
     return cleanUnicode(text)