Example #1
0
    def skimthatevaluation(self, path):
        """Compare predicted top-rated items against the SkimThat ground truth.

        Reads ``path + "original_urls"``, where each line is expected to hold
        a key and a URL separated by whitespace. For every entry the article
        is crawled, its items are rated through
        ``ratingslearner.predictratings``, the five highest-rated items are
        put back into position order, and the similarity reported by
        ``comparewithskimthatgroundtruth`` is printed.

        Args:
            path: Prefix concatenated with ``"original_urls"`` to locate the
                URL list; it is also handed to ``SkimThatGroundTruth``.

        Returns:
            None. Results are printed. The evaluation stops at the first URL
            whose domain is unsupported or whose ratings cannot be computed.
        """
        urls = {}
        # The context manager closes the file even if parsing raises.
        with open(path + "original_urls", "r") as url_file:
            for line in url_file:
                # split() with no argument also discards the trailing
                # newline, which would otherwise stay attached to the URL.
                fields = line.split()
                if len(fields) < 2:
                    # Blank or incomplete line: nothing to evaluate.
                    continue
                urls[fields[0]] = fields[1]

        ground_truth = SkimThatGroundTruth(urls, path)

        for key in urls:
            url = urls[key]
            # Only CNET news URLs are recognised by this evaluation.
            if re.search(r"news\.cnet", url) is None:
                print("error in domain")
                return

            bh = crawler.BlogHtml(url, "cnetnews")
            items = util.getitems(bh.title, bh.blogparas)
            response = json.loads(ratingslearner.predictratings(items))
            if response is None or response["status"] == 400:
                print("error evaluating... itemratings could not be calculated")
                return

            rated_items = response["content"]
            # Keep the five best-rated items, then restore document order.
            top_items = sorted(
                rated_items, key=lambda item: item["rating"], reverse=True
            )[:5]
            top_items = sorted(top_items, key=lambda item: item["position"])

            sim = ground_truth.comparewithskimthatgroundtruth(top_items, key)
            # Single-argument print(...) produces the same output as the
            # former print statement and is valid on Python 2 and Python 3.
            print("cosine similarity:  " + str(sim))
 def processrequest(self):
     """Classify ``self.url`` by site, then crawl it and store title and items.

     The URL is matched against a fixed sequence of patterns; the first one
     found anywhere in the URL selects the domain label. When no pattern
     matches, the result of ``self.geterrorresponse("Url Not Valid...")`` is
     returned. Otherwise ``self.domain``, ``self.title`` and ``self.items``
     are set from a ``crawler.BlogHtml`` built for the URL.

     NOTE(review): no return value for the success path is visible in this
     excerpt; the method may continue beyond the lines shown here.
     """
     domain = ""
     # Branches are tried in order, so an earlier pattern wins when a URL
     # would satisfy several of them.
     # NOTE(review): in `eng*\.co` the `*` applies only to the "g", so the
     # pattern matches "en" + any number of "g" + ".co" -- confirm that this
     # is the intended short-link form.
     if re.search(r'(eng*\.co)|(engadget\.com.*)', self.url) != None:
         domain = 'engadget'
     # NOTE(review): `mash*\.to` likewise repeats only the "h", and
     # `mashable\.*` matches "mashable" followed by zero or more dots.
     elif re.search(r'(mash*\.to)|(mashable\.*)', self.url) != None:
         domain = 'mashable'
     elif re.search(r'ndtv', self.url) != None:
         domain = 'ndtv'
     elif re.search(r'fakingnews', self.url) != None:
         domain = 'fakingnews'
     # expandURL is only called when the regex on the raw URL fails (the
     # `or` short-circuits); presumably it resolves shortened links -- TODO
     # confirm against its definition.
     elif re.search(r'treehugger', self.url) != None or 'treehugger' in expandURL(self.url)['long-url']:
         domain = 'treehugger'
     elif re.search(r'news\.cnet', self.url) != None:
         domain = 'cnetnews'
     # No pattern matched: report the URL as unsupported.
     if domain == "":
         return self.geterrorresponse("Url Not Valid...")
     
     self.domain = domain
     # Build the crawler object for this URL and keep what it extracted.
     bh = crawler.BlogHtml(self.url, domain)
     self.title = bh.title
     self.items = util.getitems(bh.title, bh.blogparas)