def skimthatevaluation(self, path):
    """Compare predicted summaries with the SkimThat ground truth.

    Reads the file ``path + "original_urls"``. Each non-blank line holds a
    key followed by a URL, separated by whitespace. For every entry the
    page is crawled, its items are rated, the five highest-rated items are
    put back into position order and handed to the ground-truth comparison
    for that key. The resulting similarity is printed.

    Only URLs containing ``news.cnet`` are supported. The first unsupported
    URL, or the first failed rating response, prints an error message and
    ends the whole evaluation.

    Args:
        path: Prefix concatenated directly with ``"original_urls"``, so it
            must already end with a path separator. It is also passed on
            to ``SkimThatGroundTruth``.

    Returns:
        None. Results and errors are reported on standard output.
    """
    top_n = 5  # number of highest-rated items kept for the comparison

    urls = {}
    # The context manager closes the file even if a line is malformed.
    with open(path + "original_urls", "r") as handle:
        for line in handle:
            # split() without arguments also drops the trailing newline,
            # which would otherwise stay attached to the URL.
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines (e.g. at end of file)
            urls[fields[0]] = fields[1]

    ground_truth = SkimThatGroundTruth(urls, path)

    # NOTE: single-argument print(...) emits the same text whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    for key, url in urls.items():
        if re.search(r"news\.cnet", url) is None:
            print("error in domain")
            return
        domain = "cnetnews"

        page = crawler.BlogHtml(url, domain)
        items = util.getitems(page.title, page.blogparas)

        raw = ratingslearner.predictratings(items)
        # Guard before decoding: json.loads(None) would raise TypeError
        # instead of reaching the error branch below.
        response = json.loads(raw) if raw is not None else None
        if response is None or response["status"] == 400:
            print("error evaluating... itemratings could not be calculated")
            return

        rated = response["content"]
        # Keep the top_n best-rated items, then restore position order.
        best = sorted(rated, key=lambda item: item["rating"], reverse=True)[:top_n]
        best.sort(key=lambda item: item["position"])

        sim = ground_truth.comparewithskimthatgroundtruth(best, key)
        print("cosine similarity:  %s" % (sim,))
def processrequest(self):
    """Work out which supported site ``self.url`` belongs to and crawl it.

    The URL is matched against a list of known site patterns; the first
    match decides the domain. If none matches, the URL is expanded with
    ``expandURL`` and accepted as treehugger when the expanded URL contains
    ``'treehugger'``.

    On success ``self.domain``, ``self.title`` and ``self.items`` are set
    and nothing is returned.

    Returns:
        The value of ``self.geterrorresponse("Url Not Valid...")`` when the
        URL belongs to no supported site, otherwise None.
    """
    url = self.url

    # (pattern, domain) pairs, tried in order; the first match wins.
    # NOTE(review): ``eng*\.co`` and ``mash*\.to`` look like globs, but as
    # regexes "g*" / "h*" mean "zero or more g / h", so e.g. "en.co"
    # matches the engadget pattern. Patterns are left unchanged here;
    # confirm the intended short-URL hosts before tightening them.
    known_sites = (
        (r'(eng*\.co)|(engadget\.com.*)', 'engadget'),
        (r'(mash*\.to)|(mashable\.*)', 'mashable'),
        (r'ndtv', 'ndtv'),
        (r'fakingnews', 'fakingnews'),
        (r'treehugger', 'treehugger'),
        (r'news\.cnet', 'cnetnews'),
    )

    domain = ""
    for pattern, name in known_sites:
        if re.search(pattern, url) is not None:
            domain = name
            break

    # URL expansion is the last resort, tried only after every cheap
    # pattern check has failed, so URLs recognised directly (including
    # news.cnet ones) no longer trigger it.
    # presumably expandURL resolves shortened links - verify against helper
    if domain == "" and 'treehugger' in expandURL(url)['long-url']:
        domain = 'treehugger'

    if domain == "":
        return self.geterrorresponse("Url Not Valid...")

    self.domain = domain
    page = crawler.BlogHtml(url, domain)
    self.title = page.title
    self.items = util.getitems(page.title, page.blogparas)