def skimthatevaluation(self, path): arr = {} f = open(path + "original_urls", "r") for line in f: line = line.split(" ") arr[line[0]] = line[1] g = SkimThatGroundTruth(arr, path) for i in arr: domain = "" if re.search(r"news\.cnet", arr[i]) != None: domain = "cnetnews" if domain == "": print "error in domain" return bh = crawler.BlogHtml(arr[i], domain) items = util.getitems(bh.title, bh.blogparas) response = ratingslearner.predictratings(items) response = json.loads(response) if response == None or response["status"] == 400: print "error evaluating... itemratings could not be calculated" return items = response["content"] """ for i in items: print i['text'], i['rating'], i['position'] """ sorted_items = sorted(items, key=lambda k: k["rating"], reverse=True) sorted_items = sorted_items[:5] sorted_items = sorted(sorted_items, key=lambda k: k["position"]) sim = g.comparewithskimthatgroundtruth(sorted_items, i) print "cosine similarity: ", sim
def getrateditems(self):
    """Return ``self.items`` with predicted ratings attached.

    Sends ``self.items`` to the ratings learner and returns the rated
    items from the JSON response, or an empty list when the call fails
    (missing response, or HTTP-style status 400 in the payload).
    """
    response = json.loads(ratingslearner.predictratings(self.items))
    # 'is None' — identity is the idiomatic None test (PEP 8).
    if response is None or response['status'] == 400:
        return []
    return response['content']
def evaluatethesummary(self, path, recommender=False): blogsitems = {} summaries = [] blogs = [] for (root, dir, files) in os.walk(path): if "items" in root: for i in files: fi = open(root + "/" + i, "r") name = i.split(".")[0] blogsitems[name] = pickle.load(fi) if recommender: blogstweets = {} for (root, dir, files) in os.walk(path): if "tweets" in root: for i in files: ft = open(root + "/" + i, "r") name = i.split(".")[0] blogstweets[name] = pickle.load(ft) for i in blogsitems: r = recommendersystem.Recommender(0.5) r.creatematrix(blogsitems[i], blogstweets[i]) r.calcaggregateratings() items = r.getrecommenderrateditems() if items == []: print "error evaluating... recommender itemratings could not be calculated" return else: sorted_items = sorted(items, key=lambda k: k["rating"], reverse=True) sorted_items = sorted_items[:5] sorted_items = sorted(sorted_items, key=lambda k: k["position"]) summaries.append(sorted_items) blogs.append(items) else: for i in blogsitems: response = ratingslearner.predictratings(blogsitems[i]) response = json.loads(response) if response == None or response["status"] == 400: print "error evaluating... itemratings could not be calculated" return items = response["content"] for i in items: print i["text"], i["rating"], i["position"] sorted_items = sorted(items, key=lambda k: k["rating"], reverse=True) sorted_items = sorted_items[:5] sorted_items = sorted(sorted_items, key=lambda k: k["position"]) summaries.append(sorted_items) blogs.append(items) g = GroundTruth(blogs) for i in range(len(summaries)): g.comparewithgroundtruth(summaries[i], i)
def creatematrix(self, items, tweets):
    """Populate the utility matrix from predicted item and tweet ratings.

    Stores the rated items/tweets on ``self``, fills
    ``self.utilitymatrix`` ('docrow' -> per-item ratings, tweet id ->
    that tweet's per-item predicted ratings), and records the
    best-supporting tweet and its rating for every item in
    ``self.supporttweets`` / ``self.supportratings``.

    Returns silently (leaving state partially set) when either ratings
    service call fails.

    :param items: blog items to rate.
    :param tweets: tweets to rate against the items.
    """
    response = json.loads(ratingslearner.predictratings(items))
    if response is None or response['status'] == 400:
        return
    rated_items = response['content']
    self.items = rated_items

    response1 = json.loads(ratingslearner.predicttweetratings(tweets))
    if response1 is None or response1['status'] == 400:
        return
    rated_tweets = response1['content']
    self.tweets = rated_tweets

    # Document row: one predicted rating per item.
    self.utilitymatrix['docrow'] = [it['rating'] for it in rated_items]

    # One matrix row per tweet, keyed by tweet id; keep an id -> tweet map
    # so the winning tweet can be recovered below.
    id_indexer = {}
    for tw in rated_tweets:
        self.utilitymatrix[tw['id']] = tw['predictedratings']
        id_indexer[tw['id']] = tw

    # For each item, track the tweet with the highest predicted rating.
    n = len(rated_items)
    self.supporttweets = [""] * n
    self.supportratings = [-1234] * n  # sentinel below any real rating
    for row_id in self.utilitymatrix:
        if row_id == 'docrow':
            continue
        for j in range(n):
            if self.utilitymatrix[row_id][j] > self.supportratings[j]:
                self.supportratings[j] = self.utilitymatrix[row_id][j]
                # NOTE(review): stores the whole tweet dict although the
                # list was initialised with "" — confirm callers expect
                # dicts (not ids/strings) in supporttweets.
                self.supporttweets[j] = id_indexer[row_id]