def rank(self):
    """Assign rank positions to this object's public, active competitors.

    Runs four ranking passes -- total, music, performance and singing
    points -- in that order, writing ``<prefix>_rank`` onto each
    competitor and saving it.

    Note: only the total ranking uses the ORDINAL strategy (unique
    positions even on ties); the per-category rankings use Ranking's
    default tie strategy, matching the original behavior.
    """
    # (field prefix, tie strategy or None for Ranking's default)
    categories = [
        ('tot', ORDINAL),
        ('mus', None),
        ('per', None),
        ('sng', None),
    ]
    for prefix, strategy in categories:
        points_attr = '{0}_points'.format(prefix)
        rank_attr = '{0}_rank'.format(prefix)
        competitors = self.competitors.filter(
            is_private=False,
            status__gt=0,
        ).distinct().order_by('-' + points_attr)
        points = [getattr(c, points_attr) for c in competitors]
        if strategy is None:
            ranked = Ranking(points, start=1)
        else:
            ranked = Ranking(points, strategy=strategy, start=1)
        for competitor in competitors:
            setattr(competitor, rank_attr,
                    ranked.rank(getattr(competitor, points_attr)))
            competitor.save()
    return
# NOTE(review): fragment of a larger evaluation loop -- idx1, idx2, r, X_left,
# X_all, MERGED_SIZE, N_BATCH, BATCH_SIZE, meta_dict, getFinalClone, instance,
# lines, llist and rlist are all defined by enclosing code not visible here.
# This is Python 2 code (print statements; MERGED_SIZE/2 is integer division).

# Left half of entity idx1's row, repeated once per row of X_all, then paired
# with every candidate's left half -- presumably building (head, candidate)
# pairs for scoring; TODO confirm column layout of X_all.
X_repeat = np.repeat(X_all[idx1,:MERGED_SIZE/2].reshape((1,MERGED_SIZE/2)),X_all.shape[0], axis = 0)
X_right = np.concatenate((X_repeat, X_all[:,:MERGED_SIZE/2]), axis = 1)
# Relation-index column: one row per candidate, all filled with relation r's id.
X_rel = np.ones((X_all.shape[0],1))
X_rel.fill(meta_dict['dicts']['rel_idx'][r])
# Score only one randomly chosen batch of candidates (sampling for speed).
b = np.random.choice(range(N_BATCH))
scores_left = getFinalClone(X_left[b*BATCH_SIZE: (b+1)*BATCH_SIZE],X_rel[b*BATCH_SIZE: (b+1)*BATCH_SIZE])
scores_right = getFinalClone(X_right[b*BATCH_SIZE: (b+1)*BATCH_SIZE],X_rel[b*BATCH_SIZE: (b+1)*BATCH_SIZE])
# Score the true (idx1, idx2, r) triple as a single 1-row input.
score_correct = getFinalClone(np.concatenate((X_all[idx1,:MERGED_SIZE/2].reshape((1,MERGED_SIZE/2)),X_all[idx2,:MERGED_SIZE/2].reshape((1,MERGED_SIZE/2))), axis = 1), np.array(meta_dict['dicts']['rel_idx'][r]).reshape((1,1)) )
# Progress report every 10000 instances.
if instance % 10000 == 0:
    print >> sys.stderr, "instance %d/%d " % (instance+1, len(lines))
# Candidate scores plus the correct score; the correct score's rank in the
# descending list is the per-instance evaluation metric for each side.
l_list = scores_left.reshape((scores_left.shape[0],)).tolist() + score_correct.tolist()[0]
r_list = scores_right.reshape((scores_right.shape[0],)).tolist() + score_correct.tolist()[0]
# NOTE(review): l_set / r_set are computed but never used in this fragment.
l_set = set(l_list)
r_set = set(r_list)
r_left = Ranking(sorted(l_list,reverse = True))
r_right = Ranking(sorted(r_list,reverse = True))
left_rank = r_left.rank(score_correct[0][0])
right_rank = r_right.rank(score_correct[0][0])
llist += [left_rank]
rlist += [right_rank]
# Running mean rank for both directions, tagged with the script argument.
print sys.argv[1],':',np.mean(llist), np.mean(rlist)
def compute_p_value(self, test_statistic, null_distribution):
    """Empirical p-value of *test_statistic* against *null_distribution*.

    The statistic is pooled with the null samples, the pooled values are
    ranked in descending order, and the statistic's rank divided by the
    pooled sample size gives the p-value.
    """
    pooled = np.append(null_distribution, test_statistic)
    pooled.sort()
    descending = pooled[::-1]
    ranking = Ranking(descending)
    return ranking.rank(test_statistic) / float(len(pooled))
def compute_p_value(self, test_statistic, null_distribution):
    """Return the rank-based p-value for *test_statistic*.

    Appends the statistic to the null samples, ranks the combined values
    from largest to smallest, and normalizes the statistic's rank by the
    combined sample size.
    """
    combined = np.append(null_distribution, test_statistic)
    combined.sort()
    rank_of_stat = Ranking(combined[::-1]).rank(test_statistic)
    n_total = float(len(combined))
    return rank_of_stat / n_total
def rank(self):
    """Assign rank positions to appearances in this round and their songs.

    Runs four ranking passes (total, music, performance, singing points),
    first over the round's public, active appearances, then over the songs
    of those appearances (songs are ranked relative to the whole round).
    Writes ``<prefix>_rank`` onto each object and saves it.

    Note: only the total ranking uses the ORDINAL strategy (unique
    positions even on ties); the other categories use Ranking's default
    tie strategy, matching the original behavior.
    """
    def _apply(objects, prefix, strategy):
        # Rank `objects` (already ordered by -<prefix>_points) and persist
        # the resulting <prefix>_rank on each one.
        points_attr = '{0}_points'.format(prefix)
        rank_attr = '{0}_rank'.format(prefix)
        points = [getattr(o, points_attr) for o in objects]
        if strategy is None:
            ranked = Ranking(points, start=1)
        else:
            ranked = Ranking(points, strategy=strategy, start=1)
        for obj in objects:
            setattr(obj, rank_attr, ranked.rank(getattr(obj, points_attr)))
            obj.save()

    # (field prefix, tie strategy or None for Ranking's default)
    categories = [
        ('tot', ORDINAL),
        ('mus', None),
        ('per', None),
        ('sng', None),
    ]

    for prefix, strategy in categories:
        appearances = self.appearances.filter(
            competitor__is_private=False,
            competitor__status__gt=0,
        ).distinct().order_by('-{0}_points'.format(prefix))
        _apply(appearances, prefix, strategy)

    # Songs ranked relative to Round
    Song = apps.get_model('api.song')
    for prefix, strategy in categories:
        songs = Song.objects.filter(
            appearance__round=self,
            appearance__competitor__is_private=False,
            appearance__competitor__status__gt=0,
        ).distinct().order_by('-{0}_points'.format(prefix))
        _apply(songs, prefix, strategy)
    return
def getRank():
    """Render the candidate-ranking form.

    Builds a candidate list from ./handles.csv, merges influence and MOI
    scores onto each candidate, computes an overall influence top-5, then
    a topic-weighted top-5 per stored topic query, and renders everything
    into form.html.  Topics whose model is not yet present are collected
    into ``pending_topics`` instead of being ranked.
    """
    # All stored topic queries from Mongo.
    queries = list(mongo.topicCollection.find())
    pending_topics = []  # topics flagged as not yet available
    final_ranks = []     # one {'query', 'rank_list'} entry per available topic

    # handles.csv rows are (name, screen_name) pairs.
    df = pd.read_csv("./handles.csv")
    screen_names = df.values.tolist()

    candidates = []
    for screen_name in screen_names:
        res = twitterGraph.fetch_user(screen_name[1])
        candidates.append({
            'id': res.id,
            'screen_name': screen_name[1],
            'name': screen_name[0],
            'image_url': res.profile_image_url
        })

    # Merge influence metrics and MOI score onto each candidate dict.
    ranks = topicInfluence.compute_rank(
        twitterFetch["max_tweets"],
        [candidate['id'] for candidate in candidates])
    for i, rank in enumerate(ranks):
        candidates[i] = dict(candidates[i], **rank)
        candidates[i]["moiScore"] = moi.fetch_moi_score(
            candidates[i]['id'], twitterFetch["max_tweets"])

    # Overall influence ranking (no topic component), top 5.
    ranking = Ranking(candidates, filters=['influence', 'moiScore'])
    weightages = {'influence': 0.5, 'moiScore': 0.5}
    ranking.rank(weightages)
    influenceRanks = ranking.dataframe.to_dict(orient='records')[:5]
    print("Influence ranks")
    print(influenceRanks)

    # Topic-weighted ranking: topic relevance dominates the score.
    weightages = {
        'influence': 0.125,
        'moiScore': 0.125,
        'topic_relevance': 0.75
    }
    for query_dict in queries:
        query = query_dict['name']
        if not query_dict['isPresent']:
            # Topic model not ready: remember it and zero out relevance.
            pending_topics.append(query_dict['name'])
            for candidate in candidates:
                candidate["topic_relevance"] = 0
        else:
            for candidate in candidates:
                candidateTweets = twitterGraph.fetch_preprocessed_tweets(
                    candidate['id'], twitterFetch["max_tweets"])
                print(candidate["screen_name"])
                candidate[
                    "topic_relevance"] = ldamodelInstance.getTopicDistFromQuery(
                        candidateTweets, query)
            ranking = Ranking(candidates)
            ranking.rank(weightages)
            rank_list = ranking.dataframe.to_dict(orient='records')[:5]
            final_ranks.append({'query': query, 'rank_list': rank_list})
    return render_template("form.html",
                           final_ranks=final_ranks,
                           pending_topics=pending_topics,
                           influenceRanks=influenceRanks)
class Discovery: def __init__(self, seed_file, data_dir, similarity_method): if not os.path.exists(data_dir): os.makedirs(data_dir) self.ranked_urls_file = data_dir + "/ranked_urls.csv" self.seed_urls = self._read_urls_from_file(seed_file) print "Number of seed urls: ", len(self.seed_urls) self.discovered_urls = set() for url in self.seed_urls: self.discovered_urls.add(url) self.searcher = Search_APIs(data_dir) self.ranker = Ranking(data_dir, self.seed_urls, similarity_method) self.seed_threshold = 0.1 # minimum score for an url to be selected as a seed self.search_threshold = 0.05 # minimum score for an url to be selected for search def discover_with_backlink_search(self): '''url discovery using moz backlink search''' next_urls = [(-1.0, url) for url in self.seed_urls ] # negate the scores to turn minheap into maxheap heapq.heapify(next_urls) # make next_urls be priority queue new_discovered_urls = [] # urls with relevant scores ranked_urls = [] # discovered urls with ranking scores while next_urls: seed = heapq.heappop(next_urls)[1] results = self.searcher.search_related(seed) for url in results: if url not in self.discovered_urls: new_discovered_urls.append(url) self.discovered_urls.add(url) print "Seed: ", seed, "Retrieved ", len(results), " related urls" # Rank the discovered urls new_seed_urls = [] if new_discovered_urls: new_ranked_urls = self.ranker.rank(new_discovered_urls) self._save_ranked_urls(new_ranked_urls) ranked_urls.extend(new_ranked_urls) for url, score in new_ranked_urls: if score > self.seed_threshold: new_seed_urls.append(url) if score > self.search_threshold: heapq.heappush(next_urls, (-score, url)) new_discovered_urls = [] for url, score in new_ranked_urls: print url, score self.ranker.update_seeds(new_seed_urls) if len(self.discovered_urls) > 300: break def discover_with_related_search(self): '''url discovery using google related search''' next_urls = [(-1.0, url) for url in self.seed_urls ] # negate the scores to turn minheap into maxheap 
heapq.heapify(next_urls) # make next_urls be priority queue new_discovered_urls = [] # urls with relevant scores ranked_urls = [] # discovered urls with ranking scores while next_urls: seed = heapq.heappop(next_urls)[1] results = self.searcher.search_related(seed) for url in results: if url not in self.discovered_urls: new_discovered_urls.append(url) self.discovered_urls.add(url) print "Seed: ", seed, "Retrieved ", len(results), " related urls" # Rank the discovered urls new_seed_urls = [] if new_discovered_urls: new_ranked_urls = self.ranker.rank(new_discovered_urls) self._save_ranked_urls(new_ranked_urls) ranked_urls.extend(new_ranked_urls) for url, score in new_ranked_urls: if score > self.seed_threshold: new_seed_urls.append(url) if score > self.search_threshold: heapq.heappush(next_urls, (-score, url)) new_discovered_urls = [] for url, score in new_ranked_urls: print url, score self.ranker.update_seeds(new_seed_urls) if len(self.discovered_urls) > 300: break def _save_ranked_urls(self, urls): out = open(self.ranked_urls_file, "a+") for url, score in urls: out.write(str(score) + " " + url + "\n") out.close() def test_discover_with_related_search(self): '''A simple discovery round using related search''' # Discover related urls related_urls = [] # results from related search for seed in self.seed_urls: results = self.searcher.search_related(seed) #time.sleep(5) if results: for url in results: if url not in self.discovered_urls: related_urls.append(url) self.discovered_urls.add(url) print "Retrieved ", len(related_urls), " related urls" # Rank the discovered urls ranked_urls = self.ranker.rank(related_urls) for url, score in ranked_urls: print url, score def _read_urls_from_file(self, filepath): urls = [] with open(filepath) as lines: for line in lines: url = url_normalize(line.strip()) urls.append(url) return urls