def __init__(self, dataset, compute_ranks, compute_mono_rank, build_indexes,
             max_per_rank=100, index_url='http://127.0.0.1:8080', debug=False):
    """Wire up the ranking engine and the HTML helper.

    ``dataset``, ``compute_ranks``, ``compute_mono_rank`` and
    ``build_indexes`` are forwarded unchanged to ``EggOMatic``.
    ``max_per_rank`` and ``debug`` are handed to ``EggHelper``, which is
    also told the ``index_url``.
    """
    self.__debug = debug
    self.__index_url = index_url
    self.__eggomatic = EggOMatic(
        dataset,
        compute_ranks,
        compute_mono_rank,
        build_indexes,
    )
    # Configure the helper completely before publishing it on the instance.
    helper = EggHelper(debug, max_per_rank)
    helper.set_index_url(index_url)
    self.__html_helper = helper
    self.__max_per_rank = max_per_rank
class EggOMaticExperiments:
    """Batch comparison of offline and online rankings over pairs of tags.

    ``run()`` reads the most popular tags of a dataset, asks ``EggOMatic``
    for the rankings of every pair of them and, for each configured
    (offline, online, error) combination, appends the value returned by the
    matching ``rank_dist_<error>`` function to the file
    ``<dataset>.<offline>_VS_<online>.<error>``.
    """

    __exp_OSim = True
    __exp_KSim = True
    __offline_list = ['offline1']  # or 'gold2'
    __online_list = ['online1']
    # Defaults for the two settings that previously existed only after their
    # setter had been called; without them run() raised AttributeError.
    __error_list = ['osim', 'ksim']
    __forb_tags = []
    __top_tags = 20
    __begin_top_many_users = 1
    __end_top_many_users = 10  # exponential step 2, 4 ,8 16
    __step_top_many_users = 2

    def set_top_tags(self, top_tags=20):
        """Set how many top tags take part in the experiments."""
        self.__top_tags = top_tags

    def set_forb_tags(self, forb_tags):
        """Set the tags that are excluded when picking the top tags."""
        self.__forb_tags = forb_tags

    def set_offline_list(self, type=None):
        """Set the offline rank types to evaluate (default ``['offline1']``).

        The parameter keeps its historical name; ``None`` selects a fresh
        default list so that instances never share one list object.
        """
        self.__offline_list = ['offline1'] if type is None else type

    def set_online_list(self, list):
        """Set the online rank types to evaluate."""
        self.__online_list = list

    def set_error_list(self, list=None):
        """Set the error measures to compute (default ``['osim', 'ksim']``).

        The parameter keeps its historical name; ``None`` selects a fresh
        default list so that instances never share one list object.
        """
        self.__error_list = ['osim', 'ksim'] if list is None else list

    def __init__(self, dataset, compute_ranks=True, compute_mono_rank=True,
                 build_index=True, max_per_rank=10000000000, top_tags=None):
        """Remember the dataset name and build the ``EggOMatic`` engine.

        All arguments are forwarded unchanged to ``EggOMatic``.
        """
        self.__dataset = dataset
        self.__eggomatic = EggOMatic(dataset, compute_ranks, compute_mono_rank,
                                     build_index, max_per_rank, top_tags)

    def __result_path(self, offline_type, online_type, error_type):
        """Return the result file name for one experiment combination."""
        return (self.__dataset + '.' + offline_type + '_VS_' + online_type
                + '.' + error_type)

    def __calc_error_save(self, offline_type, online_type, type_rank, error_type):
        """Append the error between two rankings for each user cut-off.

        ``type_rank`` maps rank-type names to rankings. A line
        ``<cut-off> <len(offline rank)> <error>`` is written for every
        cut-off whose ``info_val`` is non-negative.
        """
        path = self.__result_path(offline_type, online_type, error_type)
        print('writing ' + path)
        offline_rank = type_rank[offline_type]
        online_rank = type_rank[online_type]
        cutoffs = [self.__step_top_many_users ** i
                   for i in range(self.__begin_top_many_users,
                                  self.__end_top_many_users)]
        # The context manager closes the file; it used to be left open.
        with open(path, 'a') as f:
            for top_many_users in cutoffs:
                # NOTE(review): eval() dispatches on error_type, which comes
                # from set_error_list(); it must never carry untrusted input.
                float_error, info_val = eval(
                    'rank_dist_%s(offline_rank, online_rank, top_many_users)'
                    % error_type)
                if info_val >= 0:
                    f.write('%d %d %f \n'
                            % (top_many_users, len(offline_rank), float_error))

    def __empty_result_files(self):
        """Truncate (or create) every result file that run() will append to."""
        for offline_type in self.__offline_list:
            for online_type in self.__online_list:
                for error_type in self.__error_list:
                    path = self.__result_path(offline_type, online_type,
                                              error_type)
                    with open(path, 'w'):
                        pass

    def __read_top_tags(self, tags_filename):
        """Return up to ``__top_tags`` allowed tags from the tags file.

        At most ``__top_tags + 1000`` lines are read; the first
        space-separated field of each line is the tag. Reading stops at end
        of file so that a short file no longer yields empty-string tags.
        """
        candidates = []
        with open(tags_filename) as f:
            for _ in range(self.__top_tags + 1000):
                line = f.readline()
                if not line:
                    break
                candidates.append(line.strip().split(' ')[0])
        allowed = [tag for tag in candidates if tag not in self.__forb_tags]
        return allowed[:self.__top_tags]

    def __dump_ranks(self, type_rank):
        """Write every ranking in ``type_rank`` to ``ranks.txt``."""
        with open('ranks.txt', 'w') as f:
            for trank, rank in type_rank.items():
                f.write('----------------------------------------\n')
                f.write('%s\n' % trank)
                for t in rank:
                    f.write('%s\n' % str(t))

    def run(self):
        """Run every configured experiment for all pairs of top tags."""
        filename = self.__dataset + '.tagged_graph'
        tags_filename = filename + '.ranks/all.tags'
        top_tags = self.__read_top_tags(tags_filename)

        self.__ranker = RankerByTags()
        self.__ranker.load(filename)

        # empty result files
        self.__empty_result_files()

        # Position of each rank type in the sequence returned by rank_by_tags.
        rank_names = ('offline1', 'mono', 'online1', 'online2', 'online3',
                      'online4', 'offline2', 'online6')
        count = 0
        len_top_tags = len(top_tags)
        # Floor division keeps the pair count an integer on Python 2 and 3.
        nro_exps = len_top_tags * (len_top_tags - 1) // 2
        for i in range(1, len_top_tags):
            tag1 = top_tags[i]
            for tag2 in top_tags[:i]:
                ranks = self.__eggomatic.rank_by_tags([tag1, tag2])
                type_rank = dict((name, ranks[pos])
                                 for pos, name in enumerate(rank_names))
                # Dump of one hard-coded pair, kept from the original code.
                if (tag1, tag2) in (('portugal', 'music'),
                                    ('music', 'portugal')):
                    self.__dump_ranks(type_rank)
                for offline_type in self.__offline_list:
                    for online_type in self.__online_list:
                        for error_type in self.__error_list:
                            print('for tags: %s %s' % (tag1, tag2))
                            self.__calc_error_save(offline_type, online_type,
                                                   type_rank, error_type)
                count += 1
                print('NUMBER OF EXPERIMENTS/PAIRS COMPLETED: %d of %d'
                      % (count, nro_exps))
            print('EXPERIMENTS COMPLETED FOR THE TOP %d of %d TAGS (all pairs)'
                  % (i, len_top_tags))
def __init__(self, dataset, compute_ranks=True, compute_mono_rank=True,
             build_index=True, max_per_rank=10000000000, top_tags=None):
    """Store the dataset name and create the ``EggOMatic`` engine.

    Every argument is passed through to ``EggOMatic`` in the order it is
    received; ``dataset`` is additionally kept on the instance.
    """
    engine_args = (
        dataset,
        compute_ranks,
        compute_mono_rank,
        build_index,
        max_per_rank,
        top_tags,
    )
    self.__dataset = dataset
    self.__eggomatic = EggOMatic(*engine_args)
class EggOMaticWeb(object):
    """Web front end that answers tag and user queries with HTML pages.

    Ranking work is delegated to an ``EggOMatic`` instance and page
    rendering to an ``EggHelper`` instance. Request handlers are marked
    with ``exposed = True``.
    """

    __error = 'ERROR: bad URL, clean your nose -> %s'

    def __init__(self, dataset, compute_ranks, compute_mono_rank, build_indexes,
                 max_per_rank=100, index_url='http://127.0.0.1:8080', debug=False):
        """Create the ranking engine and the HTML helper.

        ``dataset``, ``compute_ranks``, ``compute_mono_rank`` and
        ``build_indexes`` are forwarded to ``EggOMatic``; ``debug`` and
        ``max_per_rank`` are given to ``EggHelper``, which is also told
        the ``index_url``.
        """
        self.__debug = debug
        self.__index_url = index_url
        self.__eggomatic = EggOMatic(dataset, compute_ranks, compute_mono_rank,
                                     build_indexes)
        self.__html_helper = EggHelper(self.__debug, max_per_rank)
        self.__html_helper.set_index_url(index_url)
        self.__max_per_rank = max_per_rank

    def set_index_url(self, index_url):
        """Change the index URL used by the HTML helper."""
        # Keep the stored copy in sync with the helper; it used to go stale.
        self.__index_url = index_url
        self.__html_helper.set_index_url(index_url)

    def set_debug(self, flag):
        """Set the debug flag stored on this object."""
        self.__debug = flag

    def index(self):
        """Return the landing page (header followed by footer)."""
        return self.__html_helper.header() + self.__html_helper.end()
    index.exposed = True

    def query(self, tags, clustering='0'):
        """Return the result page for a space-separated list of tags.

        ``tags`` is stripped and lower-cased and may contain only
        ``a-z``, ``0-9``, ``-`` and spaces; anything else yields the
        bad-input page. An empty query ranks by the empty tag, one tag
        ranks by that tag (optionally by cluster when ``clustering`` is
        ``'1'``) and several tags rank by their combination. An unknown
        tag yields the bad-tag page.
        """
        clustering = clustering == '1'
        tags = tags.strip().lower()
        match = re.search(r'[a-z0-9\- ]+', tags)
        # Valid only when the first match covers the whole string.
        if len(tags) > 0 and (not match or len(match.group()) != len(tags)):
            return self.__html_helper.bad_input(tags)
        # split() without arguments ignores runs of spaces, so "a  b" no
        # longer produces an empty tag between the two real ones.
        tags = tags.split()
        cluster = []
        print('TAGS: %s' % tags)
        if not tags:
            # This branch used to store its result in ``rank`` and then
            # failed with UnboundLocalError when ``ranks`` was read below.
            ranks = self.__eggomatic.rank_by_tag('')
        elif len(tags) > 1:
            for tag in tags:
                if not self.__eggomatic.good_tag(tag):
                    return self.__html_helper.bad_tag(tag)
            ranks = self.__eggomatic.rank_by_tags(tags)
            # Every tag was validated above, so all of them form the cluster.
            cluster = list(tags)
        else:
            tag = tags[0]
            if not self.__eggomatic.good_tag(tag):
                return self.__html_helper.bad_tag(tag)
            if self.__eggomatic.has_bigger_cluster(tag):
                cluster_number = 0
                if clustering:
                    if self.__eggomatic.has_many_clusters(tag):
                        clusters = self.__eggomatic.clusters(tag)
                        return self.__html_helper.choose_cluster_page(
                            clusters,
                            self.__eggomatic.total_users(),
                            self.__eggomatic.total_tags())
                    cluster = self.__eggomatic.clusters(tag)[0]
                else:
                    cluster = [tag]
                ranks = self.__eggomatic.rank_by_tag(tag, clustering,
                                                     cluster_number)
            else:
                cluster = [tag]
                ranks = self.__eggomatic.rank_by_tag(tag)
                # TODO ask if has more than one cluster.
        cluster = sorted(cluster)
        return self.__html_helper.complete_page(ranks, cluster,
                                                self.__eggomatic.total_users(),
                                                self.__eggomatic.total_tags())
    query.exposed = True

    def query_user(self, user, clustering='0'):
        """Return the result page for a single user name.

        An empty name yields the index page. A name containing characters
        other than letters, digits and ``-`` yields the bad-input page,
        and a name rejected by ``good_user`` yields the bad-user page.
        ``clustering == '1'`` selects the clustering variant of the ranks.
        """
        clustering = clustering == '1'
        user = user.strip()
        if len(user) == 0:
            return self.index()
        match = re.search(r'[A-Za-z0-9\-]+', user)
        # Valid only when the first match covers the whole string.
        if not match or len(match.group()) != len(user):
            return self.__html_helper.bad_input_user(user)
        if not self.__eggomatic.good_user(user):
            return self.__html_helper.bad_user(user)
        if clustering:
            rank = self.__eggomatic.user_ranks_clustering(user)
        else:
            rank = self.__eggomatic.user_ranks(user)
        return self.__html_helper.complete_page_user(
            rank, [user],
            self.__eggomatic.total_users(),
            self.__eggomatic.total_tags(),
            clustering)
    query_user.exposed = True

    def default(self, param):
        """Handle a URL of the form ``tag1_tag2_...``.

        Returns the error string when ``param`` contains no run of
        letters, ``-`` or ``_``. Otherwise nothing is returned.
        """
        print('default')
        param = param.lower()
        if not re.search(r'[a-z\-_]+', param):
            return self.__error % str(param)
        tags = param.split('_')
        if len(tags) == 1:
            # NOTE(review): the rank is computed but never rendered or
            # returned, and multi-tag URLs are ignored; this handler looks
            # unfinished. Confirm the intended response before changing it.
            self.__eggomatic.rank_by_tag(tags[0])
    default.exposed = True