Ejemplo n.º 1
0
 def __init__(self, dataset, compute_ranks, compute_mono_rank, build_indexes, max_per_rank=100, index_url='http://127.0.0.1:8080', debug=False):
     """Store the web settings and build the ranking engine and HTML helper.

     dataset, compute_ranks, compute_mono_rank and build_indexes are
     forwarded unchanged to EggOMatic.  max_per_rank and debug are handed
     to EggHelper, which is then pointed at index_url.
     """
     # Plain settings first; none of them depends on the collaborators.
     self.__debug = debug
     self.__index_url = index_url
     self.__max_per_rank = max_per_rank

     self.__eggomatic = EggOMatic(dataset, compute_ranks, compute_mono_rank, build_indexes)

     # Configure the helper completely before publishing it on the instance.
     helper = EggHelper(debug, max_per_rank)
     helper.set_index_url(index_url)
     self.__html_helper = helper
Ejemplo n.º 2
0
class EggOMaticExperiments:
    """Run ranking-distance experiments over every pair of top tags.

    For each unordered pair of the selected top tags the rankings returned
    by EggOMatic.rank_by_tags() are compared, offline type against online
    type, with every configured error measure.  Results are appended to
    '<dataset>.<offline>_VS_<online>.<error>' files.
    """

    __exp_OSim = True
    __exp_KSim = True

    __offline_list = ['offline1'] # or 'gold2'
    __online_list = ['online1']
    # Defaults so that run() also works when the matching setter was never
    # called (previously this raised AttributeError).
    __error_list = ['osim', 'ksim']
    __forb_tags = []
    __top_tags = 20

    __begin_top_many_users = 1
    __end_top_many_users = 10
    # exponential step 2, 4 ,8 16
    __step_top_many_users = 2

    # Key under which each positional result of rank_by_tags() is stored.
    __rank_names = ('offline1', 'mono', 'online1', 'online2',
                    'online3', 'online4', 'offline2', 'online6')

    def set_top_tags(self, top_tags=20):
        """Set how many top tags take part in the experiments."""
        self.__top_tags = top_tags

    def set_forb_tags(self, forb_tags):
        """Set the tags that must be skipped when picking the top tags."""
        self.__forb_tags = forb_tags

    def set_offline_list(self, type=['offline1']):
        """Set the offline ranking types to compare."""
        self.__offline_list = type

    def set_online_list(self, list):
        """Set the online ranking types to compare."""
        self.__online_list = list

    def set_error_list(self, list=['osim','ksim']):
        """Set the error measures; each name selects a rank_dist_<name> function."""
        self.__error_list = list

    def __init__(self, dataset, compute_ranks=True,  compute_mono_rank=True, build_index=True, max_per_rank=10000000000, top_tags=None):
        """Remember the dataset prefix and build the EggOMatic engine.

        Every argument is forwarded unchanged to EggOMatic; dataset is also
        used as the prefix of all files read and written by this class.
        """
        self.__dataset = dataset
        self.__eggomatic = EggOMatic(dataset, compute_ranks, compute_mono_rank, build_index, max_per_rank, top_tags)

    def __result_path(self, offline_type, online_type, error_type):
        """Return the result file name for one offline/online/error combination."""
        return self.__dataset + '.' + offline_type + '_VS_' + online_type + '.' + error_type

    def __calc_error_save(self, offline_type, online_type, type_rank, error_type):
        """Append the error between two rankings for growing top-user cut-offs.

        type_rank maps a ranking type name to its ranking.  One line
        '<cut-off> <len(offline rank)> <error>' is written per cut-off for
        which the distance function reports a non-negative info value.
        """
        path = self.__result_path(offline_type, online_type, error_type)
        print('writing ' + path)

        offline_rank = type_rank[offline_type]
        online_rank = type_rank[online_type]
        f = open(path, 'a')
        try:
            for exponent in range(self.__begin_top_many_users, self.__end_top_many_users):
                top_many_users = self.__step_top_many_users ** exponent
                # NOTE(review): eval() dispatches to rank_dist_<error_type>;
                # error_type must only ever come from trusted configuration.
                float_error, info_val = eval('rank_dist_%s(offline_rank, online_rank, top_many_users)' % error_type)
                if info_val >= 0:
                    f.write('%d %d %f \n' % (top_many_users, len(offline_rank), float_error))
        finally:
            # The handle used to be leaked; close it even if a distance call fails.
            f.close()

    def __empty_result_files(self):
        """Truncate every result file so that a run starts from empty files."""
        for offline_type in self.__offline_list:
            for online_type in self.__online_list:
                for error_type in self.__error_list:
                    f = open(self.__result_path(offline_type, online_type, error_type), 'w')
                    f.close()

    def __read_top_tags(self, tags_filename):
        """Return the first non-forbidden tags listed in tags_filename.

        Only the first (top_tags + 1000) lines are inspected; the tag is the
        first space-separated field of a line.  Blank lines and reads past
        the end of the file are skipped instead of yielding empty tags.
        """
        selected = []
        f = open(tags_filename)
        try:
            for _ in range(self.__top_tags + 1000):
                line = f.readline()
                if not line:
                    # End of file: fewer lines than the inspection window.
                    break
                tag = line.strip().split(' ')[0]
                if tag and tag not in self.__forb_tags:
                    selected.append(tag)
        finally:
            f.close()
        return selected[:self.__top_tags]

    def __dump_ranks(self, type_rank):
        """Write every ranking in type_rank to 'ranks.txt' (debugging aid)."""
        f = open('ranks.txt', 'w')
        try:
            for trank, rank in type_rank.items():
                f.write('----------------------------------------\n')
                f.write('%s\n' % trank)
                for t in rank:
                    f.write('%s\n' % str(t))
        finally:
            f.close()

    def run(self):
        """Run all experiments for every unordered pair of the top tags.

        Reads the tag list from '<dataset>.tagged_graph.ranks/all.tags',
        empties the result files and then appends the errors of each
        tag pair to them, printing progress along the way.
        """
        filename = self.__dataset + '.tagged_graph'
        top_tags = self.__read_top_tags(filename + '.ranks/all.tags')

        self.__ranker = RankerByTags()
        self.__ranker.load(filename)

        # empty result files
        self.__empty_result_files()

        count = 0
        len_top_tags = len(top_tags)
        nro_exps = len_top_tags * (len_top_tags - 1) // 2
        for i in range(1, len_top_tags):
            tag1 = top_tags[i]
            for tag2 in top_tags[:i]:
                ranks = self.__eggomatic.rank_by_tags([tag1, tag2])
                type_rank = {}
                for position, name in enumerate(self.__rank_names):
                    type_rank[name] = ranks[position]
                # Hard-coded debugging dump for one specific tag pair.
                if (tag1, tag2) in (('portugal', 'music'), ('music', 'portugal')):
                    self.__dump_ranks(type_rank)
                for offline_type in self.__offline_list:
                    for online_type in self.__online_list:
                        for error_type in self.__error_list:
                            print('for tags: %s %s' % (tag1, tag2))
                            self.__calc_error_save(offline_type, online_type, type_rank, error_type)
                count += 1
                print('NUMBER OF EXPERIMENTS/PAIRS COMPLETED: %d of %d' % (count, nro_exps))
            print('EXPERIMENTS COMPLETED FOR THE TOP %d of %d TAGS (all pairs)' % (i, len_top_tags))
Ejemplo n.º 3
0
 def __init__(self, dataset, compute_ranks=True,  compute_mono_rank=True, build_index=True, max_per_rank=10000000000, top_tags=None):
     """Remember the dataset name and build the EggOMatic engine.

     All six arguments are forwarded to EggOMatic positionally, in the
     order they are declared here.
     """
     self.__dataset = dataset
     engine = EggOMatic(
         dataset,
         compute_ranks,
         compute_mono_rank,
         build_index,
         max_per_rank,
         top_tags,
     )
     self.__eggomatic = engine
Ejemplo n.º 4
0
class EggOMaticWeb(object):
    """Web front end that renders EggOMatic rankings through an EggHelper.

    Handlers flagged with ``exposed = True`` are the published pages.
    """

    __error = 'ERROR: bad URL, clean your nose -> %s'

    def __init__(self, dataset, compute_ranks, compute_mono_rank, build_indexes, max_per_rank=100, index_url='http://127.0.0.1:8080', debug=False):
        """Build the ranking engine and the HTML helper.

        dataset, compute_ranks, compute_mono_rank and build_indexes are
        forwarded to EggOMatic; debug and max_per_rank go to EggHelper,
        which is then pointed at index_url.
        """
        self.__debug = debug
        self.__index_url = index_url
        self.__eggomatic = EggOMatic(dataset, compute_ranks, compute_mono_rank, build_indexes)
        self.__html_helper = EggHelper(self.__debug, max_per_rank)
        self.__html_helper.set_index_url(index_url)
        self.__max_per_rank = max_per_rank

    @staticmethod
    def __fully_matches(pattern, text):
        """Return True when the first match of pattern spans the whole text."""
        match = re.search(pattern, text)
        return bool(match) and len(match.group()) == len(text)

    def set_index_url(self, index_url):
        """Change the index URL, keeping the stored copy and the helper in sync."""
        self.__index_url = index_url
        self.__html_helper.set_index_url(index_url)

    def set_debug(self, flag):
        """Set the debug flag kept on this object.

        NOTE(review): the helper received the debug flag at construction
        time and is not updated here -- confirm whether that is intended.
        """
        self.__debug = flag

    def index(self):
        """Return the empty landing page (header followed by footer)."""
        return self.__html_helper.header() + self.__html_helper.end()
    index.exposed = True

    def query(self, tags, clustering='0'):
        """Render the ranking page for a space-separated list of tags.

        tags is trimmed and lower-cased; it may only contain a-z, 0-9, '-'
        and spaces, otherwise the bad-input page is returned.  An empty
        query ranks by the empty tag.  clustering is enabled by the string
        '1'.  An unknown tag yields the bad-tag page, and a single tag with
        many clusters yields the cluster chooser page.
        """
        clustering = clustering == '1'
        tags = tags.strip().lower()
        if tags and not self.__fully_matches(r'[a-z0-9\- ]+', tags):
            return self.__html_helper.bad_input(tags)
        # Drop the empty pieces produced by consecutive spaces so that
        # 'a  b' is treated like 'a b' instead of being rejected.
        tags = [tag for tag in tags.split(' ') if tag]
        print('TAGS: %s' % tags)

        cluster = []
        if not tags:
            # The result used to be bound to 'rank', which left 'ranks'
            # undefined when the page was rendered below.
            ranks = self.__eggomatic.rank_by_tag('')
        elif len(tags) > 1:
            for tag in tags:
                if not self.__eggomatic.good_tag(tag):
                    return self.__html_helper.bad_tag(tag)
            ranks = self.__eggomatic.rank_by_tags(tags)
            # Every tag passed the check above, so all of them are shown.
            cluster = list(tags)
        else:
            tag = tags[0]
            if not self.__eggomatic.good_tag(tag):
                return self.__html_helper.bad_tag(tag)
            if self.__eggomatic.has_bigger_cluster(tag):
                cluster_number = 0
                if clustering:
                    if self.__eggomatic.has_many_clusters(tag):
                        clusters = self.__eggomatic.clusters(tag)
                        return self.__html_helper.choose_cluster_page(clusters, self.__eggomatic.total_users(), self.__eggomatic.total_tags())
                    cluster = self.__eggomatic.clusters(tag)[0]
                else:
                    cluster = [tag]
                ranks = self.__eggomatic.rank_by_tag(tag, clustering, cluster_number)
            else:
                cluster = [tag]
                ranks = self.__eggomatic.rank_by_tag(tag)
                # TODO ask if has more than one cluster.
        return self.__html_helper.complete_page(ranks, sorted(cluster), self.__eggomatic.total_users(), self.__eggomatic.total_tags())
    query.exposed = True

    def query_user(self, user, clustering='0'):
        """Render the ranking page for one user.

        user is trimmed and may only contain letters, digits and '-'.  An
        empty user returns the index page, malformed input the bad-input
        page and an unknown user the bad-user page.  clustering is enabled
        by the string '1'.
        """
        clustering = clustering == '1'
        user = user.strip() #.lower()
        if not user:
            return self.index()
        if not self.__fully_matches(r'[A-Za-z0-9\-]+', user):
            return self.__html_helper.bad_input_user(user)
        if not self.__eggomatic.good_user(user):
            return self.__html_helper.bad_user(user)

        if clustering:
            rank = self.__eggomatic.user_ranks_clustering(user)
        else:
            rank = self.__eggomatic.user_ranks(user)
        return self.__html_helper.complete_page_user(rank, [user], self.__eggomatic.total_users(), self.__eggomatic.total_tags(), clustering)
    query_user.exposed = True

    def default(self, param):
        """Fallback handler for URLs of the form tag1_tag2_...

        Returns the error message when param contains no run of a-z, '-'
        or '_'.  NOTE(review): this handler looks unfinished -- for a
        single tag the ranking is computed and discarded, and nothing is
        returned in any non-error case.
        """
        print('default')
        param = param.lower()
        if not re.search(r'[a-z\-_]+', param):
            return self.__error % str(param)

        tags = param.split('_')
        if len(tags) == 1:
            # The call is kept for whatever side effects it may have; its
            # result is not used.
            self.__eggomatic.rank_by_tag(tags[0])
    default.exposed = True