Example #1
0
def main(database, table, smooth_func, lambda_, min_tag_freq):
    """Print the item probabilities for every tag that is considered.

    An estimator is built from all annotations in ``table``; then, for each
    considered tag, ``rnorm_prob_items_given_tag`` is evaluated over every
    item id and one dict per (tag, item) pair is printed to stdout.

    Arguments
    ---------
    database: passed to ``AnnotReader`` to open the annotation source
    table: name handed to ``reader.change_table``
    smooth_func: forwarded unchanged to ``SmoothEstimator``
    lambda_: forwarded unchanged to ``SmoothEstimator``
    min_tag_freq: when negative, every tag id in
        ``range(estimator.num_tags())`` is considered; otherwise only tags
        whose number of annotations is at least this value
    """
    with AnnotReader(database) as reader:
        reader.change_table(table)

        # Builds value calculator
        estimator = SmoothEstimator(smooth_func, lambda_, reader.iterate())
        calculator = ValueCalculator(estimator)

        # Determine tags which will be considered
        if min_tag_freq < 0:  # All tags
            tags_to_consider = range(estimator.num_tags())
        else:
            # Second pass over the annotations: count occurrences per tag
            # and keep the ones that are frequent enough.
            counter = Counter(annot['tag'] for annot in reader.iterate())
            tags_to_consider = [tag for tag, pop in counter.items()
                                if pop >= min_tag_freq]

        # Dumps probabilities. The reader's context manager is the only
        # resource in use here, so no extra cleanup block is required.
        items = np.arange(estimator.num_items())
        for tag in tags_to_consider:
            v_prob_it = calculator.rnorm_prob_items_given_tag(tag, items)
            for item, prob in enumerate(v_prob_it):
                print({'tag':tag, 'item':item, 'prob_it':float(prob)})
Example #2
0
def compute_for_user(database, table, user, relevant, annotated,
                     smooth_func, lambda_, user_profile_size, out_folder):
    """Compute tag values for a single user and store them in a new folder.

    A folder ``user_<id>`` is created inside ``out_folder`` (``os.mkdir`` is
    used, so it must not exist yet). It receives:

    * ``info``: the user id plus the relevant and annotated item lists;
    * whatever ``compute_tag_values`` writes for this user;
    * ``relevant_item.tags``: one ``item tag`` line per tag indexed for each
      relevant item.

    Arguments
    ---------
    database, table: select the annotation source read through AnnotReader
    user: integer user id (formatted with ``%d``)
    relevant: items held out for this user; excluded by the query
    annotated: items removed from the set of candidate items
    smooth_func, lambda_, user_profile_size: forwarded to SmoothEstimator
    out_folder: parent directory of the per-user output folder
    """
    with AnnotReader(database) as reader:
        reader.change_table(table)

        #Keep an annotation when it comes from another user OR its item is
        #not relevant, i.e. drop this user's annotations on relevant items
        from_other_user = {'user': {'$ne': user}}
        on_other_item = {'item': {'$nin': relevant}}
        query = {'$or': [from_other_user, on_other_item]}

        #Probability estimator built from the filtered annotations
        filtered_annots = reader.iterate(query=query)
        est = SmoothEstimator(smooth_func, lambda_, filtered_annots,
                              user_profile_size=user_profile_size)
        value_calc = value_calculator.ValueCalculator(est)

        #Output folder for this user
        user_folder = os.path.join(out_folder, 'user_%d' % user)
        os.mkdir(user_folder)

        #Initial information
        info_fpath = os.path.join(user_folder, 'info')
        with io.open(info_fpath, 'w') as info:
            info.write(u'#UID: %d\n' % user)

            relevant_str = ' '.join(str(i) for i in relevant)
            annotated_str = ' '.join(str(i) for i in annotated)

            relevant_line = u'# %d relevant  items: %s\n' % (
                len(relevant), str(relevant_str))
            info.write(relevant_line)

            annotated_line = u'# %d annotated items: %s\n' % (
                len(annotated), str(annotated_str))
            info.write(annotated_line)

        #Create Graph: tag -> items and item -> tags indexes
        indexes = index_creator.create_double_occurrence_index(
            reader.iterate(query=query), 'tag', 'item')
        tag_to_item, item_to_tag = indexes

        #Items to consider <-> Gamma items: all item ids minus annotated ones
        items_to_consider = set(xrange(est.num_items()))
        items_to_consider.difference_update(set(annotated))
        gamma_items = np.array(list(items_to_consider))

        compute_tag_values(est, value_calc, tag_to_item, user, user_folder,
                           gamma_items)

        #Tags of each relevant item, one pair per line
        relevant_tags_fpath = os.path.join(user_folder, 'relevant_item.tags')
        with io.open(relevant_tags_fpath, 'w') as rel:
            rel.write(u'#ITEM TAG\n')
            for rel_item in relevant:
                for rel_tag in item_to_tag[rel_item]:
                    rel.write(u'%d %d\n' % (rel_item, rel_tag))
Example #3
0
def compute_for_user(database, table, user, relevant, annotated, smooth_func,
                     lambda_, user_profile_size, out_folder):
    """Run the tag-value computation for one user, writing to ``out_folder``.

    Steps, in order:

    1. Build a SmoothEstimator (and a ValueCalculator on top of it) from the
       annotations selected by a query that leaves out this user's
       annotations on ``relevant`` items.
    2. Create ``<out_folder>/user_<user>`` with ``os.mkdir`` (which fails if
       the folder already exists) and write an ``info`` file describing the
       user, the relevant items and the annotated items.
    3. Index the same filtered annotations by tag and by item.
    4. Call ``compute_tag_values`` on every item id known to the estimator
       that is not in ``annotated``.
    5. Write ``relevant_item.tags`` with an ``item tag`` line for every tag
       found in the item index for each relevant item.

    ``user`` is formatted with ``%d``; ``smooth_func``, ``lambda_`` and
    ``user_profile_size`` are passed through to SmoothEstimator.
    """
    with AnnotReader(database) as reader:
        reader.change_table(table)

        #Relevant items by user are left out with this query
        query = {'$or': [{'user': {'$ne': user}},
                         {'item': {'$nin': relevant}}]}

        #Probability estimator
        est = SmoothEstimator(
            smooth_func,
            lambda_,
            reader.iterate(query=query),
            user_profile_size=user_profile_size,
        )
        value_calc = value_calculator.ValueCalculator(est)

        folder_name = 'user_%d' % user
        user_folder = os.path.join(out_folder, folder_name)
        os.mkdir(user_folder)

        #Initial information
        with io.open(os.path.join(user_folder, 'info'), 'w') as info:
            info.write(u'#UID: %d\n' % user)

            relevant_str = ' '.join(map(str, relevant))
            annotated_str = ' '.join(map(str, annotated))

            n_relevant = len(relevant)
            info.write(u'# %d relevant  items: %s\n' %
                       (n_relevant, str(relevant_str)))
            n_annotated = len(annotated)
            info.write(u'# %d annotated items: %s\n' %
                       (n_annotated, str(annotated_str)))

        #Create Graph
        annot_iter = reader.iterate(query=query)
        tag_to_item, item_to_tag = \
            index_creator.create_double_occurrence_index(annot_iter,
                                                         'tag', 'item')

        #Items to consider <-> Gamma items
        #(annotated items are removed in place from the full id range)
        candidates = set(xrange(est.num_items()))
        already_annotated = set(annotated)
        candidates.difference_update(already_annotated)
        candidate_array = np.array([item_id for item_id in candidates])

        compute_tag_values(est, value_calc, tag_to_item, user, user_folder,
                           candidate_array)

        out_fpath = os.path.join(user_folder, 'relevant_item.tags')
        with io.open(out_fpath, 'w') as rel:
            rel.write(u'#ITEM TAG\n')
            for item in relevant:
                tags_of_item = item_to_tag[item]
                for tag in tags_of_item:
                    rel.write(u'%d %d\n' % (item, tag))