def main(database, table, smooth_func, lambda_, min_tag_freq):
    """Print the renormalized P(item | tag) for every considered tag.

    Parameters
    ----------
    database : str
        Name of the annotation database opened through ``AnnotReader``.
    table : str
        Table inside the database to read annotations from.
    smooth_func, lambda_ :
        Smoothing function and parameter forwarded to ``SmoothEstimator``.
    min_tag_freq : int
        Minimum tag popularity for a tag to be considered; a negative
        value means *all* tags are considered.

    Side effects: prints one dict ``{'tag', 'item', 'prob_it'}`` per
    (tag, item) pair to stdout.
    """
    with AnnotReader(database) as reader:
        reader.change_table(table)

        # Builds value calculator
        estimator = SmoothEstimator(smooth_func, lambda_, reader.iterate())
        calculator = ValueCalculator(estimator)

        # Determine tags which will be considered
        if min_tag_freq < 0:
            # All tags, identified by their dense ids [0, num_tags)
            tags_to_consider = range(estimator.num_tags())
        else:
            # Keep only tags whose popularity reaches the threshold
            tags_to_consider = []
            counter = Counter(annot['tag'] for annot in reader.iterate())
            for tag, pop in counter.iteritems():
                if pop >= min_tag_freq:
                    tags_to_consider.append(tag)

        # Dumps probabilities.
        # NOTE(review): the original code kept a `connection = None` plus a
        # try/finally calling `connection.disconnect()` — the connection was
        # never assigned, so the finally block was dead code and was removed.
        items = np.arange(estimator.num_items())
        for tag in tags_to_consider:
            v_prob_it = calculator.rnorm_prob_items_given_tag(tag, items)
            for item, prob in enumerate(v_prob_it):
                print({'tag': tag, 'item': item, 'prob_it': float(prob)})
def compute_for_user(database, table, user, relevant, annotated, smooth_func,
                     lambda_, user_profile_size, out_folder):
    """Compute and persist per-tag value data for a single user.

    Creates ``<out_folder>/user_<user>/`` containing an ``info`` header
    file, the output of ``compute_tag_values`` and a
    ``relevant_item.tags`` file mapping each relevant item to its tags.
    The user's relevant items are excluded from the training data.
    """
    with AnnotReader(database) as reader:
        reader.change_table(table)

        # Training query: keep annotations from other users, plus this
        # user's annotations on non-relevant items (relevant are held out)
        query = {'$or': [{'user': {'$ne': user}},
                         {'item': {'$nin': relevant}}]}

        # Probability estimator over the held-out training set
        estimator = SmoothEstimator(smooth_func, lambda_,
                                    reader.iterate(query=query),
                                    user_profile_size=user_profile_size)
        calc = value_calculator.ValueCalculator(estimator)

        user_folder = os.path.join(out_folder, 'user_%d' % user)
        os.mkdir(user_folder)

        # Header file describing this user's held-out / annotated items
        with io.open(os.path.join(user_folder, 'info'), 'w') as info:
            info.write(u'#UID: %d\n' % user)
            relevant_str = ' '.join(str(i) for i in relevant)
            annotated_str = ' '.join(str(i) for i in annotated)
            info.write(u'# %d relevant items: %s\n'
                       % (len(relevant), relevant_str))
            info.write(u'# %d annotated items: %s\n'
                       % (len(annotated), annotated_str))

        # Build tag<->item occurrence indices from the same training query
        annotations = reader.iterate(query=query)
        tag_to_item, item_to_tag = \
            index_creator.create_double_occurrence_index(annotations,
                                                         'tag', 'item')

        # Gamma items: every known item the user has not annotated
        gamma_items = set(xrange(estimator.num_items())) - set(annotated)
        compute_tag_values(estimator, calc, tag_to_item, user, user_folder,
                           np.array([i for i in gamma_items]))

        # Dump the tags attached to each relevant item
        relevant_tags_fpath = os.path.join(user_folder, 'relevant_item.tags')
        with io.open(relevant_tags_fpath, 'w') as rel:
            rel.write(u'#ITEM TAG\n')
            for item in relevant:
                for tag in item_to_tag[item]:
                    rel.write(u'%d %d\n' % (item, tag))
def compute_for_user(database, table, user, relevant, annotated, smooth_func,
                     lambda_, user_profile_size, out_folder):
    """Run the tag-value computation for one user and write results.

    Output goes to ``<out_folder>/user_<user>/``: an ``info`` summary,
    the files produced by ``compute_tag_values``, and
    ``relevant_item.tags`` listing the tags of each relevant item.
    Annotations of *user* on *relevant* items are held out of training.
    """
    with AnnotReader(database) as reader:
        reader.change_table(table)

        # Hold out this user's relevant items from the training data
        not_this_user = {'user': {'$ne': user}}
        not_relevant_item = {'item': {'$nin': relevant}}
        query = {'$or': [not_this_user, not_relevant_item]}

        # Smoothed probability estimator and derived value calculator
        est = SmoothEstimator(smooth_func, lambda_,
                              reader.iterate(query=query),
                              user_profile_size=user_profile_size)
        value_calc = value_calculator.ValueCalculator(est)

        user_folder = os.path.join(out_folder, 'user_%d' % user)
        os.mkdir(user_folder)

        # Summary header for this user's run
        info_fpath = os.path.join(user_folder, 'info')
        with io.open(info_fpath, 'w') as info:
            info.write(u'#UID: %d\n' % user)
            info.write(u'# %d relevant items: %s\n'
                       % (len(relevant), ' '.join(str(i) for i in relevant)))
            info.write(u'# %d annotated items: %s\n'
                       % (len(annotated), ' '.join(str(i) for i in annotated)))

        # Double occurrence index: tag -> items and item -> tags
        tag_to_item, item_to_tag = \
            index_creator.create_double_occurrence_index(
                reader.iterate(query=query), 'tag', 'item')

        # Candidate (gamma) items = all items minus the user's annotations
        candidates = set(xrange(est.num_items()))
        candidates.difference_update(set(annotated))
        compute_tag_values(est, value_calc, tag_to_item, user, user_folder,
                           np.array([item for item in candidates]))

        # Record which tags each relevant item carries
        tags_fpath = os.path.join(user_folder, 'relevant_item.tags')
        with io.open(tags_fpath, 'w') as rel:
            rel.write(u'#ITEM TAG\n')
            for item in relevant:
                for tag in item_to_tag[item]:
                    rel.write(u'%d %d\n' % (item, tag))