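# Assumed imports: the names below are referenced by the code in this file,
# but their exact module paths depend on the project layout, so treat the
# commented lines as a best-guess sketch rather than the project's actual
# import list.
import sys
from numpy import load  # reads the precomputed *.npy similarity matrices
# from utils import loadFromJson, saveToJson                    # path assumed
# from community_detector import CommunityDetector              # path assumed
# from tag_recommender import TagRecommender                    # path assumed
# from settings import RECOMMENDATION_DATA_DIR, RECOMMENDATION_TMP_DATA_DIR  # path assumed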
class CommunityBasedTagRecommender():

    recommenders = None
    communityDetector = None
    dataProcessor = None
    #collections_ids = None
    dataset = None
    metric = None
    community_detection_heuristic = None
    classifier_type = None
    recommendation_heuristic = None
    classes = None

    def __init__(self, dataset="", classes=None, metric="cosine",
                 community_detection_heuristic="ZeroInit",
                 recommendation_heuristic="hRankPercentage015",
                 classifier_type="bayes"):
        self.dataset = dataset
        # Default to a fresh list to avoid the shared-mutable-default pitfall
        self.classes = classes if classes is not None else []
        self.metric = metric
        self.community_detection_heuristic = community_detection_heuristic
        self.classifier_type = classifier_type
        self.recommendation_heuristic = recommendation_heuristic

    def load_recommenders(self):
        # Load the community classifier from file
        print "\nLOADING DATA FOR DATABASE %s AND CLASSES %s\n" % (
            self.dataset, ", ".join(self.classes))
        print "Loading community detector..."
        self.communityDetector = CommunityDetector(
            verbose=False, PATH=RECOMMENDATION_DATA_DIR + "Classifier")
        print self.communityDetector

        # Load one TagRecommender per class, each backed by its own
        # precomputed similarity matrix and tag-name index
        print "Loading class recommenders..."
        self.recommenders = dict()
        for class_name in self.classes:
            self.recommenders[class_name] = TagRecommender()
            self.recommenders[class_name].set_heuristic(
                self.recommendation_heuristic)
            data = {
                'TAG_NAMES': load(RECOMMENDATION_DATA_DIR + self.dataset +
                                  '_%s_SIMILARITY_MATRIX_' % class_name +
                                  self.metric + '_SUBSET_TAG_NAMES.npy'),
                'SIMILARITY_MATRIX': load(RECOMMENDATION_DATA_DIR + self.dataset +
                                          '_%s_SIMILARITY_MATRIX_' % class_name +
                                          self.metric + '_SUBSET.npy'),
            }
            self.recommenders[class_name].load_data(
                data=data,
                dataset="%s-%s" % (self.dataset, class_name),
                metric=self.metric)
            print self.recommenders[class_name]

    def recommend_tags(self, input_tags, max_number_of_tags=None):
        # Route the input tags to their most likely community, then
        # recommend tags using that community's dedicated recommender
        com_name = self.communityDetector.detectCommunity(input_tags)
        rec = self.recommenders[com_name].recommend_tags(input_tags)
        return rec[0:max_number_of_tags], com_name
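# Minimal usage sketch. The dataset name and class list are placeholders:
# they must match the precomputed *.npy files available under
# RECOMMENDATION_DATA_DIR for load_recommenders() to find its data.
#
#   crec = CommunityBasedTagRecommender(
#       dataset="FREESOUND2012",            # placeholder dataset name
#       classes=["music", "fx", "speech"],  # placeholder community names
#       metric="cosine")
#   crec.load_recommenders()
#   tags, community = crec.recommend_tags(
#       input_tags=["guitar", "acoustic"], max_number_of_tags=10)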
# NOTE: this is a method of the data-processor class (its enclosing class
# definition is not shown here); it relies on self.tas_to_association_matrix,
# self.association_matrix_to_similarity_matrix and self.verbose.
def process_tag_recommendation_data(self, resources_limit=None, tag_threshold=10,
                                    line_limit=99999999999999,
                                    recompute_all_classes=False,
                                    similarity_metric="cosine"):
    # Process the tag-assignment (tas) file and turn it into an association
    # matrix and derived files
    database_name = self.tas_to_association_matrix(
        tag_threshold=tag_threshold, line_limit=line_limit)

    print "Loading community detector..."
    cd = CommunityDetector(verbose=False,
                           PATH=RECOMMENDATION_DATA_DIR + "Classifier")
    print cd

    # Classify existing resources, reusing previously computed classes
    # unless recompute_all_classes is set
    resources_tags = loadFromJson(RECOMMENDATION_TMP_DATA_DIR + database_name +
                                  '_RESOURCES_TAGS.json')
    instances_ids = resources_tags.keys()
    try:
        resource_class = loadFromJson(
            RECOMMENDATION_DATA_DIR + 'Classifier_classified_resources.json')
    except Exception:
        resource_class = dict()
    for count, instance_id in enumerate(instances_ids):
        if recompute_all_classes or instance_id not in resource_class:
            resource_class[instance_id] = cd.detectCommunity(
                input_tags=resources_tags[instance_id])
        if self.verbose:
            sys.stdout.write("\rClassifying resources... %.2f%%" %
                             (float(100 * (count + 1)) / len(instances_ids)))
            sys.stdout.flush()
    print ""
    saveToJson(RECOMMENDATION_DATA_DIR + 'Classifier_classified_resources.json',
               resource_class)
    print ""

    print "\nComputing data for general recommender..."
    self.association_matrix_to_similarity_matrix(
        dataset=database_name,
        training_set=instances_ids[0:resources_limit],
        save_sim=True,
        is_general_recommender=True,
        metric=similarity_metric,
    )

    print "\nComputing data for class recommenders..."
    instance_id_class = []
    distinct_classes = []
    for instance_id in instances_ids:
        class_id = resource_class[instance_id]
        instance_id_class.append([instance_id, class_id])
        if class_id not in distinct_classes:
            distinct_classes.append(class_id)
    print distinct_classes

    for collection_id in distinct_classes:
        print "\nComputing recommender for collection %s..." % collection_id
        # Use all resources from the training set classified as the selected
        # category (instead of only the manually labelled ones)
        training_ids = []
        for instance in instance_id_class:
            if instance[1] == collection_id:
                training_ids.append(instance[0])
        # Apply the resource limit
        training_ids = training_ids[0:resources_limit]
        if len(training_ids) < 1:
            raise Exception("Too few training ids for collection %s" % collection_id)
        self.association_matrix_to_similarity_matrix(
            dataset=database_name,
            training_set=training_ids,
            save_sim=True,
            out_name_prefix=collection_id,
            is_general_recommender=False,
            metric=similarity_metric,
        )
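# End-to-end sketch of how the two pieces above fit together. "DataProcessor"
# is a placeholder for whatever class process_tag_recommendation_data belongs
# to, and all argument values are illustrative.
#
#   processor = DataProcessor()
#   processor.process_tag_recommendation_data(tag_threshold=10,
#                                             similarity_metric="cosine")
#   # This writes the general and per-class similarity matrices to
#   # RECOMMENDATION_DATA_DIR, after which a CommunityBasedTagRecommender
#   # can be constructed and loaded as in the usage sketch above.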