예제 #1
0
    def load_recommenders(self):
        # Load classifier from file
        print "\nLOADING DATA FOR DATABASE %s AND CLASSES %s\n" % (
            self.dataset, ", ".join(self.classes))
        print "Loading community detector..."
        self.communityDetector = CommunityDetector(
            verbose=False, PATH=RECOMMENDATION_DATA_DIR + "Classifier")
        print self.communityDetector

        # Loading class recommenders
        print "Loading class recommenders..."
        self.recommenders = dict()
        for class_name in self.classes:

            self.recommenders[class_name] = TagRecommender()
            self.recommenders[class_name].set_heuristic(
                self.recommendation_heuristic)

            data = {
                'TAG_NAMES':
                load(RECOMMENDATION_DATA_DIR + self.dataset +
                     '_%s_SIMILARITY_MATRIX_' % class_name + self.metric +
                     '_SUBSET_TAG_NAMES.npy'),
                'SIMILARITY_MATRIX':
                load(RECOMMENDATION_DATA_DIR + self.dataset +
                     '_%s_SIMILARITY_MATRIX_' % class_name + self.metric +
                     '_SUBSET.npy'),
            }

            self.recommenders[class_name].load_data(data=data,
                                                    dataset="%s-%s" %
                                                    (self.dataset, class_name),
                                                    metric=self.metric)

            print self.recommenders[class_name]
예제 #2
0
    def load_recommenders(self):
        """Load the community detector and one TagRecommender per class.

        Reads pre-computed ``.npy`` tag-name arrays and similarity matrices
        from RECOMMENDATION_DATA_DIR and stores the recommenders in
        ``self.recommenders`` keyed by class name.
        """
        # Load classifier from file
        print "\nLOADING DATA FOR DATABASE %s AND CLASSES %s\n" % (self.dataset, ", ".join(self.classes))
        print "Loading community detector..."
        self.communityDetector = CommunityDetector(verbose=False, PATH=RECOMMENDATION_DATA_DIR + "Classifier")
        print self.communityDetector

        # Loading class recommenders
        print "Loading class recommenders..."
        self.recommenders = dict()
        for class_name in self.classes:

            self.recommenders[class_name] = TagRecommender()
            self.recommenders[class_name].set_heuristic(self.recommendation_heuristic)

            # Both files share the <dataset>_<class>_SIMILARITY_MATRIX_<metric> stem
            data = {
                'TAG_NAMES': load(RECOMMENDATION_DATA_DIR + self.dataset + '_%s_SIMILARITY_MATRIX_' % class_name + self.metric + '_SUBSET_TAG_NAMES.npy'),
                'SIMILARITY_MATRIX': load(RECOMMENDATION_DATA_DIR + self.dataset + '_%s_SIMILARITY_MATRIX_' % class_name + self.metric + '_SUBSET.npy'),
            }

            self.recommenders[class_name].load_data(
                data=data,
                dataset="%s-%s" % (self.dataset, class_name),
                metric=self.metric
            )

            print self.recommenders[class_name]
예제 #3
0
    def process_tag_recommendation_data(self,
                                        resources_limit=None,
                                        tag_threshold=10,
                                        line_limit=99999999999999,
                                        recompute_all_classes=False,
                                        similarity_metric="cosine"):

        # Process tas file and turn into association matrix and derived files
        database_name = self.tas_to_association_matrix(
            tag_threshold=tag_threshold, line_limit=line_limit)

        print "Loading community detector..."
        cd = CommunityDetector(verbose=False,
                               PATH=RECOMMENDATION_DATA_DIR + "Classifier")
        print cd

        # Classify existing resources
        resources_tags = loadFromJson(RECOMMENDATION_TMP_DATA_DIR +
                                      database_name + '_RESOURCES_TAGS.json')
        instances_ids = resources_tags.keys()
        try:
            resource_class = loadFromJson(
                RECOMMENDATION_DATA_DIR +
                'Classifier_classified_resources.json')
        except Exception, e:
            resource_class = dict()
예제 #4
0
class CommunityBasedTagRecommender():

    recommenders = None
    communityDetector = None
    dataProcessor = None
    #collections_ids = None
    dataset = None
    metric = None
    community_detection_heuristic = None
    classifier_type = None
    recommendation_heuristic = None
    classes = None

    def __init__(self,
                 dataset="",
                 classes=[],
                 metric="cosine",
                 community_detection_heuristic="ZeroInit",
                 recommendation_heuristic="hRankPercentage015",
                 classifier_type="bayes"):

        self.dataset = dataset
        self.classes = classes
        self.metric = metric
        self.community_detection_heuristic = community_detection_heuristic
        self.classifier_type = classifier_type
        self.recommendation_heuristic = recommendation_heuristic

    def load_recommenders(self):
        # Load classifier from file
        print "\nLOADING DATA FOR DATABASE %s AND CLASSES %s\n" % (self.dataset, ", ".join(self.classes))
        print "Loading community detector..."
        self.communityDetector = CommunityDetector(verbose=False, PATH=RECOMMENDATION_DATA_DIR + "Classifier")
        print self.communityDetector

        # Loading class recommenders
        print "Loading class recommenders..."
        self.recommenders = dict()
        for class_name in self.classes:

            self.recommenders[class_name] = TagRecommender()
            self.recommenders[class_name].set_heuristic(self.recommendation_heuristic)

            data = {
                'TAG_NAMES': load(RECOMMENDATION_DATA_DIR + self.dataset + '_%s_SIMILARITY_MATRIX_' % class_name + self.metric + '_SUBSET_TAG_NAMES.npy'),
                'SIMILARITY_MATRIX': load(RECOMMENDATION_DATA_DIR + self.dataset + '_%s_SIMILARITY_MATRIX_' % class_name + self.metric + '_SUBSET.npy'),
            }

            self.recommenders[class_name].load_data(
                data=data,
                dataset="%s-%s" % (self.dataset, class_name),
                metric=self.metric
            )

            print self.recommenders[class_name]

    def recommend_tags(self, input_tags, max_number_of_tags=None):
        com_name = self.communityDetector.detectCommunity(input_tags)
        rec = self.recommenders[com_name].recommend_tags(input_tags)

        return rec[0:max_number_of_tags], com_name
예제 #5
0
class CommunityBasedTagRecommender():

    recommenders = None
    communityDetector = None
    dataProcessor = None
    #collections_ids = None
    dataset = None
    metric = None
    community_detection_heuristic = None
    classifier_type = None
    recommendation_heuristic = None
    classes = None

    def __init__(self,
                 dataset="",
                 classes=[],
                 metric="cosine",
                 community_detection_heuristic="ZeroInit",
                 recommendation_heuristic="hRankPercentage015",
                 classifier_type="bayes"):

        self.dataset = dataset
        self.classes = classes
        self.metric = metric
        self.community_detection_heuristic = community_detection_heuristic
        self.classifier_type = classifier_type
        self.recommendation_heuristic = recommendation_heuristic

    def load_recommenders(self):
        # Load classifier from file
        print "\nLOADING DATA FOR DATABASE %s AND CLASSES %s\n" % (
            self.dataset, ", ".join(self.classes))
        print "Loading community detector..."
        self.communityDetector = CommunityDetector(
            verbose=False, PATH=RECOMMENDATION_DATA_DIR + "Classifier")
        print self.communityDetector

        # Loading class recommenders
        print "Loading class recommenders..."
        self.recommenders = dict()
        for class_name in self.classes:

            self.recommenders[class_name] = TagRecommender()
            self.recommenders[class_name].set_heuristic(
                self.recommendation_heuristic)

            data = {
                'TAG_NAMES':
                load(RECOMMENDATION_DATA_DIR + self.dataset +
                     '_%s_SIMILARITY_MATRIX_' % class_name + self.metric +
                     '_SUBSET_TAG_NAMES.npy'),
                'SIMILARITY_MATRIX':
                load(RECOMMENDATION_DATA_DIR + self.dataset +
                     '_%s_SIMILARITY_MATRIX_' % class_name + self.metric +
                     '_SUBSET.npy'),
            }

            self.recommenders[class_name].load_data(data=data,
                                                    dataset="%s-%s" %
                                                    (self.dataset, class_name),
                                                    metric=self.metric)

            print self.recommenders[class_name]

    def recommend_tags(self, input_tags, max_number_of_tags=None):
        com_name = self.communityDetector.detectCommunity(input_tags)
        rec = self.recommenders[com_name].recommend_tags(input_tags)

        return rec[0:max_number_of_tags], com_name
예제 #6
0
    def process_tag_recommendation_data(self,
                                        resources_limit=None,
                                        tag_threshold=10,
                                        line_limit=99999999999999,
                                        recompute_all_classes=False,
                                        similarity_metric="cosine"):

        # Process tas file and turn into association matrix and derived files
        database_name = self.tas_to_association_matrix(tag_threshold=tag_threshold, line_limit=line_limit)

        print "Loading community detector..."
        cd = CommunityDetector(verbose=False, PATH=RECOMMENDATION_DATA_DIR + "Classifier")
        print cd

        # Classify existing resources
        resources_tags = loadFromJson(RECOMMENDATION_TMP_DATA_DIR + database_name + '_RESOURCES_TAGS.json')
        instances_ids = resources_tags.keys()
        try:
            resource_class = loadFromJson(RECOMMENDATION_DATA_DIR + 'Classifier_classified_resources.json')
        except Exception as e:
            resource_class = dict()

        for count, id in enumerate(instances_ids):
            if not recompute_all_classes:
                if id not in resource_class:
                    resource_class[id] = cd.detectCommunity(input_tags=resources_tags[id])
            else:
                resource_class[id] = cd.detectCommunity(input_tags=resources_tags[id])

            if self.verbose:
                sys.stdout.write("\rClassifying resources... %.2f%%"%(float(100*(count+1))/len(instances_ids)))
                sys.stdout.flush()

        print ""
        saveToJson(RECOMMENDATION_DATA_DIR + 'Classifier_classified_resources.json', resource_class)
        print ""

        print "\nComputing data for general recommender..."
        self.association_matrix_to_similarity_matrix(
            dataset=database_name,
            training_set=instances_ids[0:resources_limit],
            save_sim=True,
            is_general_recommender=True,
            metric=similarity_metric,
        )

        print "\nComputing data for class recommenders..."
        instance_id_class = []
        distinct_classes = []
        for count, instance_id in enumerate(instances_ids):
            class_id = resource_class[instance_id]
            instance_id_class.append([instance_id, class_id])

            if class_id not in distinct_classes:
                distinct_classes.append(class_id)

        print distinct_classes

        for collection_id in distinct_classes:
            print "\nComputing recommender for collection %s..." % collection_id

            # All resources from the training set classified as the selected category
            # (instead of all manually labeled)
            training_ids = []
            for instance in instance_id_class:
                if instance[1] == collection_id:
                    training_ids.append(instance[0])
            # Add limit
            training_ids = training_ids[0:resources_limit]

            if len(training_ids) < 1:
                raise Exception("Too less training ids for collection %s" % collection_id)

            self.association_matrix_to_similarity_matrix(
                dataset=database_name,
                training_set=training_ids,
                save_sim=True,
                out_name_prefix=collection_id,
                is_general_recommender=False,
                metric=similarity_metric,
            )