Example #1
File: splitting.py Project: banstala/alpha
import numpy as np
import pandas as pd

# pprint, MAX_SEED and test_train_split are helpers defined elsewhere in banstala/alpha.


def test_train_inPlaceSplit_Frame(ratingList,
                                  testSize=0.2,
                                  relativeSplit=True,
                                  shuffle=False,
                                  random_state=None,
                                  axis=0):
    test = []
    train = []

    if shuffle:
        if random_state is None:
            random_state = int(np.random.rand() * MAX_SEED)
        pprint("-> Random State %d" % random_state)

    group = ratingList.groupby('userId')
    for key in group.groups.keys():
        iTest, iTrain = test_train_split(pd.Series(group.groups[key]),
                                         testSize, relativeSplit, shuffle,
                                         random_state, axis)
        test.extend(iTest)
        train.extend(iTrain)

    # Copy out the held-out rows, then zero their ratings in the original
    # frame -- the "in place" part of the split.
    testSet = ratingList.loc[test]
    ratingList.loc[test, 'rating'] = 0

    return (testSet, ratingList)
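
A minimal, self-contained sketch of the same idea -- hold out ratings per user, then zero them in the training frame. Only the column names userId/itemId/rating come from the snippet; the toy data and the one-rating-per-user choice are illustrative assumptions.

import pandas as pd

# Toy ratings frame with the columns the snippet expects.
ratings = pd.DataFrame({
    'userId': [1, 1, 1, 2, 2],
    'itemId': [10, 11, 12, 10, 13],
    'rating': [4.0, 3.0, 5.0, 2.0, 4.5],
})

test_idx = []
for _, group in ratings.groupby('userId'):
    # Hold out one rating per user (an absolute, non-relative split).
    test_idx.extend(group.sample(n=1, random_state=42).index)

testSet = ratings.loc[test_idx].copy()
ratings.loc[test_idx, 'rating'] = 0  # zero the held-out ratings in place

print(testSet)
print(ratings)
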
Example #2
File: hybrid.py Project: banstala/alpha
    def calculate(self, ratingTable, avgRating, **params):
        self.name = "%s+%s" % (params["algo1"].name, params["algo2"].name)

        pprint('Calculating %s Scores' % self.name)

        self.alpha = params["alpha"]
        self.algo1 = params["algo1"]
        self.algo2 = params["algo2"]
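
The calculate step above only stores the blend parameters; the snippet does not show where the two component score matrices are combined. A plausible convex blend, assuming each component exposes a user-by-user score DataFrame, is sketched below (blend_scores is a hypothetical helper, not the project's code).

import pandas as pd

def blend_scores(score1, score2, alpha):
    # Convex combination of two similarity-score frames: alpha weights the first.
    return alpha * score1 + (1 - alpha) * score2

s1 = pd.DataFrame([[1.0, 0.2], [0.2, 1.0]], index=[1, 2], columns=[1, 2])
s2 = pd.DataFrame([[1.0, 0.8], [0.8, 1.0]], index=[1, 2], columns=[1, 2])
print(blend_scores(s1, s2, alpha=0.5))
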
Example #3
    def calculate(self, ratingTable, avgRating, **params):
        pprint('Calculating %s Scores' % self.name)
        personalityScoresFrame = pd.DataFrame(index=ratingTable.columns)
        for i in ratingTable.columns:
            personalityScoresFrame[i] = self.__personalityScoreUsers(
                i, ratingTable, params["persScores"])

        self.score = personalityScoresFrame
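
__personalityScoreUsers is private and not shown. Personality-aware recommenders commonly score user pairs by the similarity of their trait vectors, so one plausible sketch, assuming persScores is a DataFrame indexed by userId with one column per trait, is the cosine similarity below (an assumption, not the project's definition).

import numpy as np

def personality_similarity(user_a, user_b, persScores):
    # Cosine similarity between two users' personality trait vectors
    # (assumed layout: persScores rows indexed by userId, columns are traits).
    a = persScores.loc[user_a].to_numpy(dtype=float)
    b = persScores.loc[user_b].to_numpy(dtype=float)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    return float(a @ b / denom) if denom else 0.0
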
Example #4
	def calculate(self, ratingTable, avgRating, **params):
		pprint('Calculating %s Scores' % self.name)
		
		gamma = params["k"]
		pearsonScoresFrame = pd.DataFrame(index = ratingTable.columns)
		for i in ratingTable.columns:
			pearsonScoresFrame[i] = self.__pearsonScoreUsers(i, ratingTable, avgRating, gamma)
		
		self.score = pearsonScoresFrame
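
__pearsonScoreUsers is not shown either; note that the neighbourhood size k is passed through to it under the name gamma. A common reading is significance-weighted Pearson correlation over co-rated items, sketched below purely as an assumption about what the helper computes.

import numpy as np

def pearson_similarity(u, v, gamma):
    # u, v: two user columns from ratingTable (shared item index, NaN where unrated).
    common = u.notna() & v.notna()
    if common.sum() < 2:
        return 0.0
    cu = u[common] - u[common].mean()
    cv = v[common] - v[common].mean()
    denom = np.sqrt((cu ** 2).sum() * (cv ** 2).sum())
    if denom == 0:
        return 0.0
    pearson = (cu * cv).sum() / denom
    # Significance weighting damps similarities built on few co-rated items
    # (an assumed role for gamma; the original helper may differ).
    return pearson * min(common.sum(), gamma) / gamma
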
Example #5
File: pip.py Project: banstala/alpha
    def calculate(self, ratingTable, avgRating, **params):
        pprint('Calculating %s Scores' % self.name)

        pipScoresFrame = pd.DataFrame(index=ratingTable.columns)
        for i in ratingTable.columns:
            pipScoresFrame[i] = self.__pipScoreUsers(i, ratingTable,
                                                     params["itemsAvgRating"])
            pipScoresFrame[i] = normalizeScore(pipScoresFrame[i])

        self.score = pipScoresFrame
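
normalizeScore is another project helper that is not shown; a simple min-max rescaling of each score column to [0, 1] would be one way to implement it (an assumption, not the project's definition).

import pandas as pd

def normalize_score(scores):
    # Min-max rescale a pandas Series to [0, 1]; a constant column maps to all zeros.
    lo, hi = scores.min(), scores.max()
    if hi == lo:
        return scores * 0.0
    return (scores - lo) / (hi - lo)

print(normalize_score(pd.Series([2.0, 4.0, 6.0])))  # 0.0, 0.5, 1.0
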
Example #6
	def predict_evaluate(self, ratingTable, avgRating, testRatingList, k, **params):
		pprint("Evaluating %s Method" % self.name)
		
		if self.score is None:
			self.calculate(ratingTable, avgRating, **params)
		
		self.predict(ratingTable, avgRating, testRatingList, k)
		
		testScores = metrics.specificity_precision_accuracy(testRatingList['rating'], self.prediction)
		
		testScores.extend([metrics.mae(testRatingList['rating'], self.prediction),
		                   metrics.rmse(testRatingList['rating'], self.prediction)])
		
		self.metrics = testScores
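
metrics.mae and metrics.rmse are project helpers; their standard definitions over true and predicted rating vectors look like the sketch below (names and signatures are assumptions, not the project's own code).

import numpy as np

def mae(y_true, y_pred):
    # Mean absolute error between true and predicted ratings.
    y_true, y_pred = np.asarray(y_true, float), np.asarray(y_pred, float)
    return np.abs(y_true - y_pred).mean()

def rmse(y_true, y_pred):
    # Root mean squared error between true and predicted ratings.
    y_true, y_pred = np.asarray(y_true, float), np.asarray(y_pred, float)
    return np.sqrt(((y_true - y_pred) ** 2).mean())

print(mae([4, 3], [3.5, 3]), rmse([4, 3], [3.5, 3]))  # 0.25 0.3535...
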
Example #7
File: testMode.py Project: banstala/alpha
def run():
    keys = list(dataset.DATASETS.keys())
    if not MULTI_TYPE_TEST:
        keys = keys[:1]

    for key in keys:
        pprint("Testing for %s" % key, symbolCount=16, sepCount=1)

        # Load the Datasets
        ratingList = dataset.getRatingsList(key)
        persScores = dataset.getPersonalityDataset()

        # Minimise dataset for Optimization
        if SHOULD_MINIMIZE_SET:
            ratingList, persScores = dataset.minimizeSet(
                ratingList, persScores, MINIMUM_ITEM_RATED_COUNT,
                MINIMUM_USER_RATE_COUNT)

        # testSize=1 with relativeSplit=False holds out one rating per user
        testRatingList, trainRatingList = splitting.test_train_inPlaceSplit_Frame(
            ratingList,
            1,
            relativeSplit=False,
            shuffle=SHOULD_SHUFFLE,
            random_state=RANDOM_STATE)

        testRatingList.index = testRatingList['itemId']
        ratingTable = dataset.getRatingTable(trainRatingList)
        sparsity = 1 - len(trainRatingList) / np.prod(ratingTable.shape)
        pprint("-> Sparsity: %f%%" % float(sparsity * 100))

        # Calculate Timings of High Computation Tasks
        with Timing() as startTime:

            # Get Average Ratings
            avgRating = rating.getUsersAverageRating(ratingTable)
            itemsAvgRating = rating.getItemsAverageRating(ratingTable)

            # Calculating Scores
            methods = {
                algo.Pearson.TASK:
                algo.Pearson(ratingTable, avgRating, k=NEIGHBOURS_COUNT),
                algo.Pip.TASK:
                algo.Pip(ratingTable,
                         avgRating,
                         itemsAvgRating=itemsAvgRating,
                         k=NEIGHBOURS_COUNT),
                algo.Personality.TASK:
                algo.Personality(ratingTable,
                                 avgRating,
                                 persScores=persScores,
                                 k=NEIGHBOURS_COUNT),
            }
            hybrids = {
                "pipPer":
                algo.Hybrid(ratingTable,
                            avgRating,
                            algo1=methods[algo.Pip.TASK],
                            algo2=methods[algo.Personality.TASK],
                            alpha=HYBRID_ALPHA),
                "persPer":
                algo.Hybrid(ratingTable,
                            avgRating,
                            algo1=methods[algo.Personality.TASK],
                            algo2=methods[algo.Personality.TASK],
                            alpha=HYBRID_ALPHA),
            }

            pprint("-> Scores Calculated in %.4f seconds" %
                   startTime.getElapsedTime())

        # Time the prediction and evaluation step
        with Timing() as startTime:

            # Calculating Ratings and Metrics
            for method in methods.values():
                method.predict_evaluate(ratingTable,
                                        avgRating,
                                        testRatingList,
                                        k=NEIGHBOURS_COUNT,
                                        itemsAvgRating=itemsAvgRating)
            for method in hybrids.values():
                method.predict_evaluate(ratingTable,
                                        avgRating,
                                        testRatingList,
                                        k=NEIGHBOURS_COUNT,
                                        itemsAvgRating=itemsAvgRating)

            methods = {**methods, **hybrids}

            pprint("-> Ratings Calculated in %.4f seconds" %
                   startTime.getElapsedTime())

        testLabels = [
            'Method', 'Specificity', 'Precision', 'Accuracy', 'MAE', 'RMSE'
        ]

        resultLabel = " Test Scores "
        pprint(resultLabel,
               symbolCount=int(
                   (COLUMN_LENGTH * len(testLabels) - len(resultLabel)) / 2))

        rowFormat = getRowFormat(len(testLabels))

        print(rowFormat.format(*testLabels))

        for method in methods.values():
            print(
                rowFormat.format(method.name,
                                 *["%.4f" % val for val in method.metrics]))

        print("*" * (COLUMN_LENGTH * len(testLabels)), end="\n\n\n")