コード例 #1
0
ファイル: model.py プロジェクト: jonmagal/recsys_challenge
    def test_model(self, test_data, empty_solution, evaluate = False):
        """Write this model's prediction file and, optionally, its evaluation summary.

        Prediction step:
          * If ``self.prediction_file`` already exists, it is skipped.
          * Otherwise, if ``self.model_file`` does not exist, a message is
            printed and the method returns without doing anything else.
          * Otherwise the serialized classifier is read from
            ``self.model_file``, evaluated on ``test_data``, and one
            ``[userid, tweetid, predicted]`` row is written to
            ``self.prediction_file`` for every row read from
            ``empty_solution``. The i-th prediction is paired with the i-th
            row.

        Evaluation step (only when ``evaluate`` is true): the evaluation
        summary is saved to ``self.evaluation_file`` unless that file already
        exists. If the prediction step was skipped, the model is loaded and
        evaluated here; a missing model file is reported instead of crashing.

        Args:
            test_data: Dataset handed to ``Evaluation`` and to
                ``Evaluation.test_model`` (presumably a Weka ``Instances``
                object -- TODO confirm against the caller).
            empty_solution: File name handed to ``read_sheet``; each row it
                yields must expose ``'userid'`` and ``'tweetid'`` keys.
            evaluate: When true, also write the evaluation summary.

        Returns:
            None.

        Raises:
            IndexError: If there are fewer predictions than rows in
                ``empty_solution``.
        """
        # NOTE: the single-argument ``print(...)`` form below emits the same
        # text under the Python 2 print statement and the Python 3 function.

        def run_evaluation():
            # Load the serialized classifier and evaluate it on test_data.
            classifier = Classifier(jobject = serialization.read(self.model_file))
            result = Evaluation(data = test_data)
            result.test_model(classifier = classifier, data = test_data)
            return result

        evaluation = None
        if os.path.isfile(self.prediction_file):
            print('Model ' + self.name + ' already tested.')
        elif not os.path.isfile(self.model_file):
            print('Impossible testing this model. It should be trained first.')
            return
        else:
            print('Starting to test_model model ' + self.name + '.')
            evaluation = run_evaluation()

            predictions = evaluation.predictions()
            rows = read_sheet(file_name = empty_solution)
            # Index into the predictions rather than pop(0): popping from the
            # front of a list shifts every remaining element on each row.
            solutions = [
                [row['userid'], row['tweetid'], predictions[index].predicted()]
                for index, row in enumerate(rows)
            ]
            write_the_solution_file(solutions, self.prediction_file)
            print('Model ' + self.name + ' tested.')

        if not evaluate:
            return

        if os.path.isfile(self.evaluation_file):
            print('Model ' + self.name + ' already evaluated.')
            return

        if evaluation is None:
            # The prediction step was skipped, so nothing has verified that
            # the model file is still present.
            if not os.path.isfile(self.model_file):
                print('Impossible testing this model. It should be trained first.')
                return
            evaluation = run_evaluation()

        save_file(file_name = self.evaluation_file, content = evaluation.to_summary())
        print('Model ' + self.name + ' evaluated.')
コード例 #2
0
def order_solution():
    """Re-sort the neural solution file and write the result to a second file.

    Rows are read from ``neural_solution.dat`` under ``DATASET_PATH``, ordered
    by user id, then engagement, then tweet id (all descending), and written
    as ``[userid, tweetid, engagement]`` rows to ``neural_solution2.dat``.
    """
    def descending_key(row):
        # Negating every numeric field turns the default ascending sort into
        # a descending one on all three criteria.
        return (-int(row['userid']), -float(row['engagement']), -int(row['tweetid']))

    ordered_rows = sorted(read_sheet(DATASET_PATH + 'neural_solution.dat'), key=descending_key)

    # Keep only the three output columns, in the order the solution file expects.
    output_rows = [
        [row['userid'], row['tweetid'], row['engagement']]
        for row in ordered_rows
    ]

    # Write the _solution file
    write_the_solution_file(output_rows, DATASET_PATH + 'neural_solution2.dat')
コード例 #3
0
def random_solution(seed=None, max_engagement=50):
    """Write a baseline solution file filled with random engagement guesses.

    Every (user, tweet) pair read from ``test_empty.dat`` under
    ``DATASET_PATH`` is assigned a random integer engagement, the results are
    sorted by user id, engagement and tweet id (all descending), and written
    to ``random_solution.dat``.

    Args:
        seed: Optional seed for a reproducible run. When ``None`` (the
            default) the module-level ``random`` generator is used.
        max_engagement: Inclusive upper bound of the random engagement;
            the lower bound is 0.

    Returns:
        None.
    """
    # A dedicated generator lets a seeded run be reproducible without
    # reseeding the module-level generator shared with the rest of the program.
    rng = random if seed is None else random.Random(seed)

    # Read the _empty file (the task)
    todos = read_todo_from_empty_file(DATASET_PATH + 'test_empty.dat')

    # For all (user,tweet) pairs, guess an engagement in [0, max_engagement]
    solutions = [
        (user, tweet, rng.randint(0, max_engagement))
        for (user, tweet) in todos
    ]

    # Sort the solutions on user id (desc), engagement (desc) and tweet id (desc)
    solutions.sort(key=lambda data: (-int(data[0]), -int(data[2]), -int(data[1])))

    # Write the _solution file
    write_the_solution_file(solutions, DATASET_PATH + 'random_solution.dat')

    # Single-argument form prints the same text on Python 2 and Python 3.
    print('done.')