Example #1
0
class SC2BSubmissions(SubmissionTools, ST2):
    """Load, score and rank the submissions of the SC2B sub-challenge.

    The heavy lifting (week attachment, status attachment, keeping only the
    latest submission, JSON export) is delegated to methods that are not
    defined in this class -- presumably inherited from ``SubmissionTools``
    and/or ``ST2``; confirm against those base classes.
    """

    def __init__(self, client=None, version=2):
        """Set up the admin helper and make sure the reference data exists.

        :param client: forwarded to the base-class constructor, which is
            expected to expose it as ``self.client`` (TODO confirm).
        :param version: forwarded to ``HPNScoringPredictionInsilico`` when
            the submissions are re-scored in :meth:`_get_ranking`.
        """
        super(SC2BSubmissions, self).__init__(client=client, name="SC2B")
        self.version = version
        # imported lazily, as in the rest of this class
        from hpn import HPNAdmin
        self.hpn = HPNAdmin(client=self.client)

        # download missing file automatically if needed.
        from dreamtools import Challenge
        c = Challenge('D8C1')
        c._download_data('experimental.zip', 'syn1920412')

    def load_submissions(self, startweek=0, endweek=9, keep_latest=True):
        """Load all SCORED SC2B submissions into ``self.submissions``.

        Each retained submission gets its week and status attached, then
        the keys ``zscore``, ``ranking``, ``rmses`` and ``mean_rmse`` (see
        :meth:`_attach_zscores`) and finally ``old_rmses``, the RMSE report
        originally stored with the submission status.

        :param startweek: first week (inclusive) to keep.
        :param endweek: last week (inclusive) to keep.
        :param keep_latest: if true, keep only what ``_keep_latest_only``
            returns (presumably one submission per participant).
        """
        # load all scored submissions
        self.submissions = self.hpn.get_submissions_prediction_insilico(status="SCORED")
        print("Got %s SCORED submissions" % len(self.submissions))

        # attach week and keep only the requested week range
        self.submissions = self.attach_week_to_submissions(self.submissions, "sc2b")
        self.submissions = [sub for sub in self.submissions
                            if startweek <= sub['week'] <= endweek]
        print("Keeping %s submissions in the week range requested" % len(self.submissions))

        if keep_latest:
            self.submissions = self._keep_latest_only()
            print("Keep %s latest scored submissions " % len(self.submissions))

        # attach the status (needed below: scores are read from 'substatus')
        print("attaching submissions")
        self.submissions = self.attach_status_to_submissions(self.submissions)

        print("remove some users")
        self.remove_users()

        print("attaching scores and compute final ranking")
        self._attach_zscores()
        print("all submissions available in the **submissions** attribute")

        # keep the RMSEs that were reported at submission time next to the
        # freshly recomputed ones
        for sub in self.submissions:
            sub['old_rmses'] = json.loads(sub['substatus']['report'])

    # NOTE: an earlier (commented-out) default also listed "1971259".
    def remove_users(self, userIds=("375805", "1991105")):
        """Drop the submissions whose ``userId`` is in *userIds*.

        Reasons for the default ids:

        * 375805 alphabeta is a test from TC
        * 1991105 sakve from week 5 has different id from sakev week 6. remove
          week5 that has a lower score anyway
        * HD systems (1971259): see SC1A function docstring; not part of the
          current default.

        :param userIds: container of user ids (strings) to exclude. The
            default is a tuple so that it cannot be mutated between calls.
        """
        self.submissions = [sub for sub in self.submissions
                            if sub['userId'] not in userIds]

    def _get_ranking(self):
        """Re-score every submission and return the populated ranking object.

        Each submission file is fetched through ``self.client`` (an existing
        local copy is kept), scored with ``HPNScoringPredictionInsilico`` and
        its RMSEs are registered under ``<submitterAlias>_<position>`` so
        that names stay unique even when an alias occurs more than once.
        """
        ranking = scoring.HPNScoringPredictionInsilico_ranking()
        for i, sub in enumerate(self.submissions):
            filename = self.client.getSubmission(
                sub, downloadFile=True, ifcollision="keep.local")['filePath']
            scorer = scoring.HPNScoringPredictionInsilico(filename, version=self.version)
            scorer.compute_all_rmse()
            # copy so that the ranking never shares state with the scorer
            ranking.add_rmse(copy.deepcopy(scorer.rmse),
                             sub['submitterAlias'] + "_" + str(i))
        return ranking

    def _attach_zscores(self):
        """Attach scores from the ranking to every submission.

        For each submission the keys ``zscore`` (mean z-score), ``ranking``
        (mean rank), ``rmses`` (nested RMSE mapping) and ``mean_rmse`` (mean
        of all non-NaN RMSE values) are set.

        Relies on ``ranking.participants`` and ``ranking.rmse`` being in the
        same order as ``self.submissions``, which is the order used by
        :meth:`_get_ranking` to add them.
        """
        ranking = self._get_ranking()
        zscores = ranking.get_mean_zscores()
        ranks = ranking.get_mean_ranks()

        for i, participant in enumerate(ranking.participants):
            sub = self.submissions[i]
            sub['zscore'] = zscores[participant]
            sub['ranking'] = ranks[participant]

            rmse = ranking.rmse[i]
            sub['rmses'] = rmse

            # flatten the two-level mapping and ignore missing (NaN) scores
            values = [value for inner in rmse.values() for value in inner.values()]
            values = [value for value in values if not np.isnan(value)]
            sub['mean_rmse'] = np.mean(values)

    def summary_final(self, show=True):
        """Return the final leaderboard as a DataFrame (best mean rank first).

        The frame is indexed by ``Final Rank`` (the pandas rank of the mean
        rank, so ties share an averaged rank) and has the columns
        ``Team Name``, ``Team Id``, ``Submission Id``, ``Entity Id``,
        ``Mean Rank`` and ``Mean RMSE``.

        :param show: unless this is exactly ``False``, the table is also
            printed through ``dataframe_towiki``.
        :return: the leaderboard DataFrame.
        """
        subs = self.submissions

        # positions of the submissions sorted by increasing mean rank
        order = np.argsort([sub['ranking'] for sub in subs])
        ordered = [subs[pos] for pos in order]

        teams = [sub['submitterAlias'] for sub in ordered]
        df = pd.DataFrame(index=teams)
        df['Team Name'] = teams
        df['Team Id'] = [sub['userId'] for sub in ordered]
        df['Submission Id'] = [sub['substatus']['id'] for sub in ordered]
        df['Entity Id'] = [sub['substatus']['entityId'] for sub in ordered]
        df['Mean Rank'] = [sub['ranking'] for sub in ordered]
        df['Mean RMSE'] = [sub['mean_rmse'] for sub in ordered]

        df['Final Rank'] = df['Mean Rank'].rank().values
        df = df.set_index('Final Rank')
        df = df[['Team Name', 'Team Id', 'Submission Id',
                 'Entity Id', 'Mean Rank', 'Mean RMSE']]

        # only an explicit False silences the printout (historical contract)
        if show is not False:
            print(dataframe_towiki(df))
        return df

    def save_rmse_to_json(self):
        """Delegate to the parent implementation with ``N1=231, N2=254``.

        NOTE(review): the meaning of N1/N2 is defined by the parent class.
        """
        super(SC2BSubmissions, self).save_rmse_to_json(N1=231, N2=254)
Example #2
0
class SC2BSubmissions(SubmissionTools):
    """Load, score and rank the submissions of the SC2B sub-challenge.

    Week attachment, status attachment and the "keep latest" filter are
    delegated to methods that are not defined in this class -- presumably
    inherited from ``SubmissionTools``; confirm against that base class.
    """

    def __init__(self, client=None, version='official'):
        """Set up the admin helper and make sure the reference data exists.

        :param client: forwarded to the base-class constructor, which is
            expected to expose it as ``self.client`` (TODO confirm).
        :param version: forwarded to ``HPNScoringPredictionInsilico`` when
            the submissions are re-scored in :meth:`_get_ranking`.
        """
        super(SC2BSubmissions, self).__init__(client=client, name="SC2B")
        # imported lazily, as in the rest of this class
        from hpn import HPNAdmin
        self.hpn = HPNAdmin(client=self.client)
        self.version = version

        # download missing file automatically if needed.
        from dreamtools import Challenge
        c = Challenge('D8C1')
        c._download_data('experimental.zip', 'syn1920412')

    def load_submissions(self, startweek=0, endweek=9, keep_latest=True):
        """Load all SCORED SC2B submissions into ``self.submissions``.

        Each retained submission gets its week and status attached, then
        the keys ``zscore``, ``ranking``, ``rmses`` and ``mean_rmse`` (see
        :meth:`_attach_zscores`) and finally ``old_rmses``, the RMSE report
        originally stored with the submission status.

        :param startweek: first week (inclusive) to keep.
        :param endweek: last week (inclusive) to keep.
        :param keep_latest: if true, keep only what ``_keep_latest_only``
            returns (presumably one submission per participant).
        """
        # load all scored submissions
        self.submissions = self.hpn.get_submissions_prediction_insilico(status="SCORED")
        print("Got %s SCORED submissions" % len(self.submissions))

        # attach week and keep only the requested week range
        self.submissions = self.attach_week_to_submissions(self.submissions, "sc2b")
        self.submissions = [sub for sub in self.submissions
                            if startweek <= sub['week'] <= endweek]
        print("Keeping %s submissions in the week range requested" % len(self.submissions))

        if keep_latest:
            self.submissions = self._keep_latest_only()
            print("Keep %s latest scored submissions " % len(self.submissions))

        # attach the status (needed below: scores are read from 'substatus')
        print("attaching submissions")
        self.submissions = self.attach_status_to_submissions(self.submissions)

        print("remove some users")
        self.remove_users()

        print("attaching scores and compute final ranking")
        self._attach_zscores()
        print("all submissions available in the **submissions** attribute")

        # keep the RMSEs that were reported at submission time next to the
        # freshly recomputed ones
        for sub in self.submissions:
            sub['old_rmses'] = json.loads(sub['substatus']['report'])

    # NOTE: an earlier (commented-out) default also listed "1971259".
    def remove_users(self, userIds=("375805", "1991105")):
        """Drop the submissions whose ``userId`` is in *userIds*.

        Reasons for the default ids:

        * 375805 alphabeta is a test from TC
        * 1991105 sakve from week 5 has different id from sakev week 6. remove
          week5 that has a lower score anyway
        * HD systems (1971259): see SC1A function docstring; not part of the
          current default.

        :param userIds: container of user ids (strings) to exclude. The
            default is a tuple so that it cannot be mutated between calls.
        """
        self.submissions = [sub for sub in self.submissions
                            if sub['userId'] not in userIds]

    def _get_ranking(self):
        """Re-score every submission and return the populated ranking object.

        Each submission file is fetched through ``self.client`` (an existing
        local copy is kept), scored with ``HPNScoringPredictionInsilico`` and
        its RMSEs are registered under ``<submitterAlias>_<position>`` so
        that names stay unique even when an alias occurs more than once.
        """
        ranking = scoring.HPNScoringPredictionInsilico_ranking()
        for i, sub in enumerate(self.submissions):
            filename = self.client.getSubmission(
                sub, downloadFile=True, ifcollision="keep.local")['filePath']
            scorer = scoring.HPNScoringPredictionInsilico(filename, version=self.version)
            scorer.compute_all_rmse()
            # copy so that the ranking never shares state with the scorer
            ranking.add_rmse(copy.deepcopy(scorer.rmse),
                             sub['submitterAlias'] + "_" + str(i))
        return ranking

    def _attach_zscores(self):
        """Attach scores from the ranking to every submission.

        For each submission the keys ``zscore`` (mean z-score), ``ranking``
        (mean rank), ``rmses`` (nested RMSE mapping) and ``mean_rmse`` (mean
        of all non-NaN RMSE values) are set.

        Relies on ``ranking.participants`` and ``ranking.rmse`` being in the
        same order as ``self.submissions``, which is the order used by
        :meth:`_get_ranking` to add them.
        """
        ranking = self._get_ranking()
        zscores = ranking.get_mean_zscores()
        ranks = ranking.get_mean_ranks()

        for i, participant in enumerate(ranking.participants):
            sub = self.submissions[i]
            sub['zscore'] = zscores[participant]
            sub['ranking'] = ranks[participant]

            rmse = ranking.rmse[i]
            sub['rmses'] = rmse

            # flatten the two-level mapping and ignore missing (NaN) scores
            values = [value for inner in rmse.values() for value in inner.values()]
            values = [value for value in values if not np.isnan(value)]
            sub['mean_rmse'] = np.mean(values)

    def summary(self, show="all"):
        """Print a wiki-style table of all submissions, best mean rank first.

        The cells of every row follow the order announced by the header:
        created-on before status, and z-score before mean rank.

        :param show: currently unused; kept so existing callers keep working.
        """
        order = np.argsort([sub['ranking'] for sub in self.submissions])
        print("| Rank | User Id | Submitted Alias | Week | created on | status |  RMSE | zscore | mean Rank |")
        for i, pos in enumerate(order):
            sub = self.submissions[pos]
            print("|%s | %10s | %5s |%5s |%s|%s|  %10.6s  | %10.6s |%10.6s |" %
                  (i + 1, sub['userId'], sub['submitterAlias'], sub['week'],
                   sub['createdOn'], sub['substatus']['status'],
                   sub['mean_rmse'], sub['zscore'], sub['ranking']))

    def summary_final(self):
        """Print the final leaderboard and return the rank of every team.

        The table has one header cell per printed value (eight columns),
        including the submission id and the entity id.

        :return: dict mapping ``submitterAlias`` to its 1-based position in
            the leaderboard. If an alias occurs several times, the position
            of its last (worst-ranked) occurrence wins.
        """
        order = np.argsort([sub['ranking'] for sub in self.submissions])
        print("| Rank | Team name | SynapseId Id | Submission Id | Entity Id "
              "| mean RMSE | mean Rank | mean zscore |")
        print("|----|-----|-----|-----|-----|----|----|------|")

        results = {}
        for i, pos in enumerate(order):
            sub = self.submissions[pos]
            print("|%s | %20s | %20s | %20s | %20s | %10.6s  | %10.6s |%10.6s |" %
                  (i + 1, sub['submitterAlias'], sub['userId'],
                   sub['substatus']['id'], sub['substatus']['entityId'],
                   sub['mean_rmse'], sub['ranking'], sub['zscore']))
            results[sub['submitterAlias']] = i + 1
        return results