Example #1
0
class SC1BSubmissions(SubmissionTools, ST2):
    """Load, filter, rank and summarise the SCORED submissions of SC1B.

    After :meth:`load_submissions` has run, the working list of submissions
    (one dict per submission) is available in the ``submissions`` attribute.
    """

    def __init__(self, client=None, name="SC1B"):
        """Initialise the parent classes and create the HPN admin helper.

        :param client: forwarded to the parent constructor.
        :param name: forwarded to the parent constructor.
        """
        super(SC1BSubmissions, self).__init__(client=client, name=name)
        # Kept as a local import, as in the original code (presumably to
        # avoid an import cycle with ``hpn`` -- TODO confirm).
        from hpn import HPNAdmin
        # ``self.client`` is expected to have been set by the parent
        # constructor above.
        self.hpn = HPNAdmin(client=self.client)

    def load_submissions(self, startweek=0, endweek=9, keep_latest=True):
        """Load all SCORED submissions from SC1B into ``self.submissions``.

        The submissions are fetched, tagged with their week, restricted to
        the requested week range, optionally reduced with
        ``self._keep_latest_only()``, tagged with their status, stripped of
        the users excluded by :meth:`remove_users`, and finally given the
        ``auc``, ``zscore`` and ``ranking`` keys by :meth:`_attach_zscores`.

        :param startweek: first week to keep (inclusive).
        :param endweek: last week to keep (inclusive).
        :param keep_latest: if true, replace the list with the result of
            ``self._keep_latest_only()``.
        :return: None; the result is stored in ``self.submissions``.
        """
        # load all scored submissions
        self.submissions = self.hpn.get_submissions_network_insilico(status="SCORED")
        print("Got %s SCORED submissions" % len(self.submissions))

        # attach the week, then keep only the requested (inclusive) range
        self.submissions = self.attach_week_to_submissions(self.submissions, "sc1b")
        self.submissions = [sub for sub in self.submissions
                            if startweek <= sub['week'] <= endweek]
        print("Keeping %s submissions in the week range requested" % len(self.submissions))

        if keep_latest:
            self.submissions = self._keep_latest_only()
            print("Keep %s latest scored submissions " % len(self.submissions))

        # attach the status (it carries the JSON report parsed by
        # _attach_zscores)
        print("attaching submissions")
        self.submissions = self.attach_status_to_submissions(self.submissions)

        print("remove soem users")
        self.remove_users()

        print("attaching scores and compute final ranking")
        self._attach_zscores()
        print("all submissions available in the **submissions** attribute")

    def remove_users(self, userIds=("375805", "1971259", "1961142", "2208193",
                                    "2154231", "2023612", "2200202")):
        """Drop from ``self.submissions`` every entry whose userId is listed.

        Rationale for the default identifiers:

        * 375805 alphabeta is a test from TC
        * 1971259 HD_systems has 2 ID see SC1A remove_users function.

        From Steven's analysis, some results are highly correlated:

        * ams1012, cas3, cas4 are the same submission. We keep only ams1012;
          cas3 (1961142) and cas4 (2208193) are removed.
        * gucas (2154231) is same as Zhangroup. gucas removed
        * remove chilin 2023612
        * Dream5607 (2200202) and Pitt.transmed are the same. Remove Dream5607
        * sfntt has same alias but 2 different userId. 2197351 was meant to
          be removed so that only the latest remains.

        NOTE(review): 2197351 is mentioned above but is NOT part of the
        default ``userIds``; confirm whether it should be added.

        :param userIds: collection of user identifiers to exclude. The
            membership test compares them with ``submission['userId']``
            as-is, so the types must match (the defaults are strings).
        :return: None; ``self.submissions`` is replaced by the filtered list.
        """
        self.submissions = [sub for sub in self.submissions
                            if sub['userId'] not in userIds]

    def _attach_zscores(self):
        """Attach ``auc``, ``zscore`` and ``ranking`` to every submission.

        ``auc`` and ``zscore`` are read from the ``auc`` and ``score`` fields
        of the JSON report stored in ``submission['substatus']['report']``.
        ``ranking`` is the 0-based position of the submission when sorted by
        decreasing AUC (0 is the highest AUC).
        """
        # Parse every report exactly once.
        reports = [json.loads(sub['substatus']['report'])
                   for sub in self.submissions]
        aucs = [report['auc'] for report in reports]

        for sub, report in zip(self.submissions, reports):
            sub['auc'] = report['auc']
            sub['zscore'] = report['score']

        # argsort is ascending; reverse it so that position 0 is the best AUC
        order = np.argsort(aucs)[::-1]
        for position, index in enumerate(order):
            self.submissions[index]['ranking'] = position

    def summary_final(self):
        """Print the final ranking as a pipe-delimited table and return it.

        Rows are ordered by increasing ``ranking`` and numbered from 1.
        The alias "ChaosLab" is renamed to "FreiburgBiossX"; the rename is
        written back into the submission dict itself. The p-value column is
        a constant placeholder (0).

        :return: dict mapping each submitter alias to the tuple
            ``(rank, alias, userId, substatus id, substatus entityId, auc,
            zscore, pvalue)``. If several submissions share an alias, the
            last one in ranking order wins.
        """
        header = ("Final rank", "Team name", "userID", "synapse ID", "entityID" , "AUC", "zscore", "p-value")
        print("| %12s | %20s | %20s | %20s | %12s  | %12s | %12s | %12s |" %  header)
        # One dash run per header column (8 columns), each as wide as the
        # corresponding header cell including its padding.
        separator_widths = (14, 22, 22, 22, 15, 14, 14, 14)
        print("|%s|" % "|".join("-" * width for width in separator_widths))

        ranks = np.argsort([sub['ranking'] for sub in self.submissions])
        results = {}
        for count, i in enumerate(ranks):
            sub = self.submissions[i]
            pvalue = 0  # placeholder: no p-value is computed here
            if sub['submitterAlias'] == "ChaosLab":
                sub['submitterAlias'] = "FreiburgBiossX"

            data = (count+1, sub['submitterAlias'], sub['userId'],
                    sub['substatus']['id'], sub['substatus']['entityId'],
                    sub['auc'], sub['zscore'], pvalue)
            # "%10.6s" renders the value with str() and keeps at most its
            # first 6 characters.
            print("|%12s | %20s | %20s | %20s | %10.6s|  %10.6s |%10.6s |%12.6s|" % data)
            results[sub['submitterAlias']] = data
        return results