def __init__(self, client=None, version=2):
    """Set up the SC2B submission helper.

    :param client: forwarded to the parent constructor; ``self.client`` is
        then handed to :class:`HPNAdmin`.
    :param version: stored unchanged as ``self.version``.
    """
    super(SC2BSubmissions, self).__init__(client=client, name="SC2B")
    self.version = version

    from hpn import HPNAdmin
    admin = HPNAdmin(client=self.client)
    self.hpn = admin

    # download missing file automatically if needed.
    from dreamtools import Challenge
    Challenge('D8C1')._download_data('experimental.zip', 'syn1920412')
class SC2BSubmissions(SubmissionTools, ST2):
    """Load the SCORED SC2B submissions and rank them by RMSE.

    Submissions are fetched through
    ``HPNAdmin.get_submissions_prediction_insilico`` and re-scored locally
    with ``scoring.HPNScoringPredictionInsilico``.
    """

    def __init__(self, client=None, version=2):
        """Set up the SC2B submission helper.

        :param client: forwarded to the parent constructor; ``self.client``
            is then handed to :class:`HPNAdmin`.
        :param version: stored as ``self.version`` and passed to the scoring
            class in :meth:`_get_ranking`.
        """
        super(SC2BSubmissions, self).__init__(client=client, name="SC2B")
        self.version = version
        from hpn import HPNAdmin
        self.hpn = HPNAdmin(client=self.client)

        # download missing file automatically if needed.
        from dreamtools import Challenge
        c = Challenge('D8C1')
        c._download_data('experimental.zip', 'syn1920412')

    def load_submissions(self, startweek=0, endweek=9, keep_latest=True):
        """Loads all SCORED submissions from SC2B

        Attaches the week, status, ranking and zscores. The result is stored
        in the :attr:`submissions` attribute.

        :param startweek: submissions with a ``week`` below this are dropped.
        :param endweek: submissions with a ``week`` above this are dropped.
        :param keep_latest: if true, reduce the list with
            ``_keep_latest_only()``.
        """
        # load all scored submissions
        self.submissions = self.hpn.get_submissions_prediction_insilico(status="SCORED")
        print("Got %s SCORED submissions" % len(self.submissions))

        # attach week and filter the submissions
        self.submissions = self.attach_week_to_submissions(self.submissions, "sc2b")
        self.submissions = [sub for sub in self.submissions
                            if startweek <= sub['week'] <= endweek]
        print("Keeping %s submissions in the week range requested" % len(self.submissions))

        if keep_latest:
            self.submissions = self._keep_latest_only()
            print("Keep %s latest scored submissions " % len(self.submissions))

        # attach the status
        print("attaching submissions")
        self.submissions = self.attach_status_to_submissions(self.submissions)

        print("remove some users")
        self.remove_users()

        print("attaching scores and compute final ranking")
        self._attach_zscores()  # attach zscores
        print("all submissions available in the **submissions** attribute")

        # keep the RMSEs stored in the submission report next to the
        # recomputed ones (json.loads already returns a fresh object)
        for sub in self.submissions:
            sub['old_rmses'] = json.loads(sub['substatus']['report'])

    def remove_users(self, userIds=["375805", "1991105"]):
        """Drop the submissions whose ``userId`` is listed in *userIds*.

        * 375805 alphabeta is a test from TC
        * 1991105 sakve from week 5 has different id from sakev week 6.
          remove week5 that has a lower score anyway
        * HD systems (1971259): see SC1A function docstring. Note that this
          id is NOT part of the default list here.

        :param userIds: collection of user ids (strings) to remove. It is
            only read, never modified.
        """
        self.submissions = [x for x in self.submissions
                            if x['userId'] not in userIds]

    def _get_ranking(self):
        """Re-score every submission file and return the ranking object."""
        ranking = scoring.HPNScoringPredictionInsilico_ranking()
        for i, sub in enumerate(self.submissions):
            filename = self.client.getSubmission(
                sub, downloadFile=True, ifcollision="keep.local")['filePath']
            s = scoring.HPNScoringPredictionInsilico(filename, version=self.version)
            s.compute_all_rmse()
            # the index suffix keeps names unique when an alias appears twice
            ranking.add_rmse(copy.deepcopy(s.rmse),
                             sub['submitterAlias'] + "_" + str(i))
        return ranking

    def _attach_zscores(self):
        """Attach mean zscore, final rank, RMSEs and mean RMSE to each submission.

        Assumes ``ranking.participants`` is in the same order as
        ``self.submissions`` (both are filled by one pass in
        :meth:`_get_ranking`).
        """
        ranking = self._get_ranking()
        zscores = ranking.get_mean_zscores()
        ranks = ranking.get_mean_ranks()
        for i, participant in enumerate(ranking.participants):
            # mean zscore
            self.submissions[i]['zscore'] = zscores[participant]
            # final rank
            self.submissions[i]['ranking'] = ranks[participant]

            rmse = ranking.rmse[i]
            self.submissions[i]['rmses'] = rmse
            all_rmse = [rmse[k1][k2] for k1 in rmse for k2 in rmse[k1]]
            # exclude NaN entries from the mean
            all_rmse = [x for x in all_rmse if not np.isnan(x)]
            self.submissions[i]['mean_rmse'] = np.mean(all_rmse)

    def summary_final(self, show=True):
        """Return the final ranking table as a DataFrame indexed by final rank.

        :param show: unless this is exactly ``False``, the table is also
            printed through ``dataframe_towiki``.
        :return: DataFrame with columns Team Name, Team Id, Submission Id,
            Entity Id, Mean Rank and Mean RMSE.
        """
        subs = self.submissions

        # indices of the submissions sorted by mean rank
        order = np.argsort([sub['ranking'] for sub in subs])
        teams = [subs[k]['submitterAlias'] for k in order]

        df = pd.DataFrame(index=teams)
        df['Team Name'] = teams
        df['Team Id'] = [subs[k]['userId'] for k in order]
        df['Submission Id'] = [subs[k]['substatus']['id'] for k in order]
        df['Entity Id'] = [subs[k]['substatus']['entityId'] for k in order]
        df['Mean Rank'] = [subs[k]['ranking'] for k in order]
        df['Mean RMSE'] = [subs[k]['mean_rmse'] for k in order]

        # finally the ranks
        df['Final Rank'] = df['Mean Rank'].rank().values
        df = df.set_index('Final Rank')
        df = df[['Team Name', u'Team Id', u'Submission Id', u'Entity Id',
                 'Mean Rank', u'Mean RMSE']]

        if show is not False:
            print(dataframe_towiki(df))
        return df

    def save_rmse_to_json(self):
        """Delegate to the parent implementation with ``N1=231, N2=254``."""
        super(SC2BSubmissions, self).save_rmse_to_json(N1=231, N2=254)
class SC2ASubmissions(SubmissionTools, ST2):
    """Load the SCORED SC2A submissions and rank them by RMSE.

    Submissions are fetched through ``HPNAdmin.get_submissions_prediction``
    and re-scored locally with ``scoring.HPNScoringPrediction``.
    """

    def __init__(self, client=None, version=2):
        """Set up the SC2A submission helper.

        :param client: forwarded to the parent constructor; ``self.client``
            is then handed to :class:`HPNAdmin`.
        :param version: stored as ``self.version`` and passed to the scoring
            class in :meth:`_get_ranking`.
        """
        super(SC2ASubmissions, self).__init__(client=client, name='SC2A')
        # should be here to avoid import cycling
        from hpn import HPNAdmin
        self.hpn = HPNAdmin(client=self.client)
        self.version = version

    def load_submissions(self, startweek=0, endweek=9, keep_latest=True):
        """Loads all SCORED submissions from SC2A

        Attaches the week, status, ranking and zscores. The result is stored
        in the :attr:`submissions` attribute.

        :param startweek: submissions with a ``week`` below this are dropped.
        :param endweek: submissions with a ``week`` above this are dropped.
        :param keep_latest: if true, reduce the list with
            ``_keep_latest_only()``.
        """
        # load all scored submissions
        self.submissions = self.hpn.get_submissions_prediction(status="SCORED")
        print("Got %s SCORED submissions" % len(self.submissions))

        # attach week and filter the submissions
        self.submissions = self.attach_week_to_submissions(self.submissions, "sc2a")
        self.submissions = [sub for sub in self.submissions
                            if startweek <= sub['week'] <= endweek]
        print("Keeping %s submissions in the week range requested" % len(self.submissions))

        if keep_latest:
            self.submissions = self._keep_latest_only()
            print("Keep %s latest scored submissions " % len(self.submissions))

        # attach the status
        print("attaching submissions")
        self.submissions = self.attach_status_to_submissions(self.submissions)

        print("remove some users")
        self.remove_users()

        print("attaching scores and compute final ranking")
        self._attach_zscores()  # attach zscores
        print("all submissions available in the **submissions** attribute")

        # keep the RMSEs stored in the submission report next to the
        # recomputed ones (json.loads already returns a fresh object)
        for sub in self.submissions:
            sub['old_rmses'] = json.loads(sub['substatus']['report'])

    def remove_users(self, userIds=["375805", "1991105", "1971259"]):
        """Drop the submissions whose ``userId`` is listed in *userIds*.

        * 375805 alphabeta is a test from TC
        * 1991105 sakve from week 5 has different id from sakev week 6.
          remove week5 that has a lower score anyway
        * HD systems see SC1A function docstring 1971259

        :param userIds: collection of user ids (strings) to remove. It is
            only read, never modified.
        """
        self.submissions = [x for x in self.submissions
                            if x['userId'] not in userIds]

    def _get_ranking(self):
        """Re-score every submission file and return the ranking object."""
        print("Getting ranking")
        ranking = scoring.HPNScoringPrediction_ranking()
        for i, sub in enumerate(self.submissions):
            filename = self.client.getSubmission(
                sub, downloadFile=True, ifcollision="keep.local")['filePath']
            print(i, filename)
            s = scoring.HPNScoringPrediction(filename, version=self.version)
            s.compute_all_rmse()
            # the index suffix keeps names unique when an alias appears twice
            ranking.add_rmse(copy.deepcopy(s.rmse),
                             sub['submitterAlias'] + "_" + str(i))
        return ranking

    def get_final_pvalue(self, submission):
        """Combine the individual zscores of *submission* into one p-value.

        Each zscore is turned into a one-sided p-value with the normal
        survival function; the p-values are combined with Fisher's method
        (chi-square survival function with ``2 * len(zscores)`` degrees of
        freedom).

        :param submission: a submission dict carrying the nested
            ``'zscores'`` mapping set by :meth:`_attach_zscores`.
        :return: the combined p-value.
        :raises AssertionError: if the number of zscores is not 28.
        """
        from scipy import stats
        # get all zscores
        zz = submission['zscores']
        zscores = [zz[k1][k2] for k1 in zz for k2 in zz[k1]]
        dof = len(zscores) * 2
        # NOTE(review): the assertion expects 28 zscores (dof 56) whereas the
        # historical comments mention 32 scores / dof=64 -- confirm.
        assert dof == 56

        # zscores are one-sided (could be negative) so multiply by 1
        sided = 1
        total_score = sum(-2 * np.log(stats.norm.sf(x) * sided) for x in zscores)
        # this is a fisher method to combine the individual scores.
        # chi2 survival for dof=64 and x=100 is 0.002686
        pvalue = stats.chi2.sf(total_score, dof)
        return pvalue

    def _attach_zscores(self):
        """Attach zscores, ranks, RMSEs and their means to each submission.

        Assumes ``ranking.participants`` is in the same order as
        ``self.submissions`` (both are filled by one pass in
        :meth:`_get_ranking`).
        """
        ranking = self._get_ranking()
        zscores = ranking.get_mean_zscores()
        ranks = ranking.get_mean_ranks()
        for i, participant in enumerate(ranking.participants):
            # mean zscore
            self.submissions[i]['zscore'] = zscores[participant]
            # individual zscores
            self.submissions[i]['zscores'] = ranking._get_zscores(ranking.rmse[i])
            # final rank
            self.submissions[i]['ranking'] = ranks[participant]
            # individual ranks
            self.submissions[i]['ranks'] = ranking.get_rank_participant(participant)
            # individual RMSEs
            self.submissions[i]['rmses'] = ranking.rmse[i].copy()

            # mean of the individual RMSEs, NaN entries excluded
            s = ranking.rmse[i]
            data = [s[k1][k2] for k1 in s for k2 in s[k1]]
            data = [x for x in data if not np.isnan(x)]
            self.submissions[i]['mean_rmse'] = np.mean(data)

    def summary_final(self, show=True):
        """Return the final ranking table as a DataFrame indexed by final rank.

        :param show: unless this is exactly ``False``, the table is also
            printed through ``dataframe_towiki``.
        :return: DataFrame with columns Team Name, Team Id, Submission Id,
            Entity Id, Mean Rank and Mean RMSE.
        """
        subs = self.submissions

        # indices of the submissions sorted by mean rank
        order = np.argsort([sub['ranking'] for sub in subs])
        teams = [subs[k]['submitterAlias'] for k in order]

        df = pd.DataFrame(index=teams)
        df['Team Name'] = teams
        df['Team Id'] = [subs[k]['userId'] for k in order]
        df['Submission Id'] = [subs[k]['substatus']['id'] for k in order]
        df['Entity Id'] = [subs[k]['substatus']['entityId'] for k in order]
        df['Mean Rank'] = [subs[k]['ranking'] for k in order]
        df['Mean RMSE'] = [subs[k]['mean_rmse'] for k in order]

        # finally the ranks
        df['Final Rank'] = df['Mean Rank'].rank().values
        df = df.set_index('Final Rank')
        df = df[['Team Name', u'Team Id', u'Submission Id', u'Entity Id',
                 'Mean Rank', u'Mean RMSE']]

        if show is not False:
            print(dataframe_towiki(df))
        return df

    def save_rmse_to_json(self):
        """Delegate to the parent implementation with ``N1=162, N2=153``."""
        super(SC2ASubmissions, self).save_rmse_to_json(N1=162, N2=153)
def __init__(self, client=None, version=2):
    """Set up the SC2A submission helper.

    :param client: forwarded to the parent constructor; ``self.client`` is
        then handed to :class:`HPNAdmin`.
    :param version: stored unchanged as ``self.version``.
    """
    super(SC2ASubmissions, self).__init__(client=client, name='SC2A')

    # should be here to avoid import cycling
    from hpn import HPNAdmin
    admin = HPNAdmin(client=self.client)
    self.hpn = admin

    self.version = version
def __init__(self, client=None, name="SC1B"):
    """Set up the SC1B submission helper.

    :param client: forwarded to the parent constructor; ``self.client`` is
        then handed to :class:`HPNAdmin`.
    :param name: forwarded to the parent constructor.
    """
    super(SC1BSubmissions, self).__init__(client=client, name=name)

    from hpn import HPNAdmin
    admin = HPNAdmin(client=self.client)
    self.hpn = admin
class SC1BSubmissions(SubmissionTools, ST2):
    """Load the SCORED SC1B submissions and rank them by AUC.

    Submissions are fetched through
    ``HPNAdmin.get_submissions_network_insilico``; the AUC and score are read
    from the JSON report attached to each submission status.
    """

    def __init__(self, client=None, name="SC1B"):
        """Set up the SC1B submission helper.

        :param client: forwarded to the parent constructor; ``self.client``
            is then handed to :class:`HPNAdmin`.
        :param name: forwarded to the parent constructor.
        """
        super(SC1BSubmissions, self).__init__(client=client, name=name)
        from hpn import HPNAdmin
        self.hpn = HPNAdmin(client=self.client)

    def load_submissions(self, startweek=0, endweek=9, keep_latest=True):
        """Loads all SCORED submissions from SC1B

        Attaches the week, status, ranking and zscores. The result is stored
        in the :attr:`submissions` attribute.

        :param startweek: submissions with a ``week`` below this are dropped.
        :param endweek: submissions with a ``week`` above this are dropped.
        :param keep_latest: if true, reduce the list with
            ``_keep_latest_only()``.
        """
        # load all scored submissions
        self.submissions = self.hpn.get_submissions_network_insilico(status="SCORED")
        print("Got %s SCORED submissions" % len(self.submissions))

        # attach week and filter the submissions
        self.submissions = self.attach_week_to_submissions(self.submissions, "sc1b")
        self.submissions = [sub for sub in self.submissions
                            if startweek <= sub['week'] <= endweek]
        print("Keeping %s submissions in the week range requested" % len(self.submissions))

        if keep_latest:
            self.submissions = self._keep_latest_only()
            print("Keep %s latest scored submissions " % len(self.submissions))

        # attach the status
        print("attaching submissions")
        self.submissions = self.attach_status_to_submissions(self.submissions)

        print("remove soem users")
        self.remove_users()

        print("attaching scores and compute final ranking")
        self._attach_zscores()  # attach zscores
        print("all submissions available in the **submissions** attribute")

    def remove_users(self, userIds=["375805", "1971259", "1961142", "2208193",
                                    "2154231", "2023612", "2200202"]):
        """Drop the submissions whose ``userId`` is listed in *userIds*.

        * 375805 alphabeta is a test from TC
        * 1971259 HD_systems has 2 ID see SC1A remove_users function.

        From Steven's analysis, some results are highly correlated:

        * ams1012,cas3,cas4 are the same submission. We keep only ams1012
          cas3 1961142, cas4 2208193 should be removed.
        * gucas (2154231) is same as Zhangroup. gucas removed
        * remove chilin 2023612
        * Dream5607 (2200202) and Pitt.transmed are the same. Remove Dream5607
        * sfntt has same alias but 2 different userId. Remove 2197351 so that
          the latest remains only.
          NOTE(review): 2197351 is not in the default list of this method --
          confirm whether it should be.

        :param userIds: collection of user ids (strings) to remove. It is
            only read, never modified.
        """
        self.submissions = [x for x in self.submissions
                            if x['userId'] not in userIds]

    def _attach_zscores(self):
        """Attach ``auc``, ``zscore`` and ``ranking`` to every submission.

        ``auc`` and ``zscore`` are the ``'auc'`` and ``'score'`` entries of
        the JSON report; ``ranking`` is the 0-based position of the
        submission when sorted by decreasing AUC.
        """
        # parse every report only once
        reports = [json.loads(sub['substatus']['report'])
                   for sub in self.submissions]
        aucs = [report['auc'] for report in reports]

        # indices of the submissions from highest to lowest AUC
        order = np.argsort(aucs)[::-1]
        for position, index in enumerate(order):
            self.submissions[index]['ranking'] = position

        for sub, report in zip(self.submissions, reports):
            sub['auc'] = report['auc']
            sub['zscore'] = report['score']

    def summary_final(self):
        """Print the final ranking as a wiki table and return the rows.

        Side effect: a submission whose alias is "ChaosLab" has its
        ``submitterAlias`` rewritten to "FreiburgBiossX".

        :return: dict mapping the submitter alias to the printed row tuple
            (rank, alias, user id, submission id, entity id, AUC, zscore,
            p-value). The p-value is always 0 here.
        """
        header = ("Final rank", "Team name", "userID", "synapse ID",
                  "entityID", "AUC", "zscore", "p-value")
        print("| %12s | %20s | %20s | %20s | %12s | %12s | %12s | %12s |" % header)
        # one separator cell per column (8), matching header and rows
        print("|%s|%s|%s|%s|%s|%s|%s|%s|" % ("-"*12, "-"*20, "-"*20, "-"*20,
                                             "-"*12, "-"*12, "-"*12, "-"*12))

        ranks = np.argsort([sub['ranking'] for sub in self.submissions])
        results = {}
        for count, i in enumerate(ranks):
            sub = self.submissions[i]
            pvalue = 0
            if sub['submitterAlias'] == "ChaosLab":
                sub['submitterAlias'] = "FreiburgBiossX"
            data = (count+1, sub['submitterAlias'], sub['userId'],
                    sub['substatus']['id'], sub['substatus']['entityId'],
                    sub['auc'], sub['zscore'], pvalue)
            print("|%12s | %20s | %20s | %20s | %10.6s| %10.6s |%10.6s |%12.6s|" % data)
            results[sub['submitterAlias']] = data[:]
        return results
class SC1ASubmissions(SubmissionTools):
    """Retrieve SCORED submissions and attach all relevant information

    ::

        >>> from dreamtools.dream8.D8C1.submissions import SC1ASubmissions
        >>> s = SC1ASubmissions()
        >>> s.load_submissions()
        >>> len(s.submissions)
        74

    """

    def __init__(self, client=None, name="SC1A"):
        """Set up the SC1A submission helper.

        :param client: forwarded to the parent constructor; ``self.client``
            is then handed to :class:`HPNAdmin`.
        :param name: forwarded to the parent constructor.
        """
        super(SC1ASubmissions, self).__init__(client=client, name=name)
        from hpn import HPNAdmin
        self.hpn = HPNAdmin(client=self.client)

    def load_submissions(self, startweek=0, endweek=9, keep_latest=True):
        """Loads all SCORED submissions from SC1A

        Attaches the week, status, ranking and zscores. The result is stored
        in the :attr:`submissions` attribute.

        :param startweek: submissions with a ``week`` below this are dropped.
        :param endweek: submissions with a ``week`` above this are dropped.
        :param keep_latest: if true, reduce the list with
            ``_keep_latest_only()``.
        """
        # load all scored submissions
        self.submissions = self.hpn.get_submissions_network(status="SCORED")
        print("Got %s SCORED submissions" % len(self.submissions))

        # attach week and filter the submissions
        self.submissions = self.attach_week_to_submissions(self.submissions, "sc1a")
        self.submissions = [sub for sub in self.submissions
                            if startweek <= sub['week'] <= endweek]
        print("Keeping %s submissions in the week range requested" % len(self.submissions))

        if keep_latest:
            self.submissions = self._keep_latest_only()
            print("Keep %s latest scored submissions " % len(self.submissions))

        # attach the status
        print("attaching submissions")
        self.submissions = self.attach_status_to_submissions(self.submissions)

        print("remove some users")
        self.remove_users()

        print("attaching scores and compute final ranking")
        self._attach_zscores()  # attach zscores
        print("all submissions available in the **submissions** attribute")

    def remove_users(self, userIds=["375805", "1971259", "1991105", "1961142",
                                    "2208193", "2154231", "2200202", "2197351",
                                    "375570"]):
        """Remove some users to get final submissions

        * 375805 alphabeta is a test from TC
        * sakevin is the same team as sakev
          Last submission by sakve in on week 8. sakevin submitted last on
          week 2 so can be ignored (userId 1991105)
        * hd systems submitted with 2 userId:
          [(u'1967990', u'HD_Systems', u'Michael Zengerling', 8)]
          [(u'1971259', u'HD_Systems', u'Ruth Grosseholz', 7)]
          The latest has the best score (week 8) so I suspect this is the
          one to be used.
        * SHCH and tongki aliases correspond to the same userId. tongji
          submitted last

        From Steven's analysis, some results are highly correlated:

        * ams1012,cas3,cas4 are the same submission. We keep only ams1012
          cas3 1961142, cas4 2208193 should be removed.
        * gucas (2154231) is same as Zhangroup. gucas removed
        * Dream5607 (2200202) and Pitt.transmed are the same. Remove Dream5607
        * sfntt has same alias but 2 different userId. Remove 2197351 so that
          the latest remains only.
        * same with SBIT 375570

        :param userIds: collection of user ids (strings) to remove. It is
            only read, never modified.
        """
        self.submissions = [x for x in self.submissions
                            if x['userId'] not in userIds]

    def _get_ranking(self):
        """Feed the reported AUCs of every submission into a ranking object."""
        ranking = scoring.HPNScoringNetwork_ranking()
        for i, sub in enumerate(self.submissions):
            auc = json.loads(sub['substatus']['report'])
            # the index suffix keeps names unique when an alias appears twice
            ranking.add_auc(auc, sub['submitterAlias'] + "_" + str(i))
        return ranking

    def _attach_zscores(self):
        """Attach zscores, ranks, AUCs and the mean AUC to each submission.

        Assumes ``ranking.participants`` is in the same order as
        ``self.submissions`` (both are filled by one pass in
        :meth:`_get_ranking`).
        """
        ranking = self._get_ranking()
        zscores = ranking.get_mean_zscores()

        # need to remove the 4 cell lines that causes trouble. cannot be
        # done in ranking function
        ranks = ranking.get_mean_ranks()
        for i, participant in enumerate(ranking.participants):
            # mean zscore
            self.submissions[i]['zscore'] = zscores[participant]
            # individual zscores
            self.submissions[i]['zscores'] = ranking._get_zscores(ranking.aucs[i])
            # final rank
            self.submissions[i]['ranking'] = ranks[participant]
            # individual ranks
            self.submissions[i]['ranks'] = ranking.get_rank_participant(participant)
            # individual AUCs
            self.submissions[i]['aucs'] = ranking.aucs[i]

            this = ranking.aucs[i]
            self.submissions[i]['mean_aucs'] = np.mean(
                [this[k1][k2] for k1 in this for k2 in this[k1]])

    def summary_final(self, show="all"):
        """Print the final ranking as a wiki table and return the rows.

        Side effect: a submission whose alias is "ChaosLab" has its
        ``submitterAlias`` rewritten to "FreiburgBiossX".

        :param show: accepted for backward compatibility; not used.
        :return: dict mapping the submitter alias to the printed row tuple
            (rank, alias, user id, submission id, entity id, mean AUC,
            mean rank, p-value). The p-value is always 0 here.
        """
        print("Remove 3 combi of cell line/ligands before printing")
        ranks = np.argsort([sub['ranking'] for sub in self.submissions])

        # header and separator carry one cell per row column (8): the rows
        # end with a p-value cell
        header = ("Final Rank", "Team name", "Team Id", "Submission Id",
                  "Entity Id", "mean AUC", "mean Rank", "p-value")
        print("| %12s | %20s | %20s | %20s| %20s | %12s | %12s | %12s |" % header)
        print("|%s|%s|%s|%s|%s|%s|%s|%s|" % ("-"*12, "-"*20, "-"*12, "-"*12,
                                             "-"*12, "-"*12, "-"*12, "-"*12))

        results = {}
        for count, i in enumerate(ranks):
            sub = self.submissions[i]
            pvalue = 0
            if sub['submitterAlias'] == "ChaosLab":
                sub['submitterAlias'] = "FreiburgBiossX"
            data = (count+1, sub['submitterAlias'], sub['userId'],
                    sub['substatus']['id'], sub['substatus']['entityId'],
                    sub['mean_aucs'], sub['ranking'], pvalue)
            print("|%12s | %20s | %12s |%12s| %12s | %10.6s |%12.6s|%s|" % data)
            results[sub['submitterAlias']] = data[:]
        return results
class SC2BSubmissions(SubmissionTools):
    """Load the SCORED SC2B submissions and rank them by RMSE.

    Submissions are fetched through
    ``HPNAdmin.get_submissions_prediction_insilico`` and re-scored locally
    with ``scoring.HPNScoringPredictionInsilico``.
    """

    def __init__(self, client=None, version='official'):
        """Set up the SC2B submission helper.

        :param client: forwarded to the parent constructor; ``self.client``
            is then handed to :class:`HPNAdmin`.
        :param version: stored as ``self.version`` and passed to the scoring
            class in :meth:`_get_ranking`.
        """
        super(SC2BSubmissions, self).__init__(client=client, name="SC2B")
        from hpn import HPNAdmin
        self.hpn = HPNAdmin(client=self.client)
        self.version = version

        # download missing file automatically if needed.
        from dreamtools import Challenge
        c = Challenge('D8C1')
        c._download_data('experimental.zip', 'syn1920412')

    def load_submissions(self, startweek=0, endweek=9, keep_latest=True):
        """Loads all SCORED submissions from SC2B

        Attaches the week, status, ranking and zscores. The result is stored
        in the :attr:`submissions` attribute.

        :param startweek: submissions with a ``week`` below this are dropped.
        :param endweek: submissions with a ``week`` above this are dropped.
        :param keep_latest: if true, reduce the list with
            ``_keep_latest_only()``.
        """
        # load all scored submissions
        self.submissions = self.hpn.get_submissions_prediction_insilico(status="SCORED")
        print("Got %s SCORED submissions" % len(self.submissions))

        # attach week and filter the submissions
        self.submissions = self.attach_week_to_submissions(self.submissions, "sc2b")
        self.submissions = [sub for sub in self.submissions
                            if startweek <= sub['week'] <= endweek]
        print("Keeping %s submissions in the week range requested" % len(self.submissions))

        if keep_latest:
            self.submissions = self._keep_latest_only()
            print("Keep %s latest scored submissions " % len(self.submissions))

        # attach the status
        print("attaching submissions")
        self.submissions = self.attach_status_to_submissions(self.submissions)

        print("remove some users")
        self.remove_users()

        print("attaching scores and compute final ranking")
        self._attach_zscores()  # attach zscores
        print("all submissions available in the **submissions** attribute")

        # keep the RMSEs stored in the submission report next to the
        # recomputed ones (json.loads already returns a fresh object)
        for sub in self.submissions:
            sub['old_rmses'] = json.loads(sub['substatus']['report'])

    def remove_users(self, userIds=["375805", "1991105"]):
        """Drop the submissions whose ``userId`` is listed in *userIds*.

        * 375805 alphabeta is a test from TC
        * 1991105 sakve from week 5 has different id from sakev week 6.
          remove week5 that has a lower score anyway
        * HD systems (1971259): see SC1A function docstring. Note that this
          id is NOT part of the default list here.

        :param userIds: collection of user ids (strings) to remove. It is
            only read, never modified.
        """
        self.submissions = [x for x in self.submissions
                            if x['userId'] not in userIds]

    def _get_ranking(self):
        """Re-score every submission file and return the ranking object."""
        ranking = scoring.HPNScoringPredictionInsilico_ranking()
        for i, sub in enumerate(self.submissions):
            filename = self.client.getSubmission(
                sub, downloadFile=True, ifcollision="keep.local")['filePath']
            s = scoring.HPNScoringPredictionInsilico(filename, version=self.version)
            s.compute_all_rmse()
            # the index suffix keeps names unique when an alias appears twice
            ranking.add_rmse(copy.deepcopy(s.rmse),
                             sub['submitterAlias'] + "_" + str(i))
        return ranking

    def _attach_zscores(self):
        """Attach mean zscore, final rank, RMSEs and mean RMSE to each submission.

        Assumes ``ranking.participants`` is in the same order as
        ``self.submissions`` (both are filled by one pass in
        :meth:`_get_ranking`).
        """
        ranking = self._get_ranking()
        zscores = ranking.get_mean_zscores()
        ranks = ranking.get_mean_ranks()
        for i, participant in enumerate(ranking.participants):
            # mean zscore
            self.submissions[i]['zscore'] = zscores[participant]
            # final rank
            self.submissions[i]['ranking'] = ranks[participant]

            rmse = ranking.rmse[i]
            self.submissions[i]['rmses'] = rmse
            all_rmse = [rmse[k1][k2] for k1 in rmse for k2 in rmse[k1]]
            # exclude NaN entries from the mean
            all_rmse = [x for x in all_rmse if not np.isnan(x)]
            self.submissions[i]['mean_rmse'] = np.mean(all_rmse)

    def summary(self, show="all"):
        """Print one wiki-table row per submission, sorted by mean rank.

        :param show: accepted for backward compatibility; not used.
        """
        ranks = np.argsort([sub['ranking'] for sub in self.submissions])
        # header columns follow the order of the row data below
        print("| Rank | User Id | Submitted Alias | Week | status | created on | RMSE | mean Rank | zscore |")
        for i, rank in enumerate(ranks):
            sub = self.submissions[rank]
            print("|%s | %10s | %5s |%5s |%s|%s| %10.6s | %10.6s |%10.6s |" % (
                i+1, sub['userId'], sub['submitterAlias'], sub['week'],
                sub['substatus']['status'], sub['createdOn'],
                sub['mean_rmse'], sub['ranking'], sub['zscore']))

    def summary_final(self):
        """Print the final ranking as a wiki table.

        :return: dict mapping the submitter alias to its 1-based final rank.
        """
        ranks = np.argsort([sub['ranking'] for sub in self.submissions])
        # header and separator carry one cell per row column (8)
        print("| Rank | Team name | Team Id | Submission Id | Entity Id | mean RMSE | mean Rank | mean zscore |")
        print("|----|-----|-----|-----|-----|----|----|------|")
        results = {}
        for i, rank in enumerate(ranks):
            sub = self.submissions[rank]
            print("|%s | %20s | %20s | %20s | %20s | %10.6s | %10.6s |%10.6s |" % (
                i+1, sub['submitterAlias'], sub['userId'],
                sub['substatus']['id'], sub['substatus']['entityId'],
                sub['mean_rmse'], sub['ranking'], sub['zscore']))
            results[sub['submitterAlias']] = i+1
        return results
def __init__(self, client=None, name="SC1A", final=True):
    """Set up the SC1A submission helper.

    :param client: forwarded to the parent constructor; ``self.client`` is
        then handed to :class:`HPNAdmin`.
    :param name: forwarded to the parent constructor.
    :param final: stored unchanged as ``self.final``.
    """
    super(SC1ASubmissions, self).__init__(client=client, name=name)

    from hpn import HPNAdmin
    admin = HPNAdmin(client=self.client)
    self.hpn = admin

    self.final = final