'RID', 'Forecast Month', 'Forecast Date', 'CN relative probability',
        'MCI relative probability', 'AD relative probability', 'ADAS13',
        'ADAS13 50% CI lower', 'ADAS13 50% CI upper', 'Ventricles_ICV',
        'Ventricles_ICV 50% CI lower', 'Ventricles_ICV 50% CI upper'
    ])
# Finalize the benchmark-SVM forecast table, write it to disk and score it
# against the LB4 leaderboard ground truth.
# NOTE(review): `output`, `ym1`, `str_exp`, `os` and `pd` are assumed to be
# defined earlier in the script -- confirm against the surrounding code.
output['Forecast Month'] = output['Forecast Month'].astype(int)
output['Forecast Date'] = ym1

str_out_final = os.path.join(
    str_exp, 'IntermediateData',
    'TADPOLE_Submission_Leaderboard_BenchmarkSVM.csv')
output.to_csv(str_out_final, header=True, index=False)

print('Evaluate predictions')
R = pd.read_csv('./Data/TADPOLE_LB4.csv')
import evalOneSubmission as eos
mAUC, bca, adasMAE, ventsMAE, adasWES, ventsWES, adasCPA, ventsCPA, adasEstim, trueADASFilt = eos.evalOneSub(
    R, output)

# Python-2 `print x,` statements converted to Python-3 calls (the originals
# are a SyntaxError on py3); `end=' '` reproduces the py2 trailing-comma
# behaviour of staying on the same line with a single separating space.
print('Diagnosis:')
print('mAUC = ' + "%0.3f" % mAUC, end=' ')
print('BAC = ' + "%0.3f" % bca)
print('ADAS:')
print('MAE = ' + "%0.3f" % adasMAE, end=' ')
print('WES = ' + "%0.3f" % adasWES, end=' ')
print('CPA = ' + "%0.3f" % adasCPA)
print('VENTS:')
print('MAE = ' + "%0.3e" % ventsMAE, end=' ')
print('WES = ' + "%0.3e" % ventsWES, end=' ')
print('CPA = ' + "%0.3f" % ventsCPA)
# --- Beispiel #2 (example separator from the original source listing) ---
# 0
def downloadLeaderboardSubmissions():
    """Download TADPOLE leaderboard submissions from Dropbox, evaluate each
    against the LB4 ground truth, rank the teams on all three metrics and
    publish an HTML leaderboard table back to Dropbox.

    Relies on module-level names defined elsewhere in this file: ``tag``,
    ``args``, ``DropboxObj``, ``evalOneSubmission``, ``writeHTMLtable``,
    ``rankdata``, ``os``, ``pd``, ``np`` and ``pickle``.
    """
    htmlFile = '%sTable.html' % tag
    dropboxRemoteFolder = '/ProAD/public_html'
    uploadsFldRemote = '/ProAD/uploads'
    ldbSubmissionsFld = 'leaderboardSubmissions'

    ldbDropbox = DropboxObj()

    # Only submissions matching the current leaderboard tag are considered.
    fileListAll = ldbDropbox.list_folder(uploadsFldRemote, '/')
    fileListLdb = [
        x for x in fileListAll.keys()
        if x.startswith('TADPOLE_Submission_%s' % tag)
    ]
    fileListLdb.sort()
    print('fileListLdb ', fileListLdb)
    os.system('mkdir -p %s' % ldbSubmissionsFld)
    nrEntries = len(fileListLdb)

    # Team name is the file stem minus the fixed submission prefix.
    teamNames = [
        f.split('.')[0][len('TADPOLE_Submission_%s_' % tag):]
        for f in fileListLdb
    ]

    evalResFile = '%s/evalResAll.npz' % ldbSubmissionsFld

    tableColumns = ('TEAMNAME', 'RANK', 'MAUC', 'BCA', 'adasMAE', 'ventsMAE',
                    'adasWES', 'ventsWES', 'adasCPA', 'ventsCPA')

    if args.runPart[0] == 'R':
        if args.fast:
            # Load submissions already evaluated and only evaluate new ones.
            dataStruct = pickle.load(open(evalResFile, 'rb'))
            evalResults = dataStruct['evalResults']
            fileDatesRemote = dataStruct['fileDatesRemote']
            entriesList = [
                e for e, f in enumerate(teamNames)
                if (evalResults['TEAMNAME'].str.contains(f).sum() == 0)
            ]
            nanSeries = pd.DataFrame(np.nan,
                                     index=range(len(entriesList)),
                                     columns=tableColumns)
            nrEntriesSoFar = evalResults.shape[0]
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # supported equivalent.
            evalResults = pd.concat([evalResults, nanSeries],
                                    ignore_index=True)
            print('teamNames', teamNames)
            print('entriesList', entriesList)
            print('evalResults', evalResults)
        else:
            evalResults = pd.DataFrame(np.nan,
                                       index=range(nrEntries),
                                       columns=tableColumns)
            fileDatesRemote = []
            entriesList = range(nrEntries)
            nrEntriesSoFar = 0

        lb4Df = pd.read_csv('../data/TADPOLE_LB4.csv')
        lb4Df = lb4Df[lb4Df['LB4'] == 1]  # only keep the LB4 entries
        lb4Df.reset_index(drop=True, inplace=True)
        entryToAddIndex = nrEntriesSoFar
        for f in entriesList:
            fileName = fileListLdb[f]
            teamName = teamNames[f]
            remotePath = '%s/%s' % (uploadsFldRemote, fileName)
            localPath = '%s/%s' % (ldbSubmissionsFld, fileName)
            ldbDropbox.download(localPath, remotePath)

            # Remember the upload timestamp so the HTML table can show it.
            metadataFileRemote = ldbDropbox.dbx.files_get_metadata(remotePath)
            fileDatesRemote += [metadataFileRemote.server_modified]

            print('Evaluating %s' % fileName)
            forecastDf = pd.read_csv(localPath)
            # Best-effort: a malformed submission is reported and skipped
            # (its row stays NaN and is dropped below).  Catch Exception,
            # not a bare `except:`, so KeyboardInterrupt/SystemExit still
            # propagate.
            try:
                evalResults.loc[entryToAddIndex,
                                ['MAUC', 'BCA', 'adasMAE', 'ventsMAE',
                                 'adasWES', 'ventsWES', 'adasCPA',
                                 'ventsCPA']] = \
                    evalOneSubmission.evalOneSub(lb4Df, forecastDf)
                evalResults.loc[entryToAddIndex, 'TEAMNAME'] = teamName
            except Exception:
                print('Error while processing submission %s' % fileName)

            entryToAddIndex += 1

        # Drop rows whose evaluation failed (MAUC still NaN).
        nanMask = np.isnan(evalResults['MAUC'])
        evalResults = evalResults[np.logical_not(nanMask)]
        evalResults.reset_index(drop=True, inplace=True)

        dataStruct = dict(evalResults=evalResults,
                          fileDatesRemote=fileDatesRemote)
        pickle.dump(dataStruct,
                    open(evalResFile, 'wb'),
                    protocol=pickle.HIGHEST_PROTOCOL)
    else:
        dataStruct = pickle.load(open(evalResFile, 'rb'))
        fileDatesRemote = dataStruct['fileDatesRemote']
        evalResults = dataStruct['evalResults']

    # Per-metric ranks: higher MAUC is better (hence the negation), lower
    # MAE is better.  DataFrame.as_matrix was removed in pandas 1.0;
    # Series.to_numpy() is the replacement.
    rankMAUC = rankdata(
        rankdata(-evalResults['MAUC'].to_numpy(), method='average'),
        method='average')
    rankADAS = rankdata(
        rankdata(evalResults['adasMAE'].to_numpy(), method='average'),
        method='average')
    rankVENTS = rankdata(
        rankdata(evalResults['ventsMAE'].to_numpy(), method='average'),
        method='average')

    print('rankMAUC', rankMAUC)
    print('rankADAS', rankADAS)
    print('rankVENTS', rankVENTS)

    # Overall rank = rank of the summed per-metric ranks (starts from 1).
    rankSum = rankMAUC + rankADAS + rankVENTS

    rankOrder = rankdata(rankSum, method='average')
    for f in range(evalResults.shape[0]):
        evalResults.loc[f, 'RANK'] = rankOrder[f]

    evalResults = evalResults.sort_values(by=['MAUC', 'BCA'], ascending=False)
    evalResults = evalResults.reset_index(drop=True)

    print('evalResults after\n', evalResults)

    # Render the table locally, then push it to the public Dropbox folder.
    htmlFileFullPathRemote = '%s/%s' % (dropboxRemoteFolder, htmlFile)
    htmlFileFullPathLocal = '%s/%s' % (ldbSubmissionsFld, htmlFile)
    writeHTMLtable(evalResults, htmlFileFullPathLocal, fileListLdb,
                   fileDatesRemote)
    ldbDropbox.upload(htmlFileFullPathLocal, htmlFileFullPathRemote)
    columns=[
        'RID', 'Forecast Month', 'Forecast Date', 'CN relative probability',
        'MCI relative probability', 'AD relative probability', 'ADAS13',
        'ADAS13 50% CI lower', 'ADAS13 50% CI upper', 'Ventricles_ICV',
        'Ventricles_ICV 50% CI lower', 'Ventricles_ICV 50% CI upper'
    ])
# Persist the benchmark-SVM forecasts, then score them against the D4 dummy
# ground-truth file.
output['Forecast Month'] = output['Forecast Month'].astype(int)
output['Forecast Date'] = ym1

str_out_final = os.path.join(
    str_exp, 'IntermediateData', 'TADPOLE_Submission_BenchmarkSVM.csv')
output.to_csv(str_out_final, header=True, index=False)

print('Evaluate predictions')
import evalOneSubmission as eos
R = pd.read_csv('./IntermediateData/D4_dummy.csv')
(mAUC, bca, adasMAE, ventsMAE,
 adasWES, ventsWES, adasCPA, ventsCPA) = eos.evalOneSub(R, output)

# Report every metric, grouped by section, one "label = value" per line.
sections = (
    ('Diagnosis:', (('mAUC', "%0.3f" % mAUC), ('BAC', "%0.3f" % bca))),
    ('ADAS:', (('MAE', "%0.3f" % adasMAE), ('WES', "%0.3f" % adasWES),
               ('CPA', "%0.3f" % adasCPA))),
    ('VENTS:', (('MAE', "%0.3e" % ventsMAE), ('WES', "%0.3e" % ventsWES),
                ('CPA', "%0.3f" % ventsCPA))),
)
for header, rows in sections:
    print(header)
    for label, formatted in rows:
        print(label + ' = ' + formatted)
# Expand each subject's forecast row into its block of nr_pred monthly
# predictions; column 1 holds the 1-based forecast-month index.
for row in o:
    block = slice(count, count + nr_pred)
    o1[block] = row
    o1[block, 1] = range(1, nr_pred + 1)
    count += nr_pred

forecast_cols = ['RID', 'Forecast Month', 'Forecast Date',
                 'CN relative probability', 'MCI relative probability',
                 'AD relative probability']
output = pd.DataFrame(o1, columns=forecast_cols)
output['Forecast Month'] = output['Forecast Month'].astype(int)
output['Forecast Date'] = ym1

str_out_final = os.path.join(str_exp, 'IntermediateData',
                             'TADPOLE_Submission_Leaderboard_BenchmarkSVM.csv')
output.to_csv(str_out_final, header=True, index=False)

print('Evaluate predictions')
import evalOneSubmission as eos
R = pd.read_csv('./TADPOLE_LB4.csv')
mAUC, bca = eos.evalOneSub(R, output)

# This evalOneSub variant only returns the diagnosis metrics.
print('Diagnosis:')
print('mAUC = ' + "%0.3f" % mAUC)
print('BAC = ' + "%0.3f" % bca)
# --- Beispiel #5 (example separator from the original source listing) ---
# 0
def downloadLeaderboardSubmissions():
    """Download every TADPOLE leaderboard submission from Dropbox, evaluate
    it against the LB4 ground truth, rank teams by MAUC and publish an HTML
    leaderboard table back to Dropbox.

    Relies on module-level names defined elsewhere in this file: ``args``,
    ``DropboxObj``, ``evalOneSubmission``, ``writeHTMLtable``, ``os``,
    ``pd``, ``np`` and ``pickle``.
    """
    htmlFile = 'leaderboardTable.html'
    dropboxRemoteFolder = '/ProAD/public_html'
    uploadsFldRemote = '/ProAD/uploads'
    ldbSubmissionsFld = 'leaderboardSubmissions'

    ldbDropbox = DropboxObj()

    fileListAll = ldbDropbox.list_folder(uploadsFldRemote, '/')
    fileListLdb = [x for x in fileListAll.keys()
                   if x.startswith('TADPOLE_Submission_Leaderboard')]
    fileListLdb.sort()
    print('fileListLdb ', fileListLdb)
    os.system('mkdir -p %s' % ldbSubmissionsFld)
    nrEntries = len(fileListLdb)

    evalResFile = '%s/evalResAll.npz' % ldbSubmissionsFld

    entriesList = range(nrEntries)

    if args.runPart[0] == 'R':
        evalResults = pd.DataFrame(
            np.nan, index=range(nrEntries),
            columns=('TEAMNAME', 'RANK', 'MAUC', 'BCA', 'adasMAE',
                     'ventsMAE', 'adasWES', 'ventsWES', 'adasCPA',
                     'ventsCPA'))
        lb4Df = pd.read_csv('TADPOLE_LB4.csv')
        lb4Df = lb4Df[lb4Df['LB4'] == 1]  # only keep the LB4 entries
        lb4Df.reset_index(drop=True, inplace=True)
        fileDatesRemote = []
        for f in entriesList:
            fileName = fileListLdb[f]
            remotePath = '%s/%s' % (uploadsFldRemote, fileName)
            localPath = '%s/%s' % (ldbSubmissionsFld, fileName)
            ldbDropbox.download(localPath, remotePath)

            # Remember the upload timestamp so the HTML table can show it.
            metadataFileRemote = ldbDropbox.dbx.files_get_metadata(remotePath)
            fileDatesRemote += [metadataFileRemote.server_modified]

            print('Evaluating %s' % fileName)
            forecastDf = pd.read_csv(localPath)
            # Best-effort: a malformed submission is reported and skipped
            # (its row stays NaN and is dropped below).  Catch Exception,
            # not a bare `except:`, so KeyboardInterrupt/SystemExit still
            # propagate.
            try:
                evalResults.loc[f, ['MAUC', 'BCA', 'adasMAE', 'ventsMAE',
                                    'adasWES', 'ventsWES', 'adasCPA',
                                    'ventsCPA']] = \
                    evalOneSubmission.evalOneSub(lb4Df, forecastDf)
            except Exception:
                print('Error while processing submission %s' % fileName)

            # Only record a team name if the evaluation actually succeeded.
            if not np.isnan(evalResults['MAUC'].iloc[f]):
                teamName = fileName.split('.')[0][
                    len('TADPOLE_Submission_Leaderboard_'):]
                print('teamname ', teamName)
                evalResults.loc[f, 'TEAMNAME'] = teamName

        # Drop rows whose evaluation failed (MAUC still NaN).
        nanMask = np.isnan(evalResults['MAUC'])
        evalResults = evalResults[np.logical_not(nanMask)]
        evalResults.reset_index(drop=True, inplace=True)

        dataStruct = dict(evalResults=evalResults,
                          fileDatesRemote=fileDatesRemote)
        pickle.dump(dataStruct, open(evalResFile, 'wb'),
                    protocol=pickle.HIGHEST_PROTOCOL)
    else:
        dataStruct = pickle.load(open(evalResFile, 'rb'))
        fileDatesRemote = dataStruct['fileDatesRemote']
        evalResults = dataStruct['evalResults']

    # Compute the ranks using MAUC (rank 1 = highest MAUC).
    # DataFrame.as_matrix was removed in pandas 1.0; Series.to_numpy() is
    # the replacement.
    maucVals = evalResults['MAUC'].to_numpy()
    rankOrder = np.argsort(maucVals)[::-1]  # sort them by MAUC
    rankOrder = np.argsort(rankOrder) + 1  # make them start from 1
    print('ranks', evalResults['MAUC'], rankOrder, maucVals,
          np.argsort(rankOrder))
    for f in range(evalResults.shape[0]):
        evalResults.loc[f, 'RANK'] = rankOrder[f]

    print('evalResults before\n', evalResults)

    evalResults = evalResults.sort_values(by=['MAUC', 'BCA'],
                                          ascending=False)
    evalResults = evalResults.reset_index(drop=True)

    print('evalResults after\n', evalResults)

    # Render the table locally, then push it to the public Dropbox folder.
    htmlFileFullPathRemote = '%s/%s' % (dropboxRemoteFolder, htmlFile)
    htmlFileFullPathLocal = '%s/%s' % (ldbSubmissionsFld, htmlFile)
    writeHTMLtable(evalResults, htmlFileFullPathLocal, fileListLdb,
                   fileDatesRemote)
    ldbDropbox.upload(htmlFileFullPathLocal, htmlFileFullPathRemote)
# Write the submission table, then score it against the corrected D4 set.
submission_table.to_csv(outputFile, index=False)

print('Evaluate predictions')
from datetime import datetime

d4Df = pd.read_csv('./TADPOLE_D4_corr.csv')

# Parse both date columns from ISO-format strings into datetime objects.
for date_col in ('CognitiveAssessmentDate', 'ScanDate'):
    d4Df[date_col] = [datetime.strptime(x, '%Y-%m-%d')
                      for x in d4Df[date_col]]

# Encode diagnosis labels numerically: CN -> 0, MCI -> 1, AD -> 2.
mapping = {'CN': 0, 'MCI': 1, 'AD': 2}
d4Df.replace({'Diagnosis': mapping}, inplace=True)

import evalOneSubmission as eos

(mAUC, bca, adasMAE, ventsMAE,
 adasWES, ventsWES, adasCPA, ventsCPA) = eos.evalOneSub(d4Df,
                                                        submission_table)

# Report every metric, grouped by section, one "label = value" per line.
results = (
    ('Diagnosis:', (('mAUC', "%0.3f" % mAUC), ('BAC', "%0.3f" % bca))),
    ('ADAS:', (('MAE', "%0.3f" % adasMAE), ('WES', "%0.3f" % adasWES),
               ('CPA', "%0.3f" % adasCPA))),
    ('VENTS:', (('MAE', "%0.3e" % ventsMAE), ('WES', "%0.3e" % ventsWES),
                ('CPA', "%0.3f" % ventsCPA))),
)
for header, rows in results:
    print(header)
    for label, formatted in rows:
        print(label + ' = ' + formatted)
# --- Beispiel #7 (example separator from the original source listing) ---
# 0
# Assemble the diagnosis-only forecast table, save it, and score it on LB4.
diag_cols = ['RID', 'Forecast Month', 'Forecast Date',
             'CN relative probability', 'MCI relative probability',
             'AD relative probability']
output = pd.DataFrame(o1, columns=diag_cols)
output['Forecast Month'] = output['Forecast Month'].astype(int)
output['Forecast Date'] = ym1

str_out_final = os.path.join(str_exp, 'IntermediateData',
                             'TADPOLE_Submission_Leaderboard_BenchmarkSVM.csv')
output.to_csv(str_out_final, header=True, index=False)

print('Evaluate predictions')
import evalOneSubmission as eos
R = pd.read_csv('./TADPOLE_LB4.csv')
mAUC, bca = eos.evalOneSub(R, output)

# This evalOneSub variant only returns the diagnosis metrics.
print('Diagnosis:')
print('mAUC = ' + "%0.3f" % mAUC)
print('BAC = ' + "%0.3f" % bca)