def loadDataObjects(self, callback=None):
    """Load every registered variant file into one tempVariantData object.

    The entries of self.files are bucketed by their ``format`` attribute.
    All .vcf files are parsed first, then all .csv files; every accepted
    variant is handed to ``vData.addVariant``.  .gff3 and .bed files are
    collected but not loaded yet.  Once parsing is done the group
    calculations are run on the collected data.

    Args:
        callback: optional progress function.  It is invoked here as
            ``callback(numTicks=0, message=...)`` and is also passed on to
            the parsers (as ``tickFunction``) and to
            ``performGroupCalculations``.  May be None.

    Returns:
        The populated tempVariantData object, or the tuple ``(None, None)``
        if one of the parsers returned "ABORTED".
        NOTE(review): the two return shapes differ; kept as-is because
        callers may rely on them.
    """
    # Try to parse files in order by format (some files are more informative
    # than others, and we want to start off with the best information) ...
    # this may get shaken up when we support masking/specific loci
    gff3Files = []
    bedFiles = []
    vcfFiles = []
    csvFiles = []
    axisLabels = set()
    forcedCategoricals = set()
    individualsToInclude = self.getAllSamples()

    for fileID, f in self.files.iteritems():
        if f.format == '.vcf' or f.format == '.csv':
            if f.format == '.vcf':
                vcfFiles.append(fileID)
            else:
                csvFiles.append(fileID)
            # Both variant formats contribute axis labels and categoricals.
            axisLabels.update(f.hardFilters.iterkeys())
            forcedCategoricals.update(f.forcedCategoricals)
        elif f.format == '.gff3':
            gff3Files.append(fileID)
        elif f.format == '.bed':
            bedFiles.append(fileID)

    vData = tempVariantData(axisLabels,
                            set(self.statistics.iterkeys()),
                            forcedCategoricals,
                            self.startingXaxis,
                            self.startingYaxis)

    # TODO: gff3Files and bedFiles are collected above but nothing is loaded
    # from them yet.

    def loadVariants(fileID, parseFunction, individuals, individualAppendString):
        """Parse one file into vData; return False if the parser aborted."""
        fileObj = self.files[fileID]
        if callback is not None:
            callback(numTicks=0, message='Loading %s' % fileID)
        parameters = variantLoadingParameters(build=fileObj.build,
                                              passFunction=vData.addVariant,
                                              rejectFunction=None,
                                              callbackArgs={},
                                              tickFunction=callback,
                                              tickInterval=self.loadingPercentages[fileID],
                                              individualsToInclude=individuals,
                                              individualAppendString=individualAppendString,
                                              lociToInclude=None,  # TODO: support masking and specific loci
                                              mask=None,
                                              attributesToInclude=fileObj.hardFilters,
                                              attributeAppendString=" (%s)" % fileID,
                                              skipGenotypeAttributes=True)
        return parseFunction(fileObj.path, parameters) != "ABORTED"

    for fileID in vcfFiles:
        if not loadVariants(fileID, variantFile.parseVcfFile,
                            individualsToInclude, " (%s)" % fileID):
            return (None, None)

    for fileID in csvFiles:
        # .csv files don't have genotypes, so no individuals are requested.
        if not loadVariants(fileID, variantFile.parseCsvFile, [], ""):
            return (None, None)

    # Now that we've loaded the files, do our group calculations.
    # callback defaults to None, so it must be guarded here as well.
    if callback is not None:
        callback(numTicks=0, message='Calculating group statistics')
    # The tick interval for this phase is stored under the key None.
    # NOTE(review): callback is forwarded even when it is None; confirm that
    # performGroupCalculations tolerates that.
    vData.performGroupCalculations(self.groups, self.statistics, callback,
                                   self.loadingPercentages[None])

    return vData
def runApp(mask="",m="",vcf="",v="",out="",o="",csv="",c=""):
    print "loading features",
    params = featureLoadingParameters(build=genomeUtils.hg19,
                                     passFunction=None,
                                     rejectFunction=None,
                                     callbackArgs={},
                                     tickFunction=tick,
                                     tickInterval=10,
                                     mask=None,
                                     attributesToInclude={},
                                     returnFileObject=True)
    features = featureFile.parseBedFile(mask, params)
    print ""
    
    variants = {}
    fileAttributes = variantFile.extractVcfFileInfo(vcf)
    
    def addVariant(v):
        variants[v.genomePosition] = v
    
    params = variantLoadingParameters(build=genomeUtils.hg19,
                                     passFunction=addVariant,
                                     rejectFunction=None,
                                     callbackArgs={},
                                     tickFunction=tick,
                                     tickInterval=10,
                                     individualsToInclude=None,
                                     individualAppendString="",
                                     lociToInclude=None,
                                     mask=None,  #features.regions,
                                     invertMask=False,  #True
                                     attributesToInclude=None,
                                     attributeAppendString="",
                                     skipGenotypeAttributes=True,   # TODO: there's actually a bug here (you should turn this back off
                                     returnFileObject=False,
                                     alleleMatching=allele.UNENFORCED,
                                     attemptRepairsWhenComparing=True)
    print "parsing vcf file",
    variantFile.parseVcfFile(vcf, params)
    print ""
    # TODO: throw this bit out
    print "swapping attributes"
    for v in variants.itervalues():
        newName = v.attributes.get('RSID',v.name)
        if v.attributes.has_key('RSID'):
            newName = v.attributes['RSID']
            del v.attributes['RSID']
        else:
            newName = v.name
        if isinstance(newName,list):
            newName = newName[1]
        #if newName.startswith('dbsnp'):
        #    newName = newName.split(':')[1]
        if newName == '.':
            newName = v.basicName
        v.name = newName
    print "calculating frequencies",
    tickInterval = len(variants) / 10
    i = 0
    nextTick = tickInterval
    groupDict = {   "CASES3":set(["T2DG0300147", "T2DG0300160", "T2DG0300135", "T2DG0300133", "T2DG0300143"]),
                    "CASES6":set(["T2DG0600449", "T2DG0600426", "T2DG0600428", "T2DG0600470", "T2DG0600442", "T2DG0600431"]),
                    "CASES20":set(["T2DG2000900", "T2DG2000901", "T2DG2000904", "T2DG2000928"]),
                    "CASES21":set(["T2DG2100946", "T2DG2100955", "T2DG2100967", "T2DG2100966"]),
                    "CONTROLS_IND_EXTREME":set(["T2DG0200013", "T2DG0200027", "T2DG0500309", "T2DG0701163", "T2DG0701156", "T2DG0800488", "T2DG0901234", "T2DG1000568", "T2DG1000570", "T2DG1101324", "T2DG1600768", "T2DG2701070", "T2DG4701118", "T2DG0200068", "T2DG0200071", "T2DG0400250", "T2DG0500349", "T2DG0500339", "T2DG0701179", "T2DG0800504", "T2DG0800564", "T2DG0901289", "T2DG0901275", "T2DG1000595", "T2DG1101348", "T2DG1600785", "T2DG1700875", "T2DG2701094", "T2DG4701124"]),
                    "CONTROL_Normal_nonind":set(["T2DG0200098", "T2DG0200076", "T2DG0200104", "T2DG0200065", "T2DG0400287", "T2DG0400260", "T2DG0400280", "T2DG0400288", "T2DG0400267", "T2DG0400269", "T2DG0400279", "T2DG0400256", "T2DG0400273", "T2DG0500370", "T2DG0500367", "T2DG0500353", "T2DG0500383", "T2DG0500362", "T2DG0500364", "T2DG0500379", "T2DG0500351", "T2DG0500381", "T2DG0500385", "T2DG0500357", "T2DG0701225", "T2DG0701227", "T2DG0701194", "T2DG0701196", "T2DG0701195", "T2DG0701211", "T2DG0701222", "T2DG0701220", "T2DG0701204", "T2DG0701208", "T2DG0701192", "T2DG0701191", "T2DG0701198", "T2DG0701181", "T2DG0701174", "T2DG0800561", "T2DG0800542", "T2DG0800563", "T2DG0800547", "T2DG0800559", "T2DG0901306", "T2DG0901296", "T2DG0901285", "T2DG0901284", "T2DG0901269", "T2DG0901295", "T2DG0901299", "T2DG0901279", "T2DG0901307", "T2DG1000637", "T2DG1000629", "T2DG1000630", "T2DG1000614", "T2DG1000612", "T2DG1000613", "T2DG1000631", "T2DG1000591", "T2DG1000620", "T2DG1000640", "T2DG1000606", "T2DG1000636", "T2DG1000638", "T2DG1000627", "T2DG1101369", "T2DG1101381", "T2DG1101383", "T2DG1101377", "T2DG1101354", "T2DG1101356", "T2DG1600793", "T2DG1600811", "T2DG1600816", "T2DG1600810", "T2DG1700872", "T2DG1700869", "T2DG1700876", "T2DG1700867", "T2DG2701096", "T2DG2701093", "T2DG4701139", "T2DG4701129"]),
                    "CONTROLS_PreHypertension_ind":set(["T2DG0200073", "T2DG0200031", "T2DG0200040", "T2DG0200032", "T2DG0200042", "T2DG0200047", "T2DG0200070", "T2DG0200078", "T2DG0200096", "T2DG0200063", "T2DG0200077", "T2DG0200057", "T2DG0200086", "T2DG0200041", "T2DG0400234", "T2DG0400243", "T2DG0400257", "T2DG0400261", "T2DG0400264", "T2DG0400241", "T2DG0400237", "T2DG0400238", "T2DG0400258", "T2DG0400295", "T2DG0400254", "T2DG0400262", "T2DG0500334", "T2DG0500346", "T2DG0500358", "T2DG0500371", "T2DG0500389", "T2DG0500332", "T2DG0500352", "T2DG0500375", "T2DG0500347", "T2DG0500388", "T2DG0500373", "T2DG0701188", "T2DG0701219", "T2DG0701203", "T2DG0701199", "T2DG0701216", "T2DG0701217", "T2DG0701214", "T2DG0800541", "T2DG0800520", "T2DG0800552", "T2DG0800529", "T2DG0800514", "T2DG0800502", "T2DG0800505", "T2DG0800509", "T2DG0901305", "T2DG0901287", "T2DG0901267", "T2DG0901271", "T2DG0901270", "T2DG0901308", "T2DG0901288", "T2DG0901298", "T2DG0901278", "T2DG0901272", "T2DG0901263", "T2DG1000599", "T2DG1000618", "T2DG1000597", "T2DG1000611", "T2DG1000616", "T2DG1000592", "T2DG1000598", "T2DG1000604", "T2DG1000642", "T2DG1000639", "T2DG1000607", "T2DG1101365", "T2DG1101366", "T2DG1101382", "T2DG1101388", "T2DG1101389", "T2DG1101372", "T2DG1101384", "T2DG1101385", "T2DG1101338", "T2DG1101341", "T2DG1101343", "T2DG1101387", "T2DG1101390", "T2DG1101344", "T2DG1101342", "T2DG1600812", "T2DG1600819", "T2DG1600805", "T2DG1600804", "T2DG1600807", "T2DG1600799", "T2DG1700861", "T2DG1700846", "T2DG1700854", "T2DG1700853", "T2DG1700868", "T2DG1700870", "T2DG2701110", "T2DG2701085", "T2DG2701088", "T2DG2701111", "T2DG2701107", "T2DG2701091", "T2DG4701130", "T2DG4701122", "T2DG4701133", "T2DG4701127", "T2DG4701128", "T2DG0200006", "T2DG0200008", "T2DG0200012", "T2DG0200009", "T2DG0200018", "T2DG0200023", "T2DG0200007", "T2DG0400219", "T2DG0500318", "T2DG0500327", "T2DG0500310", "T2DG0500312", "T2DG0500313", "T2DG0701164", "T2DG0701143", "T2DG0800490", "T2DG0800498", 
"T2DG0901251", "T2DG1000567", "T2DG1000582", "T2DG1000586", "T2DG1000565", "T2DG1000566", "T2DG1000569", "T2DG1101320", "T2DG1101330", "T2DG1600767", "T2DG1600773", "T2DG1600778", "T2DG1600771", "T2DG1700824", "T2DG1700836", "T2DG2701073", "T2DG2701079"]),
                    "CONTROLS_ALL":set(["T2DG0200013", "T2DG0200027", "T2DG0500309", "T2DG0701163", "T2DG0701156", "T2DG0800488", "T2DG0901234", "T2DG1000568", "T2DG1000570", "T2DG1101324", "T2DG1600768", "T2DG2701070", "T2DG4701118", "T2DG0200068", "T2DG0200071", "T2DG0400250", "T2DG0500349", "T2DG0500339", "T2DG0701179", "T2DG0800504", "T2DG0800564", "T2DG0901289", "T2DG0901275", "T2DG1000595", "T2DG1101348", "T2DG1600785", "T2DG1700875", "T2DG2701094", "T2DG4701124", "T2DG0200098", "T2DG0200076", "T2DG0200104", "T2DG0200065", "T2DG0400287", "T2DG0400260", "T2DG0400280", "T2DG0400288", "T2DG0400267", "T2DG0400269", "T2DG0400279", "T2DG0400256", "T2DG0400273", "T2DG0500370", "T2DG0500367", "T2DG0500353", "T2DG0500383", "T2DG0500362", "T2DG0500364", "T2DG0500379", "T2DG0500351", "T2DG0500381", "T2DG0500385", "T2DG0500357", "T2DG0701225", "T2DG0701227", "T2DG0701194", "T2DG0701196", "T2DG0701195", "T2DG0701211", "T2DG0701222", "T2DG0701220", "T2DG0701204", "T2DG0701208", "T2DG0701192", "T2DG0701191", "T2DG0701198", "T2DG0701181", "T2DG0701174", "T2DG0800561", "T2DG0800542", "T2DG0800563", "T2DG0800547", "T2DG0800559", "T2DG0901306", "T2DG0901296", "T2DG0901285", "T2DG0901284", "T2DG0901269", "T2DG0901295", "T2DG0901299", "T2DG0901279", "T2DG0901307", "T2DG1000637", "T2DG1000629", "T2DG1000630", "T2DG1000614", "T2DG1000612", "T2DG1000613", "T2DG1000631", "T2DG1000591", "T2DG1000620", "T2DG1000640", "T2DG1000606", "T2DG1000636", "T2DG1000638", "T2DG1000627", "T2DG1101369", "T2DG1101381", "T2DG1101383", "T2DG1101377", "T2DG1101354", "T2DG1101356", "T2DG1600793", "T2DG1600811", "T2DG1600816", "T2DG1600810", "T2DG1700872", "T2DG1700869", "T2DG1700876", "T2DG1700867", "T2DG2701096", "T2DG2701093", "T2DG4701139", "T2DG4701129", "T2DG0200073", "T2DG0200031", "T2DG0200040", "T2DG0200032", "T2DG0200042", "T2DG0200047", "T2DG0200070", "T2DG0200078", "T2DG0200096", "T2DG0200063", "T2DG0200077", "T2DG0200057", "T2DG0200086", "T2DG0200041", "T2DG0400234", "T2DG0400243", 
"T2DG0400257", "T2DG0400261", "T2DG0400264", "T2DG0400241", "T2DG0400237", "T2DG0400238", "T2DG0400258", "T2DG0400295", "T2DG0400254", "T2DG0400262", "T2DG0500334", "T2DG0500346", "T2DG0500358", "T2DG0500371", "T2DG0500389", "T2DG0500332", "T2DG0500352", "T2DG0500375", "T2DG0500347", "T2DG0500388", "T2DG0500373", "T2DG0701188", "T2DG0701219", "T2DG0701203", "T2DG0701199", "T2DG0701216", "T2DG0701217", "T2DG0701214", "T2DG0800541", "T2DG0800520", "T2DG0800552", "T2DG0800529", "T2DG0800514", "T2DG0800502", "T2DG0800505", "T2DG0800509", "T2DG0901305", "T2DG0901287", "T2DG0901267", "T2DG0901271", "T2DG0901270", "T2DG0901308", "T2DG0901288", "T2DG0901298", "T2DG0901278", "T2DG0901272", "T2DG0901263", "T2DG1000599", "T2DG1000618", "T2DG1000597", "T2DG1000611", "T2DG1000616", "T2DG1000592", "T2DG1000598", "T2DG1000604", "T2DG1000642", "T2DG1000639", "T2DG1000607", "T2DG1101365", "T2DG1101366", "T2DG1101382", "T2DG1101388", "T2DG1101389", "T2DG1101372", "T2DG1101384", "T2DG1101385", "T2DG1101338", "T2DG1101341", "T2DG1101343", "T2DG1101387", "T2DG1101390", "T2DG1101344", "T2DG1101342", "T2DG1600812", "T2DG1600819", "T2DG1600805", "T2DG1600804", "T2DG1600807", "T2DG1600799", "T2DG1700861", "T2DG1700846", "T2DG1700854", "T2DG1700853", "T2DG1700868", "T2DG1700870", "T2DG2701110", "T2DG2701085", "T2DG2701088", "T2DG2701111", "T2DG2701107", "T2DG2701091", "T2DG4701130", "T2DG4701122", "T2DG4701133", "T2DG4701127", "T2DG4701128", "T2DG0200006", "T2DG0200008", "T2DG0200012", "T2DG0200009", "T2DG0200018", "T2DG0200023", "T2DG0200007", "T2DG0400219", "T2DG0500318", "T2DG0500327", "T2DG0500310", "T2DG0500312", "T2DG0500313", "T2DG0701164", "T2DG0701143", "T2DG0800490", "T2DG0800498", "T2DG0901251", "T2DG1000567", "T2DG1000582", "T2DG1000586", "T2DG1000565", "T2DG1000566", "T2DG1000569", "T2DG1101320", "T2DG1101330", "T2DG1600767", "T2DG1600773", "T2DG1600778", "T2DG1600771", "T2DG1700824", "T2DG1700836", "T2DG2701073", "T2DG2701079"])}
    for v in variants.itervalues():
        performGroupCalculations(v,groupDict,"CONTROLS_IND_EXTREME",mode=-1)
        i += 1
        if i > nextTick:
            nextTick += tickInterval
            print ".",
    print ""
    
    
    del fileAttributes['INDIVIDUALS']
    del fileAttributes['INFO']['RSID']
    for k in groupDict.iterkeys():
        fileAttributes['INFO'][k] = {"ID":k,"Number":1,"Type":"Float","Description":"%s Allele frequency"%k}
    
    
    print "loading .csv file",
    acceptAllFilter = valueFilter()
    csvDict = {"SIFT_score":acceptAllFilter,
               "LRT_score":acceptAllFilter,
               "MutationTaster_pred":acceptAllFilter,
               "phyloP":acceptAllFilter,
               "SLR_test_statistic":acceptAllFilter,
               "LRT_pred":acceptAllFilter,
               "MutationTaster_score":acceptAllFilter,
               "MutationTaster_pred":acceptAllFilter,
               "GERP++_NR":acceptAllFilter,
               "GERP++_RS":acceptAllFilter,
               "phyloP":acceptAllFilter,
               "29way_logOdds":acceptAllFilter,
               "LRT_Omega":acceptAllFilter,
               "1000Gp1_AF":acceptAllFilter}
    for k in csvDict.iterkeys():
        fileAttributes['INFO'][k] = {"ID":k,"Number":1,"Type":"Float","Description":k}
    
    def tryRepair(v):
        if variants.has_key(v.genomePosition):
            for key,value in v.attributes.iteritems():
                variants[v.genomePosition].setAttribute(key,value)
    params = variantLoadingParameters(build=genomeUtils.hg19,
                                      passFunction=tryRepair,
                                      rejectFunction=None,
                                      callbackArgs={},
                                      tickFunction=tick,
                                      tickInterval=10,
                                      individualsToInclude=None,
                                      individualAppendString="",
                                      lociToInclude=None,
                                      mask=None,    #features.regions,
                                      invertMask=False, #True,
                                      attributesToInclude=csvDict,
                                      attributeAppendString="",
                                      skipGenotypeAttributes=True,
                                      returnFileObject=False,
                                      alleleMatching=allele.UNENFORCED,
                                      attemptRepairsWhenComparing=True)
    variantFile.parseCsvFile(csv,params)
    print ""
    
    print "writing file..."
    temp = variantFile(fileAttributes)
    temp.variants = variants.values()
    temp.writeVcfFile(out, sortMethod="NUMXYM", includeScriptLine=True)
    print ""
    print "done"