def test_raster_extraction(self):
    """Extract raster images (RIS) from a map file and expect exit code 0."""
    print('\n Testing RIS extraction: \n')
    self.parser = XMLparser("test_maps/raster-images.hdf.xml", "e", "RIS", None, True)
    dump_dir = self.parser.xml_file + "_dump"
    self.create_dump_dir(dump_dir)
    self.assertEqual(self.parser.parseAndDumpMapContent(), 0)
    self.tear_down(dump_dir)
 def test_one_dimensional_sds_extraction(self):
     """Extract a one-dimensional SDS from a map file and expect exit code 0."""
     print('\n Testing one-dimensional SDS extraction: \n')
     self.parser = XMLparser("test_maps/one-dimensional-sds.xml", "e", "SDS", None, True)
     dump_dir = self.parser.xml_file + "_dump"
     self.create_dump_dir(dump_dir)
     result = self.parser.parseAndDumpMapContent()
     self.assertEqual(result, 0)
     self.tear_down(dump_dir)
 def test_vdata_extraction(self):
     """Extract VData tables from a map file and expect exit code 0."""
     print('\n Testing VData extraction: \n')
     self.parser = XMLparser("test_maps/vdata-tables.hdf.xml", "e", "VData", None, True)
     dump_dir = self.parser.xml_file + "_dump"
     self.create_dump_dir(dump_dir)
     result = self.parser.parseAndDumpMapContent()
     self.assertEqual(result, 0)
     self.tear_down(dump_dir)
Example #4
0
 def dump_files(self, op, object, output_format, verbose):
     '''
     dumps HDF files objects from the map files found

     For every map file in self.hdfFoundFiles, create a "<file>_dump"
     output directory, parse the map with XMLparser and, when parsing
     succeeds (exit code 0), remove the dump directory again.

     op            -- operation flag forwarded to XMLparser (e.g. "e")
     object        -- HDF object type to dump (kept as-is for API
                      compatibility although it shadows the builtin)
     output_format -- output format forwarded to XMLparser
     verbose       -- verbosity flag forwarded to XMLparser
     '''
     for filename in self.hdfFoundFiles:
         # filename is a sequence; index 1 holds the map-file name.
         file_path = filename[1].replace("/", "")
         if not os.path.exists(self.dir + file_path + "_dump"):  # Be careful with relative paths in Linux
             try:
                 os.makedirs(self.dir + file_path + "_dump")
                 # Fix: Python 2 print statements converted to print()
                 # calls, consistent with the rest of the file.
                 print("Directory created :" + file_path + "_dump")
             except OSError:  # Fix: was a bare except, which hid the real error
                 print("Failed to create a sub directory to store the output files: " + "maps/" + file_path + "_dump")
                 exit(-1)
         else:
             print("The output directory already exist: " + file_path + "_dump")

         parser = XMLparser(self.dir + file_path, op, object, output_format, verbose)
         # tree is None when the map file could not be loaded/parsed.
         if parser.tree is not None:
             code = parser.parseAndDumpMapContent()
             if code == 0:
                 self.tear_down(self.dir + file_path + "_dump")
 def test_airs_sds_extraction(self):
     """Extract all objects from an AIRS product map and expect exit code 0."""
     print('\n Testing AIRS SDS extraction: \n')
     self.parser = XMLparser("test_maps/AIRS.NASA.PROD.hdf.xml", "e", "ALL", None, True)
     self.create_dump_dir(self.parser.xml_file + "_dump")
     code = self.parser.parseAndDumpMapContent()
     self.assertEqual(code, 0)
     # Fix: every sibling extraction test removes its dump directory;
     # this one previously leaked it on disk.
     self.tear_down(self.parser.xml_file + "_dump")
 def test_listing(self):
     """List ('l') the contents of a map file and expect exit code 0."""
     print('\n Testing map listing: \n')
     self.parser = XMLparser("test_maps/multidimensional_chunked_sds.hdf.xml", "l", "ALL", None, True)
     self.assertEqual(self.parser.parseAndDumpMapContent(), 0)
 def test_unexisting_xml(self):
     """A missing map file must yield a parser whose tree is None."""
     print('\n This should be a warning message: \n')
     self.parser = XMLparser("test_maps/thisfiledoesnotexist.xml", "e", "ALL", None, True)
     # Fix: assertEquals is a deprecated alias, removed in Python 3.12.
     self.assertEqual(self.parser.tree, None)
     self.tear_down()
 def test_invalid_xml(self):
     """A syntactically invalid map file must yield a parser whose tree is None."""
     self.parser = XMLparser("test_maps/incorrect-invalid-xml.hdf.xml", "e", "ALL", None, True)
     # Fix: assertEquals is a deprecated alias, removed in Python 3.12.
     self.assertEqual(self.parser.tree, None)
     self.tear_down()
class TestMapReader(unittest.TestCase):

    def setUp(self):        
        '''
        '''
        
    def create_dump_dir(self,dir):        
        if not os.path.exists(dir):
            os.makedirs(dir)

        
    def tear_down(self,dir=''):
        print "Tear down ... "
        if os.path.exists(dir):
            shutil.rmtree(dir)
        self.parser=None
        
    def test_invalid_xml(self):
        self.parser= XMLparser("test_maps/incorrect-invalid-xml.hdf.xml","e","ALL",None,True)
        self.assertEquals(self.parser.tree,None)
        self.tear_down()
        
    def test_unexisting_xml(self):
        print ('\n This should be a warning message: \n')
        self.parser= XMLparser("test_maps/thisfiledoesnotexist.xml","e","ALL",None,True)
        self.assertEquals(self.parser.tree,None)      
        self.tear_down()  
        
    def test_listing(self):
        print ('\n Testing map listing: \n')
        self.parser= XMLparser("test_maps/multidimensional_chunked_sds.hdf.xml","l","ALL",None,True)
        code=self.parser.parseAndDumpMapContent()
        self.assertEqual(code,0)    
                
                     
    def test_multi_dimensional_sds_extraction(self):  
        print ('\n Testing multi-dimensional SDS extraction: \n')      
        self.parser= XMLparser("test_maps/multidimensional_chunked_sds.hdf.xml","e","SDS",None,True)
        self.create_dump_dir(self.parser.xml_file + "_dump")
        code=self.parser.parseAndDumpMapContent()
        self.assertEqual(code,0)
        self.tear_down(self.parser.xml_file + "_dump")
        

    def test_two_dimensional_sds_extraction(self):   
        print ('\n Testing 2-dimensional SDS extraction: \n')     
        self.parser= XMLparser("test_maps/two-dimensional-chunked-sds.hdf.xml","e","SDS",None,True)
        self.create_dump_dir(self.parser.xml_file + "_dump")
        code=self.parser.parseAndDumpMapContent()
        self.assertEqual(code,0)        
        self.tear_down(self.parser.xml_file + "_dump")
        
        
    def test_one_dimensional_sds_extraction(self):   
        print ('\n Testing one-dimensional SDS extraction: \n')     
        self.parser= XMLparser("test_maps/one-dimensional-sds.xml","e","SDS",None,True)
        self.create_dump_dir(self.parser.xml_file + "_dump")
        code=self.parser.parseAndDumpMapContent()
        self.assertEqual(code,0)        
        self.tear_down(self.parser.xml_file + "_dump")
        
    def test_vdata_extraction(self):   
        print ('\n Testing VData extraction: \n')     
        self.parser= XMLparser("test_maps/vdata-tables.hdf.xml","e","VData",None,True)
        self.create_dump_dir(self.parser.xml_file + "_dump")
        code=self.parser.parseAndDumpMapContent()
        self.assertEqual(code,0)        
        self.tear_down(self.parser.xml_file + "_dump")

    def test_raster_extraction(self):   
        print ('\n Testing RIS extraction: \n')     
        self.parser= XMLparser("test_maps/raster-images.hdf.xml","e","RIS",None,True)
        self.create_dump_dir(self.parser.xml_file + "_dump")
        code=self.parser.parseAndDumpMapContent()
        self.assertEqual(code,0)        
        self.tear_down(self.parser.xml_file + "_dump")
        
    def test_airs_sds_extraction(self):   
        print ('\n Testing AIRS SDS extraction: \n')     
        self.parser= XMLparser("test_maps/AIRS.NASA.PROD.hdf.xml","e","ALL",None,True)
        self.create_dump_dir(self.parser.xml_file + "_dump")
        code=self.parser.parseAndDumpMapContent()
        self.assertEqual(code,0)        
    def RunFromXML(self, filename):
        """Drive a full reduction from an XML configuration file.

        The XMLparser output contains at least two parallel lists of
        dictionaries: ``datadictsdgs`` (dgsreduction keywords) and
        ``datadictsother`` (all other keywords).  For every entry pair the
        data files are loaded, optionally filtered by bad pulses and by
        sample-log values, then handed to self.ProcessWorkspace according
        to the 'ScanType' keyword:

        * 'single' / 'sweep' -- all runs are summed into one workspace; a
          'sweep' is additionally sliced along a sample log and each
          non-empty slice is processed separately.
        * 'step'             -- every run is processed on its own.

        Side effects: accumulates human-readable progress messages in
        self.loadtext / self.vantext / self.reductiontext.
        """
        parsed = XMLparser(filename)

        for d in range(len(parsed.datadictsdgs)):
            dgs = parsed.datadictsdgs[d]      # dgsreduction keywords
            other = parsed.datadictsother[d]  # all other keywords

            # initialize the text variables.
            self.loadtext = ''
            self.vantext = ''
            self.reductiontext = ''

            # Deal with the vanadium sensitivity correction
            vandict = self.loadvan(parsed)

            # determine the scantype: single OR sweep
            if other['ScanType'] == 'single' or other['ScanType'] == 'sweep':
                FileName = other['Instrument'] + "_" + str(other['Runs'][0])

                # Load the first file and fix the time-series to start at 'zero'
                data = Load(Filename=FileName)
                self.loadtext += 'Data files loaded ' + FileName + '\n'

                path = data.getRun()['Filename'].value
                # do the correction for log times
                CorrectLogTimes('data')
                self.loadtext += 'Data files corrected for log times\n'

                monitors = Load_2_Monitors(path)
                monitors = monitors.rename()

                if other['FilterBadPulses']:
                    data = FilterBadPulses(InputWorkspace=data)
                    self.loadtext += "Bad pulses filtered from data files\n"

                # filter by additional log values.
                if other['FilterNames'] is not None:
                    for cntr, part in enumerate(other['FilterNames']):
                        data = FilterByLogValue(
                            InputWorkspace='data',
                            LogName=part,
                            MinimumValue=other['FilterMin'][cntr],
                            MaximumValue=other['FilterMax'][cntr],
                            TimeTolerance=0,
                            LogBoundary='Left')
                        # BUG FIX: the message referenced the undefined names
                        # MinimumValue/MaximumValue (NameError at run time);
                        # report the actual filter bounds instead.
                        self.loadtext += "Data filtered by " + part + " between " + str(
                            other['FilterMin'][cntr]) + " and " + str(
                                other['FilterMax'][cntr]) + ".\n"

                # now deal with all the other runs, if there are more than one.
                if len(other['Runs']) > 1:
                    for i in range(1, len(other['Runs'])):
                        FileName = other['Instrument'] + "_" + str(other['Runs'][i])
                        # Load each file and fix the time-series to start at 'zero'
                        datatemp = Load(Filename=FileName)
                        path = datatemp.getRun()['Filename'].value
                        self.loadtext += 'Data files loaded ' + FileName + '\n'

                        # Fix all of the time series log values to start at the
                        # same time as the proton_charge
                        CorrectLogTimes('datatemp')
                        self.loadtext += 'Data files corrected for log times\n'
                        monitorstemp = Load_2_Monitors(path)
                        monitors += monitorstemp

                        if other['FilterBadPulses']:
                            datatemp = FilterBadPulses(InputWorkspace=datatemp)
                            self.loadtext += "Bad pulses filtered from data files\n"

                        # filter by additional log values.
                        if other['FilterNames'] is not None:
                            for cntr, part in enumerate(other['FilterNames']):
                                datatemp = FilterByLogValue(
                                    InputWorkspace='datatemp',
                                    LogName=part,
                                    MinimumValue=other['FilterMin'][cntr],
                                    MaximumValue=other['FilterMax'][cntr],
                                    TimeTolerance=0,
                                    LogBoundary='Left')
                                # BUG FIX: same undefined-name problem as above.
                                self.loadtext += "Data filtered by " + part + " between " + str(
                                    other['FilterMin'][cntr]) + " and " + str(
                                        other['FilterMax'][cntr]) + ".\n"

                        data += datatemp
                        self.loadtext += 'Data added to the previous workspace\n'

                # This is where the reduction is done.
                if other['ScanType'] == 'single':
                    self.ProcessWorkspace(data, monitors, dgs, other, vandict)
                else:
                    # split up the sweep by the sweep variable.
                    logvalue = other['LogValue']
                    logvaluemin = other['LogValueMin']
                    logvaluemax = other['LogValueMax']
                    logvaluestep = other['LogValueStep']

                    # Check if the logvalue has been set
                    if logvalue is None or not data.run().hasProperty(logvalue):
                        raise ValueError(
                            "No LogValue given OR the given log value was not found in the file."
                        )

                    # need to split the data by an independent variable, some log value.
                    # Create the array of logvalue BOUNDARIES
                    if logvaluemin is None:
                        logvaluemin = array(
                            data.run().getProperty(logvalue).value).min()
                    if logvaluemax is None:
                        # BUG FIX: 'dat' was an undefined name (NameError);
                        # the summed workspace is called 'data'.
                        logvaluemax = array(
                            data.run().getProperty(logvalue).value).max()
                    if logvaluestep is None:
                        logvaluestep = logvaluemax - logvaluemin

                    bounds = arange(float(logvaluemin),
                                    float(logvaluemax) + float(logvaluestep),
                                    float(logvaluestep))

                    # Get the time correlation correct if you set the time
                    # correlation keyword.  To first approximation, set the
                    # time to zero for the first.
                    for i in range(len(bounds) - 1):
                        dataslice = FilterByLogValue(
                            InputWorkspace=data,
                            LogName=logvalue,
                            MinimumValue=float(bounds[i]),
                            MaximumValue=float(bounds[i + 1]))
                        if dataslice.getNumberEvents() > 0:
                            values = array(
                                dataslice.run().getProperty(logvalue).value)
                            self.reductiontext = "Processing data for " + logvalue + " between " + str(
                                bounds[i]) + " and " + str(
                                    bounds[i + 1]) + ", mean=" + str(
                                        values.mean()) + " std=" + str(
                                            values.std()) + "\n"
                            self.ProcessWorkspace(dataslice, monitors, dgs,
                                                  other, vandict)

            # 'step' scans: process every run on its own
            if other['ScanType'] == 'step':
                for currentrun in other['Runs']:
                    FileName = other['Instrument'] + "_" + str(currentrun)

                    # Load each file and fix the time-series to start at 'zero'
                    data = Load(Filename=FileName)
                    self.loadtext = 'Data files loaded ' + FileName + '\n'
                    path = data.getRun()['Filename'].value
                    # do the correction for log times
                    CorrectLogTimes('data')
                    monitors = Load_2_Monitors(path)
                    self.loadtext += 'Data files corrected for log times\n'

                    if other['FilterBadPulses']:
                        data = FilterBadPulses(InputWorkspace=data)
                        self.loadtext += "Bad pulses filtered from data files\n"
                    # filter by additional log values.
                    if other['FilterNames'] is not None:
                        for cntr, part in enumerate(other['FilterNames']):
                            data = FilterByLogValue(
                                InputWorkspace='data',
                                LogName=part,
                                MinimumValue=other['FilterMin'][cntr],
                                MaximumValue=other['FilterMax'][cntr],
                                TimeTolerance=0,
                                LogBoundary='Left')
                            # BUG FIX: same undefined-name problem as above.
                            self.loadtext += "Data filtered by " + part + " between " + str(
                                other['FilterMin'][cntr]) + " and " + str(
                                    other['FilterMax'][cntr]) + ".\n"

                    self.reductiontext = ''
                    self.ProcessWorkspace(data, monitors, dgs, other, vandict)
Example #11
0
import pandas
import numpy
import psycopg2
import Utils
from XMLparser import XMLparser

# Manual smoke test for the XML parsers (for testing purposes only).
if __name__ == "__main__":

    # Parse the hospital payment data set into a frame.
    print("Framing Payment Data...", Utils.DATA_HOSPITAL_PAYMENT)
    p1 = XMLparser(Utils.DATA_HOSPITAL_PAYMENT, Utils.PaymentTags)
    f1 = p1.parse()
    print("Done.")
    # Ask whether to echo the parsed data; loop until the user answers y or n.
    while(True):
        flag = input("Print out portion of data?(y/n)")
        if(flag == "y"):
            print(f1)
            break
        if(flag == "n"):
            break

    # Parse the complications & deaths data set into a frame.
    print("Framing Comp&Death Data...", Utils.DATA_COMPLICATIONS)
    p2 = XMLparser(Utils.DATA_COMPLICATIONS, Utils.CompTags)
    f2 = p2.parse()
    print("Done.")
    # Ask whether to echo the parsed data; loop until the user answers y or n.
    while(True):
        flag = input("Print out portion of data?(y/n)")
        if(flag == "y"):
            # BUG FIX: this prompt concerns the Comp&Death frame, but the
            # original printed f1 (the payment frame) — a copy-paste error.
            print(f2)
            break
        if(flag == "n"):