Beispiel #1
0
 def __init__(self, *args, **kw):
     """Initialise the test case and prepare its data location.

     Forwards all arguments to the base-class initialiser, runs
     ``fetch_data.main()``, stores a timestamped path below
     ``hadoopy-test-data/`` in ``self.data_path`` and tries to create the
     ``hadoopy-test-data`` parent directory through ``hadoopy.mkdir``.
     """
     super(TestUsingHadoop, self).__init__(*args, **kw)

     # The stamp is taken before the fetch runs, so the path reflects the
     # moment construction started rather than when the fetch finished.
     started_at = time.time()
     fetch_data.main()
     self.data_path = 'hadoopy-test-data/%f/' % started_at

     try:
         hadoopy.mkdir('hadoopy-test-data')
     except IOError:
         # Best effort only: an IOError from mkdir is ignored
         # (presumably the directory already exists -- TODO confirm).
         pass
Beispiel #2
0
 def __init__(self, *args, **kw):
     """Initialise the test case and prepare its data location.

     Passes every argument through to the parent initialiser, calls
     ``fetch_data.main()``, records a per-run path of the form
     ``hadoopy-test-data/<timestamp>/`` in ``self.data_path`` and attempts
     to create ``hadoopy-test-data`` with ``hadoopy.mkdir``.
     """
     super(TestUsingHadoop, self).__init__(*args, **kw)

     # Timestamp first, fetch second: the path is keyed on the time at
     # which construction began.
     run_stamp = time.time()
     fetch_data.main()
     self.data_path = 'hadoopy-test-data/%f/' % run_stamp

     try:
         hadoopy.mkdir('hadoopy-test-data')
     except IOError:
         # Deliberately ignored; presumably raised when the directory is
         # already present -- TODO confirm against hadoopy.
         pass
Beispiel #3
0
def main(argv):
    '''Wrapper script:  Call geogrid and if data is missing, fetch it.

    Makes a single attempt: runs ``geogrid.exe`` from the current directory
    and, when its output reports a missing field listed in the module-level
    ``destfields``, downloads the matching source data with
    ``fetch_data.main`` and converts it with ``geogrid.mainprog``.  The
    module-level ``lastmissingvar`` remembers the field handled last, so a
    second call for the same field is reported as a failure (presumably the
    caller invokes this function repeatedly -- the retry loop is not here).

    Parameters:
        argv: list of command line options; only ``-f``/``--force`` is
            recognised.

    Returns:
        None.  Returns early when the module-level ``donotfetch`` is true.

    Exits (via ``sys.exit``):
        0 -- geogrid completed successfully.
        1 -- geogrid failed and the failure cannot be repaired here.
        2 -- setup problem (namelist, io_form_geogrid, geogrid.exe or
             GEOGRID.TBL).
    '''
    global lastmissingvar
    global criticalfail

    # set path names
    geogridexe = "geogrid.exe"
    defaultnml = "namelist.fire.default"
    runnml = "namelist.wps"
    cwd = "."
    geocmd = os.path.join(cwd, geogridexe)
    expandbdy = .5  # fraction of the domain extent added on every side

    # parse commandline options
    parse = OptionParser("usage: %prog [options]")
    parse.add_option("-f", "--force", action="store_true",
                     help="delete destination data directory if it exists")
    parse.set_defaults(force=False)
    (opts, args) = parse.parse_args(argv)
    if args:
        parse.print_usage()

    # find namelist, copy the default to the runtime namelist if needed
    if not os.path.isfile(runnml):
        if not os.path.isfile(defaultnml):
            print("Can't find a namelist file in %s or %s" % (runnml, defaultnml))
            sys.exit(2)
        shutil.copy(defaultnml, runnml)

    # read in the namelist
    print("reading namelist %s" % runnml)
    nml = namelist.Namelist(runnml)

    # get relevant options, geog data path MUST be there,
    # others we can infer defaults
    try:
        # only the presence of the entry is checked; its value is not used
        nml['geogrid']['par'][0]['geog_data_path'][0]
    except Exception:
        print("Namelist, %s, doesn't seem to be valid." % runnml)
        sys.exit(2)

    try:
        max_dom = int(nml['share']['par'][0]['max_dom'][0])
    except Exception:
        max_dom = 1

    try:
        ioform = int(nml['share']['par'][0]['io_form_geogrid'][0])
    except Exception:
        ioform = 2

    try:
        tblpath = nml['geogrid']['par'][0]['opt_geogrid_tbl_path'][0]
    except Exception:
        tblpath = os.path.join('.', 'geogrid', '')

    tblfile = tblpath + "GEOGRID.TBL"

    # output file suffix for each supported io_form_geogrid value
    suffixes = {1: "int", 2: "nc", 3: "gr1"}
    if ioform not in suffixes:
        # Without a valid value there is no output pattern to check later,
        # so stop here instead of failing with a NameError after the run.
        print("Bad 'io_form_geogrid' value in %s" % runnml)
        sys.exit(2)
    outpat = "geo_em.d%02i." + suffixes[ioform]

    # make sure geogrid is built
    if not os.access(geocmd, os.F_OK | os.R_OK | os.X_OK):
        print("geogrid.exe doesn't seem to exist or is not executable")
        print("have you run configure/compile?")
        sys.exit(2)

    # look at GEOGRID.TBL and make sure that a shell directory exists
    # for each data source that can be fetched automatically
    # make it more foolproof by checking that the file exists and
    # copying from the default if it does not.
    if not os.access(tblfile, os.F_OK):
        # GEOGRID.TBL doesn't exist, check for the default
        dtblfile = os.path.join('.', 'geogrid', 'GEOGRID.TBL.FIRE.NED')
        if not os.access(dtblfile, os.R_OK):
            print("Cannot find or access %s" % tblfile)
            # give up only when the default table is unavailable as well
            sys.exit(2)
        shutil.copy(dtblfile, tblfile)

    proc_tbl(tblfile)

    # run geogrid and pipe output in a buffer
    print("running geogrid.exe")
    now = time.time()
    p = sp.Popen(geocmd, stdout=sp.PIPE, stderr=sp.STDOUT, bufsize=-1)

    # get output (stderr is merged into stdout above)
    sto = p.communicate()[0]

    # now do a number of things to check if it was successful
    # check error code (requires a patched module_parallel)
    # check existence of and modification date of geo_em.d??.*
    # stderr is empty, check stdout for error strings
    errpat = re.compile(r"\s*ERROR:")
    outfiles = [outpat % i for i in range(1, max_dom + 1)]
    if p.returncode == 0 and \
       all(os.path.isfile(fn) for fn in outfiles) and \
       all(os.path.getmtime(fn) > now for fn in outfiles) and \
       errpat.search(sto) is None:
        print("Geogrid completed successfully.")
        sys.exit(0)

    # if we got here something went wrong in geogrid, see if it is missing data:
    r = re.compile(r"Missing value encountered in output field (?P<field>\w*)\n")
    found = r.search(sto)

    if found is None:
        print(sto)
        print("An error occurred while running geogrid, but it doesn't seem to be caused "
              "by missing data.")
        sys.exit(1)

    field = found.group('field').strip()
    if field not in destfields:  # + others once fetch_data.py is generalized
        print("Data is missing in field, %s, but I don't know how to fetch it." % field)
        sys.exit(1)

    if field == lastmissingvar:
        print("I already tried to fetch %s, but it is still missing!!" % field)
        sys.exit(1)

    lastmissingvar = field

    if donotfetch:
        return

    output = outputfields[destfields.index(field)]

    # Now we know that we need to get NED data from usgs, but we need the domain bounds.
    # regexp the boundaries to get the whole domain.
    # the findall syntax is for running in parallel, but it probably won't actually work
    # without patching geogrid source because only process 0 prints.
    fnumber = '-?[0-9]+[.]?[0-9]*'
    south = re.findall("LATSOUTH=(?P<south>%s)" % fnumber, sto)
    north = re.findall("LATNORTH=(?P<north>%s)" % fnumber, sto)
    west = re.findall("LONWEST=(?P<west>%s)" % fnumber, sto)
    east = re.findall("LONEAST=(?P<east>%s)" % fnumber, sto)

    if not (south and north and west and east):
        print("Can't parse domain boundaries.")
        sys.exit(1)

    # get domain boundaries and call data fetcher
    south = min(float(x) for x in south)
    north = max(float(x) for x in north)
    west = min(float(x) for x in west)
    east = max(float(x) for x in east)

    # widen the box so the fetched data extends past the domain edges
    epsy = (north - south) * expandbdy
    epsx = (east - west) * expandbdy
    south -= epsy
    north += epsy
    west -= epsx
    east += epsx

    bounds = (north, south, east, west)
    print("Executing: fetch_data.py -d %s --  %s %s %s %s"
          % ((output['source'],) + bounds))
    try:
        files = fetch_data.main(["-d", output['source'], "--"] +
                                [str(b) for b in bounds])
    except:
        # report and re-raise whatever stopped the fetch (including SystemExit)
        print("fetch_data.py seems to have failed.")
        print("For more information, try running:")
        print("fetch_data.py -v  -d %s --  %s %s %s %s"
              % ((output['source'],) + bounds))
        raise

    # extract files that were downloaded (assumes tar files with bzip2 or gzip compression)
    print("Extracting data files.")
    datafiles = []
    for f in files:
        ft = tarfile.open(f)
        try:
            # remember the first geotiff member of each archive
            for member in ft.getnames():
                if member.endswith("tif"):
                    datafiles.append(member)
                    break
            else:
                print("%s doesn't seem to contain a geotiff file" % f)
                sys.exit(1)

            # NOTE(review): extractall() trusts the member paths stored in
            # the downloaded archive.
            ft.extractall()
        finally:
            # close the archive before deleting it
            ft.close()
        os.remove(ft.name)

    # run source data to geogrid conversion
    geoargs = ['-f'] if (opts.force or makeforce) else []
    if output['maxcat'] is not None:
        geoargs.extend(['-m', str(output['maxcat']), '-a',
                        '-A', str(output['maxcat']), '-w', '1'])
    geoargs.extend(['-d', output['desc'], '-u', output['units'],
                    '--script', '--', output['dir']])
    geoargs.extend(datafiles)
    print("Running geogrid.py %s" % " ".join(geoargs))
    try:
        geogrid.mainprog(geoargs)
    except:
        print("geogrid.py failed")
        raise
Beispiel #4
0
    imsave("bracco.png", thetas_im)

    thetas_im = reshape(theta[1:,3].T, (32,32))
    imsave("gilpin.png", thetas_im)      

    thetas_im = reshape(theta[1:,4].T, (32,32))
    imsave("harmon.png", thetas_im)

    thetas_im = reshape(theta[1:,5].T, (32,32))
    imsave("hader.png", thetas_im) 

if __name__ == "__main__":
    # Script entry point.
    # Part 1: format images into the desired format (i.e. crop, grayscale,
    # resize).  fetch_data.main is called once per group; presumably it
    # writes its results below "cropped/", which is what the globs that
    # follow read -- TODO confirm against fetch_data.
    print("Fetching Images")
    print("Actresses \n")
    fetch_data.main("female")
    print("Actors \n")
    fetch_data.main("male")

    # Collect the files for each person by file-name prefix.
    filelist_baldwin = glob.glob("cropped/baldwin*")
    filelist_carell = glob.glob("cropped/carell*")
    filelist_bracco = glob.glob("cropped/bracco*")
    filelist_gilpin = glob.glob("cropped/gilpin*")
    filelist_harmon = glob.glob("cropped/harmon*")
    filelist_hader = glob.glob("cropped/hader*")

    # Part 2: separating the dataset into the 3 sets
    print("Splitting images into 3 sets \n")
    # NOTE(review): the reason for this 5 second pause is not visible here.
    sleep(5)
    baldwin = get_sets(filelist_baldwin)
    carell = get_sets(filelist_carell)
Beispiel #5
0
import fetch_data
import save_to_gs

# Run the two steps in order: fetch_data's entry point first, then
# save_to_gs's (presumably the second stores what the first fetched --
# TODO confirm).
fetch_data.main()
save_to_gs.main()
Beispiel #6
0
def main(argv):
    '''Wrapper script:  Call geogrid and if data is missing, fetch it.

    Performs one attempt: ``geogrid.exe`` is run from the current directory
    and, when its output names a missing field that appears in the
    module-level ``destfields``, the matching source data is downloaded with
    ``fetch_data.main`` and converted with ``geogrid.mainprog``.  The
    module-level ``lastmissingvar`` records the field handled last, so a
    repeated call for the same field is treated as a failure (presumably
    the caller re-invokes this function -- the retry loop is not here).

    Parameters:
        argv: list of command line options; only ``-f``/``--force`` is
            recognised.

    Returns:
        None.  Returns early when the module-level ``donotfetch`` is true.

    Exits (via ``sys.exit``):
        0 -- geogrid completed successfully.
        1 -- geogrid failed and the failure cannot be repaired here.
        2 -- setup problem (namelist, io_form_geogrid, geogrid.exe or
             GEOGRID.TBL).
    '''
    global lastmissingvar
    global criticalfail

    # set path names
    geogridexe = "geogrid.exe"
    defaultnml = "namelist.fire.default"
    runnml = "namelist.wps"
    cwd = "."
    geocmd = os.path.join(cwd, geogridexe)
    expandbdy = .5  # fraction of the domain extent added on every side

    # parse commandline options
    parse = OptionParser("usage: %prog [options]")
    parse.add_option("-f",
                     "--force",
                     action="store_true",
                     help="delete destination data directory if it exists")
    parse.set_defaults(force=False)
    (opts, args) = parse.parse_args(argv)
    if args:
        parse.print_usage()

    # find namelist, copy the default to the runtime namelist if needed
    if not os.path.isfile(runnml):
        if not os.path.isfile(defaultnml):
            print("Can't find a namelist file in %s or %s" % (runnml,
                                                              defaultnml))
            sys.exit(2)
        shutil.copy(defaultnml, runnml)

    # read in the namelist
    print("reading namelist %s" % runnml)
    nml = namelist.Namelist(runnml)

    # get relevant options, geog data path MUST be there,
    # others we can infer defaults
    try:
        # only the presence of the entry is checked; its value is not used
        nml['geogrid']['par'][0]['geog_data_path'][0]
    except Exception:
        print("Namelist, %s, doesn't seem to be valid." % runnml)
        sys.exit(2)

    try:
        max_dom = int(nml['share']['par'][0]['max_dom'][0])
    except Exception:
        max_dom = 1

    try:
        ioform = int(nml['share']['par'][0]['io_form_geogrid'][0])
    except Exception:
        ioform = 2

    try:
        tblpath = nml['geogrid']['par'][0]['opt_geogrid_tbl_path'][0]
    except Exception:
        tblpath = os.path.join('.', 'geogrid', '')

    tblfile = tblpath + "GEOGRID.TBL"

    # output file suffix for each supported io_form_geogrid value
    suffixes = {1: "int", 2: "nc", 3: "gr1"}
    if ioform not in suffixes:
        # No valid value means no output pattern to verify afterwards, so
        # stop now rather than hit a NameError once geogrid has run.
        print("Bad 'io_form_geogrid' value in %s" % runnml)
        sys.exit(2)
    outpat = "geo_em.d%02i." + suffixes[ioform]

    # make sure geogrid is built
    if not os.access(geocmd, os.F_OK | os.R_OK | os.X_OK):
        print("geogrid.exe doesn't seem to exist or is not executable")
        print("have you run configure/compile?")
        sys.exit(2)

    # look at GEOGRID.TBL and make sure that a shell directory exists
    # for each data source that can be fetched automatically
    # make it more foolproof by checking that the file exists and
    # copying from the default if it does not.
    if not os.access(tblfile, os.F_OK):
        # GEOGRID.TBL doesn't exist, check for the default
        dtblfile = os.path.join('.', 'geogrid', 'GEOGRID.TBL.FIRE.NED')
        if not os.access(dtblfile, os.R_OK):
            print("Cannot find or access %s" % tblfile)
            # abort only when the default table cannot be used either
            sys.exit(2)
        shutil.copy(dtblfile, tblfile)

    proc_tbl(tblfile)

    # run geogrid and pipe output in a buffer
    print("running geogrid.exe")
    now = time.time()
    p = sp.Popen(geocmd, stdout=sp.PIPE, stderr=sp.STDOUT, bufsize=-1)

    # get output (stderr is merged into stdout above)
    sto = p.communicate()[0]

    # now do a number of things to check if it was successful
    # check error code (requires a patched module_parallel)
    # check existence of and modification date of geo_em.d??.*
    # stderr is empty, check stdout for error strings
    errpat = re.compile(r"\s*ERROR:")
    outfiles = [outpat % i for i in range(1, max_dom + 1)]
    if p.returncode == 0 and \
       all(os.path.isfile(fn) for fn in outfiles) and \
       all(os.path.getmtime(fn) > now for fn in outfiles) and \
       errpat.search(sto) is None:
        print("Geogrid completed successfully.")
        sys.exit(0)

    # if we got here something went wrong in geogrid, see if it is missing data:
    r = re.compile(
        r"Missing value encountered in output field (?P<field>\w*)\n")
    found = r.search(sto)

    if found is None:
        print(sto)
        print("An error occurred while running geogrid, but it doesn't seem to be caused "
              "by missing data.")
        sys.exit(1)

    field = found.group('field').strip()
    if field not in destfields:  # + others once fetch_data.py is generalized
        print("Data is missing in field, %s, but I don't know how to fetch it." % field)
        sys.exit(1)

    if field == lastmissingvar:
        print("I already tried to fetch %s, but it is still missing!!" % field)
        sys.exit(1)

    lastmissingvar = field

    if donotfetch:
        return

    output = outputfields[destfields.index(field)]

    # Now we know that we need to get NED data from usgs, but we need the domain bounds.
    # regexp the boundaries to get the whole domain.
    # the findall syntax is for running in parallel, but it probably won't actually work
    # without patching geogrid source because only process 0 prints.
    fnumber = '-?[0-9]+[.]?[0-9]*'
    south = re.findall("LATSOUTH=(?P<south>%s)" % fnumber, sto)
    north = re.findall("LATNORTH=(?P<north>%s)" % fnumber, sto)
    west = re.findall("LONWEST=(?P<west>%s)" % fnumber, sto)
    east = re.findall("LONEAST=(?P<east>%s)" % fnumber, sto)

    if not (south and north and west and east):
        print("Can't parse domain boundaries.")
        sys.exit(1)

    # get domain boundaries and call data fetcher
    south = min(float(x) for x in south)
    north = max(float(x) for x in north)
    west = min(float(x) for x in west)
    east = max(float(x) for x in east)

    # widen the box so the fetched data extends past the domain edges
    epsy = (north - south) * expandbdy
    epsx = (east - west) * expandbdy
    south -= epsy
    north += epsy
    west -= epsx
    east += epsx

    bounds = (north, south, east, west)
    print("Executing: fetch_data.py -d %s --  %s %s %s %s"
          % ((output['source'],) + bounds))
    try:
        files = fetch_data.main(["-d", output['source'], "--"] +
                                [str(b) for b in bounds])
    except:
        # report and re-raise whatever stopped the fetch (including SystemExit)
        print("fetch_data.py seems to have failed.")
        print("For more information, try running:")
        print("fetch_data.py -v  -d %s --  %s %s %s %s"
              % ((output['source'],) + bounds))
        raise

    # extract files that were downloaded (assumes tar files with bzip2 or gzip compression)
    print("Extracting data files.")
    datafiles = []
    for f in files:
        ft = tarfile.open(f)
        try:
            # remember the first geotiff member of each archive
            for member in ft.getnames():
                if member.endswith("tif"):
                    datafiles.append(member)
                    break
            else:
                print("%s doesn't seem to contain a geotiff file" % f)
                sys.exit(1)

            # NOTE(review): extractall() trusts the member paths stored in
            # the downloaded archive.
            ft.extractall()
        finally:
            # close the archive before deleting it
            ft.close()
        os.remove(ft.name)

    # run source data to geogrid conversion
    geoargs = ['-f'] if (opts.force or makeforce) else []
    if output['maxcat'] is not None:
        geoargs.extend([
            '-m',
            str(output['maxcat']), '-a', '-A',
            str(output['maxcat']), '-w', '1'
        ])
    geoargs.extend([
        '-d', output['desc'], '-u', output['units'], '--script', '--',
        output['dir']
    ])
    geoargs.extend(datafiles)
    print("Running geogrid.py %s" % " ".join(geoargs))
    try:
        geogrid.mainprog(geoargs)
    except:
        print("geogrid.py failed")
        raise