def __init__(self, filename, display_id=None, timestamp=None, parent=None, load = True):
    """Build a record from a data object, a file name, or another record.

    Args:
        filename: One of
            * a ``GeneralData`` instance -- kept as ``self.gd``; its
              ``filename`` attribute becomes ``self.filename``;
            * a ``str`` file name -- the data object is created from it
              when ``load`` is true, otherwise ``self.gd`` is ``None``;
            * a ``GeneralDataRecord`` -- its ``display_id``, ``filename``,
              data object and ``parent`` are copied; the ``display_id``
              and ``parent`` arguments are ignored in that case.
        display_id: Stored as ``self.display_id`` (except when copying).
        timestamp: Forwarded to the parent class constructor.
        parent: Stored as ``self.parent`` (except when copying).
        load: When ``filename`` is a string, whether to create the data
            object immediately.

    Raises:
        TypeError: If ``filename`` is none of the supported types.
    """
    super(GeneralDataRecord, self).__init__(timestamp)

    if isinstance(filename, GeneralData):
        self.filename = filename.filename
        self.gd = filename
        self.parent = parent
    elif isinstance(filename, str):
        self.filename = filename
        self.gd = GeneralData.create_data_object(filename) if load else None
        self.parent = parent
    elif isinstance(filename, GeneralDataRecord):
        # Copy constructor: everything, display_id included, comes from the
        # source record, so return before the display_id assignment below.
        source = filename
        self.display_id = source.display_id
        self.filename = source.filename
        # Records built by this constructor keep their data object in
        # ``gd``; that is the attribute to copy.
        self.gd = source.gd
        self.parent = source.parent
        return
    else:
        raise TypeError(
            "BAD ARGUMENT: expected GeneralData, str or GeneralDataRecord, "
            "got %s" % type(filename).__name__
        )

    ##@@TODO: display_id may be obsolete
    self.display_id = display_id
Ejemplo n.º 2
0
 def adaptSetType(self, rc):
     """Re-create each input as the data object class it recommends.

     For every input of ``rc`` the recommendation mapping returned by
     ``recommend_data_object()`` is scanned and the first truthy
     ``(module, class)`` entry is used as the hint for
     ``GeneralData.create_data_object``. The new object gets the matching
     type added under "types" and is reported as output. If creating or
     reporting fails, the original input is reported with ``stream=""``
     and the exception is re-raised. Inputs without a usable
     recommendation are only logged.

     Yields:
         ``rc`` once, after all inputs have been handled.
     """
     for dataset in rc.get_inputs():
         recommendations = dataset.recommend_data_object()
         log.stdinfo("adaptSetType: recommended dataset object %s" % recommendations)

         hint = None
         if recommendations:
             # Several recommendations may exist; take the first usable one.
             chosen_type = None
             for chosen_type in recommendations:
                 hint = recommendations[chosen_type]
                 if hint:
                     break

         if not hint:
             summary = "..no recommendation.."
         else:
             module_name, class_name = hint
             summary = "module=%s class=%s" % (module_name, class_name)
             try:
                 converted = GeneralData.create_data_object(dataset, hint=hint)
                 converted.add("types", chosen_type)
                 rc.report_output(converted)
             except:
                 # Pass the untouched input along before propagating.
                 rc.report_output(dataset, stream="")
                 raise
         log.info("(pSR18) %s-> %s" % (dataset.basename, summary))
     yield rc
Ejemplo n.º 3
0
    def adaptSetType(self, rc):
        """Convert each input to the data object type it recommends.

        Each input's ``recommend_data_object()`` mapping is walked until an
        entry with a truthy ``(module, class)`` value is found; that value
        is handed to ``GeneralData.create_data_object`` as ``hint``. The
        resulting object has the selected type added under "types" and is
        reported as output. On any failure the original input is reported
        with ``stream=""`` and the exception propagates. A log line is
        written for every input either way.

        Yields:
            ``rc`` once all inputs are processed.
        """
        for source in rc.get_inputs():
            options = source.recommend_data_object()
            log.stdinfo("adaptSetType: recommended dataset object %s" % options)

            selected = None
            if options:
                # More than one recommendation is possible; first hit wins.
                type_name = None
                for type_name in options:
                    selected = options[type_name]
                    if selected:
                        break

            if selected:
                mod_name, cls_name = selected
                outcome = "module=%s class=%s" % (mod_name, cls_name)
                try:
                    replacement = GeneralData.create_data_object(source, hint=selected)
                    replacement.add("types", type_name)
                    rc.report_output(replacement)
                except:
                    # Keep the original input flowing, then re-raise.
                    rc.report_output(source, stream="")
                    raise
            else:
                outcome = "..no recommendation.."

            log.info("(pSR18) %s-> %s" % (source.basename, outcome))
        yield rc
Ejemplo n.º 4
0
def store_datasets(dataset_names, remove_local = False, elements = None):
    """Annotate the named datasets with warehouse metadata and optionally ship them.

    For each dataset that ``GeneralData.create_data_object`` can open, the
    types, optional ``elements``, store path and store directory are written
    into the object under ``_data.warehouse.*``, a summary is printed, and
    the header is written out. When ``args.store`` or ``args.archive`` is
    set, the package is also transported to the warehouse.

    Unless ``args.all`` is set, only the first five datasets are processed.

    Args:
        dataset_names: Sequence of dataset file names.
        remove_local: Forwarded to ``transport_to_warehouse``.
        elements: Optional value stored under ``_data.warehouse.elements``
            and passed to ``get_store_path`` when computing the store path.
    """
    datasetnames = dataset_names
    # Truncate based on the list that was actually passed in, not on the
    # global command-line list.
    if len(dataset_names) > 5 and not args.all:
        print(tc.colored("%d datasets, showing first 5, use --all to show all"
                         % len(dataset_names),
                         "red", "on_white"))
        datasetnames = dataset_names[:5]

    for fname in datasetnames:
        print("  DATASET: %s" % tc.colored(fname, attrs=["bold"]))
        setref = GeneralData.create_data_object(fname)
        if setref is None:
            continue
        setref.put("_data.warehouse.types", setref.get_types())
        if elements:
            setref.put("_data.warehouse.elements", elements)

        # populate_region may rely on the elements
        if hasattr(setref, "populate_region"):
            setref.populate_region()

        pkg = package_class(setref=setref)
        # get_store_path is re-queried after each put() rather than cached,
        # in case its result depends on what has been stored so far.
        setref.put("_data.warehouse.store_path",
                   pkg.get_store_path(setref, elements = elements))
        setref.put("_data.warehouse.store_dir",
                   os.path.dirname(pkg.get_store_path(setref)))

        print("    TYPES: %s" % tc.colored(", ".join(setref.get_types()), "blue", "on_white"))
        print("STORE_KEY: %s" % pkg.get_store_path(setref))
        print("STORE_DIR: %s" % os.path.dirname(pkg.get_store_path(setref)))
        setref.do_write_header()
        if args.store or args.archive:
            pkg.transport_to_warehouse(remove_local = remove_local)
Ejemplo n.º 5
0
    def highlight(self, rc):
        """Log, per size column, the row holding the column maximum.

        Reads the column range from ``rc["start"]`` / ``rc["end"]``
        (defaults "p1_4" and "p1000_4"). For each input dataframe, rows
        with ``est`` > 100 are considered; for every column in the range
        the row with the largest value is located and its company count,
        percentage, MSA name and matching NAICS table rows are logged.

        Yields:
            ``rc`` once all inputs are processed.
        """
        pd.set_option("display.width", 120)
        pd.set_option("display.max_colwidth", 120)
        log.status("pC25:highlight")
        startcol = rc["start"] or "p1_4"
        endcol = rc["end"] or "p1000_4"
        counties = GeneralData.create_data_object("msa_county_reference12.h5")
        naicsinfo = GeneralData.create_data_object("6-digit_2012_Codes.h5")
        cdf = counties.dataframe
        ndf = naicsinfo.dataframe
        naics_codes = ndf.iloc[:, 0]

        for inp in rc.get_inputs():
            df = inp.dataframe
            cmap = business_cols(df)
            busy = df[df["est"] > 100]
            maxind = busy.loc[:, startcol:endcol].idxmax()
            log.status("numrows=%d" % len(inp.dataframe))
            for key, row_label in maxind.items():
                # idxmax() returns index *labels*, so look rows up with
                # .loc; .iloc would only agree for a default 0..n-1 index.
                log.status(
                    "====\nmax %s companies of this size = %s %s%%" %
                    (key, df[cmap[key]].loc[row_label],
                     df[key].loc[row_label] * 100))
                msa = df["msa"].loc[row_label]
                msa_name = cdf[cdf["msa"] == msa].iloc[0]["name_msa"]
                log.status(
                    "msa   = %s %s " %
                    (msa, COLORSTR(msa_name, attrs=["bold"])))

                naicsstr = df["naics"].loc[row_label]
                # Each "/" is replaced by "0"; the number of "/" sets the
                # width (a power of ten) of the code range to list.
                order = pow(10, naicsstr.count("/"))
                naics = naicsstr.replace("/", "0")

                try:
                    naics = int(naics)
                    # One combined mask instead of filtering an already
                    # filtered frame with a full-length boolean series.
                    nline = ndf[(naics_codes >= naics) &
                                (naics_codes < naics + order)]
                except Exception:
                    # Best effort: the lookup is informational only.
                    nline = "couldn't find"

                log.status("naics = %s\n%s" % (naicsstr, nline))

        yield rc
Ejemplo n.º 6
0
 def highlight(self, rc):
     """Log, per size column, the row holding the column maximum.

     Reads the column range from ``rc["start"]`` / ``rc["end"]``
     (defaults "p1_4" and "p1000_4"). For each input dataframe, rows
     with ``est`` > 100 are considered; for every column in the range
     the row with the largest value is located and its company count,
     percentage, MSA name and matching NAICS table rows are logged.

     Yields:
         ``rc`` once all inputs are processed.
     """
     pd.set_option("display.width", 120)
     pd.set_option("display.max_colwidth", 120)
     log.status("pC25:highlight")
     startcol = rc["start"] or "p1_4"
     endcol = rc["end"] or "p1000_4"
     counties = GeneralData.create_data_object("msa_county_reference12.h5")
     naicsinfo = GeneralData.create_data_object("6-digit_2012_Codes.h5")
     cdf = counties.dataframe
     ndf = naicsinfo.dataframe
     naics_codes = ndf.iloc[:, 0]

     for inp in rc.get_inputs():
         df = inp.dataframe
         cmap = business_cols(df)
         busy = df[df["est"] > 100]
         maxind = busy.loc[:, startcol:endcol].idxmax()
         log.status("numrows=%d" % len(inp.dataframe))
         for key, row_label in maxind.items():
             # idxmax() returns index *labels*, so look rows up with
             # .loc; .iloc would only agree for a default 0..n-1 index.
             log.status(
                 "====\nmax %s companies of this size = %s %s%%" %
                 (key, df[cmap[key]].loc[row_label],
                  df[key].loc[row_label] * 100))
             msa = df["msa"].loc[row_label]
             msa_name = cdf[cdf["msa"] == msa].iloc[0]["name_msa"]
             log.status(
                 "msa   = %s %s " %
                 (msa, COLORSTR(msa_name, attrs=["bold"])))

             naicsstr = df["naics"].loc[row_label]
             # Each "/" is replaced by "0"; the number of "/" sets the
             # width (a power of ten) of the code range to list.
             order = pow(10, naicsstr.count("/"))
             naics = naicsstr.replace("/", "0")

             try:
                 naics = int(naics)
                 # One combined mask instead of filtering an already
                 # filtered frame with a full-length boolean series.
                 nline = ndf[(naics_codes >= naics) &
                             (naics_codes < naics + order)]
             except Exception:
                 # Best effort: the lookup is informational only.
                 nline = "couldn't find"

             log.status("naics = %s\n%s" % (naicsstr, nline))

     yield rc
Ejemplo n.º 7
0
 def naics_interpret(self, rc):
     """Add a leading "industry" column looked up from the NAICS code table.

     For each input, the dataframe index is turned into a first column
     named "naics". Every code is converted to ``int`` and matched against
     the first column of the "6-digit_2012_Codes.h5" table; the second
     column of the first matching row becomes that row's "industry". The
     resulting frame (industry first, then the other columns) replaces
     ``inp.dataframe`` and the input is reported as output.

     Raises:
         IndexError: If a code has no row in the NAICS table.
         ValueError: If a code cannot be converted to ``int``.

     Yields:
         ``rc`` once all inputs are processed.
     """
     pd.set_option("display.width", 120)
     pd.set_option("display.max_colwidth", 120)
     naicsinfo = GeneralData.create_data_object("6-digit_2012_Codes.h5")
     ndf = naicsinfo.dataframe
     table_codes = ndf.iloc[:, 0]

     for inp in rc.get_inputs():
         df = inp.dataframe.reset_index()
         cols = list(df.columns)
         cols[0] = "naics"
         df.columns = cols

         # Build the whole column first and assign it in one step; writing
         # cell by cell through df["industry"].iloc[i] is chained
         # assignment and may end up modifying a temporary copy.
         df["industry"] = [
             ndf[table_codes == int(code)].iloc[0, 1] for code in df["naics"]
         ]

         # Move the freshly added (last) column to the front.
         ncols = list(df.columns.values)
         df = df[[ncols[-1]] + ncols[:-1]]

         inp.dataframe = df
         rc.report_output(inp)

     yield rc
Ejemplo n.º 8
0
    def naics_interpret(self, rc):
        """Add a leading "industry" column looked up from the NAICS code table.

        For each input, the dataframe index is turned into a first column
        named "naics". Every code is converted to ``int`` and matched
        against the first column of the "6-digit_2012_Codes.h5" table; the
        second column of the first matching row becomes that row's
        "industry". The resulting frame (industry first, then the other
        columns) replaces ``inp.dataframe`` and the input is reported as
        output.

        Raises:
            IndexError: If a code has no row in the NAICS table.
            ValueError: If a code cannot be converted to ``int``.

        Yields:
            ``rc`` once all inputs are processed.
        """
        pd.set_option("display.width", 120)
        pd.set_option("display.max_colwidth", 120)
        naicsinfo = GeneralData.create_data_object("6-digit_2012_Codes.h5")
        ndf = naicsinfo.dataframe
        table_codes = ndf.iloc[:, 0]

        for inp in rc.get_inputs():
            df = inp.dataframe.reset_index()
            cols = list(df.columns)
            cols[0] = "naics"
            df.columns = cols

            # Build the whole column first and assign it in one step;
            # writing cell by cell through df["industry"].iloc[i] is
            # chained assignment and may end up modifying a temporary copy.
            df["industry"] = [
                ndf[table_codes == int(code)].iloc[0, 1] for code in df["naics"]
            ]

            # Move the freshly added (last) column to the front.
            ncols = list(df.columns.values)
            df = df[[ncols[-1]] + ncols[:-1]]

            inp.dataframe = df
            rc.report_output(inp)

        yield rc
 def load(self):
     """Create the data object for ``self.filename`` and keep it in ``self.gd``.

     Returns:
         This record, so calls can be chained.
     """
     data_object = GeneralData.create_data_object(self.filename)
     self.gd = data_object
     return self
Ejemplo n.º 10
0
    def on_message(self, message):
        """Handle one JSON command message and answer over ``self.ws``.

        Supported commands (``msg["cmd"]``):

        * ``"depot_msg"`` with ``subcmd == "display"``: make sure a preview
          image ``<file>.<imext>`` exists for ``options.args[0]`` (rendering
          it if needed, with progress messages for files over 100000
          bytes) and send it base64-encoded.
        * ``"depot_msg"`` with ``subcmd == "local_data"``: send a
          description of the current directory (no recursion): directories,
          files, recognised datasets and any paired ``.setref`` contents.
        * ``"run_recipe"``: run ``kit`` with the given options and
          positional arguments and relay its stderr line by line as
          ``nrm_log`` messages.

        Any other command or sub-command is ignored.

        Args:
            message: JSON text of the command.
        """
        print("message: %s %s" % (type(message), message))
        msg = json.loads(message)
        cmd = msg["cmd"]
        if cmd == "depot_msg":
            print("handle depot_msg")
            subcmd = msg.get("subcmd")
            ##### DISPLAY
            if subcmd == "display":
                # NOTE(review): this line uses IMEXT while the rest of the
                # method uses imext -- confirm both names exist.
                print("client wants a %s" % IMEXT)
                fn = msg["options"]["args"][0]
                imgname = "%s.%s" % (fn, imext)
                numnz = -1
                if not os.path.exists(imgname):
                    imsize = os.stat(fn).st_size

                    def send_status(status_msg):
                        # Progress is only reported for large files.
                        if imsize > 100000:
                            progct = {"cmd": "nrm_depot",
                                      "subcmd": "display_status",
                                      "status_msg": status_msg}
                            self.ws.send(json.dumps(progct))

                    a = GeneralData.create_data_object(fn)
                    send_status("creating quick view on server")

                    nd = a.get_nd(1)
                    if a.data.RasterCount >= 3:
                        cd = np.zeros((nd.shape[0], nd.shape[1], 3), dtype=np.uint8)
                        cd[:, :, 0] = nd[:]
                        # NOTE(review): get_nd(i+i) and the RasterCount-1
                        # bound look like they were meant to be get_nd(i+1)
                        # over the three colour planes -- left unchanged,
                        # confirm intent.
                        for i in range(1, a.data.RasterCount - 1):
                            xd = a.get_nd(i + i)
                            cd[:, :, i] = xd[:, :]
                        nd = cd

                    send_status("produce %s" % imext)

                    # Subsample tall images before rendering.
                    bd = nd[::3, ::3] if nd.shape[0] > 1000 else nd
                    imshow(bd, interpolation="none",
                           extent=[0, nd.shape[1], nd.shape[0], 0])

                    send_status("transfering")
                    savefig(imgname, bbox_inches='tight', dpi=32)

                # Binary mode, and closed deterministically.
                with open(imgname, "rb") as image:
                    imdata = image.read()
                datastr = b64encode(imdata)
                cmdct = {
                    "num_nonzero": numnz,
                    "cmd": "nrm_depot",
                    "subcmd": "display",
                    "answering": msg,
                    "data64": "data:image/%s;base64,%s" % (imext, datastr)
                }
                self.ws.send(json.dumps(cmdct))
                return
            elif subcmd == "local_data":
                print("client wants local_data description")
                ldata = {}
                for root, dirs, files in os.walk("."):
                    ldata["root"] = root
                    ldata["dirs"] = dirs
                    ldata["files"] = files
                    datasets = []
                    datasets_ct = {}
                    setrefs = []
                    ldata["datasets"] = datasets
                    print("sous45: %s" % (dw_info,))
                    ldata["datawarehouse"] = dw_info
                    for fil in files:
                        ext = os.path.splitext(fil)[1]
                        if not ext:
                            continue
                        ext = ext[1:]
                        # setref pairing
                        # put setrefs in secondary list to check later
                        if ext == "setref":
                            setrefs.append(fil)
                        elif ext in generaldata._data_object_classes:
                            ext_type = ".".join(generaldata._data_object_classes[ext])
                            ds = {
                                "filename": fil,
                                "ext_type": ext_type,
                                "img_exists": os.path.exists("%s.%s" % (fil, imext)),
                            }
                            datasets.append(ds)
                            # @@ISSUE?: fil cannot already be in dict right?
                            datasets_ct[fil] = ds
                    ## don't recurse into subdirectores
                    break

                # Attach each .setref file to the dataset it is named after.
                for fil in setrefs:
                    rawname = fil[:-7]  # strip the ".setref" suffix
                    if rawname in datasets_ct:
                        entry = datasets_ct[rawname]
                        entry["has_setref"] = True
                        entry["setref_name"] = fil
                        with open(fil) as srfile:
                            entry["setref"] = json.loads(srfile.read())

                ldata["cmd"] = "nrm_depot"
                ldata["subcmd"] = "local_data"

                self.ws.send(json.dumps(ldata))
                return
        elif cmd == "run_recipe":
            try:
                from shlex import quote
            except ImportError:  # Python 2
                from pipes import quote

            # build commands
            cmdargs = ["kit"]
            opts = msg["options"]
            positional = opts.pop("args")

            # Option names and values come from the client and end up on a
            # shell command line, so they are shell-quoted.
            for key in opts:
                flag = ("-%s" if len(key) == 1 else "--%s") % key
                if opts[key] != True:
                    opt = "%s %s" % (quote(flag), quote("%s" % (opts[key],)))
                else:
                    opt = quote(flag)
                cmdargs.append(opt)

            # NOTE(review): positional arguments are passed to the shell
            # unquoted (as before), so shell expansion such as globbing
            # still applies -- and so does injection. Quote them too if
            # expansion is not required.
            cmdargs.extend(positional)

            cmdline = " ".join(cmdargs)
            print("sous35: args: %s" % (cmdargs,))

            proc = Popen(cmdline, shell=True, stdout = PIPE, stderr = PIPE)

            print("process =  %s" % (proc,))
            self._ra_proc = proc
            self._ra_stdin = proc.stdin
            self._ra_stdout = proc.stdout
            self._ra_stderr = proc.stderr

            # Relay stderr until EOF so that lines still buffered when the
            # process exits are not dropped.
            # NOTE(review): stdout is piped but not relayed; a child that
            # writes a lot to stdout while stderr stays open can block.
            while True:
                errline = proc.stderr.readline()
                if not errline:
                    break
                buf = ""
                errbuf = errline.strip()
                cmdct = {
                    "cmd": "nrm_log",
                    "stdio": buf,
                    "stderr": errbuf,
                    "ansi": "%s%s" % (buf, errbuf)
                }
                self.ws.send(json.dumps(cmdct))

            # Drain whatever is left on the pipes and reap the process.
            proc.communicate()

            print("END CONNECTION")
            return
Ejemplo n.º 11
0
 # elements are what gets printed into the shelf/format strings
 elements = {}

 # `packager` is either a numeric index into package_class_list or the
 # name of an entry (the first key of one of its dicts).
 # NOTE(review): if neither lookup matches, package_class_struct is not
 # assigned here -- confirm a default is set before this point.
 try:
     packager_key = int(packager)
     package_class_struct = package_class_list[packager_key]
 except (ValueError, TypeError, IndexError, KeyError):
     for pckr in package_class_list:
         if packager == list(pckr)[0]:
             package_class_struct = pckr
 print("packager = %s" % list(package_class_struct)[0])
 package_type = None
 package_class = None

 if args.fileinfo:
     # NOTE(review): this reads args.dataset, while other code in this file
     # reads args.datasets -- confirm the attribute name.
     for fil in args.dataset:
         gd = GeneralData.create_data_object(fil)
         print("File: %s" % gd.basename)
         print("      %s" % ", ".join(gd.get_types()))

 # some flags imply others
 if args.store or args.archive:
     args.all = True

 # Archiving removes the local copy unless --store is also given; an
 # explicit remove_local setting overrides both.
 remove_local = bool(args.archive) and not args.store
 if args.remove_local is not None:
     remove_local = args.remove_local