def queryDatasetMap(datasetNames, Session, extra_fields=False):
    """Query the database for a dataset map.

    datasetNames is a list of (datasetName, version) tuples.

    Returns (dataset_map, offline_map) where dataset_map is a dictionary:

      (dataset_id, version) => [(path, size), (path, size), ...]

    and offline_map is a dictionary:

      dataset_id => True | False, where True iff the corresponding dataset is offline.

    If extra_fields = True, returns (dataset_map, offline_map, extraFields) where

      extrafields[(dataset_id, absolute_file_path, *field_name*)] => field_value

    and *field_name* is one of:

    - ``mod_time``
    """
    dmap = {}          # (dataset_name, version) -> [(path, size_as_string), ...]
    offlineMap = {}    # dataset_name -> offline flag (not populated in the span visible here)
    extraFields = {}   # (dataset_id, path, field) -> value (not populated in the span visible here)
    for versionId in datasetNames:
        name, useVersion = parseDatasetVersionId(versionId)
        dset = Dataset.lookup(name, Session)
        session = Session()
        if dset is None:
            raise ESGQueryError("Dataset not found: %s" % name)
        session.add(dset)
        # A requested version of -1 means "use the latest version".
        if useVersion == -1:
            useVersion = dset.getVersion()
        versionObj = dset.getVersionObj(useVersion)
        if versionObj is None:
            raise ESGPublishError( "Version %d of dataset %s not found, cannot republish." % (useVersion, dset.name))
        filelist = versionObj.getFiles() # file versions
        # NOTE: backquotes are Python 2 repr() -- file sizes are stored as strings.
        dmap[(name, useVersion)] = [(file.getLocation(), ` file.getSize() `) for file in filelist]
        # NOTE(review): the visible body never closes `session`, never fills
        # offlineMap/extraFields, and has no return statement -- the remainder
        # of this function appears to lie outside this chunk. Confirm against
        # the full source before changing anything here.
def show_extracted_info( self, datasets, dset_error, list_fields, versionObjs ):
    """Populate the Collection-page rows with the results of a data extraction.

    For every row widget on the selected page: if the row's dataset/version was
    extracted (present in `datasets`/`versionObjs`), rebuild its Ok/Error/Warning
    button, publication-status label, version label and optional query-field
    columns; otherwise gray the row out as salmon 'N/A'.

    datasets/versionObjs -- parallel lists of Dataset and version objects.
    dset_error -- names of datasets with warnings (not read here; warning state
                  is re-queried per dataset via has_warnings()).
    list_fields -- query-field names selecting which columns to show.
    """
    # set the color for each item in the row
    dcolor1 = Pmw.Color.changebrightness(self.parent.parent, 'aliceblue', 0.8 )
    dcolor2 = Pmw.Color.changebrightness(self.parent.parent, 'aliceblue', 0.7 )
    dcolor3 = Pmw.Color.changebrightness(self.parent.parent, 'aliceblue', 0.5 )
    dcolor4 = Pmw.Color.changebrightness(self.parent.parent, 'aliceblue', 0.6 )
    dcolor5 = Pmw.Color.changebrightness(self.parent.parent, 'aliceblue', 0.7 )
    dcolor6 = Pmw.Color.changebrightness(self.parent.parent, 'aliceblue', 0.8 )
    dcolor7 = Pmw.Color.changebrightness(self.parent.parent, 'aliceblue', 0.85 )

    selected_page = self.parent.parent.main_frame.selected_top_page

    # Index the extracted datasets by (name, version).  When a version object is
    # the dataset's latest version, also index it under (name, -1) so rows that
    # carry the "latest" sentinel still match.
    dobj = {}
    for dset,versobj in zip(datasets,versionObjs):
        dobj[(dset.name,versobj.version)] = (dset,versobj)
        if dset.getVersion()==versobj.version:
            dobj[(dset.name,-1)] = (dset,versobj)

    #t_version = -1
    #for x in self.parent.parent.main_frame.top_page_id[selected_page]:
    #    dset_row = self.parent.parent.main_frame.top_page_id[selected_page][x].cget('text')
    #    dset_text = self.parent.parent.main_frame.top_page_id2[selected_page][x].cget('text')
    #    dsetName,dsetVers = parseDatasetVersionId(dset_text)
    #    if (dsetVers > t_version):
    #        t_version = dsetVers
    #print 'Highest version is %s' % t_version

    # Walk every row on the page.  `x` is the row key; the first widget's text
    # holds the grid row number, the second the "dataset#version" id string.
    for x in self.parent.parent.main_frame.top_page_id[selected_page]:
        dset_row = self.parent.parent.main_frame.top_page_id[selected_page][x].cget('text')
        dset_text = self.parent.parent.main_frame.top_page_id2[selected_page][x].cget('text')
        #if (self.parent.parent.main_frame.)
        # ganz added this 1/21/11 NOT NEC
        #  if (self.parent.parent.main_frame.version_label[selected_page] ):
        #      dsetName = self.parent.parent.main_frame.top_page_id2[selected_page][x].cget('text')
        #      dsetVers = self.parent.parent.main_frame.version_label[selected_page][x].cget('text')
        #####################################################################################
        #  else:
        dsetName,dsetVers = parseDatasetVersionId(dset_text)
        dsetId = (dsetName,dsetVers)
        if dsetId in dobj.keys():
            dset, versobj = dobj[dsetId]
            dsetVersionName = generateDatasetVersionId((dset.name, versobj.version))
            # Only rows in the 'raised' (selected) state get rebuilt.
            if self.parent.parent.main_frame.top_page_id[selected_page][x].cget('relief') == 'raised':
                frame = self.parent.parent.main_frame.add_row_frame[selected_page][x]
                # Ok / Error / Warning button, colored by the dataset's warning level.
                if not dset.has_warnings(self.Session):
                    ok_err = Tkinter.Button( frame, text = 'Ok', bg = dcolor1, highlightcolor = dcolor1, width = 4, relief = 'sunken') #was 4
                else:
                    warningLevel = dset.get_max_warning_level(self.Session)
                    if warningLevel>=ERROR_LEVEL:
                        buttonColor = "pink"
                        buttonText = "Error"
                    else:
                        buttonColor = "yellow"
                        buttonText = "Warning"
                    # Clicking the button shows the warning/error details.
                    ok_err = Tkinter.Button( frame, text = buttonText, bg = buttonColor, width = 4, ## was 4 ganz
                        relief = 'raised', command = pub_controls.Command( self.error_extraction_button,dset ) )
                #ok_err.grid(row = dset_row, column = 1, sticky = 'nsew')
                #self.parent.parent.main_frame.ok_err[selected_page][x] = ok_err
                ok_err.grid(row = dset_row, column = 1, sticky = 'nsew')
                self.parent.parent.main_frame.ok_err[selected_page][x] = ok_err
                # Publication-status column (column 2).
                status = pollDatasetPublicationStatus(dset.get_name(self.Session), self.Session)
                status_text = pub_controls.return_status_text( status )
                self.parent.parent.main_frame.status_label[selected_page][x] = Tkinter.Label( frame, text = status_text, bg = dcolor1, width = 10, relief = 'sunken') # 4 was 10
                self.parent.parent.main_frame.status_label[selected_page][x].grid(row = dset_row, column = 2, sticky = 'nsew')
                if 'id' in list_fields:
                    # NOTE: backquotes are Python 2 repr(); `id` shadows the builtin here.
                    id = Tkinter.Label( frame, text = `dset.id`, bg = dcolor2, width = 6, relief = 'sunken')
                    id.grid(row = dset_row, column = 3, sticky = 'nsew')
                #ganz adding rows here...need to add versions
                #dset.name, versobj.version
                ver_1 = versobj.version
                if (ver_1 ==-1):
                    ver_1 = "N/A" # ganz TODO test this to see if this records the version 1/12/11
                """ Ganz: this code is a test to see if I can save the version label, 1/17/11
                    comment out the top code and insert the rest """
                #version = Tkinter.Label( frame, text = ver_1, bg = dcolor2, width = 6, relief = 'sunken')
                #version.grid(row = dset_row, column = 4, sticky = 'nsew') # width was 6 column = 4
                # Version column (column 4), kept so the refresh handler can re-read it.
                self.parent.parent.main_frame.version_label[selected_page][x] = Tkinter.Label( frame, text = ver_1, bg = dcolor2, width = 11, relief = 'sunken' )
                self.parent.parent.main_frame.version_label[selected_page][x].grid(row = dset_row, column = 4, sticky = 'nsew')
                """ end of test """
                #self.parent.parent.main_frame.version_label1[selected_page][x] = Tkinter.Label( frame, text = ver_1, bg = dcolor2, width = 6, relief = 'sunken')
                #self.parent.parent.main_frame.version_label1[selected_page][x].grid(row=dset_row,column = 4, columnspan=2, sticky = 'nsew')
                # create a menu
                # popup = Menu(version, tearoff=0)
                # popup.add_command(label="Show All Versions") # , command=next) etc...
                # popup.add_command(label="Show Latest Versions")
                # popup.add_separator()
                # popup.add_command(label="Home 3a")
                # def do_popupv1(event): # display the popup menu
                #    try:
                #       popup.tk_popup(event.x_root, event.y_root, 0)
                #    finally: # make sure to release the grab (Tk 8.0a1 only)
                #       popup.grab_release()
                # version.bind("<Button-3>", do_popupv1)
                # Dataset-name column (columns 5-6); text is the bare name, not name#version.
                self.parent.parent.main_frame.top_page_id2[selected_page][x].configure( width=71, relief='raised', bg = dcolor7, text=dset.name) #dsetVersionName)
                self.parent.parent.main_frame.top_page_id2[selected_page][x].grid(row=dset_row,column = 5, columnspan=2, sticky = 'nsew')
                # ganz add this code to enable users to view all the files/dates and times within a dataset
                # create a menu
                # popup = Menu(ok_err, tearoff=0)
                # popup.add_command(label="Show All Files", command=pub_controls.Command( self.file_display_button,dset ))
                # def do_popupv1(event): # display the popup menu
                #    try:
                #       popup.tk_popup(event.x_root, event.y_root, 0)
                #    finally: # make sure to release the grab (Tk 8.0a1 only)
                #       popup.grab_release()
                # self.parent.parent.main_frame.version_label[selected_page][x].bind("<Button-3>", self.evt_file_display_button)
                # Optional query-field columns (7-10), shown only when listed in list_fields.
                if 'project' in list_fields:
                    project = Tkinter.Label( frame, text = dset.get_project(self.Session), bg = dcolor3, width = 20, relief = 'sunken', borderwidth = 2)
                    project.grid(row = dset_row, column = 7, sticky = 'nsew')
                if 'model' in list_fields:
                    model = Tkinter.Label( frame, text = dset.get_model(self.Session), bg = dcolor4, width = 20, relief = 'sunken', borderwidth = 2)
                    model.grid(row = dset_row, column = 8, sticky = 'nsew')
                if 'experiment' in list_fields:
                    experiment = Tkinter.Label( frame, text = dset.get_experiment(self.Session), bg = dcolor5, width = 20, relief = 'sunken', borderwidth = 2)
                    experiment.grid(row = dset_row, column = 9, sticky = 'nsew')
                if 'run_name' in list_fields:
                    run_name = Tkinter.Label( frame, text = dset.get_run_name(self.Session), bg = dcolor6, width = 20, relief = 'sunken', borderwidth = 2)
                    run_name.grid(row = dset_row, column = 10, sticky = 'nsew')
        else: #GANZ tested removed and replaced this 3/20/2011
            # Row's dataset was not extracted: mark the whole row salmon / 'N/A'.
            frame = self.parent.parent.main_frame.add_row_frame[selected_page][x]
            ok_err = Tkinter.Button( frame, text = 'N/A', bg = 'salmon', highlightcolor = dcolor1, width = 4, relief = 'sunken') #was dcolor1
            ok_err.grid(row = dset_row, column = 1, sticky = 'nsew')
            status = Tkinter.Label( frame, text = 'N/A', bg = 'salmon', width = 10, relief = 'sunken')
            status.grid(row = dset_row, column = 2, sticky = 'nsew')
            id = Tkinter.Label( frame, text = 'N/A', bg = 'salmon', width = 6, relief = 'sunken')
            id.grid(row = dset_row, column = 3, sticky = 'nsew')
            # test ganz 1/17/11
            #version = Tkinter.Label( frame, text = 'N/A', bg = dcolor2, width = 4, relief = 'sunken')
            #version.grid(row = dset_row, column = 4, sticky = 'nsew') # was dcolor2
            self.parent.parent.main_frame.version_label[selected_page][x] = Tkinter.Label( frame, text = 'N/A', bg = dcolor2, width = 4, relief = 'sunken')
            self.parent.parent.main_frame.version_label[selected_page][x].grid(row = dset_row, column = 4, sticky = 'nsew')
            # same test as above
            self.parent.parent.main_frame.top_page_id2[selected_page][x].configure( width=71, relief='sunken', bg = 'salmon', fg = 'black' )
            self.parent.parent.main_frame.top_page_id2[selected_page][x].grid(row=dset_row,column = 5, columnspan=2, sticky = 'nsew')
        # NOTE(review): `x` is the dict iteration key, so this increment has no
        # effect on the loop -- presumably vestigial from an index-based version.
        x += 1
def return_content2(self, appendOpt=False):
    """Run the data-extraction step for the datasets selected on the current
    Collection page, then redisplay the results.

    Builds the dataset-name list from the selected (non-salmon) rows, resolves
    a project handler per dataset (on-line from the dataset map or directory
    map, off-line from an offline lister or a dataset map file), scans the
    datasets with iterateOverDatasets(), and finally refreshes the page via
    set_column_labels()/show_extracted_info().

    appendOpt -- when True, scan with UPDATE_OP (append to existing datasets)
                 instead of CREATE_OP.
    """
    from esgcet.publish import iterateOverDatasets, processIterator
    from esgcet.config import getHandlerByName
    from esgcet.model import eventName
    from esgcet.config import loadConfig

    # Initialize parameters for interating over datasets
    initcontext = {}
    aggregateOnly = False
    # appendOpt = False
    initcontext = {}
    properties = {}
    publish = False
    publishOnly = False
    thredds = False
    # [statusbar callback, start%, end%] progress descriptors for the two scan phases.
    testProgress1 = [self.parent.parent.statusbar.show, 0, 50]
    testProgress2 = [self.parent.parent.statusbar.show, 50, 100]
    handlerDictionary = {}

    # Get the currently selected tab and the selected datasets
    tab_name = self.parent.parent.top_notebook.getcurselection()
    selected_page = self.parent.parent.main_frame.selected_top_page
    datasetNames = []
    # datasetNames2 = []
    if (selected_page is None):
        warning("Must generate a list of datasets to scan before data extraction can occur.")
        return
    # NOTE(review): selected_page is not None past this point, so the first leg
    # of this disjunction is always true -- the condition is effectively vestigial.
    if (selected_page is not None) or (self.parent.parent.hold_offline[selected_page] == True):
        extraFields = None
        # On-line case: hold_offline is False (or a per-dataset dict of flags).
        if (self.parent.parent.hold_offline[selected_page] == False) or (isinstance(self.parent.parent.hold_offline[selected_page], types.DictType)):
            for x in self.parent.parent.main_frame.top_page_id[selected_page]:
                dsetVersionName = self.parent.parent.main_frame.top_page_id2[selected_page][x].cget('text')
                # GANZ TODO version_label
                # ganz added this 1/21/11
                if (self.parent.parent.main_frame.version_label[selected_page] ):
                    dset_name = self.parent.parent.main_frame.top_page_id2[selected_page][x].cget('text')
                    dsetVersion = self.parent.parent.main_frame.version_label[selected_page][x].cget('text')
                #####################################################################################
                else:
                    dset_name, dsetVersion = parseDatasetVersionId(dsetVersionName)
                # Retrieve all the datasets in the collection for display
                """ ganz test code
                status = pollDatasetPublicationStatus(dset_name, self.Session)
                status_text = pub_controls.return_status_text( status )
                if status_text != 'Error':
                    dsetTuple = parseDatasetVersionId(self.parent.parent.main_frame.top_page_id2[selected_page][x].cget('text'))
                    datasetNames2.append(dsetTuple)
                """
                # Retrieve only the datasets that have been selected
                # (salmon background marks deselected / invalid rows).
                if self.parent.parent.main_frame.top_page_id[selected_page][x].cget('bg') != 'salmon':
                    dsetTuple = parseDatasetVersionId(self.parent.parent.main_frame.top_page_id2[selected_page][x].cget('text'))
                    datasetNames.append(dsetTuple)

            dmap = self.parent.parent.main_frame.dmap[selected_page]
            extraFields = self.parent.parent.main_frame.extraFields[selected_page]
            datasetMapfile = self.parent.parent.main_frame.datasetMapfile[selected_page]
            projectName = self.parent.parent.main_frame.projectName[selected_page]
            directoryMap = self.parent.parent.directoryMap[selected_page]

            if dmap is not None:
                # Resolve a handler per dataset from the first file in its map entry.
                for x in datasetNames:
                    dsetId = x[0]
                    datasetName = x
                    try:
                        dmapentry = dmap[datasetName]
                    except:
                        # Check if the dataset map key was changed from (dsetname,-1) to (dsetname,version).
                        # If so, replace the entry with the new key.
                        trykey = (datasetName[0], -1)
                        dmapentry = dmap[trykey]
                        del dmap[trykey]
                        dmap[datasetName] = dmapentry
                    firstFile = dmapentry[0][0]
                    self.parent.parent.handlerDictionary[dsetId] = getHandlerByName(projectName, firstFile, self.Session)
                    handler = self.parent.parent.handlerDictionary[dsetId]
                # Copy the defaultGlobalValues into initcontext
                initcontext = self.parent.parent.main_frame.defaultGlobalValues[selected_page]
            else:
                # No dataset map: fall back to the directory map.
                # more test code
                myholdDirectoryMap = self.parent.parent.directoryMap[selected_page]
                #mydatasetNames = [(item,-1) for item in myholdDirectoryMap.keys()]
                mydatasetNames = [(item) for item in myholdDirectoryMap.keys()]
                #end
                for x in mydatasetNames:
                    dsetId = x[0]
                    datasetName = x
                    # ganz this is test code
                    try:
                        dmapentry = myholdDirectoryMap[datasetName]
                    except:
                        # Check if the dataset map key was changed from (dsetname,-1) to (dsetname,version).
                        # If so, replace the entry with the new key.
                        trykey = (datasetName[0], -1)
                        dmapentry = myholdDirectoryMap[trykey]
                        del myholdDirectoryMap[trykey]
                        myholdDirectoryMap[datasetName] = dmapentry
                    # NOTE(review): index [0][1] here vs [0][0] in the dmap branch --
                    # directory-map entries appear to carry the path in slot 1; confirm.
                    firstFile = dmapentry[0][1]
                    #end of test code
                    #firstFile = self.parent.parent.main_frame.dirp_firstfile[selected_page]
                    self.parent.parent.handlerDictionary[dsetId] = getHandlerByName(projectName, firstFile, self.Session)
                    handler = self.parent.parent.handlerDictionary[dsetId]
        else: # working off-line
            projectName = self.parent.parent.main_frame.projectName[selected_page]
            if self.parent.parent.offline_file_directory[selected_page] == "directory":
                # Build the dataset map by running the configured offline lister.
                if self.parent.parent.config is None:
                    extraction_controls.call_sessionmaker( self.parent.parent )
                datasetPaths = []
                dmap = {self.parent.parent.offline_datasetName : datasetPaths}
                listerSection = getOfflineLister(self.parent.parent.config, "project:%s"%projectName, None)
                offlineLister = self.parent.parent.config.get(listerSection, 'offline_lister_executable')
                lastargs = self.parent.parent.offline_directories
                commandArgs = "--config-section %s "%listerSection
                commandArgs += " ".join(lastargs)
                for filepath, size in processIterator(offlineLister, commandArgs, filefilt=self.parent.parent.filefilt):
                    datasetPaths.append((filepath, str(size)))
                datasetNames = self.parent.parent.datasetNames
                directoryMap = None
                # get the handler
                for x in datasetNames:
                    dsetId = x[0]
                    self.parent.parent.handlerDictionary[dsetId] = getHandlerByName(projectName, None, self.Session, offline=True)
            elif self.parent.parent.offline_file_directory[selected_page] == "file":
                # Off-line datasets described by a dataset map file.
                dmap = self.parent.parent.main_frame.dmap[selected_page]
                extraFields = self.parent.parent.main_frame.extraFields[selected_page]
                datasetMapfile = self.parent.parent.main_frame.datasetMapfile[selected_page]
                projectName = self.parent.parent.main_frame.projectName[selected_page]
                directoryMap = None
                if datasetMapfile is not None:
                    dmap, extraFields = readDatasetMap(datasetMapfile, parse_extra_fields=True)
                    datasetNames = dmap.keys()
                # get the handlers
                for x in datasetNames:
                    dsetId = x[0]
                    self.parent.parent.handlerDictionary[dsetId] = getHandlerByName(projectName, None, self.Session, offline=True)

        # Iterate over datasets
        if appendOpt:
            operation = UPDATE_OP
        else:
            operation = CREATE_OP

        datasets = iterateOverDatasets(projectName, dmap, directoryMap, datasetNames, self.Session, self.parent.parent.aggregateDimension, operation, self.parent.parent.filefilt, initcontext, self.parent.parent.hold_offline[selected_page], properties, comment=self.comments, testProgress1=testProgress1, testProgress2=testProgress2 , handlerDictionary=self.parent.parent.handlerDictionary, extraFields=extraFields, readFiles=True)

        # If working on-line then replace the scanned list of datasets with
        # the complete list of datasets
        #test
        """
        print 'datasetNames:'
        for t1 in datasetNames:
            print t1
        print 'datasetNames2:'
        for t2 in datasetNames2:
            print t2
        """
        if not self.parent.parent.hold_offline[selected_page]:
            datasets = []
            versionObjs = []
            # ganz finally, tested datasetNames2 here
            for dsetName, version in datasetNames:
                result = Dataset.lookup(dsetName, self.Session, version=version)
                if result is not None:
                    entry, versionObj = result
                    datasets.append(entry)
                    versionObjs.append(versionObj)

        # Get the summary of errors after doing a data extraction
        dset_error = []
        for dset in datasets:
            status = dset.get_publication_status(self.Session)
            status_name = eventName[status]
            if dset.has_warnings(self.Session):
                dset_error.append(dset.get_name(self.Session))

        # `handler` is only bound in the on-line branches above; the bare except
        # also covers that NameError by resolving a default handler here.
        try:
            list_fields = getQueryFields( handler )
        except:
            handler = getHandlerByName(projectName, None, self.Session)
            list_fields = getQueryFields( handler )

        # Display the datasets in the "Collection" page
        #  if self.parent.parent.hold_offline[selected_page] == True:
        #     tab_name = "Collection_Offline"
        #     from_tab = "Collection"
        #     pub_editorviewer = self.parent.parent.create_publisher_editor_viewer( self.parent.parent, tab_name, dataset, from_tab, self.Session)

        # Show the extracted datasets
        self.set_column_labels( len(datasets), list_fields )
        self.show_extracted_info(datasets, dset_error, list_fields, versionObjs)

        # Enable the "Data Publication" button
        self.parent.ControlButton3.configure( state = 'normal' )
def evt_refresh_list_of_datasets(self, selected_page):
    """Re-poll the publication status of every selected (raised) dataset row
    on `selected_page`, updating each row's status label and Ok/Error/Warning
    button.

    Shows the busy cursor for the duration.  Rows whose status label cannot be
    updated (e.g. non-selected red entries) are skipped.  Datasets with no
    local database entry are flagged as a yellow "Warning".
    """
    # Start the busy routine to indicate to the users something is happening
    self.parent.parent.busyCursor = "watch"
    self.parent.parent.busyWidgets = [
        self.parent.parent.pane2.pane("EditPaneTop"),
        self.parent.parent.pane2.pane("EditPaneBottom"),
        self.parent.parent.pane2.pane("EditPaneStatus"),
        self.parent.parent.pane.pane("ControlPane"),
    ]
    pub_busy.busyStart(self.parent.parent)
    try:
        if self.parent.parent.refreshButton[selected_page].cget("relief") == "raised":
            for x in self.parent.parent.main_frame.top_page_id[selected_page]:
                # Only refresh rows the user has selected (relief == "raised").
                if self.parent.parent.main_frame.top_page_id[selected_page][x].cget("relief") == "raised":
                    dsetVersionName = self.parent.parent.main_frame.top_page_id2[selected_page][x].cget("text")
                    # ganz added this 1/18/11
                    query_name = self.parent.parent.main_frame.top_page_id2[selected_page][x].cget("text")
                    versionNum = self.parent.parent.main_frame.version_label[selected_page][x].cget("text")
                    # The parsed id is authoritative; it overwrites the widget reads above.
                    query_name, versionNum = parseDatasetVersionId(dsetVersionName)

                    status = pollDatasetPublicationStatus(query_name, self.Session)
                    # ganz catch non selected Red entries to skip 3/20/2011
                    try:
                        self.parent.parent.main_frame.status_label[selected_page][x].configure(
                            text=pub_controls.return_status_text(status)
                        )
                    except:
                        continue

                    # Make sure you update the Ok/Err button.
                    # dset can be None when no local database entry exists.
                    dset = Dataset.lookup(query_name, self.Session)
                    if dset is None:
                        buttonColor = "yellow"
                        buttonText = "Warning"
                        self.parent.parent.main_frame.ok_err[selected_page][x].configure(
                            bg=buttonColor, text=buttonText
                        )
                    elif dset.has_warnings(self.Session):
                        warningLevel = dset.get_max_warning_level(self.Session)
                        if warningLevel >= ERROR_LEVEL:
                            buttonColor = "pink"
                            buttonText = "Error"
                        else:
                            buttonColor = "yellow"
                            buttonText = "Warning"
                        self.parent.parent.main_frame.ok_err[selected_page][x].configure(
                            bg=buttonColor, text=buttonText
                        )
    finally:
        # FIX: busyEnd() used to be called from BOTH an `except` handler and
        # this `finally` clause, ending the busy state twice whenever the loop
        # raised.  The `finally` alone covers success and failure, and any
        # exception still propagates to the caller unchanged.
        pub_busy.busyEnd(self.parent.parent)
        info("Completed refreshing the display.")
def main(argv): try: args, lastargs = getopt.getopt(argv, "hi:", [ 'database-delete', 'database-only', 'echo-sql', 'map=', 'no-republish', 'no-thredds-reinit', 'skip-gateway', 'skip-index', 'las', 'log=', 'rest-api', 'skip-thredds', 'sync-thredds', 'use-list=' ]) except getopt.error: print sys.exc_value return deleteAll = False datasetMap = None deleteDset = False unpublishOnGateway = False echoSql = False init_file = None gatewayOp = DELETE las = False log_filename = None republish = True restApi = None thredds = True syncThredds = False useList = False threddsReinit = True for flag, arg in args: if flag == '--database-delete': deleteDset = True elif flag == '--database-only': gatewayOp = NO_OPERATION thredds = False deleteDset = True elif flag == '--echo-sql': echoSql = True elif flag in ['-h', '--help']: return elif flag == '-i': init_file = arg elif flag == '--map': datasetMap = readDatasetMap(arg) elif flag == '--skip-gateway': gatewayOp = NO_OPERATION elif flag == '--skip-index': gatewayOp = NO_OPERATION elif flag == '--las': las = True elif flag == '--log': log_filename = arg elif flag == '--no-republish': republish = False elif flag == '--no-thredds-reinit': threddsReinit = False elif flag == '--rest-api': restApi = True elif flag == '--skip-thredds': thredds = False elif flag == '--sync-thredds': syncThredds = True elif flag == '--use-list': useList = True useListPath = arg if gatewayOp != NO_OPERATION and unpublishOnGateway: gatewayOp = UNPUBLISH # Load the configuration and set up a database connection config = loadConfig(init_file) engine = create_engine(config.getdburl('extract'), echo=echoSql, pool_recycle=3600) initLogging('extract', override_sa=engine, log_filename=log_filename) Session = sessionmaker(bind=engine, autoflush=True, autocommit=False) if config is None: raise ESGPublishError("No configuration file found.") threddsRoot = config.get('DEFAULT', 'thredds_root') # Get the default publication interface (REST or Hessian) if restApi is None: 
restApi = config.getboolean('DEFAULT', 'use_rest_api', default=False) if datasetMap is None: if not useList: datasetNames = [parseDatasetVersionId(item) for item in lastargs] else: if useListPath == '-': namelist = sys.stdin else: namelist = open(useListPath) datasetNames = [] for line in namelist.readlines(): versionId = parseDatasetVersionId(line.strip()) datasetNames.append(versionId) else: datasetNames = datasetMap.keys() datasetNames.sort() result = deleteDatasetList(datasetNames, Session, gatewayOp, thredds, las, deleteDset, deleteAll=deleteAll, republish=republish, reinitThredds=threddsReinit, restInterface=restApi) # Republish previous versions as needed. This will happen if the latest version # was deleted from the database, and is not # the only version. In this case the previous version will be rescanned to generate the aggregations. if republish: statusDict, republishList = result if len(republishList) > 0: # Register project handlers. registerHandlers() info("Republishing modified datasets:") republishDatasetNames = [ generateDatasetVersionId(dsetTuple) for dsetTuple in republishList ] dmap, offline = queryDatasetMap(republishDatasetNames, Session) datasetNames = dmap.keys() datasets = iterateOverDatasets(None, dmap, None, republishList, Session, "time", UPDATE_OP, None, {}, offline, {}, forceAggregate=True) republishOp = (gatewayOp != NO_OPERATION ) # Don't republish if skipping the gateway op result = publishDatasetList(datasetNames, Session, publish=republishOp, thredds=thredds) # Synchronize database and THREDDS catalogs if syncThredds: threddsRoot = config.get('DEFAULT', 'thredds_root') # Make a dictionary of catalogs from the database session = Session() subcatalogs = session.query(Catalog).select_from( join(Catalog, Dataset, Catalog.dataset_name == Dataset.name)).all() catdict = {} for catalog in subcatalogs: location = os.path.join(threddsRoot, catalog.location) catdict[location] = 1 session.close() # Scan all XML files in the threddsroot 
os.path.walk(threddsRoot, cleanupCatalogs, catdict)