def find_checksum( self ):

    # Attempt to connect DB. If failure, abort
    if not self.connect():
        self.error('Cannot connect to DB! Aborting...')
        return

    # If resource info is not yet read-in, read in.
    if self._nruns is None:
        self.get_resource()

    # Fetch runs from DB and process for
    # runs specified for this instance.
    ctr = self._nruns
    for x in self.get_xtable_runs( [self._project, self._parent_project], [2, 0] ):

        # Counter decreases by 1
        ctr -= 1

        (run, subrun) = (int(x[0]), int(x[1]))

        # Report starting
        self.info('Checking tape: run=%d, subrun=%d ...' % (run, subrun))

        statusCode = 2
        in_file = self._infile_format % ( run, subrun )

        samweb = samweb_cli.SAMWebClient(experiment="uboone")

        meta = {}
        try:
            meta = samweb.getMetadata(filenameorid=in_file)
            checksum_info = meta['checksum'][0].split(':')
            if checksum_info[0] == 'enstore':
                self._data = checksum_info[1]
                statusCode = 0
            else:
                statusCode = 1
        except samweb_cli.exceptions.FileNotFound:
            subject = 'Failed to locate file %s at SAM' % in_file
            text = 'File %s is not found at SAM!' % in_file
            pub_smtp( os.environ['PUB_SMTP_ACCT'],
                      os.environ['PUB_SMTP_SRVR'],
                      os.environ['PUB_SMTP_PASS'],
                      self._experts, subject, text )
            statusCode = 100

        # Create a status object to be logged to DB (if necessary)
        status = ds_status( project = self._project,
                            run     = run,
                            subrun  = subrun,
                            seq     = 0,
                            status  = statusCode,
                            data    = self._data )

        # Log status
        self.log_status( status )

        # Break from loop if counter became 0
        if not ctr:
            break
def samweb():

    global samweb_obj

    if samweb_obj is None:
        samweb_obj = samweb_cli.SAMWebClient(experiment=get_experiment())
        os.environ['SSL_CERT_DIR'] = '/etc/grid-security/certificates'

    return samweb_obj
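# A minimal usage sketch (assumed, not from the original source) for the lazy
# samweb() accessor above. The module is expected to define the cache and an
# experiment lookup elsewhere, e.g.:
#
#   samweb_obj = None
#   def get_experiment(): ...
#
# Callers then share one SAMWebClient per process instead of constructing a
# new client on every call:
def count_files_in_definition(defname):
    # Reuse the cached client; listFiles accepts a dataset definition name.
    return len(samweb().listFiles(defname=defname))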
def validate_sam( self ):

    # Attempt to connect DB. If failure, abort
    if not self.connect():
        self.error('Cannot connect to DB! Aborting...')
        return

    # If resource info is not yet read-in, read in.
    if self._nruns is None:
        self.get_resource()

    self.info('Here, self._nruns=%d ... ' % (self._nruns))

    # Fetch runs from DB and process for
    # runs specified for this instance.
    ctr = self._nruns
    for x in self.get_xtable_runs([self._project, self._parent_project], [12, 0]):

        # Counter decreases by 1
        ctr -= 1

        (run, subrun) = (int(x[0]), int(x[1]))

        # Report starting
        self.info('Checking the SAM declaration: run=%d, subrun=%d ...' % (run, subrun))

        status = 12
        in_file_base = self._infile_format % ( run, subrun )

        samweb = samweb_cli.SAMWebClient(experiment="uboone")

        # Check if the file already exists at SAM
        try:
            samweb.getMetadata(filenameorid=in_file_base)
            status = 10
        except samweb_cli.exceptions.FileNotFound:
            status = 1

        # Pretend I'm doing something
        time.sleep(1)

        # Create a status object to be logged to DB (if necessary)
        status = ds_status( project = self._project,
                            run     = run,
                            subrun  = subrun,
                            seq     = 0,
                            status  = status )

        # Log status
        self.log_status( status )

        # Break from loop if counter became 0
        if not ctr:
            break
def getFileListFromDefinition(defname):

    samweb = samweb_cli.SAMWebClient(experiment='uboone')

    flist = []
    try:
        # Query the files in the definition passed in (not the global args.defname).
        flist = samweb.listFiles(defname=defname)
    except Exception:
        print "Failed to get the list of files in %s" % defname
        sys.exit(1)

    flist.sort()

    if (not args.noheader):
        print "Definition %s contains %i files" % (defname, len(flist))

    return flist
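# getFileListFromDefinition() above still reads a global args (for --noheader).
# A minimal argparse setup consistent with that usage, assumed rather than
# taken from the original script:
import argparse

parser = argparse.ArgumentParser(description='List the files in a SAM dataset definition.')
parser.add_argument('defname', help='SAM dataset definition name')
parser.add_argument('--noheader', action='store_true', help='suppress the summary line')
args = parser.parse_args()

flist = getFileListFromDefinition(args.defname)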
def metadataValidation(infile, jsonData):

    jsonFileName = str(infile) + ".json"

    # Check if we actually generated the json file.
    # If not, exit with error.
    if not os.path.isfile(jsonFileName):
        sys.exit(str(infile) + ": Metadata File Not Found")

    # Set up the experiment for SAM
    samweb = samweb_cli.SAMWebClient(experiment="uboone")

    # Check if metadata is valid, or exit with error.
    # If it takes too much time, exit with error.
    signal.signal(signal.SIGALRM, handler)
    signal.alarm(4)
    try:
        samweb.validateFileMetadata(jsonData)
        signal.alarm(0)  # cancel the pending alarm on success
        return True
    except Exception:
        sys.exit(str(infile) + " : Invalid/Corrupted Metadata")
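# metadataValidation() above installs a SIGALRM handler named handler() that is
# not shown in this snippet. A minimal sketch of what it presumably looks like
# (assumed, not the original): raise an exception so a validateFileMetadata()
# call that hangs past the 4-second alarm is caught by the surrounding
# try/except and turned into a clean sys.exit().
import signal

def handler(signum, frame):
    # Invoked by the kernel when the alarm set by signal.alarm(4) expires.
    raise Exception("SAM metadata validation timed out")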
def get_dropbox(filename):

    # Get metadata.
    md = {}
    exp = 'sbnd'
    if 'SAM_EXPERIMENT' in os.environ:
        exp = os.environ['SAM_EXPERIMENT']
    samweb = samweb_cli.SAMWebClient(experiment=exp)
    try:
        md = samweb.getMetadata(filenameorid=filename)
    except Exception:
        pass

    # Extract the metadata fields that we need.
    file_type = ''
    group = ''
    data_tier = ''
    if 'file_type' in md:
        file_type = md['file_type']
    if 'group' in md:
        group = md['group']
    if 'data_tier' in md:
        data_tier = md['data_tier']
    if not file_type or not group or not data_tier:
        raise RuntimeError('Missing or invalid metadata for file %s.' % filename)

    # Construct dropbox path.
    #path = '/sbnd/data/sbndsoft/dropbox/%s/%s/%s' % (file_type, group, data_tier)
    if 'FTS_DROPBOX' in os.environ:
        dropbox_root = os.environ['FTS_DROPBOX']
    else:
        dropbox_root = '/pnfs/sbnd/scratch/sbndpro/dropbox'
    path = '%s/%s/%s/%s' % (dropbox_root, file_type, group, data_tier)

    return path
import ifdh
import os
import pprint
import samweb_cli
import subprocess

samweb = samweb_cli.SAMWebClient(experiment='dune')

#files = samweb.listFiles("file_name like snb_timedep_dune10kt_1x2x6%root%")
files = samweb.listFiles("file_name like snb_radio_dune10kt_1x2x6%root%")
#print(files)
#pprint.pprint(list(files))

locs = []
IFDH = ifdh.ifdh()

for f in files:
    # The samweb location is printed for reference only; the IFDH lookup
    # below determines the path that is actually kept.
    cloc = samweb.locateFile(f)
    print(cloc[0])

    # Resolve the file location through IFDH, stripping the trailing "(...)"
    # annotation and the leading "protocol:" prefix from the location string.
    loc = IFDH.locateFile(f)
    sLoc = loc[0].partition("(")
    sLoc = sLoc[0].partition(":")
    loc = sLoc[-1] + "/" + f
    print(loc)
    locs.append(loc)
    print(locs[-1])
def process_newruns(self):

    # Attempt to connect DB. If failure, abort
    if not self.connect():
        self.error('Cannot connect to DB! Aborting...')
        return

    # If resource info is not yet read-in, read in.
    if self._nruns is None:
        self.get_resource()
    # self.info('Here, self._nruns=%d ... ' % (self._nruns))

    # Fetch runs from DB and process for
    # runs specified for this instance.
    ctr = self._nruns
    for x in self.get_xtable_runs([self._project, self._parent_project], [1, 0]):

        # Counter decreases by 1
        ctr -= 1

        (run, subrun) = (int(x[0]), int(x[1]))

        # Report starting
        self.info('processing new run: run=%d, subrun=%d ...' % (run, subrun))

        status = 1

        in_file_holder = '%s/%s' % (self._in_dir, self._infile_format % (run, subrun))
        filelist = glob.glob(in_file_holder)
        if (len(filelist) < 1):
            self.error('ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!' % (run, subrun))
            status_code = 100
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=status_code)
            self.log_status(status)
            continue

        if (len(filelist) > 1):
            self.error('ERROR: Found too many files for (run,subrun) = %s @ %s !!!' % (run, subrun))
            self.error('ERROR: List of files found %s' % filelist)

        in_file = filelist[0]
        out_file = '%s.json' % in_file
        self.info('Found %s' % (in_file))

        if in_file.strip().split('.')[-1] == "ubdaq":
            status, jsonData = self.get_ubdaq_metadata(in_file, run, subrun)
        else:
            try:
                jsonData = extractor_dict.getmetadata(in_file)
                status = 3
                self.info('Successfully extracted metadata from the swizzled file.')
            except Exception:
                status = 100
                self.error('Failed extracting metadata from the swizzled file.')

        if not status == 100:
            with open(out_file, 'w') as ofile:
                json.dump(jsonData, ofile, sort_keys=True, indent=4, ensure_ascii=False)

            # To Eric: what are you doing here?
            try:
                samweb = samweb_cli.SAMWebClient(experiment="uboone")
                # samweb.validateFileMetadata(json_file)  # this throws/raises exception
                status = 2
            except Exception:
                self.error("Problem with samweb metadata: %s" % jsonData)
                self.error(str(sys.exc_info()[0]))
                status = 100
        else:
            status = 1000
            self.error('Did not find the input file %s' % in_file)

        # Pretend I'm doing something
        time.sleep(1)

        # Create a status object to be logged to DB (if necessary)
        status = ds_status(project=self._project,
                           run=run,
                           subrun=subrun,
                           seq=0,
                           status=status)

        # Log status
        self.log_status(status)

        # Break from loop if counter became 0
        if not ctr:
            break
def declare_to_sam(self):

    # Attempt to connect DB. If failure, abort
    if not self.connect():
        self.error('Cannot connect to DB! Aborting...')
        return

    # If resource info is not yet read-in, read in.
    if self._nruns is None:
        self.get_resource()
    # self.info('Here, self._nruns=%d ... ' % (self._nruns))

    self._project_requirement[0] = 1

    # Fetch runs from DB and process for
    # runs specified for this instance.
    ctr = self._nruns
    for x in self.get_xtable_runs(self._project_list, self._project_requirement):

        # Counter decreases by 1
        ctr -= 1

        (run, subrun) = (int(x[0]), int(x[1]))

        # Report starting
        self.info('Declaring a file to SAM: run=%d, subrun=%d ...' % (run, subrun))

        status = 1

        # Check input file exists. Otherwise report error
        in_file_holder = '%s/%s' % (self._in_dir, self._infile_format % (run, subrun))
        filelist = glob.glob(in_file_holder)
        if (len(filelist) < 1):
            self.error('ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!' % (run, subrun))
            status_code = 100
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=status_code)
            self.log_status(status)
            continue

        if (len(filelist) > 1):
            self.error('ERROR: Found too many files for (run,subrun) = %s @ %s !!!' % (run, subrun))
            self.error('ERROR: List of files found %s' % filelist)

        in_file = filelist[0]
        in_json = '%s.json' % in_file
        self.info('Declaring ' + in_file + ' to SAM: using ' + in_json)

        if os.path.isfile(in_file) and os.path.isfile(in_json):
            self.info('Found %s' % (in_file))
            self.info('Found %s' % (in_json))

            json_dict = json.load(open(in_json))

            # native SAM python call, instead of a system call
            # make sure you've done get-cert
            # Perhaps we want a try block for samweb?
            samweb = samweb_cli.SAMWebClient(experiment="uboone")

            # Check if the file already exists at SAM
            try:
                in_file_base = os.path.basename(in_file)
                samweb.getMetadata(filenameorid=in_file_base)
                status = 101

                # Email the experts
                subject = 'File %s Existing at SAM' % in_file_base
                text = """
File %s already exists at SAM!
""" % in_file_base
                pub_smtp(os.environ['PUB_SMTP_ACCT'],
                         os.environ['PUB_SMTP_SRVR'],
                         os.environ['PUB_SMTP_PASS'],
                         self._experts, subject, text)

            except samweb_cli.exceptions.FileNotFound:
                # metadata already validated in get_assembler_metadata_file.py
                try:
                    samweb.declareFile(md=json_dict)
                    status = 2
                except Exception as e:
                    self.error("Unexpected error: samweb declareFile problem: ")
                    self.error("%s" % e)
                    subject = "samweb declareFile problem: %s" % in_file_base
                    text = """
File %s failed to be declared to SAM!
%s
""" % (in_file_base, traceback.format_exc())
                    pub_smtp(os.environ['PUB_SMTP_ACCT'],
                             os.environ['PUB_SMTP_SRVR'],
                             os.environ['PUB_SMTP_PASS'],
                             self._experts, subject, text)

                    # print "Give some null properties to this meta data"
                    self.error("Give this file a status 102")
                    status = 102
        else:
            status = 100

        # Pretend I'm doing something
        time.sleep(1)

        # Create a status object to be logged to DB (if necessary)
        status = ds_status(project=self._project,
                           run=run,
                           subrun=subrun,
                           seq=0,
                           status=status)

        # Log status
        self.log_status(status)

        # Break from loop if counter became 0
        if not ctr:
            break
def validate_sam(self):

    # Attempt to connect DB. If failure, abort
    if not self.connect():
        self.error('Cannot connect to DB! Aborting...')
        return

    # If resource info is not yet read-in, read in.
    if self._nruns is None:
        self.get_resource()
    # self.info('Here, self._nruns=%d ... ' % (self._nruns) )

    self._project_requirement[0] = 2

    # Fetch runs from DB and process for
    # runs specified for this instance.
    ctr = self._nruns
    #for x in [(391,10,0,0)]:
    for x in self.get_xtable_runs(self._project_list, self._project_requirement):

        # Counter decreases by 1
        ctr -= 1

        (run, subrun) = (int(x[0]), int(x[1]))

        # Report starting
        self.info('Checking the SAM declaration: run=%d, subrun=%d ...' % (run, subrun))

        status = 12

        in_file_holder = '%s/%s' % (self._in_dir, self._infile_format % (run, subrun))
        filelist = glob.glob(in_file_holder)
        if (len(filelist) < 1):
            self.error('ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!' % (run, subrun))
            status_code = 100
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=status_code)
            self.log_status(status)
            continue

        if (len(filelist) > 1):
            self.error('ERROR: Found too many files for (run,subrun) = %s @ %s !!!' % (run, subrun))
            self.error('ERROR: List of files found %s' % filelist)

        in_file = filelist[0]
        in_file_base = os.path.basename(in_file)

        samweb = samweb_cli.SAMWebClient(experiment="uboone")

        # Check if the file already exists at SAM
        try:
            samweb.getMetadata(filenameorid=in_file_base)
            status = 0
        except samweb_cli.exceptions.FileNotFound:
            status = 1

        # Pretend I'm doing something
        time.sleep(1)

        # Create a status object to be logged to DB (if necessary)
        status = ds_status(project=self._project,
                           run=run,
                           subrun=subrun,
                           seq=0,
                           status=status)

        # Log status
        self.log_status(status)

        # Break from loop if counter became 0
        if not ctr:
            break
def process_newruns(self):

    # Attempt to connect DB. If failure, abort
    if not self.connect():
        self.error('Cannot connect to DB! Aborting...')
        return

    # If resource info is not yet read-in, read in.
    if self._nruns is None:
        self.get_resource()

    self.info('Here, self._nruns=%d ... ' % (self._nruns))

    # Fetch runs from DB and process for
    # runs specified for this instance.
    ctr = self._nruns

    # Below picks up successfully swizzled files
    for x in self.get_xtable_runs([self._project, self._parent_project], [1, 0]):

        # Counter decreases by 1
        ctr -= 1

        (run, subrun) = (int(x[0]), int(x[1]))

        # Report starting
        self.info('processing new run: run=%d, subrun=%d ...' % (run, subrun))

        status = 100

        # Check input file exists. Otherwise report error
        in_file = '%s/%s' % (self._in_dir, self._infile_format % (run, subrun))
        print 'Looking for %s' % (in_file)

        if os.path.isfile(in_file):
            self.info('Found %s' % (in_file))

            # Check metadata
            has_metadata = False
            try:
                samweb = samweb_cli.SAMWebClient(experiment="uboone")
                md = samweb.getMetadata(filenameorid=in_file)
                print 'Here 0: %s' % md
                self.info('Weirdly, metadata already registered in SAM for %s. ... ' % (in_file))
                has_metadata = True
            except Exception:
                pass

            # Should be that metadata is in the artroot file. (But not yet declared it to SAM.)
            # Thus, retrieve metadata from file; use it to declare file with SAM.
            if not has_metadata:
                try:
                    self.info(' I feel a couple woos comin on, cus ')
                    md = extractor_dict.getmetadata(in_file)
                    self.info(' ... there it was. ... (Just extracted the meta data from root file.) ')
                    status = 3
                    try:
                        samweb = samweb_cli.SAMWebClient(experiment="uboone")
                        samweb.declareFile(md=md)
                        status = 2
                        self.info('Successful extraction of artroot metadata and declaring it to SAM for %s. ... ' % (in_file))
                    except Exception:
                        self.info('Failed declaring metadata to SAM for %s. ... ' % (in_file))
                except Exception:
                    self.info('Failed extracting artroot metadata for %s. ... ' % (in_file))

        # Create a status object to be logged to DB (if necessary)
        status = ds_status(project=self._project,
                           run=run,
                           subrun=subrun,
                           seq=0,
                           status=status)

        # Log status
        self.log_status(status)

        # Break from loop if counter became 0
        if not ctr:
            break
# This script will take a file that has been processed at ANL, modify its parent
# to be the swizzled file, add in the swizzler version to the metadata and then
# create a json file for declaring to SAM
# To run do python modify_metadata.py <your files>
# ------------------------------------------------------------------------------
import sys
import time

import extractor_dict
import samweb_cli

# Turn this on to print the metadata dict
# 0 = not much printed
# 1 = print input files
# 2 = print the metadata dictionaries
debug = 1

# Start time of script
start = time.time()

samweb = samweb_cli.SAMWebClient(experiment='uboone')

for infile in sys.argv[1:]:

    if (debug == 1):
        print infile

    # Get the file metadata
    mdgen = extractor_dict.expMetaData('uboone', infile)
    md = mdgen.getmetadata()

    # Modify the parent file to be the swizzled file
    # (SAM expects 'parents' to be a list of entries, not a bare string).
    parents = md['parents']
    parent_name = parents[0]['file_name'].split("_event", 1)[0] + ".root"
    md['parents'] = [{'file_name': parent_name}]

    # Now we want to get the swizzle version and add this to the metadata
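# The header comment says the script ends by writing a json file for declaring
# to SAM. A minimal sketch of that final step (assumed, not the original code),
# matching the <file>.json convention and the json.dump style used by the
# declare_to_sam projects above:
import json

def write_metadata_json(infile, md):
    # Write the modified metadata dictionary alongside the input file.
    with open(str(infile) + '.json', 'w') as ofile:
        json.dump(md, ofile, sort_keys=True, indent=4)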
def is_intstring(s):
    try:
        int(s)
        return True
    except ValueError:
        return False


def _chunk(iterable, chunksize):
    """ Helper to divide an iterable into chunks of a given size """
    iterator = iter(iterable)
    from itertools import islice
    while True:
        chunk = list(islice(iterator, chunksize))
        if chunk:
            yield chunk
        else:
            return


try:
    samweb = samweb_cli.SAMWebClient(experiment="lariat")
except Exception:
    raise Exception('Not able to open up the SAMWeb connection')

query = "run_number = -1"
if is_intstring(sys.argv[1]):
    query = "run_number = %d" % (int(sys.argv[1]))

try:
    list_o_files = samweb.listFiles(query)
except Exception:
    raise Exception('Unable to get list of files for this query: ' + query)

for filenames in _chunk(list_o_files, 10):
    mdlist = samweb.getMultipleMetadata(filenames)
def declare_to_sam(self):

    # Attempt to connect DB. If failure, abort
    if not self.connect():
        self.error('Cannot connect to DB! Aborting...')
        return

    # If resource info is not yet read-in, read in.
    if self._nruns is None:
        self.get_resource()

    self.info('Here, self._nruns=%d ... ' % (self._nruns))

    # Fetch runs from DB and process for
    # runs specified for this instance.
    ctr = self._nruns
    for x in self.get_xtable_runs([self._project, self._parent_project], [1, 0]):

        # Counter decreases by 1
        ctr -= 1

        (run, subrun) = (int(x[0]), int(x[1]))

        # Report starting
        self.info('Declaring a file to SAM: run=%d, subrun=%d ...' % (run, subrun))

        status = 1

        # Check input file exists. Otherwise report error
        in_file_base = self._infile_format % ( run, subrun )
        in_file = '%s/%s' % ( self._in_dir, in_file_base )
        in_json = '%s/%s.json' % ( self._meta_dir, in_file_base )

        if os.path.isfile(in_file) and os.path.isfile(in_json):
            self.info('Found %s' % (in_file))
            self.info('Found %s' % (in_json))

            json_dict = json.load( open( in_json ) )

            # native SAM python call, instead of a system call
            # make sure you've done get-cert
            # Perhaps we want a try block for samweb?
            samweb = samweb_cli.SAMWebClient(experiment="uboone")

            # Check if the file already exists at SAM
            try:
                samweb.getMetadata(filenameorid=in_file_base)
                status = 101
                # Want to email the experts
            except samweb_cli.exceptions.FileNotFound:
                # metadata already validated in get_assembler_metadata_file.py
                try:
                    samweb.declareFile(md=json_dict)
                    status = 12
                except Exception:
                    print "Unexpected error: samweb declareFile problem: "
                    traceback.print_exc()
                    # print "Give some null properties to this meta data"
                    print "Give this file a status 11"
                    status = 11
        else:
            status = 100

        # Pretend I'm doing something
        time.sleep(1)

        # Create a status object to be logged to DB (if necessary)
        status = ds_status( project = self._project,
                            run     = run,
                            subrun  = subrun,
                            seq     = 0,
                            status  = status )

        # Log status
        self.log_status( status )

        # Break from loop if counter became 0
        if not ctr:
            break
def getDataGivenFileList(flist, r):
    # Query SAM for each file in the file list and get the run and subrun
    # processed from the metadata; put that into the dictionary
    # rslist[run] = list_of_subruns.

    con = sqlite3.connect("%s/run.db" % dbdir)
    con.row_factory = sqlite3.Row
    cur = con.cursor()
    cur.execute("ATTACH DATABASE '%s/bnb_v%i.db' AS bnb" % (dbdir, version))
    cur.execute("ATTACH DATABASE '%s/numi_v%i.db' AS numi" % (dbdir, version))

    cfgDB = confDB.confDB()

    bnbwarn = False
    numiwarn = False
    otherwarn = False
    prescalewarn = False

    samweb = samweb_cli.SAMWebClient(experiment='uboone')
    try:
        meta = samweb.getMetadataIterator(flist)
    except Exception as e:
        print "Failed to get metadata from SAM."
        print "Make sure to setup sam_web_client v2_1 or higher."
        print e
        sys.exit(0)

    missbnb = {}
    missnumi = {}
    missother = {}
    missprescale = {}

    mcount = 0
    for m in meta:
        mcount += 1
        for rs in m['runs']:
            pf = None
            if prescaleFactor:
                pf = cfgDB.getAllPrescaleFactors(int(rs[0]))
                if pf is not None:
                    for pfkey in pf:
                        if pfkey not in r:
                            r[pfkey] = 0
            query = "%s WHERE r.run=%i AND r.subrun=%i" % (dbquerybase, int(rs[0]), int(rs[1]))
            cur.execute(query)
            row = cur.fetchone()
            for k in r:
                if k in row.keys() and row[k] is not None:
                    if pf is not None:
                        for pfkey in pf:
                            if "EXT" in k and "EXT_" in pfkey:
                                r[pfkey] += pf[pfkey] * row[k]
                            elif "Gate1" in k and "NUMI_" in pfkey:
                                r[pfkey] += pf[pfkey] * row[k]
                            elif "Gate2" in k and "BNB_" in pfkey:
                                r[pfkey] += pf[pfkey] * row[k]
                    elif prescaleFactor:
                        if rs[0] not in missprescale:
                            missprescale[rs[0]] = [rs[1]]
                        elif rs[1] not in missprescale[rs[0]]:
                            missprescale[rs[0]].append(rs[1])
                        prescalewarn = True
                    r[k] += row[k]
                elif k in bnbcols:
                    if rs[0] not in missbnb:
                        missbnb[rs[0]] = [rs[1]]
                    elif rs[1] not in missbnb[rs[0]]:
                        missbnb[rs[0]].append(rs[1])
                    bnbwarn = True
                elif k in numicols:
                    if rs[0] not in missnumi:
                        missnumi[rs[0]] = [rs[1]]
                    elif rs[1] not in missnumi[rs[0]]:
                        missnumi[rs[0]].append(rs[1])
                    numiwarn = True
                elif k == "EXT":
                    if rs[0] not in missother:
                        missother[rs[0]] = [rs[1]]
                    elif rs[1] not in missother[rs[0]]:
                        missother[rs[0]].append(rs[1])
                    otherwarn = True

    r['bnbwarn'] = bnbwarn
    r['numiwarn'] = numiwarn
    r['otherwarn'] = otherwarn
    r['prescalewarn'] = prescalewarn
    r['missbnb'] = missbnb
    r['missnumi'] = missnumi
    r['missother'] = missother
    r['missprescale'] = missprescale

    if mcount != len(flist):
        print "Warning! Did not get metadata for all files. Looped through %i files, but only got metadata for %i. Check list for repeats or bad file names." % (len(flist), mcount)
        logging.debug("Warning! Did not get metadata for all files.")

    con.close()
    return
def process_newruns(self):

    # Attempt to connect DB. If failure, abort
    if not self.connect():
        self.error('Cannot connect to DB! Aborting...')
        return

    # If resource info is not yet read-in, read in.
    if self._nruns is None:
        self.get_resource()
    # self.info('Here, self._nruns=%d ... ' % (self._nruns))

    # Fetch runs from DB and process for
    # runs specified for this instance.
    ctr = self._nruns
    for x in self.get_xtable_runs([self._project, self._parent_project], [1, 0]):

        # Counter decreases by 1
        ctr -= 1

        (run, subrun) = (int(x[0]), int(x[1]))

        # Report starting
        self.info('processing new run: run=%d, subrun=%d ...' % (run, subrun))

        status = 1

        # Check input file exists. Otherwise report error
        in_file = '%s/%s' % (self._in_dir, self._infile_format % (run, subrun))
        out_file = '%s/%s' % (self._out_dir, self._outfile_format % (run, subrun))

        #
        # Looks fine now, but if there are new troubles: run this project with NRUNS=1
        #
        if os.path.isfile(in_file):
            self.info('Found %s' % (in_file))
            # shutil.copyfile(in_file,out_file)

            if in_file.strip().split('.')[-1] == "ubdaq":
                status, jsonData = self.get_ubdaq_metadata( in_file, run, subrun )
            else:
                try:
                    jsonData = extractor_dict.getmetadata( in_file )
                    status = 3
                    self.info('Successfully extracted metadata from the swizzled file.')
                except Exception:
                    status = 100
                    self.error('Failed extracting metadata from the swizzled file.')

            if not status == 100:
                with open(out_file, 'w') as ofile:
                    json.dump(jsonData, ofile, sort_keys=True, indent=4, ensure_ascii=False)

                # To Eric: what are you doing here?
                try:
                    samweb = samweb_cli.SAMWebClient(experiment="uboone")
                    # samweb.validateFileMetadata(json_file)  # this throws/raises exception
                    status = 2
                except Exception:
                    print "Problem with samweb metadata: ", jsonData
                    print sys.exc_info()[0]
                    status = 100
        else:
            status = 1000
            self.error('Did not find the input file %s' % in_file)

        # Pretend I'm doing something
        time.sleep(1)

        # Create a status object to be logged to DB (if necessary)
        status = ds_status( project = self._project,
                            run     = run,
                            subrun  = subrun,
                            seq     = 0,
                            status  = status )

        # Log status
        self.log_status( status )

        # Break from loop if counter became 0
        if not ctr:
            break
def getmetadata(inputfile):

    # Set up the experiment name for samweb Python API
    samweb = samweb_cli.SAMWebClient(
        experiment=project_utilities.get_experiment())

    # Extract metadata into a pipe.
    local = project_utilities.path_to_local(inputfile)
    if local != '':
        proc = subprocess.Popen(["sam_metadata_dumper", "-H", local],
                                stdout=subprocess.PIPE)
    else:
        url = project_utilities.path_to_url(inputfile)
        proc = subprocess.Popen(["sam_metadata_dumper", "-H", url],
                                stdout=subprocess.PIPE)
    lines = proc.stdout.readlines()
    if local != '' and local != inputfile:
        os.remove(local)

    # Count the number of lines in the file (for later use!)
    num_lines = len(lines)

    # Define an empty python dictionary
    md = {}

    # Read the columns from the file and fill the dictionary
    c = 0
    parents = []
    PName = False
    gen = False
    for line in lines:
        c = c + 1
        columns = line.split(" ")
        columns = [col.strip() for col in columns]
        if c >= 4 and c <= num_lines - 2:
            if columns[1] == 'dataTier':
                md['data_tier'] = columns[-1]
                if columns[-1] == 'generated':
                    gen = True
            elif columns[1] == 'endTime':
                E = time.localtime(int(columns[-1]))
                md['end_time'] = str(E[0]) + '-' + str(E[1]) + '-' + str(E[2]) \
                    + 'T' + str(E[3]) + ':' + str(E[4]) + ':' + str(E[5])
            elif columns[1] == 'startTime':
                S = time.localtime(int(columns[-1]))
                md['start_time'] = str(S[0]) + '-' + str(S[1]) + '-' + str(S[2]) \
                    + 'T' + str(S[3]) + ':' + str(S[4]) + ':' + str(S[5])
            elif columns[1] == 'group':
                md['group'] = columns[-1]
            elif columns[1] == 'eventCount':
                md['event_count'] = columns[-1]
            elif columns[1] == 'fclName':
                md['fcl.name'] = columns[-1]
            elif columns[1] == 'fclVersion':
                md['fcl.version'] = columns[-1]
            elif columns[1] == 'fileFormat':
                md['file_format'] = columns[-1]
            elif columns[1] == 'ubProjectStage':
                md['ub_project.stage'] = columns[-1]
            elif columns[1] == 'ubProjectVersion':
                md['ub_project.version'] = columns[-1]
            elif columns[1] == 'lastEvent':
                md['last_event'] = columns[-1]
            elif columns[1] == 'firstEvent':
                md['first_event'] = columns[-1]
            elif columns[1] == 'fileType':
                md['file_type'] = columns[-1]
            elif columns[1] == 'run':
                run = columns[-1]
            elif columns[1] == 'runType':
                run_type = columns[-1]
            elif columns[1] == 'applicationFamily':
                app_family = columns[-1]
            elif columns[1] == 'applicationVersion':
                app_version = columns[-1]
            elif columns[1] == 'process_name':
                app_name = columns[-1]
            elif columns[1] == 'ubProjectName':
                PName = True
                md['ub_project.name'] = columns[-1]
            elif columns[1] == 'parent':
                parents.append({'file_name': columns[-1]})

    # Get the other metadata field parameters
    md['file_name'] = inputfile.split("/")[-1]
    md['file_size'] = os.path.getsize(inputfile)
    # For now, skip the checksum for dCache files.
    md['crc'] = root_metadata.fileEnstoreChecksum(inputfile)
    md['runs'] = [[run, run_type]]
    md['application'] = {
        'family': app_family,
        'name': app_name,
        'version': app_version
    }
    md['parents'] = parents

    # If ub_project.name is not in the internal metadata:
    # for generator files, take ub_project.name from the fcl file name
    # (without the '.fcl' part); for all other stages, take it from the parents.
    if gen == True:
        md['parents'] = []
        if PName == False:
            md['ub_project.name'] = md['fcl.name'].split(".fcl")[0]
    else:
        if PName == False:
            if 'parents' in md:
                parent = md['parents'][0]['file_name']
                mdparent = samweb.getMetadata(parent)
                if 'ub_project.name' in mdparent:
                    md['ub_project.name'] = mdparent['ub_project.name']

    return md
def pnnl_transfer( self, file_arg ):

    # sshftp-to-sshftp not enabled on near1, so must use gsiftp: must thus ship
    # from the Sam'd-up dcache file. uboonepro from near1 has an ssh-key to
    # sshftp to dtn2.pnl.gov as chur558. This requires that uboonepro owns a
    # valid proxy. We might get 2 Gbps throughput with this scenario.
    # Need more logging to message service ... also let's put all this into a
    # function that we call.

    cmd = "voms-proxy-info -all "
    proc = sub.Popen(cmd, shell=True, stderr=sub.PIPE, stdout=sub.PIPE)
    (out, err) = proc.communicate()
    goodProxy = False
    for line in out.split('\n'):
        if "timeleft" in line:
            if int(line.split(" : ")[1].replace(":", "")) > 0:
                goodProxy = True
                break

    if not goodProxy:
        self.error('uboonepro has no proxy.')
        raise Exception

    in_file = os.path.basename(file_arg)

    # We do a samweb.locateFile on basename of in_file. This project's parent
    # must be check_root_on_tape.
    transfer = 0
    samweb = samweb_cli.SAMWebClient(experiment="uboone")
    loc = samweb.locateFile(filenameorid=in_file)
    size_in = samweb.getMetadata(filenameorid=in_file)['file_size']
    samcode = 12

    if not ('enstore' in loc[0]["full_path"] and 'pnfs' in loc[0]["full_path"]):
        self.error('No enstore or pnfs in loc[0]["full_path"]')
        return (transfer, samcode)

    full_file = loc[0]["full_path"].replace('enstore:/pnfs/uboone', '') + "/" + in_file

    pnnl_machine = "dtn2.pnl.gov"
    pnnl_dir = 'pic/projects/microboone/data/'
    ddir = str(samweb.getMetadata(filenameorid=in_file)['runs'][0][0])
    cmd_mkdir = "ssh chur558@" + pnnl_machine + " mkdir -p " + "/" + pnnl_dir + ddir
    proc = sub.Popen(cmd_mkdir, shell=True, stderr=sub.PIPE, stdout=sub.PIPE)
    # Block, but plow on w.o. regard to whether I was successful to create ddir.
    # (Cuz this will complain if run is not new.)
    (out, err) = proc.communicate()

    pnnl_loc = pnnl_machine + "/" + pnnl_dir + ddir + "/" + in_file
    cmd_gsiftp_to_sshftp = "globus-url-copy -rst -vb -p 10 gsiftp://fndca1.fnal.gov:2811" \
        + full_file + " sshftp://chur558@" + pnnl_loc

    # Popen() gymnastics here
    ntry = 0
    delay = 5
    # More than 1 is not demonstrably helping. In fact, it creates lotsa
    # orphaned ssh's. EC, 8-Aug-2015.
    ntry_max = 1
    ndelays = 20

    while (ntry != ntry_max):
        self.info('Will launch ' + cmd_gsiftp_to_sshftp)
        info_str = "pnnl_transfer trying " + str(ntry + 1) + " (of " + str(ntry_max) + ") times."
        self.info(info_str)
        proc = sub.Popen(cmd_gsiftp_to_sshftp, shell=True, stderr=sub.PIPE, stdout=sub.PIPE)
        wait = 0
        transfer = 0
        while proc.poll() is None:
            wait += delay
            if wait > delay * ndelays:
                self.error("pnnl_transfer timed out in awaiting transfer.")
                proc.kill()
                transfer = 11
                break
            self.info('pnnl_transfer process ' + str(proc.pid) + ' active for ' + str(wait) + ' [sec]')
            time.sleep(delay)

        if (transfer != 11):
            break

        # This rm usually fails for reasons I don't understand. If it succeeded
        # the retry would work. Commenting it out, since we're setting
        # ntry_max=1 now anyway, EC 8-Aug-2015.
        # rm the (usually) 0 length file.
        #cmd_rm = "ssh chur558@" + pnnl_machine + " rm -f " + "/" + pnnl_dir + ddir + "/" + in_file
        #proc = sub.Popen(cmd_rm,shell=True,stderr=sub.PIPE,stdout=sub.PIPE)
        #self.info("pnnl_transfer process: Attempting to remove " + pnnl_dir + ddir + "/" + in_file)
        #time.sleep(5)  # give it 5 seconds, then kill it if not done.
        #if proc.poll() is None:
        #    proc.kill()
        #    self.error("pnnl_transfer process: " + cmd_rm + " failed.")

        ntry += 1
        ndelays += 10  # extend time period to force timeout for next effort.
    size_out = 0
    if not transfer:
        (out, err) = proc.communicate()
        transfer = proc.returncode

    # Also grep the out for indication of success at end.
    if not transfer:
        # self.info('out is ' + out)
        li = out.split(" ")
        mind = max(ind for ind, val in enumerate(li) if val == 'bytes') - 1
        size_out = int(li[mind])
        transfer = 10
        if size_out == size_in:
            transfer = 0

    # end file lives in enstore
    if not transfer:
        samadd = None
        samloc = None
        try:
            # Then samweb.addFileLocation() to pnnl location, with resj
            # capturing that return status.
            pnnl_loc_withcolon = pnnl_machine + ":/" + pnnl_dir + ddir + "/" + in_file
            samadd = samweb.addFileLocation(filenameorid=in_file, location=pnnl_loc_withcolon)
            samloc = samweb.locateFile(filenameorid=in_file)
            if len(samloc) > 0:
                samcode = 0
            self.info('pnnl_transfer() finished moving ' + in_file + ', size ' + str(size_in) + ' [bytes], to PNNL')
            self.info('Transfer rate was ' + str(out.split("\n")[4].split(" ")[8]))
            self.info('Transfer and samaddFile are successful. Full SAM location for file is now ' + str(samloc))
        except Exception:
            self.error('pnnl_transfer finished with a problem on ' + in_file +
                       ' during addFile or locFile. samadd/samloc is: ' + str(samadd) + "/" + str(samloc))
    else:
        self.error('pnnl_transfer finished with a problem on ' + in_file)

    return (transfer, samcode)
def compare_dropbox_checksum(self):

    # Attempt to connect DB. If failure, abort
    if not self.connect():
        self.error('Cannot connect to DB! Aborting...')
        return

    # If resource info is not yet read-in, read in.
    if self._nruns is None:
        self.get_resource()

    self.info('Here, self._nruns=%d ... ' % (self._nruns))

    # Fetch runs from DB and process for
    # runs specified for this instance.
    ctr = self._nruns
    for x in self.get_xtable_runs([self._project, self._parent_project], [1, 0]):

        # Counter decreases by 1
        ctr -= 1

        (run, subrun) = (int(x[0]), int(x[1]))

        # Report starting
        self.info('Calculating the file checksum: run=%d, subrun=%d ...' % (run, subrun))

        statusCode = 1

        in_file_holder = '%s/%s' % (self._in_dir, self._infile_format % (run, subrun))
        filelist = glob.glob(in_file_holder)
        if (len(filelist) < 1):
            self.error('ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!' % (run, subrun))
            status_code = 100
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=status_code)
            self.log_status(status)
            continue

        if (len(filelist) > 1):
            self.error('ERROR: Found too many files for (run,subrun) = %s @ %s !!!' % (run, subrun))
            self.error('ERROR: List of files found %s' % filelist)

        in_file = filelist[0]
        in_file_name = os.path.basename(in_file)
        out_file = '%s/%s' % (self._out_dir, in_file_name)

        # Note that this has the sequence number hard coded as number 0
        RefStatus = self._api.get_status(ds_status(self._ref_project, run, subrun, 0))
        near1_checksum = RefStatus._data

        try:
            pnfs_adler32_1, pnfs_size = get_pnfs_1_adler32_and_size(out_file)
            near1_adler32_1 = convert_0_adler32_to_1_adler32(near1_checksum, pnfs_size)
            if near1_adler32_1 == pnfs_adler32_1:
                statusCode = 0
            else:
                subject = 'Checksum different in run %d, subrun %d between %s and PNFS' % (run, subrun, self._ref_project)
                text = '%s\n' % subject
                text += 'Run %d, subrun %d\n' % (run, subrun)
                text += 'Converted %s checksum: %s\n' % (self._ref_project, near1_adler32_1)
                text += 'Converted PNFS checksum: %s\n' % (pnfs_adler32_1)
                pub_smtp(os.environ['PUB_SMTP_ACCT'],
                         os.environ['PUB_SMTP_SRVR'],
                         os.environ['PUB_SMTP_PASS'],
                         self._experts, subject, text)
                statusCode = 1000
                self._data = '%s:%s;PNFS:%s' % (self._ref_project, near1_adler32_1, pnfs_adler32_1)
        except LookupError:
            self.warning("Could not find file in the dropbox %s" % out_file)
            self.warning("Gonna go looking on tape %s" % in_file_name)
            samweb = samweb_cli.SAMWebClient(experiment="uboone")
            meta = {}
            try:
                meta = samweb.getMetadata(filenameorid=in_file_name)
                checksum_info = meta['checksum'][0].split(':')
                if checksum_info[0] == 'enstore':
                    self._data = checksum_info[1]
                    statusCode = 0
                else:
                    statusCode = 10
            except samweb_cli.exceptions.FileNotFound:
                subject = 'Failed to locate file %s at SAM' % in_file
                text = 'File %s is not found at SAM!' % in_file
                pub_smtp(os.environ['PUB_SMTP_ACCT'],
                         os.environ['PUB_SMTP_SRVR'],
                         os.environ['PUB_SMTP_PASS'],
                         self._experts, subject, text)
                statusCode = 100

        # Create a status object to be logged to DB (if necessary)
        status = ds_status(project=self._project,
                           run=run,
                           subrun=subrun,
                           seq=0,
                           status=statusCode,
                           data=self._data)

        # Log status
        self.log_status(status)

        # Break from loop if counter became 0
        if not ctr:
            break
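# compare_dropbox_checksum() relies on convert_0_adler32_to_1_adler32(), which
# is defined elsewhere. A sketch of the standard conversion it presumably
# implements (assumed, not taken from this source): enstore records adler32
# checksums seeded with 0, while zlib (and hence PNFS/dCache) seeds with 1.
# Writing a checksum as (B << 16) | A, the two sums over the same n bytes are
# related by A1 = (A0 + 1) mod 65521 and B1 = (B0 + n) mod 65521, so a
# 0-seeded value can be converted given only the file size.
def convert_0_adler32_to_1_adler32(crc, filesize):
    crc = int(crc)
    size = int(filesize) % 65521
    s1 = crc & 0xffff          # A0: low 16 bits of the 0-seeded sum
    s2 = (crc >> 16) & 0xffff  # B0: high 16 bits of the 0-seeded sum
    s1 = (s1 + 1) % 65521      # A1 = A0 + 1 (mod 65521)
    s2 = (s2 + size) % 65521   # B1 = B0 + n (mod 65521)
    return (s2 << 16) + s1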