def add_sample(NAME, localpath, type, dataset_nevents, nselected, AnaUrl, FWUrl, dataset_id):
    # Large part of this imported from SAMADhi add_sample.py
    sample = Sample(unicode(NAME), unicode(localpath), unicode(type), dataset_nevents)
    sample.nevents = nselected
    sample.normalization = 1.0
    sample.luminosity = 40028954.499 / 1e6  # FIXME: figure out the fix for data whenever the tools will stabilize and be on cvmfs
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl)  # NB: limited to 255 characters, but so far so good
    # sample.user_comment =
    sample.source_dataset_id = dataset_id
    # sample.source_sample_id = None
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)
    # sample.creation_time =
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # check that the source dataset exists
    if dbstore.find(Dataset, Dataset.dataset_id == sample.source_dataset_id).is_empty():
        raise IndexError("No dataset with such index: %d" % sample.source_dataset_id)
    # check that there is no existing entry
    checkExisting = dbstore.find(Sample, Sample.name == sample.name)
    if checkExisting.is_empty():
        print sample
        if confirm(prompt="Insert into the database?", resp=True):
            dbstore.add(sample)
            # compute the luminosity, if possible
            if sample.luminosity is None:
                dbstore.flush()
                sample.luminosity = sample.getLuminosity()
    else:
        existing = checkExisting.one()
        prompt = "Replace existing "
        prompt += str(existing)
        prompt += "\nby new "
        prompt += str(sample)
        prompt += "\n?"
        if confirm(prompt, resp=False):
            existing.replaceBy(sample)
            if existing.luminosity is None:
                dbstore.flush()
                existing.luminosity = existing.getLuminosity()
    # commit
    dbstore.commit()
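# NOTE: the functions in this file rely on names imported at module level in the
# original SAMADhi scripts. The header below is a sketch of what those imports
# plausibly look like; the exact module paths for `confirm` and `LumiList` are
# assumptions, not confirmed by this file.
import os
import json
from pwd import getpwuid  # used to look up the sample author from file ownership
from SAMADhi import Dataset, Sample, File, DbStore  # SAMADhi ORM classes and Storm store
from userPrompt import confirm  # assumption: SAMADhi's interactive yes/no prompt helper
from FWCore.PythonUtilities.LumiList import LumiList  # assumption: CMSSW luminosity-list utility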
def add_sample(NAME, localpath, type, nevents, nselected, AnaUrl, FWUrl, dataset_id, sumw,
               has_job_processed_everything, dataset_nevents, files, processed_lumi=None):
    dbstore = DbStore()
    sample = None
    # check that the source dataset exists
    if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty():
        # 'sample' is still None at this point, so report the dataset_id argument directly
        raise IndexError("No dataset with such index: %d" % dataset_id)
    # check whether there is an existing entry
    update = False
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)
    sample.nevents_processed = nevents
    sample.nevents = nselected
    sample.normalization = 1
    sample.event_weight_sum = sumw
    # sample.luminosity = 40028954.499 / 1e6  # FIXME: figure out the fix for data whenever the tools will stabilize and be on cvmfs
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl)  # NB: limited to 255 characters, but so far so good
    if not has_job_processed_everything:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(nevents) + "/" + str(dataset_nevents) + " events were processed")
    else:
        sample.user_comment = u""
    sample.source_dataset_id = dataset_id
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)
    if processed_lumi:
        # Convert to JSON
        import json
        processed_lumi = json.dumps(processed_lumi, separators=(',', ':'))
        sample.processed_lumi = unicode(processed_lumi)
    else:
        sample.processed_lumi = None

    for f in files:
        sample.files.add(f)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()
        print sample
        if confirm(prompt="Insert into the database?", resp=True):
            dbstore.commit()
            return
    else:
        sample.luminosity = sample.getLuminosity()
        prompt = "A sample with the same name already exists in the database. Replace by:\n"
        prompt += str(sample)
        prompt += "\n?"
        if confirm(prompt, resp=False):
            dbstore.commit()
            return

    # rollback (reached when the user declines the confirmation prompt)
    dbstore.rollback()
def add_merged_sample(NAME, type, AnaUrl, FWUrl, samples, comment):
    # samples is a list of dicts, each containing three keys: 'process', 'dataset_id', 'sample_id'
    dbstore = DbStore()
    sample = None
    # check that the source dataset exists
    # Skip: it should exist, the check has been done before calling this function
    # check whether there is an existing entry
    update = False
    localpath = ''
    nevents = 0
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    # collecting contents
    sample.nevents_processed = 0
    sample.nevents = 0
    sample.normalization = 1
    sample.event_weight_sum = 0
    extras_event_weight_sum = {}
    dataset_nevents = 0
    processed_lumi = LumiList()
    for i, s in enumerate(samples):
        if i == 0:
            sample.source_dataset_id = s['dataset_id']
            sample.source_sample_id = s['sample_id']
        results = dbstore.find(Sample, Sample.sample_id == s['sample_id'])
        # Should exist, the check has been done before calling this function
        sample.nevents_processed += results[0].nevents_processed
        sample.nevents += results[0].nevents
        sample.event_weight_sum += results[0].event_weight_sum
        extra_sumw = results[0].extras_event_weight_sum
        if extra_sumw is not None:
            extra_sumw = json.loads(extra_sumw)
            for key in extra_sumw:
                try:
                    extras_event_weight_sum[key] += extra_sumw[key]
                except KeyError:
                    extras_event_weight_sum[key] = extra_sumw[key]
        tmp_processed_lumi = results[0].processed_lumi
        if tmp_processed_lumi is not None:
            tmp_processed_lumi = json.loads(tmp_processed_lumi)
            processed_lumi = processed_lumi | LumiList(compactList=tmp_processed_lumi)
        # Get info from the file table
        results = dbstore.find(File, File.sample_id == s['sample_id'])
        for lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents in list(results.values(File.lfn, File.pfn, File.event_weight_sum, File.extras_event_weight_sum, File.nevents)):
            f = File(lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents)
            sample.files.add(f)
        # Get info from the parent datasets
        results = dbstore.find(Dataset, Dataset.dataset_id == s['dataset_id'])
        dataset_nevents += results[0].nevents

    if len(extras_event_weight_sum) > 0:
        sample.extras_event_weight_sum = unicode(json.dumps(extras_event_weight_sum))
    if len(processed_lumi.getCompactList()) > 0:
        sample.processed_lumi = unicode(json.dumps(processed_lumi.getCompactList()))

    sample.code_version = unicode(AnaUrl + ' ' + FWUrl)  # NB: limited to 255 characters, but so far so good
    if sample.nevents_processed != dataset_nevents:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(sample.nevents_processed) + "/" + str(dataset_nevents) + " events were processed. " + comment)
    else:
        sample.user_comment = unicode(comment)
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()
        print sample
        dbstore.commit()
        return
    else:
        sample.luminosity = sample.getLuminosity()
        print("Sample updated")
        print(sample)
        dbstore.commit()
        return

    # rollback (unreachable: both branches above commit and return)
    dbstore.rollback()
def add_sample(NAME, localpath, type, nevents, nselected, AnaUrl, FWUrl, dataset_id, sumw,
               extras_sumw, has_job_processed_everything, dataset_nevents, files, processed_lumi=None):
    dbstore = DbStore()
    sample = None
    # check that the source dataset exists
    if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty():
        # 'sample' is still None at this point, so report the dataset_id argument directly
        raise IndexError("No dataset with such index: %d" % dataset_id)
    # check whether there is an existing entry
    update = False
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)
    sample.nevents_processed = nevents
    sample.nevents = nselected
    sample.normalization = 1
    sample.event_weight_sum = sumw
    sample.extras_event_weight_sum = unicode(json.dumps(extras_sumw, separators=(',', ':')))
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl)  # NB: limited to 255 characters, but so far so good
    if not has_job_processed_everything:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(nevents) + "/" + str(dataset_nevents) + " events were processed")
    else:
        sample.user_comment = u""
    sample.source_dataset_id = dataset_id
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)
    if processed_lumi:
        # Convert to JSON
        processed_lumi = json.dumps(processed_lumi, separators=(',', ':'))
        sample.processed_lumi = unicode(processed_lumi)
    else:
        sample.processed_lumi = None

    for f in files:
        sample.files.add(f)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()
        print sample
        dbstore.commit()
        return
    else:
        sample.luminosity = sample.getLuminosity()
        print("Sample updated")
        print(sample)
        dbstore.commit()
        return

    # rollback (unreachable: both branches above commit and return)
    dbstore.rollback()
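# A minimal, hypothetical invocation sketch for the add_sample variant above.
# Every value here is illustrative (names, paths, URLs, ids are NOT from the real
# database); the dataset_id must refer to an existing row in the dataset table.
if __name__ == '__main__':
    files = []  # would normally hold SAMADhi File objects for the produced ntuples
    add_sample(
        NAME='TTTo2L2Nu_13TeV_example',      # hypothetical sample name
        localpath='/storage/data/example',   # hypothetical output path
        type='NTUPLES',
        nevents=1000000, nselected=950000,
        AnaUrl='git@github.com:user/Analysis.git@abcdef0',   # hypothetical code URLs
        FWUrl='git@github.com:user/Framework.git@1234567',
        dataset_id=42,                       # must already exist in the dataset table
        sumw=1.2345e6, extras_sumw={},
        has_job_processed_everything=True,
        dataset_nevents=1000000,
        files=files)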
def main():
    """Main function"""
    # get the options
    optmgr = MyOptionParser()
    opts = optmgr.get_opt()
    # build the sample from user input
    sample = Sample(unicode(opts.name), unicode(opts.path), unicode(opts.sampletype), opts.nevents_processed)
    sample.nevents = opts.nevents
    sample.normalization = opts.normalization
    sample.luminosity = opts.luminosity
    sample.code_version = unicode(opts.code_version)
    sample.user_comment = unicode(opts.user_comment)
    sample.source_dataset_id = opts.source_dataset_id
    sample.source_sample_id = opts.source_sample_id
    sample.author = unicode(opts.author)
    sample.creation_time = opts.datetime
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # unless the source is set, prompt the user and present a list to make a choice
    if sample.source_dataset_id is None:
        prompt_dataset(sample, dbstore)
    if sample.source_sample_id is None:
        prompt_sample(sample, dbstore)
    # check that the source sample and dataset exist
    if sample.source_dataset_id is not None:
        checkExisting = dbstore.find(Dataset, Dataset.dataset_id == sample.source_dataset_id)
        if checkExisting.is_empty():
            raise IndexError("No dataset with such index: %d" % sample.source_dataset_id)
    if sample.source_sample_id is not None:
        checkExisting = dbstore.find(Sample, Sample.sample_id == sample.source_sample_id)
        if checkExisting.is_empty():
            raise IndexError("No sample with such index: %d" % sample.source_sample_id)
    # if opts.nevents is not set, take the number of events from the source sample (if set)
    # or from the source dataset (if set), in that order
    if sample.nevents_processed is None and sample.source_sample_id is not None:
        sample.nevents_processed = dbstore.find(Sample, Sample.sample_id == sample.source_sample_id).one().nevents_processed
    if sample.nevents_processed is None and sample.source_dataset_id is not None:
        sample.nevents_processed = dbstore.find(Dataset, Dataset.dataset_id == sample.source_dataset_id).one().nevents
    if sample.nevents_processed is None:
        print "Warning: Number of processed events not given, and no way to guess it."
    # check that there is no existing entry
    checkExisting = dbstore.find(Sample, Sample.name == sample.name)
    if checkExisting.is_empty():
        print sample
        if confirm(prompt="Insert into the database?", resp=True):
            dbstore.add(sample)
            # compute the luminosity, if possible
            if sample.luminosity is None:
                dbstore.flush()
                sample.luminosity = sample.getLuminosity()
    else:
        existing = checkExisting.one()
        prompt = "Replace existing "
        prompt += str(existing)
        prompt += "\nby new "
        prompt += str(sample)
        prompt += "\n?"
        if confirm(prompt, resp=False):
            existing.replaceBy(sample)
            if existing.luminosity is None:
                dbstore.flush()
                existing.luminosity = existing.getLuminosity()
    # commit
    dbstore.commit()
def add_merged_sample(samples, name, comment, store):
    # Retrieve the sample from the database if it already exists; otherwise, create a new sample
    update = False
    sample = store.find(Sample, Sample.name == unicode(name)).one()
    if not sample:
        sample = Sample(unicode(name), unicode(''), unicode('NTUPLES'), 0)
        store.add(sample)
    else:
        update = True
        sample.removeFiles(store)
        store.flush()

    # Set as parent dataset of the merged sample the parent dataset of the first sample
    sample.source_dataset_id = samples[0].source_dataset_id

    # Reset sample content
    sample.nevents_processed = 0
    sample.nevents = 0
    sample.normalization = 1
    sample.event_weight_sum = 0
    extras_event_weight_sum = {}
    dataset_nevents = 0
    processed_lumi = LumiList()

    for s in samples:
        sample.derived_samples.add(s)
        sample.nevents_processed += s.nevents_processed
        sample.nevents += s.nevents
        sample.event_weight_sum += s.event_weight_sum
        extra_sumw = s.extras_event_weight_sum
        if extra_sumw:
            extra_sumw = json.loads(extra_sumw)
            for key in extra_sumw:
                if key in extras_event_weight_sum:
                    extras_event_weight_sum[key] += extra_sumw[key]
                else:
                    extras_event_weight_sum[key] = extra_sumw[key]
        if s.processed_lumi is not None:
            sample_processed_lumi = json.loads(s.processed_lumi)
            processed_lumi = processed_lumi | LumiList(compactList=sample_processed_lumi)
        for f in s.files:
            sample.files.add(f)
        # Get info from the parent datasets
        dataset_nevents += s.source_dataset.nevents

    if len(extras_event_weight_sum) > 0:
        sample.extras_event_weight_sum = unicode(json.dumps(extras_event_weight_sum))
    if len(processed_lumi.getCompactList()) > 0:
        sample.processed_lumi = unicode(json.dumps(processed_lumi.getCompactList()))

    sample.code_version = samples[0].code_version
    if sample.nevents_processed != dataset_nevents:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(sample.nevents_processed) + "/" + str(dataset_nevents) + " events were processed. " + comment)
    else:
        sample.user_comment = unicode(comment)
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)
    sample.luminosity = sample.getLuminosity()

    print("")
    print("Merged sample %s:" % ("updated" if update else "created"))
    print(sample)

    store.commit()
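# Hypothetical usage sketch for this add_merged_sample variant: merge all Sample
# rows whose names match a pattern into one entry. The pattern and names are
# illustrative only; `.like()` is Storm's SQL LIKE comparator on a property.
store = DbStore()
to_merge = list(store.find(Sample, Sample.name.like(u'TTTo2L2Nu_13TeV_run%')))
if to_merge:
    add_merged_sample(to_merge, 'TTTo2L2Nu_13TeV_merged', 'Merged over all runs', store)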