Example #1
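All seven snippets below are Python 2 (note the `unicode` calls and `print` statements) and come from SAMADhi-based bookkeeping scripts. They assume roughly the following imports; the module paths are a sketch inferred from context, not verified against any particular SAMADhi checkout:

# Assumed imports for the examples below (paths are a best guess, not verified):
import os
import json
from pwd import getpwuid  # author lookup via the owner of the working directory
# SAMADhi ORM classes and the Storm-backed store (assumed import path):
from SAMADhi import Dataset, Sample, File, DbStore
# CMSSW helper for merging luminosity-section lists (assumed available):
from FWCore.PythonUtilities.LumiList import LumiList
# confirm(), prompt_dataset() and prompt_sample() are simple interactive
# helpers defined elsewhere in these scripts.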
def add_sample(NAME, localpath, type, dataset_nevents, nselected, AnaUrl, FWUrl, dataset_id):
    # Large part of this imported from SAMADhi add_sample.py
    sample = Sample(unicode(NAME), unicode(localpath), unicode(type), dataset_nevents) 
    sample.nevents = nselected
    sample.normalization = 1.0
    sample.luminosity = 40028954.499 / 1e6 # FIXME: figure out the fix for data whenever the tools will stabilize and be on cvmfs
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good
#    sample.user_comment =
    sample.source_dataset_id = dataset_id
#    sample.source_sample_id = None
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)
#    sample.creation_time = 
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # check that the source dataset exists
    if dbstore.find(Dataset, Dataset.dataset_id == sample.source_dataset_id).is_empty():
        raise IndexError("No dataset with such index: %d" % sample.source_dataset_id)
    # check that there is no existing entry
    checkExisting = dbstore.find(Sample, Sample.name == sample.name)
    if checkExisting.is_empty():
        print sample
        if confirm(prompt="Insert into the database?", resp=True):
            dbstore.add(sample)
            # compute the luminosity, if possible
            if sample.luminosity is None:
                dbstore.flush()
                sample.luminosity = sample.getLuminosity()
    else:
        existing = checkExisting.one()
        prompt  = "Replace existing "
        prompt += str(existing)
        prompt += "\nby new "
        prompt += str(sample)
        prompt += "\n?"
        if confirm(prompt, resp=False):
            existing.replaceBy(sample)
            if existing.luminosity is None:
                dbstore.flush()
                existing.luminosity = existing.getLuminosity()
    # commit
    dbstore.commit()
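A hypothetical call, with every value made up for illustration:

add_sample(NAME="TTJets_example",
           localpath="/storage/data/TTJets",
           type="NTUPLES",
           dataset_nevents=1000000,
           nselected=950000,
           AnaUrl="https://example.org/MyAnalysis",
           FWUrl="https://example.org/Framework",
           dataset_id=42)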
Example #2
def add_sample(NAME, localpath, type, nevents, nselected, AnaUrl, FWUrl, dataset_id, sumw, has_job_processed_everything, dataset_nevents, files, processed_lumi=None):
    dbstore = DbStore()

    sample = None

    # check that the source dataset exists
    if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty():
        # note: `sample` is still None here, so the message must use dataset_id
        raise IndexError("No dataset with such index: %d" % dataset_id)

    # check that there is no existing entry
    update = False
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    sample.nevents_processed = nevents
    sample.nevents = nselected
    sample.normalization = 1
    sample.event_weight_sum = sumw
#    sample.luminosity  = 40028954.499 / 1e6 # FIXME: figure out the fix for data whenever the tools will stabilize and be on cvmfs
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good
    if not has_job_processed_everything:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(nevents) + "/" + str(dataset_nevents) + " events were processed")
    else:
        sample.user_comment = u""
    sample.source_dataset_id = dataset_id
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if processed_lumi:
        # Convert to json
        import json
        processed_lumi = json.dumps(processed_lumi, separators=(',', ':'))
        sample.processed_lumi = unicode(processed_lumi)
    else:
        sample.processed_lumi = None

    for f in files:
        sample.files.add(f)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()

        print sample

        if confirm(prompt="Insert into the database?", resp=True):
            dbstore.commit()
            return

    else:
        sample.luminosity = sample.getLuminosity()
        prompt  = "A sample with the same name already exists in the database. Replace by:\n"
        prompt += str(sample)
        prompt += "\n?"
        if confirm(prompt, resp=False):
            dbstore.commit()
            return

    # the user declined the prompt: roll back the pending changes
    dbstore.rollback()
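For illustration, a hypothetical call to this variant; `processed_lumi` is expected as a compact LumiList dictionary (run number mapped to [first, last] luminosity-section ranges), which the function serializes to JSON before storing. All values are invented:

add_sample("SingleMuon_example", "/storage/data/SingleMuon", "NTUPLES",
           nevents=500000, nselected=480000,
           AnaUrl="https://example.org/MyAnalysis",
           FWUrl="https://example.org/Framework",
           dataset_id=42, sumw=500000.0,
           has_job_processed_everything=True,
           dataset_nevents=500000, files=[],
           processed_lumi={"273158": [[1, 100]]})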
Example #3
def add_merged_sample(NAME, type, AnaUrl, FWUrl, samples, comment):
    # samples is a list of dicts, each containing three keys: 'process', 'dataset_id' and 'sample_id'
    dbstore = DbStore()
    sample = None

    # check that the source dataset exists
    # skipped here: the check has already been done before calling this function

    # check that there is no existing entry
    update = False
    localpath = ''
    nevents = 0
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    # collecting contents
    sample.nevents_processed = 0
    sample.nevents = 0
    sample.normalization = 1
    sample.event_weight_sum = 0
    extras_event_weight_sum = {}
    dataset_nevents = 0
    processed_lumi = LumiList()
    for i, s in enumerate(samples):
        if i == 0:
            sample.source_dataset_id = s['dataset_id']
            sample.source_sample_id = s['sample_id']
        results = dbstore.find(Sample, Sample.sample_id == s['sample_id'])
        # Should exist, the check has been done before calling this function
        sample.nevents_processed += results[0].nevents_processed
        sample.nevents += results[0].nevents
        sample.event_weight_sum += results[0].event_weight_sum
        extra_sumw = results[0].extras_event_weight_sum
        if extra_sumw is not None:
            extra_sumw = json.loads(extra_sumw)
            for key in extra_sumw:
                try:
                    extras_event_weight_sum[key] += extra_sumw[key]
                except KeyError:
                    extras_event_weight_sum[key] = extra_sumw[key]
        tmp_processed_lumi = results[0].processed_lumi
        if tmp_processed_lumi is not None:
            tmp_processed_lumi = json.loads( tmp_processed_lumi )
            processed_lumi = processed_lumi | LumiList(compactList = tmp_processed_lumi)
        # Get info from file table
        results = dbstore.find(File, File.sample_id == s['sample_id'])
        for lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents in list(results.values(File.lfn, File.pfn, File.event_weight_sum, File.extras_event_weight_sum, File.nevents)):
            f = File(lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents)
            sample.files.add(f)
        # Get info from parent datasets
        results = dbstore.find(Dataset, Dataset.dataset_id == s['dataset_id'])
        dataset_nevents += results[0].nevents
    if len(extras_event_weight_sum) > 0:
        sample.extras_event_weight_sum = unicode(json.dumps(extras_event_weight_sum))
    if len(processed_lumi.getCompactList()) > 0:
        sample.processed_lumi = unicode(json.dumps(processed_lumi.getCompactList()))
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good
    if sample.nevents_processed != dataset_nevents:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(sample.nevents_processed) + "/" + str(dataset_nevents) + " events were processed. " + comment)
    else:
        sample.user_comment = unicode(comment)
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()

        print sample

        dbstore.commit()
        return

    else:
        sample.luminosity = sample.getLuminosity()
        print("Sample updated")
        print(sample)

        dbstore.commit()
        return

    # note: both branches above commit and return, so this rollback is never reached
    dbstore.rollback()
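A hypothetical call, matching the list-of-dicts layout described in the comment at the top of the function (all ids invented):

add_merged_sample("TTJets_merged", "NTUPLES",
                  "https://example.org/MyAnalysis",
                  "https://example.org/Framework",
                  samples=[{'process': 'TTJets', 'dataset_id': 42, 'sample_id': 7},
                           {'process': 'TTJets', 'dataset_id': 43, 'sample_id': 8}],
                  comment="merged extension datasets")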
Example #4
def add_sample(NAME, localpath, type, nevents, nselected, AnaUrl, FWUrl, dataset_id, sumw, extras_sumw, has_job_processed_everything, dataset_nevents, files, processed_lumi=None):
    dbstore = DbStore()

    sample = None

    # check that the source dataset exists
    if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty():
        # note: `sample` is still None here, so the message must use dataset_id
        raise IndexError("No dataset with such index: %d" % dataset_id)

    # check that there is no existing entry
    update = False
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    sample.nevents_processed = nevents
    sample.nevents = nselected
    sample.normalization = 1
    sample.event_weight_sum = sumw
    sample.extras_event_weight_sum = unicode(json.dumps(extras_sumw, separators=(',', ':')))
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good
    if not has_job_processed_everything:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(nevents) + "/" + str(dataset_nevents) + " events were processed")
    else:
        sample.user_comment = u""
    sample.source_dataset_id = dataset_id
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if processed_lumi:
        # Convert to json
        processed_lumi = json.dumps(processed_lumi, separators=(',', ':'))
        sample.processed_lumi = unicode(processed_lumi)
    else:
        sample.processed_lumi = None

    for f in files:
        sample.files.add(f)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()

        print sample

        dbstore.commit()
        return

    else:
        sample.luminosity = sample.getLuminosity()
        print("Sample updated")
        print(sample)

        dbstore.commit()
        return

    # note: both branches above commit and return, so this rollback is never reached
    dbstore.rollback()
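This variant differs from Example #2 only in the extra `extras_sumw` argument: a dictionary of named event-weight sums that gets stored as compact JSON in `extras_event_weight_sum`. A sketch of a plausible shape; the keys below are purely illustrative and depend on the analysis framework:

extras_sumw = {"scale_up": 1.02e6, "scale_down": 0.98e6}
# The function stores it compacted, e.g. u'{"scale_up":1020000.0,"scale_down":980000.0}'
# (key order is not guaranteed under Python 2's json.dumps).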
Example #5
def add_sample(NAME,
               localpath,
               type,
               nevents,
               nselected,
               AnaUrl,
               FWUrl,
               dataset_id,
               sumw,
               has_job_processed_everything,
               dataset_nevents,
               files,
               processed_lumi=None):
    dbstore = DbStore()

    sample = None

    # check that the source dataset exists
    if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty():
        # note: `sample` is still None here, so the message must use dataset_id
        raise IndexError("No dataset with such index: %d" % dataset_id)

    # check that there is no existing entry
    update = False
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type),
                        nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    sample.nevents_processed = nevents
    sample.nevents = nselected
    sample.normalization = 1
    sample.event_weight_sum = sumw
    #    sample.luminosity  = 40028954.499 / 1e6 # FIXME: figure out the fix for data whenever the tools will stabilize and be on cvmfs
    sample.code_version = unicode(
        AnaUrl + ' ' +
        FWUrl)  #NB: limited to 255 characters, but so far so good
    if not has_job_processed_everything:
        sample.user_comment = unicode("Sample was not fully processed, only " +
                                      str(nevents) + "/" +
                                      str(dataset_nevents) +
                                      " events were processed")
    else:
        sample.user_comment = u""
    sample.source_dataset_id = dataset_id
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if processed_lumi:
        # Convert to json
        import json
        processed_lumi = json.dumps(processed_lumi, separators=(',', ':'))
        sample.processed_lumi = unicode(processed_lumi)
    else:
        sample.processed_lumi = None

    for f in files:
        sample.files.add(f)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()

        print sample

        if confirm(prompt="Insert into the database?", resp=True):
            dbstore.commit()
            return

    else:
        sample.luminosity = sample.getLuminosity()
        prompt = "A sample with the same name already exists in the database. Replace by:\n"
        prompt += str(sample)
        prompt += "\n?"
        if confirm(prompt, resp=False):
            dbstore.commit()
            return

    # the user declined the prompt: roll back the pending changes
    dbstore.rollback()
Example #6
def add_sample(NAME,
               localpath,
               type,
               nevents,
               nselected,
               AnaUrl,
               FWUrl,
               dataset_id,
               sumw,
               extras_sumw,
               has_job_processed_everything,
               dataset_nevents,
               files,
               processed_lumi=None):
    dbstore = DbStore()

    sample = None

    # check that the source dataset exists
    if dbstore.find(Dataset, Dataset.dataset_id == dataset_id).is_empty():
        # note: `sample` is still None here, so the message must use dataset_id
        raise IndexError("No dataset with such index: %d" % dataset_id)

    # check that there is no existing entry
    update = False
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type),
                        nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    sample.nevents_processed = nevents
    sample.nevents = nselected
    sample.normalization = 1
    sample.event_weight_sum = sumw
    sample.extras_event_weight_sum = unicode(
        json.dumps(extras_sumw, separators=(',', ':')))
    sample.code_version = unicode(
        AnaUrl + ' ' +
        FWUrl)  #NB: limited to 255 characters, but so far so good
    if not has_job_processed_everything:
        sample.user_comment = unicode("Sample was not fully processed, only " +
                                      str(nevents) + "/" +
                                      str(dataset_nevents) +
                                      " events were processed")
    else:
        sample.user_comment = u""
    sample.source_dataset_id = dataset_id
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if processed_lumi:
        # Convert to json
        processed_lumi = json.dumps(processed_lumi, separators=(',', ':'))
        sample.processed_lumi = unicode(processed_lumi)
    else:
        sample.processed_lumi = None

    for f in files:
        sample.files.add(f)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()

        print sample

        dbstore.commit()
        return

    else:
        sample.luminosity = sample.getLuminosity()
        print("Sample updated")
        print(sample)

        dbstore.commit()
        return

    # note: both branches above commit and return, so this rollback is never reached
    dbstore.rollback()
Example #7
def main():
    """Main function"""
    # get the options
    optmgr = MyOptionParser()
    opts   = optmgr.get_opt()
    # build the sample from user input
    sample  = Sample(unicode(opts.name), unicode(opts.path), unicode(opts.sampletype), opts.nevents_processed)
    sample.nevents = opts.nevents
    sample.normalization = opts.normalization
    sample.luminosity = opts.luminosity
    sample.code_version = unicode(opts.code_version)
    sample.user_comment = unicode(opts.user_comment)
    sample.source_dataset_id = opts.source_dataset_id
    sample.source_sample_id = opts.source_sample_id
    sample.author = unicode(opts.author)
    sample.creation_time = opts.datetime
    # connect to the MySQL database using default credentials
    dbstore = DbStore()
    # unless the source is set, prompt the user and present a list to make a choice
    if sample.source_dataset_id is None:
        prompt_dataset(sample, dbstore)
    if sample.source_sample_id is None:
        prompt_sample(sample, dbstore)
    # check that the source sample and dataset exist
    if sample.source_dataset_id is not None:
        checkExisting = dbstore.find(Dataset, Dataset.dataset_id == sample.source_dataset_id)
        if checkExisting.is_empty():
            raise IndexError("No dataset with such index: %d" % sample.source_dataset_id)
    if sample.source_sample_id is not None:
        checkExisting = dbstore.find(Sample, Sample.sample_id == sample.source_sample_id)
        if checkExisting.is_empty():
            raise IndexError("No sample with such index: %d" % sample.source_sample_id)
    # if nevents_processed is not set, take it from the source sample (if set) or from the source dataset (if set), in that order
    if sample.nevents_processed is None and sample.source_sample_id is not None:
        sample.nevents_processed = dbstore.find(Sample, Sample.sample_id == sample.source_sample_id).one().nevents_processed
    if sample.nevents_processed is None and sample.source_dataset_id is not None:
        sample.nevents_processed = dbstore.find(Dataset, Dataset.dataset_id == sample.source_dataset_id).one().nevents
    if sample.nevents_processed is None:
        print "Warning: Number of processed events not given, and no way to guess it."
    # check that there is no existing entry
    checkExisting = dbstore.find(Sample, Sample.name == sample.name)
    if checkExisting.is_empty():
        print sample
        if confirm(prompt="Insert into the database?", resp=True):
            dbstore.add(sample)
            # compute the luminosity, if possible
            if sample.luminosity is None:
                dbstore.flush()
                sample.luminosity = sample.getLuminosity()
    else:
        existing = checkExisting.one()
        prompt  = "Replace existing "
        prompt += str(existing)
        prompt += "\nby new "
        prompt += str(sample)
        prompt += "\n?"
        if confirm(prompt, resp=False):
            existing.replaceBy(sample)
            if existing.luminosity is None:
                dbstore.flush()
                existing.luminosity = existing.getLuminosity()
    # commit
    dbstore.commit()

def add_merged_sample(samples, name, comment, store):

    # Retrieve the sample from the database if it already exists. Otherwise, create a new
    # sample
    update = False
    sample = store.find(Sample, Sample.name == unicode(name)).one()
    if not sample:
        sample = Sample(unicode(name), unicode(''), unicode('NTUPLES'), 0)
        store.add(sample)
    else:
        update = True
        sample.removeFiles(store)

    store.flush()

    # Use the parent dataset of the first sample as the parent dataset
    # of the merged sample
    sample.source_dataset_id = samples[0].source_dataset_id

    # Reset sample content
    sample.nevents_processed = 0
    sample.nevents = 0
    sample.normalization = 1
    sample.event_weight_sum = 0
    extras_event_weight_sum = {}
    dataset_nevents = 0
    processed_lumi = LumiList()

    for s in samples:
        sample.derived_samples.add(s)

        sample.nevents_processed += s.nevents_processed
        sample.nevents += s.nevents
        sample.event_weight_sum += s.event_weight_sum
        extra_sumw = s.extras_event_weight_sum
        if extra_sumw:
            extra_sumw = json.loads(extra_sumw)
            for key in extra_sumw:
                if key in extras_event_weight_sum:
                    extras_event_weight_sum[key] += extra_sumw[key]
                else:
                    extras_event_weight_sum[key] = extra_sumw[key]

        if s.processed_lumi is not None:
            sample_processed_lumi = json.loads(s.processed_lumi)
            processed_lumi = processed_lumi | LumiList(
                compactList=sample_processed_lumi)

        for f in s.files:
            sample.files.add(f)

        # Get info from parent datasets
        dataset_nevents += s.source_dataset.nevents

    if len(extras_event_weight_sum) > 0:
        sample.extras_event_weight_sum = unicode(
            json.dumps(extras_event_weight_sum))

    if len(processed_lumi.getCompactList()) > 0:
        sample.processed_lumi = unicode(
            json.dumps(processed_lumi.getCompactList()))

    sample.code_version = samples[0].code_version

    if sample.nevents_processed != dataset_nevents:
        sample.user_comment = unicode("Sample was not fully processed, only " +
                                      str(sample.nevents_processed) + "/" +
                                      str(dataset_nevents) +
                                      " events were processed. " + comment)
    else:
        sample.user_comment = unicode(comment)

    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    sample.luminosity = sample.getLuminosity()

    print("")
    print("Merged sample %s:" % ("updated" if update else "created"))
    print(sample)

    store.commit()
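Unlike Example #3, this variant takes already-fetched Sample objects plus an open store, and links the merged sample to its parts through `derived_samples`. A hypothetical usage, with all names invented:

store = DbStore()
# fetch the two partial samples to merge (assumed to exist in the database)
s1 = store.find(Sample, Sample.name == u"TTJets_part1").one()
s2 = store.find(Sample, Sample.name == u"TTJets_part2").one()
add_merged_sample([s1, s2], "TTJets_merged", u"merged split jobs", store)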