Example #1
def uploadWorker(input, results, dbsUrl):
    """
    _uploadWorker_

    Put JSONized blocks in the input
    Get confirmation in the output
    """

    # Init DBS Stuff
    logging.debug("Creating dbsAPI with address %s" % dbsUrl)
    dbsApi = DbsApi(url = dbsUrl)


    while True:

        try:
            work = input.get()
        except (EOFError, IOError):
            crashMessage = "Hit EOF/IO in getting new work\n"
            crashMessage += "Assuming this is a graceful break attempt.\n"
            logging.error(crashMessage)
            break

        if work == 'STOP':
            # Then halt the process
            break

        name  = work.get('name', None)
        block = work.get('block', None)

        # Do stuff with DBS
        try:
            logging.debug("About to call insert block with block: %s" % block)
            dbsApi.insertBulkBlock(blockDump = block)
            results.put({'name': name, 'success': "uploaded"})
        except Exception as ex:
            exString = str(ex)
            if 'Block %s already exists' % name in exString:
                # Then this is probably a duplicate
                # Ignore this for now
                logging.error("Had duplicate entry for block %s. Ignoring for now." % name)
                logging.debug("Exception: %s" % exString)
                logging.debug("Traceback: %s" % str(traceback.format_exc()))
                results.put({'name': name, 'success': "uploaded"})
            elif 'Proxy Error' in exString:
                # This is probably a successful insertion that went bad.
                # Put it on the check list
                msg = "Got a proxy error for block (%s)." % name
                logging.error(msg)
                logging.error(str(traceback.format_exc()))
                results.put({'name': name, 'success': "check"})
            else:
                msg =  "Error trying to process block %s through DBS.\n" % name
                msg += exString
                logging.error(msg)
                logging.error(str(traceback.format_exc()))
                logging.debug("block: %s \n" % block)
                results.put({'name': name, 'success': "error", 'error': msg})

    return
Example #2
def uploadWorker(workInput, results, dbsUrl):
    """
    _uploadWorker_

    Put JSONized blocks in the workInput
    Get confirmation in the output
    """

    # Init DBS Stuff
    logging.debug("Creating dbsAPI with address %s", dbsUrl)
    dbsApi = DbsApi(url = dbsUrl)


    while True:

        try:
            work = workInput.get()
        except (EOFError, IOError):
            crashMessage = "Hit EOF/IO in getting new work\n"
            crashMessage += "Assuming this is a graceful break attempt.\n"
            logging.error(crashMessage)
            break

        if work == 'STOP':
            # Then halt the process
            break

        name  = work.get('name', None)
        block = work.get('block', None)

        # Do stuff with DBS
        try:
            logging.debug("About to call insert block with block: %s", block)
            dbsApi.insertBulkBlock(blockDump = block)
            results.put({'name': name, 'success': "uploaded"})
        except Exception as ex:
            exString = str(ex)
            if 'Block %s already exists' % name in exString:
                # Then this is probably a duplicate
                # Ignore this for now
                logging.error("Had duplicate entry for block %s. Ignoring for now.", name)
                logging.debug("Exception: %s", exString)
                logging.debug("Traceback: %s", str(traceback.format_exc()))
                results.put({'name': name, 'success': "uploaded"})
            elif 'Proxy Error' in exString:
                # This is probably a successful insertion that went bad.
                # Put it on the check list
                msg = "Got a proxy error for block (%s)." % name
                logging.error(msg)
                logging.error(str(traceback.format_exc()))
                results.put({'name': name, 'success': "check"})
            else:
                msg =  "Error trying to process block %s through DBS.\n" % name
                msg += exString
                logging.error(msg)
                logging.error(str(traceback.format_exc()))
                logging.debug("block: %s \n", block)
                results.put({'name': name, 'success': "error", 'error': msg})

    return
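The worker above is meant to run as a child process fed through queues, with the string 'STOP' as the shutdown sentinel. A minimal driver sketch, assuming multiprocessing queues (the driver is not part of the original module; any queue-like object with blocking get/put would do):

import multiprocessing

def runUploads(blocks, dbsUrl):
    # blocks: dict mapping block name -> JSONized block dump
    workInput = multiprocessing.Queue()
    results = multiprocessing.Queue()
    worker = multiprocessing.Process(target=uploadWorker,
                                     args=(workInput, results, dbsUrl))
    worker.start()  # on spawn-based platforms, call this under a __main__ guard
    for name, blockDump in blocks.items():
        workInput.put({'name': name, 'block': blockDump})
    workInput.put('STOP')  # sentinel recognized by uploadWorker
    outcome = [results.get() for _ in blocks]  # one result dict per block
    worker.join()
    return outcome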
Example #3
def uploadWorker(workInput, results, dbsUrl):
    """
    _uploadWorker_

    Put JSONized blocks in the workInput
    Get confirmation in the output
    """

    # Init DBS Stuff
    logging.debug("Creating dbsAPI with address %s", dbsUrl)
    dbsApi = DbsApi(url=dbsUrl)

    while True:

        try:
            work = workInput.get()
        except (EOFError, IOError):
            crashMessage = "Hit EOF/IO in getting new work\n"
            crashMessage += "Assuming this is a graceful break attempt.\n"
            logging.error(crashMessage)
            break

        if work == 'STOP':
            # Then halt the process
            break

        name = work.get('name', None)  # this is the block name
        block = work.get('block', None)  # this is the block data structure

        # Do stuff with DBS
        try:
            logging.debug("About to call insert block with block: %s", block)
            dbsApi.insertBulkBlock(blockDump=block)
            results.put({'name': name, 'success': "uploaded"})
        except Exception as ex:
            exString = str(ex)
            if 'Block %s already exists' % name in exString:
                # Then this is probably a duplicate
                # Ignore this for now
                logging.warning("Block %s already exists. Marking it as uploaded.", name)
                logging.debug("Exception: %s", exString)
                results.put({'name': name, 'success': "uploaded"})
            elif 'Proxy Error' in exString:
                # This is probably a successful insertion that went bad.
                # Put it on the check list
                msg = "Got a proxy error for block %s." % name
                logging.warning(msg)
                results.put({'name': name, 'success': "check"})
            elif 'Missing data when inserting to dataset_parents' in exString:
                msg = "Parent dataset is not inserted yet for block %s." % name
                logging.warning(msg)
                results.put({'name': name, 'success': "error", 'error': msg})
            else:
                msg = "Error trying to process block %s through DBS. Error: %s" % (name, exString)
                logging.exception(msg)
                logging.debug("block info: %s \n", block)
                results.put({'name': name, 'success': "error", 'error': msg})

    return
Example #4
def uploadWorker(input, results, dbsUrl):
    """
    _uploadWorker_

    Put JSONized blocks in the input
    Get confirmation in the output
    """

    # Init DBS Stuff
    logging.debug("Creating dbsAPI with address %s" % dbsUrl)
    dbsApi = DbsApi(url = dbsUrl)


    while True:

        try:
            work = input.get()
        except (EOFError, IOError):
            crashMessage = "Hit EOF/IO in getting new work\n"
            crashMessage += "Assuming this is a graceful break attempt.\n"
            logging.error(crashMessage)
            break

        if work == 'STOP':
            # Then halt the process
            break

        name  = work.get('name', None)
        block = work.get('block', None)


        # Do stuff with DBS
        try:
            logging.debug("About to call insert block with block: %s" % block)
            dbsApi.insertBulkBlock(blockDump = block)
            results.put({'name': name, 'success': True})        
        except Exception as ex:
            exString = str(ex)
            if 'Duplicate entry' in exString:
                # Then this is probably a duplicate
                # Ignore this for now
                logging.error("Had duplicate entry for block %s\n" % name)
                logging.error("Ignoring for now.\n")
                logging.error("Exception: %s\n" % exString)
                logging.error("Traceback: %s\n" % str(traceback.format_exc()))
                results.put({'name': name, 'success': True})
            else:
                msg =  "Error trying to process block %s through DBS.\n" % name
                msg += exString
                logging.error(msg)
                logging.error(str(traceback.format_exc()))
                results.put({'name': name, 'success': False, 'error': msg})
Example #5
def upload_to_dbs(dataset_info_file,
                  file_info_file,
                  origin_site_name,
                  dry=False):
    print("Uploading to DBS3...")
    with open(dataset_info_file, "r") as f:
        dataset_info = json.loads(f.read())
    with open(file_info_file, "r") as f:
        file_info = json.loads(f.read())
    phy3WriteUrl = "https://cmsweb.cern.ch/dbs/prod/phys03/DBSWriter"
    writeApi = DbsApi(url=phy3WriteUrl, debug=1)
    total_files = 0
    total_events = 0
    print("insert block in DBS3: %s" % writeApi.url)
    print("Preparing upload for {}".format(dataset_info["processed_ds"]))
    print("Blocks to be processed: {}".format(len(file_info["blocks"])))
    print("DatasetName: {}".format(
        createEmptyBlock(dataset_info, origin_site_name,
                         "asdf")["dataset"]["dataset"]))
    for block in file_info["blocks"]:
        blockid = block["blockid"]
        filedata = block["files"]
        filelist = []
        print("Processing block {} - Number of files: {}".format(
            blockid, len(filedata)))
        total_files += len(filedata)
        blockDict = createEmptyBlock(dataset_info, origin_site_name, blockid)
        for file in filedata:
            fileDic = {}
            lfn = file["name"]
            fileDic["file_type"] = "EDM"
            fileDic["logical_file_name"] = lfn
            for key in ["check_sum", "adler32", "file_size", "event_count"]:
                fileDic[key] = file[key]
            total_events += file["event_count"]
            fileDic["file_lumi_list"] = file["lumis"]
            fileDic["auto_cross_section"] = 0.0
            fileDic["last_modified_by"] = "sbrommer"
            filelist.append(fileDic)
        # now upload the block
        blockDict = addFilesToBlock(blockDict, filelist)
        if not dry:
            writeApi.insertBulkBlock(blockDict)
        else:
            print("Dry run, not inserting block into DBS3")
            pprint.pprint(blockDict)
            exit()
    print("Total files: {} // Total Events: {}".format(total_files,
                                                       total_events))
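upload_to_dbs depends on two helpers that are not shown here, createEmptyBlock and addFilesToBlock. A plausible sketch of both, inferred from the block dictionary assembled by hand in Example #9 (the real helpers may differ; the era and processing values below are placeholders):

import time

def createEmptyBlock(dataset_info, origin_site_name, blockid):
    # Skeleton of the dictionary expected by DbsApi.insertBulkBlock;
    # the keys mirror the structure built inline in Example #9.
    dataset_name = "/%s/%s/%s" % (dataset_info['primary_ds'],
                                  dataset_info['processed_ds'],
                                  dataset_info['tier'])
    return {
        'primds': {'primary_ds_name': dataset_info['primary_ds'],
                   'primary_ds_type': 'mc'},
        'dataset': {'dataset': dataset_name,
                    'processed_ds_name': dataset_info['processed_ds'],
                    'data_tier_name': dataset_info['tier'],
                    'dataset_access_type': 'VALID',
                    'physics_group_name': None},
        'block': {'block_name': "%s#%s" % (dataset_name, blockid),
                  'origin_site_name': origin_site_name,
                  'open_for_writing': 0},
        'acquisition_era': {'acquisition_era_name': 'dummy',  # placeholder
                            'start_date': int(time.time())},
        'processing_era': {'processing_version': 1,  # placeholder
                           'description': 'dummy'},
        'dataset_conf_list': [],
        'file_conf_list': [],
        'file_parent_list': [],
        'files': [],
    }

def addFilesToBlock(blockDict, files):
    # Attach the file list and derive the block-level counters,
    # exactly as done inline in Examples #9 and #10.
    blockDict['files'] = files
    blockDict['block']['file_count'] = len(files)
    blockDict['block']['block_size'] = sum(int(f['file_size']) for f in files)
    return blockDict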
Example #6
    lfn = common_lfn_prefix + directory_path + file['name']
    fileDic['file_type'] = common_file_type
    fileDic['logical_file_name'] = lfn
    for key in ['check_sum', 'adler32', 'file_size', 'event_count']:
        fileDic[key] = file[key]
    fileDic['file_lumi_list'] = common_dummy_lumi

    files.append(fileDic)
    files_in_block += 1
    print "file count %d" % files_in_block
    if files_in_block == max_files_in_block:
        blockDict = addFilesToBlock(blockDict, files)
        print "insert block in DBS3: %s" % writeApi.url
        print "ALAN: just before writing to DBS."
        pprint.pprint(blockDict)
        sys.exit(0)
        writeApi.insertBulkBlock(blockDict)
        files_in_block = 0

    # end loop on input Files

# any leftovers ?

if files_in_block:
    blockDict = addFilesToBlock(blockDict, files)
    print "insert block in DBS3: %s" % writeApi.url
    print "ALAN: leftovers just before writing to DBS."
    pprint.pprint(blockDict)
    sys.exit(0)
    writeApi.insertBulkBlock(blockDict)
Example #7
    fileDic = {}
    lfn = common_lfn_prefix + directory_path + file['name']
    print("inserting file:", lfn)
    fileDic['file_type'] = common_file_type
    fileDic['logical_file_name'] = lfn
    for key in ['check_sum', 'adler32', 'file_size', 'event_count']:
        fileDic[key] = file[key]
    fileDic['file_lumi_list'] = common_dummy_lumi
    
    files.append(fileDic)
    files_in_block += 1
    if files_in_block == max_files_in_block:
        blockDict = addFilesToBlock(blockDict, files)
        # print "insert block in DBS3: %s" % writeApi.url
        # pprint.pprint(blockDict)
        writeApi.insertBulkBlock(blockDict)
        files_in_block = 0
    
    # end loop on input Files

# any leftovers ?

if files_in_block:
    blockDict = addFilesToBlock(blockDict, files)
    # print "insert block in DBS3: %s" % writeApi.url
    # pprint.pprint(blockDict)
    writeApi.insertBulkBlock(blockDict)


print "/%s/%s/%s" % (dataset_info['primary_ds'], dataset_info['processed_ds'], dataset_info['tier'])
Example #8
def get_command_line_options():
    parser = OptionParser(usage='%prog --in MyBlock.txt --url=<DBS_Instance_URL>')
    parser.add_option("-i", "--in", dest="input", help="Input file containing the block dump. Wildcard support.",
                      metavar="MyBlock*.txt")
    parser.add_option("-u", "--url", dest="url", help="DBS Instance url", metavar="DBS_Instance_URL")

    (options, args) = parser.parse_args()

    if not (options.input and options.url):
        parser.print_help()
        parser.error('Mandatory options are --in and --url')

    return options, args

if __name__ == '__main__':
    options, args = get_command_line_options()

    input_files = glob.glob(options.input)

    for input_file in input_files:
        with open(input_file, 'r') as f:
            block_dump = literal_eval(f.read())
            api = DbsApi(url=options.url)
            try:
                api.insertBulkBlock(block_dump)
            except:
                raise
            else:
                print("Successfully inserted block!")
Example #9
def insertFilesToBlock(files, injectNode, injectSE, mode, commit):

    # pick a DBS3 instance
    # instance = 'dev'
    instance = 'int'
    # instance = 'prod'

    if instance=='dev':
        # host = 'dbs3-dev01.cern.ch'
        host = 'cmsweb-dev.cern.ch'

    if instance=='int':
        host = 'cmsweb-testbed.cern.ch'

    if instance=='prod':
        host = 'cmsweb.cern.ch'

    globReadUrl = 'https://%s/dbs/%s/global/DBSReader' % (host, instance)
    globWriteUrl = 'https://%s/dbs/%s/global/DBSWriter' % (host, instance)
    phy3ReadUrl = 'https://%s/dbs/%s/phys03/DBSReader' % (host, instance)
    phy3WriteUrl = 'https://%s/dbs/%s/phys03/DBSWriter' % (host, instance)

    readApi   = DbsApi(url=globReadUrl)
    writeApi  = DbsApi(url=globWriteUrl)
    # readApi   = DbsApi(url=phy3ReadUrl)
    # writeApi  = DbsApi(url=phy3WriteUrl)

    if mode == "lhe":
        ds_info = {
            'data_type'       : 'mc',
            'acquisition_era' : 'LHE',
            'primary_ds'      : 'QCD_HT-100To250_8TeV-madgraph',
            'processed_ds'    : 'LHE-testAlan_Attempt3-v2',
            'data_tier'       : 'LHE',
            'physics_group'   : 'GEN',
            'application'     : 'Madgraph',
            'app_version'     : 'Mad_5_1_3_30',
            'proc_version'    : 1,
            'proc_descript'   : 'test_LHE_injection'
            }
    elif mode == "pixel":
        ds_info = {
            'data_type'       : 'data',
            'acquisition_era' : 'Run2012',
            'primary_ds'      : 'QCD_HT-100To250_8TeV-madgraph',
            'processed_ds'    : 'LHE-testAlan_Attempt3-v2',
            'data_tier'       : 'LHE',
            'physics_group'   : None,
            'application'     : 'Madgraph',
            'app_version'     : 'Mad_5_1_3_30',
            'proc_version'    : 1,
            'proc_descript'   : 'test_LHE_injection'
            }
    else:
        raise ValueError("Unsupported mode: %s" % mode)

    acquisition_era_config = { 'acquisition_era_name' : ds_info['acquisition_era'],
                               'start_date' : int(time.time())
                               }

    processing_era_config = { 'processing_version': ds_info['proc_version'],
                              'description': ds_info['proc_descript']
                              }

    primds_config = { 'primary_ds_type': ds_info['data_type'],
                      'primary_ds_name': ds_info['primary_ds']
                      }

    dataset_name = "/%s/%s/%s" % (ds_info['primary_ds'], ds_info['processed_ds'], ds_info['data_tier'])
    dataset_config = { 'physics_group_name' : ds_info['physics_group'],
                       'dataset_access_type' : 'VALID',
                       'data_tier_name' : ds_info['data_tier'],
                       'processed_ds_name' : ds_info['processed_ds'],
                       'dataset' : dataset_name
                       }

    block_name = "%s#%s" % (dataset_name, str(uuid.uuid4()))
    block_config = { 'block_name' : block_name,
                     'origin_site_name' : injectSE,
                     'open_for_writing' : 0
                     }

    dataset_conf_list = [ { 'app_name' : ds_info['application'],
                            'global_tag' : 'dummytag',
                            'output_module_label' : 'out',
                            'pset_hash' : 'dummyhash',
                            'release_version' : ds_info['app_version']
                            } ]

    blockDict = { 'files': files,
                  'processing_era': processing_era_config,
                  'primds': primds_config,
                  'dataset': dataset_config,
                  'dataset_conf_list' : dataset_conf_list,
                  'acquisition_era': acquisition_era_config,
                  'block': block_config,
                  'file_parent_list':[],
                  'file_conf_list':[]
                  }

    blockDict['block']['file_count'] = len(files)
    blockDict['block']['block_size'] = sum([int(file['file_size']) for file in files])

    if commit:
        logging.info("inserted block into DBS : %s" % writeApi.url)
        logging.debug(pprint.pformat(blockDict))
        writeApi.insertBulkBlock(blockDict)
    else:
        logging.info("dry run, this block would have been inserted into DBS : %s" % writeApi.url)
        logging.info(pprint.pformat(blockDict))

    injectionSpec = XMLDrop.XMLInjectionSpec(writeApi.url)
    datasetSpec = injectionSpec.getDataset(dataset_name)
    blockSpec = datasetSpec.getFileblock(block_name, "n")
    for f in files:
        blockSpec.addFile(f['logical_file_name'],
                          { 'adler32' : f['adler32'] },
                          f['file_size'])

    xmlData = injectionSpec.save()

    # SELECT PHEDEX URL
    phedexURL = "https://cmsweb.cern.ch/phedex/datasvc/json/dev/"
    #phedexURL = "https://cmsweb.cern.ch/phedex/datasvc/json/prod/"

    if commit:
        logging.info("inserting block into PhEDEx : %s" % phedexURL)
        logging.debug(pprint.pformat(xmlData))
        phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        injectRes = phedex.injectBlocks(injectNode, xmlData)
    else:
        logging.info("dry run, this block would have been inserted into PhEDEx : %s" % phedexURL)
        logging.info(pprint.pformat(xmlData))

    return
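A hypothetical call to the function above (the node names and my_files are illustrative; each file dict needs at least 'logical_file_name', 'adler32' and 'file_size', see the fileDic construction in Examples #6 and #7):

insertFilesToBlock(files=my_files,
                   injectNode='T2_CH_CERN',
                   injectSE='T2_CH_CERN',
                   mode='lhe',
                   commit=False)  # dry run: logs the block instead of inserting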
Example #10
blockDict['block']['block_size'] = sum([int(file['file_size']) for file in files])

if options.verbose:
    pprint.pprint(blockDict)

if not options.publish:
    print("Dry run ended. Please use --publish option if you want to publish files in DBS")
    sys.exit()

# Insert primary dataset name. It's safe to do it for already existing primary datasets
primds_config = {'primary_ds_name': primary_ds_name, 'primary_ds_type': 'mc'}
dbsWriter.insertPrimaryDataset(primds_config)

# Insert block of files
try:
    dbsWriter.insertBulkBlock(blockDict)
except HTTPError as he:
    print(he)

# 
# Info
#
# Missing: file_lumi_list
# Example:
# 'file_lumi_list': [{u'lumi_section_num': 4027414, u'run_num': 1},
#                    {u'lumi_section_num': 26422, u'run_num': 2},
#                    {u'lumi_section_num': 29838, u'run_num': 3}]
# https://cmsweb.cern.ch/dbs/prod/phys03/DBSWriter/
# https://github.com/dmwm/AsyncStageout/blob/master/src/python/AsyncStageOut/PublisherWorker.py#L743
# xrd eoscms stat /store/user/dmytro/lhe/DM_ttbar01j/DMScalar_ttbar01j_mphi_200_mchi_150_gSM_1p0_gDM_1p0.root
# xrd cms-xrd-global.cern.ch stat /store/user/dmytro/lhe/DM_ttbar01j/DMScalar_ttbar01j_mphi_200_mchi_150_gSM_1p0_gDM_1p0.root
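A minimal sketch (not in the original script) of building the missing file_lumi_list in the format shown in the comment above:

def make_file_lumi_list(run_lumi_pairs):
    # run_lumi_pairs: iterable of (run_number, lumi_section) tuples
    return [{'run_num': run, 'lumi_section_num': lumi}
            for run, lumi in run_lumi_pairs]

# e.g. make_file_lumi_list([(1, 4027414), (2, 26422), (3, 29838)])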
Example #11
def main():
    # get a valid file name from args

    parser = argparse.ArgumentParser()
    parser.add_argument('--file',
                        help='file containing the dump of the block',
                        default=None,
                        required=True)
    args = parser.parse_args()
    fileName = args.file
    #fileName = 'failed-block-at-1611258668.34.txt' # just an example

    failedBlocksDir = '/data/srv/Publisher_files/FailedBlocks/'
    filePath = failedBlocksDir + fileName
    if not os.path.isfile(filePath):
        print("File %s not found in %s" % (fileName, failedBlocksDir))
        return

    # initialize DBS access
    # if X509 vars are not defined, use default Publisher location
    userProxy = os.getenv('X509_USER_PROXY')
    if userProxy:
        os.environ['X509_USER_CERT'] = userProxy
        os.environ['X509_USER_KEY'] = userProxy
    if not os.getenv('X509_USER_CERT'):
        os.environ['X509_USER_CERT'] = '/data/certs/servicecert.pem'
    if not os.getenv('X509_USER_KEY'):
        os.environ['X509_USER_KEY'] = '/data/certs/servicekey.pem'
    #migUrl = 'https://cmsweb.cern.ch/dbs/prod/phys03/DBSMigrate'
    phy3Url = 'https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader'
    #globUrl = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
    destUrl = 'https://cmsweb.cern.ch/dbs/prod/phys03/DBSWriter'
    #apiG = DbsApi(url=globUrl)
    apiP3 = DbsApi(url=phy3Url)
    #apiMig = DbsApi(url=migUrl)
    apiDest = DbsApi(url=destUrl)

    with open(filePath) as fp:
        blockData = fp.read()
    # from pprint.pprint format to a dictionary (slow, unsafe, but handy)
    block = eval(blockData)  # pylint: disable=eval-used

    targetDataset = block['dataset']['dataset']

    print('Block is meant to be added to dataset\n%s' % targetDataset)

    # look for files already present in DBS phys03
    alreadyPresentFile = False
    lfns = [f['logical_file_name'] for f in block['files']]
    print('Block contains %d files' % len(lfns))
    numPresent = 0
    sameDSet = 0
    otherDSet = 0
    otherDSlist = set()
    for lfn in lfns:
        ret = apiP3.listFiles(logical_file_name=lfn)
        if ret:
            alreadyPresentFile = True
            numPresent += 1
            if numPresent < 5:
                print('file %s found in DBS' % lfn)
            if numPresent == 5:
                print('more files found ...')
            #details = apiP3.listFiles(logical_file_name=lfn, detail=True)
            #print(details)
            lfnDSet = apiP3.listDatasets(logical_file_name=lfn)[0]['dataset']
            if lfnDSet == targetDataset:
                sameDSet += 1
                if sameDSet < 5:
                    print('this lfn is already in target dataset')
            else:
                otherDSet += 1
                if otherDSet < 5:
                    print('this lfn belongs to another dataset:\n%s' % lfnDSet)
                otherDSlist.add(lfnDSet)

            #lfnBlock = apiP3.listBlocks(logical_file_name=lfn)
            #print('in block:\n%s' % lfnBlock[0]['block_name'])

    if alreadyPresentFile:
        print(
            '%d/%d file(s) from input blocks are already in DBS/phys03. Publication will fail'
            % (numPresent, len(lfns)))
        print('files already present in target dataset: %d' % sameDSet)
        print('files present in DBS in another dataset: %d' % otherDSet)
        if otherDSet:
            print('other datasets containing files from this block:\n%s' %
                  otherDSlist)
        return

    print(
        'No obvious reason for Publication failure found, try to insert again')
    try:
        apiDest.insertBulkBlock(block)
    except Exception as ex:
        print("Publication failed with exception:\n%s" % str(ex))
        return
    print("Block publication done OK")

    return