l = 0 for host in lines: if (options.host == host.split()[0]): options.auth = host.split()[1] break else: logger.critical( "\033[1m [CRITICAL] " + "For CKAN database delete mode valid URL of CKAN instance (option --host) and API key (--auth or read from ~/.netrc) must be given" + "\033[0;0m") sys.exit(-1) CKAN = CKAN_CLIENT(options.host, options.auth) ## UP = UPLOADER(CKAN, OUT, options.outdir,options.fromdate) UP = Uploader(CKAN, options.ckan_check, HandleClient, cred, OUT, options.outdir, options.fromdate, options.host) if (options.identifier): list = [options.identifier] listtext = 'given by option -i (%d id\'s)' % len(list) elif (options.list): f = open(options.list, 'r') list = f.readlines() f.close() listtext = 'got from file %s (%d id\'s)' % (options.list, len(list)) elif (options.community): ##UP.purge_group(options.community) UP.get_packages(options.community) ##HEW??? UP.get_group_list(options.community) print "--- Start get community list from CKAN---\n" list = UP.package_list.keys() ##clist = UP.get_packages(options.community).keys()
l = 0 for host in lines: if(options.host == host.split()[0]): options.auth = host.split()[1] break else: logger.critical( "\033[1m [CRITICAL] " + "For CKAN database delete mode valid URL of CKAN instance (option --host) and API key (--auth or read from ~/.netrc) must be given" + "\033[0;0m" ) sys.exit(-1) CKAN = CKAN_CLIENT(options.host,options.auth) ## UP = UPLOADER(CKAN, OUT, options.outdir,options.fromdate) UP = Uploader(CKAN,options.ckan_check,HandleClient,cred,OUT,options.outdir,options.fromdate,options.host) if (options.identifier): list = [ options.identifier ] listtext='given by option -i (%d id\'s)' % len(list) elif (options.list): f = open(options.list,'r') list = f.readlines() f.close() listtext='got from file %s (%d id\'s)' % (options.list,len(list)) elif (options.community): ##UP.purge_group(options.community) UP.get_packages(options.community) ##HEW??? UP.get_group_list(options.community) print "--- Start get community list from CKAN---\n" list = UP.package_list.keys() ##clist = UP.get_packages(options.community).keys()
def process_delete(OUT, dir, options):
    """Delete datasets listed in ``<dir>/*.del`` files from CKAN and the
    handle server, and record the statistics via OUT.save_stats.

    DEPRECATED: the function bails out immediately (guard below); the
    remaining body is unreachable and kept only for reference until it is
    rewritten.  It also references names (``client``, ``pidRecord``) that
    are not defined in this scope — another reason it is disabled.

    Parameters:
    -----------
    1. OUT     : output/statistics object (save_stats is called on it)
    2. dir     : directory scanned for '<community>-<mdprefix>.del' files
    3. options : parsed command line options (iphost, auth, outdir, ...)

    Returns:
    --------
    False (always, while the function is disabled)
    """
    print("###JM# Don't use this function. It is not up to date.")
    return False

    # ----- everything below is unreachable (kept for reference) -----
    # create CKAN object
    CKAN = CKAN_CLIENT(options.iphost, options.auth)
    UP = Uploader(CKAN, OUT, options.outdir)
    ##HEW-D-ec credentials,ec = None,None

    # create credentials
    try:
        cred = b2handle.clientcredentials.PIDClientCredentials.load_from_JSON(
            'credentials_11098')
    except Exception as err:
        # FIX: the message has two %s placeholders but only one value was
        # supplied, which would itself raise a TypeError here.
        logging.critical(
            "[CRITICAL] %s Could not create credentials from credstore %s"
            % (err, options.handle_check))
        p.print_help()  # NOTE(review): 'p' is not defined in this scope
        sys.exit(-1)
    else:
        logging.debug(
            "Create handle client instance to add uuid to handle server")

    for delete_file in glob.glob(dir + '/*.del'):
        community, mdprefix = os.path.splitext(
            os.path.basename(delete_file))[0].split('-')

        logging.info('\n## Deleting datasets from community "%s" ##'
                     % (community))

        # get packages from the group in CKAN:
        UP.get_packages(community)

        # open the delete file and loop over its lines:
        file_content = ''
        try:
            f = open(delete_file, 'r')
            file_content = f.read()
            f.close()
        except IOError:
            logging.critical("Cannot read data from '{0}'".format(delete_file))
            f.close()  # FIX: was 'f.close' — attribute access, never called
        else:
            # rename the file in a crash backup file:
            os.rename(delete_file, delete_file + '.crash-backup')

        results = {'count': 0, 'ecount': 0, 'tcount': 0, 'time': 0}

        # use a try-except-finally environment to gurantee that no deleted
        # metadata information will be lost:
        try:
            logging.info(' | | %-4s | %-50s | %-50s |\n |%s|'
                         % ('#', 'oai identifier', 'CKAN identifier',
                            "-" * 116))
            deletestart = time.time()

            for line in file_content.split('\n'):
                # ignore empty lines:
                if not line:
                    continue

                results['tcount'] += 1
                subset, identifier = line.split('\t')

                # dataset name uniquely generated from oai identifier
                uid = uuid.uuid5(uuid.NAMESPACE_DNS,
                                 identifier.encode('ascii', 'replace'))
                ds = str(uid)

                # output:
                logging.info(' | d | %-4d | %-50s | %-50s |'
                             % (results['tcount'], identifier, ds))

                ### CHECK STATUS OF DATASET IN CKAN AND PID:
                # status of data set
                dsstatus = "unknown"

                # check against handle server
                handlestatus = "unknown"
                ##HEW-D-ec??? pid = credentials.prefix + "/eudat-jmd_" + ds
                pid = "11098/eudat-jmd_" + ds  # FIX: was undefined 'ds_id'
                # NOTE(review): 'pidRecord' and 'client' are undefined here.
                pidRecord["CHECKSUM"] = client.get_value_from_handle(
                    pid, "CHECKSUM")
                if pidRecord["CHECKSUM"] is None:  # FIX: was '== None'
                    logging.debug(
                        " |-> Can not access pid %s to get checksum" % (pid))
                    handlestatus = "new"
                else:
                    logging.debug(" |-> pid %s exists" % (pid))
                    handlestatus = "exist"

                # check against CKAN database
                ckanstatus = 'unknown'
                ckanstatus = UP.check_dataset(ds, None)

                delete = 0
                # depending on handle status delete record from B2FIND
                if handlestatus == "new" and ckanstatus == "new":
                    # no action required
                    logging.info(' |-> %s' % ('No deletion required'))
                else:
                    delete = UP.delete(ds, ckanstatus)

                if delete == 1:
                    logging.info(' |-> %s' % ('Deletion was successful'))
                    results['count'] += 1

                    # delete handle (to keep the symmetry between handle and
                    # B2FIND server)
                    if handlestatus == "exist":
                        logging.info(" |-> Delete handle %s with checksum %s"
                                     % (pid, pidRecord["CHECKSUM"]))
                        try:
                            client.delete_handle(pid)
                        except GenericHandleError as err:
                            logging.error('[ERROR] Unexpected Error: %s' % err)
                        except Exception:
                            logging.error('[ERROR] Unexpected Error:')
                    else:
                        logging.info(
                            " |-> No action (deletion) required for handle %s"
                            % pid)
                else:
                    logging.info(' |-> %s' % ('Deletion failed'))
                    results['ecount'] += 1

        except Exception:
            logging.error('[ERROR] Unexpected Error')
            logging.error('You find the ids of the deleted metadata in "%s"'
                          % (delete_file + '.crash-backup'))
            raise
        else:
            # all worked fine you can remove the crash-backup file:
            os.remove(delete_file + '.crash-backup')

        deletetime = time.time() - deletestart
        results['time'] = deletetime

        # save stats:
        OUT.save_stats(community + '-' + mdprefix, subset, 'd', results)
def process(options, pstat, OUT):
    """Run all processing stages marked 'tbd' in pstat['status'].

    Builds the request list either from the command line options (single
    mode, when options.source is given) or from a job list file (multi
    mode, when options.list is given), then runs harvesting, mapping,
    validating, OAI-converting, uploading and deleting in that order —
    each stage only if its status flag equals 'tbd'.

    Parameters:
    -----------
    1. options (OptionsParser object)
    2. pstat (process status dict)
    3. OUT (output handler passed on to the stage workers)
    """
    # set list of request lists for single or multi mode:
    mode = None
    procOptions = [
        'community', 'source', 'verb', 'mdprefix', 'mdsubset',
        'target_mdschema'
    ]
    if (options.source):
        mode = 'single'
        mandParams = ['community', 'verb', 'mdprefix']  # mandatory processing params
        for param in mandParams:
            if not getattr(options, param):
                logger.critical(
                    "Processing parameter %s is required in single mode" %
                    param)
                sys.exit(-1)
        reqlist = [[
            options.community, options.source, options.verb, options.mdprefix,
            options.mdsubset, options.ckan_check, options.handle_check,
            options.target_mdschema
        ]]
    elif (options.list):
        mode = 'multi'
        logger.debug(' |- Joblist: \t%s' % options.list)
        reqlist = parse_list_file(options)
        logger.debug(' |- Requestlist: \t%s' % reqlist)

    ## check job request (processing) options
    logger.debug('|- Command line options')
    for opt in procOptions:
        if hasattr(options, opt):
            logger.debug(' |- %s:\t%s' % (opt.upper(), getattr(options, opt)))

    ## HARVESTING mode:
    if (pstat['status']['h'] == 'tbd'):
        logger.info('\n|- Harvesting started : %s' %
                    time.strftime("%Y-%m-%d %H:%M:%S"))
        HV = Harvester(OUT, pstat, options.outdir, options.fromdate)
        process_harvest(HV, reqlist)

    ## MAPPINING - Mode:
    if (pstat['status']['m'] == 'tbd'):
        logger.info('\n|- Mapping started : %s' %
                    time.strftime("%Y-%m-%d %H:%M:%S"))
        MP = Mapper(OUT, options.outdir, options.fromdate)
        process_map(MP, reqlist)

    ## VALIDATING - Mode:
    if (pstat['status']['v'] == 'tbd'):
        logger.info(' |- Validating started : %s' %
                    time.strftime("%Y-%m-%d %H:%M:%S"))
        MP = Mapper(OUT, options.outdir, options.fromdate)
        process_validate(MP, reqlist)

    ## OAI-CONVERTING - Mode:
    if (pstat['status']['o'] == 'tbd'):
        logger.info('\n|- OAI-Converting started : %s' %
                    time.strftime("%Y-%m-%d %H:%M:%S"))
        MP = Mapper(OUT, options.outdir, options.fromdate)
        process_oaiconvert(MP, reqlist)

    ## UPLOADING - Mode:
    if (pstat['status']['u'] == 'tbd'):
        logger.info('\n|- Uploading started : %s' %
                    time.strftime("%Y-%m-%d %H:%M:%S"))
        # create CKAN object
        CKAN = CKAN_CLIENT(options.iphost, options.auth)
        # create credentials and handle client if required
        if (options.handle_check):
            try:
                cred = PIDClientCredentials.load_from_JSON('credentials_11098')
            except Exception as err:
                logger.critical(
                    "%s : Could not create credentials from credstore %s" %
                    (err, options.handle_check))
                ##p.print_help()
                sys.exit(-1)
            else:
                logger.debug("Create EUDATHandleClient instance")
                HandleClient = EUDATHandleClient.instantiate_with_credentials(
                    cred)
        else:
            cred = None
            HandleClient = None
        UP = Uploader(CKAN, options.ckan_check, HandleClient, cred, OUT,
                      options.outdir, options.fromdate, options.iphost)
        logger.info(' |- Host: \t%s' % CKAN.ip_host)
        process_upload(UP, reqlist)

    ## DELETING - Mode:
    if (pstat['status']['d'] == 'tbd'):
        # start the process deleting:
        logger.info('\n|- Deleting started : %s' %
                    time.strftime("%Y-%m-%d %H:%M:%S"))
        # FIX: was "mode is 'multi'" — identity comparison against a str
        # literal only works by CPython interning accident; use equality.
        if mode == 'multi':
            # FIX: renamed local so it no longer shadows builtin 'dir'
            delete_dir = options.outdir + '/delete'
            if os.path.exists(delete_dir):
                process_delete(OUT, delete_dir, options)
            else:
                logger.error(
                    '[ERROR] The directory "%s" does not exist! No files for deleting are found!'
                    % (delete_dir))
        else:
            logger.critical(
                "[CRITICAL] Deleting mode only supported in 'multi mode' and an explicitly deleting script given !"
            )
def process_delete(OUT, dir, options):
    """Remove the datasets listed in ``<dir>/*.del`` from CKAN (and the
    handle server) and save the delete statistics through OUT.

    DEPRECATED: disabled by the early return below — the rest of the body
    never runs.  It is retained for reference only and still refers to
    names (``client``, ``pidRecord``) that do not exist in this scope.

    Parameters:
    -----------
    1. OUT     : output/statistics object (save_stats is called on it)
    2. dir     : directory containing '<community>-<mdprefix>.del' files
    3. options : parsed command line options (iphost, auth, outdir, ...)

    Returns:
    --------
    False (always, while the function is disabled)
    """
    print("###JM# Don't use this function. It is not up to date.")
    return False

    # ----- unreachable code below; kept until a proper rewrite -----
    # create CKAN object
    CKAN = CKAN_CLIENT(options.iphost, options.auth)
    UP = Uploader(CKAN, OUT, options.outdir)
    ##HEW-D-ec credentials,ec = None,None

    # create credentials
    try:
        cred = b2handle.clientcredentials.PIDClientCredentials.load_from_JSON(
            'credentials_11098')
    except Exception as err:
        # FIX: two %s placeholders were fed a single value, which would
        # raise TypeError inside this handler.
        logging.critical(
            "[CRITICAL] %s Could not create credentials from credstore %s"
            % (err, options.handle_check))
        p.print_help()  # NOTE(review): 'p' is undefined in this scope
        sys.exit(-1)
    else:
        logging.debug(
            "Create handle client instance to add uuid to handle server")

    for delete_file in glob.glob(dir + '/*.del'):
        community, mdprefix = os.path.splitext(
            os.path.basename(delete_file))[0].split('-')

        logging.info('\n## Deleting datasets from community "%s" ##'
                     % (community))

        # get packages from the group in CKAN:
        UP.get_packages(community)

        # open the delete file and loop over its lines:
        file_content = ''
        try:
            f = open(delete_file, 'r')
            file_content = f.read()
            f.close()
        except IOError:
            logging.critical("Cannot read data from '{0}'".format(delete_file))
            f.close()  # FIX: 'f.close' without parentheses was a no-op
        else:
            # rename the file in a crash backup file:
            os.rename(delete_file, delete_file + '.crash-backup')

        results = {'count': 0, 'ecount': 0, 'tcount': 0, 'time': 0}

        # use a try-except-finally environment to gurantee that no deleted
        # metadata information will be lost:
        try:
            logging.info(' | | %-4s | %-50s | %-50s |\n |%s|'
                         % ('#', 'oai identifier', 'CKAN identifier',
                            "-" * 116))
            deletestart = time.time()

            for line in file_content.split('\n'):
                # ignore empty lines:
                if not line:
                    continue

                results['tcount'] += 1
                subset, identifier = line.split('\t')

                # dataset name uniquely generated from oai identifier
                uid = uuid.uuid5(uuid.NAMESPACE_DNS,
                                 identifier.encode('ascii', 'replace'))
                ds = str(uid)

                # output:
                logging.info(' | d | %-4d | %-50s | %-50s |'
                             % (results['tcount'], identifier, ds))

                ### CHECK STATUS OF DATASET IN CKAN AND PID:
                # status of data set
                dsstatus = "unknown"

                # check against handle server
                handlestatus = "unknown"
                ##HEW-D-ec??? pid = credentials.prefix + "/eudat-jmd_" + ds
                pid = "11098/eudat-jmd_" + ds  # FIX: 'ds_id' was undefined
                # NOTE(review): 'pidRecord' and 'client' are undefined here.
                pidRecord["CHECKSUM"] = client.get_value_from_handle(
                    pid, "CHECKSUM")
                if pidRecord["CHECKSUM"] is None:  # FIX: was '== None'
                    logging.debug(
                        " |-> Can not access pid %s to get checksum" % (pid))
                    handlestatus = "new"
                else:
                    logging.debug(" |-> pid %s exists" % (pid))
                    handlestatus = "exist"

                # check against CKAN database
                ckanstatus = 'unknown'
                ckanstatus = UP.check_dataset(ds, None)

                delete = 0
                # depending on handle status delete record from B2FIND
                if handlestatus == "new" and ckanstatus == "new":
                    # no action required
                    logging.info(' |-> %s' % ('No deletion required'))
                else:
                    delete = UP.delete(ds, ckanstatus)

                if delete == 1:
                    logging.info(' |-> %s' % ('Deletion was successful'))
                    results['count'] += 1

                    # delete handle (to keep the symmetry between handle and
                    # B2FIND server)
                    if handlestatus == "exist":
                        logging.info(" |-> Delete handle %s with checksum %s"
                                     % (pid, pidRecord["CHECKSUM"]))
                        try:
                            client.delete_handle(pid)
                        except GenericHandleError as err:
                            logging.error('[ERROR] Unexpected Error: %s' % err)
                        except Exception:
                            logging.error('[ERROR] Unexpected Error:')
                    else:
                        logging.info(
                            " |-> No action (deletion) required for handle %s"
                            % pid)
                else:
                    logging.info(' |-> %s' % ('Deletion failed'))
                    results['ecount'] += 1

        except Exception:
            logging.error('[ERROR] Unexpected Error')
            logging.error('You find the ids of the deleted metadata in "%s"'
                          % (delete_file + '.crash-backup'))
            raise
        else:
            # all worked fine you can remove the crash-backup file:
            os.remove(delete_file + '.crash-backup')

        deletetime = time.time() - deletestart
        results['time'] = deletetime

        # save stats:
        OUT.save_stats(community + '-' + mdprefix, subset, 'd', results)
def process(options, pstat, OUT):
    """Dispatch the pipeline stages flagged 'tbd' in pstat['status'].

    The request list is assembled either straight from the command line
    options (single mode, options.source given) or from a job list file
    (multi mode, options.list given).  Afterwards the generation,
    harvesting, mapping, validating and uploading stages are executed in
    that fixed order, each only when its status flag equals 'tbd'.

    Parameters:
    -----------
    1. options (OptionsParser object)
    2. pstat (process status dict)
    3. OUT (output handler handed to the stage workers)
    """
    mode = None
    procOptions = [
        'community', 'source', 'verb', 'mdprefix', 'mdsubset',
        'target_mdschema'
    ]
    status = pstat['status']

    if options.source:
        # single mode: one request row built from the options themselves
        mode = 'single'
        for required in ('community', 'verb', 'mdprefix'):
            if not getattr(options, required):
                logger.critical(
                    "Processing parameter %s is required in single mode"
                    % required)
                sys.exit(-1)
        reqlist = [[
            options.community, options.source, options.verb,
            options.mdprefix, options.mdsubset, options.ckan_check,
            options.handle_check, options.target_mdschema
        ]]
    elif options.list:
        # multi mode: requests come from a job list file; the generation
        # stage, however, needs an explicit source and cannot run here
        if status['g'] == 'tbd':
            logger.critical(
                " Processing parameter [ --source | -s SOURCE ] is required in generation mode"
            )
            sys.exit(-1)
        mode = 'multi'
        logger.debug(' |- Joblist: \t%s' % options.list)
        reqlist = parse_list_file(options)

    # log the effective job request (processing) options
    for key in procOptions:
        if hasattr(options, key):
            logger.debug(' |- %s:\t%s' % (key.upper(), getattr(options, key)))

    if status['g'] == 'tbd':
        # GENERATION stage
        generator = Generator(pstat, options.outdir)
        process_generate(generator, reqlist)

    if status['h'] == 'tbd':
        # HARVESTING stage
        harvester = Harvester(pstat, options.outdir, options.fromdate)
        process_harvest(harvester, reqlist)

    if status['m'] == 'tbd':
        # MAPPING stage
        print('\n|- Mapping started : %s' % time.strftime("%Y-%m-%d %H:%M:%S"))
        mapper = Mapper(OUT, options.outdir, options.fromdate)
        process_map(mapper, reqlist)

    if status['v'] == 'tbd':
        # VALIDATING stage
        print('\n|- Validating started : %s' %
              time.strftime("%Y-%m-%d %H:%M:%S"))
        mapper = Mapper(OUT, options.outdir, options.fromdate)
        process_validate(mapper, reqlist)

    if status['u'] == 'tbd':
        # UPLOADING stage — talks to the CKAN instance
        ckan = CKAN_CLIENT(options.iphost, options.auth)
        # a handle client is only needed when PID checking was requested
        if options.handle_check:
            try:
                cred = PIDClientCredentials.load_from_JSON('credentials_11098')
            except Exception as err:
                logger.critical(
                    "%s : Could not create credentials from credstore %s"
                    % (err, options.handle_check))
                sys.exit(-1)
            else:
                logger.debug("Create EUDATHandleClient instance")
                handle_client = EUDATHandleClient.instantiate_with_credentials(
                    cred)
        else:
            cred = None
            handle_client = None
        uploader = Uploader(ckan, options.ckan_check, handle_client, cred,
                            OUT, options.outdir, options.fromdate,
                            options.iphost, options.ckan_organization)
        logger.info(' |- Host: \t%s' % ckan.ip_host)
        process_upload(uploader, reqlist)