コード例 #1
0
ファイル: cleaner.py プロジェクト: thorstenhater/md-ingestion
                        raise

            ### check for and remove ckan dataset
            if (options.ckan_check == 'True'):
                # check for and remove ckan dataset
                ckanstatus = 'unknown'
                if (options.community):
                    checksum = 'fe5f25c9f6d17ba289d6551afc98a8c3'
                    ckanstatus = UP.check_dataset(id, checksum)
                if (ckanstatus == 'unknown' or ckanstatus == 'changed'
                        or ckanstatus == 'unchanged'):
                    actionreq += ' remove ckan dataset'
                    try:
                        if (not options.quiet
                            ):  ##HEW-ADD and ( b2findversion != '1.0'):
                            delete = UP.delete(id, ckanstatus)
                            if (delete == 1):
                                ##                        logger.info('        |-> %s' % ('Deletion was successful'))
                                ccount += 1
                                ckanaction = 'removed'
                            else:
                                ckanaction = 'failed'
                    except Exception, e:
                        logger.error('[ERROR] Unexpected Error: %s' % e)
                        raise
        print '| %-6d | %-35s | %-6s | %-6s | %-6s | %-6s |' % (
            n, id, xmlstatus, jsonstatus, ckanstatus, handlestatus)
        if (not options.quiet):
            print '--> %-42s | %-6s | %-6s | %-6s | %-6s |' % (
                'action performed', xmlaction, jsonaction, ckanaction,
                handleaction)
コード例 #2
0
def process_delete(OUT, dir, options):
    """Delete the datasets listed in ``*.del`` files from CKAN and the handle server.

    NOTE: this function is currently disabled. It prints a warning and
    returns ``False`` before doing any work. The code behind the guard is
    kept for the day the function is brought up to date again.

    Args:
        OUT: output object; handed to ``Uploader`` and used for
            ``save_stats``.
        dir: directory that is searched for ``*.del`` files. The base name
            of each file is split at ``-`` into community and mdprefix, and
            every non-empty line is split at a TAB into subset and
            identifier.
        options: parsed options; ``iphost``, ``auth``, ``outdir`` and
            ``handle_check`` are read.

    Returns:
        ``False`` -- always, as long as the guard below is in place.
    """
    print("###JM# Don't use this function. It is not up to date.")
    return False

    # ----------------------------------------------------------------------
    # Everything below is unreachable until the guard above is removed.
    # NOTE(review): `client` (handle client) and `p` (option parser) are not
    # bound anywhere in this function. Presumably they must come from module
    # scope or be created here (e.g. a handle client built from `cred`) --
    # confirm before re-enabling.
    # ----------------------------------------------------------------------

    # create CKAN object
    CKAN = CKAN_CLIENT(options.iphost, options.auth)
    UP = Uploader(CKAN, OUT, options.outdir)

    # create credentials
    try:
        cred = b2handle.clientcredentials.PIDClientCredentials.load_from_JSON(
            'credentials_11098')
    except Exception:
        # The message used to carry two placeholders for a single argument,
        # which raised TypeError instead of logging the failure.
        logging.critical(
            "[CRITICAL] Could not create credentials from credstore %s" %
            (options.handle_check))
        p.print_help()
        sys.exit(-1)
    else:
        logging.debug(
            "Create handle client instance to add uuid to handle server")

    for delete_file in glob.glob(dir + '/*.del'):
        community, mdprefix = os.path.splitext(
            os.path.basename(delete_file))[0].split('-')

        logging.info('\n## Deleting datasets from community "%s" ##' %
                     (community))

        # get packages from the group in CKAN:
        UP.get_packages(community)

        # read the complete delete file; `with` closes it on every path
        try:
            with open(delete_file, 'r') as f:
                file_content = f.read()
        except IOError:
            logging.critical("Cannot read data from '{0}'".format(delete_file))
            # Nothing was read and no crash backup exists, so there is
            # nothing to process or clean up for this file.
            continue

        # rename the file in a crash backup file:
        backup_file = delete_file + '.crash-backup'
        os.rename(delete_file, backup_file)

        results = {'count': 0, 'ecount': 0, 'tcount': 0, 'time': 0}
        subset = None  # stays None when the file holds no entries
        deletestart = time.time()

        # use a try-except-else environment to guarantee that no deleted
        # metadata information will be lost:
        try:
            logging.info('    |   | %-4s | %-50s | %-50s |\n    |%s|' %
                         ('#', 'oai identifier', 'CKAN identifier', "-" * 116))

            for line in file_content.split('\n'):
                # ignore empty lines:
                if not line:
                    continue

                results['tcount'] += 1
                subset, identifier = line.split('\t')

                # dataset name uniquely generated from oai identifier
                uid = uuid.uuid5(uuid.NAMESPACE_DNS,
                                 identifier.encode('ascii', 'replace'))
                ds = str(uid)

                # output:
                logging.info('    | d | %-4d | %-50s | %-50s |' %
                             (results['tcount'], identifier, ds))

                ### CHECK STATUS OF DATASET IN CKAN AND PID:
                # check against handle server
                pid = "11098/eudat-jmd_" + ds
                checksum = client.get_value_from_handle(pid, "CHECKSUM")

                if checksum is None:
                    logging.debug(
                        "        |-> Can not access pid %s to get checksum" %
                        (pid))
                    handlestatus = "new"
                else:
                    logging.debug("        |-> pid %s exists" % (pid))
                    handlestatus = "exist"

                # check against CKAN database
                ckanstatus = UP.check_dataset(ds, None)

                # depending on handle status delete record from B2FIND
                if handlestatus == "new" and ckanstatus == "new":
                    # neither server knows the dataset: no action required
                    logging.info('        |-> %s' % ('No deletion required'))
                    continue

                if UP.delete(ds, ckanstatus) != 1:
                    logging.info('        |-> %s' % ('Deletion failed'))
                    results['ecount'] += 1
                    continue

                logging.info('        |-> %s' % ('Deletion was successful'))
                results['count'] += 1

                # delete handle (to keep the symmetry between handle and
                # B2FIND server)
                if handlestatus == "exist":
                    logging.info(
                        "        |-> Delete handle %s with checksum %s" %
                        (pid, checksum))
                    # best effort: a failing handle deletion is logged but
                    # does not abort the run
                    try:
                        client.delete_handle(pid)
                    except GenericHandleError as err:
                        logging.error('[ERROR] Unexpected Error: %s' % err)
                    except Exception as err:
                        logging.error('[ERROR] Unexpected Error: %s' % err)
                else:
                    logging.info(
                        "        |-> No action (deletion) required for handle %s"
                        % pid)
        except Exception:
            logging.error('[ERROR] Unexpected Error')
            logging.error('You find the ids of the deleted metadata in "%s"' %
                          (backup_file))
            raise
        else:
            # all worked fine you can remove the crash-backup file:
            os.remove(backup_file)

        results['time'] = time.time() - deletestart

        if subset is None:
            # No entry was read, so there is no subset to file the stats
            # under (this used to end in a NameError).
            logging.info('No entries found in "%s"' % (delete_file))
            continue

        # save stats:
        OUT.save_stats(community + '-' + mdprefix, subset, 'd', results)
コード例 #3
0
ファイル: cleaner.py プロジェクト: EUDAT-B2FIND/md-ingestion
                 logger.error('[ERROR] Unexpected Error: %s' % e)
                 handleaction='failed'
                 raise

         ### check for and remove ckan dataset
         if (options.ckan_check == 'True'):
           # check for and remove ckan dataset
           ckanstatus = 'unknown'
           if (options.community):              
              checksum='fe5f25c9f6d17ba289d6551afc98a8c3'
              ckanstatus=UP.check_dataset(id,checksum)
           if (ckanstatus == 'unknown' or ckanstatus == 'changed' or ckanstatus == 'unchanged'):
             actionreq+=' remove ckan dataset'
             try:
               if (not options.quiet): ##HEW-ADD and ( b2findversion != '1.0'):
                 delete = UP.delete(id,ckanstatus)
                 if (delete == 1):
##                        logger.info('        |-> %s' % ('Deletion was successful'))
                        ccount +=  1
                        ckanaction='removed'
                 else:
                        ckanaction='failed'
             except Exception, e:
               logger.error('[ERROR] Unexpected Error: %s' % e)
               raise
       print '| %-6d | %-35s | %-6s | %-6s | %-6s | %-6s |' % (n, id,xmlstatus,jsonstatus,ckanstatus,handlestatus)
       if (not options.quiet):
         print '--> %-42s | %-6s | %-6s | %-6s | %-6s |' % ('action performed',xmlaction,jsonaction,ckanaction,handleaction)

    logger.info('end of cleaning ...')
コード例 #4
0
ファイル: manager.py プロジェクト: EUDAT-B2FIND/md-ingestion
def process_delete(OUT, dir, options):
    """Delete the datasets listed in ``*.del`` files from CKAN and the handle server.

    NOTE: this function is currently disabled. It prints a warning and
    returns ``False`` before doing any work. The code behind the guard is
    kept for the day the function is brought up to date again.

    Args:
        OUT: output object; handed to ``Uploader`` and used for
            ``save_stats``.
        dir: directory that is searched for ``*.del`` files. The base name
            of each file is split at ``-`` into community and mdprefix, and
            every non-empty line is split at a TAB into subset and
            identifier.
        options: parsed options; ``iphost``, ``auth``, ``outdir`` and
            ``handle_check`` are read.

    Returns:
        ``False`` -- always, as long as the guard below is in place.
    """
    print("###JM# Don't use this function. It is not up to date.")
    return False

    # ----------------------------------------------------------------------
    # Everything below is unreachable until the guard above is removed.
    # NOTE(review): `client` (handle client) and `p` (option parser) are not
    # bound anywhere in this function. Presumably they must come from module
    # scope or be created here (e.g. a handle client built from `cred`) --
    # confirm before re-enabling.
    # ----------------------------------------------------------------------

    # create CKAN object
    CKAN = CKAN_CLIENT(options.iphost, options.auth)
    UP = Uploader(CKAN, OUT, options.outdir)

    # create credentials
    try:
        cred = b2handle.clientcredentials.PIDClientCredentials.load_from_JSON('credentials_11098')
    except Exception:
        # The message used to carry two placeholders for a single argument,
        # which raised TypeError instead of logging the failure.
        logging.critical("[CRITICAL] Could not create credentials from credstore %s" % (options.handle_check))
        p.print_help()
        sys.exit(-1)
    else:
        logging.debug("Create handle client instance to add uuid to handle server")

    for delete_file in glob.glob(dir+'/*.del'):
        community, mdprefix = os.path.splitext(os.path.basename(delete_file))[0].split('-')

        logging.info('\n## Deleting datasets from community "%s" ##' % (community))

        # get packages from the group in CKAN:
        UP.get_packages(community)

        # read the complete delete file; `with` closes it on every path
        try:
            with open(delete_file, 'r') as f:
                file_content = f.read()
        except IOError:
            logging.critical("Cannot read data from '{0}'".format(delete_file))
            # Nothing was read and no crash backup exists, so there is
            # nothing to process or clean up for this file.
            continue

        # rename the file in a crash backup file:
        backup_file = delete_file+'.crash-backup'
        os.rename(delete_file, backup_file)

        results = {
            'count': 0,
            'ecount': 0,
            'tcount': 0,
            'time': 0
        }
        subset = None  # stays None when the file holds no entries
        deletestart = time.time()

        # use a try-except-else environment to guarantee that no deleted
        # metadata information will be lost:
        try:
            logging.info('    |   | %-4s | %-50s | %-50s |\n    |%s|' % ('#','oai identifier','CKAN identifier',"-" * 116))

            for line in file_content.split('\n'):
                # ignore empty lines:
                if not line:
                    continue

                results['tcount'] += 1
                subset, identifier = line.split('\t')

                # dataset name uniquely generated from oai identifier
                uid = uuid.uuid5(uuid.NAMESPACE_DNS, identifier.encode('ascii','replace'))
                ds = str(uid)

                # output:
                logging.info('    | d | %-4d | %-50s | %-50s |' % (results['tcount'],identifier,ds))

                ### CHECK STATUS OF DATASET IN CKAN AND PID:
                # check against handle server
                pid = "11098/eudat-jmd_" + ds
                checksum = client.get_value_from_handle(pid, "CHECKSUM")

                if checksum is None:
                    logging.debug("        |-> Can not access pid %s to get checksum" % (pid))
                    handlestatus = "new"
                else:
                    logging.debug("        |-> pid %s exists" % (pid))
                    handlestatus = "exist"

                # check against CKAN database
                ckanstatus = UP.check_dataset(ds, None)

                # depending on handle status delete record from B2FIND
                if handlestatus == "new" and ckanstatus == "new":
                    # neither server knows the dataset: no action required
                    logging.info('        |-> %s' % ('No deletion required'))
                    continue

                if UP.delete(ds, ckanstatus) != 1:
                    logging.info('        |-> %s' % ('Deletion failed'))
                    results['ecount'] += 1
                    continue

                logging.info('        |-> %s' % ('Deletion was successful'))
                results['count'] += 1

                # delete handle (to keep the symmetry between handle and B2FIND server)
                if handlestatus == "exist":
                    logging.info("        |-> Delete handle %s with checksum %s" % (pid, checksum))
                    # best effort: a failing handle deletion is logged but
                    # does not abort the run
                    try:
                        client.delete_handle(pid)
                    except GenericHandleError as err:
                        logging.error('[ERROR] Unexpected Error: %s' % err)
                    except Exception as err:
                        logging.error('[ERROR] Unexpected Error: %s' % err)
                else:
                    logging.info("        |-> No action (deletion) required for handle %s" % pid)
        except Exception:
            logging.error('[ERROR] Unexpected Error')
            logging.error('You find the ids of the deleted metadata in "%s"' % (backup_file))
            raise
        else:
            # all worked fine you can remove the crash-backup file:
            os.remove(backup_file)

        results['time'] = time.time() - deletestart

        if subset is None:
            # No entry was read, so there is no subset to file the stats
            # under (this used to end in a NameError).
            logging.info('No entries found in "%s"' % (delete_file))
            continue

        # save stats:
        OUT.save_stats(community+'-'+mdprefix, subset, 'd', results)