Python read_manifest Exemples, grokmirror.read_manifest Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : fsck.py Projet : lfit/grokmirror

def fsck_mirror(name, config, verbose=False, force=False):
    global logger
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Running grok-fsck for [%s]' % name)

    # Lock the tree to make sure we only run one instance
    logger.debug('Attempting to obtain lock on %s' % config['lock'])
    flockh = open(config['lock'], 'w')
    try:
        lockf(flockh, LOCK_EX | LOCK_NB)
    except IOError:
        logger.info('Could not obtain exclusive lock on %s' % config['lock'])
        logger.info('Assuming another process is running.')
        return 0

    manifest = grokmirror.read_manifest(config['manifest'])

    if os.path.exists(config['statusfile']):
        logger.info('Reading status from %s' % config['statusfile'])
        stfh = open(config['statusfile'], 'r')
        try:
            # Format of the status file:
            #  {
            #    '/full/path/to/repository': {
            #      'lastcheck': 'YYYY-MM-DD' or 'never',
            #      'nextcheck': 'YYYY-MM-DD',
            #    },
            #    ...
            #  }

            status = json.load(stfh)
        except:
            # Huai le!
            logger.critical('Failed to parse %s' % config['statusfile'])
            lockf(flockh, LOCK_UN)
            flockh.close()
            return 1
    else:
        status = {}

    frequency = int(config['frequency'])

    today = datetime.datetime.today()

    workdone = False

    # Go through the manifest and compare with status
    for gitdir in manifest.keys():
        fullpath = os.path.join(config['toplevel'], gitdir.lstrip('/'))
        if fullpath not in status.keys():
            # Newly added repository
            # Randomize next check between now and frequency
            delay = random.randint(0, frequency)
            nextdate = today + datetime.timedelta(days=delay)
            nextcheck = nextdate.strftime('%F')
            status[fullpath] = {
                'lastcheck': 'never',
                'nextcheck': nextcheck,
            }
            logger.info('Added new repository %s with next check on %s' % (
                gitdir, nextcheck))
            workdone = True

    # Go through status and queue checks for all the dirs that are due today
    # (unless --force, which is EVERYTHING)
    todayiso = today.strftime('%F')
    for fullpath in status.keys():
        # Check to make sure it's still in the manifest
        gitdir = fullpath.replace(config['toplevel'], '', 1)
        gitdir = '/' + gitdir.lstrip('/')

        if gitdir not in manifest.keys():
            del status[fullpath]
            logger.info('Removed %s which is no longer in manifest' % gitdir)
            continue

        # If nextcheck is before today, set it to today
        # XXX: If a system comes up after being in downtime for a while, this
        #      may cause pain for them, so perhaps use randomization here?
        nextcheck = datetime.datetime.strptime(status[fullpath]['nextcheck'],
                                               '%Y-%m-%d')

        if force or nextcheck <= today:
            logger.debug('Queueing to check %s' % fullpath)
            # Calculate elapsed seconds
            startt = time.time()
            run_git_fsck(fullpath, config)
            endt = time.time()

            status[fullpath]['lastcheck'] = todayiso
            status[fullpath]['s_elapsed'] = round(endt - startt, 2)

            if force:
                # Use randomization for next check, again
                delay = random.randint(1, frequency)
            else:
                delay = frequency

            nextdate = today + datetime.timedelta(days=delay)
            status[fullpath]['nextcheck'] = nextdate.strftime('%F')
            workdone = True

    # Do quickie checks
    if 'quick_checks_max_min' in config.keys():
        # Convert to seconds for ease of tracking
        max_time = int(config['quick_checks_max_min']) * 60
        logger.debug('max_time=%s' % max_time)
        if max_time < 60:
            logger.warning('quick_checks_max_min must be at least 1 minute')
            max_time = 60
        logger.info('Performing quick checks')
        # Find the smallest s_elapsed not yet checked today
        # and run a check on it until we either run out of time
        # or repos to check.
        total_elapsed_time = 0
        quickies_checked = 0
        while True:
            # use this var to track which repo is smallest on s_elapsed
            least_elapsed = None
            repo_to_check = None
            for fullpath in status.keys():
                if status[fullpath]['lastcheck'] in ('never', todayiso):
                    # never been checked or checked today, skip
                    continue
                if 's_elapsed' not in status[fullpath].keys():
                    # something happened to the s_elapsed entry?
                    continue
                prev_elapsed = status[fullpath]['s_elapsed']
                if total_elapsed_time + prev_elapsed > max_time:
                    # would take us too long to check it, skip
                    continue
                if least_elapsed is None or prev_elapsed < least_elapsed:
                    least_elapsed = prev_elapsed
                    repo_to_check = fullpath

            if repo_to_check is None:
                if quickies_checked == 0:
                    logger.info('No repos qualified for quick checks')
                else:
                    logger.info('Quick-checked %s repos in %s seconds' % (
                        quickies_checked, total_elapsed_time))
                break

            # check repo and record the necessary bits
            startt = time.time()
            run_git_fsck(repo_to_check, config)
            endt = time.time()

            # We don't adjust nextcheck, since it kinda becomes meaningless
            status[repo_to_check]['lastcheck'] = todayiso
            status[repo_to_check]['s_elapsed'] = round(endt - startt, 2)

            total_elapsed_time += status[repo_to_check]['s_elapsed']
            quickies_checked += 1
            workdone = True

    # Write out the new status
    if workdone:
        logger.info('Writing new status file in %s' % config['statusfile'])
        stfh = open(config['statusfile'], 'w')
        json.dump(status, stfh, indent=2)
        stfh.close()

    lockf(flockh, LOCK_UN)
    flockh.close()

Exemple #2

0

Afficher le fichier

Fichier : fsck.py Projet : doghrim/grokmirror

def fsck_mirror(name, config, verbose=False, force=False):
    global logger
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Running grok-fsck for [%s]' % name)

    # Lock the tree to make sure we only run one instance
    logger.debug('Attempting to obtain lock on %s' % config['lock'])
    flockh = open(config['lock'], 'w')
    try:
        lockf(flockh, LOCK_EX | LOCK_NB)
    except IOError:
        logger.info('Could not obtain exclusive lock on %s' % config['lock'])
        logger.info('Assuming another process is running.')
        return 0

    manifest = grokmirror.read_manifest(config['manifest'])

    if os.path.exists(config['statusfile']):
        logger.info('Reading status from %s' % config['statusfile'])
        stfh = open(config['statusfile'], 'r')
        try:
            # Format of the status file:
            #  {
            #    '/full/path/to/repository': {
            #      'lastcheck': 'YYYY-MM-DD' or 'never',
            #      'nextcheck': 'YYYY-MM-DD',
            #    },
            #    ...
            #  }

            status = json.load(stfh)
        except:
            # Huai le!
            logger.critical('Failed to parse %s' % config['statusfile'])
            lockf(flockh, LOCK_UN)
            flockh.close()
            return 1
    else:
        status = {}

    frequency = int(config['frequency'])

    today = datetime.datetime.today()

    workdone = False

    # Go through the manifest and compare with status
    for gitdir in manifest.keys():
        fullpath = os.path.join(config['toplevel'], gitdir.lstrip('/'))
        if fullpath not in status.keys():
            # Newly added repository
            # Randomize next check between now and frequency
            delay = random.randint(0, frequency)
            nextdate = today + datetime.timedelta(days=delay)
            nextcheck = nextdate.strftime('%F')
            status[fullpath] = {
                'lastcheck': 'never',
                'nextcheck': nextcheck,
            }
            logger.info('Added new repository %s with next check on %s' %
                        (gitdir, nextcheck))
            workdone = True

    # Go through status and queue checks for all the dirs that are due today
    # (unless --force, which is EVERYTHING)
    todayiso = today.strftime('%F')
    for fullpath in status.keys():
        # Check to make sure it's still in the manifest
        gitdir = fullpath.replace(config['toplevel'], '', 1)
        gitdir = '/' + gitdir.lstrip('/')

        if gitdir not in manifest.keys():
            del status[fullpath]
            logger.info('Removed %s which is no longer in manifest' % gitdir)
            continue

        # If nextcheck is before today, set it to today
        # XXX: If a system comes up after being in downtime for a while, this
        #      may cause pain for them, so perhaps use randomization here?
        nextcheck = datetime.datetime.strptime(status[fullpath]['nextcheck'],
                                               '%Y-%m-%d')

        if force or nextcheck <= today:
            logger.debug('Queueing to check %s' % fullpath)
            # Calculate elapsed seconds
            startt = time.time()
            run_git_fsck(fullpath, config)
            endt = time.time()

            status[fullpath]['lastcheck'] = todayiso
            status[fullpath]['s_elapsed'] = round(endt - startt, 2)

            if force:
                # Use randomization for next check, again
                delay = random.randint(1, frequency)
            else:
                delay = frequency

            nextdate = today + datetime.timedelta(days=delay)
            status[fullpath]['nextcheck'] = nextdate.strftime('%F')
            workdone = True

    # Do quickie checks
    if 'quick_checks_max_min' in config.keys():
        # Convert to seconds for ease of tracking
        max_time = int(config['quick_checks_max_min']) * 60
        logger.debug('max_time=%s' % max_time)
        if max_time < 60:
            logger.warning('quick_checks_max_min must be at least 1 minute')
            max_time = 60
        logger.info('Performing quick checks')
        # Find the smallest s_elapsed not yet checked today
        # and run a check on it until we either run out of time
        # or repos to check.
        total_elapsed_time = 0
        quickies_checked = 0
        while True:
            # use this var to track which repo is smallest on s_elapsed
            least_elapsed = None
            repo_to_check = None
            for fullpath in status.keys():
                if status[fullpath]['lastcheck'] in ('never', todayiso):
                    # never been checked or checked today, skip
                    continue
                if 's_elapsed' not in status[fullpath].keys():
                    # something happened to the s_elapsed entry?
                    continue
                prev_elapsed = status[fullpath]['s_elapsed']
                if total_elapsed_time + prev_elapsed > max_time:
                    # would take us too long to check it, skip
                    continue
                if least_elapsed is None or prev_elapsed < least_elapsed:
                    least_elapsed = prev_elapsed
                    repo_to_check = fullpath

            if repo_to_check is None:
                if quickies_checked == 0:
                    logger.info('No repos qualified for quick checks')
                else:
                    logger.info('Quick-checked %s repos in %s seconds' %
                                (quickies_checked, total_elapsed_time))
                break

            # check repo and record the necessary bits
            startt = time.time()
            run_git_fsck(repo_to_check, config)
            endt = time.time()

            # We don't adjust nextcheck, since it kinda becomes meaningless
            status[repo_to_check]['lastcheck'] = todayiso
            status[repo_to_check]['s_elapsed'] = round(endt - startt, 2)

            total_elapsed_time += status[repo_to_check]['s_elapsed']
            quickies_checked += 1
            workdone = True

    # Write out the new status
    if workdone:
        logger.info('Writing new status file in %s' % config['statusfile'])
        stfh = open(config['statusfile'], 'w')
        json.dump(status, stfh, indent=2)
        stfh.close()

    lockf(flockh, LOCK_UN)
    flockh.close()

Exemple #3

0

Afficher le fichier

Fichier : pull.py Projet : pombredanne/grokmirror

            if config['manifest'].find('.gz') > 0:
                fh = gzip.GzipFile(fileobj=StringIO(ufh.read()))
            else:
                fh = ufh

            jdata = fh.read()
            fh.close()

            manifest = anyjson.deserialize(jdata)

        except Exception, ex:
            logger.warning('Failed to parse %s' % config['manifest'])
            logger.warning('Error was: %s' % ex)
            return 1

    mymanifest = grokmirror.read_manifest(mymanifest)

    culled = cull_manifest(manifest, config)

    to_clone = []
    to_pull = []
    existing = []

    toplevel = config['toplevel']
    if not os.access(toplevel, os.W_OK):
        logger.critical('Toplevel %s does not exist or is not writable'
                        % toplevel)
        sys.exit(1)

    if 'pull_threads' in config.keys():
        pull_threads = int(config['pull_threads'])

Exemple #4

0

Afficher le fichier

Fichier : pull.py Projet : pombredanne/grokmirror

def pull_mirror(name, config, verbose=False, force=False, nomtime=False,
                verify=False, verify_subpath='*', noreuse=False,
                purge=False, pretty=False, forcepurge=False):
    global logger
    global lock_fails

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Checking [%s]' % name)
    mymanifest = config['mymanifest']

    if verify:
        logger.info('Verifying mirror against %s' % config['manifest'])
        nomtime = True

    if config['manifest'].find('file:///') == 0:
        manifile = config['manifest'].replace('file://', '')
        if not os.path.exists(manifile):
            logger.critical('Remote manifest not found in %s! Quitting!'
                            % config['manifest'])
            return 1

        fstat = os.stat(manifile)
        last_modified = fstat[8]
        logger.debug('mtime on %s is: %s' % (manifile, fstat[8]))

        if os.path.exists(config['mymanifest']):
            fstat = os.stat(config['mymanifest'])
            my_last_modified = fstat[8]
            logger.debug('Our last-modified is: %s' % my_last_modified)
            if not (force or nomtime) and last_modified <= my_last_modified:
                logger.info('Manifest file unchanged. Quitting.')
                return 0

        logger.info('Reading new manifest from %s' % manifile)
        manifest = grokmirror.read_manifest(manifile)
        # Don't accept empty manifests -- that indicates something is wrong
        if not len(manifest.keys()):
            logger.critical('Remote manifest empty or unparseable! Quitting.')
            return 1

    else:
        # Load it from remote host using http and header magic
        logger.info('Fetching remote manifest from %s' % config['manifest'])

        # Do we have username:password@ in the URL?
        chunks = urlparse.urlparse(config['manifest'])
        if chunks.netloc.find('@') > 0:
            logger.debug('Taking username/password from the URL for basic auth')
            (upass, netloc) = chunks.netloc.split('@')
            if upass.find(':') > 0:
                (username, password) = upass.split(':')
            else:
                username = upass
                password = ''

            manifesturl = config['manifest'].replace(chunks.netloc, netloc)
            logger.debug('manifesturl=%s' % manifesturl)
            request = urllib2.Request(manifesturl)

            password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, manifesturl, username, password)
            auth_handler = urllib2.HTTPBasicAuthHandler(password_mgr)
            opener = urllib2.build_opener(auth_handler)

        else:
            request = urllib2.Request(config['manifest'])
            opener = urllib2.build_opener()

        # Find out if we need to run at all first
        if not (force or nomtime) and os.path.exists(mymanifest):
            fstat = os.stat(mymanifest)
            mtime = fstat[8]
            logger.debug('mtime on %s is: %s' % (mymanifest, mtime))
            my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                             time.gmtime(mtime))
            logger.debug('Our last-modified is: %s' % my_last_modified)
            request.add_header('If-Modified-Since', my_last_modified)

        try:
            ufh = opener.open(request, timeout=30)
        except urllib2.HTTPError, ex:
            if ex.code == 304:
                logger.info('Server says we have the latest manifest. '
                            'Quitting.')
                return 0
            logger.warning('Could not fetch %s' % config['manifest'])
            logger.warning('Server returned: %s' % ex)
            return 1
        except urllib2.URLError, ex:
            logger.warning('Could not fetch %s' % config['manifest'])
            logger.warning('Error was: %s' % ex)
            return 1

Exemple #5

0

Afficher le fichier

Fichier : pull.py Projet : pombredanne/grokmirror

def pull_mirror(name,
                config,
                verbose=False,
                force=False,
                nomtime=False,
                verify=False,
                verify_subpath='*',
                noreuse=False,
                purge=False,
                pretty=False,
                forcepurge=False):
    global logger
    global lock_fails

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Checking [%s]' % name)
    mymanifest = config['mymanifest']

    if verify:
        logger.info('Verifying mirror against %s' % config['manifest'])
        nomtime = True

    if config['manifest'].find('file:///') == 0:
        manifile = config['manifest'].replace('file://', '')
        if not os.path.exists(manifile):
            logger.critical('Remote manifest not found in %s! Quitting!' %
                            config['manifest'])
            return 1

        fstat = os.stat(manifile)
        last_modified = fstat[8]
        logger.debug('mtime on %s is: %s' % (manifile, fstat[8]))

        if os.path.exists(config['mymanifest']):
            fstat = os.stat(config['mymanifest'])
            my_last_modified = fstat[8]
            logger.debug('Our last-modified is: %s' % my_last_modified)
            if not (force or nomtime) and last_modified <= my_last_modified:
                logger.info('Manifest file unchanged. Quitting.')
                return 0

        logger.info('Reading new manifest from %s' % manifile)
        manifest = grokmirror.read_manifest(manifile)
        # Don't accept empty manifests -- that indicates something is wrong
        if not len(manifest.keys()):
            logger.critical('Remote manifest empty or unparseable! Quitting.')
            return 1

    else:
        # Load it from remote host using http and header magic
        logger.info('Fetching remote manifest from %s' % config['manifest'])

        # Do we have username:password@ in the URL?
        chunks = urlparse.urlparse(config['manifest'])
        if chunks.netloc.find('@') > 0:
            logger.debug(
                'Taking username/password from the URL for basic auth')
            (upass, netloc) = chunks.netloc.split('@')
            if upass.find(':') > 0:
                (username, password) = upass.split(':')
            else:
                username = upass
                password = ''

            manifesturl = config['manifest'].replace(chunks.netloc, netloc)
            logger.debug('manifesturl=%s' % manifesturl)
            request = urllib2.Request(manifesturl)

            password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, manifesturl, username, password)
            auth_handler = urllib2.HTTPBasicAuthHandler(password_mgr)
            opener = urllib2.build_opener(auth_handler)

        else:
            request = urllib2.Request(config['manifest'])
            opener = urllib2.build_opener()

        # Find out if we need to run at all first
        if not (force or nomtime) and os.path.exists(mymanifest):
            fstat = os.stat(mymanifest)
            mtime = fstat[8]
            logger.debug('mtime on %s is: %s' % (mymanifest, mtime))
            my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                             time.gmtime(mtime))
            logger.debug('Our last-modified is: %s' % my_last_modified)
            request.add_header('If-Modified-Since', my_last_modified)

        try:
            ufh = opener.open(request, timeout=30)
        except urllib2.HTTPError, ex:
            if ex.code == 304:
                logger.info('Server says we have the latest manifest. '
                            'Quitting.')
                return 0
            logger.warning('Could not fetch %s' % config['manifest'])
            logger.warning('Server returned: %s' % ex)
            return 1
        except urllib2.URLError, ex:
            logger.warning('Could not fetch %s' % config['manifest'])
            logger.warning('Error was: %s' % ex)
            return 1

Exemple #6

0

Afficher le fichier

Fichier : fsck.py Projet : mjeanson/grokmirror

def fsck_mirror(name,
                config,
                verbose=False,
                force=False,
                repack_only=False,
                conn_only=False,
                repack_all_quick=False,
                repack_all_full=False):
    global logger
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    if 'log' in config:
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config:
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    if conn_only or repack_all_quick or repack_all_full:
        force = True

    logger.info('Running grok-fsck for [%s]', name)

    # Lock the tree to make sure we only run one instance
    logger.debug('Attempting to obtain lock on %s', config['lock'])
    flockh = open(config['lock'], 'w')
    try:
        lockf(flockh, LOCK_EX | LOCK_NB)
    except IOError:
        logger.info('Could not obtain exclusive lock on %s', config['lock'])
        logger.info('Assuming another process is running.')
        return 0

    manifest = grokmirror.read_manifest(config['manifest'])

    if os.path.exists(config['statusfile']):
        logger.info('Reading status from %s', config['statusfile'])
        stfh = open(config['statusfile'], 'rb')
        # noinspection PyBroadException
        try:
            # Format of the status file:
            #  {
            #    '/full/path/to/repository': {
            #      'lastcheck': 'YYYY-MM-DD' or 'never',
            #      'nextcheck': 'YYYY-MM-DD',
            #      'lastrepack': 'YYYY-MM-DD',
            #      'fingerprint': 'sha-1',
            #      's_elapsed': seconds,
            #      'quick_repack_count': times,
            #    },
            #    ...
            #  }

            status = json.loads(stfh.read().decode('utf-8'))
        except:
            # Huai le!
            logger.critical('Failed to parse %s', config['statusfile'])
            lockf(flockh, LOCK_UN)
            flockh.close()
            return 1
    else:
        status = {}

    if 'frequency' in config:
        frequency = int(config['frequency'])
    else:
        frequency = 30

    today = datetime.datetime.today()
    todayiso = today.strftime('%F')

    if force:
        # Use randomization for next check, again
        checkdelay = random.randint(1, frequency)
    else:
        checkdelay = frequency

    if 'precious' not in config:
        config['precious'] = 'yes'
    if 'commitgraph' not in config:
        config['commitgraph'] = 'yes'

    # Is our git version new enough to support it?
    if config['commitgraph'] == 'yes' and not grokmirror.git_newer_than(
            '2.18.0'):
        logger.info('Git version too old to support commit graphs, disabling')
        config['commitgraph'] = 'no'

    # Go through the manifest and compare with status
    # noinspection PyTypeChecker
    e_find = em.counter(total=len(manifest),
                        desc='Discovering:',
                        unit='repos',
                        leave=False)
    for gitdir in list(manifest):
        e_find.update()
        fullpath = os.path.join(config['toplevel'], gitdir.lstrip('/'))
        if fullpath not in status.keys():
            # Newly added repository
            if not force:
                # Randomize next check between now and frequency
                delay = random.randint(0, frequency)
                nextdate = today + datetime.timedelta(days=delay)
                nextcheck = nextdate.strftime('%F')
            else:
                nextcheck = todayiso

            status[fullpath] = {
                'lastcheck': 'never',
                'nextcheck': nextcheck,
            }
            logger.info('%s:', fullpath)
            logger.info('  added : next check on %s', nextcheck)

    e_find.close()

    # record newly found repos in the status file
    logger.debug('Updating status file in %s', config['statusfile'])
    with open(config['statusfile'], 'wb') as stfh:
        stfh.write(json.dumps(status, indent=2).encode('utf-8'))

    # Go through status and find all repos that need work done on them.
    # This is a dictionary that contains:
    # full_path_to_repo:
    #   repack: 0, 1, 2 (0-no, 1-needs quick repack, 2-needs full repack)
    #   fsck: 0/1

    to_process = {}

    total_checked = 0
    total_elapsed = 0

    # noinspection PyTypeChecker
    e_cmp = em.counter(total=len(status),
                       desc='Analyzing:',
                       unit='repos',
                       leave=False)
    for fullpath in list(status):
        e_cmp.update()

        # Check to make sure it's still in the manifest
        gitdir = fullpath.replace(config['toplevel'], '', 1)
        gitdir = '/' + gitdir.lstrip('/')

        if gitdir not in manifest.keys():
            del status[fullpath]
            logger.debug('%s is gone, no longer in manifest', gitdir)
            continue

        needs_repack = needs_prune = needs_fsck = 0

        obj_info = get_repo_obj_info(fullpath)
        try:
            packs = int(obj_info['packs'])
            count_loose = int(obj_info['count'])
        except KeyError:
            logger.warning('Unable to count objects in %s, skipping' %
                           fullpath)
            continue

        schedcheck = datetime.datetime.strptime(status[fullpath]['nextcheck'],
                                                '%Y-%m-%d')
        nextcheck = today + datetime.timedelta(days=checkdelay)

        if 'repack' not in config or config['repack'] != 'yes':
            # don't look at me if you turned off repack
            logger.debug('Not repacking because repack=no in config')
            needs_repack = 0
        elif repack_all_full and (count_loose > 0 or packs > 1):
            logger.debug('needs_repack=2 due to repack_all_full')
            needs_repack = 2
        elif repack_all_quick and count_loose > 0:
            logger.debug('needs_repack=1 due to repack_all_quick')
            needs_repack = 1
        elif conn_only:
            # don't do any repacks if we're running forced connectivity checks, unless
            # you specifically passed --repack-all-foo
            logger.debug('needs_repack=0 due to --conn-only')
            needs_repack = 0
        else:
            # for now, hardcode the maximum loose objects and packs
            # TODO: we can probably set this in git config values?
            #       I don't think this makes sense as a global setting, because
            #       optimal values will depend on the size of the repo as a whole
            max_loose_objects = 1200
            max_packs = 20
            pc_loose_objects = 10
            pc_loose_size = 10

            # first, compare against max values:
            if packs >= max_packs:
                logger.debug('Triggering full repack of %s because packs > %s',
                             fullpath, max_packs)
                needs_repack = 2
            elif count_loose >= max_loose_objects:
                logger.debug(
                    'Triggering quick repack of %s because loose objects > %s',
                    fullpath, max_loose_objects)
                needs_repack = 1
            else:
                # is the number of loose objects or their size more than 10% of
                # the overall total?
                in_pack = int(obj_info['in-pack'])
                size_loose = int(obj_info['size'])
                size_pack = int(obj_info['size-pack'])
                total_obj = count_loose + in_pack
                total_size = size_loose + size_pack
                # set some arbitrary "worth bothering" limits so we don't
                # continuously repack tiny repos.
                if total_obj > 500 and count_loose / total_obj * 100 >= pc_loose_objects:
                    logger.debug(
                        'Triggering repack of %s because loose objects > %s%% of total',
                        fullpath, pc_loose_objects)
                    needs_repack = 1
                elif total_size > 1024 and size_loose / total_size * 100 >= pc_loose_size:
                    logger.debug(
                        'Triggering repack of %s because loose size > %s%% of total',
                        fullpath, pc_loose_size)
                    needs_repack = 1

        if needs_repack > 0 and (config['precious'] == 'always'
                                 and check_precious_objects(fullpath)):
            # if we have preciousObjects, then we only repack based on the same
            # schedule as fsck.
            logger.debug('preciousObjects is set')
            # for repos with preciousObjects, we use the fsck schedule for repacking
            if schedcheck <= today:
                logger.debug(
                    'Time for a full periodic repack of a preciousObjects repo'
                )
                status[fullpath]['nextcheck'] = nextcheck.strftime('%F')
                needs_repack = 2
            else:
                logger.debug(
                    'Not repacking preciousObjects repo outside of schedule')
                needs_repack = 0

        # Do we need to fsck it?
        if not (repack_all_quick or repack_all_full or repack_only):
            if schedcheck <= today or force:
                status[fullpath]['nextcheck'] = nextcheck.strftime('%F')
                needs_fsck = 1

        if needs_repack or needs_fsck or needs_prune:
            # emit a warning if we find garbage in a repo
            # we do it here so we don't spam people nightly on every cron run,
            # but only do it when a repo needs actual work done on it
            if obj_info['garbage'] != '0':
                logger.warning(
                    '%s:\n\tcontains %s garbage files (garbage-size: %s KiB)',
                    fullpath, obj_info['garbage'], obj_info['size-garbage'])

            to_process[fullpath] = {
                'repack': needs_repack,
                'prune': needs_prune,
                'fsck': needs_fsck,
            }

    e_cmp.close()

    if not len(to_process):
        logger.info('No repos need attention.')
        em.stop()
        return

    logger.info('Processing %s repositories', len(to_process))

    # noinspection PyTypeChecker
    run = em.counter(total=len(to_process),
                     desc='Processing:',
                     unit='repos',
                     leave=False)
    for fullpath, needs in to_process.items():
        logger.info('%s:', fullpath)
        # Calculate elapsed seconds
        run.refresh()
        startt = time.time()

        # Wait till the repo is available and lock it for the duration of checks,
        # otherwise there may be false-positives if a mirrored repo is updated
        # in the middle of fsck or repack.
        grokmirror.lock_repo(fullpath, nonblocking=False)
        if needs['repack']:
            if run_git_repack(fullpath, config, needs['repack']):
                status[fullpath]['lastrepack'] = todayiso
                if needs['repack'] > 1:
                    status[fullpath]['lastfullrepack'] = todayiso
            else:
                logger.warning(
                    'Repacking %s was unsuccessful, '
                    'not running fsck.', fullpath)
                grokmirror.unlock_repo(fullpath)
                run.update()
                continue

        if needs['prune']:
            run_git_prune(fullpath, config)

        if needs['fsck']:
            run_git_fsck(fullpath, config, conn_only)
            endt = time.time()
            status[fullpath]['lastcheck'] = todayiso
            status[fullpath]['s_elapsed'] = int(endt - startt)

            logger.info('   done : %ss, next check on %s',
                        status[fullpath]['s_elapsed'],
                        status[fullpath]['nextcheck'])

        run.update()

        # We're done with the repo now
        grokmirror.unlock_repo(fullpath)
        total_checked += 1
        total_elapsed += time.time() - startt

        # Write status file after each check, so if the process dies, we won't
        # have to recheck all the repos we've already checked
        logger.debug('Updating status file in %s', config['statusfile'])
        with open(config['statusfile'], 'wb') as stfh:
            stfh.write(json.dumps(status, indent=2).encode('utf-8'))

    run.close()
    em.stop()
    logger.info('Processed %s repos in %0.2fs', total_checked, total_elapsed)

    with open(config['statusfile'], 'wb') as stfh:
        stfh.write(json.dumps(status, indent=2).encode('utf-8'))

    lockf(flockh, LOCK_UN)
    flockh.close()

Exemple #7

0

Afficher le fichier

def fsck_mirror(name, config, verbose=False, force=False):
    global logger
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Running grok-fsck for [%s]' % name)

    # Lock the tree to make sure we only run one instance
    logger.debug('Attempting to obtain lock on %s' % config['lock'])
    flockh = open(config['lock'], 'w')
    try:
        lockf(flockh, LOCK_EX | LOCK_NB)
    except IOError:
        logger.info('Could not obtain exclusive lock on %s' % config['lock'])
        logger.info('Assuming another process is running.')
        return 0

    manifest = grokmirror.read_manifest(config['manifest'])

    if os.path.exists(config['statusfile']):
        logger.info('Reading status from %s' % config['statusfile'])
        stfh = open(config['statusfile'], 'r')
        try:
            # Format of the status file:
            #  {
            #    '/full/path/to/repository': {
            #      'lastcheck': 'YYYY-MM-DD' or 'never',
            #      'nextcheck': 'YYYY-MM-DD',
            #      'lastrepack': 'YYYY-MM-DD',
            #      'fingerprint': 'sha-1',
            #      's_elapsed': seconds,
            #      'quick_repack_count': times,
            #    },
            #    ...
            #  }

            status = json.load(stfh)
        except:
            # Huai le!
            logger.critical('Failed to parse %s' % config['statusfile'])
            lockf(flockh, LOCK_UN)
            flockh.close()
            return 1
    else:
        status = {}

    frequency = int(config['frequency'])

    today = datetime.datetime.today()

    # Go through the manifest and compare with status
    for gitdir in manifest.keys():
        fullpath = os.path.join(config['toplevel'], gitdir.lstrip('/'))
        if fullpath not in status.keys():
            # Newly added repository
            # Randomize next check between now and frequency
            delay = random.randint(0, frequency)
            nextdate = today + datetime.timedelta(days=delay)
            nextcheck = nextdate.strftime('%F')
            status[fullpath] = {
                'lastcheck': 'never',
                'nextcheck': nextcheck,
            }
            logger.info('Added new repository %s with next check on %s' % (
                gitdir, nextcheck))

    total_checked = 0
    total_elapsed = 0

    # Go through status and queue checks for all the dirs that are due today
    # (unless --force, which is EVERYTHING)
    todayiso = today.strftime('%F')
    for fullpath in status.keys():
        # Check to make sure it's still in the manifest
        gitdir = fullpath.replace(config['toplevel'], '', 1)
        gitdir = '/' + gitdir.lstrip('/')

        if gitdir not in manifest.keys():
            del status[fullpath]
            logger.info('Removed %s which is no longer in manifest' % gitdir)
            continue

        # If nextcheck is before today, set it to today
        # XXX: If a system comes up after being in downtime for a while, this
        #      may cause pain for them, so perhaps use randomization here?
        nextcheck = datetime.datetime.strptime(status[fullpath]['nextcheck'],
                                               '%Y-%m-%d')

        if force or nextcheck <= today:
            logger.debug('Preparing to check %s' % fullpath)
            # Calculate elapsed seconds
            startt = time.time()
            run_git_fsck(fullpath, config)
            total_checked += 1

            # Did the fingerprint change since last time we repacked?
            oldfpr = None
            if 'fingerprint' in status[fullpath].keys():
                oldfpr = status[fullpath]['fingerprint']

            fpr = grokmirror.get_repo_fingerprint(config['toplevel'], gitdir, force=True)

            if fpr != oldfpr or force:
                full_repack = False
                if not 'quick_repack_count' in status[fullpath].keys():
                    status[fullpath]['quick_repack_count'] = 0

                quick_repack_count = status[fullpath]['quick_repack_count']
                if 'full_repack_every' in config.keys():
                    # but did you set 'full_repack_flags' as well?
                    if 'full_repack_flags' not in config.keys():
                        logger.critical('full_repack_every is set, but not full_repack_flags')
                    else:
                        full_repack_every = int(config['full_repack_every'])
                        # is it anything insane?
                        if full_repack_every < 2:
                            full_repack_every = 2
                            logger.warning('full_repack_every is too low, forced to 2')

                        # is it time to trigger full repack?
                        # We -1 because if we want a repack every 10th time, then we need to trigger
                        # when current repack count is 9.
                        if quick_repack_count >= full_repack_every-1:
                            logger.debug('Time to do full repack on %s' % fullpath)
                            full_repack = True
                            quick_repack_count = 0
                            status[fullpath]['lastfullrepack'] = todayiso
                        else:
                            logger.debug('Repack count for %s not yet reached full repack trigger' % fullpath)
                            quick_repack_count += 1

                run_git_repack(fullpath, config, full_repack)
                run_git_prune(fullpath, config, manifest)
                status[fullpath]['lastrepack'] = todayiso
                status[fullpath]['quick_repack_count'] = quick_repack_count

            else:
                logger.debug('No changes to %s since last run, not repacking' % gitdir)

            endt = time.time()

            total_elapsed += endt-startt

            status[fullpath]['fingerprint'] = fpr
            status[fullpath]['lastcheck'] = todayiso
            status[fullpath]['s_elapsed'] = int(endt - startt)

            if force:
                # Use randomization for next check, again
                delay = random.randint(1, frequency)
            else:
                delay = frequency

            nextdate = today + datetime.timedelta(days=delay)
            status[fullpath]['nextcheck'] = nextdate.strftime('%F')

            # Write status file after each check, so if the process dies, we won't
            # have to recheck all the repos we've already checked
            logger.debug('Updating status file in %s' % config['statusfile'])
            stfh = open(config['statusfile'], 'w')
            json.dump(status, stfh, indent=2)
            stfh.close()

    if not total_checked:
        logger.info('No new repos to check.')
    else:
        logger.info('Repos checked: %s' % total_checked)
        logger.info('Total running time: %s s' % int(total_elapsed))

    lockf(flockh, LOCK_UN)
    flockh.close()

Exemple #8

0

Afficher le fichier

Fichier : grok-pull.py Projet : pypingou/grokmirror

def pull_mirror(name, config, opts):
    global logger
    global lock_fails

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if opts.verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Checking [%s]' % name)
    mymanifest = config['mymanifest']

    if opts.verify:
        logger.info('Verifying mirror against %s' % config['manifest'])
        opts.nomtime = True

    if config['manifest'].find('file:///') == 0:
        manifile = config['manifest'].replace('file://', '')
        if not os.path.exists(manifile):
            logger.critical('Remote manifest not found in %s! Quitting!' %
                            config['manifest'])
            return 1

        fstat = os.stat(manifile)
        last_modified = fstat[8]
        logger.debug('mtime on %s is: %s' % (manifile, fstat[8]))

        if os.path.exists(config['mymanifest']):
            fstat = os.stat(config['mymanifest'])
            my_last_modified = fstat[8]
            logger.debug('Our last-modified is: %s' % my_last_modified)
            if not (opts.force
                    or opts.nomtime) and last_modified <= my_last_modified:
                logger.info('Manifest file unchanged. Quitting.')
                return 0

        logger.info('Reading new manifest from %s' % manifile)
        manifest = grokmirror.read_manifest(manifile)
        # Don't accept empty manifests -- that indicates something is wrong
        if not len(manifest.keys()):
            logger.critical('Remote manifest empty or unparseable! Quitting.')
            return 1

    else:
        # Load it from remote host using http and header magic
        logger.info('Fetching remote manifest from %s' % config['manifest'])
        request = urllib2.Request(config['manifest'])
        opener = urllib2.build_opener()

        # Find out if we need to run at all first
        if not (opts.force or opts.nomtime) and os.path.exists(mymanifest):
            fstat = os.stat(mymanifest)
            mtime = fstat[8]
            logger.debug('mtime on %s is: %s' % (mymanifest, mtime))
            my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                             time.gmtime(mtime))
            logger.debug('Our last-modified is: %s' % my_last_modified)
            request.add_header('If-Modified-Since', my_last_modified)

        try:
            ufh = opener.open(request, timeout=30)
        except urllib2.HTTPError, ex:
            if ex.code == 304:
                logger.info(
                    'Server says we have the latest manifest. Quitting.')
                return 0
            logger.warning('Could not fetch %s' % config['manifest'])
            logger.warning('Server returned: %s' % ex)
            return 1
        except urllib2.URLError, ex:
            logger.warning('Could not fetch %s' % config['manifest'])
            logger.warning('Error was: %s' % ex)
            return 1

Exemple #9

0

Afficher le fichier

Fichier : grok-manifest.py Projet : pypingou/grokmirror

    logger.addHandler(ch)

    if opts.logfile is not None:
        ch = logging.FileHandler(opts.logfile)
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)

        ch.setLevel(logging.DEBUG)
        logger.addHandler(ch)

    # push our logger into grokmirror to override the default
    grokmirror.logger = logger

    grokmirror.manifest_lock(opts.manifile)
    manifest = grokmirror.read_manifest(opts.manifile, wait=opts.wait)

    # If manifest is empty, don't use current timestamp
    if not len(manifest.keys()):
        opts.usenow = False

    if opts.remove and len(args):
        # Remove the repos as required, write new manfiest and exit
        for fullpath in args:
            repo = fullpath.replace(opts.toplevel, '', 1)
            if repo in manifest.keys():
                del manifest[repo]
                logger.info('Repository %s removed from manifest' % repo)
            else:
                logger.info('Repository %s not in manifest' % repo)

Exemple #10

0

Afficher le fichier

def pull_mirror(name,
                config,
                verbose=False,
                force=False,
                nomtime=False,
                verify=False,
                verify_subpath='*',
                noreuse=False,
                purge=False,
                pretty=False,
                forcepurge=False):
    global logger
    global lock_fails

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Checking [%s]', name)
    mymanifest = config['mymanifest']

    if verify:
        logger.info('Verifying mirror against %s', config['manifest'])
        nomtime = True

    if config['manifest'].find('file:///') == 0:
        manifile = config['manifest'].replace('file://', '')
        if not os.path.exists(manifile):
            logger.critical('Remote manifest not found in %s! Quitting!',
                            config['manifest'])
            return 1

        fstat = os.stat(manifile)
        last_modified = fstat[8]
        logger.debug('mtime on %s is: %s', manifile, fstat[8])

        if os.path.exists(config['mymanifest']):
            fstat = os.stat(config['mymanifest'])
            my_last_modified = fstat[8]
            logger.debug('Our last-modified is: %s', my_last_modified)
            if not (force or nomtime) and last_modified <= my_last_modified:
                logger.info('Manifest file unchanged. Quitting.')
                return 0

        logger.info('Reading new manifest from %s', manifile)
        manifest = grokmirror.read_manifest(manifile)
        # Don't accept empty manifests -- that indicates something is wrong
        if not len(manifest.keys()):
            logger.warning('Remote manifest empty or unparseable! Quitting.')
            return 1

    else:
        # Load it from remote host using http and header magic
        logger.info('Fetching remote manifest from %s', config['manifest'])

        # Do we have username:password@ in the URL?
        chunks = urlparse(config['manifest'])
        if chunks.netloc.find('@') > 0:
            logger.debug(
                'Taking username/password from the URL for basic auth')
            (upass, netloc) = chunks.netloc.split('@')
            if upass.find(':') > 0:
                (username, password) = upass.split(':')
            else:
                username = upass
                password = ''

            manifesturl = config['manifest'].replace(chunks.netloc, netloc)
            logger.debug('manifesturl=%s', manifesturl)
            request = urllib_request.Request(manifesturl)

            password_mgr = urllib_request.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, manifesturl, username, password)
            auth_handler = urllib_request.HTTPBasicAuthHandler(password_mgr)
            opener = urllib_request.build_opener(auth_handler)

        else:
            request = urllib_request.Request(config['manifest'])
            opener = urllib_request.build_opener()

        # Find out if we need to run at all first
        if not (force or nomtime) and os.path.exists(mymanifest):
            fstat = os.stat(mymanifest)
            mtime = fstat[8]
            logger.debug('mtime on %s is: %s', mymanifest, mtime)
            my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                             time.gmtime(mtime))
            logger.debug('Our last-modified is: %s', my_last_modified)
            request.add_header('If-Modified-Since', my_last_modified)

        try:
            ufh = opener.open(request, timeout=30)
        except HTTPError as ex:
            if ex.code == 304:
                logger.info('Server says we have the latest manifest. '
                            'Quitting.')
                return 0
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Server returned: %s', ex)
            return 1
        except (URLError, ssl.SSLError, ssl.CertificateError) as ex:
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

        last_modified = ufh.headers.get('Last-Modified')
        last_modified = time.strptime(last_modified,
                                      '%a, %d %b %Y %H:%M:%S %Z')
        last_modified = calendar.timegm(last_modified)

        # We don't use read_manifest for the remote manifest, as it can be
        # anything, really. For now, blindly open it with gzipfile if it ends
        # with .gz. XXX: some http servers will auto-deflate such files.
        try:
            if config['manifest'].find('.gz') > 0:
                fh = gzip.GzipFile(fileobj=BytesIO(ufh.read()))
            else:
                fh = ufh

            jdata = fh.read().decode('utf-8')
            fh.close()

            manifest = anyjson.deserialize(jdata)

        except Exception as ex:
            logger.warning('Failed to parse %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

    mymanifest = grokmirror.read_manifest(mymanifest)

    culled = cull_manifest(manifest, config)

    to_clone = []
    to_pull = []
    existing = []

    toplevel = config['toplevel']
    if not os.access(toplevel, os.W_OK):
        logger.critical('Toplevel %s does not exist or is not writable',
                        toplevel)
        sys.exit(1)

    if 'pull_threads' in config.keys():
        pull_threads = int(config['pull_threads'])
        if pull_threads < 1:
            logger.info('pull_threads is less than 1, forcing to 1')
            pull_threads = 1
    else:
        # be conservative
        logger.info('pull_threads is not set, consider setting it')
        pull_threads = 5

    # noinspection PyTypeChecker
    e_cmp = em.counter(total=len(culled),
                       desc='Comparing:',
                       unit='repos',
                       leave=False)

    for gitdir in list(culled):
        fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
        e_cmp.update()

        # fingerprints were added in later versions, so deal if the upstream
        # manifest doesn't have a fingerprint
        if 'fingerprint' not in culled[gitdir]:
            culled[gitdir]['fingerprint'] = None

        # Attempt to lock the repo
        try:
            grokmirror.lock_repo(fullpath, nonblocking=True)
        except IOError:
            logger.info('Could not lock %s, skipping', gitdir)
            lock_fails.append(gitdir)
            # Force the fingerprint to what we have in mymanifest,
            # if we have it.
            culled[gitdir]['fingerprint'] = None
            if gitdir in mymanifest and 'fingerprint' in mymanifest[gitdir]:
                culled[gitdir]['fingerprint'] = mymanifest[gitdir][
                    'fingerprint']
            if len(lock_fails) >= pull_threads:
                logger.info('Too many repositories locked (%s). Exiting.',
                            len(lock_fails))
                return 0
            continue

        if verify:
            if culled[gitdir]['fingerprint'] is None:
                logger.debug('No fingerprint for %s, not verifying', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            if not fnmatch.fnmatch(gitdir, verify_subpath):
                grokmirror.unlock_repo(fullpath)
                continue

            logger.debug('Verifying %s', gitdir)
            if not os.path.exists(fullpath):
                verify_fails.append(gitdir)
                logger.info('Verify: %s ABSENT', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            my_fingerprint = grokmirror.get_repo_fingerprint(toplevel,
                                                             gitdir,
                                                             force=force)

            if my_fingerprint == culled[gitdir]['fingerprint']:
                logger.info('Verify: %s OK', gitdir)
            else:
                logger.critical('Verify: %s FAILED', gitdir)
                verify_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            continue

        # Is the directory in place?
        if os.path.exists(fullpath):
            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.info('Reclone requested for %s:', gitdir)
                with open(rfile, 'r') as rfh:
                    reason = rfh.read()
                    logger.info('  %s', reason)

                to_clone.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # Fix owner and description, if necessary
            if gitdir in mymanifest.keys():
                # This code is hurky and needs to be cleaned up
                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = None
                if config['ignore_repo_references'] != 'yes':
                    ref = culled[gitdir].get('reference')

                # dirty hack to force on-disk owner/description checks
                # when we're called with -n, in case our manifest
                # differs from what is on disk for owner/description/alternates
                myref = None
                if nomtime:
                    mydesc = None
                    myowner = None
                else:
                    mydesc = mymanifest[gitdir].get('description')
                    myowner = mymanifest[gitdir].get('owner')

                    if config['ignore_repo_references'] != 'yes':
                        myref = mymanifest[gitdir].get('reference')

                    if myowner is None:
                        myowner = config['default_owner']

                if owner is None:
                    owner = config['default_owner']

                if desc != mydesc or owner != myowner or ref != myref:
                    # we can do this right away without waiting
                    set_repo_params(toplevel, gitdir, owner, desc, ref)

            else:
                # It exists on disk, but not in my manifest?
                if noreuse:
                    logger.critical('Found existing git repo in %s', fullpath)
                    logger.critical('But you asked NOT to reuse repos')
                    logger.critical('Skipping %s', gitdir)
                    grokmirror.unlock_repo(fullpath)
                    continue

                logger.info('Setting new origin for %s', gitdir)
                fix_remotes(gitdir, toplevel, config['site'])
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # fingerprints were added late, so if we don't have them
            # in the remote manifest, fall back on using timestamps
            changed = False
            if culled[gitdir]['fingerprint'] is not None:
                logger.debug('Will use fingerprints to compare %s', gitdir)
                my_fingerprint = grokmirror.get_repo_fingerprint(toplevel,
                                                                 gitdir,
                                                                 force=force)

                if my_fingerprint != culled[gitdir]['fingerprint']:
                    logger.debug('No fingerprint match, will pull %s', gitdir)
                    changed = True
                else:
                    logger.debug('Fingerprints match, skipping %s', gitdir)
            else:
                logger.debug('Will use timestamps to compare %s', gitdir)
                if force:
                    logger.debug('Will force-pull %s', gitdir)
                    changed = True
                    # set timestamp to 0 as well
                    grokmirror.set_repo_timestamp(toplevel, gitdir, 0)
                else:
                    ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
                    if ts < culled[gitdir]['modified']:
                        changed = True

            if changed:
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue
            else:
                logger.debug('Repo %s unchanged', gitdir)
                # if we don't have a fingerprint for it, add it now
                if culled[gitdir]['fingerprint'] is None:
                    fpr = grokmirror.get_repo_fingerprint(toplevel, gitdir)
                    culled[gitdir]['fingerprint'] = fpr
                existing.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

        else:
            # Newly incoming repo
            to_clone.append(gitdir)
            grokmirror.unlock_repo(fullpath)
            continue

        # If we got here, something is odd.
        # noinspection PyUnreachableCode
        logger.critical('Could not figure out what to do with %s', gitdir)
        grokmirror.unlock_repo(fullpath)

    logger.info('Compared new manifest against %s repositories in %0.2fs',
                len(culled), e_cmp.elapsed)
    e_cmp.close()

    if verify:
        if len(verify_fails):
            logger.critical('%s repos failed to verify', len(verify_fails))
            return 1
        else:
            logger.info('Verification successful')
            return 0

    hookscript = config['post_update_hook']

    if len(to_pull):

        if len(lock_fails) > 0:
            pull_threads -= len(lock_fails)

        # Don't spin up more threads than we need
        if pull_threads > len(to_pull):
            pull_threads = len(to_pull)

        # exit if we're ever at 0 pull_threads. Shouldn't happen, but some extra
        # precaution doesn't hurt
        if pull_threads <= 0:
            logger.info('Too many repositories locked. Exiting.')
            return 0

        logger.info('Will use %d threads to pull repos', pull_threads)

        # noinspection PyTypeChecker
        e_pull = em.counter(total=len(to_pull),
                            desc='Updating :',
                            unit='repos',
                            leave=False)
        logger.info('Updating %s repos from %s', len(to_pull), config['site'])
        in_queue = Queue()
        out_queue = Queue()

        for gitdir in to_pull:
            in_queue.put((gitdir, culled[gitdir]['fingerprint'],
                          culled[gitdir]['modified']))

        for i in range(pull_threads):
            logger.debug('Spun up thread %s', i)
            t = PullerThread(in_queue, out_queue, config, i, e_pull)
            t.setDaemon(True)
            t.start()

        # wait till it's all done
        in_queue.join()
        logger.info('All threads finished.')

        while not out_queue.empty():
            # see if any of it failed
            (gitdir, my_fingerprint, status) = out_queue.get()
            # We always record our fingerprint in our manifest
            culled[gitdir]['fingerprint'] = my_fingerprint
            if not status:
                # To make sure we check this again during next run,
                # fudge the manifest accordingly.
                logger.debug('Will recheck %s during next run', gitdir)
                culled[gitdir] = mymanifest[gitdir]
                # this is rather hackish, but effective
                last_modified -= 1

        logger.info('Updates completed in %0.2fs', e_pull.elapsed)
        e_pull.close()
    else:
        logger.info('No repositories need updating')

    # how many lockfiles have we seen?
    # If there are more lock_fails than there are
    # pull_threads configured, we skip cloning out of caution
    if len(to_clone) and len(lock_fails) > pull_threads:
        logger.info(
            'Too many repositories locked. Skipping cloning new repos.')
        to_clone = []

    if len(to_clone):
        # noinspection PyTypeChecker
        e_clone = em.counter(total=len(to_clone),
                             desc='Cloning  :',
                             unit='repos',
                             leave=False)
        logger.info('Cloning %s repos from %s', len(to_clone), config['site'])
        # we use "existing" to track which repos can be used as references
        existing.extend(to_pull)

        to_clone_sorted = []
        clone_order(to_clone, manifest, to_clone_sorted, existing)

        for gitdir in to_clone_sorted:
            e_clone.refresh()

            fullpath = os.path.join(toplevel, gitdir.lstrip('/'))

            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.debug('Removing %s for reclone', gitdir)
                shutil.move(fullpath, '%s.reclone' % fullpath)
                shutil.rmtree('%s.reclone' % fullpath)

            # Do we still need to clone it, or has another process
            # already done this for us?
            ts = grokmirror.get_repo_timestamp(toplevel, gitdir)

            if ts > 0:
                logger.debug('Looks like %s already cloned, skipping', gitdir)
                e_clone.update()
                continue

            try:
                grokmirror.lock_repo(fullpath, nonblocking=True)
            except IOError:
                logger.info('Could not lock %s, skipping', gitdir)
                lock_fails.append(gitdir)
                e_clone.update()
                continue

            reference = None
            if config['ignore_repo_references'] != 'yes':
                reference = culled[gitdir]['reference']

            if reference is not None and reference in existing:
                # Make sure we can lock the reference repo
                refrepo = os.path.join(toplevel, reference.lstrip('/'))
                try:
                    grokmirror.lock_repo(refrepo, nonblocking=True)
                    success = clone_repo(toplevel,
                                         gitdir,
                                         config['site'],
                                         reference=reference)
                    grokmirror.unlock_repo(refrepo)
                except IOError:
                    logger.info('Cannot lock reference repo %s, skipping %s',
                                reference, gitdir)
                    if reference not in lock_fails:
                        lock_fails.append(reference)

                    grokmirror.unlock_repo(fullpath)
                    e_clone.update()
                    continue
            else:
                success = clone_repo(toplevel, gitdir, config['site'])

            # check dir to make sure cloning succeeded and then add to existing
            if os.path.exists(fullpath) and success:
                logger.debug('Cloning of %s succeeded, adding to existing',
                             gitdir)
                existing.append(gitdir)

                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = culled[gitdir].get('reference')

                if owner is None:
                    owner = config['default_owner']
                set_repo_params(toplevel, gitdir, owner, desc, ref)
                set_agefile(toplevel, gitdir, culled[gitdir]['modified'])
                my_fingerprint = grokmirror.set_repo_fingerprint(
                    toplevel, gitdir)
                culled[gitdir]['fingerprint'] = my_fingerprint
                run_post_update_hook(hookscript, toplevel, gitdir)
            else:
                logger.warning('Was not able to clone %s', gitdir)
                # Remove it from our manifest so we can try re-cloning
                # next time grok-pull runs
                del culled[gitdir]
                git_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            e_clone.update()

        logger.info('Clones completed in %0.2fs' % e_clone.elapsed)
        e_clone.close()

    else:
        logger.info('No repositories need cloning')

    # loop through all entries and find any symlinks we need to set
    # We also collect all symlinks to do purging correctly
    symlinks = []
    for gitdir in culled.keys():
        if 'symlinks' in culled[gitdir].keys():
            source = os.path.join(config['toplevel'], gitdir.lstrip('/'))
            for symlink in culled[gitdir]['symlinks']:
                if symlink not in symlinks:
                    symlinks.append(symlink)
                target = os.path.join(config['toplevel'], symlink.lstrip('/'))

                if os.path.exists(source):
                    if os.path.islink(target):
                        # are you pointing to where we need you?
                        if os.path.realpath(target) != source:
                            # Remove symlink and recreate below
                            logger.debug('Removed existing wrong symlink %s',
                                         target)
                            os.unlink(target)
                    elif os.path.exists(target):
                        logger.warning('Deleted repo %s, because it is now'
                                       ' a symlink to %s' % (target, source))
                        shutil.rmtree(target)

                    # Here we re-check if we still need to do anything
                    if not os.path.exists(target):
                        logger.info('Symlinking %s -> %s', target, source)
                        # Make sure the leading dirs are in place
                        if not os.path.exists(os.path.dirname(target)):
                            os.makedirs(os.path.dirname(target))
                        os.symlink(source, target)

    manifile = config['mymanifest']
    grokmirror.manifest_lock(manifile)

    # Is the local manifest newer than last_modified? That would indicate
    # that another process has run and "culled" is no longer the latest info
    if os.path.exists(manifile):
        fstat = os.stat(manifile)
        if fstat[8] > last_modified:
            logger.info('Local manifest is newer, not saving.')
            grokmirror.manifest_unlock(manifile)
            return 0

    if purge:
        to_purge = []
        found_repos = 0
        for founddir in grokmirror.find_all_gitdirs(config['toplevel']):
            gitdir = founddir.replace(config['toplevel'], '')
            found_repos += 1

            if gitdir not in culled.keys() and gitdir not in symlinks:
                to_purge.append(founddir)

        if len(to_purge):
            # Purge-protection engage
            try:
                purge_limit = int(config['purgeprotect'])
                assert 1 <= purge_limit <= 99
            except (ValueError, AssertionError):
                logger.critical('Warning: "%s" is not valid for purgeprotect.',
                                config['purgeprotect'])
                logger.critical('Please set to a number between 1 and 99.')
                logger.critical('Defaulting to purgeprotect=5.')
                purge_limit = 5

            purge_pc = len(to_purge) * 100 / found_repos
            logger.debug('purgeprotect=%s', purge_limit)
            logger.debug('purge prercentage=%s', purge_pc)

            if not forcepurge and purge_pc >= purge_limit:
                logger.critical('Refusing to purge %s repos (%s%%)',
                                len(to_purge), purge_pc)
                logger.critical('Set purgeprotect to a higher percentage, or'
                                ' override with --force-purge.')
                logger.info('Not saving local manifest')
                return 1
            else:
                # noinspection PyTypeChecker
                e_purge = em.counter(total=len(to_purge),
                                     desc='Purging  :',
                                     unit='repos',
                                     leave=False)
                for founddir in to_purge:
                    e_purge.refresh()
                    if os.path.islink(founddir):
                        logger.info('Removing unreferenced symlink %s', gitdir)
                        os.unlink(founddir)
                    else:
                        # is anything using us for alternates?
                        gitdir = '/' + os.path.relpath(founddir,
                                                       toplevel).lstrip('/')
                        if grokmirror.is_alt_repo(toplevel, gitdir):
                            logger.info(
                                'Not purging %s because it is used by '
                                'other repos via alternates', founddir)
                        else:
                            try:
                                logger.info('Purging %s', founddir)
                                grokmirror.lock_repo(founddir,
                                                     nonblocking=True)
                                shutil.rmtree(founddir)
                            except IOError:
                                lock_fails.append(gitdir)
                                logger.info('%s is locked, not purging',
                                            gitdir)
                    e_purge.update()

                logger.info('Purging completed in %0.2fs', e_purge.elapsed)
                e_purge.close()

        else:
            logger.info('No repositories need purging')

    # Done with progress bars
    em.stop()

    # Go through all repos in culled and get the latest local timestamps.
    for gitdir in culled:
        ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
        culled[gitdir]['modified'] = ts

    # If there were any lock failures, we fudge last_modified to always
    # be older than the server, which will force the next grokmirror run.
    if len(lock_fails):
        logger.info('%s repos could not be locked. Forcing next run.',
                    len(lock_fails))
        last_modified -= 1
    elif len(git_fails):
        logger.info('%s repos failed. Forcing next run.', len(git_fails))
        last_modified -= 1

    # Once we're done, save culled as our new manifest
    grokmirror.write_manifest(manifile,
                              culled,
                              mtime=last_modified,
                              pretty=pretty)

    grokmirror.manifest_unlock(manifile)

    # write out projects.list, if asked to
    write_projects_list(culled, config)

    return 127

Exemple #11

0

Afficher le fichier

Fichier : fsck.py Projet : mricon/grokmirror

def fsck_mirror(name, config, verbose=False, force=False, repack_only=False,
                conn_only=False, repack_all_quick=False, repack_all_full=False):
    global logger
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    if conn_only or repack_all_quick or repack_all_full:
        force = True

    logger.info('Running grok-fsck for [%s]', name)

    # Lock the tree to make sure we only run one instance
    logger.debug('Attempting to obtain lock on %s', config['lock'])
    flockh = open(config['lock'], 'w')
    try:
        lockf(flockh, LOCK_EX | LOCK_NB)
    except IOError:
        logger.info('Could not obtain exclusive lock on %s', config['lock'])
        logger.info('Assuming another process is running.')
        return 0

    manifest = grokmirror.read_manifest(config['manifest'])

    if os.path.exists(config['statusfile']):
        logger.info('Reading status from %s', config['statusfile'])
        stfh = open(config['statusfile'], 'rb')
        # noinspection PyBroadException
        try:
            # Format of the status file:
            #  {
            #    '/full/path/to/repository': {
            #      'lastcheck': 'YYYY-MM-DD' or 'never',
            #      'nextcheck': 'YYYY-MM-DD',
            #      'lastrepack': 'YYYY-MM-DD',
            #      'fingerprint': 'sha-1',
            #      's_elapsed': seconds,
            #      'quick_repack_count': times,
            #    },
            #    ...
            #  }

            status = json.loads(stfh.read().decode('utf-8'))
        except:
            # Huai le!
            logger.critical('Failed to parse %s', config['statusfile'])
            lockf(flockh, LOCK_UN)
            flockh.close()
            return 1
    else:
        status = {}

    if 'frequency' in config:
        frequency = int(config['frequency'])
    else:
        frequency = 30

    today = datetime.datetime.today()
    todayiso = today.strftime('%F')

    if force:
        # Use randomization for next check, again
        checkdelay = random.randint(1, frequency)
    else:
        checkdelay = frequency

    # Go through the manifest and compare with status
    # noinspection PyTypeChecker
    e_find = em.counter(total=len(manifest), desc='Discovering:', unit='repos', leave=False)
    for gitdir in list(manifest):
        e_find.update()
        fullpath = os.path.join(config['toplevel'], gitdir.lstrip('/'))
        if fullpath not in status.keys():
            # Newly added repository
            if not force:
                # Randomize next check between now and frequency
                delay = random.randint(0, frequency)
                nextdate = today + datetime.timedelta(days=delay)
                nextcheck = nextdate.strftime('%F')
            else:
                nextcheck = todayiso

            status[fullpath] = {
                'lastcheck': 'never',
                'nextcheck': nextcheck,
            }
            logger.info('%s:', fullpath)
            logger.info('  added : next check on %s', nextcheck)

    e_find.close()

    # record newly found repos in the status file
    logger.debug('Updating status file in %s', config['statusfile'])
    with open(config['statusfile'], 'wb') as stfh:
        stfh.write(json.dumps(status, indent=2).encode('utf-8'))

    # Go through status and find all repos that need work done on them.
    # This is a dictionary that contains:
    # full_path_to_repo:
    #   repack: 0, 1, 2 (0-no, 1-needs quick repack, 2-needs full repack)
    #   fsck: 0/1

    to_process = {}

    total_checked = 0
    total_elapsed = 0

    # noinspection PyTypeChecker
    e_cmp = em.counter(total=len(status), desc='Analyzing:', unit='repos', leave=False)
    for fullpath in list(status):
        e_cmp.update()

        # Check to make sure it's still in the manifest
        gitdir = fullpath.replace(config['toplevel'], '', 1)
        gitdir = '/' + gitdir.lstrip('/')

        if gitdir not in manifest.keys():
            del status[fullpath]
            logger.debug('%s is gone, no longer in manifest', gitdir)
            continue

        needs_repack = needs_prune = needs_fsck = 0

        obj_info = get_repo_obj_info(fullpath)
        try:
            packs = int(obj_info['packs'])
            count_loose = int(obj_info['count'])
        except KeyError:
            logger.warning('Unable to count objects in %s, skipping' % fullpath)
            continue

        schedcheck = datetime.datetime.strptime(status[fullpath]['nextcheck'], '%Y-%m-%d')
        nextcheck = today + datetime.timedelta(days=checkdelay)

        if 'repack' not in config.keys() or config['repack'] != 'yes':
            # don't look at me if you turned off repack
            logger.debug('Not repacking because repack=no in config')
            needs_repack = 0
        elif repack_all_full and (count_loose > 0 or packs > 1):
            logger.debug('needs_repack=2 due to repack_all_full')
            needs_repack = 2
        elif repack_all_quick and count_loose > 0:
            logger.debug('needs_repack=1 due to repack_all_quick')
            needs_repack = 1
        elif conn_only:
            # don't do any repacks if we're running forced connectivity checks, unless
            # you specifically passed --repack-all-foo
            logger.debug('needs_repack=0 due to --conn-only')
            needs_repack = 0
        else:
            # for now, hardcode the maximum loose objects and packs
            # TODO: we can probably set this in git config values?
            #       I don't think this makes sense as a global setting, because
            #       optimal values will depend on the size of the repo as a whole
            max_loose_objects = 1200
            max_packs = 20
            pc_loose_objects = 10
            pc_loose_size = 10

            # first, compare against max values:
            if packs >= max_packs:
                logger.debug('Triggering full repack of %s because packs > %s', fullpath, max_packs)
                needs_repack = 2
            elif count_loose >= max_loose_objects:
                logger.debug('Triggering quick repack of %s because loose objects > %s', fullpath, max_loose_objects)
                needs_repack = 1
            else:
                # is the number of loose objects or their size more than 10% of
                # the overall total?
                in_pack = int(obj_info['in-pack'])
                size_loose = int(obj_info['size'])
                size_pack = int(obj_info['size-pack'])
                total_obj = count_loose + in_pack
                total_size = size_loose + size_pack
                # set some arbitrary "worth bothering" limits so we don't
                # continuously repack tiny repos.
                if total_obj > 500 and count_loose/total_obj*100 >= pc_loose_objects:
                    logger.debug('Triggering repack of %s because loose objects > %s%% of total',
                                 fullpath, pc_loose_objects)
                    needs_repack = 1
                elif total_size > 1024 and size_loose/total_size*100 >= pc_loose_size:
                    logger.debug('Triggering repack of %s because loose size > %s%% of total',
                                 fullpath, pc_loose_size)
                    needs_repack = 1

        if needs_repack > 0 and check_precious_objects(fullpath):
            # if we have preciousObjects, then we only repack based on the same
            # schedule as fsck.
            logger.debug('preciousObjects is set')
            # for repos with preciousObjects, we use the fsck schedule for repacking
            if schedcheck <= today:
                logger.debug('Time for a full periodic repack of a preciousObjects repo')
                status[fullpath]['nextcheck'] = nextcheck.strftime('%F')
                needs_repack = 2
            else:
                logger.debug('Not repacking preciousObjects repo outside of schedule')
                needs_repack = 0

        # Do we need to fsck it?
        if not (repack_all_quick or repack_all_full or repack_only):
            if schedcheck <= today or force:
                status[fullpath]['nextcheck'] = nextcheck.strftime('%F')
                needs_fsck = 1

        if needs_repack or needs_fsck or needs_prune:
            # emit a warning if we find garbage in a repo
            # we do it here so we don't spam people nightly on every cron run,
            # but only do it when a repo needs actual work done on it
            if obj_info['garbage'] != '0':
                logger.warning('%s:\n\tcontains %s garbage files (garbage-size: %s KiB)',
                               fullpath, obj_info['garbage'], obj_info['size-garbage'])

            to_process[fullpath] = {
                'repack': needs_repack,
                'prune': needs_prune,
                'fsck': needs_fsck,
            }

    e_cmp.close()

    if not len(to_process):
        logger.info('No repos need attention.')
        em.stop()
        return

    logger.info('Processing %s repositories', len(to_process))

    # noinspection PyTypeChecker
    run = em.counter(total=len(to_process), desc='Processing:', unit='repos', leave=False)
    for fullpath, needs in to_process.items():
        logger.info('%s:', fullpath)
        # Calculate elapsed seconds
        run.refresh()
        startt = time.time()

        # Wait till the repo is available and lock it for the duration of checks,
        # otherwise there may be false-positives if a mirrored repo is updated
        # in the middle of fsck or repack.
        grokmirror.lock_repo(fullpath, nonblocking=False)
        if needs['repack']:
            if run_git_repack(fullpath, config, needs['repack']):
                status[fullpath]['lastrepack'] = todayiso
                if needs['repack'] > 1:
                    status[fullpath]['lastfullrepack'] = todayiso
            else:
                logger.warning('Repacking %s was unsuccessful, '
                               'not running fsck.', fullpath)
                grokmirror.unlock_repo(fullpath)
                continue

        if needs['prune']:
            run_git_prune(fullpath, config)

        if needs['fsck']:
            run_git_fsck(fullpath, config, conn_only)
            endt = time.time()
            status[fullpath]['lastcheck'] = todayiso
            status[fullpath]['s_elapsed'] = int(endt-startt)

            logger.info('   done : %ss, next check on %s',
                        status[fullpath]['s_elapsed'],
                        status[fullpath]['nextcheck'])

        run.update()

        # We're done with the repo now
        grokmirror.unlock_repo(fullpath)
        total_checked += 1
        total_elapsed += time.time()-startt

        # Write status file after each check, so if the process dies, we won't
        # have to recheck all the repos we've already checked
        logger.debug('Updating status file in %s', config['statusfile'])
        with open(config['statusfile'], 'wb') as stfh:
            stfh.write(json.dumps(status, indent=2).encode('utf-8'))

    run.close()
    em.stop()
    logger.info('Processed %s repos in %0.2fs', total_checked, total_elapsed)

    with open(config['statusfile'], 'wb') as stfh:
        stfh.write(json.dumps(status, indent=2).encode('utf-8'))

    lockf(flockh, LOCK_UN)
    flockh.close()

Exemple #12

0

Afficher le fichier

Fichier : grok-fsck.py Projet : kscherer/grokmirror

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Running grok-fsck for [%s]' % name)

    # Lock the tree to make sure we only run one instance
    logger.debug('Attempting to obtain lock on %s' % config['lock'])
    flockh = open(config['lock'], 'w')
    try:
        lockf(flockh, LOCK_EX | LOCK_NB)
    except IOError, ex:
        logger.info('Could not obtain exclusive lock on %s' % config['lock'])
        logger.info('Assuming another process is running.')
        return 0

    manifest = grokmirror.read_manifest(config['manifest'])

    if os.path.exists(config['statusfile']):
        logger.info('Reading status from %s' % config['statusfile'])
        stfh = open(config['statusfile'], 'r')
        try:
            # Format of the status file:
            #  {
            #    '/full/path/to/repository': {
            #      'lastcheck': 'YYYY-MM-DD' or 'never',
            #      'nextcheck': 'YYYY-MM-DD',
            #    },
            #    ...
            #  }

            status = json.load(stfh)

Exemple #13

0

Afficher le fichier

Fichier : pull.py Projet : mricon/grokmirror

def pull_mirror(name, config, verbose=False, force=False, nomtime=False,
                verify=False, verify_subpath='*', noreuse=False,
                purge=False, pretty=False, forcepurge=False):
    global logger
    global lock_fails

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Checking [%s]', name)
    mymanifest = config['mymanifest']

    if verify:
        logger.info('Verifying mirror against %s', config['manifest'])
        nomtime = True

    if config['manifest'].find('file:///') == 0:
        manifile = config['manifest'].replace('file://', '')
        if not os.path.exists(manifile):
            logger.critical('Remote manifest not found in %s! Quitting!',
                            config['manifest'])
            return 1

        fstat = os.stat(manifile)
        last_modified = fstat[8]
        logger.debug('mtime on %s is: %s', manifile, fstat[8])

        if os.path.exists(config['mymanifest']):
            fstat = os.stat(config['mymanifest'])
            my_last_modified = fstat[8]
            logger.debug('Our last-modified is: %s', my_last_modified)
            if not (force or nomtime) and last_modified <= my_last_modified:
                logger.info('Manifest file unchanged. Quitting.')
                return 0

        logger.info('Reading new manifest from %s', manifile)
        manifest = grokmirror.read_manifest(manifile)
        # Don't accept empty manifests -- that indicates something is wrong
        if not len(manifest.keys()):
            logger.warning('Remote manifest empty or unparseable! Quitting.')
            return 1

    else:
        # Load it from remote host using http and header magic
        logger.info('Fetching remote manifest from %s', config['manifest'])

        # Do we have username:password@ in the URL?
        chunks = urlparse(config['manifest'])
        if chunks.netloc.find('@') > 0:
            logger.debug('Taking username/password from the URL for basic auth')
            (upass, netloc) = chunks.netloc.split('@')
            if upass.find(':') > 0:
                (username, password) = upass.split(':')
            else:
                username = upass
                password = ''

            manifesturl = config['manifest'].replace(chunks.netloc, netloc)
            logger.debug('manifesturl=%s', manifesturl)
            request = urllib_request.Request(manifesturl)

            password_mgr = urllib_request.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, manifesturl, username, password)
            auth_handler = urllib_request.HTTPBasicAuthHandler(password_mgr)
            opener = urllib_request.build_opener(auth_handler)

        else:
            request = urllib_request.Request(config['manifest'])
            opener = urllib_request.build_opener()

        # Find out if we need to run at all first
        if not (force or nomtime) and os.path.exists(mymanifest):
            fstat = os.stat(mymanifest)
            mtime = fstat[8]
            logger.debug('mtime on %s is: %s', mymanifest, mtime)
            my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                             time.gmtime(mtime))
            logger.debug('Our last-modified is: %s', my_last_modified)
            request.add_header('If-Modified-Since', my_last_modified)

        try:
            ufh = opener.open(request, timeout=30)
        except HTTPError as ex:
            if ex.code == 304:
                logger.info('Server says we have the latest manifest. '
                            'Quitting.')
                return 0
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Server returned: %s', ex)
            return 1
        except (URLError, ssl.SSLError, ssl.CertificateError) as ex:
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

        last_modified = ufh.headers.get('Last-Modified')
        last_modified = time.strptime(last_modified, '%a, %d %b %Y %H:%M:%S %Z')
        last_modified = calendar.timegm(last_modified)

        # We don't use read_manifest for the remote manifest, as it can be
        # anything, really. For now, blindly open it with gzipfile if it ends
        # with .gz. XXX: some http servers will auto-deflate such files.
        try:
            if config['manifest'].find('.gz') > 0:
                fh = gzip.GzipFile(fileobj=BytesIO(ufh.read()))
            else:
                fh = ufh

            jdata = fh.read().decode('utf-8')
            fh.close()

            manifest = anyjson.deserialize(jdata)

        except Exception as ex:
            logger.warning('Failed to parse %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

    mymanifest = grokmirror.read_manifest(mymanifest)

    culled = cull_manifest(manifest, config)

    to_clone = []
    to_pull = []
    existing = []

    toplevel = config['toplevel']
    if not os.access(toplevel, os.W_OK):
        logger.critical('Toplevel %s does not exist or is not writable',
                        toplevel)
        sys.exit(1)

    if 'pull_threads' in config.keys():
        pull_threads = int(config['pull_threads'])
        if pull_threads < 1:
            logger.info('pull_threads is less than 1, forcing to 1')
            pull_threads = 1
    else:
        # be conservative
        logger.info('pull_threads is not set, consider setting it')
        pull_threads = 5

    # noinspection PyTypeChecker
    e_cmp = em.counter(total=len(culled), desc='Comparing:', unit='repos', leave=False)

    for gitdir in list(culled):
        fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
        e_cmp.update()

        # fingerprints were added in later versions, so deal if the upstream
        # manifest doesn't have a fingerprint
        if 'fingerprint' not in culled[gitdir]:
            culled[gitdir]['fingerprint'] = None

        # Attempt to lock the repo
        try:
            grokmirror.lock_repo(fullpath, nonblocking=True)
        except IOError:
            logger.info('Could not lock %s, skipping', gitdir)
            lock_fails.append(gitdir)
            # Force the fingerprint to what we have in mymanifest,
            # if we have it.
            culled[gitdir]['fingerprint'] = None
            if gitdir in mymanifest and 'fingerprint' in mymanifest[gitdir]:
                culled[gitdir]['fingerprint'] = mymanifest[gitdir][
                    'fingerprint']
            if len(lock_fails) >= pull_threads:
                logger.info('Too many repositories locked (%s). Exiting.',
                            len(lock_fails))
                return 0
            continue

        if verify:
            if culled[gitdir]['fingerprint'] is None:
                logger.debug('No fingerprint for %s, not verifying', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            if not fnmatch.fnmatch(gitdir, verify_subpath):
                grokmirror.unlock_repo(fullpath)
                continue

            logger.debug('Verifying %s', gitdir)
            if not os.path.exists(fullpath):
                verify_fails.append(gitdir)
                logger.info('Verify: %s ABSENT', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            my_fingerprint = grokmirror.get_repo_fingerprint(
                toplevel, gitdir, force=force)

            if my_fingerprint == culled[gitdir]['fingerprint']:
                logger.info('Verify: %s OK', gitdir)
            else:
                logger.critical('Verify: %s FAILED', gitdir)
                verify_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            continue

        # Is the directory in place?
        if os.path.exists(fullpath):
            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.info('Reclone requested for %s:', gitdir)
                with open(rfile, 'r') as rfh:
                    reason = rfh.read()
                    logger.info('  %s', reason)

                to_clone.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # Fix owner and description, if necessary
            if gitdir in mymanifest.keys():
                # This code is hurky and needs to be cleaned up
                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = None
                if config['ignore_repo_references'] != 'yes':
                    ref = culled[gitdir].get('reference')

                # dirty hack to force on-disk owner/description checks
                # when we're called with -n, in case our manifest
                # differs from what is on disk for owner/description/alternates
                myref = None
                if nomtime:
                    mydesc = None
                    myowner = None
                else:
                    mydesc = mymanifest[gitdir].get('description')
                    myowner = mymanifest[gitdir].get('owner')

                    if config['ignore_repo_references'] != 'yes':
                        myref = mymanifest[gitdir].get('reference')

                    if myowner is None:
                        myowner = config['default_owner']

                if owner is None:
                    owner = config['default_owner']

                if desc != mydesc or owner != myowner or ref != myref:
                    # we can do this right away without waiting
                    set_repo_params(toplevel, gitdir, owner, desc, ref)

            else:
                # It exists on disk, but not in my manifest?
                if noreuse:
                    logger.critical('Found existing git repo in %s', fullpath)
                    logger.critical('But you asked NOT to reuse repos')
                    logger.critical('Skipping %s', gitdir)
                    grokmirror.unlock_repo(fullpath)
                    continue

                logger.info('Setting new origin for %s', gitdir)
                fix_remotes(gitdir, toplevel, config['site'])
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # fingerprints were added late, so if we don't have them
            # in the remote manifest, fall back on using timestamps
            changed = False
            if culled[gitdir]['fingerprint'] is not None:
                logger.debug('Will use fingerprints to compare %s', gitdir)
                my_fingerprint = grokmirror.get_repo_fingerprint(toplevel,
                                                                 gitdir,
                                                                 force=force)

                if my_fingerprint != culled[gitdir]['fingerprint']:
                    logger.debug('No fingerprint match, will pull %s', gitdir)
                    changed = True
                else:
                    logger.debug('Fingerprints match, skipping %s', gitdir)
            else:
                logger.debug('Will use timestamps to compare %s', gitdir)
                if force:
                    logger.debug('Will force-pull %s', gitdir)
                    changed = True
                    # set timestamp to 0 as well
                    grokmirror.set_repo_timestamp(toplevel, gitdir, 0)
                else:
                    ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
                    if ts < culled[gitdir]['modified']:
                        changed = True

            if changed:
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue
            else:
                logger.debug('Repo %s unchanged', gitdir)
                # if we don't have a fingerprint for it, add it now
                if culled[gitdir]['fingerprint'] is None:
                    fpr = grokmirror.get_repo_fingerprint(toplevel, gitdir)
                    culled[gitdir]['fingerprint'] = fpr
                existing.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

        else:
            # Newly incoming repo
            to_clone.append(gitdir)
            grokmirror.unlock_repo(fullpath)
            continue

        # If we got here, something is odd.
        # noinspection PyUnreachableCode
        logger.critical('Could not figure out what to do with %s', gitdir)
        grokmirror.unlock_repo(fullpath)

    logger.info('Compared new manifest against %s repositories in %0.2fs', len(culled), e_cmp.elapsed)
    e_cmp.close()

    if verify:
        if len(verify_fails):
            logger.critical('%s repos failed to verify', len(verify_fails))
            return 1
        else:
            logger.info('Verification successful')
            return 0

    hookscript = config['post_update_hook']

    if len(to_pull):

        if len(lock_fails) > 0:
            pull_threads -= len(lock_fails)

        # Don't spin up more threads than we need
        if pull_threads > len(to_pull):
            pull_threads = len(to_pull)

        # exit if we're ever at 0 pull_threads. Shouldn't happen, but some extra
        # precaution doesn't hurt
        if pull_threads <= 0:
            logger.info('Too many repositories locked. Exiting.')
            return 0

        logger.info('Will use %d threads to pull repos', pull_threads)

        # noinspection PyTypeChecker
        e_pull = em.counter(total=len(to_pull), desc='Updating :', unit='repos', leave=False)
        logger.info('Updating %s repos from %s', len(to_pull), config['site'])
        in_queue = Queue()
        out_queue = Queue()

        for gitdir in to_pull:
            in_queue.put((gitdir, culled[gitdir]['fingerprint'],
                          culled[gitdir]['modified']))

        for i in range(pull_threads):
            logger.debug('Spun up thread %s', i)
            t = PullerThread(in_queue, out_queue, config, i, e_pull)
            t.setDaemon(True)
            t.start()

        # wait till it's all done
        in_queue.join()
        logger.info('All threads finished.')

        while not out_queue.empty():
            # see if any of it failed
            (gitdir, my_fingerprint, status) = out_queue.get()
            # We always record our fingerprint in our manifest
            culled[gitdir]['fingerprint'] = my_fingerprint
            if not status:
                # To make sure we check this again during next run,
                # fudge the manifest accordingly.
                logger.debug('Will recheck %s during next run', gitdir)
                culled[gitdir] = mymanifest[gitdir]
                # this is rather hackish, but effective
                last_modified -= 1

        logger.info('Updates completed in %0.2fs', e_pull.elapsed)
        e_pull.close()
    else:
        logger.info('No repositories need updating')

    # how many lockfiles have we seen?
    # If there are more lock_fails than there are
    # pull_threads configured, we skip cloning out of caution
    if len(to_clone) and len(lock_fails) > pull_threads:
        logger.info('Too many repositories locked. Skipping cloning new repos.')
        to_clone = []

    if len(to_clone):
        # noinspection PyTypeChecker
        e_clone = em.counter(total=len(to_clone), desc='Cloning  :', unit='repos', leave=False)
        logger.info('Cloning %s repos from %s', len(to_clone), config['site'])
        # we use "existing" to track which repos can be used as references
        existing.extend(to_pull)

        to_clone_sorted = []
        clone_order(to_clone, manifest, to_clone_sorted, existing)

        for gitdir in to_clone_sorted:
            e_clone.refresh()

            fullpath = os.path.join(toplevel, gitdir.lstrip('/'))

            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.debug('Removing %s for reclone', gitdir)
                shutil.move(fullpath, '%s.reclone' % fullpath)
                shutil.rmtree('%s.reclone' % fullpath)

            # Do we still need to clone it, or has another process
            # already done this for us?
            ts = grokmirror.get_repo_timestamp(toplevel, gitdir)

            if ts > 0:
                logger.debug('Looks like %s already cloned, skipping', gitdir)
                continue

            try:
                grokmirror.lock_repo(fullpath, nonblocking=True)
            except IOError:
                logger.info('Could not lock %s, skipping', gitdir)
                lock_fails.append(gitdir)
                e_clone.update()
                continue

            reference = None
            if config['ignore_repo_references'] != 'yes':
                reference = culled[gitdir]['reference']

            if reference is not None and reference in existing:
                # Make sure we can lock the reference repo
                refrepo = os.path.join(toplevel, reference.lstrip('/'))
                try:
                    grokmirror.lock_repo(refrepo, nonblocking=True)
                    success = clone_repo(toplevel, gitdir, config['site'],
                                         reference=reference)
                    grokmirror.unlock_repo(refrepo)
                except IOError:
                    logger.info('Cannot lock reference repo %s, skipping %s',
                                reference, gitdir)
                    if reference not in lock_fails:
                        lock_fails.append(reference)

                    grokmirror.unlock_repo(fullpath)
                    e_clone.update()
                    continue
            else:
                success = clone_repo(toplevel, gitdir, config['site'])

            # check dir to make sure cloning succeeded and then add to existing
            if os.path.exists(fullpath) and success:
                logger.debug('Cloning of %s succeeded, adding to existing',
                             gitdir)
                existing.append(gitdir)

                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = culled[gitdir].get('reference')

                if owner is None:
                    owner = config['default_owner']
                set_repo_params(toplevel, gitdir, owner, desc, ref)
                set_agefile(toplevel, gitdir, culled[gitdir]['modified'])
                my_fingerprint = grokmirror.set_repo_fingerprint(toplevel,
                                                                 gitdir)
                culled[gitdir]['fingerprint'] = my_fingerprint
                run_post_update_hook(hookscript, toplevel, gitdir)
            else:
                logger.warning('Was not able to clone %s', gitdir)
                # Remove it from our manifest so we can try re-cloning
                # next time grok-pull runs
                del culled[gitdir]
                git_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            e_clone.update()

        logger.info('Clones completed in %0.2fs' % e_clone.elapsed)
        e_clone.close()

    else:
        logger.info('No repositories need cloning')

    # loop through all entries and find any symlinks we need to set
    # We also collect all symlinks to do purging correctly
    symlinks = []
    for gitdir in culled.keys():
        if 'symlinks' in culled[gitdir].keys():
            source = os.path.join(config['toplevel'], gitdir.lstrip('/'))
            for symlink in culled[gitdir]['symlinks']:
                if symlink not in symlinks:
                    symlinks.append(symlink)
                target = os.path.join(config['toplevel'], symlink.lstrip('/'))

                if os.path.exists(source):
                    if os.path.islink(target):
                        # are you pointing to where we need you?
                        if os.path.realpath(target) != source:
                            # Remove symlink and recreate below
                            logger.debug('Removed existing wrong symlink %s',
                                         target)
                            os.unlink(target)
                    elif os.path.exists(target):
                        logger.warning('Deleted repo %s, because it is now'
                                       ' a symlink to %s' % (target, source))
                        shutil.rmtree(target)

                    # Here we re-check if we still need to do anything
                    if not os.path.exists(target):
                        logger.info('Symlinking %s -> %s', target, source)
                        # Make sure the leading dirs are in place
                        if not os.path.exists(os.path.dirname(target)):
                            os.makedirs(os.path.dirname(target))
                        os.symlink(source, target)

    manifile = config['mymanifest']
    grokmirror.manifest_lock(manifile)

    # Is the local manifest newer than last_modified? That would indicate
    # that another process has run and "culled" is no longer the latest info
    if os.path.exists(manifile):
        fstat = os.stat(manifile)
        if fstat[8] > last_modified:
            logger.info('Local manifest is newer, not saving.')
            grokmirror.manifest_unlock(manifile)
            return 0

    if purge:
        to_purge = []
        found_repos = 0
        for founddir in grokmirror.find_all_gitdirs(config['toplevel']):
            gitdir = founddir.replace(config['toplevel'], '')
            found_repos += 1

            if gitdir not in culled.keys() and gitdir not in symlinks:
                to_purge.append(founddir)

        if len(to_purge):
            # Purge-protection engage
            try:
                purge_limit = int(config['purgeprotect'])
                assert 1 <= purge_limit <= 99
            except (ValueError, AssertionError):
                logger.critical('Warning: "%s" is not valid for purgeprotect.',
                                config['purgeprotect'])
                logger.critical('Please set to a number between 1 and 99.')
                logger.critical('Defaulting to purgeprotect=5.')
                purge_limit = 5

            purge_pc = len(to_purge) * 100 / found_repos
            logger.debug('purgeprotect=%s', purge_limit)
            logger.debug('purge prercentage=%s', purge_pc)

            if not forcepurge and purge_pc >= purge_limit:
                logger.critical('Refusing to purge %s repos (%s%%)',
                                len(to_purge), purge_pc)
                logger.critical('Set purgeprotect to a higher percentage, or'
                                ' override with --force-purge.')
                logger.info('Not saving local manifest')
                return 1
            else:
                # noinspection PyTypeChecker
                e_purge = em.counter(total=len(to_purge), desc='Purging  :', unit='repos', leave=False)
                for founddir in to_purge:
                    e_purge.refresh()
                    if os.path.islink(founddir):
                        logger.info('Removing unreferenced symlink %s', gitdir)
                        os.unlink(founddir)
                    else:
                        # is anything using us for alternates?
                        gitdir = '/' + os.path.relpath(founddir, toplevel).lstrip('/')
                        if grokmirror.is_alt_repo(toplevel, gitdir):
                            logger.info('Not purging %s because it is used by '
                                        'other repos via alternates', founddir)
                        else:
                            try:
                                logger.info('Purging %s', founddir)
                                grokmirror.lock_repo(founddir, nonblocking=True)
                                shutil.rmtree(founddir)
                            except IOError:
                                lock_fails.append(gitdir)
                                logger.info('%s is locked, not purging',
                                            gitdir)
                    e_purge.update()

                logger.info('Purging completed in %0.2fs', e_purge.elapsed)
                e_purge.close()

        else:
            logger.info('No repositories need purging')

    # Done with progress bars
    em.stop()

    # Go through all repos in culled and get the latest local timestamps.
    for gitdir in culled:
        ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
        culled[gitdir]['modified'] = ts

    # If there were any lock failures, we fudge last_modified to always
    # be older than the server, which will force the next grokmirror run.
    if len(lock_fails):
        logger.info('%s repos could not be locked. Forcing next run.',
                    len(lock_fails))
        last_modified -= 1
    elif len(git_fails):
        logger.info('%s repos failed. Forcing next run.', len(git_fails))
        last_modified -= 1

    # Once we're done, save culled as our new manifest
    grokmirror.write_manifest(manifile, culled, mtime=last_modified,
                              pretty=pretty)

    grokmirror.manifest_unlock(manifile)

    # write out projects.list, if asked to
    write_projects_list(culled, config)

    return 127

Exemple #14

0

Afficher le fichier

Fichier : grok-manifest.py Projet : kscherer/grokmirror

    logger.addHandler(ch)

    if opts.logfile is not None:
        ch = logging.FileHandler(opts.logfile)
        formatter = logging.Formatter("[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)

        ch.setLevel(logging.DEBUG)
        logger.addHandler(ch)

    # push our logger into grokmirror to override the default
    grokmirror.logger = logger

    grokmirror.manifest_lock(opts.manifile)
    manifest = grokmirror.read_manifest(opts.manifile, wait=opts.wait)

    # If manifest is empty, don't use current timestamp
    if not len(manifest.keys()):
        opts.usenow = False

    if opts.remove and len(args):
        # Remove the repos as required, write new manfiest and exit
        for fullpath in args:
            repo = fullpath.replace(opts.toplevel, '', 1)
            if repo in manifest.keys():
                del manifest[repo]
                logger.info('Repository %s removed from manifest' % repo)
            else:
                logger.info('Repository %s not in manifest' % repo)

Exemple #15

0

Afficher le fichier

Fichier : pull.py Projet : pombredanne/grokmirror

            if config['manifest'].find('.gz') > 0:
                fh = gzip.GzipFile(fileobj=StringIO(ufh.read()))
            else:
                fh = ufh

            jdata = fh.read()
            fh.close()

            manifest = anyjson.deserialize(jdata)

        except Exception, ex:
            logger.warning('Failed to parse %s' % config['manifest'])
            logger.warning('Error was: %s' % ex)
            return 1

    mymanifest = grokmirror.read_manifest(mymanifest)

    culled = cull_manifest(manifest, config)

    to_clone = []
    to_pull = []
    existing = []

    toplevel = config['toplevel']
    if not os.access(toplevel, os.W_OK):
        logger.critical('Toplevel %s does not exist or is not writable' %
                        toplevel)
        sys.exit(1)

    if 'pull_threads' in config.keys():
        pull_threads = int(config['pull_threads'])

Exemple #16

0

Afficher le fichier

def grok_manifest(manifile, toplevel, args=None, logfile=None, usenow=False,
                  check_export_ok=False, purge=False, remove=False,
                  pretty=False, ignore=None, wait=False, verbose=False):

    if args is None:
        args = []
    if ignore is None:
        ignore = []

    logger.setLevel(logging.DEBUG)
    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False

    logger.addHandler(ch)

    if logfile is not None:
        ch = logging.FileHandler(logfile)
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)

        ch.setLevel(logging.DEBUG)
        logger.addHandler(ch)

    # push our logger into grokmirror to override the default
    grokmirror.logger = logger

    grokmirror.manifest_lock(manifile)
    manifest = grokmirror.read_manifest(manifile, wait=wait)

    # If manifest is empty, don't use current timestamp
    if not len(manifest.keys()):
        usenow = False

    if remove and len(args):
        # Remove the repos as required, write new manfiest and exit
        for fullpath in args:
            repo = fullpath.replace(toplevel, '', 1)
            if repo in manifest.keys():
                del manifest[repo]
                logger.info('Repository %s removed from manifest', repo)
            else:
                logger.info('Repository %s not in manifest', repo)

        # XXX: need to add logic to make sure we don't break the world
        #      by removing a repository used as a reference for others
        #      also make sure we clean up any dangling symlinks

        grokmirror.write_manifest(manifile, manifest, pretty=pretty)
        grokmirror.manifest_unlock(manifile)
        return 0

    gitdirs = []

    if purge or not len(args) or not len(manifest.keys()):
        # We automatically purge when we do a full tree walk
        gitdirs = grokmirror.find_all_gitdirs(toplevel, ignore=ignore)
        purge_manifest(manifest, toplevel, gitdirs)

    if len(manifest) and len(args):
        # limit ourselves to passed dirs only when there is something
        # in the manifest. This precaution makes sure we regenerate the
        # whole file when there is nothing in it or it can't be parsed.
        gitdirs = args
        # Don't draw a progress bar for a single repo
        em.enabled = False

    symlinks = []
    # noinspection PyTypeChecker
    run = em.counter(total=len(gitdirs), desc='Processing:', unit='repos', leave=False)
    for gitdir in gitdirs:
        run.update()
        # check to make sure this gitdir is ok to export
        if (check_export_ok and not
                os.path.exists(os.path.join(gitdir, 'git-daemon-export-ok'))):
            # is it curently in the manifest?
            repo = gitdir.replace(toplevel, '', 1)
            if repo in list(manifest):
                logger.info('Repository %s is no longer exported, '
                            'removing from manifest', repo)
                del manifest[repo]

            # XXX: need to add logic to make sure we don't break the world
            #      by removing a repository used as a reference for others
            #      also make sure we clean up any dangling symlinks
            continue

        if os.path.islink(gitdir):
            symlinks.append(gitdir)
        else:
            update_manifest(manifest, toplevel, gitdir, usenow)

    logger.info('Updated %s records in %0.2fs', len(gitdirs), run.elapsed)
    run.close()
    em.stop()


    if len(symlinks):
        set_symlinks(manifest, toplevel, symlinks)

    grokmirror.write_manifest(manifile, manifest, pretty=pretty)
    grokmirror.manifest_unlock(manifile)

Exemple #17

0

Afficher le fichier

Fichier : grok-pull.py Projet : kscherer/grokmirror

def pull_mirror(name, config, opts):
    global logger
    global lock_fails

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter("[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if opts.verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Checking [%s]' % name)
    mymanifest = config['mymanifest']

    if opts.verify:
        logger.info('Verifying mirror against %s' % config['manifest'])
        opts.nomtime = True

    if config['manifest'].find('file:///') == 0:
        manifile = config['manifest'].replace('file://', '')
        if not os.path.exists(manifile):
            logger.critical('Remote manifest not found in %s! Quitting!' % config['manifest'])
            return 1

        fstat = os.stat(manifile)
        last_modified = fstat[8]
        logger.debug('mtime on %s is: %s' % (manifile, fstat[8]))

        if os.path.exists(config['mymanifest']):
            fstat = os.stat(config['mymanifest'])
            my_last_modified = fstat[8]
            logger.debug('Our last-modified is: %s' % my_last_modified)
            if not (opts.force or opts.nomtime) and last_modified <= my_last_modified:
                logger.info('Manifest file unchanged. Quitting.')
                return 0

        logger.info('Reading new manifest from %s' % manifile)
        manifest = grokmirror.read_manifest(manifile)
        # Don't accept empty manifests -- that indicates something is wrong
        if not len(manifest.keys()):
            logger.critical('Remote manifest empty or unparseable! Quitting.')
            return 1

    else:
        # Load it from remote host using http and header magic
        logger.info('Fetching remote manifest from %s' % config['manifest'])
        request = urllib2.Request(config['manifest'])
        opener  = urllib2.build_opener()

        # Find out if we need to run at all first
        if not (opts.force or opts.nomtime) and os.path.exists(mymanifest):
            fstat = os.stat(mymanifest)
            mtime = fstat[8]
            logger.debug('mtime on %s is: %s' % (mymanifest, mtime))
            my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                time.gmtime(mtime))
            logger.debug('Our last-modified is: %s' % my_last_modified)
            request.add_header('If-Modified-Since', my_last_modified)

        try:
            ufh = opener.open(request, timeout=30)
        except urllib2.HTTPError, ex:
            if ex.code == 304:
                logger.info('Server says we have the latest manifest. Quitting.')
                return 0
            logger.warning('Could not fetch %s' % config['manifest'])
            logger.warning('Server returned: %s' % ex)
            return 1
        except urllib2.URLError, ex:
            logger.warning('Could not fetch %s' % config['manifest'])
            logger.warning('Error was: %s' % ex)
            return 1