    def test_get_manager(self):

        # stdout is attached to a tty
        with redirect_output('stdout', self.tty.stdout):
            self.assertTrue(sys.stdout.isatty())
            manager = enlighten.get_manager(unit='knights')
            self.assertIsInstance(manager, enlighten.Manager)
            self.assertTrue('unit' in manager.defaults)

        # stdout is not attached to a tty
        with redirect_output('stdout', OUTPUT):
            self.assertFalse(sys.stdout.isatty())
            manager = enlighten.get_manager(unit='knights')
            self.assertIsInstance(manager, enlighten.Manager)
            self.assertTrue('unit' in manager.defaults)
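
Every example on this page follows the same basic flow: create a manager, create one or more counters from it, update them as work progresses, then close the counters and stop the manager so the terminal is restored. A minimal, self-contained sketch of that flow (the total, description, and sleep are illustrative only):

import time

import enlighten

manager = enlighten.get_manager()
# A counter created with a total renders as a progress bar
pbar = manager.counter(total=50, desc='Working', unit='items')

for _ in range(50):
    time.sleep(0.01)  # simulated work
    pbar.update()     # advance the bar by one item

pbar.close()    # finalize this counter
manager.stop()  # restore the terminal state
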
Example #2
def call_xml_cleaner(args):
    '''
    Takes in a list of files and args and generates new files. 
    '''
    list_of_file_paths = get_file_directories(args)
    dir_info = directory_info(args)
    root_name = dir_info.get_root_name()

    manager = enlighten.get_manager()
    enterprise = manager.counter(total=len(list_of_file_paths),
                                 desc='Tidying files:',
                                 unit='files')

    name = get_subfolder_name(list_of_file_paths[0], root_name)

    for path in list_of_file_paths:
        name = get_subfolder_name(path, root_name)
        filename = '.\\data\\04 - clean\\' + name + '.txt'
        list_to_create = []
        currCenter = manager.counter(total=get_file_length(path),
                                     unit='lines',
                                     leave=False)
        with open(path, encoding='utf-8') as file:
            for line in file:
                list_to_create.append(clean_text_from_xml(line))
                if len(list_to_create) > 1000:
                    create_file(list_to_create, filename, mode='a')
                    list_to_create = []

                currCenter.update()
        if list_to_create:
            create_file(list_to_create, filename, mode='a')
        currCenter.close()
        enterprise.update()
    enterprise.close()
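
Note that the hard-coded Windows-style path above is brittle. If adapting this example, a platform-independent construction with pathlib is safer; a small sketch, assuming the same 'data/04 - clean' output layout:

from pathlib import Path

# Build data/04 - clean/<name>.txt without hand-escaping path separators
out_dir = Path('data') / '04 - clean'
out_dir.mkdir(parents=True, exist_ok=True)
filename = out_dir / (name + '.txt')
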
Example #3
    def query_data(self):
        t = Timer("Querying USPTO: {}".format(self.query_json))
        count_patents = self.query_sounding()
        count_to_collect = self.limit if self.limit is not None and self.limit < count_patents else count_patents
        pages = math.ceil(count_to_collect / self.per_page)
        logger.info("Collecting {}/{} docs in {} page{}".format(
            count_to_collect, count_patents, pages, "s" if pages > 0 else ""))

        manager = enlighten.get_manager()
        ticker = manager.counter(total=pages, desc='Ticks', unit='ticks')
        for i in range(pages):
            if Config.ENV_NAME != "local":
                logger.info("{}/{}".format(i, pages))
            page_df = self.query_paginated(i + 1, self.per_page)
            if self.df is None:
                self.df = page_df
            else:
                self.df = self.df.append(page_df, ignore_index=True)
            ticker.update()
        ticker.close()

        self.handle_external()

        t.log()
        logger.info("Collected {} edges".format(self.df.shape[0]))
Example #4
def downloadData(param: Param, download: bool = True):
    '''Download user data (if {download} is True) to json files, merge them into a flat pandas.DataFrame, and write it to disk.'''
    logging.info(f"{param.filePath().name.replace('.','|')}")
    if download:
        subMethod = param.splitMethod(lower=True)
        for f in param.filePath(glob='*json'):
            f.unlink()
        pbarManager = enlighten.get_manager()
        with pbarManager.counter(unit='page', leave=False) as pbar:
            while param.page <= param.nPages:
                fileName = param.filePath(ext=f'.{param.page:04d}.json')
                response = getReq(param=param,
                                  pbarManager=pbarManager,
                                  collapse=False)
                param.page = int(
                    response.get(subMethod).get('@attr').get('page'))
                param.nPages = int(
                    response.get(subMethod).get('@attr').get('totalPages'))
                pbar.total = param.nPages  # [tqdm: update total without resetting time elapsed](https://stackoverflow.com/a/58961015/13019084)
                pbar.update()
                param.filePath().parent.mkdir(exist_ok=True)
                with open(file=fileName, mode='w') as jsonF:
                    json.dump(obj=response, fp=jsonF)
                param.page += 1
                time.sleep(param.sleep)
        pbarManager.stop()
    DF = loadJSON(param)
    df = flattenDF(param=param, DF=DF, writeToDisk=True)
    if param.splitMethod() in ['TopArtists', 'TopAlbums', 'TopTracks']:
        writeCSV(param=param, df=df)
Example #5
def convert_video_progress_bar(source: str, dest: str, manager=None):
    if manager is None:
        manager = enlighten.get_manager()
    name = source.rsplit(os.path.sep,1)[-1]
    if get_bitdepth(source).is_10bit:
        args = CONVERT_COMMAND_10Bits.format(source=source, dest=dest)
    else:
        args = CONVERT_COMMAND.format(source=source, dest=dest)
    proc = expect.spawn(args, encoding='utf-8')
    pbar = None
    try:
        proc.expect(pattern_duration)
        # Convert the HH:MM:SS duration reported by ffmpeg into total seconds
        total = sum(map(lambda x: float(x[1]) * 60 ** x[0], enumerate(reversed(proc.match.groups()[0].strip().split(':')))))
        cont = 0
        pbar = manager.counter(total=100, desc=name, unit='%',bar_format=BAR_FMT, counter_format=COUNTER_FMT)
        while True:
            proc.expect(pattern_progress)
            # Convert the current HH:MM:SS position into seconds as well
            progress = sum(map(lambda x: float(x[1]) * 60 ** x[0], enumerate(reversed(proc.match.groups()[0].strip().split(':')))))
            percent = progress/total*100
            pbar.update(percent-cont)
            cont = percent
    except expect.EOF:
        pass
    finally:
        if pbar is not None:
            pbar.close()
    proc.expect(expect.EOF)
    res = proc.before
    res += proc.read()
    exitstatus = proc.wait()
    if exitstatus:
        raise ffmpeg.Error('ffmpeg','',res)
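
The example above drives the bar with fractional increments: Counter.update() accepts an increment argument, so progress expressed as a percentage can be fed to a bar whose total is 100. A tiny illustration with made-up values:

import enlighten

manager = enlighten.get_manager()
percent_bar = manager.counter(total=100, desc='encode', unit='%')

percent_bar.update(12.5)  # advance by 12.5 percentage points
percent_bar.update(37.5)  # the bar now reads 50/100
percent_bar.close()
manager.stop()
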
Example #6
class CountTrackable(BaseTrackable):

    _manager = enlighten.get_manager()

    def __init__(self, name: str, total: int, *, parent: BaseTrackable = None):
        super().__init__(name, parent=parent)

        self._total = total
        self._pbar = self._manager.counter(desc=name, total=total)

    @classmethod
    def reset_all(cls):
        cls._manager = enlighten.get_manager()

    @property
    def total(self):
        return self._total

    def update(self):
        self._pbar.update()
        if self._total is not None and self._pbar.count > self._total:
            raise PBarOutOfBound('Progress bar count exceeded its total.')
        for trackable in self.children:
            trackable.reset()

    def reset(self):
        self._pbar.start = time.time()
        self._pbar.count = 0
        self._pbar.refresh()

    @property
    def value(self):
        return self._pbar.count
Example #7
 def __init__(self, graph=None) -> None:
     super().__init__(graph)
     self.__display_manager = enlighten.get_manager()
     self.__status_bar: enlighten.StatusBar = None
     self.__all_nodes = set()
     self.__running = set()
     self.__completed = set()
Example #8
 def _insideEllipses(self):
     self._memoryUsage()
     for inst in self.list_inst:
         print('Computing ellipses for %s sample' % inst.label)
         manager = enlighten.get_manager()
         pbar = manager.counter(total=len(self.sigmas) * inst.size,
                                desc='Progress',
                                unit='Events %s' % inst.label)
         list_inside = []
         for idx, sigma in enumerate(self.sigmas):
             print('....... Counting ellipse with sigma = %0.1f' % sigma)
             inside = np.zeros(inst.size)
             for i in range(inst.size):
                 masspoint = inst.data.iloc[i, :][['jj_M', 'lljj_M']]
                 check = self.window.isInWindow(self.center, sigma,
                                                masspoint)
                 if check: inside[i] = 1
                 pbar.update()
             inside = pd.DataFrame(inside)
             inside.columns = [self.sigmas_columns[idx]]
             list_inside.append(inside)
             self._memoryUsage()
         print('....... Concatenating the results into a single DF')
         manager.stop()
         for inside in list_inside:
             inst.data = pd.concat([inst.data, inside], axis=1)
         self._memoryUsage()
Example #9
    def __init__(self):
        self.CS_LOW = '['
        self.CS_HIGH = ']'
        self.MISO_BEGIN = '('
        self.MISO_END = ')'

        self.SECTOR_SIZE = 4 * KB

        self.MEM_PATTERN_LABELS = [
            "Timestamp(ms)",
            "Operation",
            "Address",
            "Value"
        ]

        self._clock = 0

        self._company = None
        self._chip_size = 0
        self._def_address_size = 0
        self._address_size = 0
        self._model = None
        self._frequency = None
        self._clock_width = None
        self._op_codes = {}

        self._config_done = False

        self.progressbar = enlighten.get_manager()
Example #10
    def __init__(self):
        super().__init__()

        # Initialize the status bars for the header and the targets.
        self.manager = enlighten.get_manager()
        self.status_header = None
        self.status_target = {}
Example #11
def main():
    """
    Main function
    """

    with enlighten.get_manager() as manager:
        process_files(manager)
Example #12
def main():
    """
    Main function
    """

    with enlighten.get_manager() as manager:
        status = manager.status_bar(
            status_format=u'Enlighten{fill}Stage: {demo}{fill}{elapsed}',
            color='bold_underline_bright_white_on_lightslategray',
            justify=enlighten.Justify.CENTER,
            demo='Initializing',
            autorefresh=True,
            min_delta=0.5)
        docs = manager.term.link(
            'https://python-enlighten.readthedocs.io/en/stable/examples.html',
            'Read the Docs')
        manager.status_bar(' More examples on %s! ' % docs,
                           position=1,
                           fill='-',
                           justify=enlighten.Justify.CENTER)

        initialize(manager, 15)
        status.update(demo='Loading')
        load(manager, 40)
        status.update(demo='Testing')
        run_tests(manager, 20)
        status.update(demo='Downloading')
        download(manager, 2.0 * 2**20)
        status.update(demo='File Processing')
        process_files(manager)
Example #13
def main():
    global MANAGER

    logging.basicConfig(level=logging.INFO)
    first_pass_parser, git_repo_group = Auditor._build_first_pass_parser()
    first_pass_args, extras = first_pass_parser.parse_known_args()
    first_pass_args_dict = vars(first_pass_args)
    with _DB(**first_pass_args_dict) as db:
        repo_reader = _RepoReader(db, **first_pass_args_dict)
        jira_reader = _JiraReader(db, **first_pass_args_dict)
        second_pass_parser = Auditor._build_second_pass_parser(
            repo_reader, first_pass_parser, git_repo_group)
        second_pass_args = second_pass_parser.parse_args(
            extras, first_pass_args)
        second_pass_args_dict = vars(second_pass_args)
        auditor = Auditor(repo_reader, jira_reader, db,
                          **second_pass_args_dict)
        with enlighten.get_manager() as MANAGER:
            if second_pass_args.populate_from_git:
                auditor.populate_db_from_git()
            if second_pass_args.populate_from_jira:
                auditor.populate_db_from_jira()
            if second_pass_args.report_new_for_release_line:
                release_line = second_pass_args.report_new_for_release_line
                auditor.report_new_for_release_line(release_line)
            if second_pass_args.report_new_for_release_branch:
                release_branch = second_pass_args.report_new_for_release_branch
                auditor.report_new_for_release_branch(release_branch)
Example #14
    def add_duplicates(df_endog, bin_size_weeks):
        start_date = df_endog["patent_date"].min()
        t_from_start = ((df_endog["patent_date"] - start_date) / bin_size_weeks).astype(int)
        df_endog["t"] = df_endog["t"] + t_from_start
        # iterate through the rows where t restarts on the next row - i.e. the last entry in each series
        data = []
        ind = []
        # mask is True where the next row's t is smaller than the current row's t (the last entry of a series)
        mask = (df_endog["t"].shift(-1) < df_endog["t"])
        manager = enlighten.get_manager()
        ticker = manager.counter(
            total=df_endog[mask].shape[0],
            desc='Patent Samples Transformed',
            unit='patents'
        )
        for row in df_endog[mask][["log(knowledge_forward_cites)", "t", "patent_date"]].itertuples():
            index, k, t, date = row
            # append the last k entry as many times as necessary to reach the present
            for i in range(int(df_endog["t"].max()) - int(t)):
                data.append((k, t + 1 + i, date))
                ind.append(index)
            ticker.update()
        ticker.close()

        to_add = pd.DataFrame(data, index=ind, columns=["log(knowledge_forward_cites)", "t", "patent_date"])
        df_endog = df_endog.append(to_add)

        return df_endog
Example #15
def folder(path: Path = typer.Option(default='.',
                                     exists=True,
                                     file_okay=True,
                                     dir_okay=True,
                                     readable=True,
                                     resolve_path=True)):
    """
    Convert all videos in a folder
    """

    videos = []

    for dir, folders, files in os.walk(path):
        base_dir = Path(dir)
        for file in files:
            file = base_dir / file
            guess = filetype.guess(str(file))

            if guess and 'video' in guess.mime:
                videos.append(file)

    manager = enlighten.get_manager()
    pbar = manager.counter(total=len(videos), desc='Video', unit='videos')
    for video in videos:
        typer.secho(f'Processing: {video}')
        if get_codec(str(video)) != 'hevc':
            new_path = convertion_path(video)
            convert_video(str(video), str(new_path))
            os.remove(str(video))
            shutil.move(new_path, str(video))
        pbar.update()
Example #16
    def __init__(self, config_filename: str,
                 selected_analysis_options: params.SelectedAnalysisOptions,
                 manager_task_name: str, **kwargs: Any):
        self.config_filename = config_filename
        self.selected_analysis_options = selected_analysis_options
        self.task_name = manager_task_name

        # Retrieve YAML config for manager configuration
        # NOTE: We don't store the overridden selected_analysis_options because in principle they depend
        #       on the selected task. In practice, such options are unlikely to vary between the manager
        #       and the analysis tasks. However, the validation cannot handle the overridden options
        #       (because the leading hadron bias enum is converted into the object). So we just use
        #       the overridden options when formatting the output prefix (where they are required to determine
        #       the right path), and then pass the non-overridden values to the analysis objects.
        self.config, overridden_selected_analysis_options = analysis_config.read_config_using_selected_options(
            task_name=self.task_name,
            config_filename=self.config_filename,
            selected_analysis_options=self.selected_analysis_options)
        # Determine the formatting options needed for the output prefix
        formatting_options = analysis_config.determine_formatting_options(
            task_name=self.task_name,
            config=self.config,
            selected_analysis_options=overridden_selected_analysis_options)
        # Additional helper variables
        self.task_config = self.config[self.task_name]
        self.output_info = analysis_objects.PlottingOutputWrapper(
            # Format to ensure that the selected analysis options are filled in.
            output_prefix=self.config["outputPrefix"].format(
                **formatting_options),
            printing_extensions=self.config["printingExtensions"],
        )

        # Monitor the progress of the analysis.
        self._progress_manager = enlighten.get_manager()
Example #17
def main():
    """
    Main function
    """

    with enlighten.get_manager() as manager:
        multiprocess_systems(manager, random.randint(*SYSTEMS))
Example #18
def build_nltk_wrappers():
    imports = _walk(nltk)
    imports += _walk(nltk.cluster)
    imports += _walk(gensim.models)
    # imports += _walk(nltk.chunk.named_entity)
    imports += _walk(nltk.tag)

    manager = enlighten.get_manager()
    counter = manager.counter(total=len(imports), unit="classes")
    path = Path(__file__).parent / "_generated.py"

    imports = set(imports)

    with open(path, "w") as fp:
        fp.write(
            textwrap.dedent(f"""
            # AUTOGENERATED ON {datetime.datetime.now()}
            ## DO NOT MODIFY THIS FILE MANUALLY

            from autogoal.grammar import Continuous, Discrete, Categorical, Boolean
            from autogoal.contrib.nltk._builder import NltkStemmer, NltkTokenizer, NltkLemmatizer, NltkTagger, NltkTrainedTagger
            from autogoal.kb import *
            from autogoal.utils import nice_repr
            from numpy import inf, nan
            """))

        for cls in imports:
            counter.update()
            _write_class(cls, fp)

    black.reformat_one(path, True, black.WriteBack.YES, black.FileMode(),
                       black.Report())

    counter.close()
    manager.stop()
Example #19
 def combine_games_for_season(self):
     result = audit_report_season_prompt(self.app.audit_report)
     if result.failure:
         return result
     self.scrape_year = result.value
     self.pbar_manager = enlighten.get_manager()
     self.init_progress_bars(game_date=self.all_dates_in_season[0])
     subprocess.run(["clear"])
     for game_date in self.all_dates_in_season:
         if self.every_eligible_game_is_combined():
             num_days_remaining = self.get_number_of_days_remaining()
             self.update_progress_bars(game_date)
             self.date_progress_bar.update(num_days_remaining)
             LOGGER.info(
                 f"Processed all eligible games for MLB {self.scrape_year}."
             )
             time.sleep(1.5)
             break
         game_ids = self.date_game_id_map.get(game_date, None)
         if not game_ids:
             self.update_progress_bars(game_date)
             self.date_progress_bar.update()
             time.sleep(0.75)
             continue
         result = self.combine_selected_games(game_date, game_ids)
         if result.failure:
             return result
         self.date_progress_bar.update()
     self.close_progress_bars()
     return Result.Ok()
Example #20
def build_sklearn_wrappers():
    imports = _walk(sklearn)

    manager = enlighten.get_manager()
    counter = manager.counter(total=len(imports), unit="classes")

    path = Path(__file__).parent / "_generated.py"

    with open(path, "w") as fp:
        fp.write(
            textwrap.dedent(
                f"""
            # AUTOGENERATED ON {datetime.datetime.now()}
            ## DO NOT MODIFY THIS FILE MANUALLY

            from numpy import inf, nan

            from autogoal.grammar import Continuous, Discrete, Categorical, Boolean
            from autogoal.contrib.sklearn._builder import SklearnEstimator, SklearnTransformer
            from autogoal.kb import *
            """
            )
        )

        for cls in imports:
            counter.update()
            _write_class(cls, fp)

    black.reformat_one(
        path, True, black.WriteBack.YES, black.FileMode(), black.Report()
    )

    counter.close()
    manager.stop()
Example #21
def folder(path: Path = typer.Argument(default='.',
                                       exists=True,
                                       file_okay=True,
                                       dir_okay=True,
                                       readable=True,
                                       resolve_path=True),
           ignore_extension: str = typer.Option(default=None),
           ignore_path: Path = typer.Option(default=None,
                                            exists=True,
                                            file_okay=True,
                                            dir_okay=True,
                                            readable=True,
                                            resolve_path=True)):
    """
    Convert all videos and audios in a folder
    """

    videos, audios = get_videos_and_audios(path, ignore_extension, ignore_path)
    manager = enlighten.get_manager()
    errors_files = []
    pbar = manager.counter(total=len(videos) + len(audios),
                           desc='Files',
                           unit='files')

    errors_files, pbar = process_files(videos, False, manager, errors_files,
                                       pbar)
    errors_files, pbar = process_files(audios, True, manager, errors_files,
                                       pbar)

    if errors_files:
        typer.secho('These files could not be processed:', fg=RED)
        typer.secho(str(errors_files), fg=RED)
Example #22
def select_images(df, operetta_folder, method="copy"):
    render_path = o.ensure_dir(os.path.join(operetta_folder, 'out', 'render'))
    manager = enlighten.get_manager()
    bar = manager.counter(total=len(df), desc='Progress', unit='files')

    for ix, r in df.iterrows():
        destination_folder = o.ensure_dir(
            os.path.join(operetta_folder, 'out', 'selected-images',
                         '%s@%s' % (r["Cell Type"], r["Cell Count"]),
                         r["Compound"]))

        # name, original_path = o.ConfiguredChannels.filename_of_render(r, render_path)
        name = 'r%d-c%d-f%d-p%s-i%d.jpg' % (r["row"], r["col"], r["fid"],
                                            str(r["p"]), r["zid"])
        original_path = os.path.join(render_path, name)
        destination_path = o.ensure_dir(os.path.join(destination_folder, name))

        try:
            if method == "link":
                logger.debug('linking %s to %s' % (name, destination_folder))
                os.symlink(original_path, destination_path, False)
            elif method == "copy":
                logger.debug('copying %s to %s' % (name, destination_folder))
                copyfile(original_path, destination_path)
            elif method == "move":
                logger.debug('moving %s to %s' % (name, destination_folder))
                os.rename(original_path, destination_path)
            bar.update()
        except Exception as e:
            logger.warning('no render for %s' % original_path)
            logger.warning(e)
            # traceback.print_stack()
    manager.stop()
Example #23
def main():
    """
    Main function
    """

    manager = enlighten.get_manager()
    run_tests(manager, 100)
    load(manager, 80)
Example #24
 def __init__(self, freeze_info):
     self.manager = enlighten.get_manager()
     self.counter = self.manager.counter(
         total=100, color='red', bar_format=bar_format)
     self.failure_counter = self.counter
     self.success_counter = self.counter.add_subcounter('cyan')
     freeze_info.add_hook('page_frozen', self.update_bar)
     freeze_info.add_hook('page_failed', self.update_bar)
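
The example above splits a single bar into colored segments with add_subcounter(): updating the subcounter advances both its colored segment and the overall count, while updating the parent counter fills only the remainder color. A minimal sketch with made-up totals:

import enlighten

manager = enlighten.get_manager()
bar = manager.counter(total=10, desc='pages', color='red')
ok = bar.add_subcounter('cyan')

ok.update()   # a success: advances the cyan segment and the overall count
bar.update()  # a failure: counted in the remaining (red) portion
bar.close()
manager.stop()
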
Example #25
def main():
    """
    Main function
    """

    manager = enlighten.get_manager()
    process_files(manager)
    manager.stop()  # Clears all temporary counters and progress bars
Example #26
    def fetch_ib_contract_details(self):
        contracts_db = ContractsDatabase()
        columns = [
            'contract_id', 'contract_type_from_listing', 'broker_symbol',
            'exchange', 'currency'
        ]
        filters = {'primary_exchange': "NULL"}
        contracts = contracts_db.get_contracts(filters=filters,
                                               return_columns=columns)
        logging.info(
            f"Found {len(contracts)} contracts with missing IB details in master listing."
        )

        if len(contracts) == 0:
            return

        # Setup progress bar
        manager = enlighten.get_manager()
        pbar = manager.counter(total=len(contracts),
                               desc="Contracts",
                               unit="contracts")

        exiter = GracefulExiter()

        tws = Tws()
        ib_details_db = IbDetailsDatabase()
        tws.connect()
        logging.info(f"Connnected to TWS.")

        try:
            for contract in contracts:
                # Check for abort conditions
                if exiter.exit() or tws.has_error():
                    logging.info(f"Abort fetching of IB details.")
                    break

                contract_details = tws.download_contract_details(
                    contract_type_from_listing=contract[
                        'contract_type_from_listing'],
                    broker_symbol=contract['broker_symbol'],
                    exchange=contract['exchange'],
                    currency=contract['currency'])

                if contract_details is not None:
                    ib_details_db.insert_ib_details(
                        contract_id=contract['contract_id'],
                        contract_type_from_details=contract_details.stockType,
                        primary_exchange=contract_details.contract.primaryExchange,
                        industry=contract_details.industry,
                        category=contract_details.category,
                        subcategory=contract_details.subcategory)

                pbar.update()

        finally:
            tws.disconnect()
            logging.info(f"Disconnnected from TWS.")
Example #27
    def __init__(self, iterable):
        try:
            total = len(iterable)
        except (TypeError, AttributeError):
            total = None

        self.iterable = iterable
        self.manager = enlighten.get_manager()
        self.pbar = self.manager.counter(total=total)
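
The snippet above shows only the constructor of this iterable wrapper; the rest of the class is not reproduced here. A purely hypothetical sketch (not from the original project) of how such a wrapper could drive its counter while iterating:

    def __iter__(self):
        # Hypothetical sketch: yield items while advancing the bar, then clean up
        try:
            for item in self.iterable:
                yield item
                self.pbar.update()
        finally:
            self.pbar.close()
            self.manager.stop()
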
Example #28
def process_files():
    """
    Process a random number of files on a random number of systems across multiple data centers
    """

    # Start with a manager
    manager = enlighten.get_manager()

    # Simulated preparation
    prep = manager.counter(total=SPLINES, desc='Reticulating:', unit='splines')
    for num in range(SPLINES):  # pylint: disable=unused-variable
        time.sleep(random.uniform(0.1, 0.5))  # Random processing time
        prep.update()
    prep.close()

    # Get a top level progress bar
    enterprise = manager.counter(total=DATACENTERS,
                                 desc='Processing:',
                                 unit='datacenters')

    # Iterate through data centers
    for dnum in range(1, DATACENTERS + 1):
        systems = random.randint(*SYSTEMS)  # Random number of systems
        # Get a child progress bar. leave is False so it can be replaced
        currCenter = manager.counter(total=systems,
                                     desc='  Datacenter %d:' % dnum,
                                     unit='systems',
                                     leave=False)

        # Iterate through systems
        for snum in range(1, systems + 1):

            # Has no total, so will act as counter. Leave is False
            system = manager.counter(desc='    System %d:' % snum,
                                     unit='files',
                                     leave=False)
            files = random.randint(*FILES)  # Random file count

            # Iterate through files
            for fnum in range(files):  # pylint: disable=unused-variable
                system.update()  # Update count
                time.sleep(random.uniform(0.0001,
                                          0.0005))  # Random processing time

            system.close()  # Close counter so it gets removed
            # Log status
            LOGGER.info('Updated %d files on System %d in Datacenter %d',
                        files, snum, dnum)
            currCenter.update()  # Update count

        currCenter.close()  # Close counter so it gets removed

        enterprise.update()  # Update count

    enterprise.close()  # Close counter, won't be removed but does a refresh

    manager.stop()  # Clears all temporary counters and progress bars
Example #29
 def __init__(self):
     self.__yt = None
     self.__choice = 0
     self.__mp3_mode = False
     self.__directory = self.__get_default_download_path()
     self.__vids = {}  # list of videos to download
     self.__thread_pool = []
     self.__parallel_download = False
     self.__manager = enlighten.get_manager()
     self.__ticks = {}
Example #30
 def begin(self, generations, pop_size):
     self.manager = enlighten.get_manager()
     self.pop_counter = self.manager.counter(total=pop_size,
                                             unit="evals",
                                             leave=True,
                                             desc="Current Gen")
     self.total_counter = self.manager.counter(total=generations * pop_size,
                                               unit="evals",
                                               leave=True,
                                               desc="Best: 0.000")
Example #31
def fsck_mirror(name, config, verbose=False, force=False, repack_only=False,
                conn_only=False, repack_all_quick=False, repack_all_full=False):
    global logger
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    if conn_only or repack_all_quick or repack_all_full:
        force = True

    logger.info('Running grok-fsck for [%s]', name)

    # Lock the tree to make sure we only run one instance
    logger.debug('Attempting to obtain lock on %s', config['lock'])
    flockh = open(config['lock'], 'w')
    try:
        lockf(flockh, LOCK_EX | LOCK_NB)
    except IOError:
        logger.info('Could not obtain exclusive lock on %s', config['lock'])
        logger.info('Assuming another process is running.')
        return 0

    manifest = grokmirror.read_manifest(config['manifest'])

    if os.path.exists(config['statusfile']):
        logger.info('Reading status from %s', config['statusfile'])
        stfh = open(config['statusfile'], 'rb')
        # noinspection PyBroadException
        try:
            # Format of the status file:
            #  {
            #    '/full/path/to/repository': {
            #      'lastcheck': 'YYYY-MM-DD' or 'never',
            #      'nextcheck': 'YYYY-MM-DD',
            #      'lastrepack': 'YYYY-MM-DD',
            #      'fingerprint': 'sha-1',
            #      's_elapsed': seconds,
            #      'quick_repack_count': times,
            #    },
            #    ...
            #  }

            status = json.loads(stfh.read().decode('utf-8'))
        except:
            # It's broken!
            logger.critical('Failed to parse %s', config['statusfile'])
            lockf(flockh, LOCK_UN)
            flockh.close()
            return 1
    else:
        status = {}

    if 'frequency' in config:
        frequency = int(config['frequency'])
    else:
        frequency = 30

    today = datetime.datetime.today()
    todayiso = today.strftime('%F')

    if force:
        # Use randomization for next check, again
        checkdelay = random.randint(1, frequency)
    else:
        checkdelay = frequency

    # Go through the manifest and compare with status
    # noinspection PyTypeChecker
    e_find = em.counter(total=len(manifest), desc='Discovering:', unit='repos', leave=False)
    for gitdir in list(manifest):
        e_find.update()
        fullpath = os.path.join(config['toplevel'], gitdir.lstrip('/'))
        if fullpath not in status.keys():
            # Newly added repository
            if not force:
                # Randomize next check between now and frequency
                delay = random.randint(0, frequency)
                nextdate = today + datetime.timedelta(days=delay)
                nextcheck = nextdate.strftime('%F')
            else:
                nextcheck = todayiso

            status[fullpath] = {
                'lastcheck': 'never',
                'nextcheck': nextcheck,
            }
            logger.info('%s:', fullpath)
            logger.info('  added : next check on %s', nextcheck)

    e_find.close()

    # record newly found repos in the status file
    logger.debug('Updating status file in %s', config['statusfile'])
    with open(config['statusfile'], 'wb') as stfh:
        stfh.write(json.dumps(status, indent=2).encode('utf-8'))

    # Go through status and find all repos that need work done on them.
    # This is a dictionary that contains:
    # full_path_to_repo:
    #   repack: 0, 1, 2 (0-no, 1-needs quick repack, 2-needs full repack)
    #   fsck: 0/1

    to_process = {}

    total_checked = 0
    total_elapsed = 0

    # noinspection PyTypeChecker
    e_cmp = em.counter(total=len(status), desc='Analyzing:', unit='repos', leave=False)
    for fullpath in list(status):
        e_cmp.update()

        # Check to make sure it's still in the manifest
        gitdir = fullpath.replace(config['toplevel'], '', 1)
        gitdir = '/' + gitdir.lstrip('/')

        if gitdir not in manifest.keys():
            del status[fullpath]
            logger.debug('%s is gone, no longer in manifest', gitdir)
            continue

        needs_repack = needs_prune = needs_fsck = 0

        obj_info = get_repo_obj_info(fullpath)
        try:
            packs = int(obj_info['packs'])
            count_loose = int(obj_info['count'])
        except KeyError:
            logger.warning('Unable to count objects in %s, skipping', fullpath)
            continue

        schedcheck = datetime.datetime.strptime(status[fullpath]['nextcheck'], '%Y-%m-%d')
        nextcheck = today + datetime.timedelta(days=checkdelay)

        if 'repack' not in config.keys() or config['repack'] != 'yes':
            # don't look at me if you turned off repack
            logger.debug('Not repacking because repack=no in config')
            needs_repack = 0
        elif repack_all_full and (count_loose > 0 or packs > 1):
            logger.debug('needs_repack=2 due to repack_all_full')
            needs_repack = 2
        elif repack_all_quick and count_loose > 0:
            logger.debug('needs_repack=1 due to repack_all_quick')
            needs_repack = 1
        elif conn_only:
            # don't do any repacks if we're running forced connectivity checks, unless
            # you specifically passed --repack-all-foo
            logger.debug('needs_repack=0 due to --conn-only')
            needs_repack = 0
        else:
            # for now, hardcode the maximum loose objects and packs
            # TODO: we can probably set this in git config values?
            #       I don't think this makes sense as a global setting, because
            #       optimal values will depend on the size of the repo as a whole
            max_loose_objects = 1200
            max_packs = 20
            pc_loose_objects = 10
            pc_loose_size = 10

            # first, compare against max values:
            if packs >= max_packs:
                logger.debug('Triggering full repack of %s because packs > %s', fullpath, max_packs)
                needs_repack = 2
            elif count_loose >= max_loose_objects:
                logger.debug('Triggering quick repack of %s because loose objects > %s', fullpath, max_loose_objects)
                needs_repack = 1
            else:
                # is the number of loose objects or their size more than 10% of
                # the overall total?
                in_pack = int(obj_info['in-pack'])
                size_loose = int(obj_info['size'])
                size_pack = int(obj_info['size-pack'])
                total_obj = count_loose + in_pack
                total_size = size_loose + size_pack
                # set some arbitrary "worth bothering" limits so we don't
                # continuously repack tiny repos.
                if total_obj > 500 and count_loose/total_obj*100 >= pc_loose_objects:
                    logger.debug('Triggering repack of %s because loose objects > %s%% of total',
                                 fullpath, pc_loose_objects)
                    needs_repack = 1
                elif total_size > 1024 and size_loose/total_size*100 >= pc_loose_size:
                    logger.debug('Triggering repack of %s because loose size > %s%% of total',
                                 fullpath, pc_loose_size)
                    needs_repack = 1

        if needs_repack > 0 and check_precious_objects(fullpath):
            # if we have preciousObjects, then we only repack based on the same
            # schedule as fsck.
            logger.debug('preciousObjects is set')
            # for repos with preciousObjects, we use the fsck schedule for repacking
            if schedcheck <= today:
                logger.debug('Time for a full periodic repack of a preciousObjects repo')
                status[fullpath]['nextcheck'] = nextcheck.strftime('%F')
                needs_repack = 2
            else:
                logger.debug('Not repacking preciousObjects repo outside of schedule')
                needs_repack = 0

        # Do we need to fsck it?
        if not (repack_all_quick or repack_all_full or repack_only):
            if schedcheck <= today or force:
                status[fullpath]['nextcheck'] = nextcheck.strftime('%F')
                needs_fsck = 1

        if needs_repack or needs_fsck or needs_prune:
            # emit a warning if we find garbage in a repo
            # we do it here so we don't spam people nightly on every cron run,
            # but only do it when a repo needs actual work done on it
            if obj_info['garbage'] != '0':
                logger.warning('%s:\n\tcontains %s garbage files (garbage-size: %s KiB)',
                               fullpath, obj_info['garbage'], obj_info['size-garbage'])

            to_process[fullpath] = {
                'repack': needs_repack,
                'prune': needs_prune,
                'fsck': needs_fsck,
            }

    e_cmp.close()

    if not len(to_process):
        logger.info('No repos need attention.')
        em.stop()
        return

    logger.info('Processing %s repositories', len(to_process))

    # noinspection PyTypeChecker
    run = em.counter(total=len(to_process), desc='Processing:', unit='repos', leave=False)
    for fullpath, needs in to_process.items():
        logger.info('%s:', fullpath)
        # Calculate elapsed seconds
        run.refresh()
        startt = time.time()

        # Wait till the repo is available and lock it for the duration of checks,
        # otherwise there may be false-positives if a mirrored repo is updated
        # in the middle of fsck or repack.
        grokmirror.lock_repo(fullpath, nonblocking=False)
        if needs['repack']:
            if run_git_repack(fullpath, config, needs['repack']):
                status[fullpath]['lastrepack'] = todayiso
                if needs['repack'] > 1:
                    status[fullpath]['lastfullrepack'] = todayiso
            else:
                logger.warning('Repacking %s was unsuccessful, '
                               'not running fsck.', fullpath)
                grokmirror.unlock_repo(fullpath)
                continue

        if needs['prune']:
            run_git_prune(fullpath, config)

        if needs['fsck']:
            run_git_fsck(fullpath, config, conn_only)
            endt = time.time()
            status[fullpath]['lastcheck'] = todayiso
            status[fullpath]['s_elapsed'] = int(endt-startt)

            logger.info('   done : %ss, next check on %s',
                        status[fullpath]['s_elapsed'],
                        status[fullpath]['nextcheck'])

        run.update()

        # We're done with the repo now
        grokmirror.unlock_repo(fullpath)
        total_checked += 1
        total_elapsed += time.time()-startt

        # Write status file after each check, so if the process dies, we won't
        # have to recheck all the repos we've already checked
        logger.debug('Updating status file in %s', config['statusfile'])
        with open(config['statusfile'], 'wb') as stfh:
            stfh.write(json.dumps(status, indent=2).encode('utf-8'))

    run.close()
    em.stop()
    logger.info('Processed %s repos in %0.2fs', total_checked, total_elapsed)

    with open(config['statusfile'], 'wb') as stfh:
        stfh.write(json.dumps(status, indent=2).encode('utf-8'))

    lockf(flockh, LOCK_UN)
    flockh.close()
Example #32
from __future__ import print_function, absolute_import
import enlighten

progress_manager = enlighten.get_manager()
active_counters = []

class Progress(object):

    def __init__(self, total, desc='', leave=False):
        self.counter = progress_manager.counter(total=total, desc=desc, leave=leave)
        active_counters.append(self.counter)

    def __iter__(self):
        return self
    
    def __next__(self):
        raise NotImplementedError()
    
    def close(self):
        self.counter.close()
        active_counters.remove(self.counter)
        if len(active_counters) == 0:
            progress_manager.stop()

    def __enter__(self):
        return self
    
    def __exit__(self, exception_type, exception_value, traceback):
        self.close()

class RangeProgress(Progress):
Example #33
def grok_manifest(manifile, toplevel, args=None, logfile=None, usenow=False,
                  check_export_ok=False, purge=False, remove=False,
                  pretty=False, ignore=None, wait=False, verbose=False):

    if args is None:
        args = []
    if ignore is None:
        ignore = []

    logger.setLevel(logging.DEBUG)
    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False

    logger.addHandler(ch)

    if logfile is not None:
        ch = logging.FileHandler(logfile)
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)

        ch.setLevel(logging.DEBUG)
        logger.addHandler(ch)

    # push our logger into grokmirror to override the default
    grokmirror.logger = logger

    grokmirror.manifest_lock(manifile)
    manifest = grokmirror.read_manifest(manifile, wait=wait)

    # If manifest is empty, don't use current timestamp
    if not len(manifest.keys()):
        usenow = False

    if remove and len(args):
        # Remove the repos as required, write the new manifest and exit
        for fullpath in args:
            repo = fullpath.replace(toplevel, '', 1)
            if repo in manifest.keys():
                del manifest[repo]
                logger.info('Repository %s removed from manifest', repo)
            else:
                logger.info('Repository %s not in manifest', repo)

        # XXX: need to add logic to make sure we don't break the world
        #      by removing a repository used as a reference for others
        #      also make sure we clean up any dangling symlinks

        grokmirror.write_manifest(manifile, manifest, pretty=pretty)
        grokmirror.manifest_unlock(manifile)
        return 0

    gitdirs = []

    if purge or not len(args) or not len(manifest.keys()):
        # We automatically purge when we do a full tree walk
        gitdirs = grokmirror.find_all_gitdirs(toplevel, ignore=ignore)
        purge_manifest(manifest, toplevel, gitdirs)

    if len(manifest) and len(args):
        # limit ourselves to passed dirs only when there is something
        # in the manifest. This precaution makes sure we regenerate the
        # whole file when there is nothing in it or it can't be parsed.
        gitdirs = args
        # Don't draw a progress bar for a single repo
        em.enabled = False

    symlinks = []
    # noinspection PyTypeChecker
    run = em.counter(total=len(gitdirs), desc='Processing:', unit='repos', leave=False)
    for gitdir in gitdirs:
        run.update()
        # check to make sure this gitdir is ok to export
        if (check_export_ok and not
                os.path.exists(os.path.join(gitdir, 'git-daemon-export-ok'))):
            # is it currently in the manifest?
            repo = gitdir.replace(toplevel, '', 1)
            if repo in list(manifest):
                logger.info('Repository %s is no longer exported, '
                            'removing from manifest', repo)
                del manifest[repo]

            # XXX: need to add logic to make sure we don't break the world
            #      by removing a repository used as a reference for others
            #      also make sure we clean up any dangling symlinks
            continue

        if os.path.islink(gitdir):
            symlinks.append(gitdir)
        else:
            update_manifest(manifest, toplevel, gitdir, usenow)

    logger.info('Updated %s records in %0.2fs', len(gitdirs), run.elapsed)
    run.close()
    em.stop()


    if len(symlinks):
        set_symlinks(manifest, toplevel, symlinks)

    grokmirror.write_manifest(manifile, manifest, pretty=pretty)
    grokmirror.manifest_unlock(manifile)
Example #34
def pull_mirror(name, config, verbose=False, force=False, nomtime=False,
                verify=False, verify_subpath='*', noreuse=False,
                purge=False, pretty=False, forcepurge=False):
    global logger
    global lock_fails

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Checking [%s]', name)
    mymanifest = config['mymanifest']

    if verify:
        logger.info('Verifying mirror against %s', config['manifest'])
        nomtime = True

    if config['manifest'].find('file:///') == 0:
        manifile = config['manifest'].replace('file://', '')
        if not os.path.exists(manifile):
            logger.critical('Remote manifest not found in %s! Quitting!',
                            config['manifest'])
            return 1

        fstat = os.stat(manifile)
        last_modified = fstat[8]
        logger.debug('mtime on %s is: %s', manifile, fstat[8])

        if os.path.exists(config['mymanifest']):
            fstat = os.stat(config['mymanifest'])
            my_last_modified = fstat[8]
            logger.debug('Our last-modified is: %s', my_last_modified)
            if not (force or nomtime) and last_modified <= my_last_modified:
                logger.info('Manifest file unchanged. Quitting.')
                return 0

        logger.info('Reading new manifest from %s', manifile)
        manifest = grokmirror.read_manifest(manifile)
        # Don't accept empty manifests -- that indicates something is wrong
        if not len(manifest.keys()):
            logger.warning('Remote manifest empty or unparseable! Quitting.')
            return 1

    else:
        # Load it from remote host using http and header magic
        logger.info('Fetching remote manifest from %s', config['manifest'])

        # Do we have username:password@ in the URL?
        chunks = urlparse(config['manifest'])
        if chunks.netloc.find('@') > 0:
            logger.debug('Taking username/password from the URL for basic auth')
            (upass, netloc) = chunks.netloc.split('@')
            if upass.find(':') > 0:
                (username, password) = upass.split(':')
            else:
                username = upass
                password = ''

            manifesturl = config['manifest'].replace(chunks.netloc, netloc)
            logger.debug('manifesturl=%s', manifesturl)
            request = urllib_request.Request(manifesturl)

            password_mgr = urllib_request.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, manifesturl, username, password)
            auth_handler = urllib_request.HTTPBasicAuthHandler(password_mgr)
            opener = urllib_request.build_opener(auth_handler)

        else:
            request = urllib_request.Request(config['manifest'])
            opener = urllib_request.build_opener()

        # Find out if we need to run at all first
        if not (force or nomtime) and os.path.exists(mymanifest):
            fstat = os.stat(mymanifest)
            mtime = fstat[8]
            logger.debug('mtime on %s is: %s', mymanifest, mtime)
            my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                             time.gmtime(mtime))
            logger.debug('Our last-modified is: %s', my_last_modified)
            request.add_header('If-Modified-Since', my_last_modified)

        try:
            ufh = opener.open(request, timeout=30)
        except HTTPError as ex:
            if ex.code == 304:
                logger.info('Server says we have the latest manifest. '
                            'Quitting.')
                return 0
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Server returned: %s', ex)
            return 1
        except (URLError, ssl.SSLError, ssl.CertificateError) as ex:
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

        last_modified = ufh.headers.get('Last-Modified')
        last_modified = time.strptime(last_modified, '%a, %d %b %Y %H:%M:%S %Z')
        last_modified = calendar.timegm(last_modified)

        # We don't use read_manifest for the remote manifest, as it can be
        # anything, really. For now, blindly open it with gzipfile if it ends
        # with .gz. XXX: some http servers will auto-deflate such files.
        try:
            if config['manifest'].find('.gz') > 0:
                fh = gzip.GzipFile(fileobj=BytesIO(ufh.read()))
            else:
                fh = ufh

            jdata = fh.read().decode('utf-8')
            fh.close()

            manifest = anyjson.deserialize(jdata)

        except Exception as ex:
            logger.warning('Failed to parse %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

    mymanifest = grokmirror.read_manifest(mymanifest)

    culled = cull_manifest(manifest, config)

    to_clone = []
    to_pull = []
    existing = []

    toplevel = config['toplevel']
    if not os.access(toplevel, os.W_OK):
        logger.critical('Toplevel %s does not exist or is not writable',
                        toplevel)
        sys.exit(1)

    if 'pull_threads' in config.keys():
        pull_threads = int(config['pull_threads'])
        if pull_threads < 1:
            logger.info('pull_threads is less than 1, forcing to 1')
            pull_threads = 1
    else:
        # be conservative
        logger.info('pull_threads is not set, consider setting it')
        pull_threads = 5

    # noinspection PyTypeChecker
    e_cmp = em.counter(total=len(culled), desc='Comparing:', unit='repos', leave=False)

    for gitdir in list(culled):
        fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
        e_cmp.update()

        # fingerprints were added in later versions, so deal if the upstream
        # manifest doesn't have a fingerprint
        if 'fingerprint' not in culled[gitdir]:
            culled[gitdir]['fingerprint'] = None

        # Attempt to lock the repo
        try:
            grokmirror.lock_repo(fullpath, nonblocking=True)
        except IOError:
            logger.info('Could not lock %s, skipping', gitdir)
            lock_fails.append(gitdir)
            # Force the fingerprint to what we have in mymanifest,
            # if we have it.
            culled[gitdir]['fingerprint'] = None
            if gitdir in mymanifest and 'fingerprint' in mymanifest[gitdir]:
                culled[gitdir]['fingerprint'] = mymanifest[gitdir][
                    'fingerprint']
            if len(lock_fails) >= pull_threads:
                logger.info('Too many repositories locked (%s). Exiting.',
                            len(lock_fails))
                return 0
            continue

        if verify:
            if culled[gitdir]['fingerprint'] is None:
                logger.debug('No fingerprint for %s, not verifying', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            if not fnmatch.fnmatch(gitdir, verify_subpath):
                grokmirror.unlock_repo(fullpath)
                continue

            logger.debug('Verifying %s', gitdir)
            if not os.path.exists(fullpath):
                verify_fails.append(gitdir)
                logger.info('Verify: %s ABSENT', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            my_fingerprint = grokmirror.get_repo_fingerprint(
                toplevel, gitdir, force=force)

            if my_fingerprint == culled[gitdir]['fingerprint']:
                logger.info('Verify: %s OK', gitdir)
            else:
                logger.critical('Verify: %s FAILED', gitdir)
                verify_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            continue

        # Is the directory in place?
        if os.path.exists(fullpath):
            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.info('Reclone requested for %s:', gitdir)
                with open(rfile, 'r') as rfh:
                    reason = rfh.read()
                    logger.info('  %s', reason)

                to_clone.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # Fix owner and description, if necessary
            if gitdir in mymanifest.keys():
                # This code is hurky and needs to be cleaned up
                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = None
                if config['ignore_repo_references'] != 'yes':
                    ref = culled[gitdir].get('reference')

                # dirty hack to force on-disk owner/description checks
                # when we're called with -n, in case our manifest
                # differs from what is on disk for owner/description/alternates
                myref = None
                if nomtime:
                    mydesc = None
                    myowner = None
                else:
                    mydesc = mymanifest[gitdir].get('description')
                    myowner = mymanifest[gitdir].get('owner')

                    if config['ignore_repo_references'] != 'yes':
                        myref = mymanifest[gitdir].get('reference')

                    if myowner is None:
                        myowner = config['default_owner']

                if owner is None:
                    owner = config['default_owner']

                if desc != mydesc or owner != myowner or ref != myref:
                    # we can do this right away without waiting
                    set_repo_params(toplevel, gitdir, owner, desc, ref)

            else:
                # It exists on disk, but not in my manifest?
                if noreuse:
                    logger.critical('Found existing git repo in %s', fullpath)
                    logger.critical('But you asked NOT to reuse repos')
                    logger.critical('Skipping %s', gitdir)
                    grokmirror.unlock_repo(fullpath)
                    continue

                logger.info('Setting new origin for %s', gitdir)
                fix_remotes(gitdir, toplevel, config['site'])
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # fingerprints were added late, so if we don't have them
            # in the remote manifest, fall back on using timestamps
            changed = False
            if culled[gitdir]['fingerprint'] is not None:
                logger.debug('Will use fingerprints to compare %s', gitdir)
                my_fingerprint = grokmirror.get_repo_fingerprint(toplevel,
                                                                 gitdir,
                                                                 force=force)

                if my_fingerprint != culled[gitdir]['fingerprint']:
                    logger.debug('No fingerprint match, will pull %s', gitdir)
                    changed = True
                else:
                    logger.debug('Fingerprints match, skipping %s', gitdir)
            else:
                logger.debug('Will use timestamps to compare %s', gitdir)
                if force:
                    logger.debug('Will force-pull %s', gitdir)
                    changed = True
                    # set timestamp to 0 as well
                    grokmirror.set_repo_timestamp(toplevel, gitdir, 0)
                else:
                    ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
                    if ts < culled[gitdir]['modified']:
                        changed = True

            if changed:
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue
            else:
                logger.debug('Repo %s unchanged', gitdir)
                # if we don't have a fingerprint for it, add it now
                if culled[gitdir]['fingerprint'] is None:
                    fpr = grokmirror.get_repo_fingerprint(toplevel, gitdir)
                    culled[gitdir]['fingerprint'] = fpr
                existing.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

        else:
            # Newly incoming repo
            to_clone.append(gitdir)
            grokmirror.unlock_repo(fullpath)
            continue

        # If we got here, something is odd.
        # noinspection PyUnreachableCode
        logger.critical('Could not figure out what to do with %s', gitdir)
        grokmirror.unlock_repo(fullpath)

    logger.info('Compared new manifest against %s repositories in %0.2fs', len(culled), e_cmp.elapsed)
    e_cmp.close()

    if verify:
        if len(verify_fails):
            logger.critical('%s repos failed to verify', len(verify_fails))
            return 1
        else:
            logger.info('Verification successful')
            return 0

    hookscript = config['post_update_hook']

    if len(to_pull):

        if len(lock_fails) > 0:
            pull_threads -= len(lock_fails)

        # Don't spin up more threads than we need
        if pull_threads > len(to_pull):
            pull_threads = len(to_pull)

        # exit if we're ever at 0 pull_threads. Shouldn't happen, but some extra
        # precaution doesn't hurt
        if pull_threads <= 0:
            logger.info('Too many repositories locked. Exiting.')
            return 0

        logger.info('Will use %d threads to pull repos', pull_threads)

        # noinspection PyTypeChecker
        e_pull = em.counter(total=len(to_pull), desc='Updating :', unit='repos', leave=False)
        logger.info('Updating %s repos from %s', len(to_pull), config['site'])
        in_queue = Queue()
        out_queue = Queue()

        for gitdir in to_pull:
            in_queue.put((gitdir, culled[gitdir]['fingerprint'],
                          culled[gitdir]['modified']))

        for i in range(pull_threads):
            logger.debug('Spun up thread %s', i)
            t = PullerThread(in_queue, out_queue, config, i, e_pull)
            t.daemon = True
            t.start()

        # wait till it's all done
        in_queue.join()
        logger.info('All threads finished.')

        while not out_queue.empty():
            # see if any of it failed
            (gitdir, my_fingerprint, status) = out_queue.get()
            # We always record our fingerprint in our manifest
            culled[gitdir]['fingerprint'] = my_fingerprint
            if not status:
                # To make sure we check this again during next run,
                # fudge the manifest accordingly.
                logger.debug('Will recheck %s during next run', gitdir)
                culled[gitdir] = mymanifest[gitdir]
                # this is rather hackish, but effective
                last_modified -= 1

        logger.info('Updates completed in %0.2fs', e_pull.elapsed)
        e_pull.close()
    else:
        logger.info('No repositories need updating')

    # how many lockfiles have we seen?
    # If there are more lock_fails than there are
    # pull_threads configured, we skip cloning out of caution
    if len(to_clone) and len(lock_fails) > pull_threads:
        logger.info('Too many repositories locked. Skipping cloning new repos.')
        to_clone = []

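    # Clone repos we don't have yet; clone_order() arranges them so that repos
    # used as references come before the repos that use them as alternates.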
    if len(to_clone):
        # noinspection PyTypeChecker
        e_clone = em.counter(total=len(to_clone), desc='Cloning  :', unit='repos', leave=False)
        logger.info('Cloning %s repos from %s', len(to_clone), config['site'])
        # we use "existing" to track which repos can be used as references
        existing.extend(to_pull)

        to_clone_sorted = []
        clone_order(to_clone, manifest, to_clone_sorted, existing)

        for gitdir in to_clone_sorted:
            e_clone.refresh()

            fullpath = os.path.join(toplevel, gitdir.lstrip('/'))

            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.debug('Removing %s for reclone', gitdir)
                shutil.move(fullpath, '%s.reclone' % fullpath)
                shutil.rmtree('%s.reclone' % fullpath)

            # Do we still need to clone it, or has another process
            # already done this for us?
            ts = grokmirror.get_repo_timestamp(toplevel, gitdir)

            if ts > 0:
                logger.debug('Looks like %s already cloned, skipping', gitdir)
                continue

            try:
                grokmirror.lock_repo(fullpath, nonblocking=True)
            except IOError:
                logger.info('Could not lock %s, skipping', gitdir)
                lock_fails.append(gitdir)
                e_clone.update()
                continue

            reference = None
            if config['ignore_repo_references'] != 'yes':
                reference = culled[gitdir]['reference']

            if reference is not None and reference in existing:
                # Make sure we can lock the reference repo
                refrepo = os.path.join(toplevel, reference.lstrip('/'))
                try:
                    grokmirror.lock_repo(refrepo, nonblocking=True)
                    success = clone_repo(toplevel, gitdir, config['site'],
                                         reference=reference)
                    grokmirror.unlock_repo(refrepo)
                except IOError:
                    logger.info('Cannot lock reference repo %s, skipping %s',
                                reference, gitdir)
                    if reference not in lock_fails:
                        lock_fails.append(reference)

                    grokmirror.unlock_repo(fullpath)
                    e_clone.update()
                    continue
            else:
                success = clone_repo(toplevel, gitdir, config['site'])

            # check dir to make sure cloning succeeded and then add to existing
            if os.path.exists(fullpath) and success:
                logger.debug('Cloning of %s succeeded, adding to existing',
                             gitdir)
                existing.append(gitdir)

                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = culled[gitdir].get('reference')

                if owner is None:
                    owner = config['default_owner']
                set_repo_params(toplevel, gitdir, owner, desc, ref)
                set_agefile(toplevel, gitdir, culled[gitdir]['modified'])
                my_fingerprint = grokmirror.set_repo_fingerprint(toplevel,
                                                                 gitdir)
                culled[gitdir]['fingerprint'] = my_fingerprint
                run_post_update_hook(hookscript, toplevel, gitdir)
            else:
                logger.warning('Was not able to clone %s', gitdir)
                # Remove it from our manifest so we can try re-cloning
                # next time grok-pull runs
                del culled[gitdir]
                git_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            e_clone.update()

        logger.info('Clones completed in %0.2fs', e_clone.elapsed)
        e_clone.close()

    else:
        logger.info('No repositories need cloning')

    # loop through all entries and find any symlinks we need to set
    # We also collect all symlinks to do purging correctly
    symlinks = []
    for gitdir in culled.keys():
        if 'symlinks' in culled[gitdir].keys():
            source = os.path.join(config['toplevel'], gitdir.lstrip('/'))
            for symlink in culled[gitdir]['symlinks']:
                if symlink not in symlinks:
                    symlinks.append(symlink)
                target = os.path.join(config['toplevel'], symlink.lstrip('/'))

                if os.path.exists(source):
                    if os.path.islink(target):
                        # are you pointing to where we need you?
                        if os.path.realpath(target) != source:
                            # Remove symlink and recreate below
                            logger.debug('Removed existing wrong symlink %s',
                                         target)
                            os.unlink(target)
                    elif os.path.exists(target):
                        logger.warning('Deleting repo %s, because it is now'
                                       ' a symlink to %s', target, source)
                        shutil.rmtree(target)

                    # Here we re-check if we still need to do anything
                    if not os.path.exists(target):
                        logger.info('Symlinking %s -> %s', target, source)
                        # Make sure the leading dirs are in place
                        if not os.path.exists(os.path.dirname(target)):
                            os.makedirs(os.path.dirname(target))
                        os.symlink(source, target)

    manifile = config['mymanifest']
    grokmirror.manifest_lock(manifile)

    # Is the local manifest newer than last_modified? That would indicate
    # that another process has run and "culled" is no longer the latest info
    if os.path.exists(manifile):
        fstat = os.stat(manifile)
        if fstat[8] > last_modified:
            logger.info('Local manifest is newer, not saving.')
            grokmirror.manifest_unlock(manifile)
            return 0

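    # Purge repos found on disk that are neither in the incoming manifest nor
    # targets of the symlinks collected above.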
    if purge:
        to_purge = []
        found_repos = 0
        for founddir in grokmirror.find_all_gitdirs(config['toplevel']):
            gitdir = founddir.replace(config['toplevel'], '')
            found_repos += 1

            if gitdir not in culled.keys() and gitdir not in symlinks:
                to_purge.append(founddir)

        if len(to_purge):
            # Purge-protection engage
            try:
                purge_limit = int(config['purgeprotect'])
                assert 1 <= purge_limit <= 99
            except (ValueError, AssertionError):
                logger.critical('Warning: "%s" is not valid for purgeprotect.',
                                config['purgeprotect'])
                logger.critical('Please set to a number between 1 and 99.')
                logger.critical('Defaulting to purgeprotect=5.')
                purge_limit = 5

            purge_pc = len(to_purge) * 100 / found_repos
            logger.debug('purgeprotect=%s', purge_limit)
            logger.debug('purge percentage=%s', purge_pc)

            if not forcepurge and purge_pc >= purge_limit:
                logger.critical('Refusing to purge %s repos (%s%%)',
                                len(to_purge), purge_pc)
                logger.critical('Set purgeprotect to a higher percentage, or'
                                ' override with --force-purge.')
                logger.info('Not saving local manifest')
                return 1
            else:
                # noinspection PyTypeChecker
                e_purge = em.counter(total=len(to_purge), desc='Purging  :', unit='repos', leave=False)
                for founddir in to_purge:
                    e_purge.refresh()
                    if os.path.islink(founddir):
                        logger.info('Removing unreferenced symlink %s',
                                    founddir)
                        os.unlink(founddir)
                    else:
                        # is anything using us for alternates?
                        gitdir = '/' + os.path.relpath(founddir, toplevel).lstrip('/')
                        if grokmirror.is_alt_repo(toplevel, gitdir):
                            logger.info('Not purging %s because it is used by '
                                        'other repos via alternates', founddir)
                        else:
                            try:
                                logger.info('Purging %s', founddir)
                                grokmirror.lock_repo(founddir, nonblocking=True)
                                shutil.rmtree(founddir)
                            except IOError:
                                lock_fails.append(gitdir)
                                logger.info('%s is locked, not purging',
                                            gitdir)
                    e_purge.update()

                logger.info('Purging completed in %0.2fs', e_purge.elapsed)
                e_purge.close()

        else:
            logger.info('No repositories need purging')

    # Done with progress bars
    em.stop()

    # Go through all repos in culled and get the latest local timestamps.
    for gitdir in culled:
        ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
        culled[gitdir]['modified'] = ts

    # If there were any lock failures, we fudge last_modified to always
    # be older than the server, which will force the next grokmirror run.
    if len(lock_fails):
        logger.info('%s repos could not be locked. Forcing next run.',
                    len(lock_fails))
        last_modified -= 1
    elif len(git_fails):
        logger.info('%s repos failed. Forcing next run.', len(git_fails))
        last_modified -= 1

    # Once we're done, save culled as our new manifest
    grokmirror.write_manifest(manifile, culled, mtime=last_modified,
                              pretty=pretty)

    grokmirror.manifest_unlock(manifile)

    # write out projects.list, if asked to
    write_projects_list(culled, config)

    return 127
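
The enlighten usage above follows one pattern throughout: a single manager for the whole run, one short-lived counter per phase (compare, update, clone, purge) created with a fixed-width desc and leave=False, updated inside the loop, closed when the phase ends, and manager.stop() called once all bars are finished. A minimal sketch of that pattern is below; run_phase() and its work callable are illustrative placeholders, not grokmirror or enlighten names.

import enlighten

def run_phase(items, work):
    """Apply work() to every item while showing an enlighten progress bar."""
    manager = enlighten.get_manager()
    # one counter per phase; leave=False removes the bar once it is closed
    counter = manager.counter(total=len(items), desc='Working  :',
                              unit='repos', leave=False)
    for item in items:
        work(item)
        counter.update()
    elapsed = counter.elapsed  # seconds since the counter was created
    counter.close()
    manager.stop()
    return elapsed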