Example #1
    # Parse the updated_on string into a datetime cutoff:
    modified_since = convert_date_string(updated_on)

    # Map feed entries to list of import objects:
    print(
        f'Importing all entries that have been updated since {modified_since}.'
    )
    modified_entries = filter_modified_since(d.entries, modified_since)
    print(f'{len(modified_entries)} import objects created.')

    if not dry_run:
        create_batch(modified_entries)
        print(
            f'{len(modified_entries)} entries added to the batch import job.')
    else:
        for record in modified_entries:
            print(json.dumps(record))

    # Persist the feed's last-modified timestamp for the next run's
    # If-Modified-Since header
    if not dry_run:
        with open(LAST_UPDATED_TIME, 'w+') as f:
            f.write(last_modified)
            print(f'Last updated timestamp written to: {LAST_UPDATED_TIME}')


if __name__ == '__main__':
    print("Start: Standard Ebooks import job")
    FnToCLI(import_job).run()
    print("End: Standard Ebooks import job")
Example #2
    # Pool state: tasks in flight, completion flag, and next chunk offset.
    active_workers = set()
    done = False
    offset = 0
    while True:
        if done:
            # Done! Wait for any previous workers that are still going
            await asyncio.gather(*active_workers)
            break
        elif len(active_workers) >= instances:
            # Too many running; wait for one to finish
            finished, pending = await asyncio.wait(
                active_workers,
                return_when=asyncio.FIRST_COMPLETED)
            active_workers = pending
            # A worker that processed fewer than chunk_size records has
            # reached the end of the data, so stop scheduling new chunks.
            done = any(task.result() < chunk_size for task in finished)
        else:
            # Can start another worker
            task = asyncio.create_task(index_subjects(
                subject_type,
                offset=offset,
                limit=chunk_size,
                solr_base_url=solr_base_url,
                skip_id_check=skip_id_check,
            ))
            active_workers.add(task)
            offset += chunk_size


if __name__ == '__main__':
    cli = FnToCLI(index_all_subjects)
    print(cli.parse_args())
    cli.run()
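
The loop above is a bounded worker pool: keep at most `instances` tasks in flight, and stop scheduling new chunks once any worker comes back short. A self-contained sketch of the same pattern, where fetch_chunk and the constants are made up for the demo:

import asyncio

CHUNK_SIZE = 10
MAX_WORKERS = 3

async def fetch_chunk(offset):
    # Stand-in worker: pretend to process up to CHUNK_SIZE rows and
    # report how many were actually found.
    await asyncio.sleep(0.1)
    return CHUNK_SIZE if offset < 50 else 3  # short chunk -> data exhausted

async def run_all():
    active = set()
    done = False
    offset = 0
    while True:
        if done:
            # Drain any workers that are still running, then stop.
            await asyncio.gather(*active)
            break
        elif len(active) >= MAX_WORKERS:
            # Pool is full; wait for at least one task to finish.
            finished, active = await asyncio.wait(
                active, return_when=asyncio.FIRST_COMPLETED)
            done = any(task.result() < CHUNK_SIZE for task in finished)
        else:
            active.add(asyncio.create_task(fetch_chunk(offset)))
            offset += CHUNK_SIZE

asyncio.run(run_all())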
Example #3
    if isinstance(dest_ol, OpenLibrary):
        # Log in with credentials from ~/.olrc if the destination has a
        # section there; otherwise fall back to the default dev credentials.
        section = "[%s]" % web.lstrips(dest, "http://").strip("/")
        if section in read_lines(os.path.expanduser("~/.olrc")):
            dest_ol.autologin()
        else:
            dest_ol.login("admin", "admin123")

    for list_key in (lists or []):
        copy_list(src_ol, dest_ol, list_key, comment=comment)

    if search:
        assert isinstance(src_ol, OpenLibrary), "Search only works with OL src"
        keys += [
            doc['key'] for doc in src_ol.search(
                search, limit=search_limit, fields=['key'])['docs']
        ]

    # Normalize keys to start with '/' before expanding them.
    keys = list(expand(src_ol, ('/' + k.lstrip('/') for k in keys)))

    copy(src_ol,
         dest_ol,
         keys,
         comment=comment,
         recursive=recursive,
         editions=editions)


if __name__ == '__main__':
    FnToCLI(main).run()
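
All four examples drive their script through FnToCLI, Open Library's helper (scripts/solr_builder/solr_builder/fn_to_cli.py, imported in Example #4) that turns a function's signature into a command-line interface. Purely as an illustration of the idea, with no claim to match the real implementation, a toy version could look like this:

import argparse
import asyncio
import inspect

class MiniFnToCLI:
    """Toy function-to-CLI wrapper; the real FnToCLI is richer."""

    def __init__(self, fn):
        self.fn = fn
        self.parser = argparse.ArgumentParser(description=fn.__doc__)
        for name, param in inspect.signature(fn).parameters.items():
            if param.default is inspect.Parameter.empty:
                self.parser.add_argument(name)  # required positional
            elif isinstance(param.default, bool):
                self.parser.add_argument(f'--{name}', action='store_true')
            else:
                self.parser.add_argument(f'--{name}', default=param.default)

    def parse_args(self):
        return self.parser.parse_args()

    def args_dict(self):
        return vars(self.parse_args())

    def run(self):
        result = self.fn(**self.args_dict())
        if inspect.iscoroutine(result):
            # Async functions (Examples #2 and #4) need an event loop.
            result = asyncio.run(result)
        return result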
Example #4
    while True:
        records = logfile.read_records()
        keys = parse_log(records, load_ia_scans)
        count = await update_keys(keys)

        if logfile.tell() != offset:
            offset = logfile.tell()
            logger.info("saving offset %s", offset)
            with open(state_file, "w") as f:
                f.write(str(offset))  # str() in case tell() returns an int

        if commit:
            solr.commit(ndocs=count)
        else:
            logger.info("not doing solr commit as commit is off")

        # Don't sleep if any records were processed: more edits may have
        # arrived while the update was running.
        if count == 0:
            logger.debug("No more log records available, sleeping...")
            await asyncio.sleep(5)


if __name__ == "__main__":
    from scripts.solr_builder.solr_builder.fn_to_cli import FnToCLI

    cli = FnToCLI(main)
    args = cli.args_dict()
    cli.run()
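
Example #4 is a classic tail-and-checkpoint loop: process whatever new log records exist, persist the read offset so a restart resumes where it left off, and back off when idle. Stripped of the Solr specifics, and with illustrative names only, the skeleton is:

import asyncio

async def tail_and_checkpoint(logfile, state_file, process):
    # `logfile` is assumed to expose read_records() and tell();
    # `process` is an async callable returning the number handled.
    offset = logfile.tell()
    while True:
        count = await process(logfile.read_records())

        if logfile.tell() != offset:
            # Checkpoint only when we actually advanced.
            offset = logfile.tell()
            with open(state_file, 'w') as f:
                f.write(str(offset))

        if count == 0:
            # Idle: wait for new records without busy-looping.
            await asyncio.sleep(5)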