def main(arg):
    """Sync the Elasticsearch catalog with what is actually on disk.

    The disk is treated as the source of truth: files found on disk but not
    in the catalog are offered for loading, catalog entries with no matching
    file are offered for deletion, and moved files (same checksum, old path
    gone) are detected and updated in place.

    :param arg: argument list passed to ``argparse`` (e.g. ``sys.argv[1:]``).
    :returns: None. Results are reported via ``print`` and the module-level
        counters ``updated``/``deleted``/``loaded``/``total``.
    """
    # This tool communicates its results through module-level state shared
    # with the helper functions (check_files_in_catalog etc.).
    global updated, deleted, loaded, store, reader, deleter, total, nas_root

    parser = argparse.ArgumentParser(
        description="""This tool will sync the catalog with the disk contents. The disk is taken as truth,
    the catalog is changed based on what is on disk.

    New data on disk will be loaded into the catalog.
    Items in the catalog that are not on disk anymore are removed from the catalog, if they were moved,
    that will be detected. Items that changed on disk with the same name are not detected.
    (change in size, modify time..)""")
    parser.add_argument('dirname', type=str, help='name of directory to look at')
    parser.add_argument('--quiet', '-q', action='store_true', help='no verbose output')
    root_arguments(parser)
    elastic_arguments(parser)
    args = parser.parse_args(arg)

    connection = elastic.Connection(args.host, args.port)
    if args.index is not None:
        connection.index = args.index

    # JSON is defined over Unicode; decode the config explicitly as UTF-8
    # instead of relying on the platform default encoding.
    with open("config.json", 'r', encoding='utf-8') as file:
        config = json.load(file)
    # Command-line --nas-root (when given) overrides the configured default.
    nas_root = args.nas_root or config['nas_root']

    if not args.quiet:
        print(f"Checking catalog on {connection.host}:{connection.port} with index {connection.index}")

    store = elastic.Store(connection)
    reader = elastic.Retrieve(connection)
    deleter = elastic.Delete(connection)

    updated = deleted = total = loaded = 0

    # These helpers populate the module-level sets elastic_paths,
    # in_catalog_only, on_disk_only and in_catalog_only_checksums.
    check_files_in_catalog(args.dirname)
    check_files_on_disk(os.path.join(nas_root, args.dirname))
    if not args.quiet:
        print(f"Catalog entries in sync: {len(elastic_paths)}")
        print(f"Catalog entries not found on disk: {len(in_catalog_only)}")
        print(f"On disk but not on catalog: {len(on_disk_only)}")
    elastic_paths.clear()

    # detected moved file or deleted duplicate
    store_list = []
    updated_checksums.clear()
    for new_file in on_disk_only:
        if new_file.checksum in in_catalog_only_checksums:
            # The checksum is known but its catalog path vanished: the file
            # was moved. Update the first matching catalog entry only.
            for cat_item in reader.get_by_checksum(new_file.checksum):
                update_catalog_entry_for_moved_file(cat_item, new_file)
                break
        else:
            # The checksum may still exist in the catalog under a path that
            # is gone from disk (moved across directories); prefer updating
            # such a stale entry over creating a duplicate.
            found = False
            for item in reader.get_by_checksum(new_file.checksum):
                if not os.path.exists(os.path.join(nas_root, item.full_path)):
                    update_catalog_entry_for_moved_file(item, new_file)
                    found = True
                    break
            if not found:
                # Genuinely new file: ask the operator before loading it.
                print(f"File {new_file.full_path} is only on disk but not in Catalog")
                yes = input("Load into catalog y/n?")
                if yes.lower().startswith('y'):
                    # Store the path relative to the NAS root (+1 strips the
                    # path separator following the root prefix).
                    new_file.path = new_file.path[len(nas_root) + 1:]
                    new_file.nas = True
                    store_list.append(new_file)
                    loaded += 1

    if store_list:
        # store.list consumed a generator before, so it accepts any
        # iterable; pass the list directly instead of re-wrapping it.
        store.list(store_list)

    for file in in_catalog_only:
        # Entries already handled as moves above are tracked in
        # updated_checksums by update_catalog_entry_for_moved_file.
        if file.checksum in updated_checksums:
            continue
        print(f"""{file.full_path} is only in the catalog.""")
        skip = False
        for item in reader.get_by_checksum(file.checksum):
            if item.id != file.id:
                # Same content exists elsewhere in the catalog: this entry
                # is a duplicate and can be removed without asking.
                print(f"{file.full_path} is still in catalog as {item.full_path}. Deleting this duplicate.")
                deleter.id(file.id)
                deleted += 1
                skip = True
                break
        if skip:
            continue
        yes = input('Delete this file from catalog y/n?:')
        if yes.lower().startswith('y'):
            deleter.id(file.id)
            deleted += 1

    return
Beispiel #2
0
    The directory structure has meaning here. The data will be taken from the old structure down. 
    Starting from a YEAR we will traverse into the given months and subdirs are kept as such.
    Duplicates in the same dir are not stored, but duplicates in named directories outside of month are."""
    )
    parser.add_argument('basedir',
                        type=str,
                        help='Base directory of old catalog.')
    parser.add_argument('year', type=str, nargs='+', help='Year to import.')
    parser.add_argument('--dropbox',
                        action='store_true',
                        help='Also create the dropbox copy. Defaults to FALSE')
    root_arguments(parser)
    parser.add_argument('--month',
                        type=str,
                        help='Only catalog the given month.')
    elastic_arguments(parser)
    args = parser.parse_args()

    year = re.compile("[1-2]\d\d\d")

    for y in args.year:
        if not year.match(y):
            print("Year needs to be a 4 digit number")
            sys.exit(-1)

        import_path = os.path.join(args.basedir, y)
        if not os.path.isdir(import_path):
            print(f"Invalid directory {import_path}")
            sys.exit(-1)

        index = ""