Ejemplo n.º 1
0
def test_syncfile_01(testdir):
    """
    Attempt to sync a source file that doesn't exist.

    Scenario: source NO.
    Expected: pylut.syncfile raises pylut.SyncError whose ``reason``
    contains 'No such file or directory'.
    """
    testdir.reset()
    syncopts = syncopts_defaults.copy()
    syncopts['tmpbase'] = os.path.abspath(testdir.psconfig.TMP_DIR)
    # Materialize the values before indexing: on Python 3 dict.values()
    # returns a view that cannot be subscripted, while list(...) behaves
    # the same on Python 2 and Python 3.
    f = list(testdir.objects.values())[0][0]
    # Append a suffix to a known path to get a name that does not exist.
    src = fsitem.FSItem('{0}xyz'.format(f.path))
    tgt = fsitem.FSItem(src.absname.replace(testdir.source, testdir.target))
    with pytest.raises(pylut.SyncError) as einfo:
        pylut.syncfile(src_path=src, tgt_path=tgt, **syncopts)
    assert 'No such file or directory' in einfo.value.reason
Ejemplo n.º 2
0
def test_syncfile_07(testdir):
    """
    Re-sync when an existing tmp no longer matches src (keeptmp enabled).

    Scenario: tmp MISMATCH, tgt NO.
    Expected: the stale tmp gets unlinked, after which the case is the same
    as test_syncfile_02; the resulting tmp must have a new FID.
    test-pair = 11
    """
    testdir.reset()
    testdir.mk_all_tgtdirs()
    opts = syncopts_defaults.copy()
    opts['tmpbase'] = os.path.abspath(testdir.psconfig.TMP_DIR)
    for entry in testdir.files:
        source_item = fsitem.FSItem(entry.path)
        target_name = source_item.absname.replace(testdir.source,
                                                  testdir.target)
        target_item = fsitem.FSItem(target_name)

        # First pass: sync with keeptmp so that a tmp file is left behind.
        opts['keeptmp'] = True
        tmp_item, _action = pylut.syncfile(src_path=source_item,
                                           tgt_path=target_item,
                                           **opts)

        # Remove the target so that only the tmp file remains.
        target_str = str(target_item)
        os.unlink(target_str)
        assert not os.path.lexists(target_str)
        target_item.update()

        # Remember the FID of the tmp file created by the first pass.
        original_fid = tmp_item.inode()

        # Modify the source so that the existing tmp becomes stale.
        if not source_item.is_regular():
            # Non-regular files can only be changed via their timestamp,
            # which needs a one second gap to be detectable.
            time.sleep(1)
            _touch(source_item)
        else:
            # Rewriting the data with a new size avoids the long sleep.
            delta = random.randint(1, 1024)
            _mkregfile(source_item, size=source_item.size + delta)
        source_item.update()

        # Second pass: a brand new tmp file is expected.
        tmp_item, _action = pylut.syncfile(src_path=source_item,
                                           tgt_path=target_item,
                                           **opts)
        # tmp is expected to be new, so refresh its metadata.
        tmp_item.update()

        # src and tgt are in sync
        assert _files_match(source_item, target_item, opts)
        # tmp and tgt are the same file
        assert _files_equal(target_item, tmp_item)
        # tmp FID changed
        assert tmp_item.inode() != original_fid
Ejemplo n.º 3
0
def test_syncfile_02(testdir):
    """
    Initial sync of every test file with keeptmp enabled.

    Scenario: tmp NO, target NO, keeptmp YES.
    Expected: after the sync, src and tgt match.
    test-pair = 03
    """
    testdir.reset()
    testdir.mk_all_tgtdirs()
    opts = syncopts_defaults.copy()
    tmp_root = os.path.abspath(testdir.psconfig.TMP_DIR)
    opts.update({'keeptmp': True, 'tmpbase': tmp_root})
    for entry in testdir.files:
        source_item = fsitem.FSItem(entry.path)
        # The target path mirrors the source path under the target root.
        target_name = source_item.absname.replace(testdir.source,
                                                  testdir.target)
        target_item = fsitem.FSItem(target_name)
        pylut.syncfile(src_path=source_item, tgt_path=target_item, **opts)
        assert _files_match(source_item, target_item, opts)
Ejemplo n.º 4
0
def test_syncfile_06(testdir):
    """
    Re-sync when a valid tmp exists but tgt is missing (keeptmp enabled).

    Scenario: tmp OK, tgt NO.
    Expected: the tgt hardlink is created and the tmp file remains
    untouched (same FID before and after the second sync).
    test-pair = 10
    """
    testdir.reset()
    testdir.mk_all_tgtdirs()
    opts = syncopts_defaults.copy()
    opts['tmpbase'] = os.path.abspath(testdir.psconfig.TMP_DIR)
    for entry in testdir.files:
        source_item = fsitem.FSItem(entry.path)
        target_name = source_item.absname.replace(testdir.source,
                                                  testdir.target)
        target_item = fsitem.FSItem(target_name)

        # First pass: sync with keeptmp so that a tmp file is left behind.
        opts['keeptmp'] = True
        tmp_item, _action = pylut.syncfile(src_path=source_item,
                                           tgt_path=target_item,
                                           **opts)

        # Remove the target so that only the tmp file remains.
        target_str = str(target_item)
        os.unlink(target_str)
        assert not os.path.lexists(target_str)
        target_item.update()

        # Remember the FID of the tmp file before the second pass.
        original_fid = tmp_item.inode()
        tmp_item.update()

        # Second pass: a valid tmp already exists, so it should be reused.
        tmp_item, _action = pylut.syncfile(src_path=source_item,
                                           tgt_path=target_item,
                                           **opts)

        # src and tgt are in sync
        assert _files_match(source_item, target_item, opts)
        # tmp and tgt are the same file
        assert _files_equal(target_item, tmp_item)
        # tmp FID is unchanged
        assert tmp_item.inode() == original_fid
Ejemplo n.º 5
0
def test_syncfile_04(testdir):
    """
    Re-sync when a valid tgt exists but tmp is missing (keeptmp enabled).

    Scenario: tmp NO, tgt OK.
    Expected: the tmp hardlink is created and the tgt file remains
    untouched (same FID before and after the second sync).
    test-pair = 08
    """
    testdir.reset()
    testdir.mk_all_tgtdirs()
    opts = syncopts_defaults.copy()
    opts['tmpbase'] = os.path.abspath(testdir.psconfig.TMP_DIR)
    for entry in testdir.files:
        source_item = fsitem.FSItem(entry.path)
        target_name = source_item.absname.replace(testdir.source,
                                                  testdir.target)
        target_item = fsitem.FSItem(target_name)

        # First pass: create tgt without keeping the tmp file.
        opts['keeptmp'] = False
        tmp_item, _action = pylut.syncfile(src_path=source_item,
                                           tgt_path=target_item,
                                           **opts)

        # Remember the FID of the target produced by the first pass.
        original_fid = target_item.inode()
        target_item.update()

        # The tmp file must be gone after a keeptmp=False sync.
        assert not os.path.lexists(str(tmp_item))

        # Second pass: this time the tmp file is kept.
        opts['keeptmp'] = True
        tmp_item, _action = pylut.syncfile(src_path=source_item,
                                           tgt_path=target_item,
                                           **opts)

        # tmp and tgt are the same file
        assert _files_equal(target_item, tmp_item)
        # tgt FID is unchanged
        assert target_item.inode() == original_fid
        # src and tgt are in sync
        assert _files_match(source_item, target_item, opts)
Ejemplo n.º 6
0
def syncfile(src_path,
             tgt_path,
             tmpbase=None,
             keeptmp=False,
             synctimes=False,
             syncperms=False,
             syncowner=False,
             syncgroup=False,
             pre_checksums=False,
             post_checksums=True):
    """
    Lustre stripe aware file sync.

    Copies a file to a temporary location, then creates a hardlink for the
    target.

    If either the tmp or the target file already exists, that existing file
    will be checked for accuracy by checking size and mtime (and checksums if
    pre_checksums=True).  If synctimes=False, tgt is assumed to be equal if
    tgt_mtime >= src_mtime; otherwise, if synctimes=True, tgt_mtime must be
    exactly equal to src_mtime or tgt will be assumed to be out of sync.  If
    a valid tmp or tgt exists and one or more of synctimes, syncperms,
    syncowner, syncgroup are specified, the specified metadata attributes of
    the tmp and/or tgt file will be checked and updated.

    If both tmp and tgt already exist, both will be checked for accuracy
    against src.  If both tmp and tgt are valid (accurate matches), nothing
    happens.

    If at least one of tmp or tgt is found to exist and be valid, the invalid
    file will be removed and a hardlink created to point to the valid file,
    thus avoiding a full file copy.

    If keeptmp=False, the tmp file hardlink will be removed.
    When copying a file with multiple hard links, set keeptmp=True to keep
    the tempfile around so the other hard links will not result in additional
    file copies.  It is up to the user of this function to remove the tmp
    files at a later time.

    The tmpbase parameter cannot be None (this requirement may be removed in
    a future version).  tmpbase will be created if necessary.  The tmpbase
    directory structure will not be removed and therefore must be cleaned up
    manually.

    If post_checksums=True (default), the checksums for src and tgt should be
    immediately available on the same parameters that were passed in (ie:
    src_path.checksum() and tgt_path.checksum() ).

    :param src_path FSItem: source file
    :param tgt_path FSItem: target file
    :param tmpbase    str: absolute path to directory where tmp files will be
                           created
    :param keeptmp   bool: if True, do not delete tmpfile (default=False)
    :param synctimes bool: sync file times (default=False)
    :param syncperms bool: sync file permissions (default=False)
    :param syncowner bool: sync file owner (default=False)
    :param syncgroup bool: sync file group (default=False)
    :param pre_checksums  bool: use checksum to determine if src and tgt
                                differ (default=False)
    :param post_checksums bool: if source was copied to target, compare
                                checksums to verify target was written
                                correctly (default=True)
    :return two-tuple:
        1. fsitem.FSItem: full path to tmpfile (even if keeptmp=False)
        2. action_taken: dict with keys of 'data_copy' and 'meta_update' and
           values of True or False depending on the action taken
    :raises UserWarning: if tmpbase is None, or if the dd copy used for
        large files produces more than two lines of error output
    :raises SyncError: if the source inode lookup, tmpdir creation,
        setstripe, rsync, hardlink creation or tmp file removal fails, or
        if the post-copy checksums of src and tgt differ
    """
    if tmpbase is None:
        #TODO - If tmpbase is None, create one at the mountpoint
        # tmpbase = _pathjoin(
        #     fsitem.getmountpoint( tgt_path ),
        #     '.pylutsyncfiletmpbase' )
        raise UserWarning('Default tmpbase not yet implemented')
    # Construct full path to tmpfile: base + <5-char hex value> + <INODE>
    try:
        srcfid = src_path.inode()
    except (Run_Cmd_Error) as e:
        raise SyncError(reason=e.reason, origin=e)
    # The last 5 characters of the hashed FID pick the tmp subdirectory.
    # NOTE(review): if srcfid is a str, hash() is salted per process on
    # Python 3 unless PYTHONHASHSEED is fixed, so this directory name would
    # not be stable across runs -- confirm the target interpreter.
    tmpdir = _pathjoin(tmpbase, hex(hash(srcfid))[-5:])
    tmp_path = fsitem.FSItem(os.path.join(tmpdir, srcfid))
    log.debug('tmp_path:{0}'.format(tmp_path))
    # rsync logic: what already exists on the tgt FS and what needs to be updated
    # The decision phase below only sets these flags (and unlinks invalid
    # files); the actions themselves are carried out afterwards in a fixed
    # order: mktmpdir, setstripe (+dd), rsync, hardlink, tmp removal, checksum.
    do_mktmpdir = False
    do_setstripe = False
    setstripe_tgt = None
    setstripe_stripeinfo = None
    do_rsync = False
    rsync_src = None
    rsync_tgt = None
    do_hardlink = False
    hardlink_src = None
    hardlink_tgt = None
    do_checksums = False
    sync_action = {'data_copy': False, 'meta_update': False}
    # Options forwarded to _compare_files when validating existing files
    syncopts = {
        'synctimes': synctimes,
        'syncperms': syncperms,
        'syncowner': syncowner,
        'syncgroup': syncgroup,
        'pre_checksums': pre_checksums,
        'post_checksums': post_checksums,
    }
    tmp_exists, tmp_data_ok, tmp_meta_ok = (False, ) * 3
    tgt_exists, tgt_data_ok, tgt_meta_ok = (False, ) * 3
    tmp_exists = tmp_path.exists()
    if tmp_exists:
        log.debug('tmp exists, comparing tmp to src')
        tmp_data_ok, tmp_meta_ok = _compare_files(src_path, tmp_path, syncopts)
    tgt_exists = tgt_path.exists()
    if tgt_exists:
        log.debug('tgt exists, comparing tgt to src')
        tgt_data_ok, tgt_meta_ok = _compare_files(src_path, tgt_path, syncopts)
    # Case 1: both files exist.  Invalid files are unlinked here and their
    # state flags reset, so that the later cases pick up the remaining work.
    if tmp_exists and tgt_exists:
        log.debug('tmp and tgt exist')
        if tmp_path.inode() == tgt_path.inode():
            log.debug('tmp and tgt are same file')
            if tmp_data_ok:
                if not tmp_meta_ok:
                    log.debug('tmp needs metadata update')
                    sync_action['meta_update'] = True
                    do_rsync = True
                    rsync_src = src_path
                    rsync_tgt = tmp_path
            else:
                log.debug('tmp not ok, unset all')
                os.unlink(str(tmp_path))
                tmp_path.update()
                os.unlink(str(tgt_path))
                tgt_path.update()
                tmp_exists, tmp_data_ok, tmp_meta_ok = (False, ) * 3
                tgt_exists, tgt_data_ok, tgt_meta_ok = (False, ) * 3
        else:
            log.debug('tmp and tgt are different files')
            # check if one of tmp or tgt are ok, to avoid unnecessary data transfer
            if tmp_data_ok:
                log.debug('tmp data ok, unset tgt vars')
                os.unlink(str(tgt_path))
                tgt_path.update()
                tgt_exists, tgt_data_ok, tgt_meta_ok = (False, ) * 3
            elif tgt_data_ok:
                log.debug('tgt data ok, unset tmp vars')
                os.unlink(str(tmp_path))
                tmp_path.update()
                tmp_exists, tmp_data_ok, tmp_meta_ok = (False, ) * 3
            else:
                log.debug('neither tmp nor tgt are ok, unset both')
                os.unlink(str(tmp_path))
                tmp_path.update()
                os.unlink(str(tgt_path))
                tgt_path.update()
                tmp_exists, tmp_data_ok, tmp_meta_ok = (False, ) * 3
                tgt_exists, tgt_data_ok, tgt_meta_ok = (False, ) * 3
    # Case 2: exactly one of tmp/tgt exists (either originally, or because
    # case 1 just removed the other one).
    if tmp_exists != tgt_exists:
        # only one file exists
        if tmp_exists:
            log.debug('tmp exists, tgt doesnt')
            if tmp_data_ok:
                log.debug('tmp data ok, tgt needs hardlink')
                do_hardlink = True
                hardlink_src = tmp_path
                hardlink_tgt = tgt_path
                if not tmp_meta_ok:
                    log.debug('tmp needs meta update')
                    sync_action['meta_update'] = True
                    do_rsync = True
                    rsync_src = src_path
                    rsync_tgt = tmp_path
            else:
                log.debug('tmp not ok, unset tmp vars')
                os.unlink(str(tmp_path))
                tmp_path.update()
                tmp_exists, tmp_data_ok, tmp_meta_ok = (False, ) * 3
        else:
            log.debug('tgt exists, tmp doesnt')
            if tgt_data_ok:
                log.debug('tgt data ok')
                if keeptmp:
                    log.debug('keeptmp=True, tmp needs hardlink')
                    do_mktmpdir = True
                    do_hardlink = True
                    hardlink_src = tgt_path
                    hardlink_tgt = tmp_path
                else:
                    log.debug('keeptmp=False, no action needed')
                if not tgt_meta_ok:
                    log.debug('tgt needs metadata update')
                    sync_action['meta_update'] = True
                    do_rsync = True
                    rsync_src = src_path
                    rsync_tgt = tgt_path
            else:
                log.debug('tgt not ok, unset tgt vars')
                os.unlink(str(tgt_path))
                tgt_path.update()
                tgt_exists, tgt_data_ok, tgt_meta_ok = (False, ) * 3
    # Case 3: nothing usable exists (either originally, or because the
    # cases above removed every invalid file), so a full copy is needed.
    if not (tmp_exists or tgt_exists):
        log.debug('neither tmp nor tgt exist')
        sync_action.update(data_copy=True, meta_update=True)
        if src_path.is_regular():
            do_setstripe = True
            setstripe_stripeinfo = src_path.stripeinfo()
        if keeptmp:
            # Copy into tmp, then hardlink tmp to tgt
            do_mktmpdir = True
            setstripe_tgt = tmp_path  #will be ignored if do_setstripe is False
            do_rsync = True
            rsync_src = src_path
            rsync_tgt = tmp_path
            do_hardlink = True
            hardlink_src = tmp_path
            hardlink_tgt = tgt_path
            do_checksums = True
        else:
            # Copy straight into tgt; no tmp file is created
            log.debug('keeptmp is false, skipping tmpfile creation')
            setstripe_tgt = tgt_path  #will be ignored if do_setstripe is False
            do_rsync = True
            rsync_src = src_path
            rsync_tgt = tgt_path
            do_checksums = True
    if do_mktmpdir:
        # Ensure tmpdir exists
        log.debug('create tmpdir {0}'.format(tmpdir))
        try:
            os.makedirs(tmpdir)
        except (OSError) as e:
            # OSError: [Errno 17] File exists
            # An already existing tmpdir is fine; anything else is fatal.
            if e.errno != 17:
                raise SyncError('Unable to create tmpdir {0}'.format(tmpdir),
                                e)
    if do_setstripe:
        # Set stripe to create the new file with the expected stripe information
        log.debug('setstripe (create) {0}'.format(setstripe_tgt))
        try:
            setstripeinfo(setstripe_tgt,
                          count=setstripe_stripeinfo.count,
                          size=setstripe_stripeinfo.size)
        except (Run_Cmd_Error) as e:
            msg = 'Setstripe failed for {0}'.format(setstripe_tgt)
            raise SyncError(msg, e)
        # The dd pre-copy is nested under do_setstripe, so it only applies
        # to regular files that are being freshly copied (case 3 above,
        # which always sets rsync_src and rsync_tgt).
        if rsync_src.size > env['PYLUTRSYNCMAXSIZE']:
            # DD for large files
            # TODO - replace dd with ddrescue (for efficient handling of sparse files)
            cmd = ['/bin/dd']
            opts = {
                'bs': 4194304,
                'if': rsync_src,
                'of': rsync_tgt,
                'status': 'noxfer',
            }
            args = None
            # NOTE(review): unlike the setstripe and rsync calls, a
            # Run_Cmd_Error raised by this runcmd call is not converted to
            # SyncError -- confirm whether that is intended.
            (output, errput) = runcmd(cmd, opts, args)
            if len(errput.splitlines()) > 2:
                #TODO - it is hackish to ignore errors based on line count, better is to
                #       use a dd that supports "status=none"
                # NOTE(review): this raises UserWarning, whereas every other
                # failure in the action phase raises SyncError.
                raise UserWarning(
                    "errors during dd of '{0}' -> '{1}': output='{2}' errors='{3}'"
                    .format(rsync_src, rsync_tgt, output, errput))
    if do_rsync:
        # Do the rsync
        # This runs even after a dd pre-copy, since do_rsync is always set
        # in the branch that sets do_setstripe.
        cmd = [env['PYLUTRSYNCPATH']]
        opts = {'--compress-level': 0}
        args = ['-l', '-A', '-X', '--super', '--inplace', '--specials']
        # Only the metadata the caller asked for is synced
        if synctimes:
            args.append('-t')
        if syncperms:
            args.append('-p')
        if syncowner:
            args.append('-o')
        if syncgroup:
            args.append('-g')
        args.extend([rsync_src, rsync_tgt])
        try:
            (output, errput) = runcmd(cmd, opts, args)
        except (Run_Cmd_Error) as e:
            raise SyncError(reason=e.reason, origin=e)
        # Any error output at all is treated as a failed sync
        if len(errput) > 0:
            raise SyncError(
                reason="errors during sync of '{0}' -> '{1}'".format(
                    rsync_src, rsync_tgt),
                origin="output='{0}' errors='{1}'".format(output, errput))
    if do_hardlink:
        log.debug('hardlink {0} <- {1}'.format(hardlink_src, hardlink_tgt))
        try:
            os.link(str(hardlink_src), str(hardlink_tgt))
        except (OSError) as e:
            raise SyncError(
                reason='Caught exception for link {0} -> {1}'.format(
                    hardlink_src, hardlink_tgt),
                origin=e)
    # Delete tmp
    # The identity check means only the literal False triggers removal;
    # other falsy values (None, 0) leave the tmp file in place.
    if keeptmp is False:
        log.debug('unlink tmpfile {0}'.format(tmp_path))
        try:
            os.unlink(str(tmp_path))
        except (OSError) as e:
            # OSError: [Errno 2] No such file or directory
            # A missing tmp file is expected when none was ever created.
            if e.errno != 2:
                raise SyncError(
                    'Error attempting to delete tmp {0}'.format(tmp_path), e)
        #tmp_path.update()
        # TODO - replace rmtree with safer alternative
        #        walk dirs backwards and rmdir each
        #shutil.rmtree( tmpbase ) #this will force delete everything, careful
    # do_checksums is only set when a full data copy was scheduled
    if do_checksums and post_checksums:
        # Compare checksums to verify target file was written accurately
        src_checksum = src_path.checksum()
        tgt_checksum = tgt_path.checksum()
        if src_checksum != tgt_checksum:
            reason = 'Checksum mismatch'
            origin = 'src_file={sf}, tgt_file={tf}, '\
                     'src_checksum={sc}, tgt_checksum={tc}'.format(
                        sf=src_path, tf=tgt_path, sc=src_checksum, tc=tgt_checksum )
            raise SyncError(reason, origin)
    return (tmp_path, sync_action)
Ejemplo n.º 7
0
import pylut
import fsitem

# Example invocation: sync a single file into a project directory.
# NOTE(review): no tmpbase is passed here; the syncfile implementation
# visible in this file raises UserWarning when tmpbase is None -- confirm
# this example matches the pylut version in use.
src_path = fsitem.FSItem('/u/staff/aloftus/lustre_version.pbs')
tgt_path = fsitem.FSItem('/projects/test/psynctest/lustre_version.pbs')

pylut.syncfile(src_path, tgt_path)