def test_upload_stream(self):
    s, sz = gen_rand_anon_mmap()
    fn = gen_rand_nm()
    h = hashing.IncrementalHasher()

    n = self.acd_client.upload_stream(s, fn, parent=None, read_callbacks=[h.update])
    self.assertEqual(n['contentProperties']['md5'], h.get_result())
    self.assertEqual(n['contentProperties']['size'], sz)

    self.acd_client.move_to_trash(n['id'])

def test_overwrite(self):
    f, sz = gen_temp_file()
    h = hashing.IncrementalHasher()

    n = self.acd_client.create_file(os.path.basename(f.name))
    self.assertIn('id', n)

    n = self.acd_client.overwrite_file(n['id'], f.name, [h.update])
    self.assertEqual(n['contentProperties']['version'], 2)
    self.assertEqual(n['contentProperties']['md5'], h.get_result())

    self.acd_client.move_to_trash(n['id'])

def test_overwrite_stream(self):
    s, sz = gen_rand_anon_mmap()
    fn = gen_rand_nm()
    h = hashing.IncrementalHasher()

    n = self.acd_client.create_file(fn)
    self.assertIn('id', n)

    n = self.acd_client.overwrite_stream(s, n['id'], [h.update])
    self.assertEqual(n['contentProperties']['md5'], h.get_result())
    self.assertEqual(n['contentProperties']['size'], sz)

    self.acd_client.move_to_trash(n['id'])

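# The tests above pass h.update as a read callback and compare h.get_result()
# against the MD5 reported by the server. Below is a minimal sketch of what such
# an incremental hasher could look like; the actual class ships with acd_cli's
# hashing module and may differ in detail.
import hashlib


class IncrementalHasher(object):
    """Accumulates an MD5 digest chunk by chunk via callback."""

    def __init__(self):
        self.hash = hashlib.md5()

    def update(self, chunk: bytes):
        self.hash.update(chunk)

    def get_result(self) -> str:
        return self.hash.hexdigest()
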
def upload_stream(stream, file_name, parent_id, dedup=False,
                  pg_handler: progress.FileProgress=None) -> RetryRetVal:
    hasher = hashing.IncrementalHasher()
    try:
        r = content.upload_stream(stream, file_name, parent_id,
                                  read_callbacks=[hasher.update, pg_handler.update],
                                  deduplication=dedup)
        sync.insert_node(r)
        node = query.get_node(r['id'])
        return compare_hashes(node.md5, hasher.get_result(), 'stream')
    except RequestError as e:
        logger.error('Error uploading stream. Code: %s, msg: %s' % (e.status_code, e.msg))
        return UL_DL_FAILED

def upload_stream(stream, file_name, parent_id, dedup=False,
                  pg_handler: progress.FileProgress = None) -> RetryRetVal:
    hasher = hashing.IncrementalHasher()
    try:
        r = acd_client.upload_stream(stream, file_name, parent_id,
                                     read_callbacks=[hasher.update, pg_handler.update],
                                     deduplication=dedup)
        cache.insert_node(r)
        node = cache.get_node(r['id'])
        return compare_hashes(node.md5, hasher.get_result(), 'stream')
    except RequestError as e:
        logger.error('Error uploading stream. %s' % str(e))
        return UL_DL_FAILED

def overwrite(node_id, local_file, dedup=False,
              pg_handler: progress.FileProgress = None) -> RetryRetVal:
    hasher = hashing.IncrementalHasher()
    try:
        r = acd_client.overwrite_file(node_id, local_file,
                                      read_callbacks=[hasher.update, pg_handler.update],
                                      deduplication=dedup)
        cache.insert_node(r)
        node = cache.get_node(r['id'])
        md5 = node.md5
        return compare_hashes(md5, hasher.get_result(), local_file)
    except RequestError as e:
        logger.error('Error overwriting file. %s' % str(e))
        return UL_DL_FAILED

def overwrite(node_id, local_file, dedup=False,
              pg_handler: progress.FileProgress=None) -> RetryRetVal:
    hasher = hashing.IncrementalHasher()
    try:
        r = content.overwrite_file(node_id, local_file,
                                   read_callbacks=[hasher.update, pg_handler.update],
                                   deduplication=dedup)
        sync.insert_node(r)
        node = query.get_node(r['id'])
        md5 = node.md5
        return compare_hashes(md5, hasher.get_result(), local_file)
    except RequestError as e:
        logger.error('Error overwriting file. Code: %s, msg: %s' % (e.status_code, e.msg))
        return UL_DL_FAILED

def download_file(node_id: str, local_path: str,
                  pg_handler: progress.FileProgress = None) -> RetryRetVal:
    node = cache.get_node(node_id)
    name, md5, size = node.name, node.md5, node.size
    # db.Session.remove()  # otherwise, sqlalchemy will complain if thread crashes

    logger.info('Downloading "%s"' % name)

    hasher = hashing.IncrementalHasher()
    try:
        acd_client.download_file(node_id, name, local_path, length=size,
                                 write_callbacks=[hasher.update, pg_handler.update])
    except RequestError as e:
        logger.error('Downloading "%s" failed. %s' % (name, str(e)))
        return UL_DL_FAILED
    else:
        return compare_hashes(hasher.get_result(), md5, name)

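# All transfer paths above funnel their result through compare_hashes, which is
# not shown here. A hedged, self-contained sketch of what such a check could
# look like; the constant name HASH_MISMATCH and the logging behaviour are
# assumptions, not necessarily acd_cli's actual implementation.
import logging

logger = logging.getLogger(__name__)
HASH_MISMATCH = 16  # hypothetical non-zero error code


def compare_hashes(hash1: str, hash2: str, file_name: str) -> int:
    """Return 0 if the two MD5 hex digests match, an error code otherwise."""
    if hash1 != hash2:
        logger.error('Hash mismatch between local and remote file for "%s".' % file_name)
        return HASH_MISMATCH
    logger.debug('Hash of "%s" is correct.' % file_name)
    return 0
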
def test_overwrite_stream(self):
    s, sz = gen_rand_anon_mmap()
    fn = gen_rand_nm()
    h = hashing.IncrementalHasher()

    n = self.acd_client.create_file(fn)
    self.assertIn('id', n)

    n = self.acd_client.overwrite_stream(s, n['id'], [h.update])
    self.assertEqual(n['contentProperties']['md5'], h.get_result())
    self.assertEqual(n['contentProperties']['size'], sz)

    empty_stream = io.BufferedReader(io.BytesIO())
    n = self.acd_client.overwrite_stream(empty_stream, n['id'])
    self.assertEqual(n['contentProperties']['md5'], 'd41d8cd98f00b204e9800998ecf8427e')
    self.assertEqual(n['contentProperties']['size'], 0)

    self.acd_client.move_to_trash(n['id'])

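# The stream tests depend on helpers that generate random in-memory content and
# random file names. Hypothetical sketches for illustration only; the real
# helpers live in acd_cli's test utilities and may differ.
import mmap
import os
import random
import string


def gen_rand_anon_mmap(size: int = 1024 * 1024):
    """Return an anonymous memory map filled with random bytes, plus its size."""
    m = mmap.mmap(-1, size)
    m.write(os.urandom(size))
    m.seek(0)
    return m, size


def gen_rand_nm(length: int = 32) -> str:
    """Return a random ASCII file name."""
    return ''.join(random.choice(string.ascii_letters + string.digits)
                   for _ in range(length))
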
def download(node_id: str, local_path: str, exclude: list) -> int:
    node = query.get_node(node_id)
    if not node.is_available():
        return 0

    if node.is_folder():
        return download_folder(node_id, local_path, exclude)

    loc_name = node.name

    # # downloading a non-cached node
    # if not loc_name:
    #     loc_name = node_id

    for reg in exclude:
        if re.match(reg, loc_name):
            print('Skipping download of "%s" because of exclusion pattern.' % loc_name)
            return 0

    hasher = hashing.IncrementalHasher()
    try:
        print('Current file: %s' % loc_name)
        content.download_file(node_id, loc_name, local_path, length=node.size,
                              write_callback=hasher.update)
    except RequestError as e:
        logger.error('Downloading "%s" failed. Code: %s, msg: %s'
                     % (loc_name, e.status_code, e.msg))
        return UL_DL_FAILED

    return compare_hashes(hasher.get_result(), node.md5, loc_name)

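# download() skips any node whose name matches one of the exclusion regexes via
# re.match. A quick self-contained illustration with a hypothetical pattern:
import re

exclude_patterns = [r'.*\.tmp$']
for name in ('report.pdf', 'scratch.tmp'):
    skipped = any(re.match(reg, name) for reg in exclude_patterns)
    print(name, 'skipped' if skipped else 'downloaded')
# report.pdf downloaded
# scratch.tmp skipped
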
def upload_file(path: str, parent_id: str, overwr: bool, force: bool, dedup: bool,
                pg_handler: progress.FileProgress = None) -> RetryRetVal:
    short_nm = os.path.basename(path)

    if dedup and cache.file_size_exists(os.path.getsize(path)):
        nodes = cache.find_md5(hashing.hash_file(path))
        nodes = [n for n in format.PathFormatter(nodes)]
        if len(nodes) > 0:
            # print('Skipping upload of duplicate file "%s".' % short_nm)
            logger.info('Location of duplicates: %s' % nodes)
            pg_handler.done()
            return DUPLICATE

    conflicting_node = cache.conflicting_node(short_nm, parent_id)
    file_id = None
    if conflicting_node:
        if conflicting_node.is_folder():
            logger.error('Name collision with existing folder '
                         'in the same location: "%s".' % short_nm)
            return NAME_COLLISION

        file_id = conflicting_node.id

    if not file_id:
        logger.info('Uploading %s' % path)
        hasher = hashing.IncrementalHasher()
        try:
            r = acd_client.upload_file(path, parent_id,
                                       read_callbacks=[hasher.update, pg_handler.update],
                                       deduplication=dedup)
        except RequestError as e:
            if e.status_code == 409:  # might happen if cache is outdated
                if not dedup:
                    logger.error('Uploading "%s" failed. Name collision with non-cached file. '
                                 'If you want to overwrite, please sync and try again.'
                                 % short_nm)
                else:
                    logger.error('Uploading "%s" failed. '
                                 'Name or hash collision with non-cached file.' % short_nm)
                logger.info(e)
                # colliding node ID is returned in error message -> could be used to continue
                return CACHE_ASYNC
            elif e.status_code == 504 or e.status_code == 408:  # proxy timeout / request timeout
                logger.warning('Timeout while uploading "%s".' % short_nm)
                # TODO: wait; request parent folder's children
                return UL_TIMEOUT
            else:
                logger.error('Uploading "%s" failed. %s.' % (short_nm, str(e)))
                return UL_DL_FAILED
        else:
            cache.insert_node(r)
            file_id = r['id']
            md5 = cache.get_node(file_id).md5
            return compare_hashes(hasher.get_result(), md5, short_nm)

    # else: file exists
    if not overwr and not force:
        logger.info('Skipping upload of existing file "%s".' % short_nm)
        pg_handler.done()
        return 0

    rmod = (conflicting_node.modified - datetime(1970, 1, 1)) / timedelta(seconds=1)
    rmod = datetime.utcfromtimestamp(rmod)
    lmod = datetime.utcfromtimestamp(os.path.getmtime(path))
    lcre = datetime.utcfromtimestamp(os.path.getctime(path))

    logger.debug('Remote mtime: %s, local mtime: %s, local ctime: %s' % (rmod, lmod, lcre))

    # ctime is checked because files can be overwritten by files with older mtime
    if rmod < lmod or (rmod < lcre and conflicting_node.size != os.path.getsize(path)) \
            or force:
        return overwrite(file_id, path, dedup=dedup, pg_handler=pg_handler).ret_val
    elif not force:
        logger.info('Skipping upload of "%s" because of mtime or ctime and size.' % short_nm)
        pg_handler.done()
        return 0
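
# The overwrite decision in upload_file() converts the remote datetime to a Unix
# timestamp by dividing a timedelta by one second before comparing against the
# local mtime/ctime. A self-contained illustration of that conversion, using a
# made-up remote modification time:
from datetime import datetime, timedelta

remote_modified = datetime(2015, 6, 1, 12, 0, 0)
epoch_seconds = (remote_modified - datetime(1970, 1, 1)) / timedelta(seconds=1)
print(epoch_seconds)                             # 1433160000.0
print(datetime.utcfromtimestamp(epoch_seconds))  # 2015-06-01 12:00:00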