Example #1
0
    def test_get_update_links_wspace(self):
        wspath = os.path.join(self.tmp_dir, 'wspace')

        hfspath = os.path.join(self.tmp_dir, 'objectsfs')
        ohfs = MultihashFS(hfspath)
        key = ohfs.put(HDATA_IMG_1)
        fidx = FullIndex(self.tmp_dir, self.tmp_dir)
        cachepath = os.path.join(self.tmp_dir, 'cachefs')
        cache = Cache(cachepath, '', '')

        testbucketname = os.getenv('MLGIT_TEST_BUCKET', 'ml-git-datasets')
        c = get_sample_config_spec(testbucketname, testprofile, testregion)

        r = LocalRepository(c, hfspath)
        r._update_cache(cache, key)

        mfiles = {}
        files = {DATA_IMG_1}
        r._update_links_wspace(cache, fidx, files, key, wspath, mfiles, Status.u.name, 'strict')

        wspace_file = os.path.join(wspath, DATA_IMG_1)
        set_write_read(wspace_file)
        self.assertTrue(os.path.exists(wspace_file))
        self.assertEqual(self.md5sum(HDATA_IMG_1), self.md5sum(wspace_file))
        st = os.stat(wspace_file)
        fi = fidx.get_index()
        for k, v in fi.items():
            self.assertEqual(k, os.path.join('data', 'imghires.jpg'))
            self.assertEqual(v['hash'], 'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh')
            self.assertEqual(v['status'], 'u')
            self.assertEqual(v['ctime'], st.st_ctime)
            self.assertEqual(v['mtime'], st.st_mtime)
        self.assertTrue(st.st_nlink == 2)
        self.assertEqual(mfiles, {DATA_IMG_1: 'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh'})
Example #2
0
    def test_get_update_links_wspace_with_duplicates(self):
        wspath = os.path.join(self.tmp_dir, 'wspace')

        hfspath = os.path.join(self.tmp_dir, 'objectsfs')
        ohfs = MultihashFS(hfspath)
        key = ohfs.put(HDATA_IMG_1)
        fidx = FullIndex(self.tmp_dir, self.tmp_dir)
        cachepath = os.path.join(self.tmp_dir, 'cachefs')
        cache = Cache(cachepath, '', '')

        testbucketname = os.getenv('MLGIT_TEST_BUCKET', 'ml-git-datasets')
        c = get_sample_config_spec(testbucketname, testprofile, testregion)

        r = LocalRepository(c, hfspath)
        r._update_cache(cache, key)

        mfiles = {}
        files = {DATA_IMG_1, DATA_IMG_2}
        r._update_links_wspace(cache, fidx, files, key, wspath, mfiles, Status.u.name, 'strict')

        wspace_file = os.path.join(wspath, DATA_IMG_1)
        self.assertTrue(os.path.exists(wspace_file))
        self.assertEqual(self.md5sum(HDATA_IMG_1), self.md5sum(wspace_file))

        wspace_file = os.path.join(wspath, DATA_IMG_2)
        self.assertTrue(os.path.exists(wspace_file))
        self.assertEqual(self.md5sum(HDATA_IMG_1), self.md5sum(wspace_file))
        st = os.stat(wspace_file)
        self.assertTrue(st.st_nlink == 3)
        self.assertEqual(mfiles, {DATA_IMG_1: 'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh',
                                  DATA_IMG_2: 'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh'})

        wspath = os.path.join(self.tmp_dir, 'wspace')
        ensure_path_exists(wspath)
        to_be_removed = os.path.join(wspath, 'to_be_removed')
        with open(to_be_removed, 'w') as f:
            f.write('DEAD\n')

        hfspath = os.path.join(self.tmp_dir, 'objectsfs')
        ohfs = MultihashFS(hfspath)
        key = ohfs.put(HDATA_IMG_1)
        fidx = FullIndex(self.tmp_dir, self.tmp_dir)
        cachepath = os.path.join(self.tmp_dir, 'cachefs')
        cache = Cache(cachepath, '', '')
        c = yaml_load('hdata/config.yaml')
        r = LocalRepository(c, hfspath)
        r._update_cache(cache, key)

        mfiles = {}
        files = {DATA_IMG_1, DATA_IMG_2}
        r._update_links_wspace(cache, fidx, files, key, wspath, mfiles, Status.u.name, 'strict')
        r._remove_unused_links_wspace(wspath, mfiles)
        self.assertFalse(os.path.exists(to_be_removed))