Example #1
def test_get_recurse_dirs(o_path, c_path):

    # prepare source:
    origin = Dataset(o_path).create(force=True)
    origin.add('.')

    ds = install(
        c_path, source=o_path,
        result_xfm='datasets', return_type='item-or-list')

    file_list = ['file1.txt',
                 opj('subdir', 'file2.txt'),
                 opj('subdir', 'subsubdir', 'file3.txt'),
                 opj('subdir', 'subsubdir', 'file4.txt')]
    files_in_sub = [f for f in file_list if f.startswith(with_pathsep('subdir'))]

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    result = ds.get('subdir')

    # check result:
    assert_status('ok', result)
    eq_(set([item.get('path')[len(ds.path) + 1:] for item in result
             if item['type'] == 'file']),
        set(files_in_sub))
    # we also get one report on the subdir
    eq_(len(result) - 1, len(files_in_sub))

    # got all files beneath subdir:
    ok_(all(ds.repo.file_has_content(files_in_sub)))

    # additionally got file1.txt silently, since it has the same content as
    # subdir/subsubdir/file4.txt:
    ok_(ds.repo.file_has_content('file1.txt') is True)
Example #2
def module_manifest(path):
    """Returns path to module manifest if one can be found under `path`, else `None`."""
    if not path:
        return None
    for manifest_name in MANIFEST_NAMES:
        if os.path.isfile(opj(path, manifest_name)):
            return opj(path, manifest_name)
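
A minimal usage sketch, reusing module_manifest as defined above. MANIFEST_NAMES and opj are not shown in the snippet, so the values here are assumptions mirroring the usual Odoo manifest file names:

import os
import tempfile
from os.path import join as opj

MANIFEST_NAMES = ('__manifest__.py', '__openerp__.py')  # assumed values, not from the original

with tempfile.TemporaryDirectory() as mod_dir:
    open(opj(mod_dir, '__manifest__.py'), 'w').close()       # imitate a module folder
    assert module_manifest(mod_dir) == opj(mod_dir, '__manifest__.py')
    assert module_manifest('') is None                        # empty path short-circuits to None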
Example #3
def _check_ri(ri, cls, exact_str=True, localpath=None, **fields):
    """just a helper to carry out few checks on urls"""
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ri_ = cls(**fields)
        murl = RI(ri)
        eq_(murl.__class__, cls)  # not just a subclass
        eq_(murl, ri_)
        eq_(str(RI(ri)), ri)
        eq_(eval(repr(ri_)), ri)  # repr leads back to identical ri_
        eq_(ri, ri_)  # just in case ;)  above should fail first if smth is wrong
        if not exact_str:
            assert_in('Parsed version of', cml.out)
    (eq_ if exact_str else neq_)(ri, str(ri_))  # that we can reconstruct it EXACTLY on our examples
    # and that we have access to all those fields
    nok_(set(fields).difference(set(cls._FIELDS)))
    for f, v in fields.items():
        eq_(getattr(ri_, f), v)

    if localpath:
        eq_(ri_.localpath, localpath)
        old_localpath = ri_.localpath  # for a test below
    else:
        # if not given -- must be a remote url, should raise exception
        with assert_raises(ValueError):
            ri_.localpath

    # do changes in the path persist?
    old_str = str(ri_)
    ri_.path = newpath = opj(ri_.path, 'sub')
    eq_(ri_.path, newpath)
    neq_(str(ri_), old_str)
    if localpath:
        eq_(ri_.localpath, opj(old_localpath, 'sub'))
Example #4
def test_get_mixed_hierarchy(src, path):

    origin = Dataset(src).create(no_annex=True)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('file_in_git.txt', to_git=True)
    origin_sub.add('file_in_annex.txt')
    origin.save()

    # now, install that thing:
    ds, subds = install(
        path, source=src, recursive=True,
        result_xfm='datasets', return_type='item-or-list', result_filter=None)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    result = ds.get(curdir, recursive=True)
    # git repo and subds
    assert_status(['ok', 'notneeded'], result)
    assert_result_count(
        result, 1, path=opj(subds.path, "file_in_annex.txt"), status='ok')
    ok_(subds.repo.file_has_content("file_in_annex.txt") is True)
Example #5
def fits_job1(model_names=None, model_cmps=None):
    from os.path import join as opj
    from itertools import product
    #
    obs_datfile = '/n/Users/fdu/now/tab_obs.dat'
    res_dir = '/n/Users/fdu/now/res/'
    out_dir = '/n/Users/fdu/'
    dir_lines = ['CO', '13CO', 'C18O', 'O', 'C', 'C+', 'oH2O_A', 'pH2O_A', 'HD']
    #
    for model_name, model_cmp in product(model_names, model_cmps):
        model_dir = opj(res_dir, model_name)
        fname_html = opj(out_dir, 'tab_{0:s}.html'.format(model_name.replace('/', '')))
        fname_overview = opj(out_dir, 'cmp_{0:s}.pdf'.format(model_name.replace('/', '')))
        #
        obsd = parse_latex_txt_table(obs_datfile)
        #
        fnames = glob_fnames(model_dir, dir_lines)
        b = batch_proc(fnames, _obs_data = obsd)
        #
        cmp_dir = opj(res_dir, model_cmp)
        fnames_cmp = glob_fnames(cmp_dir, dir_lines)
        bcmp = batch_proc(fnames_cmp, _obs_data = obsd)
        #
        b.all_to_html_cmp(bcmp, fname_html)
        print 'Html file generated: ', fname_html
        #
        b.draw_overview(fname_overview, bcmp.specs)
        print 'Pdf file generated: ', fname_overview
Example #6
    def initiate(self):
        if self._initiated:
            return
        self._initiated = True
        d = opj(self.repopath, '.git', 'bin')
        if not exists(d):
            os.makedirs(d)

        suf = '-' + self.custom_remote_name.rstrip(':') if self.custom_remote_name else ''
        self._file = _file = opj(d, 'git-annex-remote-datalad' + suf)

        if exists(_file):
            lgr.debug("Commenting out previous entries")
            # comment out all the past entries
            with open(_file) as f:
                entries = f.readlines()
            for i in range(len(self.HEADER.split(os.linesep)), len(entries)):
                e = entries[i]
                if e.startswith('recv ') or e.startswith('send '):
                    entries[i] = '#' + e
            with open(_file, 'w') as f:
                f.write(''.join(entries))
            return  # nothing else to be done

        lgr.debug("Initiating protocoling."
                  "cd %s; vim %s"
                  % (realpath(self.repopath),
                     _file[len(self.repopath) + 1:]))
        with open(_file, 'a') as f:
            f.write(self.HEADER)
        os.chmod(_file, 0o755)
Example #7
    def import_zipfile(self, module_file, force=False):
        if not module_file:
            raise Exception(_("No file sent."))
        if not zipfile.is_zipfile(module_file):
            raise UserError(_('File is not a zip file!'))

        success = []
        errors = dict()
        module_names = []
        with zipfile.ZipFile(module_file, "r") as z:
            for zf in z.filelist:
                if zf.file_size > MAX_FILE_SIZE:
                    raise UserError(_("File '%s' exceed maximum allowed file size") % zf.filename)

            with tempdir() as module_dir:
                z.extractall(module_dir)
                dirs = [d for d in os.listdir(module_dir) if os.path.isdir(opj(module_dir, d))]
                for mod_name in dirs:
                    module_names.append(mod_name)
                    try:
                        # assert mod_name.startswith('theme_')
                        path = opj(module_dir, mod_name)
                        self.import_module(mod_name, path, force=force)
                        success.append(mod_name)
                    except Exception, e:
                        _logger.exception('Error while importing module')
                        errors[mod_name] = exception_to_unicode(e)
Example #8
def test_remove_file_handle_only(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ok_clean_git(ds.path)
    # make sure the file has a key
    ok_(len(ds.repo.get_file_key('one')))
    # both files link to the same key
    eq_(ds.repo.get_file_key('one'),
        ds.repo.get_file_key('two'))
    rpath_one = realpath(opj(ds.path, 'one'))
    eq_(rpath_one, realpath(opj(ds.path, 'two')))
    path_two = opj(ds.path, 'two')
    ok_(exists(path_two))
    # remove one handle, should not affect the other
    ds.remove('two', check=False, message="custom msg")
    eq_(ds.repo.repo.head.commit.message.rstrip(), "custom msg")
    eq_(rpath_one, realpath(opj(ds.path, 'one')))
    ok_(exists(rpath_one))
    ok_(not exists(path_two))
    # remove file without specifying the dataset -- shouldn't fail
    with chpwd(path):
        remove('one', check=False)
        ok_(not exists("one"))
    # and we should be able to remove without saving
    ds.remove('three', check=False, save=False)
    ok_(ds.repo.dirty)
Example #9
def test_ExtractedArchive(path):
    archive = opj(path, fn_archive_obscure_ext)
    earchive = ExtractedArchive(archive)
    assert_false(exists(earchive.path))
    # no longer the case -- just using hash for now
    # assert_in(os.path.basename(archive), earchive.path)

    fpath = opj(fn_archive_obscure,  # lead directory
                fn_in_archive_obscure)
    extracted = earchive.get_extracted_filename(fpath)
    eq_(extracted, opj(earchive.path, fpath))
    assert_false(exists(extracted))  # not yet

    extracted_ = earchive.get_extracted_file(fpath)
    eq_(extracted, extracted_)
    assert_true(exists(extracted))  # now it should

    extracted_files = earchive.get_extracted_files()
    ok_generator(extracted_files)
    eq_(sorted(extracted_files),
        sorted([
            # ['bbc/3.txt', 'bbc/abc']
            opj(fn_archive_obscure, fn_in_archive_obscure),
            opj(fn_archive_obscure, '3.txt')
        ]))

    earchive.clean()
    if not os.environ.get('DATALAD_TESTS_TEMP_KEEP'):
        assert_false(exists(earchive.path))
Example #10
def test_add_subdataset(path, other):
    subds = create(opj(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # without a base dataset the next is interpreted as "add everything
    # in subds to subds"
    add(subds.path)
    ok_clean_git(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    ds.add(subds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    #  create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=opj(ds.path, 'other'))
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.add('other')
    # and that is why we can re-obtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
Example #11
def test_uninstall_git_file(path):
    ds = Dataset(path)
    ok_(ds.is_installed())
    ok_(exists(opj(path, 'INFO.txt')))
    ok_file_under_git(ds.repo.path, 'INFO.txt')

    # drop file in Git in an annex repo
    # regardless of the type of repo this is 'notneeded'...
    # it is less about education than about "can we
    # get the content back?", and for a file in Git we can
    assert_result_count(
        ds.drop(path='INFO.txt'),
        1,
        status='notneeded',
        message="no annex'ed content")

    res = ds.uninstall(path="INFO.txt", on_failure='ignore')
    assert_result_count(
        res, 1,
        status='impossible',
        message='can only uninstall datasets (consider the `drop` command)')

    # remove the file:
    res = ds.remove(path='INFO.txt', result_xfm='paths',
                    result_filter=lambda x: x['action'] == 'remove')
    assert_raises(AssertionError, ok_file_under_git, ds.repo.path, 'INFO.txt')
    ok_(not exists(opj(path, 'INFO.txt')))
    eq_(res, ['INFO.txt'])
Example #12
    def import_zipfile(self, cr, uid, module_file, force=False, context=None):
        if not module_file:
            raise Exception("No file sent.")
        if not zipfile.is_zipfile(module_file):
            raise UserError(_("File is not a zip file!"))

        success = []
        errors = dict()
        module_names = []
        with zipfile.ZipFile(module_file, "r") as z:
            for zf in z.filelist:
                if zf.file_size > MAX_FILE_SIZE:
                    msg = _("File '%s' exceed maximum allowed file size")
                    raise UserError(msg % zf.filename)

            with openerp.tools.osutil.tempdir() as module_dir:
                z.extractall(module_dir)
                dirs = [d for d in os.listdir(module_dir) if os.path.isdir(opj(module_dir, d))]
                for mod_name in dirs:
                    module_names.append(mod_name)
                    try:
                        # assert mod_name.startswith('theme_')
                        path = opj(module_dir, mod_name)
                        self.import_module(cr, uid, mod_name, path, force=force, context=context)
                        success.append(mod_name)
                    except Exception, e:
                        errors[mod_name] = str(e)
Example #13
def test_add_recursive(path):
    # make simple hierarchy
    parent = Dataset(path).create()
    ok_clean_git(parent.path)
    sub1 = parent.create(opj('down', 'sub1'))
    ok_clean_git(parent.path)
    sub2 = parent.create('sub2')
    # the next one makes the parent dirty
    subsub = sub2.create('subsub')
    ok_clean_git(parent.path, index_modified=['sub2'])
    res = parent.save()
    ok_clean_git(parent.path)

    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    ok_clean_git(parent.path, index_modified=['sub2'])

    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.add('.', recursive=True)
    # the key action is done
    assert_result_count(
        res, 1, path=opj(subsub.path, 'new'), action='add', status='ok')
    # sub1 is untouched, and not reported
    assert_result_count(res, 0, path=sub1.path)
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    ok_clean_git(parent.path)
Example #14
def test_publish_gh1691(origin, src_path, dst_path):

    # prepare src; no subdatasets installed, but mount points present
    source = install(src_path, source=origin, recursive=False)
    ok_(exists(opj(src_path, "subm 1")))
    assert_false(Dataset(opj(src_path, "subm 1")).is_installed())

    # some content modification of the superdataset
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)

    # create the target(s):
    source.create_sibling(
        'ssh://localhost:' + dst_path,
        name='target', recursive=True)

    # publish recursively, which silently ignores non-installed datasets
    results = source.publish(to='target', recursive=True)
    assert_result_count(results, 1)
    assert_result_count(results, 1, status='ok', type='dataset', path=source.path)

    # if however, a non-installed subdataset is requested explicitly, it'll fail
    results = source.publish(path='subm 1', to='target', on_failure='ignore')
    assert_result_count(results, 1, status='impossible', type='dataset', action='publish')
Example #15
def get_module_resource(module, *args):
    """Return the full path of a resource of the given module.

    :param module: module name
    :param list(str) args: resource path components within module

    :rtype: str
    :return: absolute path to the resource

    TODO name it get_resource_path
    TODO make it available inside on osv object (self.get_resource_path)
    """
    mod_path = get_module_path(module)
    if not mod_path: return False
    resource_path = opj(mod_path, *args)
    if os.path.isdir(mod_path):
        # the module is a directory - ignore zip behavior
        if os.path.exists(resource_path):
            return resource_path
    elif zipfile.is_zipfile(mod_path + '.zip'):
        zip = zipfile.ZipFile( mod_path + ".zip")
        files = ['/'.join(f.split('/')[1:]) for f in zip.namelist()]
        resource_path = '/'.join(args)
        if resource_path in files:
            return opj(mod_path, resource_path)
    return False
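
A hedged usage sketch: the module name and resource components below are illustrative, and resolving them requires an Odoo/OpenERP environment in which get_module_path('base') succeeds.

icon_path = get_module_resource('base', 'static', 'description', 'icon.png')
if icon_path:
    print('resource found at', icon_path)
else:
    print('module unknown or resource missing')  # the function returns False in that case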
Example #16
def test_basic_aggregate(path):
    # TODO give datasets some more metadata to actually aggregate stuff
    base = Dataset(opj(path, 'origin')).create(force=True)
    sub = base.create('sub', force=True)
    #base.metadata(sub.path, init=dict(homepage='this'), apply2global=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    # we will first aggregate the middle dataset on its own; this will
    # serve as a smoke test for the reuse of metadata objects later on
    sub.aggregate_metadata()
    base.save()
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    direct_meta = base.metadata(recursive=True, return_type='list')
    # lose the deepest dataset
    sub.uninstall('subsub', check=False)
    # now we should be able to re-aggregate metadata and lose nothing
    # because we can aggregate the aggregated metadata of subsub from sub
    base.aggregate_metadata(recursive=True, update_mode='all')
    # same result for aggregate query as for (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        print(d['path'], a['path'])
        assert_dict_equal(d, a)
    # now we can throw away the subdataset tree and lose no metadata
    base.uninstall('sub', recursive=True, check=False)
    assert(not sub.is_installed())
    ok_clean_git(base.path)
    # same result for aggregate query as for (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        assert_dict_equal(d, a)
Example #17
    def create_postupdate_hook(path, ssh, dataset):
        # location of post-update hook file, logs folder on remote target
        hooks_remote_dir = opj(path, '.git', 'hooks')
        hook_remote_target = opj(hooks_remote_dir, 'post-update')
        # post-update hook should create its log directory if it doesn't exist
        logs_remote_dir = opj(path, WEB_META_LOG)

        make_log_dir = 'mkdir -p "{}"'.format(logs_remote_dir)

        # create json command for current dataset
        json_command = r'''
        mkdir -p {};
        ( which datalad > /dev/null \
        && ( cd ..; GIT_DIR=$PWD/.git datalad ls -a --json file '{}'; ) \
        || echo "no datalad found - skipping generation of indexes for web frontend"; \
        ) &> "{}/{}"
        '''.format(logs_remote_dir,
                   str(path),
                   logs_remote_dir,
                   'datalad-publish-hook-$(date +%s).log' % TIMESTAMP_FMT)

        # collate content for post_update hook
        hook_content = '\n'.join(['#!/bin/bash', 'git update-server-info', make_log_dir, json_command])

        with make_tempfile(content=hook_content) as tempf:  # create post_update hook script
            ssh.copy(tempf, hook_remote_target)             # upload hook to dataset
        ssh(['chmod', '+x', hook_remote_target])            # and make it executable
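
A self-contained sketch of the same hook assembly written to a local repository directory instead of being copied over SSH; the log directory name stands in for WEB_META_LOG and is an assumption:

import os
import stat
import tempfile
from os.path import join as opj

with tempfile.TemporaryDirectory() as repo:
    hooks_dir = opj(repo, '.git', 'hooks')
    os.makedirs(hooks_dir)
    logs_dir = opj(repo, '.datalad', 'web', 'logs')   # assumed stand-in for WEB_META_LOG
    hook_content = '\n'.join([
        '#!/bin/bash',
        'git update-server-info',
        'mkdir -p "{}"'.format(logs_dir),
    ])
    hook_path = opj(hooks_dir, 'post-update')
    with open(hook_path, 'w') as f:
        f.write(hook_content)
    os.chmod(hook_path, os.stat(hook_path).st_mode | stat.S_IEXEC)   # make it executable
    print(open(hook_path).read())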
Example #18
def test_publish_aggregated(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    base.create('sub', force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)

    # create sibling and publish to it
    spath = opj(path, 'remote')
    base.create_sibling(
        name="local_target",
        sshurl="ssh://localhost",
        target_dir=spath)
    base.publish('.', to='local_target', transfer_data='all')
    remote = Dataset(spath)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = list(sorted(base.repo.find(objpath)))
    # all object files are present in both datasets
    eq_(all(base.repo.file_has_content(objs)), True)
    eq_(all(remote.repo.file_has_content(objs)), True)
    # and we can squeeze the same metadata out
    eq_(
        [{k: v for k, v in i.items() if k not in ('path', 'refds', 'parentds')}
         for i in base.metadata('sub')],
        [{k: v for k, v in i.items() if k not in ('path', 'refds', 'parentds')}
         for i in remote.metadata('sub')],
    )
Example #19
def test_aggregate_removal(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = sub.create(opj('subsub'), force=True)
    base.add('.', recursive=True)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    assert_result_count(res, 1, path=subsub.path)
    # check that we only have object files that are listed in agginfo
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # now delete the deepest subdataset to test cleanup of aggregated objects
    # in the top-level ds
    base.remove(opj('sub', 'subsub'), check=False)
    # now aggregation has to detect that subsub is not simply missing, but gone
    # for good
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    # internally consistent state
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # info on subsub was removed at all levels
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 2)
    res = sub.metadata(get_aggregates=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 1)
Example #20
def test_reaggregate_with_unavailable_objects(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = list(sorted(base.repo.find(objpath)))
    # we have 3x2 metadata sets (dataset/files) under annex
    eq_(len(objs), 6)
    eq_(all(base.repo.file_has_content(objs)), True)
    # drop all object content
    base.drop(objs, check=False)
    eq_(all(base.repo.file_has_content(objs)), False)
    ok_clean_git(base.path)
    # now re-aggregate, the state hasn't changed, so the file names will
    # be the same
    base.aggregate_metadata(recursive=True, update_mode='all', force_extraction=True)
    eq_(all(base.repo.file_has_content(objs)), True)
    # and there are no new objects
    eq_(
        objs,
        list(sorted(base.repo.find(objpath)))
    )
Example #21
def test_aggregate_with_unavailable_objects_from_subds(path, target):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)

    # now make that a subdataset of a new one, so aggregation needs to get the
    # metadata objects first:
    super = Dataset(target).create()
    super.install("base", source=base.path)
    ok_clean_git(super.path)
    clone = Dataset(opj(super.path, "base"))
    ok_clean_git(clone.path)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = [o for o in sorted(clone.repo.get_annexed_files(with_content_only=False)) if o.startswith(objpath)]
    eq_(len(objs), 6)
    eq_(all(clone.repo.file_has_content(objs)), False)

    # now aggregate should get those metadata objects
    super.aggregate_metadata(recursive=True, update_mode='all',
                             force_extraction=False)
    eq_(all(clone.repo.file_has_content(objs)), True)
Example #22
    def remove_handle(self, key):

        dir_ = self._key2filename(key)

        # remove handle from collection descriptor:
        uri = Graph().parse(opj(self.path, dir_, REPO_STD_META_FILE),
                            format="turtle").value(predicate=RDF.type,
                                                   object=DLNS.Handle)
        col_graph = Graph().parse(opj(self.path, REPO_STD_META_FILE),
                                  format="turtle")
        col_graph.remove((DLNS.this, DCTERMS.hasPart, uri))
        col_graph.serialize(opj(self.path, REPO_STD_META_FILE), format="turtle")

        # remove handle's directory:
        # Note: Currently all files are removed separately due to issues with the
        # normalize_path decorator in gitrepo.py. It expects one output per
        # one input file. So, recursively removing the 'dir_' violates that
        # assertion.
        # Note2: Currently using "-f" option, since on ntfs/vfat, git somehow
        # reports the files (at least config.ttl) have staged changes.
        # TODO: Figure out, what the hell this is about.
        [self.git_remove(file_, f=True) for file_ in self.get_indexed_files()
         if file_.startswith(dir_)]

        self.git_add(REPO_STD_META_FILE)
        self.git_commit("Removed handle %s." % key)
Example #23
def test_getpwd_symlink(tdir):
    sdir = opj(tdir, 's1')
    pwd_orig = getpwd()
    os.symlink('.', sdir)
    s1dir = opj(sdir, 's1')
    s2dir = opj(sdir, 's2')
    try:
        chpwd(sdir)
        pwd = getpwd()
        eq_(pwd, sdir)
        chpwd('s1')
        eq_(getpwd(), s1dir)
        chpwd('.')
        eq_(getpwd(), s1dir)
        chpwd('..')
        eq_(getpwd(), sdir)
    finally:
        chpwd(pwd_orig)

    # test context handler way of use
    with chpwd(s1dir):
        eq_(getpwd(), s1dir)
    eq_(getpwd(), pwd_orig)

    assert_false(exists(s2dir))
    with assert_raises(OSError):
        with chpwd(s2dir):
            pass
    with chpwd(s2dir, mkdir=True):
        ok_(exists(s2dir))
        eq_(getpwd(), s2dir)
Example #24
def test_add_handle_by_names(hurl, hpath, cpath, lcpath):

    class mocked_dirs:
        user_data_dir = lcpath

    with patch('datalad.cmdline.helpers.dirs', mocked_dirs), \
            swallow_logs() as cml:

        # get testrepos and make them known to datalad:
        handle = install_handle(hurl, hpath)
        collection = register_collection(cpath)
        assert_not_in(handle.name, collection)

        return_value = add_handle(handle.name, collection.name)

        # now handle is listed by collection:
        collection._reload()
        assert_in(handle.name, collection)

        # test collection repo:
        ok_clean_git(cpath, annex=False)
        ok_(isdir(opj(cpath, handle.name)))
        ok_(exists(opj(cpath, handle.name, REPO_CONFIG_FILE)))
        ok_(exists(opj(cpath, handle.name, REPO_STD_META_FILE)))

        # evaluate return value:
        assert_is_instance(return_value, Handle,
                           "install_handle() returns object of "
                           "incorrect class: %s" % type(return_value))
        eq_(return_value.name, handle.name)
        eq_(urlparse(return_value.url).path, urlparse(handle.url).path)
Example #25
def _get_file_matches(bids_directory, glob_pattern):
    files = glob(
        opj(bids_directory, "sub-*", "*", "sub-{}".format(glob_pattern)))
    files += glob(
        opj(bids_directory, "sub-*", "ses-*", "*", "sub-*_ses-{}".format(
            glob_pattern)))
    return files
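
A self-contained sketch of the BIDS-style layout these two globs target (files either directly under sub-*/<datatype>/ or under sub-*/ses-*/<datatype>/); all directory and file names below are illustrative:

import os
import tempfile
from glob import glob
from os.path import join as opj

with tempfile.TemporaryDirectory() as bids:
    os.makedirs(opj(bids, 'sub-01', 'anat'))
    os.makedirs(opj(bids, 'sub-02', 'ses-1', 'anat'))
    open(opj(bids, 'sub-01', 'anat', 'sub-01_T1w.nii.gz'), 'w').close()
    open(opj(bids, 'sub-02', 'ses-1', 'anat', 'sub-02_ses-1_T1w.nii.gz'), 'w').close()

    glob_pattern = '*_T1w.nii.gz'
    files = glob(opj(bids, 'sub-*', '*', 'sub-{}'.format(glob_pattern)))
    files += glob(opj(bids, 'sub-*', 'ses-*', '*', 'sub-*_ses-{}'.format(glob_pattern)))
    assert len(files) == 2   # one session-less and one session-based match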
Example #26
def test_dont_trip_over_missing_subds(path):
    ds1 = Dataset(opj(path, 'ds1')).create()
    ds2 = Dataset(opj(path, 'ds2')).create()
    subds2 = ds1.install(
        source=ds2.path, path='subds2',
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subds2.is_installed())
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    subds2.uninstall()
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    assert_false(subds2.is_installed())
    # see if it wants to talk to github (and fail), or if it trips over something
    # before
    assert_raises(gh.BadCredentialsException,
        ds1.create_sibling_github, 'bogus', recursive=True,
        github_login='******')
    # inject remote config prior run
    assert_not_in('github', ds1.repo.get_remotes())
    # fail on existing
    ds1.repo.add_remote('github', 'http://nothere')
    assert_raises(ValueError,
        ds1.create_sibling_github, 'bogus', recursive=True,
        github_login='******')
    # talk to github when existing is OK
    assert_raises(gh.BadCredentialsException,
        ds1.create_sibling_github, 'bogus', recursive=True,
        github_login='******', existing='reconfigure')
    # return happy emptiness when all is skipped
    assert_equal(
        ds1.create_sibling_github(
            'bogus', recursive=True,
            github_login='******', existing='skip'),
        [])
Example #27
def test_install_subdataset(src, path):
    # get the superdataset:
    ds = install(path=path, source=src)

    # subdataset not installed:
    subds = Dataset(opj(path, 'sub1'))
    assert_false(subds.is_installed())

    # install it:
    ds.install('sub1')

    ok_(subds.is_installed())
    # Verify that the correct submodule was installed and not a
    # new repository initiated
    assert_equal(set(subds.repo.get_indexed_files()),
                 {'test.dat', 'INFO.txt', 'test-annex.dat'})

    # Now the obnoxious case: install an annex file within a not yet
    # initialized repository!
    with swallow_outputs():  # progress bar
        ds.install(opj('sub2', 'test-annex.dat'))
    subds2 = Dataset(opj(path, 'sub2'))
    assert(subds2.is_installed())
    assert(subds2.repo.file_has_content('test-annex.dat'))
    # we shouldn't be able to silently ignore an attempt to provide a source while
    # "installing" a file under git
    assert_raises(FileInGitError, ds.install, opj('sub2', 'INFO.txt'), source="http://bogusbogus")
Example #28
    def call(self, runscript, workdir, out, err, runflags):
        """call(runscript, workdir, out, err, runflags)
        Execute the *runscript* in the folder *workdir*. Redirect output and error streams to *out* and *err*, respectively.

        Arguments *runscript*, *workdir*, *out* and *err* should be strings with paths to corresponding files or folders.

        *runflags* is a |Settings| instance containing the `run` branch of running job's settings. The basic job runner defined here ignores them, but they can be useful in |JobRunner| subclasses (see :meth:`GridRunner.call`).

        Returns an integer with the exit code returned by the *runscript*.

        This method can be safely overridden in |JobRunner| subclasses. For example, in |GridRunner| it submits the runscript to a queueing system instead of executing it locally.

        .. note::
            This method is used automatically during |run| and should never be explicitly called in your script.
        """
        log('Executing {}'.format(runscript), 5)
        command = ['./'+runscript] if os.name == 'posix' else ['sh', runscript]
        if out is not None:
            with open(opj(workdir, err), 'w') as e, open(opj(workdir, out), 'w') as o:
                process = saferun(command, cwd=workdir, stderr=e, stdout=o)
        else:
            with open(opj(workdir, err), 'w') as e:
                process = saferun(command, cwd=workdir, stderr=e)
        log('Execution of {} finished with returncode {}'.format(runscript, process.returncode), 5)
        return process.returncode
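
A self-contained sketch of the same run-and-redirect pattern using plain subprocess (saferun above appears to be a thin wrapper around it, which is an assumption); the tiny shell script is illustrative:

import os
import subprocess
import tempfile
from os.path import join as opj

with tempfile.TemporaryDirectory() as workdir:
    runscript = 'job.sh'
    with open(opj(workdir, runscript), 'w') as f:
        f.write('#!/bin/sh\necho hello\n')
    os.chmod(opj(workdir, runscript), 0o755)
    command = ['./' + runscript] if os.name == 'posix' else ['sh', runscript]
    with open(opj(workdir, 'job.err'), 'w') as e, open(opj(workdir, 'job.out'), 'w') as o:
        process = subprocess.run(command, cwd=workdir, stderr=e, stdout=o)
    print('exit code:', process.returncode)          # 0 on success
    print(open(opj(workdir, 'job.out')).read())      # 'hello'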
Example #29
def test_ignore_nondatasets(path):
    # we want to ignore the version/commits for this test
    def _kill_time(meta):
        for m in meta:
            for k in ('version', 'shasum'):
                if k in m:
                    del m[k]
        return meta

    ds = Dataset(path).create()
    meta = _kill_time(ds.metadata(reporton='datasets', on_failure='ignore'))
    n_subm = 0
    # placing another repo in the dataset has no effect on metadata
    for cls, subpath in ((GitRepo, 'subm'), (AnnexRepo, 'annex_subm')):
        subm_path = opj(ds.path, subpath)
        r = cls(subm_path, create=True)
        with open(opj(subm_path, 'test'), 'w') as f:
            f.write('test')
        r.add('test')
        r.commit('some')
        assert_true(Dataset(subm_path).is_installed())
        assert_equal(meta, _kill_time(ds.metadata(reporton='datasets', on_failure='ignore')))
        # making it a submodule has no effect either
        ds.add(subpath)
        assert_equal(len(ds.subdatasets()), n_subm + 1)
        assert_equal(meta, _kill_time(ds.metadata(reporton='datasets', on_failure='ignore')))
        n_subm += 1
Example #30
    def verify_files(self, data):
        files_path = opj(abspath(curdir), '_files')

        # list of files that exist from canonical tarball
        con_files = set(f_f('.*', topdir=curdir, exclude='./(_files|.datalad)'))
        assert con_files

        # list of files that are individually downloaded
        files = set(f_f('.*', topdir='_files'))
        assert files

        for item_from_con in con_files:
            item_compare = normpath(opj('_files', item_from_con))
            if item_compare in files:
                key_item = self.repo.get_file_key(item_from_con)
                key_files_item = self.repo.get_file_key(item_compare)
                if key_item == key_files_item:
                    pass
                else:
                    lgr.warning("%s varies in content from the individually downloaded "
                                "file with the same name, it is removed and file "
                                "from canonical tarball is kept" % item_from_con)

                files.discard(item_compare)
            else:
                lgr.warning("%s does not exist in the individually listed files by name, "
                            "but will be kept from canonical tarball" % item_compare)

        if files:
            lgr.warning("The following files do not exist in the canonical tarball, but are "
                        "individually listed files and will not be kept: %s" % files)

        rmtree(files_path)
        yield data
Example #31
def filter_unmodified(content_by_ds, refds, since):
    """Filter per-dataset path specifications based on modification history.

    This function takes a path specification dictionary, as produced by
    `Interface._prep()`, and filters it such that only that subset of paths
    remains in the dictionary that corresponds to the set of changes in
    the given reference dataset since a given state.

    The change set is traced across all related subdatasets, i.e. if a submodule
    in the reference dataset is reported as modified then all paths for any given
    subdataset in the modified one are tested for changes too (based on the
    state at which the parent dataset reports a change in the subdataset), and so
    on.

    In doing so, not only unmodified given paths are removed, but also modified
    given paths are replaced by the set of actually modified paths within them.

    Only committed changes are considered!

    Parameters
    ----------
    content_by_ds : dict
      Per-dataset path specifications, as produced, for example, by
      `Interface._prep()`
    refds : Dataset or *Repo or path
      Reference dataset for which to determine the initial change set
    since : state
      Any commit-ish/tree-ish supported by Git (tag, commit, branch, ...).
      Changes between this given state and the most recent commit are
      evaluated.

    Returns
    -------
    dict
      Filtered path spec dictionary. If `since` is not None, the output is
      guaranteed to only contain paths to modified, and presently existing
      components of subdatasets of the given reference dataset (and itself).
    """
    if since is None:
        # we want all, subds not matching the ref are assumed to have been
        # sorted out before (e.g. one level up)
        return content_by_ds
    # turn refds argument into a usable repo instance
    if not hasattr(refds, 'path'):
        # not a Repo or Dataset
        refds_path = refds
        refds = GitRepo(refds, create=False)
    else:
        refds_path = refds.path
    repo = refds.repo
    if hasattr(repo, 'repo'):
        # TODO use GitRepo.diff() when available (gh-1217)
        repo = repo.repo

    dict_class = content_by_ds.__class__  # could be ordered dict

    # life is simple: we diff the base dataset, and kill anything that
    # does not start with something that is in the diff
    # we cannot really limit the diff paths easily because we might get
    # or miss content (e.g. subdatasets) if we don't figure out which ones
    # are known -- and we don't want that
    try:
        diff = repo.commit().diff(since)
    except GitCommandError as exc:
        # could fail because `since` points to a non-existing location.
        # Unfortunately there might be no meaningful message
        # e.g. "fatal: ambiguous argument 'HEAD^': unknown revision or path not in the working tree"
        # logged within this GitCommandError for some reason! So let's check
        # that value of since post-error for being correct:
        try:
            refds.repo._git_custom_command(
                [], ['git', 'show', '--stat', since, '--'],
                expect_stderr=True,
                expect_fail=True)
            raise  # re-raise since our idea was incorrect
        except CommandError as ce_exc:
            if ce_exc.stderr.startswith('fatal: bad revision'):
                raise ValueError(
                    "Value since=%r is not valid. Git reports: %s" %
                    (since, exc_str(ce_exc)))
            else:
                raise  # re-raise

    # get all modified paths (with original? commit) that are still
    # present
    modified = dict(
        (opj(refds_path, d.b_path), d.b_blob.hexsha if d.b_blob else None)
        for d in diff)
    if not modified:
        # nothing modified nothing to report
        return dict_class()
    # determine the subset that is a directory and hence is relevant for possible
    # subdatasets
    modified_dirs = {_with_sep(d) for d in modified if isdir(d)}
    # find the subdatasets matching modified paths, this will also kick out
    # any paths that are not in the dataset sub-hierarchy
    mod_subs = dict_class(
        (candds, paths) for candds, paths in content_by_ds.items()
        if candds != refds_path and any(
            _with_sep(candds).startswith(md) for md in modified_dirs))
    # now query the next level down
    keep_subs = \
        [filter_unmodified(mod_subs, subds_path, modified[subds_path])
         for subds_path in mod_subs
         if subds_path in modified]
    # merge result list into a single dict
    keep = dict_class((k, v) for d in keep_subs for k, v in d.items())

    paths_refds = content_by_ds[refds_path]
    keep[refds_path] = [
        m for m in modified if lexists(m)  # still around
        and (m in paths_refds  # listed file, or subds
             # or a modified path under a given directory
             or any(m.startswith(_with_sep(p)) for p in paths_refds))
    ]
    return keep
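
A minimal self-contained sketch of the prefix-matching idea used in the final filtering step above; no Git is involved and all paths are made up:

import os
from os.path import join as opj

def _with_sep(path):
    return path if path.endswith(os.sep) else path + os.sep

refds = opj(os.sep, 'ds')
requested = [opj(refds, 'file1.txt'), opj(refds, 'docs')]              # paths given for refds
modified = [opj(refds, 'docs', 'intro.rst'), opj(refds, 'other.txt')]  # paths the diff reports

keep = [m for m in modified
        if m in requested
        or any(m.startswith(_with_sep(p)) for p in requested)]
print(keep)   # only the modified path under the requested 'docs' directory survives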
Example #32
import sys
from os.path import lexists, dirname, join as opj, curdir

# Hard coded version, to be done by release process,
# it is also "parsed" (not imported) by setup.py, that is why assigned as
# __hardcoded_version__ later and not vise versa
__version__ = '0.13.4'
__hardcoded_version__ = __version__
__full_version__ = __version__

# NOTE: might cause problems with "python setup.py develop" deployments
#  so I have even changed buildbot to use  pip install -e .
moddir = dirname(__file__)
projdir = curdir if moddir == 'datalad' else dirname(moddir)
if lexists(opj(projdir, '.git')):
    # If under git -- attempt to deduce a better "dynamic" version following git
    try:
        from subprocess import Popen, PIPE
        # Note: Popen does not support the `with` statement correctly in 2.7
        #
        git = Popen([
            'git', 'describe', '--abbrev=4', '--dirty', '--match', r'[0-9]*\.*'
        ],
                    stdout=PIPE,
                    stderr=PIPE,
                    cwd=projdir)
        if git.wait() != 0:
            raise OSError("Could not run git describe")
        line = git.stdout.readlines()[0]
        _ = git.stderr.readlines()
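
The snippet above is truncated. As a hedged continuation sketch, one plausible way to fold a `git describe` line into a richer version string (an illustration with a hypothetical helper, not the project's actual release logic):

def describe_to_version(base_version, describe_line):
    desc = describe_line.strip()
    if not desc or desc == base_version:
        return base_version
    # e.g. '0.13.4-21-gdeadbee' -> '0.13.4+21.gdeadbee'
    return desc.replace('-', '+', 1).replace('-', '.', 1)

print(describe_to_version('0.13.4', '0.13.4'))              # unchanged on a tagged commit
print(describe_to_version('0.13.4', '0.13.4-21-gdeadbee'))  # '0.13.4+21.gdeadbee'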
Example #33
def test_fs_traverse(topdir):
    # setup temp directory tree for testing
    annex = AnnexRepo(topdir)
    AnnexRepo(opj(topdir, 'annexdir'), create=True)
    GitRepo(opj(topdir, 'gitdir'), create=True)
    GitRepo(opj(topdir, 'dir', 'subgit'), create=True)
    annex.add(opj(topdir, 'dir'))
    annex.commit()
    annex.drop(opj(topdir, 'dir', 'subdir', 'file2.txt'), options=['--force'])

    # traverse file system in recursive and non-recursive modes
    for recursive in [True, False]:
        # test fs_traverse in display mode
        with swallow_logs(new_level=logging.INFO) as log, swallow_outputs() as cmo:
            repo = AnnexRepo(topdir)
            fs = fs_traverse(topdir, repo, recurse_directories=recursive, json='display')
            if recursive:
                # fs_traverse logs should contain all not ignored subdirectories
                for subdir in [opj(topdir, 'dir'), opj(topdir, 'dir', 'subdir')]:
                    assert_in('Directory: ' + subdir, log.out)
                # fs_traverse stdout contains subdirectory
                assert_in(('file2.txt' and 'dir'), cmo.out)

            # extract info of the top-level child directory
            child = [item for item in fs['nodes'] if item['name'] == 'dir'][0]
            # size of dir type child in non-recursive modes should be 0 Bytes (default) as
            # dir type child's size currently has no metadata file for traverser to pick its size from
            # and would require a recursive traversal w/ write to child metadata file mode
            assert_equal(child['size']['total'], {True: '6 Bytes', False: '0 Bytes'}[recursive])
            repo.precommit()  # to possibly stop batch process occupying the stdout

    for recursive in [True, False]:
        # run fs_traverse in write to json 'file' mode
        repo = AnnexRepo(topdir)
        fs = fs_traverse(topdir, repo, recurse_directories=recursive, json='file')
        # fs_traverse should return a dictionary
        assert_equal(isinstance(fs, dict), True)
        # not including git and annex folders
        assert_equal([item for item in fs['nodes'] if item['name'] in ('gitdir', 'annexdir')], [])
        # extract info of the top-level child directory
        child = [item for item in fs['nodes'] if item['name'] == 'dir'][0]
        # verify node type
        assert_equal(child['type'], 'dir')
        # same node size on running fs_traversal in recursive followed by non-recursive mode
        # verifies child's metadata file being used to find its size
        # running in reverse order (non-recursive followed by recursive mode) will give (0, actual size)
        assert_equal(child['size']['total'], '6 Bytes')

        # verify subdirectory traversal if run in recursive mode
        # In current RF 'nodes' are stripped away during recursive traversal
        # for now... later we might reincarnate them "differently"
        # TODO!
        if False:  # recursive:
            # sub-dictionary should not include git and hidden directory info
            assert_equal([item for item in child['nodes'] if item['name'] in ('subgit', '.fgit')], [])
            # extract subdirectory dictionary, else fail
            subchild = [subitem for subitem in child["nodes"] if subitem['name'] == 'subdir'][0]
            # extract info of file1.txt, else fail
            link = [subnode for subnode in subchild["nodes"] if subnode['name'] == 'file1.txt'][0]
            # verify node's sizes and type
            assert_equal(link['size']['total'], '3 Bytes')
            assert_equal(link['size']['ondisk'], link['size']['total'])
            assert_equal(link['type'], 'link')
            # extract info of file2.txt, else fail
            brokenlink = [subnode for subnode in subchild["nodes"] if subnode['name'] == 'file2.txt'][0]
            # verify node's sizes and type
            assert_equal(brokenlink['type'], 'link-broken')
            assert_equal(brokenlink['size']['ondisk'], '0 Bytes')
            assert_equal(brokenlink['size']['total'], '3 Bytes')
Example #34
def get_metahash(*path):
    if not path:
        path = ['/']
    return hashlib.md5(opj(*path).encode('utf-8')).hexdigest()
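
A self-contained sketch of how such a hash is typically used downstream: the hex digest of the joined relative path becomes the metadata file name (the .git/datalad/metadata location mirrors meta_dir in the next example and is otherwise an assumption):

import hashlib
from os.path import join as opj

def get_metahash(*path):
    if not path:
        path = ['/']
    return hashlib.md5(opj(*path).encode('utf-8')).hexdigest()

meta_dir = opj('.git', 'datalad', 'metadata')
print(opj(meta_dir, get_metahash()))                 # metadata file for the dataset root
print(opj(meta_dir, get_metahash('dir', 'subdir')))  # metadata file for dir/subdir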
Example #35
def test_ls_json(topdir, topurl):
    annex = AnnexRepo(topdir, create=True)
    ds = Dataset(topdir)
    # create some file and commit it
    with open(opj(ds.path, 'subdsfile.txt'), 'w') as f:
        f.write('123')
    ds.save(path='subdsfile.txt', message="Hello!", version_tag=1)

    # add a subdataset
    ds.install('subds', source=topdir)

    subdirds = ds.create(_path_('dir/subds2'), force=True)
    subdirds.save('file')

    git = GitRepo(opj(topdir, 'dir', 'subgit'), create=True)                    # create git repo
    git.add(opj(topdir, 'dir', 'subgit', 'fgit.txt'))                           # commit to git to init git repo
    git.commit()
    annex.add(opj(topdir, 'dir', 'subgit'))                                     # add the non-dataset git repo to annex
    annex.add(opj(topdir, 'dir'))                                               # add to annex (links)
    annex.drop(opj(topdir, 'dir', 'subdir', 'file2.txt'), options=['--force'])  # broken-link
    annex.commit()

    git.add('fgit.txt')              # commit to git to init git repo
    git.commit()
    # annex.add doesn't add submodule, so using ds.add
    ds.save(opj('dir', 'subgit'))                   # add the non-dataset git repo to annex
    ds.save('dir')                                  # add to annex (links)
    ds.drop(opj('dir', 'subdir', 'file2.txt'), check=False)  # broken-link

    # register "external" submodule  by installing and uninstalling it
    ext_url = topurl + '/dir/subgit/.git'
    # need to make it installable via http
    Runner()('git update-server-info', cwd=opj(topdir, 'dir', 'subgit'))
    ds.install(opj('dir', 'subgit_ext'), source=ext_url)
    ds.uninstall(opj('dir', 'subgit_ext'))
    meta_dir = opj('.git', 'datalad', 'metadata')

    def get_metahash(*path):
        if not path:
            path = ['/']
        return hashlib.md5(opj(*path).encode('utf-8')).hexdigest()

    def get_metapath(dspath, *path):
        return _path_(dspath, meta_dir, get_metahash(*path))

    def get_meta(dspath, *path):
        with open(get_metapath(dspath, *path)) as f:
            return js.load(f)

    # Let's see that there is no crash if one of the files is available only
    # in relaxed URL mode, so no size could be picked up
    ds.repo.add_url_to_file(
        'fromweb', topurl + '/noteventhere', options=['--relaxed'])

    for all_ in [True, False]:  # recurse directories
        for recursive in [True, False]:
            for state in ['file', 'delete']:
                # subdataset should have its json created and deleted when
                # all=True else not
                subds_metapath = get_metapath(opj(topdir, 'subds'))
                exists_prior = exists(subds_metapath)

                #with swallow_logs(), swallow_outputs():
                dsj = _ls_json(
                    topdir,
                    json=state,
                    all_=all_,
                    recursive=recursive
                )
                ok_startswith(dsj['tags'], '1-')

                exists_post = exists(subds_metapath)
                # print("%s %s -> %s" % (state, exists_prior, exists_post))
                assert_equal(exists_post, (state == 'file' and recursive))

                # root should have its json file created and deleted in all cases
                ds_metapath = get_metapath(topdir)
                assert_equal(exists(ds_metapath), state == 'file')

                # children should have their metadata json's created and deleted only when recursive=True
                child_metapath = get_metapath(topdir, 'dir', 'subdir')
                assert_equal(exists(child_metapath), (state == 'file' and all_))

                # ignored directories should not have json files created in any case
                for subdir in [('.hidden',), ('dir', 'subgit')]:
                    assert_false(exists(get_metapath(topdir, *subdir)))

                # check if it's updated in its nodes sublist too; used by web-ui json (regression test)
                assert_equal(dsj['nodes'][0]['size']['total'], dsj['size']['total'])

                # check size of subdataset
                subds = [item for item in dsj['nodes'] if item['name'] == ('subdsfile.txt' or 'subds')][0]
                assert_equal(subds['size']['total'], '3 Bytes')

                # dir/subds2 must not be listed among nodes of the top dataset:
                topds_nodes = {x['name']: x for x in dsj['nodes']}

                assert_in('subds', topds_nodes)
                # XXX
                # # condition here is a bit a guesswork by yoh later on
                # # TODO: here and below clear destiny/interaction of all_ and recursive
                # assert_equal(dsj['size']['total'],
                #              '15 Bytes' if (recursive and all_) else
                #              ('9 Bytes' if (recursive or all_) else '3 Bytes')
                # )

                # https://github.com/datalad/datalad/issues/1674
                if state == 'file' and all_:
                    dirj = get_meta(topdir, 'dir')
                    dir_nodes = {x['name']: x for x in dirj['nodes']}
                    # it should be present in the subdir meta
                    assert_in('subds2', dir_nodes)
                    assert_not_in('url_external', dir_nodes['subds2'])
                    assert_in('subgit_ext', dir_nodes)
                    assert_equal(dir_nodes['subgit_ext']['url'], ext_url)
                # and not in topds
                assert_not_in('subds2', topds_nodes)

                # run non-recursive dataset traversal after subdataset metadata already created
                # to verify sub-dataset metadata being picked up from its metadata file in such cases
                if state == 'file' and recursive and not all_:
                    dsj = _ls_json(topdir, json='file', all_=False)
                    subds = [
                        item for item in dsj['nodes']
                        if item['name'] == ('subdsfile.txt' or 'subds')
                    ][0]
                    assert_equal(subds['size']['total'], '3 Bytes')

                assert_equal(
                    topds_nodes['fromweb']['size']['total'], UNKNOWN_SIZE
                )
Example #36
def make_studyforrest_mockup(path):
    """Generate a dataset structure mimicking aspects of studyforrest.org

    Under the given path there are two directories:

    public - to be published datasets
    private - never to be published datasets

    The 'public' directory itself is a superdataset, the 'private' directory
    is just a directory that contains standalone datasets in subdirectories.
    """
    public = create(opj(path, 'public'), description="umbrella dataset")
    # the following tries to capture the evolution of the project
    phase1 = public.create('phase1',
                           description='old-style, no connection to RAW')
    structural = public.create('structural', description='anatomy')
    tnt = public.create('tnt', description='image templates')
    tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless='auto')
    tnt.clone(source=structural.path,
              path=opj('src', 'structural'),
              reckless='auto')
    aligned = public.create('aligned', description='aligned image data')
    aligned.clone(source=phase1.path,
                  path=opj('src', 'phase1'),
                  reckless='auto')
    aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless='auto')
    # new acquisition
    labet = create(opj(path, 'private', 'labet'), description="raw data ET")
    phase2_dicoms = create(opj(path, 'private', 'p2dicoms'),
                           description="raw data P2MRI")
    phase2 = public.create('phase2', description='new-style, RAW connection')
    phase2.clone(source=labet.path, path=opj('src', 'labet'), reckless='auto')
    phase2.clone(source=phase2_dicoms.path,
                 path=opj('src', 'dicoms'),
                 reckless='auto')
    # add to derivatives
    tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless='auto')
    aligned.clone(source=phase2.path,
                  path=opj('src', 'phase2'),
                  reckless='auto')
    # never to be published media files
    media = create(opj(path, 'private', 'media'), description="raw data ET")
    # assuming all annotations are in one dataset (in reality this is also
    # a superdataset with about 10 subdatasets)
    annot = public.create('annotations', description='stimulus annotation')
    annot.clone(source=media.path, path=opj('src', 'media'), reckless='auto')
    # a few typical analysis datasets
    # (just doing 3, actual status quo is just shy of 10)
    # and also the real goal -> meta analysis
    metaanalysis = public.create('metaanalysis',
                                 description="analysis of analyses")
    for i in range(1, 3):
        ana = public.create('analysis{}'.format(i),
                            description='analysis{}'.format(i))
        ana.clone(source=annot.path, path=opj('src', 'annot'), reckless='auto')
        ana.clone(source=aligned.path,
                  path=opj('src', 'aligned'),
                  reckless='auto')
        ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless='auto')
        # link to metaanalysis
        metaanalysis.clone(source=ana.path,
                           path=opj('src', 'ana{}'.format(i)),
                           reckless='auto')
        # simulate change in an input (but not raw) dataset
        create_tree(aligned.path,
                    {'modification{}.txt'.format(i): 'unique{}'.format(i)})
        aligned.save()
    # finally aggregate data
    aggregate = public.create('aggregate', description='aggregate data')
    aggregate.clone(source=aligned.path,
                    path=opj('src', 'aligned'),
                    reckless='auto')
Example #37
def _ls_dataset(loc, fast=False, recursive=False, all_=False, long_=False):
    isabs_loc = isabs(loc)
    topdir = '' if isabs_loc else abspath(curdir)

    topds = Dataset(loc)
    dss = [topds] + (
        [Dataset(opj(loc, sm))
         for sm in topds.subdatasets(recursive=recursive, result_xfm='relpaths')]
        if recursive else [])

    dsms = []
    for ds in dss:
        if not ds.is_installed():
            dsm = AbsentRepoModel(ds.path)
        elif isinstance(ds.repo, AnnexRepo):
            dsm = AnnexModel(ds.repo)
        elif isinstance(ds.repo, GitRepo):
            dsm = GitModel(ds.repo)
        else:
            raise RuntimeError("Got some dataset which don't know how to handle %s"
                               % ds)
        dsms.append(dsm)

    # adjust path strings
    for ds_model in dsms:
        #path = ds_model.path[len(topdir) + 1 if topdir else 0:]
        path = relpath(ds_model.path, topdir) if topdir else ds_model.path
        if not path:
            path = '.'
        ds_model.path = path
    dsms = sorted(dsms, key=lambda m: m.path)

    maxpath = max(len(ds_model.path) for ds_model in dsms)
    path_fmt = u"{ds.path!U:<%d}" % (maxpath + (11 if is_interactive() else 0))  # + to accommodate ansi codes
    pathtype_fmt = path_fmt + u"  [{ds.type}]"
    full_fmt = pathtype_fmt + u"  {ds.branch!N}  {ds.describe!N} {ds.date!D}"
    if (not fast) or long_:
        full_fmt += u"  {ds.clean!X}"

    fmts = {
        AbsentRepoModel: pathtype_fmt,
        GitModel: full_fmt,
        AnnexModel: full_fmt
    }
    if long_:
        fmts[AnnexModel] += u"  {ds.annex_local_size!S}/{ds.annex_worktree_size!S}"

    formatter = LsFormatter()
    # weird problems happen in the parallel run -- TODO - figure it out
    # for out in Parallel(n_jobs=1)(
    #         delayed(format_ds_model)(formatter, dsm, full_fmt, format_exc=path_fmt + "  {msg!R}")
    #         for dsm in dss):
    #     print(out)
    for dsm in dsms:
        fmt = fmts[dsm.__class__]
        ds_str = format_ds_model(formatter, dsm, fmt, format_exc=path_fmt + u"  {msg!R}")
        safe_print(ds_str)
        # workaround for explosion of git cat-file --batch processes
        # https://github.com/datalad/datalad/issues/1888
        if dsm.repo is not None:
            dsm.repo.repo.close()
            del dsm.repo
            dsm.repo = None
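
# A minimal usage sketch (the path is hypothetical): print one formatted
# status line per dataset found under a top-level dataset, descending into
# installed subdatasets when `recursive` is set.
#
#     _ls_dataset('/tmp/superds', recursive=True, long_=True)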
Beispiel #38
0
def test_save(path):

    ds = Dataset(path)

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("something")

    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)

    ds.save("add a new file")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("modify")

    ok_(ds.repo.dirty)
    ds.save("modified new_file.tst")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # save works without ds and files given in the PWD
    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("rapunzel")
    with chpwd(path):
        save("love rapunzel")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # and also without `-a` when things are staged
    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("exotic")
    ds.repo.add("new_file.tst", git=True)
    with chpwd(path):
        save("love marsians")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(opj(path, fn), "w") as f:
            f.write(fn)

    ds.add([opj(path, f) for f in files])
    # superfluous call to save (add saved it already), should not fail
    # but report that nothing was saved
    assert_status('notneeded', ds.save("set of new files"))
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.rev_create('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(opj(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.add('.')
    ok_clean_git(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    # Note/TODO: ok_clean_git is failing in direct mode, due to staged but
    # uncommitted .datalad (probably caused within create)
    ok_(ds.repo.dirty)
    # ensure modified subds is committed
    ds.save()
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # now introduce a change downstairs
    subds.rev_create('someotherds')
    ok_clean_git(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # and save via subdataset path
    ds.save('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
Beispiel #39
0
def test_recursive_save(path):
    ds = Dataset(path).rev_create()
    # nothing to save
    assert_status('notneeded', ds.save())
    subds = ds.rev_create('sub')
    # subdataset presence already saved
    ok_clean_git(ds.path)
    subsubds = subds.rev_create('subsub')
    assert_equal(
        ds.subdatasets(recursive=True, fulfilled=True, result_xfm='paths'),
        [subds.path, subsubds.path])
    newfile_name = opj(subsubds.path, 'test')
    with open(newfile_name, 'w') as f:
        f.write('some')
    # saves the status change of the subdataset due to the subsubdataset addition
    assert_result_values_equal(ds.save(result_filter=is_ok_dataset), 'path',
                               [ds.path])

    # make the new file known to its dataset
    ds.add(newfile_name, save=False)

    # but remains dirty because of the uncommitted file down below
    assert ds.repo.dirty
    # auto-add will save nothing deep down without recursive
    assert_status('notneeded', ds.save())
    assert ds.repo.dirty
    # with recursive pick up the change in subsubds
    assert_result_values_equal(
        ds.save(recursive=True, result_filter=is_ok_dataset), 'path',
        [subsubds.path, subds.path, ds.path])

    # at this point the entire tree is clean
    ok_clean_git(ds.path)
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    # now we save recursively, nothing should happen
    res = ds.save(recursive=True)
    # we do not get any report from a subdataset, because we detect at the
    # very top that the entire tree is clean
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        status='notneeded',
                        action='save',
                        path=ds.path)
    # now we introduce new files all the way down
    create_tree(subsubds.path, {"mike1": 'mike1'})
    # because we cannot say from the top if there is anything to do down below,
    # we have to traverse and we will get reports for all datasets, but there
    # is nothing actually saved
    res = ds.save(recursive=True)
    assert_result_count(res, 3)
    assert_status('notneeded', res)
    subsubds_indexed = subsubds.repo.get_indexed_files()
    assert_not_in('mike1', subsubds_indexed)
    assert_equal(states, [d.repo.get_hexsha() for d in (ds, subds, subsubds)])
    unlink(opj(subsubds.path, 'mike1'))
    ok_clean_git(ds.path)

    # modify content in subsub and try saving
    testfname = newfile_name
    subsubds.unlock(testfname)
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    # the following should all do nothing
    # no auto_add
    assert_status('notneeded', ds.save())
    # no recursive
    assert_status('notneeded', ds.save())
    # an explicit target saves only the corresponding dataset
    assert_result_values_equal(save(path=[testfname]), 'path', [subsubds.path])
    # plain recursive without any files given will save the beast
    assert_result_values_equal(
        ds.save(recursive=True, result_filter=is_ok_dataset), 'path',
        [subds.path, ds.path])
    # there is nothing else to save
    assert_status('notneeded', ds.save(recursive=True))
    ok_clean_git(ds.path)
    # one more time and check that all datasets in the hierarchy are not
    # contaminated with untracked files
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    testfname = opj('sub', 'subsub', 'saveme2')
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    assert_status('notneeded', ds.save(recursive=True))
    newstates = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    for old, new in zip(states, newstates):
        assert_equal(old, new)
    assert ds.repo.dirty
    unlink(opj(ds.path, testfname))
    ok_clean_git(ds.path)

    # now let's check saving "upwards"
    create_tree(subds.path, {"testnew": 'smth', "testadded": "added"})
    subds.repo.add("testadded")
    indexed_files = subds.repo.get_indexed_files()
    assert subds.repo.dirty
    assert ds.repo.dirty
    assert not subsubds.repo.dirty
    create_tree(subsubds.path, {"testnew2": 'smth'})
    assert subsubds.repo.dirty
    # and indexed files didn't change
    assert_equal(indexed_files, subds.repo.get_indexed_files())
    ok_clean_git(subds.repo,
                 untracked=['testnew'],
                 index_modified=['subsub'],
                 head_modified=['testadded'])
    old_states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    subsubds.save(message="savingtestmessage", super_datasets=True)
    # this save actually didn't save anything in subsub (or anywhere),
    # because there were only untracked bits pending
    for old, new in zip(old_states,
                        [d.repo.get_hexsha() for d in (ds, subds, subsubds)]):
        assert_equal(old, new)
    # but now we are saving this untracked bit specifically
    subsubds.save(message="savingtestmessage",
                  path=['testnew2'],
                  super_datasets=True)
    ok_clean_git(subsubds.repo)
    # but its super should have got only the subsub saved
    # not the file we created
    ok_clean_git(subds.repo,
                 untracked=['testnew'],
                 head_modified=['testadded'])

    # check commits to have correct messages
    # there are no dedicated superdataset-save commits anymore, because
    # superdatasets get saved as part of the processed hierarchy and can contain
    # other parts in the commit (if so instructed)
    assert_equal(
        next(subsubds.repo.get_branch_commits('master')).message.rstrip(),
        'savingtestmessage')
    assert_equal(
        next(subds.repo.get_branch_commits('master')).message.rstrip(),
        'savingtestmessage')
    assert_equal(
        next(ds.repo.get_branch_commits('master')).message.rstrip(),
        'savingtestmessage')

    # and if we try to save while being within that subsubds path
    subsubds.unlock('testnew2')
    create_tree(subsubds.path, {"testnew2": 'smth2'})

    # trying to replicate https://github.com/datalad/datalad/issues/1540
    subsubds.save(message="saving new changes", all_updated=True)  # no super
    with chpwd(subds.path):
        # no explicit dataset is provided, but a path is provided
        save(path=['subsub'], message='saving sub', super_datasets=True)
    # super should get it saved too
    assert_equal(
        next(ds.repo.get_branch_commits('master')).message.rstrip(),
        'saving sub')
Beispiel #40
0
def test_add_source(path, url, ds_dir):
    raise SkipTest('functionality is not supported ATM')
    from os import listdir
    from datalad.support.network import RI

    urls = [RI(url + f) for f in listdir(path)]
    ds = Dataset(ds_dir).create()
    eq_(len(ds.repo.get_annexed_files()), 0)

    # add a remote source to git => fail:
    assert_raises(NotImplementedError, ds.add, source=urls[0], to_git=True)
    # annex add a remote source:
    ds.add(source=urls[0])
    eq_(len(ds.repo.get_annexed_files()), 1)

    # add two remote sources and give them local names:
    ds.add(path=['local1.dat', 'local2.dat'], source=urls[1:3])
    annexed = ds.repo.get_annexed_files()
    eq_(len(annexed), 3)
    assert_in('local1.dat', annexed)
    assert_in('local2.dat', annexed)

    # add a second source for one of them
    ds.add(path='local1.dat', source=urls[3])
    eq_(len(annexed), 3)
    whereis_dict = ds.repo.whereis('local1.dat', output='full')
    reg_urls = [
        whereis_dict[uuid]['urls'] for uuid in whereis_dict
        if not whereis_dict[uuid]['here']
    ]
    eq_(len(reg_urls), 1)  # one remote for 'local1.dat', that is not "here"
    eq_({str(urls[1]), str(urls[3])}, set(reg_urls[0]))

    # just to be sure compare to 'local2.dat':
    whereis_dict = ds.repo.whereis('local2.dat', output='full')
    reg_urls = [
        whereis_dict[uuid]['urls'] for uuid in whereis_dict
        if not whereis_dict[uuid]['here']
    ]
    eq_(len(reg_urls), 1)  # one remote for 'local2.dat', that is not "here"
    eq_([urls[2]], reg_urls[0])

    # provide more paths than sources:
    # report failure on non-existing 'local4.dat':
    result = ds.add(path=['local3.dat', 'local4.dat'], source=urls[4])
    ok_(
        all([
            r['success'] is False and r['note'] == 'not found' for r in result
            if r['file'] == 'local4.dat'
        ]))

    with open(opj(ds.path, 'local4.dat'), 'w') as f:
        f.write('local4 content')

    ds.add(path=['local3.dat', 'local4.dat'], source=urls[4])
    annexed = ds.repo.get_annexed_files()
    eq_(len(annexed), 5)
    assert_in('local3.dat', annexed)
    assert_in('local4.dat', annexed)

    # 'local3.dat' has a remote source
    whereis_dict = ds.repo.whereis('local3.dat', output='full')
    reg_urls = [
        whereis_dict[uuid]['urls'] for uuid in whereis_dict
        if not whereis_dict[uuid]['here']
    ]
    eq_(len(reg_urls), 1)  # one remote for 'local3.dat', that is not "here"
    eq_([urls[4]], reg_urls[0])

    # 'local4.dat' has no remote source
    whereis_dict = ds.repo.whereis('local4.dat', output='full')
    reg_urls = [
        whereis_dict[uuid]['urls'] for uuid in whereis_dict
        if not whereis_dict[uuid]['here']
    ]
    eq_(len(reg_urls), 0)

    # provide more sources than paths:
    ds.add('local5.dat', source=urls[5:])
    annexed = ds.repo.get_annexed_files()
    assert_in('local5.dat', annexed)
    eq_(len(annexed), 5 + len(urls[5:]))

    # Note: local4.dat didn't come from a URL,
    # but 'local1.dat' consumes two URLs
    eq_(len(annexed), len(urls))
    # all files annexed (-2 for '.git' and '.datalad'):
    eq_(len(annexed), len(listdir(ds.path)) - 2)
Beispiel #41
0
    '062', '063', '066', '126', '127', '146'
]

session_list = ['run001', 'run002', 'run003']

# subject_list = ['003']

# session_list = ['run001', 'run002']

frequency_list = ['10Hz', '20Hz', '40Hz']

output_dir = 'Stimulation_Preproc_OutputDir'
working_dir = 'Stimulation_Preproc_WorkingDir'

stimulation_preproc = Workflow(name='stimulation_preproc')
stimulation_preproc.base_dir = opj(experiment_dir, working_dir)

# =====================================================================================================
# In[3]:
# to prevent nipype from iterating over the anat image with each func run, you need separate
# nodes to select the files
# and this will solve the problem I have had for almost 6 months
# but note that in the sessions you also have to iterate over subject_id to get the {subject_id} var

# Infosource - a function free node to iterate over the list of subject names
infosource_anat = Node(IdentityInterface(fields=['subject_id']),
                       name="infosource_anat")
infosource_anat.iterables = [('subject_id', subject_list)]

infosource_func = Node(
    IdentityInterface(fields=['subject_id', 'session_id', 'frequency_id']),
Beispiel #42
0
def test_bf1886(path):
    parent = Dataset(path).rev_create()
    sub = parent.rev_create('sub')
    ok_clean_git(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', opj(parent.path, 'down'))
    parent.add('down')
    ok_clean_git(parent.path)
    # now symlink pointing up
    os.makedirs(opj(parent.path, 'subdir', 'subsubdir'))
    os.symlink(opj(pardir, 'sub'), opj(parent.path, 'subdir', 'up'))
    parent.add(opj('subdir', 'up'))
    ok_clean_git(parent.path)
    # now symlink pointing 2xup, as in #1886
    os.symlink(opj(pardir, pardir, 'sub'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.add(opj('subdir', 'subsubdir', 'upup'))
    ok_clean_git(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    sub2 = rev_create(opj(parent.path, 'sub2'))
    os.symlink(opj(pardir, pardir, 'sub2'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.add(['sub2', opj('subdir', 'subsubdir', 'upup2')])
    ok_clean_git(parent.path)
    # full replication of #1886: the above but be in subdir of symlink
    # with no reference dataset
    sub3 = rev_create(opj(parent.path, 'sub3'))
    os.symlink(opj(pardir, pardir, 'sub3'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(opj(parent.path, 'subdir', 'subsubdir')):
        rev_save([
            opj(parent.path, 'sub3'),
            opj(parent.path, 'subdir', 'subsubdir', 'upup3')
        ])
    # in contrast to `add`, this only operates on a single top-level dataset;
    # although it is not specified, it gets discovered based on the PWD
    # the logic behind that feels a bit shaky
    # consult discussion in https://github.com/datalad/datalad/issues/3230
    # if this comes up as an issue at some point
    ok_clean_git(parent.path)
Beispiel #43
0
 def initialize_options(self):
     self.manpath = opj('build', 'man')
     self.rstpath = opj('docs', 'source', 'generated', 'man')
     self.parser = 'datalad.cmdline.main:setup_parser'
        for f in findall(opj('datalad_neuroimaging', subdir))
        if splitext(f)[-1].lstrip('.') in extensions
    ]


# extension version
version = get_version()

cmdclass = {
    'build_manpage': BuildManPage,
    'build_examples': BuildRSTExamplesFromScripts,
}

# PyPI doesn't render markdown yet. Workaround for a sane appearance
# https://github.com/pypa/pypi-legacy/issues/148#issuecomment-227757822
README = opj(dirname(__file__), 'README.md')
try:
    import pypandoc
    long_description = pypandoc.convert(README, 'rst')
except (ImportError, OSError) as exc:
    # attempting to install pandoc via brew on OSX currently hangs and
    # pypandoc imports but throws OSError demanding pandoc
    print(
        "WARNING: pypandoc failed to import or thrown an error while converting"
        " README.md to RST: %r   .md version will be used as is" % exc)
    long_description = open(README).read()

requires = {
    'devel-docs': [
        # used for converting README.md -> .rst for long_description
        'pypandoc',
Beispiel #45
0
 def initialize_options(self):
     self.expath = opj('docs', 'examples')
     self.rstpath = opj('docs', 'source', 'generated', 'examples')
Beispiel #46
0
def discover_dataset_trace_to_targets(basepath,
                                      targetpaths,
                                      current_trace,
                                      spec,
                                      includeds=None):
    """Discover the edges and nodes in a dataset tree to given target paths

    Parameters
    ----------
    basepath : path
      Path to a start or top-level dataset. Really has to be a path to a
      dataset!
    targetpaths : list(path)
      Any non-zero number of paths that are termination points for the
      search algorithm. Can be paths to datasets, directories, or files
      (and any combination thereof).
    current_trace : list
      For a top-level call this should probably always be `[]`
    spec : dict
      `content_by_ds`-style dictionary that will receive information about the
      discovered datasets. Specifically, for each discovered dataset there
      will be an item with its path under the key (path) of the respective
      superdataset.
    includeds : sequence, optional
      Any paths given are treated as existing subdatasets, regardless of
      whether they can be found in the filesystem. Such subdatasets will appear
      under the key of the closest existing dataset in the `spec`.

    Returns
    -------
    None
      Function calls itself recursively and populates `spec` dict in-place.
      Keys are dataset paths, values are sets of subdataset paths
    """
    # convert to set for faster lookup
    includeds = includeds if isinstance(includeds, set) else \
        set() if includeds is None else set(includeds)
    # this beast walks the directory tree from a given `basepath` until
    # it discovers any of the given `targetpaths`
    # if it finds one, it commits any accumulated trace of visited
    # datasets on this edge to the spec
    valid_repo = GitRepo.is_valid_repo(basepath)
    if valid_repo:
        # we are passing into a new dataset, extend the dataset trace
        current_trace = current_trace + [basepath]
    # this edge is not done, we need to try to reach any downstream
    # dataset
    undiscovered_ds = set(t for t in targetpaths)  # if t != basepath)
    # whether anything in this directory matched a targetpath
    filematch = False
    if isdir(basepath):
        for p in listdir(basepath):
            p = assure_unicode(opj(basepath, p))
            if not isdir(p):
                if p in targetpaths:
                    filematch = True
                # we cannot have anything below this one
                continue
            # OPT listdir might be large and we could have only a few items
            # in `targetpaths` -- so traverse only those in spec which have
            # leading dir basepath
            # filter targets matching this downward path
            downward_targets = set(t for t in targetpaths
                                   if path_startswith(t, p))
            if not downward_targets:
                continue
            # remove the matching ones from the "todo" list
            undiscovered_ds.difference_update(downward_targets)
            # go one deeper
            discover_dataset_trace_to_targets(
                p,
                downward_targets,
                current_trace,
                spec,
                includeds=includeds
                if not includeds else includeds.intersection(downward_targets))
    undiscovered_ds = [
        t for t in undiscovered_ds if includeds
        and path_is_subpath(t, current_trace[-1]) and t in includeds
    ]
    if filematch or basepath in targetpaths or undiscovered_ds:
        for i, p in enumerate(current_trace[:-1]):
            # TODO RF prepare proper annotated path dicts
            subds = spec.get(p, set())
            subds.add(current_trace[i + 1])
            spec[p] = subds
        if undiscovered_ds:
            spec[current_trace[-1]] = spec.get(current_trace[-1],
                                               set()).union(undiscovered_ds)
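
# A minimal usage sketch (paths are hypothetical and both '/tmp/super' and
# '/tmp/super/sub' are assumed to be valid git repositories): asking for a
# file deep inside a subdataset records the superdataset -> subdataset edge
# in `spec`.
#
#     spec = {}
#     discover_dataset_trace_to_targets(
#         '/tmp/super', ['/tmp/super/sub/data/file.dat'], [], spec)
#     # spec == {'/tmp/super': {'/tmp/super/sub'}}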
Beispiel #47
0
def test_within_ds_file_search(path):
    try:
        import nibabel
    except ImportError:
        raise SkipTest
    ds = Dataset(path).create(force=True)
    ds.config.add('datalad.metadata.nativetype', 'nifti1', where='dataset')
    makedirs(opj(path, 'stim'))
    for src, dst in (('nifti1.nii.gz',
                      opj('sub-01', 'func', 'sub-01_task-some_bold.nii.gz')),
                     ('nifti1.nii.gz',
                      opj('sub-03', 'func', 'sub-03_task-other_bold.nii.gz'))):
        copy(opj(dirname(dirname(__file__)), 'tests', 'data', 'files', src),
             opj(path, dst))
    ds.add('.')
    ds.aggregate_metadata()
    ok_clean_git(ds.path)
    # basic sanity check on the metadata structure of the dataset
    dsmeta = ds.metadata('.', reporton='datasets')[0]['metadata']
    for src in ('bids', 'nifti1'):
        # something for each one
        assert_in(src, dsmeta)
        # each src declares its own context
        assert_in('@context', dsmeta[src])
        # we have a unique content metadata summary for each src
        assert_in(src, dsmeta['datalad_unique_content_properties'])

    # test default behavior
    with swallow_outputs() as cmo:
        ds.search(show_keys='name', mode='textblob')

        assert_in("""\
id
meta
parentds
path
type
""", cmo.out)

    target_out = """\
annex.key
bids.BIDSVersion
bids.author
bids.citation
bids.conformsto
bids.datatype
bids.description
"""
    if external_versions['bids'] >= '0.9':
        target_out += "bids.extension\n"
    target_out += """\
bids.fundedby
bids.license
bids.name
bids.subject.age(years)
bids.subject.gender
bids.subject.handedness
bids.subject.hearing_problems_current
bids.subject.id
bids.subject.language
bids.suffix
bids.task
datalad_core.id
datalad_core.refcommit
id
nifti1.cal_max
nifti1.cal_min
nifti1.datatype
nifti1.description
nifti1.dim
nifti1.freq_axis
nifti1.intent
nifti1.magic
nifti1.phase_axis
nifti1.pixdim
nifti1.qform_code
nifti1.sform_code
nifti1.sizeof_hdr
nifti1.slice_axis
nifti1.slice_duration
nifti1.slice_end
nifti1.slice_order
nifti1.slice_start
nifti1.spatial_resolution(mm)
nifti1.t_unit
nifti1.temporal_spacing(s)
nifti1.toffset
nifti1.vox_offset
nifti1.xyz_unit
parentds
path
type
"""

    # check generated autofield index keys
    with swallow_outputs() as cmo:
        ds.search(mode='autofield', show_keys='name')
        # it is impossible to assess what is different from that dump
        # so we will use diff
        diff = list(unified_diff(target_out.splitlines(),
                                 cmo.out.splitlines()))
        assert_in(target_out, cmo.out, msg="Diff: %s" % os.linesep.join(diff))

    assert_result_count(ds.search('blablob#'), 0)
    # now check that we can discover things from the aggregated metadata
    for mode, query, hitpath, matched_key, matched_val in (
            # random keyword query
            # multi word query implies AND
        ('textblob', ['bold', 'female'],
         opj('sub-03', 'func',
             'sub-03_task-some_bold.nii.gz'), 'meta', 'female'),
            # report which field matched with auto-field
        ('autofield', 'female',
         opj('sub-03', 'func', 'sub-03_task-other_bold.nii.gz'),
         'bids.subject.gender', 'female'),
            # autofield multi-word query is also AND
        ('autofield', ['bids.suffix:bold', 'bids.subject.id:01'],
         opj('sub-01', 'func',
             'sub-01_task-some_bold.nii.gz'), 'bids.suffix', 'bold'),
            # TODO extend with more complex queries to test whoosh
            # query language configuration
    ):
        res = ds.search(query, mode=mode, full_record=True)
        if mode == 'textblob':
            # 'textblob' does datasets by default only (but could be configured otherwise)
            assert_result_count(res, 1)
        else:
            # the rest always has a file and the dataset, because they carry
            # metadata in the same structure
            assert_result_count(res, 2)
            assert_result_count(
                res,
                1,
                type='file',
                path=opj(ds.path, hitpath),
                # each file must report the ID of the dataset it is from, critical for
                # discovering related content
                dsid=ds.id)
        assert_result_count(res, 1, type='dataset', path=ds.path, dsid=ds.id)
        # test the key and specific value of the match
        assert_in(matched_key, res[-1]['query_matched'])
        assert_equal(res[-1]['query_matched'][matched_key], matched_val)
Beispiel #48
0
def _path_rel2file(p):
    return opj(dirname(__file__), p)
Beispiel #49
0
def pipeline(bucket,
             prefix=None,
             no_annex=False,
             tag=True,
             skip_problematic=False,
             to_http=False,
             rename=None,
             directory=None,
             archives=False,
             allow_dirty=False,
             backend='MD5E',
             drop=False,
             drop_force=False,
             strategy='commit-versions',
             exclude=None,
             **kwargs):
    """Pipeline to crawl/annex an arbitrary bucket

    Parameters
    ----------
    bucket : str
    prefix : str, optional
      prefix within the bucket
    tag : bool, optional
      tag "versions"
    skip_problematic : bool, optional
      pass to Annexificator
    to_http : bool, optional
      Convert s3:// URLs to the corresponding generic http:// URLs. To be used
      for resources which are publicly available via http
    directory : {subdataset}, optional
      What to do when encountering a directory.  'subdataset' would initiate a new sub-dataset
      at that directory
    strategy : {'commit-versions', 'naive'}, optional
      What strategy to use when processing a "delete" event. See the `crawl_s3` node for more information.
    drop : bool, optional
      Drop all the files when done crawling
    exclude : str, optional
      Regular expression to be passed to `crawl_s3` to exclude some files
    **kwargs:
      passed into simple_with_archives.pipeline
    """

    lgr.info("Creating a pipeline for the %s bucket", bucket)

    # TODO: see if we could make it more generic!!!
    # drop = assure_bool(drop)
    # drop_force = assure_bool(drop_force)

    annex_kw = {}

    to_http = assure_bool(to_http)
    tag = assure_bool(tag)
    archives = assure_bool(archives)
    no_annex = assure_bool(no_annex)
    allow_dirty = assure_bool(allow_dirty)
    drop = assure_bool(drop)
    drop_force = assure_bool(drop_force)

    if not to_http:
        annex_kw['special_remotes'] = [DATALAD_SPECIAL_REMOTE]

    annex = Annexificator(
        create=False,  # must be already initialized etc
        backend=backend,
        no_annex=no_annex,
        skip_problematic=skip_problematic,
        allow_dirty=allow_dirty,
        # Primary purpose of this one is registration of all URLs with our
        # upcoming "ultimate DB" so we don't get to git anything
        # options=["-c", "annex.largefiles=exclude=CHANGES* and exclude=changelog.txt and exclude=dataset_description.json and exclude=README* and exclude=*.[mc]"]
        **annex_kw)

    s3_actions = {'commit': annex.finalize(tag=tag), 'annex': annex}
    s3_switch_kw = {}
    recursive = True
    if directory:
        if directory == 'subdataset':
            new_prefix = '%(filename)s/'
            if prefix:
                new_prefix = opj(prefix, new_prefix)
            s3_actions['directory'] = [
                # for initiate_dataset we should replicate filename as handle_name, prefix
                assign({
                    'prefix': new_prefix,
                    'dataset_name': '%(filename)s'
                },
                       interpolate=True),
                annex.initiate_dataset(template='simple_s3',
                                       data_fields=['prefix'],
                                       add_fields={
                                           'bucket': bucket,
                                           'to_http': to_http,
                                           'skip_problematic':
                                           skip_problematic,
                                       })
            ]
            s3_switch_kw['missing'] = 'skip'  # ok to not remove
            recursive = False
        else:
            raise ValueError("Do not know how to treat %s" % directory)
    else:
        s3_actions['remove'] = annex.remove

    incoming_pipeline = [
        crawl_s3(bucket,
                 prefix=prefix,
                 strategy=strategy,
                 repo=annex.repo,
                 recursive=recursive,
                 exclude=exclude),
    ]

    from ..nodes.misc import debug
    if to_http:
        incoming_pipeline.append(sub_s3_to_http)

    if rename:
        incoming_pipeline += [
            sub({'filename': get_replacement_dict(rename)}, ok_missing=True)
        ]

    incoming_pipeline.append(
        switch('datalad_action', s3_actions, **s3_switch_kw))

    if archives:
        pipeline = swa_pipeline(incoming_pipeline=incoming_pipeline,
                                annex=annex,
                                **kwargs)
    else:
        pipeline = incoming_pipeline

    if drop:
        pipeline.append(annex.drop(all=True, force=drop_force))

    return pipeline
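
# A minimal invocation sketch (bucket name and options are hypothetical; in
# practice the pipeline is usually driven by the datalad-crawler machinery
# rather than called directly):
#
#     pipe = pipeline('my-public-bucket', prefix='data/', to_http=True,
#                     directory='subdataset')
#     # `pipe` is then a plain list of crawler nodes: crawl_s3, the s3->http
#     # URL conversion, and a switch on 'datalad_action' dispatching to the
#     # Annexificator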
Beispiel #50
0
    def _dataset_auto_get(self, filepath):
        """Verify that filepath is under annex, and if so and not present - get it"""

        if not self._autoget:
            return
        # if filepath is not there at all (the program just "checked" if it
        # could access it)
        if not lexists(filepath):
            lgr.log(2, " skipping %s since it is not there", filepath)
            return
        # deduce directory for filepath
        filedir = dirname(filepath)
        annex = None
        if self._repos_cache is not None:
            filedir_parts = filedir.split(pathsep)
            # ATM we do not expect subdatasets under .datalad, so we could take the top
            # level dataset for that
            try:
                filedir = pathsep.join(
                    filedir_parts[:filedir_parts.index(DATALAD_DOTDIR)])
            except ValueError:
                # would happen if no .datalad
                pass
            try:
                annex = self._repos_cache[filedir]
            except KeyError:
                pass

        if annex is None:
            try:
                # TODO: verify logic for create -- we shouldn't 'annexify' non-annexified
                # see https://github.com/datalad/datalad/issues/204
                annex = get_repo_instance(filedir)
                lgr.log(2, "Got the repository %s id:%s containing %s", annex,
                        id(annex), filedir)
            except (RuntimeError, InvalidGitRepositoryError) as e:
                # must be not under annex etc
                return
            if self._repos_cache is not None:
                self._repos_cache[filedir] = annex
        if not isinstance(annex, AnnexRepo):
            # not an annex -- can do nothing
            lgr.log(2, " skipping %s since the repo is not annex", filepath)
            return
        # since Git/AnnexRepo functionality treats relative paths relative to
        # the top of the repository, and we might be outside it, get a full path
        if not isabs(filepath):
            filepath = opj(getpwd(), filepath)

        # "quick" check first if under annex at all
        try:
            # might fail.  TODO: troubleshoot when it does e.g.
            # datalad/tests/test_auto.py:test_proxying_open_testrepobased
            under_annex = annex.is_under_annex(filepath, batch=True)
        except Exception as exc:  # MIH: really? what if MemoryError
            lgr.log(5, " cannot determine if %s under annex: %s", filepath,
                    exc_str(exc))
            under_annex = None
        # either it has content
        if (under_annex or
                under_annex is None) and not annex.file_has_content(filepath):
            lgr.info("AutomagicIO: retrieving file content of %s", filepath)
            out = annex.get(filepath)
            if out and not out.get('success', False):
                # to assure that it is present and without trailing/leading new lines
                out['note'] = out.get('note', '').strip()
                lgr.error("Failed to retrieve %(file)s: %(note)s", out)
Beispiel #51
0
def sorted_files(dout):
    """Return a (sorted) list of files under dout
    """
    return sorted(
        sum([[opj(r, f)[len(dout) + 1:] for f in files]
             for r, d, files in os.walk(dout) if not '.git' in r], []))
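
# For example (hypothetical layout): with dout='/tmp/out' containing 'a.txt'
# and 'sub/b.txt', this returns ['a.txt', opj('sub', 'b.txt')], i.e. paths
# relative to dout, with anything under '.git' skipped.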
Beispiel #52
0
    def call(self, runscript, workdir, out, err, runflags):
        """call(runscript, workdir, out, err, runflags)
        Submit *runscript* to the queueing system with *workdir* as the working directory. Redirect output and error streams to *out* and *err*, respectively. *runflags* stores various submit command options.

        The submit command has the following structure::

            <commands.submit>_<workdir>_{workdir}_<error>_{err}[_<output>_{out}][FLAGS]_{runscript}

        Underscores denote spaces, parts in pointy brackets correspond to ``settings`` entries, parts in curly brackets to :meth:`call` arguments, square brackets contain optional parts. Output part is added if *out* is not ``None``. This is handled automatically based on ``runscript.stdout_redirect`` value in job's ``settings``.

        ``FLAGS`` part is built based on *runflags* argument, which is a |Settings| instance storing |run| keyword arguments. For every *(key,value)* pair in *runflags* the string ``_-key_value`` is appended to ``FLAGS`` **unless** the *key* is a special key occurring in ``commands.special``. In that case ``_<commands.special.key>value`` is used (mind the lack of space in between). For example, a |Settings| instance defining interaction with SLURM has the following entries::

            workdir = '-D'
            output  = '-o'
            error   = '-e'
            special.nodes    = '-N '
            special.walltime = '-t '
            special.memory = '--mem='
            special.queue    = '-p '
            commands.submit  = 'sbatch'
            commands.check  = 'squeue'

        The submit command produced by::

            gr = GridRunner(parallel=True, maxjobs=4, grid='slurm')
            j.run(jobrunner=gr, queue='short', nodes=2, J='something', O='')

        will be:

        .. code-block:: none

            sbatch -D {workdir} -e {err} -o {out} -p short -N 2 -J something -O  {runscript}

        In certain queueing systems some flags don't have a short form with semantics ``-key value``. For example, in SLURM the flag ``--nodefile=value`` has a short form ``-F value``, but the flag ``--export=value`` does not. One can still use such a flag using the special keys logic::

            gr = GridRunner(parallel=True, maxjobs=4, grid='slurm')
            gr.settings.special.export = '--export='
            j.run(jobrunner=gr, queue='short', export='value')

        That results in the command::

            sbatch -D {workdir} -e {err} -o {out} -p short --export=value {runscript}

        The submit command is then executed and the output returned by it is used to determine the submitted job's ID. The value stored in ``commands.getid`` is used for that purpose. It should be a function taking a single string (the whole output of the submit command) and returning a string with job's ID.

        The submitted job's ID is then added to ``_active_jobs`` dictionary, with the key being job's ID and the value being an instance of :class:`threading.Event`. This event is used to signal the fact that the job is finished and the thread handling it can continue. Then the :meth:`_check_queue` method starts the thread querying the queue and setting the events of finished jobs.

        Since it is difficult to automatically obtain job's exit code, the returned value is 0 (or 1, if the submit command failed). From |run| perspective it means that a job executed with |GridRunner| is *crashed* only if it never entered the queue (usually due to improper submit command).

        .. note::
            This method is used automatically during |run| and should never be explicitly called in your script.
        """
        s = self.settings
        cmd = ' '.join([s.commands.submit, s.workdir, workdir, s.error, err])
        if out is not None:
            cmd += ' '+s.output+' '+out
        for k,v in runflags.items():
            if k in s.special:
                cmd += ' '+s.special[k]+str(v)
            else:
                cmd += ' -'+k+' '+str(v)
        cmd += ' ' + opj(workdir,runscript)

        log('Submitting {} with command {}'.format(runscript, cmd), 5)
        process = saferun(cmd.split(' '), stdout=PIPE, stderr=PIPE)
        subout = process.stdout.decode()
        log('Output of {} submit command: {}'.format(runscript, subout), 5)

        jobid = s.commands.getid(subout)
        if jobid is None:
            log('Submitting of {} failed. Stderr of submit command:\n{}'.format(runscript, process.stderr.decode()), 1)
            return 1
        log('{} submitted successfully as job {}'.format(runscript, jobid), 3)

        event = threading.Event()
        with self._active_lock:
            self._active_jobs[jobid] = event
        self._check_queue()
        event.wait()

        log('Execution of {} finished'.format(runscript), 5)
        return 0
Beispiel #53
0
    def __call__(dataset, pattern, ref_dir='.', makedirs=False):
        # could be extended to accept actual largefile expressions
        from os.path import join as opj
        from os.path import isabs
        from os.path import exists
        from os import makedirs as makedirsfx
        from datalad.distribution.dataset import require_dataset
        from datalad.support.annexrepo import AnnexRepo
        from datalad.utils import assure_list

        pattern = assure_list(pattern)
        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='no_annex configuration')

        res_kwargs = dict(
            path=ds.path,
            type='dataset',
            action='no_annex',
        )

        # all the ways we refused to cooperate
        if not isinstance(ds.repo, AnnexRepo):
            yield dict(res_kwargs,
                       status='notneeded',
                       message='dataset has no annex')
            return
        if any(isabs(p) for p in pattern):
            yield dict(
                res_kwargs,
                status='error',
                message=
                ('path pattern for `no_annex` configuration must be relative paths: %s',
                 pattern))
            return
        if isabs(ref_dir):
            yield dict(
                res_kwargs,
                status='error',
                message=
                ('`ref_dir` for `no_annex` configuration must be a relative path: %s',
                 ref_dir))
            return

        gitattr_dir = opj(ds.path, ref_dir)
        if not exists(gitattr_dir):
            if makedirs:
                makedirsfx(gitattr_dir)
            else:
                yield dict(
                    res_kwargs,
                    status='error',
                    message=
                    'target directory for `no_annex` does not exist (consider makedirs=True)'
                )
                return

        gitattr_file = opj(gitattr_dir, '.gitattributes')
        with open(gitattr_file, 'a') as fp:
            for p in pattern:
                fp.write('{} annex.largefiles=nothing\n'.format(p))
            yield dict(res_kwargs, status='ok')

        for r in dataset.add(gitattr_file,
                             to_git=True,
                             message="[DATALAD] exclude paths from annex'ing",
                             result_filter=None,
                             result_xfm=None):
            yield r
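
# A minimal sketch of the effect (dataset location and pattern are
# hypothetical): invoked with pattern=['sourcedata/*'] and ref_dir='code',
# the command appends
#
#     sourcedata/* annex.largefiles=nothing
#
# to <dataset>/code/.gitattributes and commits that file straight to git.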
Beispiel #54
0
def job_runner(email, dir_cache, seqs=None, run_id='', parallel_run_count=1,
               max_title_a_len=0, max_run_id_len=0):
    """Run InterProScan 5."""
    max_jobs = int(30 / parallel_run_count)
    delay = 0.333
    cfp = opj(dir_cache, 'ips5_cache_running_' + run_id)

    def load():
        with open(cfp, 'rb') as f:
            c = pickle.load(f)
        return c

    def dump(c):
        with open(cfp, 'wb') as f:
            pickle.dump(c, f, protocol=PICKLE_PROTOCOL)

    seqs_orig = deepcopy(seqs)
    seqs = deepcopy(seqs)

    jobs = None

    if ope(cfp):
        jobs = load()
    else:
        if seqs is not None:
            jobs = {'queue': seqs,
                    'running': OrderedDict(),
                    'finished': OrderedDict(),
                    'error': OrderedDict(),
                    'failure': OrderedDict(),
                    'not_found': OrderedDict(),
                    'other': OrderedDict()}

            dump(jobs)

        else:
            print('No sequences provided.')
            sys.exit(0)

    ##########################################################################

    queue = jobs['queue']
    running = jobs['running']
    finished = jobs['finished']
    # error = jobs['error']
    failure = jobs['failure']
    # not_found = jobs['not_found']
    other = jobs['other']

    retry_list = list()

    queue_size = len(seqs_orig)

    busy = True
    submit_message = True
    while busy:
        dump(jobs)
        sleep(delay)

        if len(queue) > 0:
            if len(running) < max_jobs:
                if submit_message is True:
                    pass
                    # print()
                    # print('Submitting jobs: ' + run_id)
                    # print()
                job_status = 'SUBMITTED '
                title = list(queue.keys()).pop()
                sequence = queue.pop(title)
                job_id = submit_job(email, title, sequence)
                if job_id is None:
                    job_status = 'WILL_RETRY'
                    queue[title] = sequence
                    job_id = ''
                else:
                    running[title] = job_id
                titles_ab = split_seq_defn(title)
                title_a = titles_ab[0]

                msg = (job_status + '  ' +
                       title_a.ljust(max_title_a_len) +
                       run_id.ljust(max_run_id_len) +
                       ' ' * 5 + job_id)

                Log.msg(msg)

        if len(running) < max_jobs and len(queue) > 0:
            submit_message = False
            continue

        if len(running) > 0:
            submit_message = True
            # print()
            # print('Looking for finished jobs: ' + run_id)
            # print()
            finished_jobs = False
            sleep(delay + 7)
            job_statuses = {}

            for title in running:
                sleep(delay)
                job_id = running[title]
                job_status = status(job_id)
                job_statuses[title] = {'job_id': job_id,
                                       'job_status': job_status}
                titles_ab = split_seq_defn(title)
                title_a = titles_ab[0]
                # ToDo: Refactor
                if job_status == 'RUNNING':
                    Log.msg(' ' * 10 + '- ' + title_a.ljust(max_title_a_len) +
                            run_id.ljust(max_run_id_len) + ' ' * 5 + job_id)
                else:
                    Log.msg(' ' * 10 + '+ ' + title_a.ljust(max_title_a_len) +
                            run_id.ljust(max_run_id_len) + ' ' * 5 + job_id)
                    finished_jobs = True

            if finished_jobs is True:
                # print()
                pass
            else:
                continue

            for title in job_statuses:

                job_id = job_statuses[title]['job_id']
                job_status = job_statuses[title]['job_status']

                if job_status == 'RUNNING':
                    pass

                elif job_status == 'FINISHED':
                    job_id = running.pop(title)
                    if 'error' in result_types(job_id):
                        job_status = 'FAILURE'
                        failure[title] = job_id
                        if retry_list.count(title) < 3:
                            job_status = 'WILL_RETRY'
                            queue[title] = seqs_orig[title]
                            retry_list.append(title)
                    else:
                        finished[title] = job_id

                elif job_status == 'FAILURE':
                    job_id = running.pop(title)
                    failure[title] = job_id

                elif job_status == 'ERROR':
                    continue
                    # job_id = running.pop(title)
                    # error[title] = job_id

                elif job_status == 'NOT_FOUND':
                    job_status = 'WILL_RETRY'
                    job_id = running.pop(title)
                    queue[title] = seqs_orig[title]
                    # not_found[title] = job_id

                else:
                    job_id = running.pop(title)
                    other[title] = job_id

                if job_status != 'RUNNING':
                    progress = round((len(finished) / queue_size) * 100)
                    progress_str = '{:3d}'.format(progress) + '%'
                    titles_ab = split_seq_defn(title)
                    title_a = titles_ab[0]
                    job_status_msg = job_status.ljust(10)
                    msg = (job_status_msg + '  ' +
                           title_a.ljust(max_title_a_len) +
                           run_id.ljust(max_run_id_len) +
                           progress_str.rjust(4) + ' ' + job_id)

                    Log.msg(msg)

        if len(running) == 0 and len(queue) == 0:
            busy = False

    if ope(cfp):
        remove(cfp)

    return jobs
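
# A minimal usage sketch (e-mail address, cache directory and sequences are
# hypothetical; `submit_job`, `status` and `result_types` are the module
# helpers used above to talk to the InterProScan 5 web service):
#
#     seqs = OrderedDict([('seq1 sampleA', 'MKTAYIAKQR...')])
#     jobs = job_runner(email='user@example.org', dir_cache='/tmp/ips5_cache',
#                       seqs=seqs, run_id='demo', parallel_run_count=1)
#     # jobs['finished'] maps sequence titles to job ids; titles that keep
#     # failing end up in jobs['failure'] after a few retries.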
Beispiel #55
0
def load_information_from_description_file(module, mod_path=None):
    """
    :param module: The name of the module (sale, purchase, ...)
    :param mod_path: Physical path of the module; if not provided, it is derived from the module name
    """
    if not mod_path:
        mod_path = get_module_path(module, downloaded=True)
    manifest_file = module_manifest(mod_path)
    if manifest_file:
        # default values for descriptor
        info = {
            'application': False,
            'author': 'Odoo S.A.',
            'auto_install': False,
            'category': 'Uncategorized',
            'depends': [],
            'description': '',
            'icon': get_module_icon(module),
            'installable': True,
            'license': 'LGPL-3',
            'post_load': None,
            'version': '1.0',
            'web': False,
            'sequence': 100,
            'summary': '',
            'website': '',
        }
        info.update(
            zip('depends data demo test init_xml update_xml demo_xml'.split(),
                iter(list, None)))

        f = tools.file_open(manifest_file, mode='rb')
        try:
            info.update(ast.literal_eval(pycompat.to_text(f.read())))
        finally:
            f.close()

        if not info.get('description'):
            readme_path = [
                opj(mod_path, x) for x in README
                if os.path.isfile(opj(mod_path, x))
            ]
            if readme_path:
                with tools.file_open(readme_path[0]) as fd:
                    info['description'] = fd.read()

        # auto_install is set to `False` if disabled, and a set of
        # auto_install dependencies otherwise. That way, we can set
        # auto_install: [] to always auto_install a module regardless of its
        # dependencies
        auto_install = info.get('auto_install', info.get('active', False))
        if isinstance(auto_install, collections.abc.Iterable):
            info['auto_install'] = set(auto_install)
            non_dependencies = info['auto_install'].difference(info['depends'])
            assert not non_dependencies,\
                "auto_install triggers must be dependencies, found " \
                "non-dependencies [%s] for module %s" % (
                    ', '.join(non_dependencies), module
                )
        elif auto_install:
            info['auto_install'] = set(info['depends'])
        else:
            info['auto_install'] = False

        info['version'] = adapt_version(info['version'])
        return info

    _logger.debug('module %s: no manifest file found %s', module,
                  MANIFEST_NAMES)
    return {}
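
# A minimal manifest sketch (module name and path are hypothetical; any key
# missing from the file is filled in from the defaults above):
#
#     # /path/to/addons/my_module/__manifest__.py
#     # {
#     #     'name': 'My Module',
#     #     'version': '1.0',
#     #     'depends': ['base'],
#     #     'data': ['views/my_views.xml'],
#     # }
#
#     info = load_information_from_description_file(
#         'my_module', mod_path='/path/to/addons/my_module')
#     # info['depends'] == ['base'] and info['auto_install'] is False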
Beispiel #56
0
 def is_really_module(name):
     for mname in MANIFEST_NAMES:
         if os.path.isfile(opj(dir, name, mname)):
             return True
Beispiel #57
0
    def _import_module(self, module, path, force=False):
        known_mods = self.search([])
        known_mods_names = {m.name: m for m in known_mods}
        installed_mods = [m.name for m in known_mods if m.state == 'installed']

        terp = load_information_from_description_file(module, mod_path=path)
        if not terp:
            return False
        values = self.get_values_from_terp(terp)
        if 'version' in terp:
            values['latest_version'] = terp['version']

        unmet_dependencies = set(terp['depends']).difference(installed_mods)

        if unmet_dependencies:
            if (unmet_dependencies == set(['web_studio']) and
                    _is_studio_custom(path)):
                err = _("Studio customizations require Studio")
            else:
                err = _("Unmet module dependencies: \n\n - %s") % '\n - '.join(
                    known_mods.filtered(lambda mod: mod.name in unmet_dependencies).mapped('shortdesc')
                )
            raise UserError(err)
        elif 'web_studio' not in installed_mods and _is_studio_custom(path):
            raise UserError(_("Studio customizations require the Eagle Studio app."))

        mod = known_mods_names.get(module)
        if mod:
            mod.write(dict(state='installed', **values))
            mode = 'update' if not force else 'init'
        else:
            assert terp.get('installable', True), "Module not installable"
            self.create(dict(name=module, state='installed', imported=True, **values))
            mode = 'init'

        for kind in ['data', 'init_xml', 'update_xml']:
            for filename in terp[kind]:
                ext = os.path.splitext(filename)[1].lower()
                if ext not in ('.xml', '.csv', '.sql'):
                    _logger.info("module %s: skip unsupported file %s", module, filename)
                    continue
                _logger.info("module %s: loading %s", module, filename)
                noupdate = False
                if ext == '.csv' and kind in ('init', 'init_xml'):
                    noupdate = True
                pathname = opj(path, filename)
                idref = {}
                convert_file(self.env.cr, module, filename, idref, mode=mode, noupdate=noupdate, kind=kind, pathname=pathname)

        path_static = opj(path, 'static')
        IrAttachment = self.env['ir.attachment']
        if os.path.isdir(path_static):
            for root, dirs, files in os.walk(path_static):
                for static_file in files:
                    full_path = opj(root, static_file)
                    with open(full_path, 'rb') as fp:
                        data = base64.b64encode(fp.read())
                    url_path = '/{}{}'.format(module, full_path.split(path)[1].replace(os.path.sep, '/'))
                    if not isinstance(url_path, str):
                        url_path = url_path.decode(sys.getfilesystemencoding())
                    filename = os.path.split(url_path)[1]
                    values = dict(
                        name=filename,
                        url=url_path,
                        res_model='ir.ui.view',
                        type='binary',
                        datas=data,
                    )
                    attachment = IrAttachment.search([('url', '=', url_path), ('type', '=', 'binary'), ('res_model', '=', 'ir.ui.view')])
                    if attachment:
                        attachment.write(values)
                    else:
                        IrAttachment.create(values)

        return True
Beispiel #58
0
else:
    import pickle

from six import string_types
from os.path import join as opj, exists
from os.path import dirname
from importlib import import_module
from datalad.utils import swallow_logs
from datalad.utils import assure_dir
from datalad.support.json_py import load as jsonload
from datalad.dochelpers import exc_str
from datalad.log import lgr

# common format
metadata_filename = 'meta.json'
metadata_basepath = opj('.datalad', 'meta')


# XXX Could become dataset method
def get_metadata_type(ds, guess=False):
    """Return the metadata type(s)/scheme(s) of a dataset

    Parameters
    ----------
    ds : Dataset
      Dataset instance to be inspected
    guess : bool
      Whether to try to auto-detect the type if no metadata type setting is
      found. All supported metadata schemes are tested in alphanumeric order.

    Returns
Beispiel #59
0
def check_resource_path(mod_path, *args):
    resource_path = opj(mod_path, *args)
    if os.path.exists(resource_path):
        return resource_path
    return False
Beispiel #60
0
"""HTCrystalBall - A crystal ball that lets you peek into the future."""

import logging

from os.path import expanduser, join as opj

from htcrystalball._version import __version__

SLOTS_CONFIGURATION = opj(expanduser('~'), '.htcrystalball')

# External (root level) logging level
logging.basicConfig(level=logging.ERROR, format='WARNING: %(message)s')

# Internal logging level
LOGGER = logging.getLogger('crystal_balls')
LOGGER.setLevel(level=logging.DEBUG)

__all__ = [
    '__version__',
    'SLOTS_CONFIGURATION',
    'LOGGER'
]