Exemple #1
0
def _enter_title(ui, dataset):
    """Ask the user for an article title until an acceptable one is given.

    The suggested default comes from ``_get_default_title(dataset)``.  The
    question is repeated, with an error message shown via ``ui.error``,
    for as long as the answer is shorter than 3 characters.

    Returns the first answer that is at least 3 characters long.
    """
    suggestion = _get_default_title(dataset)
    min_length = 3
    while True:
        answer = ui.question(
            "Please enter the title (must be at least 3 characters long).",
            title="New article",
            default=suggestion)
        if len(answer) >= min_length:
            return answer
        # too short: complain and ask again
        ui.error("Title must be at least 3 characters long.")
Exemple #2
0
def _run_with_exception_handler(cmdlineargs):
    """Execute the command and perform some reporting
    normalization if it crashes, but otherwise just let it go"""
    # otherwise - guard and only log the summary. Postmortem is not
    # as convenient if being caught in this ultimate except
    try:
        return cmdlineargs.func(cmdlineargs)
    # catch BaseException for KeyboardInterrupt
    except BaseException as exc:
        from datalad.support.exceptions import (
            CapturedException,
            InsufficientArgumentsError,
            IncompleteResultsError,
            CommandError,
        )
        ce = CapturedException(exc)
        # we crashed, it has got to be non-zero for starters
        exit_code = 1
        if isinstance(exc, InsufficientArgumentsError):
            # if the func reports inappropriate usage, give help output
            lgr.error('%s (%s)', ce, exc.__class__.__name__)
            cmdlineargs.subparser.print_usage(sys.stderr)
            exit_code = 2
        elif isinstance(exc, IncompleteResultsError):
            # in general we do not want to see the error again, but
            # present in debug output
            lgr.debug('could not perform all requested actions: %s', ce)
        elif isinstance(exc, CommandError):
            exit_code = _communicate_commanderror(exc) or exit_code
        elif isinstance(exc, KeyboardInterrupt):
            from datalad.ui import ui
            ui.error("\nInterrupted by user while doing magic: %s" % ce)
            exit_code = 3
        else:
            # some unforeseen problem
            lgr.error('%s (%s)', ce.message, ce.name)
        sys.exit(exit_code)
Exemple #3
0
    def enter_new(self,
                  url=None,
                  auth_types=[],
                  url_re=None,
                  name=None,
                  credential_name=None,
                  credential_type=None):
        """Create new provider and credential config

        If interactive, this will ask the user to enter the details (or confirm
        default choices). A dedicated config file is written at
        <user_config_dir>/providers/<name>.cfg

        Parameters
        ----------
        url: str or RI
          URL this config is created for
        auth_types: list
          List of authentication types to choose from. First entry becomes
          default. See datalad.downloaders.providers.AUTHENTICATION_TYPES
        url_re: str
          regular expression; Once created, this config will be used for any
          matching URL; defaults to `url`
        name: str
          name for the provider; needs to be unique per user
        credential_name: str
          name for the credential; defaults to the provider's name
        credential_type: str
          credential type to use (key for datalad.downloaders.CREDENTIAL_TYPES)

        Returns
        -------
        The already known provider if the user entered its name and agreed to
        use it; otherwise the result of `self._store_new(...)` once the user
        confirmed saving the configuration, or None if saving was declined.
        """
        # TODO: level/location!

        # NOTE: `self._CONFIG_TEMPLATE` is filled from `locals()` further down,
        # so the names of the local variables in this method are effectively
        # part of the template's interface -- do not rename them.
        # NOTE: `auth_types` is only rebound, never mutated in place, so the
        # shared mutable default is harmless here.
        from datalad.ui import ui
        if url and not name:
            ri = RI(url)
            # the last attribute that exists on the RI provides the default
            for f in ('hostname', 'name'):
                try:
                    # might need sanitarization
                    name = str(getattr(ri, f))
                except AttributeError:
                    pass
        known_providers_by_name = {p.name: p for p in self._providers}
        providers_user_dir = self._get_providers_dirs()['user']
        # Ask until we get either a known provider the user wants to reuse,
        # or a name which is not yet taken by a provider or a config file
        while True:
            name = ui.question(
                title="New provider name",
                text="Unique name to identify 'provider' for %s" % url,
                default=name)
            filename = pathjoin(providers_user_dir, '%s.cfg' % name)
            if name in known_providers_by_name:
                if ui.yesno(
                        title="Known provider %s" % name,
                        text=
                        "Provider with name %s already known. Do you want to "
                        "use it for this session?" % name,
                        default=True):
                    return known_providers_by_name[name]
            elif path.lexists(filename):
                ui.error("File %s already exists, choose another name" %
                         filename)
            else:
                break

        if not credential_name:
            credential_name = name
        if not url_re:
            url_re = re.escape(url) if url else None
        while True:
            url_re = ui.question(
                title="New provider regular expression",
                text="A (Python) regular expression to specify for which URLs "
                "this provider should be used",
                default=url_re)
            try:
                compiled_url_re = re.compile(url_re)
            except re.error as exc:
                # a typo in the expression must not abort the whole dialog
                ui.error("Invalid regular expression %r: %s.  Please re-enter"
                         % (url_re, exc))
                continue
            # Without an original URL there is nothing to validate against
            # (matching against None would raise a TypeError)
            if url is None or compiled_url_re.match(url):
                break
            ui.error("Provided regular expression doesn't match original "
                     "url.  Please re-enter")
            # TODO: url_re of another provider might match it as well
            #  I am not sure if we have any kind of "priority" setting ATM
            #  to differentiate or to to try multiple types :-/

        # First supported entry of `auth_types` (if any) becomes the default
        authentication_type = None
        if auth_types:
            auth_types = [t for t in auth_types if t in AUTHENTICATION_TYPES]
            if auth_types:
                authentication_type = auth_types[0]

        # Setup credential
        authentication_type = ui.question(
            title="Authentication type",
            text="What authentication type to use",
            default=authentication_type,
            choices=sorted(AUTHENTICATION_TYPES))
        authenticator_class = AUTHENTICATION_TYPES[authentication_type]

        # TODO: need to figure out what fields that authenticator might
        #       need to have setup and ask for them here!

        credential_type = ui.question(
            title="Credential",
            text="What type of credential should be used?",
            choices=sorted(CREDENTIAL_TYPES),
            default=credential_type
            or getattr(authenticator_class, 'DEFAULT_CREDENTIAL_TYPE'))

        cfg = self._CONFIG_TEMPLATE.format(**locals())
        if ui.yesno(title="Save provider configuration file",
                    text="Following configuration will be written to %s:\n%s" %
                    (filename, cfg),
                    default='yes'):
            # Just create a configuration file and reload the thing
            return self._store_new(url=url,
                                   authentication_type=authentication_type,
                                   authenticator_class=authenticator_class,
                                   url_re=url_re,
                                   name=name,
                                   credential_name=credential_name,
                                   credential_type=credential_type,
                                   level='user')
        else:
            return None
Exemple #4
0
    def enter_new(self, url=None, auth_types=[]):
        from datalad.ui import ui
        name = None
        if url:
            ri = RI(url)
            for f in ('hostname', 'name'):
                try:
                    # might need sanitarization
                    name = str(getattr(ri, f))
                except AttributeError:
                    pass
        known_providers_by_name = {p.name: p for p in self._providers}
        providers_user_dir = self._get_providers_dirs()['user']
        while True:
            name = ui.question(
                title="New provider name",
                text="Unique name to identify 'provider' for %s" % url,
                default=name
            )
            filename = pathjoin(providers_user_dir, '%s.cfg' % name)
            if name in known_providers_by_name:
                if ui.yesno(
                    title="Known provider %s" % name,
                    text="Provider with name %s already known. Do you want to "
                         "use it for this session?"
                         % name,
                    default=True
                ):
                    return known_providers_by_name[name]
            elif path.lexists(filename):
                ui.error(
                    "File %s already exists, choose another name" % filename)
            else:
                break

        url_re = re.escape(url) if url else None
        while True:
            url_re = ui.question(
                title="New provider regular expression",
                text="A (Python) regular expression to specify for which URLs "
                     "this provider should be used",
                default=url_re
            )
            if not re.match(url_re, url):
                ui.error("Provided regular expression doesn't match original "
                         "url.  Please re-enter")
            # TODO: url_re of another provider might match it as well
            #  I am not sure if we have any kind of "priority" setting ATM
            #  to differentiate or to to try multiple types :-/
            else:
                break

        authentication_type = None
        if auth_types:
            auth_types = [
                t for t in auth_types if t in AUTHENTICATION_TYPES
            ]
            if auth_types:
                authentication_type = auth_types[0]

        # Setup credential
        authentication_type = ui.question(
            title="Authentication type",
            text="What authentication type to use",
            default=authentication_type,
            choices=sorted(AUTHENTICATION_TYPES)
        )
        authenticator_class = AUTHENTICATION_TYPES[authentication_type]

        # TODO: need to figure out what fields that authenticator might
        #       need to have setup and ask for them here!

        credential_type = ui.question(
            title="Credential",
            text="What type of credential should be used?",
            choices=sorted(CREDENTIAL_TYPES),
            default=getattr(authenticator_class, 'DEFAULT_CREDENTIAL_TYPE')
        )

        # Just create a configuration file and reload the thing
        if not path.lexists(providers_user_dir):
            os.makedirs(providers_user_dir)
        cfg = """\
# Provider configuration file created to initially access
# {url}

[provider:{name}]
url_re = {url_re}
authentication_type = {authentication_type}
# Note that you might need to specify additional fields specific to the
# authenticator.  Fow now "look into the docs/source" of {authenticator_class}
# {authentication_type}_
credential = {name}

[credential:{name}]
# If known, specify URL or email to how/where to request credentials
# url = ???
type = {credential_type}
""".format(**locals())
        if ui.yesno(
            title="Save provider configuration file",
            text="Following configuration will be written to %s:\n%s"
                % (filename, cfg),
            default='yes'
        ):
            with open(filename, 'wb') as f:
                f.write(cfg.encode('utf-8'))
        else:
            return None
        self.reload()
        # XXX see above note about possibly multiple matches etc
        return self.get_provider(url)
Exemple #5
0
    def enter_new(self, url=None, auth_types=[]):
        from datalad.ui import ui
        name = None
        if url:
            ri = RI(url)
            for f in ('hostname', 'name'):
                try:
                    # might need sanitarization
                    name = str(getattr(ri, f))
                except AttributeError:
                    pass
        known_providers_by_name = {p.name: p for p in self._providers}
        providers_user_dir = self._get_providers_dirs()['user']
        while True:
            name = ui.question(
                title="New provider name",
                text="Unique name to identify 'provider' for %s" % url,
                default=name
            )
            filename = pathjoin(providers_user_dir, '%s.cfg' % name)
            if name in known_providers_by_name:
                if ui.yesno(
                    title="Known provider %s" % name,
                    text="Provider with name %s already known. Do you want to "
                         "use it for this session?"
                         % name,
                    default=True
                ):
                    return known_providers_by_name[name]
            elif path.lexists(filename):
                ui.error(
                    "File %s already exists, choose another name" % filename)
            else:
                break

        url_re = re.escape(url) if url else None
        while True:
            url_re = ui.question(
                title="New provider regular expression",
                text="A (Python) regular expression to specify for which URLs "
                     "this provider should be used",
                default=url_re
            )
            if not re.match(url_re, url):
                ui.error("Provided regular expression doesn't match original "
                         "url.  Please re-enter")
            # TODO: url_re of another provider might match it as well
            #  I am not sure if we have any kind of "priority" setting ATM
            #  to differentiate or to to try multiple types :-/
            else:
                break

        authentication_type = None
        if auth_types:
            auth_types = [
                t for t in auth_types if t in AUTHENTICATION_TYPES
            ]
            if auth_types:
                authentication_type = auth_types[0]

        # Setup credential
        authentication_type = ui.question(
            title="Authentication type",
            text="What authentication type to use",
            default=authentication_type,
            choices=sorted(AUTHENTICATION_TYPES)
        )
        authenticator_class = AUTHENTICATION_TYPES[authentication_type]

        # TODO: need to figure out what fields that authenticator might
        #       need to have setup and ask for them here!

        credential_type = ui.question(
            title="Credential",
            text="What type of credential should be used?",
            choices=sorted(CREDENTIAL_TYPES),
            default=getattr(authenticator_class, 'DEFAULT_CREDENTIAL_TYPE')
        )

        # Just create a configuration file and reload the thing
        if not path.lexists(providers_user_dir):
            os.makedirs(providers_user_dir)
        cfg = """\
# Provider configuration file created to initially access
# {url}

[provider:{name}]
url_re = {url_re}
authentication_type = {authentication_type}
# Note that you might need to specify additional fields specific to the
# authenticator.  Fow now "look into the docs/source" of {authenticator_class}
# {authentication_type}_
credential = {name}

[credential:{name}]
# If known, specify URL or email to how/where to request credentials
# url = ???
type = {credential_type}
""".format(**locals())
        if ui.yesno(
            title="Save provider configuration file",
            text="Following configuration will be written to %s:\n%s"
                % (filename, cfg),
            default='yes'
        ):
            with open(filename, 'wb') as f:
                f.write(cfg.encode('utf-8'))
        else:
            return None
        self.reload()
        # XXX see above note about possibly multiple matches etc
        return self.get_provider(url)
Exemple #6
0
def _ls_s3(loc, fast=False, recursive=False, all_=False, long_=False,
           config_file=None, list_content=False):
    """List S3 bucket content"""
    if loc.startswith('s3://'):
        bucket_prefix = loc[5:]
    else:
        raise ValueError("passed location should be an s3:// url")

    import boto
    from hashlib import md5
    from boto.s3.key import Key
    from boto.s3.prefix import Prefix
    from boto.s3.connection import OrdinaryCallingFormat
    from boto.exception import S3ResponseError
    from configparser import ConfigParser as SafeConfigParser

    if '/' in bucket_prefix:
        bucket_name, prefix = bucket_prefix.split('/', 1)
    else:
        bucket_name, prefix = bucket_prefix, None

    if prefix and '?' in prefix:
        ui.message("We do not care about URL options ATM, they get stripped")
        prefix = prefix[:prefix.index('?')]

    ui.message("Connecting to bucket: %s" % bucket_name)
    if config_file:
        config = SafeConfigParser()
        config.read(config_file)
        access_key = config.get('default', 'access_key')
        secret_key = config.get('default', 'secret_key')

        # TODO: remove duplication -- reuse logic within downloaders/s3.py to get connected
        kwargs = {}
        if '.' in bucket_name:
            kwargs['calling_format']=OrdinaryCallingFormat()
        conn = boto.connect_s3(access_key, secret_key, **kwargs)
        try:
            bucket = conn.get_bucket(bucket_name)
        except S3ResponseError as e:
            ui.message("E: Cannot access bucket %s by name" % bucket_name)
            all_buckets = conn.get_all_buckets()
            all_bucket_names = [b.name for b in all_buckets]
            ui.message("I: Found following buckets %s" % ', '.join(all_bucket_names))
            if bucket_name in all_bucket_names:
                bucket = all_buckets[all_bucket_names.index(bucket_name)]
            else:
                raise RuntimeError("E: no bucket named %s thus exiting" % bucket_name)
    else:
        # TODO: expose credentials
        # We don't need any provider here really but only credentials
        from datalad.downloaders.providers import Providers
        providers = Providers.from_config_files()
        provider = providers.get_provider(loc)

        if not provider:
            raise ValueError(
                "Don't know how to deal with this url %s -- no provider defined for %s. "
                "Define a new provider (DOCS: TODO) or specify just s3cmd config file instead for now."
                % loc
            )
        downloader = provider.get_downloader(loc)

        # should authenticate etc, and when ready we will ask for a bucket ;)
        bucket = downloader.access(lambda url: downloader.bucket, loc)

    info = []
    for iname, imeth in [
        ("Versioning", bucket.get_versioning_status),
        ("   Website", bucket.get_website_endpoint),
        ("       ACL", bucket.get_acl),
    ]:
        try:
            ival = imeth()
        except Exception as e:
            ival = str(e).split('\n')[0]
        info.append(" {iname}: {ival}".format(**locals()))
    ui.message("Bucket info:\n %s" % '\n '.join(info))

    kwargs = {} if recursive else {'delimiter': '/'}

    ACCESS_METHODS = [
        bucket.list_versions,
        bucket.list
    ]

    prefix_all_versions = None
    got_versioned_list = False
    for acc in ACCESS_METHODS:
        try:
            prefix_all_versions = list(acc(prefix, **kwargs))
            got_versioned_list = acc is bucket.list_versions
            break
        except Exception as exc:
            lgr.debug("Failed to access via %s: %s", acc, exc_str(exc))

    if not prefix_all_versions:
        ui.error("No output was provided for prefix %r" % prefix)
    else:
        max_length = max((len(e.name) for e in prefix_all_versions))
        max_size_length = max((len(str(getattr(e, 'size', 0))) for e in prefix_all_versions))

    results = []
    for e in prefix_all_versions:
        results.append(e)
        if isinstance(e, Prefix):
            ui.message("%s" % (e.name, ),)
            continue

        base_msg = ("%%-%ds %%s" % max_length) % (e.name, e.last_modified)
        if isinstance(e, Key):
            if got_versioned_list and not (e.is_latest or all_):
                lgr.debug(
                    "Skipping Key since not all versions requested: %s", e)
                # Skip this one
                continue
            ui.message(base_msg + " %%%dd" % max_size_length % e.size, cr=' ')
            # OPT: delayed import
            from datalad.support.s3 import get_key_url
            url = get_key_url(e, schema='http')
            try:
                _ = urlopen(Request(url))
                urlok = "OK"
            except HTTPError as err:
                urlok = "E: %s" % err.code

            try:
                acl = e.get_acl()
            except S3ResponseError as exc:
                acl = exc.code if exc.code in ('AccessDenied',) else str(exc)

            content = ""
            if list_content:
                # IO intensive, make an option finally!
                try:
                    # _ = e.next()[:5]  if we are able to fetch the content
                    kwargs = dict(version_id=e.version_id)
                    if list_content in {'full', 'first10'}:
                        if list_content in 'first10':
                            kwargs['headers'] = {'Range': 'bytes=0-9'}
                        content = repr(e.get_contents_as_string(**kwargs))
                    elif list_content == 'md5':
                        digest = md5()
                        digest.update(e.get_contents_as_string(**kwargs))
                        content = digest.hexdigest()
                    else:
                        raise ValueError(list_content)
                    # content = "[S3: OK]"
                except S3ResponseError as err:
                    content = str(err)
                finally:
                    content = " " + content
            ui.message(
                "ver:%-32s  acl:%s  %s [%s]%s"
                % (getattr(e, 'version_id', None),
                   acl, url, urlok, content)
                if long_ else ''
            )
        else:
            ui.message(base_msg + " " + str(type(e)).split('.')[-1].rstrip("\"'>"))
    return results