def download(dataset, path='./', quiet=False, subdir=False, debug=False):
    """Download the raw data files for a dataset without installing them.

    Args:
        dataset: name of the dataset to download.
        path: directory the files are written to (defaults to cwd).
        quiet: suppress engine console output.
        subdir: keep the dataset's subdirectory structure.
        debug: re-raise download errors instead of only printing them.

    Returns:
        The download engine that was used. (Previously the function
        returned ``None``; returning the engine is backward-compatible
        and matches the other download/install entry points.)

    Raises:
        ValueError: if no script matches ``dataset``.
    """
    args = {
        'dataset': dataset,
        'command': 'download',
        'path': path,
        'subdir': subdir,
        'quiet': quiet
    }
    engine = choose_engine(args)
    script_list = SCRIPT_LIST()
    # An empty catalog or script directory means the dataset scripts were
    # never fetched; pull them before trying to match the dataset name.
    if not script_list or not os.listdir(SCRIPT_WRITE_PATH):
        check_for_updates()
        script_list = SCRIPT_LIST()
    scripts = name_matches(script_list, args['dataset'])
    if scripts:
        for script in scripts:
            print("=> Downloading", script.name)
            try:
                script.download(engine, debug=debug)
                script.engine.final_cleanup()
            except Exception as e:
                # Best-effort: report and continue unless debugging.
                print(e)
                if debug:
                    raise
    else:
        message = "Run retriever.datasets() to see a list of currently " \
                  "available datasets."
        raise ValueError(message)
    return engine
def _install(args, use_cache, debug):
    """Install datasets for retriever.

    Args:
        args: dict of parsed CLI options; must contain 'dataset'.
        use_cache: reuse previously downloaded raw files when True.
        debug: re-raise installation errors instead of only printing them.

    Raises:
        ValueError: if no script matches ``args['dataset']``.
    """
    engine = choose_engine(args)
    engine.use_cache = use_cache
    script_list = SCRIPT_LIST()
    # First run: the catalog or the script directory is empty, so fetch
    # the dataset scripts and rebuild the catalog.
    if not script_list or not os.listdir(SCRIPT_WRITE_PATH):
        check_for_updates()
        script_list = SCRIPT_LIST(force_compile=False)
    data_sets_scripts = name_matches(script_list, args['dataset'])
    if data_sets_scripts:
        for data_sets_script in data_sets_scripts:
            print("=> Installing", data_sets_script.name)
            try:
                data_sets_script.download(engine, debug=debug)
                data_sets_script.engine.final_cleanup()
            except Exception as e:
                print(e)
                if debug:
                    raise
    else:
        # BUG FIX: added the missing space ("datasets()to" -> "datasets() to")
        # in the user-facing hint.
        message = "The dataset \"{}\" isn't available in the Retriever. " \
                  "Run retriever.datasets() to list the currently available " \
                  "datasets".format(args['dataset'])
        raise ValueError(message)
def _install(args, use_cache, debug):
    """Install datasets for retriever.

    Args:
        args: dict of parsed CLI options; must contain 'dataset'.
        use_cache: reuse previously downloaded raw files when True.
        debug: re-raise installation errors instead of only printing them.

    Returns:
        The engine used, so callers can inspect what was loaded.

    Raises:
        ValueError: if no script matches ``args['dataset']``.
    """
    engine = choose_engine(args)
    engine.use_cache = use_cache
    script_list = SCRIPT_LIST()
    # First run: nothing cached locally, so fetch the dataset scripts.
    if not (script_list or os.listdir(SCRIPT_WRITE_PATH)):
        check_for_updates()
        script_list = SCRIPT_LIST()
    data_sets_scripts = name_matches(script_list, args['dataset'])
    if data_sets_scripts:
        for data_sets_script in data_sets_scripts:
            try:
                # Give each script a fresh (empty) table registry.
                engine.script_table_registry = OrderedDict()
                data_sets_script.download(engine, debug=debug)
                data_sets_script.engine.final_cleanup()
            except Exception as e:
                print(e)
                if debug:
                    raise
    else:
        # BUG FIX: added the missing space ("datasets()to" -> "datasets() to")
        # in the user-facing hint.
        message = "Run retriever.datasets() to list the currently available " \
                  "datasets."
        raise ValueError(message)
    return engine
def download(dataset, path='./', quiet=False, sub_dir='', debug=False, use_cache=True):
    """Download scripts for retriever.

    Handles regular catalog datasets plus dynamically created
    ``socrata-<id>`` and ``rdataset-<package>-<name>`` datasets.

    Returns:
        The engine used for the download (``None`` for an empty Socrata
        resource, matching the original early-return behavior).

    Raises:
        ValueError: if the name matches no script and is not a
        socrata/rdataset name.
    """
    args = {
        'dataset': dataset,
        'command': 'download',
        'path': path,
        'sub_dir': sub_dir,
        'quiet': quiet
    }
    engine = choose_engine(args)
    engine.use_cache = use_cache
    script_list = SCRIPT_LIST()
    if not script_list or not os.listdir(SCRIPT_WRITE_PATH):
        check_for_updates()
        script_list = SCRIPT_LIST()
    scripts = name_matches(script_list, args['dataset'])
    if scripts:
        for script in scripts:
            print("=> Downloading", script.name)
            try:
                script.download(engine, debug=debug)
                script.engine.final_cleanup()
            except Exception as e:
                print(e)
                if debug:
                    raise
    # BUG FIX: name_matches() yields an empty list (not None) when nothing
    # matches, so the original `scripts is None` guards made the socrata and
    # rdataset branches unreachable; `not scripts` covers both None and [].
    elif args['dataset'].startswith('socrata') and not scripts:
        socrata_id = args['dataset'].split('-', 1)[1]
        resource = find_socrata_dataset_by_id(socrata_id)
        if "error" in resource.keys():
            if resource["datatype"][0] == "map":
                print("{} because map type datasets are not supported".format(
                    resource["error"]))
            else:
                print("{} because it is of type {} and not tabular".format(
                    resource["error"], resource["datatype"][1]))
        elif len(resource.keys()) == 0:
            # Empty resource: nothing to download (returns None as before).
            return
        else:
            print("=> Downloading", args['dataset'])
            name = f"socrata-{socrata_id}"
            create_socrata_dataset(engine, name, resource)
    elif not scripts and args['dataset'].startswith('rdataset'):
        print("=> Downloading", args['dataset'])
        rdataset = args['dataset'].split('-')
        update_rdataset_catalog()
        package, dataset_name = rdataset[1], rdataset[2]
        create_rdataset(engine, package, dataset_name)
    else:
        message = "Run retriever.datasets() to see the list of currently " \
                  "available datasets."
        raise ValueError(message)
    return engine
def _install(args, use_cache, debug):
    """Install datasets for retriever.

    Dispatches between two install paths:
      * committed-archive installs -- a ``.zip`` dataset path or a
        provenance ``hash_value`` -- handled by ``install_committed``;
      * regular script-based installs matched via ``name_matches``.

    Returns the engine used, so callers can inspect what was loaded.
    Raises ValueError when no script matches ``args['dataset']``.
    """
    engine = choose_engine(args)
    engine.use_cache = use_cache

    if args['dataset'].endswith('.zip') or args['hash_value']:
        # Committed-archive path: install from an archive file instead of
        # running a dataset script.
        path_to_archive = args['dataset']
        if args['hash_value']:
            # Resolve the archive inside the provenance directory:
            # <PROVENANCE_DIR>/<dataset>/<dataset>-<hash>.zip
            path_to_archive = os.path.join(
                PROVENANCE_DIR, args['dataset'],
                '{}-{}.zip'.format(args['dataset'], args['hash_value'][0]))
        if not os.path.exists(path_to_archive):
            # NOTE(review): this only warns; install_committed is still
            # called with the missing path -- confirm this is intentional.
            print('The committed file does not exist.')
        engine = install_committed(path_to_archive,
                                   engine,
                                   force=args.get('force', False))
        return engine
    script_list = SCRIPT_LIST()
    if not (script_list or os.listdir(SCRIPT_WRITE_PATH)):
        # First run: fetch the dataset scripts, then rebuild the catalog.
        check_for_updates()
        script_list = SCRIPT_LIST()
    data_sets_scripts = name_matches(script_list, args['dataset'])
    if data_sets_scripts:
        for data_sets_script in data_sets_scripts:
            try:
                # Give each script a fresh (empty) table registry.
                engine.script_table_registry = OrderedDict()
                data_sets_script.download(engine, debug=debug)
                data_sets_script.engine.final_cleanup()
            except Exception as e:
                # Best-effort: report and continue unless debugging.
                print(e)
                if debug:
                    raise
    else:
        message = "Run retriever.datasets() to list the currently available " \
                  "datasets."
        raise ValueError(message)
    return engine
def download(dataset, path='./', quiet=False, sub_dir='', debug=False, use_cache=True):
    """Download scripts for retriever.

    Resolves ``dataset`` against the script catalog (fetching the catalog
    first if it is empty), downloads every matching script with the chosen
    engine, and returns that engine. Raises ValueError when nothing matches.
    """
    options = {
        'dataset': dataset,
        'command': 'download',
        'path': path,
        'sub_dir': sub_dir,
        'quiet': quiet
    }
    engine = choose_engine(options)
    engine.use_cache = use_cache

    catalog = SCRIPT_LIST()
    catalog_missing = not catalog or not os.listdir(SCRIPT_WRITE_PATH)
    if catalog_missing:
        check_for_updates()
        catalog = SCRIPT_LIST()

    matches = name_matches(catalog, options['dataset'])
    if not matches:
        raise ValueError("Run retriever.datasets() to see the list of currently "
                         "available datasets.")

    for script in matches:
        print("=> Downloading", script.name)
        try:
            script.download(engine, debug=debug)
            script.engine.final_cleanup()
        except Exception as error:
            print(error)
            if debug:
                raise
    return engine
# Post-install hook: append a shell snippet so argcomplete registers the
# retriever CLI for tab-completion in new shells, then activate it for the
# current session. Best-effort: failures here must never break installation.

# if platform is OS X use "~/.bash_profile"
if current_platform == "darwin":
    bash_file = "~/.bash_profile"
# if platform is Linux use "~/.bashrc"
elif current_platform == "linux":
    bash_file = "~/.bashrc"
# else write and discard
else:
    bash_file = "/dev/null"

argcomplete_command = 'eval "$(register-python-argcomplete retriever)"'
with open(os.path.expanduser(bash_file), "a+") as bashrc:
    bashrc.seek(0)
    # register retriever for arg-completion if not already registered
    # whenever a new shell is spawned (keeps the rc file idempotent)
    if argcomplete_command not in bashrc.read():
        bashrc.write(argcomplete_command + "\n")
# NOTE: the context manager closes the file; the old explicit close() call
# after the with-block was redundant and has been removed.
os.system("activate-global-python-argcomplete")
# register for the current shell
os.system(argcomplete_command)

try:
    from retriever.compile import compile
    from retriever.lib.repository import check_for_updates
    check_for_updates(False)
    compile()
except Exception:
    # Best-effort script update/compile; ignore any failure (e.g. offline).
    pass
def main():
    """This function launches the Data Retriever."""
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        args = parser.parse_args()

        # Fetch dataset scripts on first run. Skipped for reset/update
        # (they manage the scripts themselves) and when the script search
        # path already contains files.
        if args.command not in ['reset', 'update'] \
                and not os.path.isdir(SCRIPT_SEARCH_PATHS[1]) \
                and not [f for f in os.listdir(SCRIPT_SEARCH_PATHS[-1])
                         if os.path.exists(SCRIPT_SEARCH_PATHS[-1])]:
            check_for_updates()
            reload_scripts()

        script_list = SCRIPT_LIST()

        # 'install' requires an engine sub-command; show install help if absent.
        if args.command == "install" and not args.engine:
            parser.parse_args(['install', '-h'])

        if args.quiet:
            # Silence all further console output.
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            script_list = reload_scripts()

        if args.command == 'defaults':
            # Print each engine's required options and their defaults.
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return

        if args.command == 'update':
            check_for_updates()
            reload_scripts()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print("\nDataset: {}".format(dataset.name))
                    print("Citation: {}".format(dataset.citation))
                    print("Description: {}\n".format(dataset.description))
            return

        elif args.command == 'license':
            if args.dataset is None:
                print(LICENSE)
            else:
                dataset_license = license(args.dataset)
                if dataset_license:
                    print(dataset_license)
                else:
                    print("There is no license information for {}".format(
                        args.dataset))
            return

        elif args.command == 'new':
            # Create a new sample dataset script at the given filename.
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            return

        elif args.command == 'reset':
            reset_retriever(args.scope)
            return

        elif args.command == 'new_json':
            # create new JSON script
            create_json()
            return

        elif args.command == 'edit_json':
            # edit existing JSON script
            json_file = get_script_filename(args.dataset.lower())
            edit_json(json_file)
            return

        elif args.command == 'autocreate':
            # Exactly one of -f (file) / -d (directory) must be given.
            if sum([args.f, args.d]) == 1:
                file_flag = True if args.f else False
                create_package(args.path, args.dt, file_flag, args.o,
                               args.skip_lines)
            else:
                print('Please use one and only one of the flags -f -d')
            return

        elif args.command == 'delete_json':
            # delete existing JSON script from home directory and or script
            # directory if exists in current dir
            confirm = input("Really remove " + args.dataset.lower() +
                            " and all its contents? (y/N): ")
            if confirm.lower().strip() in ['y', 'yes']:
                json_file = get_script_filename(args.dataset.lower())
                delete_json(json_file)
            return

        if args.command == 'ls':
            # scripts should never be empty because check_for_updates is run
            # on SCRIPT_LIST init
            if not (args.l or args.k or isinstance(args.v, list)):
                # Plain listing of every dataset name.
                all_scripts = dataset_names()
                print("Available datasets : {}\n".format(len(all_scripts)))
                from retriever import lscolumns
                lscolumns.printls(all_scripts)
            elif isinstance(args.v, list):
                # Verbose listing (-v), optionally limited to named datasets.
                if args.v:
                    try:
                        all_scripts = [
                            get_script(dataset) for dataset in args.v
                        ]
                    except KeyError:
                        all_scripts = []
                        print("Dataset(s) is not found.")
                else:
                    all_scripts = datasets()
                count = 1
                for script in all_scripts:
                    print("{count}. {title}\n {name}\n"
                          "{keywords}\n{description}\n"
                          "{licenses}\n{citation}\n"
                          "".format(
                              count=count,
                              title=script.title,
                              name=script.name,
                              keywords=script.keywords,
                              description=script.description,
                              licenses=str(script.licenses[0]['name']),
                              citation=script.citation,
                          ))
                    count += 1
            else:
                # Filtered listing by license (-l) and/or keyword (-k).
                param_licenses = args.l if args.l else None
                keywords = args.k if args.k else None
                # search
                searched_scripts = datasets(keywords, param_licenses)
                if not searched_scripts:
                    print("No available datasets found")
                else:
                    print("Available datasets : {}\n".format(
                        len(searched_scripts)))
                    count = 1
                    for script in searched_scripts:
                        print("{count}. {title}\n{name}\n"
                              "{keywords}\n{licenses}\n".format(
                                  count=count,
                                  title=script.title,
                                  name=script.name,
                                  keywords=script.keywords,
                                  licenses=str(script.licenses[0]['name']),
                              ))
                        count += 1
            return

        # Remaining commands (e.g. install) need a database engine.
        engine = choose_engine(args.__dict__)

        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            # Suppress tracebacks for end users when not debugging.
            sys.tracebacklimit = 0

        if hasattr(args, 'debug') and args.not_cached:
            engine.use_cache = False
        else:
            engine.use_cache = True

        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # Ctrl-C skips the current dataset without a traceback.
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print(
                "Run 'retriever ls' to see a list of currently available datasets."
            )
def main():
    """This function launches the Data Retriever."""
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        args = parser.parse_args()
        reset_or_update = args.command in ["reset", "update"]
        # Fetch dataset scripts on first run; reset/update manage the
        # scripts themselves, so they skip this bootstrap.
        if (not reset_or_update and
                not os.path.isdir(SCRIPT_SEARCH_PATHS[1]) and not [
                    f for f in os.listdir(SCRIPT_SEARCH_PATHS[-1])
                    if os.path.exists(SCRIPT_SEARCH_PATHS[-1])
                ]):
            check_for_updates()
            reload_scripts()
        script_list = SCRIPT_LIST()
        # 'install' requires an engine sub-command; show install help if absent.
        if args.command == "install" and not args.engine:
            parser.parse_args(["install", "-h"])
        if args.quiet:
            # Silence all further console output.
            sys.stdout = open(os.devnull, "w")
        if args.command == "help":
            parser.parse_args(["-h"])
        if hasattr(args, "compile") and args.compile:
            script_list = reload_scripts()
        if args.command == "defaults":
            # Print each engine's required options and their defaults.
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return
        if args.command == "update":
            check_for_updates()
            reload_scripts()
            return
        if args.command == "citation":
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                citations = get_script_citation(args.dataset)
                for citation in citations:
                    print("Citation: {}".format(citation))
            return
        if args.command == 'license':
            if args.dataset is None:
                print(LICENSE)
            else:
                dataset_license = license(args.dataset)
                if dataset_license:
                    print(dataset_license)
                else:
                    print("There is no license information for {}".format(
                        args.dataset))
            return
        if args.command == 'new':
            # Create a new sample dataset script at the given filename.
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            return
        if args.command == 'reset':
            reset_retriever(args.scope)
            return
        if args.command == 'autocreate':
            if args.c:
                # -c: check whether any known dataset already wraps this URL.
                url = args.path
                script_list = SCRIPT_LIST()
                flag = 0
                for script in script_list:
                    for dataset in script.tables:
                        if script.tables[dataset].url == url:
                            flag = 1
                            break
                if flag == 1:
                    print("File already exist in dataset " + str(script.name))
                else:
                    print("Dataset is not avaliable, Please download")
                return
            # Exactly one of -f (file) / -d (directory) must be given.
            if sum([args.f, args.d]) == 1:
                file_flag = bool(args.f)
                create_package(args.path, args.dt, file_flag, args.o,
                               args.skip_lines, args.e)
            else:
                print('Please use one and only one of the flags -f -d')
            return
        if args.command == 'ls':
            # scripts should never be empty because check_for_updates is run
            # on SCRIPT_LIST init
            if not any([args.l, args.k, args.v, args.s, args.rdataset]):
                # Combined offline/online listing; online-only datasets are
                # tagged False so the printer can mark them with *.
                all_scripts = dataset_names()
                from retriever import lscolumns
                all_scripts_combined = []
                for dataset in all_scripts['offline']:
                    all_scripts_combined.append((dataset, True))
                for dataset in all_scripts['online']:
                    if dataset in all_scripts['offline']:
                        continue
                    all_scripts_combined.append((dataset, False))
                all_scripts_combined = sorted(all_scripts_combined,
                                              key=lambda x: x[0])
                print("Available datasets : {}\n".format(
                    len(all_scripts_combined)))
                lscolumns.printls(all_scripts_combined)
                print("\nThe symbol * denotes the online datasets.")
                print(
                    "To see the full list of available online datasets, visit\n"
                    "https://github.com/weecology/retriever-recipes.")
            elif isinstance(args.s, list):
                # -s: interactive Socrata name search (needs inquirer).
                try:
                    theme = INQUIRER_THEME
                except NameError:
                    print("To use retriever ls -s, install inquirer")
                    exit()
                name_list = socrata_autocomplete_search(args.s)
                print("Autocomplete suggestions : Total {} results\n".format(
                    len(name_list)))
                if len(name_list):
                    question = [
                        inquirer.List('dataset name',
                                      message='Select the dataset name',
                                      choices=name_list)
                    ]
                    answer = inquirer.prompt(question,
                                             theme=INQUIRER_THEME,
                                             raise_keyboard_interrupt=True)
                    dataset_name = answer['dataset name']
                    metadata = socrata_dataset_info(dataset_name)
                    print(
                        "Dataset Information of {}: Total {} results\n".format(
                            dataset_name, len(metadata)))
                    for i in range(len(metadata)):
                        print("{}. {}\n \tID : {}\n"
                              "\tType : {}\n"
                              "\tDescription : {}\n"
                              "\tDomain : {}\n \tLink : {}\n".format(
                                  i + 1, metadata[i]["name"],
                                  metadata[i]["id"], metadata[i]["type"],
                                  metadata[i]["description"][:50] + "...",
                                  metadata[i]["domain"], metadata[i]["link"]))
            elif args.rdataset:
                # Rdatasets listing, optionally filtered by package (-p).
                if not isinstance(args.p, list) and not args.all:
                    display_all_rdataset_names()
                elif not isinstance(args.p, list) and args.all:
                    display_all_rdataset_names(package_name='all')
                else:
                    display_all_rdataset_names(package_name=args.p)
            elif isinstance(args.v, list):
                dataset_verbose_list(args.v)
            else:
                # Filtered listing by license (-l) and/or keyword (-k).
                param_licenses = args.l if args.l else None
                keywords = args.k if args.k else None
                # search
                searched_scripts = datasets(keywords, param_licenses)
                offline_mesg = "Available offline datasets : {}\n"
                online_mesg = "Available online datasets : {}\n"
                if not searched_scripts:
                    print("No available datasets found")
                else:
                    print(offline_mesg.format(len(
                        searched_scripts['offline'])))
                    count = 1
                    for script in searched_scripts['offline']:
                        print("{count}. {title}\n{name}\n"
                              "{keywords}\n{licenses}\n".format(
                                  count=count,
                                  title=script.title,
                                  name=script.name,
                                  keywords=script.keywords,
                                  licenses=str(script.licenses[0]['name'])
                                  if script.licenses and len(script.licenses)
                                  else str('N/A'),
                              ))
                        count += 1
                    count = 1
                    searched_scripts_offline = [
                        script.name for script in searched_scripts["offline"]
                    ]
                    searched_scripts_online = []
                    for script in searched_scripts['online']:
                        if script in searched_scripts_offline:
                            continue
                        searched_scripts_online.append(script)
                    print(online_mesg.format(len(searched_scripts_online)))
                    for script in searched_scripts_online:
                        print("{count}. {name}".format(count=count,
                                                       name=script))
                        count += 1
            return
        if args.command == 'commit':
            commit(
                dataset=args.dataset,
                path=os.path.normpath(args.path) if args.path else None,
                commit_message=args.message,
            )
            return
        if args.command == 'log':
            commit_log(dataset=args.dataset)
            return

        # Remaining commands (e.g. install) need a database engine.
        engine = choose_engine(args.__dict__)
        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            # Suppress tracebacks for end users when not debugging.
            sys.tracebacklimit = 0
        if hasattr(args, 'debug') and args.not_cached:
            use_cache = False
        else:
            use_cache = True
        engine.use_cache = use_cache

        if args.dataset is not None:
            # Dynamic socrata/rdataset names bypass the catalog match and
            # are handled inside _install.
            if args.dataset.startswith(('socrata', 'rdataset')):
                scripts = True
            else:
                scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            _install(vars(args), debug=debug, use_cache=use_cache)
            print("Done!")
        else:
            print(
                "Run 'retriever ls' to see a list of currently available datasets."
            )
def main():
    """This function launches the Data Retriever."""
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        args = parser.parse_args()
        reset_or_update = args.command in ["reset", "update"]
        # Fetch dataset scripts on first run; reset/update manage the
        # scripts themselves, so they skip this bootstrap.
        if (not reset_or_update and
                not os.path.isdir(SCRIPT_SEARCH_PATHS[1]) and not [
                    f for f in os.listdir(SCRIPT_SEARCH_PATHS[-1])
                    if os.path.exists(SCRIPT_SEARCH_PATHS[-1])
                ]):
            check_for_updates()
            reload_scripts()
        script_list = SCRIPT_LIST()
        # 'install' requires an engine sub-command; show install help if absent.
        if args.command == "install" and not args.engine:
            parser.parse_args(["install", "-h"])
        if args.quiet:
            # Silence all further console output.
            sys.stdout = open(os.devnull, "w")
        if args.command == "help":
            parser.parse_args(["-h"])
        if hasattr(args, "compile") and args.compile:
            script_list = reload_scripts()
        if args.command == "defaults":
            # Print each engine's required options and their defaults.
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return
        if args.command == "update":
            check_for_updates()
            reload_scripts()
            return
        if args.command == "citation":
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                citations = get_script_citation(args.dataset)
                for citation in citations:
                    print("Citation: {}".format(citation))
            return
        if args.command == 'license':
            if args.dataset is None:
                print(LICENSE)
            else:
                dataset_license = license(args.dataset)
                if dataset_license:
                    print(dataset_license)
                else:
                    print("There is no license information for {}".format(
                        args.dataset))
            return
        if args.command == 'new':
            # Create a new sample dataset script at the given filename.
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            return
        if args.command == 'reset':
            reset_retriever(args.scope)
            return
        if args.command == 'autocreate':
            # Exactly one of -f (file) / -d (directory) must be given.
            if sum([args.f, args.d]) == 1:
                file_flag = bool(args.f)
                create_package(args.path, args.dt, file_flag, args.o,
                               args.skip_lines, args.e)
            else:
                print('Please use one and only one of the flags -f -d')
            return
        if args.command == 'ls':
            # scripts should never be empty because check_for_updates is run
            # on SCRIPT_LIST init
            if not (args.l or args.k or isinstance(args.v, list)):
                # Combined offline/online listing; online-only datasets are
                # tagged False so the printer can mark them with *.
                all_scripts = dataset_names()
                from retriever import lscolumns
                all_scripts_combined = []
                for dataset in all_scripts['offline']:
                    all_scripts_combined.append((dataset, True))
                for dataset in all_scripts['online']:
                    if dataset in all_scripts['offline']:
                        continue
                    all_scripts_combined.append((dataset, False))
                all_scripts_combined = sorted(all_scripts_combined,
                                              key=lambda x: x[0])
                print("Available datasets : {}\n".format(
                    len(all_scripts_combined)))
                lscolumns.printls(all_scripts_combined)
                print("\nThe symbol * denotes the online datasets.")
                print(
                    "To see the full list of available online datasets, visit\n"
                    "https://github.com/weecology/retriever-recipes.")
            elif isinstance(args.v, list):
                # Verbose listing (-v), optionally limited to named datasets.
                online_scripts = []
                if args.v:
                    try:
                        all_scripts = [
                            get_script(dataset) for dataset in args.v
                        ]
                    except KeyError:
                        all_scripts = []
                        print("Dataset(s) is not found.")
                else:
                    scripts = datasets()
                    all_scripts = scripts['offline']
                    online_scripts = scripts['online']
                count = 1
                if not args.v:
                    print("Offline datasets : {}\n".format(len(all_scripts)))
                for script in all_scripts:
                    print("{count}. {title}\n {name}\n"
                          "{keywords}\n{description}\n"
                          "{licenses}\n{citation}\n"
                          "".format(
                              count=count,
                              title=script.title,
                              name=script.name,
                              keywords=script.keywords,
                              description=script.description,
                              licenses=str(script.licenses[0]['name']),
                              citation=script.citation,
                          ))
                    count += 1
                count = 1
                # Online names that also exist offline are not repeated.
                offline_scripts = [script.name for script in all_scripts]
                set_online_scripts = []
                for script in online_scripts:
                    if script in offline_scripts:
                        continue
                    set_online_scripts.append(script)
                if not args.v:
                    print("Online datasets : {}\n".format(
                        len(set_online_scripts)))
                for script in set_online_scripts:
                    print("{count}. {name}".format(count=count, name=script))
                    count += 1
            else:
                # Filtered listing by license (-l) and/or keyword (-k).
                param_licenses = args.l if args.l else None
                keywords = args.k if args.k else None
                # search
                searched_scripts = datasets(keywords, param_licenses)
                offline_mesg = "Available offline datasets : {}\n"
                online_mesg = "Available online datasets : {}\n"
                if not searched_scripts:
                    print("No available datasets found")
                else:
                    print(offline_mesg.format(len(
                        searched_scripts['offline'])))
                    count = 1
                    for script in searched_scripts['offline']:
                        print("{count}. {title}\n{name}\n"
                              "{keywords}\n{licenses}\n".format(
                                  count=count,
                                  title=script.title,
                                  name=script.name,
                                  keywords=script.keywords,
                                  licenses=str(script.licenses[0]['name']),
                              ))
                        count += 1
                    count = 1
                    searched_scripts_offline = [
                        script.name for script in searched_scripts["offline"]
                    ]
                    searched_scripts_online = []
                    for script in searched_scripts['online']:
                        if script in searched_scripts_offline:
                            continue
                        searched_scripts_online.append(script)
                    print(online_mesg.format(len(searched_scripts_online)))
                    for script in searched_scripts_online:
                        print("{count}. {name}".format(count=count,
                                                       name=script))
                        count += 1
            return
        if args.command == 'commit':
            commit(
                dataset=args.dataset,
                path=os.path.normpath(args.path) if args.path else None,
                commit_message=args.message,
            )
            return
        if args.command == 'log':
            commit_log(dataset=args.dataset)
            return

        # Remaining commands (e.g. install) need a database engine.
        engine = choose_engine(args.__dict__)
        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            # Suppress tracebacks for end users when not debugging.
            sys.tracebacklimit = 0
        if hasattr(args, 'debug') and args.not_cached:
            use_cache = False
        else:
            use_cache = True
        engine.use_cache = use_cache

        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            # Committed archives (.zip path or hash value) are delegated
            # to _install instead of the per-script loop below.
            if args.dataset.endswith('.zip') or hasattr(args, 'hash_value'):
                _install(vars(args), debug=debug, use_cache=use_cache)
                return
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # Ctrl-C skips the current dataset without a traceback.
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print(
                "Run 'retriever ls' to see a list of currently available datasets."
            )
# Sphinx build-time helper: generate "datasets.rst", a reST page listing
# every dataset script the retriever knows about.
from retriever import VERSION,COPYRIGHT
from retriever.lib.repository import check_for_updates
from retriever import SCRIPT_LIST

datasetfile_title = """
==================
Datasets Available
==================


"""

# Refresh the script catalog before generating the page.
check_for_updates(graphical=False)
script_list = SCRIPT_LIST()

# Create the .rst file for the available datasets; the context manager
# guarantees the file is closed even if a script's metadata raises.
with open("datasets.rst", "w") as datasetfile:
    # write the title of dataset rst file
    datasetfile.write(datasetfile_title)
    # get info from the scripts: number, name, shortname and a reference
    # link derived from the script metadata.
    for script_num, script in enumerate(script_list, start=1):
        if script.ref.strip():
            reference_link = script.ref
        elif bool(script.urls.values()):
            # BUG FIX: dict.values() is a view in Python 3 and does not
            # support indexing; materialize it before taking the first URL.
            reference_link = list(script.urls.values())[0].rpartition('/')[0]
        else:
            reference_link = ""
        datasetfile.write("| " + str(script_num) +
                          ". **{}** \n| shortname: {}\n| reference: {}\n\n".format(
                              script.name, script.shortname, reference_link))

needs_sphinx = '1.3'
from retriever.lib.repository import check_for_updates def to_str(object, object_encoding=encoding): return str(object).encode('UTF-8').decode(encoding) # Create the .rst file for the available datasets datasetfile = open_fw("datasets_list.rst") datasetfile_title = """================== Datasets Available ================== """ check_for_updates() reload_scripts() script_list = SCRIPT_LIST() # write the title of dataset rst file # ref:http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html datasetfile.write(datasetfile_title) # get info from the scripts using specified encoding for script_num, script in enumerate(script_list, start=1): reference_link = '' if script.ref.strip(): reference_link = script.ref elif hasattr(script, 'homepage'): reference_link = script.homepage elif not reference_link.strip():
def main():
    """This function launches the Data Retriever."""
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        script_list = SCRIPT_LIST()
        args = parser.parse_args()
        # 'install' requires an engine sub-command; show install help if absent.
        if args.command == "install" and not args.engine:
            parser.parse_args(['install','-h'])
        if args.quiet:
            # Silence all further console output.
            sys.stdout = open(os.devnull, 'w')
        if args.command == 'help':
            parser.parse_args(['-h'])
        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)
        if args.command == 'defaults':
            # Print each engine's required options and their defaults.
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return
        if args.command == 'update':
            check_for_updates()
            script_list = SCRIPT_LIST()
            return
        elif args.command == 'citation':
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print("\nDataset: {}".format(dataset.name))
                    print("Citation: {}".format(dataset.citation))
                    print("Description: {}\n".format(dataset.description))
            return
        elif args.command == 'new':
            # Create a new sample dataset script at the given filename.
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            return
        elif args.command == 'reset':
            reset_retriever(args.scope)
            return
        elif args.command == 'new_json':
            # create new JSON script
            create_json()
            return
        elif args.command == 'edit_json':
            # edit existing JSON script
            # Substring match of the given name against the .json scripts
            # in the home script directory.
            for json_file in [filename
                              for filename in os.listdir(
                                  os.path.join(HOME_DIR, 'scripts'))
                              if filename[-5:] == '.json']:
                if json_file.lower().find(args.filename.lower()) != -1:
                    edit_json(json_file)
                    return
            raise Exception("File not found")
        elif args.command == 'delete_json':
            # delete existing JSON script
            for json_file in [filename
                              for filename in os.listdir(
                                  os.path.join(HOME_DIR, 'scripts'))
                              if filename[-5:] == '.json']:
                if json_file.lower().find(args.dataset.lower()) != -1:
                    confirm = input("Really remove " + json_file +
                                    " and all its contents? (y/N): ")
                    if confirm.lower().strip() in ['y', 'yes']:
                        # raise Exception(json_file)
                        os.remove(os.path.join(HOME_DIR, 'scripts', json_file))
                        try:
                            # Also remove the compiled .py twin if present.
                            os.remove(os.path.join(
                                HOME_DIR, 'scripts', json_file[:-4] + 'py'))
                        except:
                            # Not compiled yet
                            pass
                    return
            raise Exception("File not found")
        if args.command == 'ls':
            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print("No scripts are currently available. Updating scripts now...")
                check_for_updates()
                print("\n\nScripts downloaded.\n")
                script_list = SCRIPT_LIST()
            all_scripts = []
            for script in script_list:
                if script.shortname:
                    if args.l is not None:
                        # -l: every search term must appear in the combined
                        # name/shortname/tags text for the script to match.
                        script_name = script.name + "\nShortname: " + \
                            script.shortname + "\n"
                        if script.tags:
                            script_name += "Tags: " + \
                                str([tag for tag in script.tags]) + "\n"
                        not_found = 0
                        for term in args.l:
                            if script_name.lower().find(term.lower()) == -1:
                                not_found = 1
                                break
                        if not_found == 0:
                            all_scripts.append(script_name)
                    else:
                        script_name = script.shortname
                        all_scripts.append(script_name)
            all_scripts = sorted(all_scripts, key=lambda s: s.lower())
            print("Available datasets : {}\n".format(len(all_scripts)))
            if args.l is None:
                from retriever import lscolumns
                lscolumns.printls(sorted(all_scripts, key=lambda s: s.lower()))
            else:
                count = 1
                for script in all_scripts:
                    print("%d. %s" % (count, script))
                    count += 1
            return

        # Remaining commands (e.g. install) need a database engine.
        engine = choose_engine(args.__dict__)
        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            # Suppress tracebacks for end users when not debugging.
            sys.tracebacklimit = 0
        if hasattr(args, 'debug') and args.not_cached:
            use_cache = False
        else:
            use_cache = True
        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug, use_cache=use_cache)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # Ctrl-C skips the current dataset without a traceback.
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print("The dataset {} isn't currently available in the Retriever".format(
                args.dataset))
            # NOTE(review): the closing quote after 'retriever ls is missing
            # in this message.
            print("Run 'retriever ls to see a list of currently available datasets")
def main():
    """This function launches the Data Retriever."""
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        script_list = SCRIPT_LIST()
        args = parser.parse_args()
        if args.quiet:
            # Silence all further console output.
            sys.stdout = open(os.devnull, 'w')
        if args.command == 'help':
            parser.parse_args(['-h'])
        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)
        if args.command == 'update':
            check_for_updates()
            script_list = SCRIPT_LIST()
            return
        elif args.command == 'citation':
            if args.dataset is None:
                # Read the project-level CITATION file shipped next to the
                # package directory.
                citation_path = os.path.join(os.path.split(__file__)[0],
                                             '../CITATION')
                print("\nCitation for retriever:\n")
                with open(citation_path) as citation_file:
                    print(citation_file.read())
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print("\nDataset: {}".format(dataset.name))
                    print("Citation: {}".format(dataset.citation))
                    print("Description: {}\n".format(dataset.description))
            return
        elif args.command == 'new':
            # Create a new sample dataset script at the given filename.
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            return
        elif args.command == 'reset':
            reset_retriever(args.scope)
            return
        elif args.command == 'new_json':
            # create new JSON script
            create_json()
            return
        elif args.command == 'edit_json':
            # edit existing JSON script
            for json_file in [filename
                              for filename in os.listdir(
                                  os.path.join(HOME_DIR, 'scripts'))
                              if filename[-5:] == '.json']:
                if json_file.lower().find(args.filename.lower()) != -1:
                    edit_json(json_file)
                    return
            raise Exception("File not found")
        elif args.command == 'delete_json':
            # delete existing JSON script
            for json_file in [filename
                              for filename in os.listdir(
                                  os.path.join(HOME_DIR, 'scripts'))
                              if filename[-5:] == '.json']:
                if json_file.lower().find(args.dataset.lower()) != -1:
                    confirm = input("Really remove " + json_file +
                                    " and all its contents? (y/N): ")
                    if confirm.lower().strip() in ['y', 'yes']:
                        # raise Exception(json_file)
                        os.remove(os.path.join(HOME_DIR, 'scripts', json_file))
                        try:
                            # Also remove the compiled .py twin if present.
                            os.remove(os.path.join(
                                HOME_DIR, 'scripts', json_file[:-4] + 'py'))
                        except:
                            # Not compiled yet
                            pass
                    return
            raise Exception("File not found")
        if args.command == 'ls':
            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print("No scripts are currently available. Updating scripts now...")
                check_for_updates()
                print("\n\nScripts downloaded.\n")
                script_list = SCRIPT_LIST()
            all_scripts = []
            for script in script_list:
                if script.shortname:
                    if args.l is not None:
                        # -l: every search term must appear in the combined
                        # name/shortname/tags text for the script to match.
                        script_name = script.name + "\nShortname: " + \
                            script.shortname + "\n"
                        if script.tags:
                            script_name += "Tags: " + \
                                str([tag for tag in script.tags]) + "\n"
                        not_found = 0
                        for term in args.l:
                            if script_name.lower().find(term.lower()) == -1:
                                not_found = 1
                                break
                        if not_found == 0:
                            all_scripts.append(script_name)
                    else:
                        script_name = script.shortname
                        all_scripts.append(script_name)
            all_scripts = sorted(all_scripts, key=lambda s: s.lower())
            print("Available datasets : {}\n".format(len(all_scripts)))
            if args.l is None:
                from retriever import lscolumns
                lscolumns.printls(sorted(all_scripts, key=lambda s: s.lower()))
            else:
                count = 1
                for script in all_scripts:
                    print("%d. %s" % (count, script))
                    count += 1
            return

        # Remaining commands (e.g. install) need a database engine.
        engine = choose_engine(args.__dict__)
        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            # Suppress tracebacks for end users when not debugging.
            sys.tracebacklimit = 0
        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # Ctrl-C skips the current dataset without a traceback.
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print("The dataset {} isn't currently available in the Retriever".format(
                args.dataset))
            # NOTE(review): the closing quote after 'retriever ls is missing
            # in this message.
            print("Run 'retriever ls to see a list of currently available datasets")
def main():
    """This function launches the EcoData Retriever.

    Python 2 entry point. With no arguments (or 'gui') it launches the
    wx GUI; otherwise it dispatches on the parsed sub-command
    (update/citation/gui/new/reset/ls) and finally installs the named
    dataset(s) with the chosen engine.
    """
    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == 'gui'):
        # if no command line args are passed, launch GUI
        # NOTE(review): update dialogs are suppressed on macOS
        # (current_platform == 'darwin') — presumably a wx quirk; confirm.
        check_for_updates(graphical=False if current_platform == 'darwin' else True)
        lists = get_lists()
        from retriever.app.main import launch_app
        launch_app(lists)
    else:
        # otherwise, parse them
        script_list = SCRIPT_LIST()
        args = parser.parse_args()

        if args.quiet:
            # silence all further console output for the rest of the run
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            # force recompilation of the dataset scripts
            script_list = SCRIPT_LIST(force_compile=True)

        if args.command == 'update':
            check_for_updates(graphical=False)
            script_list = SCRIPT_LIST()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                # citation for the retriever itself, read from the CITATION file
                citation_path = os.path.join(os.path.split(__file__)[0], '../CITATION')
                print "\nCitation for retriever:\n"
                with open(citation_path) as citation_file:
                    print citation_file.read()
            else:
                # citation(s) for the matching dataset script(s)
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print ("\nCitation: {}".format(dataset.citation))
                    print ("Description: {}\n".format(dataset.description))
            return

        elif args.command == 'gui':
            lists = get_lists()
            from retriever.app.main import launch_app
            launch_app(lists)
            return

        elif args.command == 'new':
            # write a boilerplate script template to the given filename
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            return

        elif args.command == 'reset':
            reset_retriever(args.scope)
            return

        if args.command == 'ls' or args.dataset is None:
            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print "No scripts are currently available. Updating scripts now..."
                check_for_updates(graphical=False)
                print "\n\nScripts downloaded.\n"
                script_list = SCRIPT_LIST()
            all_scripts = []
            for script in script_list:
                if script.name:
                    if args.l != None:
                        # -l given: build a searchable blob of name/shortname/tags
                        # and keep the script only if every term matches
                        script_name = script.name + "\nShortname: " + script.shortname + "\n"
                        if script.tags:
                            script_name += "Tags: " + str([tag for tag in script.tags]) + "\n"
                        not_found = 0
                        for term in args.l:
                            if script_name.lower().find(term.lower()) == -1:
                                not_found = 1
                                break
                        if not_found == 0:
                            all_scripts.append(script_name)
                    else:
                        # no search terms: list shortnames only
                        script_name = script.shortname
                        all_scripts.append(script_name)
            all_scripts = sorted(all_scripts, key=lambda s: s.lower())
            print "Available datasets : {}\n".format(len(all_scripts))
            if args.l == None:
                import lscolumns
                lscolumns.printls(sorted(all_scripts, key=lambda s: s.lower()))
            else:
                count = 1
                for script in all_scripts:
                    print ("%d. %s" % (count, script))
                    count += 1
            return

        # install path: build the engine and download each matching dataset
        engine = choose_engine(args.__dict__)
        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
        scripts = name_matches(script_list, args.dataset)
        if scripts:
            for dataset in scripts:
                print "=> Installing", dataset.name
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # allow the user to skip a dataset without aborting the run
                    pass
                except Exception as e:
                    print e
                    if debug:
                        raise
            print "Done!"
        else:
            print "The dataset {} isn't currently available in the Retriever".format(args.dataset)
            print "Run 'retriever ls to see a list of currently available datasets"
from retriever import VERSION, COPYRIGHT from retriever.lib.repository import check_for_updates from retriever import SCRIPT_LIST # Create the .rst file for the available datasets datasetfile = open("datasets.rst", "w") datasetfile_title = """ ================== Datasets Available ================== """ check_for_updates(graphical=False) script_list = SCRIPT_LIST() # write the title of dataset rst file datasetfile.write(datasetfile_title) # get info from the scripts for script_num, script in enumerate(script_list, start=1): if script.ref.strip(): reference_link = script.ref elif bool(script.urls.values()): reference_link = script.urls.values()[0].rpartition('/')[0] else: reference_link = "" datasetfile.write("| " + str(script_num) + ". **{}** \n| shortname: {}\n| reference: {}\n\n".format( script.name, script.shortname, reference_link)) datasetfile.close()
def main():
    """This function launches the Data Retriever.

    Dispatches on the sub-command parsed from ``sys.argv`` (defaults,
    update, citation, license, new, reset, new_json, edit_json,
    delete_json, ls) and otherwise installs/downloads the named
    dataset(s) with the chosen engine.
    """
    # sub-commands and dataset names are matched case-insensitively
    sys.argv[1:] = [arg.lower() for arg in sys.argv[1:]]
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        # bootstrap the script catalog if it has never been downloaded
        if not os.path.isdir(SCRIPT_SEARCH_PATHS[1]) and not \
                [f for f in os.listdir(SCRIPT_SEARCH_PATHS[-1])
                 if os.path.exists(SCRIPT_SEARCH_PATHS[-1])]:
            check_for_updates()
        script_list = SCRIPT_LIST()
        args = parser.parse_args()

        if args.command == "install" and not args.engine:
            parser.parse_args(['install', '-h'])

        if args.quiet:
            # silence all further console output for the rest of the run
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)

        if args.command == 'defaults':
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return

        if args.command == 'update':
            check_for_updates(False)
            script_list = SCRIPT_LIST()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print("\nDataset: {}".format(dataset.name))
                    print("Citation: {}".format(dataset.citation))
                    print("Description: {}\n".format(dataset.description))
            return

        elif args.command == 'license':
            dataset_license = license(args.dataset)
            if dataset_license:
                print(dataset_license)
            else:
                print("There is no license information for {}".format(args.dataset))
            return

        elif args.command == 'new':
            # 'with' ensures the template file is closed even on a write error
            with open(args.filename, 'w') as f:
                f.write(sample_script)
            return

        elif args.command == 'reset':
            reset_retriever(args.scope)
            return

        elif args.command == 'new_json':
            # create new JSON script
            create_json()
            return

        elif args.command == 'edit_json':
            # edit existing JSON script
            json_file = get_script_filename(args.dataset.lower())
            edit_json(json_file)
            return

        elif args.command == 'delete_json':
            # delete existing JSON script from home directory and or
            # script directory if exists in current dir
            confirm = input("Really remove " + args.dataset.lower() +
                            " and all its contents? (y/N): ")
            if confirm.lower().strip() in ['y', 'yes']:
                json_file = get_script_filename(args.dataset.lower())
                delete_json(json_file)
            return

        if args.command == 'ls':
            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print("No scripts are currently available. Updating scripts now...")
                check_for_updates(False)
                print("\n\nScripts downloaded.\n")
            if args.l is None:
                all_scripts = datasets()
                print("Available datasets : {}\n".format(len(all_scripts)))
                from retriever import lscolumns
                lscolumns.printls(dataset_names())
            else:
                all_scripts = datasets(args.l[0])
                print("Available datasets : {}\n".format(len(all_scripts)))
                count = 1
                for script in all_scripts:
                    print("{}. {}".format(count, script.title))
                    print(script.name)
                    print(script.keywords)
                    print()
                    count += 1
            return

        engine = choose_engine(args.__dict__)

        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            sys.tracebacklimit = 0

        # BUG FIX: guard on the attribute actually read. The old code
        # checked hasattr(args, 'debug') and then read args.not_cached,
        # raising AttributeError for commands without a --not-cached flag.
        if hasattr(args, 'not_cached') and args.not_cached:
            engine.use_cache = False
        else:
            engine.use_cache = True

        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # allow the user to skip a dataset without aborting the run
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print("The dataset {} isn't currently available in the Retriever".format(
                args.dataset))
            # BUG FIX: closed the unbalanced quote around 'retriever ls'
            print("Run 'retriever ls' to see a list of currently available datasets")
def main():
    """This function launches the EcoData Retriever.

    Early Python 2 entry point: launches the GUI when called with no
    arguments (or 'gui'); otherwise handles update/citation/gui/new/ls
    and installs the named dataset(s) with the chosen engine.
    """
    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == 'gui'):
        # if no command line args are passed, launch GUI
        # NOTE(review): graphical update prompts are disabled on macOS — confirm why.
        check_for_updates(graphical=False if 'darwin' in platform.platform().lower() else True)
        lists = get_lists()
        from retriever.app.main import launch_app
        launch_app(lists)
    else:
        # otherwise, parse them
        script_list = SCRIPT_LIST()
        args = parser.parse_args()

        if args.quiet:
            # silence all further console output for the rest of the run
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)

        if args.command == 'update':
            check_for_updates(graphical=False)
            script_list = SCRIPT_LIST()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                # NOTE(review): this prints the CITATION file's *path* before
                # its contents — possibly intended to print a header instead.
                citation_path = os.path.join(os.path.split(__file__)[0], '../CITATION')
                print citation_path
                with open(citation_path) as citation_file:
                    print citation_file.read()
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print dataset.description
            return

        elif args.command == 'gui':
            lists = get_lists()
            from retriever.app.main import launch_app
            launch_app(lists)
            return

        elif args.command == 'new':
            # write a boilerplate script template to the given filename
            f = open(args.filename, 'w')
            f.write(sample_script)
            f.close()
            return

        if args.command == 'ls' or args.dataset is None:
            import lscolumns
            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print "No scripts are currently available. Updating scripts now..."
                check_for_updates(graphical=False)
                print "\n\nScripts downloaded.\n"
                script_list = SCRIPT_LIST()
            all_scripts = set([script.shortname for script in script_list])
            # tags may be '>'-separated hierarchies; flatten to upper-case groups
            all_tags = set(["ALL"] +
                           [tag.strip().upper()
                            for script in script_list
                            for tagset in script.tags
                            for tag in tagset.split('>')])
            print "Available datasets (%s):" % len(all_scripts)
            lscolumns.printls(sorted(list(all_scripts), key=lambda s: s.lower()))
            print "Groups:"
            lscolumns.printls(sorted(list(all_tags)))
            return

        # install path: build the engine and download each matching dataset
        engine = choose_engine(args.__dict__)
        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
        scripts = name_matches(script_list, args.dataset)
        if scripts:
            for dataset in scripts:
                print "=> Installing", dataset.name
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # allow the user to skip a dataset without aborting the run
                    pass
                except Exception as e:
                    print e
                    if debug:
                        raise
            print "Done!"
        else:
            print "The dataset %s isn't currently available in the Retriever" % (args.dataset)
            print "Run 'retriever -ls to see a list of currently available datasets"
def to_str(object, object_encoding=encoding): if sys.version_info >= (3, 0, 0): return str(object).encode('UTF-8').decode(encoding) return object # Create the .rst file for the available datasets datasetfile = open_fw("datasets_list.rst") datasetfile_title = """================== Datasets Available ================== """ check_for_updates() reload_scripts() script_list = SCRIPT_LIST() # write the title of dataset rst file # ref:http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html datasetfile.write(datasetfile_title) # get info from the scripts using specified encoding for script_num, script in enumerate(script_list, start=1): reference_link = '' if script.ref.strip(): reference_link = script.ref elif hasattr(script, 'homepage'): reference_link = script.homepage elif not reference_link.strip():
def main():
    """This function launches the Data Retriever.

    Parses the sub-command from ``sys.argv`` (defaults, update,
    citation, license, new, reset, new_json, edit_json, delete_json,
    ls) and, for install/download, runs the matching dataset script(s)
    on the selected engine.
    """
    # commands and dataset names are matched case-insensitively
    sys.argv[1:] = [arg.lower() for arg in sys.argv[1:]]
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        script_list = SCRIPT_LIST()
        args = parser.parse_args()

        if args.command == "install" and not args.engine:
            parser.parse_args(['install', '-h'])

        if args.quiet:
            # silence all further console output for the rest of the run
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)

        if args.command == 'defaults':
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return

        if args.command == 'update':
            check_for_updates(False)
            script_list = SCRIPT_LIST()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print("\nDataset: {}".format(dataset.name))
                    print("Citation: {}".format(dataset.citation))
                    print("Description: {}\n".format(dataset.description))
            return

        elif args.command == 'license':
            dataset_license = license(args.dataset)
            if dataset_license:
                print(dataset_license)
            else:
                print("There is no license information for {}".format(args.dataset))
            return

        elif args.command == 'new':
            # 'with' ensures the template file is closed even on a write error
            with open(args.filename, 'w') as f:
                f.write(sample_script)
            return

        elif args.command == 'reset':
            reset_retriever(args.scope)
            return

        elif args.command == 'new_json':
            # create new JSON script
            create_json()
            return

        elif args.command == 'edit_json':
            # edit existing JSON script
            json_file = get_script_filename(args.dataset.lower())
            edit_json(json_file)
            return

        elif args.command == 'delete_json':
            # delete existing JSON script from home directory and or
            # script directory if exists in current dir
            confirm = input("Really remove " + args.dataset.lower() +
                            " and all its contents? (y/N): ")
            if confirm.lower().strip() in ['y', 'yes']:
                json_file = get_script_filename(args.dataset.lower())
                delete_json(json_file)
            return

        if args.command == 'ls':
            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print("No scripts are currently available. Updating scripts now...")
                check_for_updates(False)
                print("\n\nScripts downloaded.\n")
            if args.l is None:
                all_scripts = datasets()
                print("Available datasets : {}\n".format(len(all_scripts)))
                from retriever import lscolumns
                lscolumns.printls(dataset_names())
            else:
                all_scripts = datasets(args.l[0])
                print("Available datasets : {}\n".format(len(all_scripts)))
                count = 1
                for script in all_scripts:
                    print("{}. {}".format(count, script.title))
                    print(script.name)
                    print(script.keywords)
                    print()
                    count += 1
            return

        engine = choose_engine(args.__dict__)

        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            sys.tracebacklimit = 0

        # BUG FIX: test for the attribute that is actually accessed; the
        # previous guard checked 'debug' and then read 'not_cached', which
        # raised AttributeError for commands lacking a --not-cached flag.
        if hasattr(args, 'not_cached') and args.not_cached:
            engine.use_cache = False
        else:
            engine.use_cache = True

        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # allow the user to skip a dataset without aborting the run
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print("The dataset {} isn't currently available in the Retriever".format(
                args.dataset))
            # BUG FIX: closed the unbalanced quote around 'retriever ls'
            print("Run 'retriever ls' to see a list of currently available datasets")
def main():
    """This function launches the Data Retriever.

    Modern entry point: dispatches on the parsed sub-command (defaults,
    update, citation, license, new, reset, new_json, edit_json,
    autocreate, delete_json, ls) and otherwise installs/downloads the
    named dataset(s) with the chosen engine.
    """
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])
    else:
        # otherwise, parse them
        args = parser.parse_args()

        # Bootstrap the script catalog unless the command manages it itself
        # ('reset'/'update') or scripts already exist on disk.
        if args.command not in ['reset', 'update'] \
                and not os.path.isdir(SCRIPT_SEARCH_PATHS[1]) \
                and not [f for f in os.listdir(SCRIPT_SEARCH_PATHS[-1])
                         if os.path.exists(SCRIPT_SEARCH_PATHS[-1])]:
            check_for_updates()
            reload_scripts()
        script_list = SCRIPT_LIST()

        if args.command == "install" and not args.engine:
            parser.parse_args(['install', '-h'])

        if args.quiet:
            # silence all further console output for the rest of the run
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            script_list = reload_scripts()

        if args.command == 'defaults':
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return

        if args.command == 'update':
            check_for_updates()
            reload_scripts()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print("\nDataset: {}".format(dataset.name))
                    print("Citation: {}".format(dataset.citation))
                    print("Description: {}\n".format(dataset.description))
            return

        elif args.command == 'license':
            if args.dataset is None:
                print(LICENSE)
            else:
                dataset_license = license(args.dataset)
                if dataset_license:
                    print(dataset_license)
                else:
                    print("There is no license information for {}".format(args.dataset))
            return

        elif args.command == 'new':
            # 'with' ensures the template file is closed even on a write error
            with open(args.filename, 'w') as f:
                f.write(sample_script)
            return

        elif args.command == 'reset':
            reset_retriever(args.scope)
            return

        elif args.command == 'new_json':
            # create new JSON script
            create_json()
            return

        elif args.command == 'edit_json':
            # edit existing JSON script
            json_file = get_script_filename(args.dataset.lower())
            edit_json(json_file)
            return

        elif args.command == 'autocreate':
            # exactly one of -f (single file) / -d (directory) must be given
            if sum([args.f, args.d]) == 1:
                file_flag = True if args.f else False
                create_package(args.path, args.dt, file_flag, args.o, args.skip_lines)
            else:
                print('Please use one and only one of the flags -f -d')
            return

        elif args.command == 'delete_json':
            # delete existing JSON script from home directory and or
            # script directory if exists in current dir
            confirm = input("Really remove " + args.dataset.lower() +
                            " and all its contents? (y/N): ")
            if confirm.lower().strip() in ['y', 'yes']:
                json_file = get_script_filename(args.dataset.lower())
                delete_json(json_file)
            return

        if args.command == 'ls':
            # scripts should never be empty because check_for_updates is run on SCRIPT_LIST init
            if not (args.l or args.k or isinstance(args.v, list)):
                # plain listing of dataset names
                all_scripts = dataset_names()
                print("Available datasets : {}\n".format(len(all_scripts)))
                from retriever import lscolumns
                lscolumns.printls(all_scripts)
            elif isinstance(args.v, list):
                # verbose listing; -v with names restricts to those datasets
                if args.v:
                    try:
                        all_scripts = [get_script(dataset) for dataset in args.v]
                    except KeyError:
                        all_scripts = []
                        print("Dataset(s) is not found.")
                else:
                    all_scripts = datasets()
                count = 1
                for script in all_scripts:
                    print(
                        "{count}. {title}\n {name}\n"
                        "{keywords}\n{description}\n"
                        "{licenses}\n{citation}\n"
                        "".format(
                            count=count,
                            title=script.title,
                            name=script.name,
                            keywords=script.keywords,
                            description=script.description,
                            licenses=str(script.licenses[0]['name']),
                            citation=script.citation,
                        )
                    )
                    count += 1
            else:
                # filtered search by license (-l) and/or keyword (-k)
                param_licenses = args.l if args.l else None
                keywords = args.k if args.k else None

                # search
                searched_scripts = datasets(keywords, param_licenses)
                if not searched_scripts:
                    print("No available datasets found")
                else:
                    print("Available datasets : {}\n".format(len(searched_scripts)))
                    count = 1
                    for script in searched_scripts:
                        print(
                            "{count}. {title}\n{name}\n"
                            "{keywords}\n{licenses}\n".format(
                                count=count,
                                title=script.title,
                                name=script.name,
                                keywords=script.keywords,
                                licenses=str(script.licenses[0]['name']),
                            )
                        )
                        count += 1
            return

        engine = choose_engine(args.__dict__)

        if hasattr(args, 'debug') and args.debug:
            debug = True
        else:
            debug = False
            sys.tracebacklimit = 0

        # BUG FIX: guard on the attribute that is actually read. The old
        # guard checked hasattr(args, 'debug') before reading
        # args.not_cached, raising AttributeError for commands that do not
        # define a --not-cached flag.
        if hasattr(args, 'not_cached') and args.not_cached:
            engine.use_cache = False
        else:
            engine.use_cache = True

        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # allow the user to skip a dataset without aborting the run
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print("Run 'retriever ls' to see a list of currently available datasets.")
def _install(args, use_cache, debug):
    """Install datasets for retriever.

    args: parsed option dict; must contain 'dataset' and 'command', may
        contain 'hash_value', 'force', and engine-specific keys.
    use_cache: whether the engine should reuse previously downloaded files.
    debug: when True, re-raise download errors instead of only printing them.
    Returns the engine used for the installation (None in one early-exit
    Socrata case — see NOTE below).
    """
    engine = choose_engine(args)
    engine.use_cache = use_cache

    if args['dataset'].endswith('.zip') or args.get('hash_value'):
        # Installing from a committed archive rather than a named script.
        path_to_archive = args['dataset']
        if args.get('hash_value'):
            path_to_archive = os.path.join(
                PROVENANCE_DIR, args['dataset'],
                '{}-{}.zip'.format(args['dataset'], args['hash_value']))
        if not os.path.exists(path_to_archive):
            print('The committed file does not exist.')
        engine = install_committed(path_to_archive,
                                   engine,
                                   force=args.get('force', False))
        return engine

    script_list = SCRIPT_LIST()
    if not (script_list or os.listdir(SCRIPT_WRITE_PATH)):
        # No compiled scripts anywhere yet: fetch the catalog once.
        check_for_updates()
        script_list = SCRIPT_LIST()
    data_sets_scripts = name_matches(script_list, args['dataset'])

    if data_sets_scripts:
        for data_sets_script in data_sets_scripts:
            print("=> Installing", data_sets_script.name)
            try:
                if engine.name == "HDF5":
                    # HDF5 is built by first materializing the data in a
                    # SQLite database, then converting.
                    sqlite_opts = {
                        'command': 'install',
                        'dataset': data_sets_script,
                        'engine': 'sqlite',
                        'file': (args["file"].split("."))[0] + ".db",
                        'table_name': args["table_name"],
                        'data_dir': args["data_dir"]
                    }
                    sqlite_engine = choose_engine(sqlite_opts)
                    data_sets_script.download(sqlite_engine, debug=debug)
                    data_sets_script.engine.final_cleanup()
                # Start each dataset with a clean table registry so tables
                # from a previous script are not carried over.
                engine.script_table_registry = OrderedDict()
                data_sets_script.download(engine, debug=debug)
                data_sets_script.engine.final_cleanup()
            except Exception as e:
                print(e)
                if debug:
                    raise
    elif args['dataset'].startswith('socrata') and not data_sets_scripts:
        socrata_id = args['dataset'].split('-', 1)[1]
        resource = find_socrata_dataset_by_id(socrata_id)

        if "error" in resource.keys():
            if resource["datatype"][0] == "map":
                print("{} because map type datasets are not supported".format(
                    resource["error"]))
            else:
                print("{} because it is of type {} and not tabular".format(
                    resource["error"], resource["datatype"][1]))
        elif len(resource.keys()) == 0:
            # NOTE(review): returns None here while every other path returns
            # the engine — confirm callers tolerate this.
            return
        else:
            print("=> Installing", args['dataset'])
            name = f"socrata-{socrata_id}"
            create_socrata_dataset(engine, name, resource)
            if args['command'] == 'download':
                return engine
            else:
                script_list = SCRIPT_LIST()
                script = get_script(args['dataset'])
                script.download(engine, debug=debug)
                script.engine.final_cleanup()
    elif args['dataset'].startswith('rdataset') and not data_sets_scripts:
        print("=> Installing", args['dataset'])
        # BUG FIX: split at most twice so dataset names that themselves
        # contain '-' (e.g. rdataset-pkg-some-name) keep their full name;
        # an unbounded split silently truncated them at the next hyphen.
        rdataset = args['dataset'].split('-', 2)
        update_rdataset_catalog()
        package, dataset_name = rdataset[1], rdataset[2]
        create_rdataset(engine, package, dataset_name)
        if args['command'] == 'download':
            return engine
        else:
            script_list = SCRIPT_LIST()
            script = get_script(args['dataset'])
            script.download(engine, debug=debug)
            script.engine.final_cleanup()
    else:
        message = "Run retriever.datasets() to list the currently available " \
                  "datasets."
        raise ValueError(message)
    return engine