Beispiel #1
0
def get_lists():
    """Build the category tree of available datasets.

    The scripts returned by ``retriever.SCRIPT_LIST()`` are sorted by name
    and grouped into one ``Category`` per tag head, i.e. the first element
    of ``tag_tree(tag)``.  If a ``scripts.config`` file exists in the
    current working directory, every line of it that evaluates without an
    error is collected into an extra "Custom" category.

    Returns:
        The root ``Category`` named "All Datasets" whose children are the
        per-head categories, followed by "Custom" when present.
    """
    # get a list of category tags from all scripts
    from retriever import SCRIPT_LIST
    SCRIPT_LIST = SCRIPT_LIST()
    SCRIPT_LIST.sort(key=attrgetter('name'))

    # Parse every distinct tag once and reuse the parsed form below.
    tag_trees = {}
    for script in SCRIPT_LIST:
        for tag in script.tags:
            if tag not in tag_trees:
                tag_trees[tag] = tag_tree(tag)
    full_tags = sorted(tag_trees)
    tag_heads = sorted(set(tree[0] for tree in tag_trees.values()
                           if len(tree) > 0))

    def has_head(tag, head):
        # True when the tag's tree is non-empty and starts with `head`.
        tree = tag_trees[tag]
        return len(tree) > 0 and tree[0] == head

    lists = []
    for head in tag_heads:
        valid_scripts = [
            script for script in SCRIPT_LIST
            if any(has_head(tag, head) for tag in script.tags)
        ]
        # Tags with an empty tree are skipped here; indexing them with [0]
        # unguarded would raise IndexError.
        sub_trees = [
            tag_trees[tag][1:] for tag in full_tags if has_head(tag, head)
        ]
        lists.append(
            Category(head,
                     valid_scripts,
                     children=children(head, valid_scripts, sub_trees, 1)))

    # Get list of additional datasets from scripts.config file
    if os.path.isfile("scripts.config"):
        other_scripts = []
        # The context manager closes the file even if reading fails.
        with open("scripts.config", 'rb') as config:
            for line in config:
                if line:
                    try:
                        # SECURITY NOTE: eval() executes arbitrary code from
                        # scripts.config; only use trusted config files.
                        new_dataset = eval(line)
                    except Exception:
                        # Best effort: lines that do not evaluate are skipped.
                        continue
                    other_scripts.append(new_dataset)

        other_scripts.sort(key=attrgetter('name'))
        if len(other_scripts) > 0:
            lists.append(Category("Custom", other_scripts))
            # Iterate over a copy: when there are no tag categories,
            # lists[0] is the "Custom" category itself, which may hold this
            # very list (TODO confirm against Category).
            for script in list(other_scripts):
                lists[0].scripts.append(script)

    choice_tree = Category("All Datasets", SCRIPT_LIST, children=lists)

    return choice_tree
Beispiel #2
0
def get_lists():
    """Build the category tree of available datasets.

    Scripts from ``retriever.SCRIPT_LIST()`` are sorted by name and grouped
    into one ``Category`` per tag head (the first element of
    ``tag_tree(tag)``).  Lines of an optional ``scripts.config`` file in the
    current working directory that evaluate without error are gathered into
    an additional "Custom" category.

    Returns:
        The root ``Category`` named "All Datasets"; its children are the
        per-head categories plus "Custom" when any custom dataset was read.
    """
    # get a list of category tags from all scripts
    from retriever import SCRIPT_LIST
    SCRIPT_LIST = SCRIPT_LIST()
    SCRIPT_LIST.sort(key=attrgetter('name'))

    # Map each distinct tag to its parsed tree so it is parsed only once.
    parsed = {}
    for script in SCRIPT_LIST:
        for tag in script.tags:
            if tag not in parsed:
                parsed[tag] = tag_tree(tag)
    full_tags = sorted(parsed)
    tag_heads = sorted(set(tree[0] for tree in parsed.values()
                           if len(tree) > 0))

    def starts_with(tag, head):
        # Empty trees never match, which also protects the [0] lookup.
        tree = parsed[tag]
        return len(tree) > 0 and tree[0] == head

    lists = []
    for head in tag_heads:
        valid_scripts = [script for script in SCRIPT_LIST
                         if any(starts_with(tag, head)
                                for tag in script.tags)]
        # Tags with an empty tree are filtered out rather than raising
        # IndexError on an unguarded [0].
        remainders = [parsed[tag][1:] for tag in full_tags
                      if starts_with(tag, head)]
        lists.append(Category(head, valid_scripts,
                              children=children(head, valid_scripts,
                                                remainders, 1)))

    # Get list of additional datasets from scripts.config file
    if os.path.isfile("scripts.config"):
        other_scripts = []
        # `with` guarantees the config file is closed again.
        with open("scripts.config", 'rb') as config:
            for line in config:
                if not line:
                    continue
                try:
                    # SECURITY NOTE: eval() runs arbitrary code found in
                    # scripts.config; the file must be trusted.
                    other_scripts.append(eval(line))
                except Exception:
                    # Best effort: ignore lines that fail to evaluate.
                    pass

        other_scripts.sort(key=attrgetter('name'))
        if len(other_scripts) > 0:
            lists.append(Category("Custom", other_scripts))
            # Snapshot the list first: with no tag categories, lists[0] is
            # the "Custom" category and may share this list object
            # (TODO confirm against Category).
            for script in list(other_scripts):
                lists[0].scripts.append(script)

    choice_tree = Category("All Datasets", SCRIPT_LIST,
                           children=lists)

    return choice_tree
Beispiel #3
0
def download_public_data(datasets, data_dir='./data/', debug=False):
    """Download public datasets using the EcoData Retriever.

    Each entry of ``datasets`` is installed as if the command line
    ``install <dataset> -e s -f downloaded_data.sqlite`` had been given.

    Args:
        datasets: iterable of dataset names to install.
        data_dir: not used by this function; kept so existing callers that
            pass it keep working.
        debug: forwarded to each script's ``download`` call.
    """

    from retriever import VERSION, SCRIPT_LIST, ENGINE_LIST
    from retriever.lib.tools import choose_engine, get_opts

    for dataset in datasets:
        script_list = SCRIPT_LIST()
        opts = get_opts(script_list,
                        args=[
                            'install', dataset, '-e', 's', '-f',
                            'downloaded_data.sqlite'
                        ])
        script = opts["script"]
        engine = choose_engine(opts)
        if isinstance(script, list):
            # A distinct loop name avoids shadowing the outer `dataset`.
            for member in script:
                # Single-argument form prints the same text on Python 2 and 3.
                print("=> Installing %s" % member.name)
                member.download(engine, debug=debug)
        else:
            script.download(engine, debug=debug)
    print("Datasets successfully downloaded.")
Beispiel #4
0
    def Find(self, evt):
        """Ask for search keywords and show the matching scripts.

        A text dialog collects space-separated keywords.  Scripts from
        ``SCRIPT_LIST()`` whose ``matches_terms`` accepts the keywords are
        added to ``self.cat_list`` as a new "Search results" category; when
        nothing matches, a message box says so.  Nothing happens if the
        dialog is dismissed without confirming or the input is blank.

        Args:
            evt: the triggering event (not used).
        """
        dlg = wx.TextEntryDialog(self, 'Enter the keyword(s) to search for',
                                 'Find', '')
        try:
            # Honour Cancel: only search when the dialog was confirmed.
            if dlg.ShowModal() != wx.ID_OK:
                return
            result = dlg.GetValue().strip()

            if result:
                search_terms = [
                    term.strip() for term in result.split(' ') if term.strip()
                ]
                scripts = [
                    script for script in SCRIPT_LIST()
                    if script.matches_terms(search_terms)
                ]

                if len(scripts) > 0:
                    results = Category(
                        "Search results: " + ', '.join(search_terms), scripts)
                    self.cat_list.AddChild(results, select=True)
                else:
                    wx.MessageBox("Your search returned no results.",
                                  "No results")
        finally:
            # Always release the dialog, even if the search raised.
            dlg.Destroy()
Beispiel #5
0
    "json": {
        'engine': 'json',
        'table_name': 'output_file_{table}.json'
    },
    "csv": {
        'engine': 'csv',
        'table_name': 'output_file_{table}.csv'
    },
    "sqlite": {
        'engine': 'sqlite',
        'file': dbfile,
        'table_name': '{db}_{table}'
    }
}

# Replace the SCRIPT_LIST callable with the list it returns.
SCRIPT_LIST = SCRIPT_LIST()
# Maps an engine abbreviation to its configured engine, or None when the
# engine could not be set up.
TEST_ENGINES = {}
# Dataset names collected for exclusion, compared in lower case.
IGNORE = [
    "forest-inventory-analysis", "bioclim", "prism-climate", "vertnet", "NPN",
    "mammal-super-tree"
]
IGNORE = [dataset.lower() for dataset in IGNORE]

for engine in ENGINE_LIST:
    if engine.abbreviation in engine_test:
        try:
            opts = engine_test[engine.abbreviation]
            TEST_ENGINES[engine.abbreviation] = choose_engine(opts)
        except Exception:
            # Best effort: an engine that cannot be set up is recorded as
            # None.  KeyboardInterrupt and SystemExit still propagate.
            TEST_ENGINES[engine.abbreviation] = None
Beispiel #6
0
def main():
    """This function launches the EcoData Retriever.

    With no command line arguments, or with ``gui`` as the first argument,
    the graphical application is started.  Otherwise the arguments are
    parsed and the requested command is dispatched; commands handled here
    return when done, and anything else falls through to installing
    ``args.dataset`` with the engine chosen from the arguments.
    """
    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == 'gui'):
        # if no command line args are passed, launch GUI

        # The update check is non-graphical when the platform string
        # contains 'darwin'.
        check_for_updates(
            graphical='darwin' not in platform.platform().lower())
        lists = get_lists()

        from retriever.app.main import launch_app
        launch_app(lists)

    else:
        # otherwise, parse them

        script_list = SCRIPT_LIST()

        args = parser.parse_args()
        if args.quiet:
            # Deliberately left open: stdout stays redirected from here on.
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)

        if args.command == 'update':
            check_for_updates(graphical=False)
            script_list = SCRIPT_LIST()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                citation_path = os.path.join(os.path.split(__file__)[0],
                                             '../CITATION')
                print(citation_path)
                with open(citation_path) as citation_file:
                    print(citation_file.read())
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print(dataset.description)

            return

        elif args.command == 'gui':
            lists = get_lists()

            from retriever.app.main import launch_app
            launch_app(lists)
            return

        elif args.command == 'new':
            # `with` closes the file even if the write fails.
            with open(args.filename, 'w') as f:
                f.write(sample_script)

            return

        if args.command == 'ls' or args.dataset is None:
            import lscolumns

            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print("No scripts are currently available. "
                      "Updating scripts now...")
                check_for_updates(graphical=False)
                print("\n\nScripts downloaded.\n")
                script_list = SCRIPT_LIST()

            all_scripts = set(script.shortname for script in script_list)
            # Every '>'-separated tag component, upper-cased, plus "ALL".
            all_tags = set(["ALL"])
            for script in script_list:
                for tagset in script.tags:
                    all_tags.update(tag.strip().upper()
                                    for tag in tagset.split('>'))

            print("Available datasets (%s):" % len(all_scripts))
            lscolumns.printls(sorted(all_scripts, key=lambda s: s.lower()))
            print("Groups:")
            lscolumns.printls(sorted(all_tags))
            return

        engine = choose_engine(args.__dict__)

        debug = bool(getattr(args, 'debug', False))

        scripts = name_matches(script_list, args.dataset)
        if scripts:
            for dataset in scripts:
                # Single-argument form prints the same text on Python 2 and 3.
                print("=> Installing %s" % dataset.name)
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # An interrupt abandons this dataset only.
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print("The dataset %s isn't currently available in the Retriever"
                  % (args.dataset))
            print("Run 'retriever -ls to see a list of currently available datasets")
Beispiel #7
0
from builtins import str
from retriever import VERSION, COPYRIGHT
from retriever.lib.repository import check_for_updates
from retriever import SCRIPT_LIST

# Create the .rst file for the available datasets
datasetfile_title = """
==================
Datasets Available
==================


"""

# The file is opened before the update check, so datasets.rst is created or
# truncated even if a later step fails.  The context manager guarantees the
# file is flushed and closed.
with open("datasets.rst", "w") as datasetfile:
    check_for_updates()
    script_list = SCRIPT_LIST()

    # write the title of dataset rst file
    datasetfile.write(datasetfile_title)

    # get info from the scripts
    for script_num, script in enumerate(script_list, start=1):
        if script.ref.strip():
            reference_link = script.ref
        elif bool(script.urls.values()):
            # list() is needed because Python 3 dict views cannot be indexed.
            # The link is the first URL with its last path component removed.
            reference_link = list(script.urls.values())[0].rpartition('/')[0]
        else:
            reference_link = ""
        datasetfile.write("| " + str(script_num) +
                          ". **{}** \n| shortname: {}\n| reference: {}\n\n".format(
                              script.name, script.shortname, reference_link))
Beispiel #8
0
from __future__ import print_function
from builtins import input
import os
import json
from time import sleep
from retriever import SCRIPT_LIST, HOME_DIR

# Lower-cased short names of every script returned by SCRIPT_LIST(),
# computed once when this module is imported.
short_names = [script.shortname.lower() for script in SCRIPT_LIST()]


def is_empty(val):
    """Return True when ``val`` equals the empty string or the empty list."""
    for empty in ("", []):
        if val == empty:
            return True
    return False


def clean_input(prompt="", split_char='', ignore_empty=False, dtype=None):
    """Clean the user-input from the CLI before adding it"""
    while True:
        val = input(prompt).strip()
        # split to list type if split_char specified
        if split_char != "":
            val = [v.strip() for v in val.split(split_char) if v.strip() != ""]
        # do not ignore empty input if not allowed
        if not ignore_empty and is_empty(val):
            print("\tError: empty input. Need one or more values.\n")
            continue
        # ensure correct input datatype if specified
        if not is_empty(val) and dtype is not None:
            try:
                if not type(eval(val)) == dtype:
                    print("\tError: input doesn't match required type ", dtype,
Beispiel #9
0
def main():
    """This function launches the Data Retriever.

    With no command line arguments the help text is shown.  Otherwise the
    arguments are parsed and the requested command is dispatched; commands
    handled here return when done, and anything else falls through to
    installing ``args.dataset`` with the engine chosen from the arguments.

    Raises:
        Exception: if ``edit_json`` / ``delete_json`` find no matching file,
            or if an install is requested without a dataset.
    """
    if len(sys.argv) == 1:
        # if no command line args are passed, show the help options
        parser.parse_args(['-h'])

    else:
        # otherwise, parse them

        script_list = SCRIPT_LIST()

        args = parser.parse_args()

        if args.command == "install" and not args.engine:
            parser.parse_args(['install', '-h'])

        if args.quiet:
            # Deliberately left open: stdout stays redirected from here on.
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)

        if args.command == 'defaults':
            for engine_item in engine_list:
                print("Default options for engine ", engine_item.name)
                for default_opts in engine_item.required_opts:
                    print(default_opts[0], " ", default_opts[2])
                print()
            return

        if args.command == 'update':
            check_for_updates()
            script_list = SCRIPT_LIST()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                print("\nCitation for retriever:\n")
                print(CITATION)
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:
                    print("\nDataset:  {}".format(dataset.name))
                    print("Citation:   {}".format(dataset.citation))
                    print("Description:   {}\n".format(dataset.description))

            return

        elif args.command == 'new':
            # `with` closes the file even if the write fails.
            with open(args.filename, 'w') as f:
                f.write(sample_script)

            return

        elif args.command == 'reset':
            reset_retriever(args.scope)
            return

        elif args.command == 'new_json':
            # create new JSON script
            create_json()
            return

        elif args.command == 'edit_json':
            # edit existing JSON script
            scripts_dir = os.path.join(HOME_DIR, 'scripts')
            for json_file in os.listdir(scripts_dir):
                if not json_file.endswith('.json'):
                    continue
                # Case-insensitive substring match; the first hit is edited.
                if args.filename.lower() in json_file.lower():
                    edit_json(json_file)
                    return
            raise Exception("File not found")

        elif args.command == 'delete_json':
            # delete existing JSON script
            scripts_dir = os.path.join(HOME_DIR, 'scripts')
            for json_file in os.listdir(scripts_dir):
                if not json_file.endswith('.json'):
                    continue
                if args.dataset.lower() in json_file.lower():
                    confirm = input("Really remove " + json_file +
                                    " and all its contents? (y/N): ")
                    if confirm.lower().strip() in ['y', 'yes']:
                        os.remove(os.path.join(scripts_dir, json_file))
                        try:
                            # Same base name with a .py extension.
                            os.remove(os.path.join(
                                scripts_dir, json_file[:-4] + 'py'))
                        except OSError:
                            # Not compiled yet
                            pass
                    # Only the first matching file is ever considered.
                    return
            raise Exception("File not found")

        if args.command == 'ls':
            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print("No scripts are currently available. Updating scripts now...")
                check_for_updates()
                print("\n\nScripts downloaded.\n")
                script_list = SCRIPT_LIST()

            all_scripts = []

            for script in script_list:
                if not script.shortname:
                    continue
                if args.l is not None:
                    script_name = script.name + "\nShortname: " + script.shortname + "\n"
                    if script.tags:
                        script_name += "Tags: " + str(list(script.tags)) + "\n"
                    # Keep the entry only if every search term occurs in it,
                    # ignoring case.
                    haystack = script_name.lower()
                    if all(term.lower() in haystack for term in args.l):
                        all_scripts.append(script_name)
                else:
                    all_scripts.append(script.shortname)

            all_scripts = sorted(all_scripts, key=lambda s: s.lower())

            print("Available datasets : {}\n".format(len(all_scripts)))

            if args.l is None:
                from retriever import lscolumns
                # all_scripts is already sorted case-insensitively.
                lscolumns.printls(all_scripts)
            else:
                for count, script in enumerate(all_scripts, start=1):
                    print("%d. %s" % (count, script))
            return

        engine = choose_engine(args.__dict__)

        debug = bool(getattr(args, 'debug', False))
        if not debug:
            sys.tracebacklimit = 0

        # Check the attribute that is actually read; testing for 'debug'
        # here raised AttributeError on namespaces without 'not_cached'.
        use_cache = not getattr(args, 'not_cached', False)

        if args.dataset is not None:
            scripts = name_matches(script_list, args.dataset)
        else:
            raise Exception("no dataset specified.")
        if scripts:
            for dataset in scripts:
                print("=> Installing", dataset.name)
                try:
                    dataset.download(engine, debug=debug, use_cache=use_cache)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # An interrupt abandons this dataset only.
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print("The dataset {} isn't currently available in the Retriever".format(
                args.dataset))
            print("Run 'retriever ls to see a list of currently available datasets")
Beispiel #10
0
def main():
    """This function launches the EcoData Retriever.

    With no command line arguments, or with ``gui`` as the first argument,
    the graphical application is started.  Otherwise the arguments are
    parsed and the requested command is dispatched; commands handled here
    return when done, and anything else falls through to installing
    ``args.dataset`` with the engine chosen from the arguments.
    """
    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == 'gui'):
        # if no command line args are passed, launch GUI

        # The update check is non-graphical on 'darwin'.
        check_for_updates(graphical=current_platform != 'darwin')
        lists = get_lists()

        from retriever.app.main import launch_app
        launch_app(lists)

    else:
        # otherwise, parse them

        script_list = SCRIPT_LIST()

        args = parser.parse_args()
        if args.quiet:
            # Deliberately left open: stdout stays redirected from here on.
            sys.stdout = open(os.devnull, 'w')

        if args.command == 'help':
            parser.parse_args(['-h'])

        if hasattr(args, 'compile') and args.compile:
            script_list = SCRIPT_LIST(force_compile=True)

        if args.command == 'update':
            check_for_updates(graphical=False)
            script_list = SCRIPT_LIST()
            return

        elif args.command == 'citation':
            if args.dataset is None:
                citation_path = os.path.join(os.path.split(__file__)[0],
                                             '../CITATION')
                print("\nCitation for retriever:\n")
                with open(citation_path) as citation_file:
                    print(citation_file.read())
            else:
                scripts = name_matches(script_list, args.dataset)
                for dataset in scripts:

                    print("\nCitation:   {}".format(dataset.citation))
                    print("Description:   {}\n".format(dataset.description))

            return

        elif args.command == 'gui':
            lists = get_lists()

            from retriever.app.main import launch_app
            launch_app(lists)
            return

        elif args.command == 'new':
            # `with` closes the file even if the write fails.
            with open(args.filename, 'w') as f:
                f.write(sample_script)

            return

        elif args.command == 'reset':
            reset_retriever(args.scope)
            return

        if args.command == 'ls' or args.dataset is None:

            # If scripts have never been downloaded there is nothing to list
            if not script_list:
                print("No scripts are currently available. "
                      "Updating scripts now...")
                check_for_updates(graphical=False)
                print("\n\nScripts downloaded.\n")
                script_list = SCRIPT_LIST()

            all_scripts = []

            for script in script_list:
                if not script.name:
                    continue
                if args.l is not None:
                    script_name = script.name + "\nShortname: " + script.shortname + "\n"
                    if script.tags:
                        script_name += "Tags: " + str(list(script.tags)) + "\n"
                    # Keep the entry only if every search term occurs in it,
                    # ignoring case.
                    haystack = script_name.lower()
                    if all(term.lower() in haystack for term in args.l):
                        all_scripts.append(script_name)
                else:
                    all_scripts.append(script.shortname)

            all_scripts = sorted(all_scripts, key=lambda s: s.lower())

            print("Available datasets : {}\n".format(len(all_scripts)))

            if args.l is None:
                import lscolumns
                # all_scripts is already sorted case-insensitively.
                lscolumns.printls(all_scripts)
            else:
                for count, script in enumerate(all_scripts, 1):
                    print("%d. %s" % (count, script))
            return

        engine = choose_engine(args.__dict__)

        debug = bool(getattr(args, 'debug', False))

        scripts = name_matches(script_list, args.dataset)
        if scripts:
            for dataset in scripts:
                # Single-argument form prints the same text on Python 2 and 3.
                print("=> Installing %s" % dataset.name)
                try:
                    dataset.download(engine, debug=debug)
                    dataset.engine.final_cleanup()
                except KeyboardInterrupt:
                    # An interrupt abandons this dataset only.
                    pass
                except Exception as e:
                    print(e)
                    if debug:
                        raise
            print("Done!")
        else:
            print("The dataset {} isn't currently available in the Retriever".format(args.dataset))
            print("Run 'retriever ls to see a list of currently available datasets")