def choose_engine(opts, choice=True):
    """Resolve the database engine to use, prompting the user if necessary.

    The engine name comes from ``opts["engine"]`` when that key is present;
    otherwise the ``download`` command selects the "download" engine.  When
    neither applies, the user is asked interactively, unless ``choice`` is
    false, in which case ``None`` is returned without prompting.

    An empty name selects the first entry of ``engine_list``.  A name that
    matches no engine (by lower-cased name or by abbreviation) leaves a freshly
    constructed ``Engine``.  ``opts`` is attached to the result as
    ``engine.opts`` before it is returned.
    """
    if "engine" in opts:
        requested = opts["engine"]
    elif opts["command"] == "download":
        requested = "download"
    elif not choice:
        return None
    else:
        print("Choose a database engine:")
        for candidate in engine_list:
            prefix = ("(" + candidate.abbreviation + ") "
                      if candidate.abbreviation else "")
            print(" " + prefix + candidate.name)
        requested = input(": ")

    requested = requested.lower()

    # A default engine is always constructed first, as the fallback result.
    selected = Engine()
    if requested:
        # No early exit: when several engines match, the last one wins.
        for candidate in engine_list:
            if requested == candidate.name.lower():
                selected = candidate
            elif candidate.abbreviation and requested == candidate.abbreviation:
                selected = candidate
    else:
        selected = engine_list[0]

    selected.opts = opts
    return selected
def choose_engine(opts, choice=True):
    """Prompt the user to select a database engine, or pick it from ``opts``.

    Selection order:

    1. ``opts["engine"]`` when the key is present.
    2. The "download" engine when ``opts["command"] == "download"``.
    3. An interactive prompt listing every entry of ``engine_list``; skipped
       when ``choice`` is false, in which case ``None`` is returned.

    An empty name selects ``engine_list[0]``; a name that matches no engine
    (by lower-cased name or by abbreviation) leaves a new ``Engine``.

    When ``opts`` carries a non-empty ``table_name``, it must contain both the
    ``{table}`` and ``{db}`` placeholders.  A missing or empty table name is
    not validated.

    Raises:
        KeyError: ``opts`` has neither an ``engine`` nor a ``command`` key.
        Exception: ``table_name`` lacks a placeholder and the selected engine
            lists a ``table_name`` entry in ``required_opts``; the message
            quotes that entry's third element (presumably the accepted format
            -- confirm against the engine definitions).
    """
    if "engine" in opts:
        enginename = opts["engine"]
    elif opts["command"] == "download":
        enginename = "download"
    else:
        if not choice:
            return None
        print("Choose a database engine:")
        for engine in engine_list:
            if engine.abbreviation:
                abbreviation = "(" + engine.abbreviation + ") "
            else:
                abbreviation = ""
            print(" " + abbreviation + engine.name)
        enginename = input(": ")
    enginename = enginename.lower()

    engine = Engine()
    if not enginename:
        engine = engine_list[0]
    else:
        # No break: if several engines match, the last match is kept.
        for thisengine in engine_list:
            if (enginename == thisengine.name.lower()
                    or thisengine.abbreviation
                    and enginename == thisengine.abbreviation):
                engine = thisengine

    if 'table_name' in opts:
        table_name = opts['table_name']
        # The placeholder checks are grouped explicitly.  Without the
        # parentheses, ``and`` binds tighter than ``or``, so the "{db}" test
        # ran even for an empty/None table name (TypeError on None, spurious
        # rejection of "").
        if table_name and ("{table}" not in table_name
                           or "{db}" not in table_name):
            for opt in engine.required_opts:
                if opt[0] == 'table_name':
                    raise Exception(
                        'Accepted Table format {fom}'.format(fom=opt[2]))

    engine.opts = opts
    return engine
def create_resources(file, skip_lines):
    """Build a resource description for ``file`` by auto-detecting its table.

    A new ``Engine`` inspects the file through ``auto_create_table`` (with
    ``make=False``), treating ``skip_lines`` as the number of header rows.
    The returned dict contains:

    * ``name``    -- base name of the detected table, extension removed
    * ``schema``  -- ``{"fields": [{"name": ..., "type": ...}, ...]}``
    * ``dialect`` -- an empty dict
    * ``url``     -- the placeholder string ``"FILL"``

    Any error raised during detection propagates to the caller.
    """
    detected = Engine().auto_create_table(
        Table(str(file), header_rows=skip_lines),
        filename=file,
        make=False,
    )
    attrs = detected.__dict__

    table_file = os.path.basename(attrs["name"])
    resource_name = os.path.splitext(table_file)[0]

    # Each column is a (name, spec) pair; the first spec item is the type.
    fields = [
        {"name": column_name, "type": column_spec[0]}
        for column_name, column_spec in attrs["columns"]
    ]

    return {
        "name": resource_name,
        "schema": {"fields": fields},
        "dialect": {},
        "url": "FILL",
    }
def create_tabular_resources(self, file, skip_lines, encoding):
    """Describe a tabular file as a resource dict.

    ``encoding`` is stored on ``self`` and on the temporary ``Engine`` used to
    auto-detect the table (``make=False``); ``skip_lines`` is passed as the
    number of header rows.  A "Processing..." line naming the file is printed.

    The returned dict contains, in this order:

    * ``name``    -- lower-cased file stem passed through ``clean_table_name``
    * ``path``    -- base name of the detected table
    * ``schema``  -- ``{"fields": [...]}``; each field has ``name`` and
      ``type``, plus ``size`` when the column spec has at least two items
    * ``dialect`` -- ``{"delimiter": ","}``
    * ``url``     -- the placeholder string ``"fill"``
    """
    detector = Engine()
    self.encoding = encoding
    detector.encoding = encoding

    source_table = Table(str(file), header_rows=skip_lines)
    detected = detector.auto_create_table(source_table, filename=file, make=False)
    attrs = detected.__dict__

    table_file = os.path.basename(attrs["name"])
    print("Processing... {file_name}".format(file_name=table_file))
    stem = os.path.splitext(table_file)[0].lower()

    resource = {
        "name": clean_table_name(stem),
        "path": table_file,
        "schema": {},
        "dialect": {"delimiter": ","},
    }

    fields = []
    for column_name, column_spec in attrs["columns"]:
        has_size = len(column_spec) >= 2
        field = {"name": column_name, "type": column_spec[0]}
        if has_size:
            size = column_spec[1]
            if column_spec[0] == "char":
                # char sizes need quotes, so they are stored as strings
                size = "{a}".format(a=size)
            field["size"] = size
        fields.append(field)
    resource["schema"]["fields"] = fields

    resource["url"] = "fill"
    return resource
def choose_engine(opts, choice=True):
    """Prompt the user to select a database engine, or pick it from ``opts``.

    The engine name is read from ``opts["engine"]`` if present; failing that,
    the ``download`` command maps to the "download" engine; failing that, the
    user is prompted with the contents of ``engine_list``.  With ``choice``
    false the prompt is skipped and ``None`` is returned.

    An empty name selects ``engine_list[0]``.  A name matching no engine's
    lower-cased name or abbreviation leaves a newly constructed ``Engine``.

    A non-empty ``opts["table_name"]`` must contain both the ``{table}`` and
    ``{db}`` placeholders; an absent, ``None`` or empty table name is accepted
    as-is.

    Raises:
        KeyError: ``opts`` has neither an ``engine`` nor a ``command`` key.
        Exception: the table name is missing a placeholder and the selected
            engine has a ``table_name`` entry in ``required_opts``.  The
            message includes that entry's third element (presumably the
            accepted format text -- confirm against the engine definitions).
    """
    if "engine" in opts:
        enginename = opts["engine"]
    elif opts["command"] == "download":
        enginename = "download"
    else:
        if not choice:
            return None
        print("Choose a database engine:")
        for engine in engine_list:
            if engine.abbreviation:
                abbreviation = "(" + engine.abbreviation + ") "
            else:
                abbreviation = ""
            print(" " + abbreviation + engine.name)
        enginename = input(": ")
    enginename = enginename.lower()

    engine = Engine()
    if not enginename:
        engine = engine_list[0]
    else:
        # Deliberately no break: the last matching engine is the one used.
        for thisengine in engine_list:
            if (enginename == thisengine.name.lower()
                    or thisengine.abbreviation
                    and enginename == thisengine.abbreviation):
                engine = thisengine

    if 'table_name' in opts:
        table_name = opts['table_name']
        # ``and`` binds tighter than ``or``: the unparenthesised form evaluated
        # ``"{db}" not in table_name`` even when table_name was empty or None
        # (TypeError for None).  Both placeholder checks now sit under the
        # truthiness guard.
        missing_placeholder = ("{table}" not in table_name
                               or "{db}" not in table_name) if table_name else False
        if missing_placeholder:
            for opt in engine.required_opts:
                if opt[0] == 'table_name':
                    raise Exception('Accepted Table format '
                                    '{fom}'.format(fom=opt[2]))

    engine.opts = opts
    return engine
def create_resources(file, skip_lines):
    """Build a resource description for ``file`` by auto-detecting its table.

    A new ``Engine`` inspects the file through ``auto_create_table`` (with
    ``make=False``), treating ``skip_lines`` as the number of header rows, and
    a "Processing..." line naming the file is printed.

    The returned dict contains, in this order:

    * ``name``    -- lower-cased base name of the table, extension removed
    * ``path``    -- base name of the detected table
    * ``schema``  -- ``{"fields": [...]}``; each field has ``name`` and
      ``type``, plus ``size`` when the column spec has at least two items
    * ``dialect`` -- ``{"delimiter": ","}``
    * ``url``     -- the placeholder string ``"FILL"``

    Any error raised during detection propagates to the caller.
    """

    def describe(column_name, column_spec):
        """Turn one (name, spec) column pair into a schema field dict."""
        if len(column_spec) < 2:
            return {"name": column_name, "type": column_spec[0]}
        column_type = column_spec[0]
        size = column_spec[1]
        if column_type == 'char':
            # char sizes need quotes, so they are stored as strings
            size = "{a}".format(a=size)
        return {"name": column_name, "type": column_type, "size": size}

    detected = Engine().auto_create_table(
        Table(str(file), header_rows=skip_lines),
        filename=file,
        make=False,
    )
    attrs = detected.__dict__

    table_file = os.path.basename(attrs["name"])
    print("Processing... {file_name}".format(file_name=table_file))

    resource = {
        "name": os.path.splitext(table_file)[0].lower(),
        "path": table_file,
        "schema": {},
        "dialect": {"delimiter": ","},
    }
    resource["schema"]["fields"] = [
        describe(column_name, column_spec)
        for column_name, column_spec in attrs["columns"]
    ]
    resource["url"] = "FILL"
    return resource
"""Tests for the EcoData Retriever""" import os from StringIO import StringIO from retriever.lib.engine import Engine from retriever.lib.table import Table from retriever.lib.templates import BasicTextTemplate from retriever.lib.tools import getmd5 from retriever import DATA_WRITE_PATH from nose.tools import with_setup # Create simple engine fixture test_engine = Engine() test_engine.table = Table("test") test_engine.script = BasicTextTemplate(tables={'test':test_engine.table}, shortname='test') test_engine.opts = {'database_name': '{db}_abc'} HOMEDIR = os.path.expanduser('~') def test_auto_get_columns(): """Basic test of getting column labels from header""" test_engine.table.delimiter = "," columns, column_values = test_engine.table.auto_get_columns("a,b,c,d") assert columns == [['a', None], ['b', None], ['c', None], ['d', None]] def test_auto_get_columns_cleanup(): """Test of automatically cleaning up column labels from header""" test_engine.table.delimiter = "," columns, column_values = test_engine.table.auto_get_columns("a),b.b,c/c,d___d,group") assert columns == [['a', None], ['b_b', None], ['c_c', None], ['d_d', None],
import retriever as rt
from retriever.lib.engine import Engine
from retriever.lib.table import TabularDataset
from retriever.lib.templates import BasicTextTemplate
from retriever.lib.cleanup import correct_invalid_value
from retriever.lib.engine_tools import getmd5
from retriever.lib.engine_tools import xml2csv
from retriever.lib.engine_tools import json2csv
from retriever.lib.engine_tools import sort_file
from retriever.lib.engine_tools import sort_csv
from retriever.lib.engine_tools import create_file
from retriever.lib.engine_tools import file_2list
from retriever.lib.datapackage import clean_input, is_empty

# Shared engine fixture: a bare Engine wired to one table and one script,
# both named "test", with a database-name template in its options.
test_engine = Engine()
test_engine.table = TabularDataset(name="test")
test_engine.script = BasicTextTemplate(tables=test_engine.table, name="test")
test_engine.opts = {'database_name': '{db}_abc'}

# Main paths: the user's home directory, the directory holding this file,
# and that directory's parent.
HOMEDIR = os.path.expanduser('~')
file_location = os.path.dirname(os.path.realpath(__file__))
retriever_root_dir = os.path.abspath(os.path.join(file_location, os.pardir))

# Template for raw data file paths; fill in with .format(file_name=...).
raw_dir_files = os.path.normpath(
    os.path.join(retriever_root_dir, 'raw_data/{file_name}'))

# file: sample_zip.csv (the path below points at the sample_zip.zip archive)
achive_zip = raw_dir_files.format(file_name='sample_zip.zip')
import retriever as rt
from retriever.lib.engine import Engine
from retriever.lib.table import TabularDataset
from retriever.lib.templates import BasicTextTemplate
from retriever.lib.cleanup import correct_invalid_value
from retriever.lib.engine_tools import getmd5
from retriever.lib.engine_tools import xml2csv
from retriever.lib.engine_tools import json2csv
from retriever.lib.engine_tools import sort_file
from retriever.lib.engine_tools import sort_csv
from retriever.lib.engine_tools import create_file
from retriever.lib.engine_tools import file_2list
from retriever.lib.datapackage import clean_input, is_empty

# Engine fixture reused by the tests: one "test" table, one "test" script,
# and a database-name template in the options.
test_engine = Engine()
test_engine.table = TabularDataset(name="test")
test_engine.script = BasicTextTemplate(
    tables=test_engine.table,
    name="test",
)
test_engine.opts = {'database_name': '{db}_abc'}

# Main paths: home directory, this file's directory, and its parent.
HOMEDIR = os.path.expanduser('~')
file_location = os.path.dirname(os.path.realpath(__file__))
retriever_root_dir = os.path.abspath(os.path.join(file_location, os.pardir))

# Path template for the raw data files; format with file_name=<file>.
raw_dir_files = os.path.normpath(
    os.path.join(retriever_root_dir, 'raw_data/{file_name}')
)

# file: sample_zip.csv (the path below points at the sample_zip.zip archive)
achive_zip = raw_dir_files.format(file_name='sample_zip.zip')