from retriever.lib.table import TabularDataset
from retriever.lib.templates import BasicTextTemplate
from retriever.lib.cleanup import correct_invalid_value
from retriever.lib.engine_tools import getmd5
from retriever.lib.engine_tools import xml2csv
from retriever.lib.engine_tools import json2csv
from retriever.lib.engine_tools import sort_file
from retriever.lib.engine_tools import sort_csv
from retriever.lib.engine_tools import create_file
from retriever.lib.engine_tools import file_2list
from retriever.lib.datapackage import clean_input, is_empty

# Shared engine fixture: an Engine wired to one table and one script,
# both named "test"
test_engine = Engine()
test_engine.table = TabularDataset(name="test")
test_engine.script = BasicTextTemplate(tables=test_engine.table, name="test")
test_engine.opts = {"database_name": "{db}_abc"}

# Directory layout: the user's home directory, the directory holding this
# file, and that directory's parent
HOMEDIR = os.path.expanduser("~")
file_location = os.path.dirname(os.path.realpath(__file__))
retriever_root_dir = os.path.abspath(os.path.join(file_location, os.pardir))

# Path template for raw data files; fill in {file_name} via str.format
raw_dir_files = os.path.normpath(
    os.path.join(retriever_root_dir, "raw_data/{file_name}"))

# Zip archive fixture (the original note names its content as sample_zip.csv)
achive_zip = raw_dir_files.format(file_name="sample_zip.zip")

# Tar archive fixture (the original note names its content as
# test/sample_tar.csv)
achive_tar = raw_dir_files.format(file_name="sample_tar.tar")
Example #2
0
"""Tests for the EcoData Retriever"""

import os
from StringIO import StringIO
from retriever.lib.engine import Engine
from retriever.lib.table import Table
from retriever.lib.templates import BasicTextTemplate
from retriever.lib.tools import getmd5
from retriever import DATA_WRITE_PATH
from nose.tools import with_setup

# Home directory of the current user
HOMEDIR = os.path.expanduser('~')

# Shared engine fixture: an Engine holding a single table registered under
# the key 'test', with a script template that refers to that table
test_engine = Engine()
test_engine.table = Table("test")
test_engine.script = BasicTextTemplate(
    shortname='test',
    tables={'test': test_engine.table},
)
test_engine.opts = {'database_name': '{db}_abc'}

def test_auto_get_columns():
    """Column labels are read from a comma-delimited header line."""
    table = test_engine.table
    table.delimiter = ","
    # Every label is expected to be paired with None.
    expected = [[label, None] for label in "abcd"]
    # The second element of the returned pair is not checked by this test.
    columns, _ = table.auto_get_columns("a,b,c,d")
    assert columns == expected


def test_auto_get_columns_cleanup():
    """Test of automatically cleaning up column labels from header"""
    test_engine.table.delimiter = ","
    columns, column_values = test_engine.table.auto_get_columns("a),b.b,c/c,d___d,group")
    assert columns == [['a', None], ['b_b', None], ['c_c', None], ['d_d', None],
Example #3
0
from retriever.lib.table import TabularDataset
from retriever.lib.templates import BasicTextTemplate
from retriever.lib.cleanup import correct_invalid_value
from retriever.lib.engine_tools import getmd5
from retriever.lib.engine_tools import xml2csv
from retriever.lib.engine_tools import json2csv
from retriever.lib.engine_tools import sort_file
from retriever.lib.engine_tools import sort_csv
from retriever.lib.engine_tools import create_file
from retriever.lib.engine_tools import file_2list
from retriever.lib.datapackage import clean_input, is_empty

# Engine fixture shared by the tests: one table and one script template,
# each created with the name "test"
test_engine = Engine()
test_engine.table = TabularDataset(name="test")
test_engine.script = BasicTextTemplate(
    tables=test_engine.table,
    name="test",
)
test_engine.opts = {"database_name": "{db}_abc"}

# Key locations: user home, this file's directory, and its parent directory
HOMEDIR = os.path.expanduser("~")
file_location = os.path.dirname(os.path.realpath(__file__))
retriever_root_dir = os.path.abspath(os.path.join(file_location, os.pardir))

# Template for raw data file paths; {file_name} is substituted with str.format
raw_data_template = os.path.join(retriever_root_dir, "raw_data/{file_name}")
raw_dir_files = os.path.normpath(raw_data_template)
del raw_data_template  # keep the module namespace identical to the original

# sample_zip.zip (the original note names its content as sample_zip.csv)
achive_zip = raw_dir_files.format(file_name="sample_zip.zip")

# sample_tar.tar (the original note names its content as test/sample_tar.csv)
achive_tar = raw_dir_files.format(file_name="sample_tar.tar")