Example #1
def json_of_url(url):
    # Check if it's a local file
    if _is_local_file(url):
        # If it is we open it as a normal file
        return json.load(open(url, 'r'))
    else:
        # If it isn't we open the url as a file
        return json.load(urllib2.urlopen(url))
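The helper _is_local_file is not shown above; a minimal sketch of what it might look like, assuming it only needs to tell local filesystem paths apart from remote URLs (the implementation below is hypothetical, not part of the source):

import urlparse

# Hypothetical helper, not from the source: treat anything without a
# remote URL scheme as a path on the local filesystem.
def _is_local_file(url):
    scheme = urlparse.urlparse(url).scheme
    return scheme in ('', 'file')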
Example #3
def csvimport_fixture(name):
    data_fp = csvimport_fixture_file(name, 'data.csv')
    model_fp = csvimport_fixture_file(name, 'model.json')
    mapping_fp = csvimport_fixture_file(name, 'mapping.json')

    model = json.load(model_fp)

    if mapping_fp:
        model['mapping'] = json.load(mapping_fp)

    return data_fp, model
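csvimport_fixture_file is assumed to return an open file object for the named fixture, or None when the file does not exist (which is why mapping_fp is checked before use). A rough sketch under that assumption; the fixture directory layout is a guess:

import os

# Hypothetical helper: open a fixture file if it exists, otherwise return None.
def csvimport_fixture_file(name, path):
    full_path = os.path.join('fixtures', 'csvimport', name, path)
    if not os.path.exists(full_path):
        return None
    return open(full_path, 'rb')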
Example #4
def csvimport_fixture(name):
    model_fp = csvimport_fixture_file(name, 'model.json')
    mapping_fp = csvimport_fixture_file(name, 'mapping.json')
    model = json.load(model_fp)
    if mapping_fp:
        model['mapping'] = json.load(mapping_fp)
    dataset = Dataset(model)
    dataset.generate()
    db.session.add(dataset)
    data_path = csvimport_fixture_path(name, 'data.csv')
    user = make_account()
    source = Source(dataset, user, data_path)
    db.session.add(source)
    db.session.commit()
    return source
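A hypothetical test built on this fixture could look like the following; the fixture name and the assertion are placeholders, not taken from the source:

# Hypothetical usage; 'successful_import' is a placeholder fixture name.
def test_fixture_produces_source():
    source = csvimport_fixture('successful_import')
    assert source.dataset is not None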
Example #5
def csvimport_fixture(name):
    model_fp = csvimport_fixture_file(name, 'model.json')
    mapping_fp = csvimport_fixture_file(name, 'mapping.json')
    model = json.load(model_fp)
    if mapping_fp:
        model['mapping'] = json.load(mapping_fp)
    dataset = Dataset(model)
    dataset.generate()
    db.session.add(dataset)
    data_path = csvimport_fixture_path(name, 'data.csv')
    user = h.make_account()
    source = Source(dataset, user, data_path)
    db.session.add(source)
    db.session.commit()
    return source
Example #6
    def __init__(self, package,
                 model_url=None, resource_uuid=None):

        if not isinstance(package, Package):
            package = Package(package)

        if resource_uuid:
            data = package.get_resource(resource_uuid)
        else:
            data = package.openspending_resource('data')

        if not model_url:
            # Use magic CKAN tags
            model = package.openspending_resource('model')
            model_url = model['url']

        model_fp = util.urlopen(model_url)
        try:
            model = json.load(model_fp)
        except Exception as e:
            raise ImporterError("Error encountered while parsing JSON model. "
                                "http://jsonlint.com might help! Error was: %s"
                                % e)

        csv = util.urlopen_lines(data["url"])
        super(CKANImporter, self).__init__(csv, model, data["url"])
Example #7
def csv_import(resource_url, model_url, **kwargs):
    import urllib
    from openspending.lib import json
    from openspending.etl import util
    from openspending.etl.importer import CSVImporter

    model = json.load(urllib.urlopen(model_url))
    csv = util.urlopen_lines(resource_url)
    importer = CSVImporter(csv, model, resource_url)

    importer.run(**kwargs)
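A minimal invocation sketch; both URLs are placeholders, and any keyword arguments are simply forwarded to importer.run():

# Hypothetical call with placeholder URLs.
csv_import(
    "http://example.org/budget/data.csv",
    "http://example.org/budget/model.json",
)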
Example #8
    def _test_mapping(self, dir):
        mapping_csv = h.fixture_file("csv_import/%s/mapping.csv" % dir)
        mapping_json = h.fixture_file("csv_import/%s/mapping.json" % dir)

        csv = mapping_csv.read()
        expected_mapping_data = json.load(mapping_json)

        importer = MappingImporter()
        observed_mapping_data = importer.import_from_string(csv)

        assert observed_mapping_data == expected_mapping_data
Example #9
    def input_json(**args):
        """ inputjson JSON data  """
        inputfile = args.get('inputfile', None)

        if not inputfile or len(inputfile) != 1:
            print "You need to specify one and only one input file"
            return

        inputfile = inputfile[0]

        try:
            f = open(inputfile, 'rb')
        except IOError:
            print "file not found"
            return

        inputobj = json.load(f)

        f.close()

        modeldict = {'MetadataOrg' : [],'DataOrg' : [],'Source' : [],'SourceFile' : [],'Dataset' : []}

        for theobj in inputobj:
            if theobj['model'] not in modeldict.keys():
                modeldict[theobj['model']] = [theobj]
            else:
                modeldict[theobj['model']].append(theobj)

        theorder = ['MetadataOrg','DataOrg','Source','SourceFile','Dataset']
        ordermapping = {'metadataorg': 'MetadataOrg','dataorg':'DataOrg','source':'Source','sourcefile':'SourceFile','dataset':'Dataset'}

        for orderitem in theorder:
            modelclass = getattr(openspending.model, orderitem)
            print modelclass
            for theobj in modeldict[orderitem]:
                for objkey in theobj['fields'].keys():
                    try:
                        if objkey.find("_id") != -1:
                            # need to find the object associated with this key and repopulate it
                            modelobjstr = objkey.split('_')[0]
                            searchpk = theobj['fields'][objkey]
                            for foreignkeymod in modeldict[modelobjstr]:
                                if foreignkeymod['pk'] == searchpk:
                                    foreignmodelclass = getattr(openspending.model, ordermapping[modelobjstr])
                                    theobj[objkey] = foreignmodelclass.by_id(foreignkeymod['theid'])

                            
                    except:
                        pass


                theid = modelclass.import_json_dump(theobj)
                theobj['theid'] = theid
Example #10
    def _test_dataset_dir(self, dir):
        data_csv = h.fixture_file("csv_import/%s/data.csv" % dir)
        mapping_json = h.fixture_file("csv_import/%s/mapping.json" % dir)

        dataset_name = unicode(dir)

        model = csv_fixture_model()
        model["mapping"] = json.load(mapping_json)
        model["dataset"]["name"] = dataset_name

        lines = self.count_lines_in_stream(data_csv) - 1

        importer = CSVImporter(data_csv, model)
        importer.run()

        assert len(importer.errors) == 0, "Import should not throw errors"

        # check correct number of entries
        entries = Entry.find({"dataset.name": dataset_name})
        assert entries.count() == lines
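self.count_lines_in_stream is not shown here; a plausible sketch, assuming it counts the lines in the fixture stream and rewinds it so CSVImporter can read the data again:

# Hypothetical helper, not from the source.
def count_lines_in_stream(self, stream):
    count = sum(1 for _ in stream)
    stream.seek(0)  # rewind so the importer can re-read the stream
    return count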
Example #11
    def __init__(self, package,
                 model_url=None, mapping_url=None, resource_uuid=None):

        if not isinstance(package, ckan.Package):
            package = ckan.Package(package)

        if resource_uuid:
            data = package.get_resource(resource_uuid)
        else:
            data = package.openspending_resource('data')

        explicit = (model_url and not mapping_url) or (mapping_url and not model_url)

        if not explicit:
            # Use magic CKAN tags
            model = package.openspending_resource('model')
            mapping = package.openspending_resource('model:mapping')

            if model:
                model_url = model['url']
            elif mapping:
                mapping_url = mapping['url']

        # Model given
        if model_url and data:
            model = json.load(util.urlopen(model_url))

        # Mapping given, need to extract metadata from CKAN
        elif mapping_url and data:
            model = {}
            model['mapping'] = MappingImporter().import_from_url(mapping_url)
            model['dataset'] = package.metadata_for_resource(data)

        csv = util.urlopen_lines(data["url"])
        super(CKANImporter, self).__init__(csv, model, data["url"])
Example #12
from openspending.lib import ckan
from openspending.lib import json
from openspending.etl.command import daemon
from openspending.etl.ui.test import ControllerTestCase, url, helpers as h

MOCK_REGISTRY = json.load(h.fixture_file('mock_ckan.json'))

class TestLoadController(ControllerTestCase):
    def setup(self):
        super(TestLoadController, self).setup()
        self.patcher = h.patch('openspending.etl.ui.controllers.load.ckan.CkanClient',
                               spec=ckan.CkanClient)
        self.MockCkanClient = self.patcher.start()
        self.MockCkanClient.return_value = self.c = h.mock_ckan(MOCK_REGISTRY)

    def teardown(self):
        self.patcher.stop()
        super(TestLoadController, self).teardown()

    def test_packages(self):
        response = self.app.get(url(controller='load', action='packages'))

        # Show title for packages
        assert '<a href="http://ckan.net/package/baz">The Baz dataset</a>' in response

        # Show 'import' link for importable packages
        import_url = url(controller='load', action='start', package='bar')
        assert '<a href="%s">' % import_url in response

        # Show 'diagnose' link for non-importable packages
        diagnose_url = url(controller='load', action='diagnose', package='baz')
        assert '<a href="%s">' % diagnose_url in response
Example #13
def make_mock_ckan(*args, **kwargs):
    global current_mock_ckan
    current_mock_ckan = h.mock_ckan(json.load(h.fixture_file('mock_ckan.json')))
    return current_mock_ckan
Example #14
def _load_json(fp_or_str):
    try:
        return json.load(fp_or_str)
    except AttributeError:
        return json.loads(fp_or_str)
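The helper works because json.load calls .read() on its argument: a file-like object is read directly, while a plain string raises AttributeError and the call falls back to json.loads. For example (StringIO is used here purely for illustration):

from StringIO import StringIO

# Both calls parse the same JSON document: the first via json.load,
# the second via the json.loads fallback.
_load_json(StringIO('{"a": 1}'))
_load_json('{"a": 1}')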
Example #15
def csv_fixture_mapping(name=None):
    if name is None:
        return default_mapping.copy()

    f = h.fixture_file("csv_import/%s-model.json" % name)
    return json.load(f)
Example #16

def parseDBJSON(args):
    if len(args['jsondata']) != 1:
        print "\n\nPlease specify one and only one json dump from python manage.py etldata dumpdata"
        sys.exit(1)

    try:
        jsonfile = open(args['jsondata'][0], 'rb')
    except Exception, e:
        print "failed to open file"
        print e
        sys.exit(1)

    try:
        databankjson = json.load(jsonfile)
    except Exception, e:
        print "\n\nYou hit an error on json loading"
        print e
        sys.exit(1)

    # split the objects into their appropriate spots
    modelobjs = {}

    for jsonobj in databankjson:
        modelname = jsonobj['model'].split(".")[1]
        if modelname in modelobjs.keys():
            modelobjs[modelname].append(jsonobj)
        else:
            modelobjs[modelname] = [jsonobj]
Example #18
    def input_json(**args):
        """ inputjson JSON data  """
        inputfile = args.get('inputfile', None)

        if not inputfile or len(inputfile) != 1:
            print "You need to specify one and only one input file"
            return

        inputfile = inputfile[0]

        try:
            f = open(inputfile, 'rb')
        except IOError:
            print "file not found"
            return

        inputobj = json.load(f)

        f.close()

        modeldict = {
            'MetadataOrg': [],
            'DataOrg': [],
            'Source': [],
            'SourceFile': [],
            'Dataset': []
        }

        for theobj in inputobj:
            if theobj['model'] not in modeldict.keys():
                modeldict[theobj['model']] = [theobj]
            else:
                modeldict[theobj['model']].append(theobj)

        theorder = [
            'MetadataOrg', 'DataOrg', 'Source', 'SourceFile', 'Dataset'
        ]
        ordermapping = {
            'metadataorg': 'MetadataOrg',
            'dataorg': 'DataOrg',
            'source': 'Source',
            'sourcefile': 'SourceFile',
            'dataset': 'Dataset'
        }

        for orderitem in theorder:
            modelclass = getattr(openspending.model, orderitem)
            print modelclass
            for theobj in modeldict[orderitem]:
                for objkey in theobj['fields'].keys():
                    try:
                        if objkey.find("_id") != -1:
                            # need to find the object associated with this key and repopulate it
                            modelobjstr = objkey.split('_')[0]
                            searchpk = theobj['fields'][objkey]
                            for foreignkeymod in modeldict[modelobjstr]:
                                if foreignkeymod['pk'] == searchpk:
                                    foreignmodelclass = getattr(
                                        openspending.model,
                                        ordermapping[modelobjstr])
                                    theobj[objkey] = foreignmodelclass.by_id(
                                        foreignkeymod['theid'])

                    except:
                        pass

                theid = modelclass.import_json_dump(theobj)
                theobj['theid'] = theid
Example #19
def json_of_url(url):
    return json.load(urllib2.urlopen(url))
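urllib2 exists only on Python 2; on Python 3.6 or later the equivalent one-liner would use urllib.request instead (a sketch, not taken from the source):

import json
import urllib.request

def json_of_url(url):
    return json.load(urllib.request.urlopen(url))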