Example #1
    def list_cubes(self):
        """ List every dataset that has a mapping as an available cube """
        cubes = []
        for dataset in Dataset.all():
            # skip datasets that do not have a model mapping yet
            if not len(dataset.mapping):
                continue
            cubes.append({
                'name': dataset.name,
                'label': dataset.label
            })
        return cubes
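
A minimal usage sketch; here `manager` is only a stand-in for whatever object defines list_cubes() and is not part of the example above:

# Hypothetical caller: print the name and label of every available cube.
for cube in manager.list_cubes():
    print cube['name'], '-', cube['label']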
Example #2
def reindex():
    """ Rebuild the whoosh search index for all datasets and countries """
    from openspending.core import db
    from openspending.model import Dataset
    from openspending.model.country import Country

    index = whoo.whoosh_index(current_app, Dataset)

    with index.writer() as writer:

        for dataset in Dataset.all():
            primary_field = dataset.pure_whoosh.primary_key_name
            searchable = dataset.__searchable__
            attrs = {}
            for key in searchable:
                try:
                    attrs[key] = unicode(getattr(dataset, key))
                except AttributeError:
                    raise AttributeError('{0} does not have {1} field {2}'
                                         .format("Dataset", searchable, key))

            attrs[primary_field] = unicode(getattr(dataset, primary_field))
            writer.update_document(**attrs)

    # make sure we have all of the geometry tables in there
    createCountries(silent=True)

    index = whoo.whoosh_index(current_app, Country)
    with index.writer() as writer:

        for country in Country.all():
            primary_field = country.pure_whoosh.primary_key_name
            searchable = country.__searchable__
            attrs = {}
            for key in searchable:
                try:
                    attrs[key] = unicode(getattr(country, key))
                except AttributeError:
                    raise AttributeError('{0} does not have {1} field {2}'
                                         .format("Country", searchable, key))

            attrs[primary_field] = unicode(getattr(country, primary_field))
            writer.update_document(**attrs)
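
A minimal sketch of exposing such a reindex as a CLI command, assuming the same Flask-Script `manager` object that the output_logs example below (Example #7) decorates with @manager.command; the command name here is illustrative only:

@manager.command
def reindex_search():
    """ Rebuild the whoosh search index (illustrative wrapper, assumed name) """
    reindex()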
Example #3
    def output_json(**args):
        """ Output JSON data """
        outputfile = args.get('outputfile', None)

        if not outputfile or len(outputfile) != 1:
            print "You need to specify one and only one output file"
            return

        outputfile = outputfile[0]

        # need to dump in this order so relations can be rebuilt on load:
        #   metadataorg
        #   dataorg
        #   source
        #   sourcefile (wraps up files)
        #   dataset

        outputobj = []

        for metadataorg in MetadataOrg.all().all():
            outputobj.append(metadataorg.to_json_dump())

        for dataorg in DataOrg.all().all():
            outputobj.append(dataorg.to_json_dump())

        for source in Source.all().all():
            outputobj.append(source.to_json_dump())

        for sourcefile in SourceFile.all().all():
            outputobj.append(sourcefile.to_json_dump())

        for dataset in Dataset.all().all():
            outputobj.append(dataset.to_json_dump())

        with open(outputfile, 'wb') as f:
            json.dump(outputobj, f)

        print "success"
        print "written to ", outputfile
Example #4
    def reload_all(**args):
        """Reload all sources with mapping.  This will take a while"""

        datasets = Dataset.all().all()
        ids = []
        for dataset in datasets:
            ids.append(dataset.id)

        total = 0
        ran = 0

        for id in ids:
            dataset = Dataset.by_id(id)
            total += 1
            # only reload datasets that have both a mapping and a source
            if dataset.mapping and dataset.source:
                print "working on ", dataset
                load_source(dataset.source.id)
                ran += 1

        print "Ran", ran, "out of", total
Example #5
def categories_list():

    page_num = request.args.get('page', None)

    perpage = request.args.get('perpage', 25)

    includesubs = request.args.get('includesubs', True)

    limit = request.args.get('limit', None)

    query_all = Dataset.all(order=True)

    if limit:
        query_all = query_all.limit(int(limit))

    numpages = 1
    page = 1
    if page_num:
        total_indicators = query_all.count()
        query_all = query_all.offset(int(page_num) * int(perpage)).limit(
            int(perpage))
        numpages = int(float(total_indicators) / float(perpage)) + 1
        page = page_num

    outputschema = {
        "page": page,
        "numpages": numpages,
        "data": {
            "categories": {
                "total": 0,
                "data": collections.OrderedDict({})
            },
            "subcategories": {
                #"total":0,
                "data": collections.OrderedDict({})
            },
            "sources": {
                "total": 0,
                "data": collections.OrderedDict({})
            },
            "colls": {
                "total": 0,
                "data": collections.OrderedDict({})
            },
            "indicators": {
                "total": 0,
                "data": collections.OrderedDict({})
            }
        }
    }

    for indicator in query_all.all():
        if not getattr(indicator, "mapping", None):
            continue
        keyname = indicator.name
        years = getattr(indicator, "years", None)
        if years:
            the_years = years.split(",")
            the_years = map(int, the_years)
            the_years.sort()
        else:
            the_years = []
        dataorg = getattr(indicator, "dataorg", None)
        if not dataorg:
            dataorg = "None"
        else:

            if outputschema['data']['sources']['data'].get(
                    dataorg.label, None):
                outputschema['data']['sources']['data'][
                    dataorg.label]['indicators'].append(indicator.name)
            else:
                outputschema['data']['sources']['data'][dataorg.label] = {
                    'label': dataorg.label,
                    'indicators': [indicator.name]
                }
                outputschema['data']['sources']['total'] += 1

            dataorg = dataorg.label
        tags = getattr(indicator, "tags", [])
        subcategory = "None"
        category = "None"
        colls = "None"

        for tag in tags:
            if tag.category == "spsd":
                if outputschema['data']['categories']['data'].get(
                        tag.slug_label, None):
                    outputschema['data']['categories']['data'][
                        tag.slug_label]['indicators'].append(indicator.name)
                else:
                    outputschema['data']['categories']['data'][
                        tag.slug_label] = {
                            'label': tag.label,
                            'indicators': [indicator.name]
                            #"subcategories": {}
                        }
                    outputschema['data']['categories']['total'] += 1
                category = tag.slug_label
            elif tag.category == "subspsd":
                #if outputschema['data']['categories']['data'].get(tag.slug_label, None):
                outputschema['data']['subcategories']['data'][
                    tag.slug_label] = {
                        'label': tag.label
                    }

                subcategory = tag.slug_label

            elif tag.category == "colls":

                if outputschema['data']['colls']['data'].get(
                        tag.slug_label, None):
                    outputschema['data']['colls']['data'][
                        tag.slug_label]['indicators'].append(indicator.name)
                else:
                    outputschema['data']['colls']['data'][tag.slug_label] = {
                        'label': tag.label,
                        'indicators': [indicator.name]
                        #"subcategories": {}
                    }
                    outputschema['data']['colls']['total'] += 1
                colls = tag.slug_label

        indicatorschema = {
            "label": getattr(indicator, "label", "No Label"),
            "description": getattr(indicator, "description", "No Description"),
            "source": dataorg,
            "category": category,
            "subcategory": subcategory,
            "colls": colls,
            "years": the_years
        }
        outputschema['data']['indicators']['data'][keyname] = indicatorschema
        outputschema['data']['indicators']['total'] += 1

    #outputschema['data']['indicators'] = list(sorted(outputschema['data']['indicators'].items(), key=lambda x: x))
    # resp = Response(response=json.dumps({'data':outputschema}),
    #         status=200, \
    #         mimetype="application/json")
    # return resp
    return json.dumps(outputschema)
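
A minimal client-side sketch for this view, assuming it is routed at something like /api/categories (the route is not shown in the example):

import json
import requests

# Fetch the category listing and report how many categories came back.
resp = requests.get('http://localhost:5000/api/categories')  # assumed route
data = json.loads(resp.content)
print data['data']['categories']['total'], "categories"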
Example #6
def dataorgs():

    page_num = request.args.get('page', None)

    perpage = request.args.get('perpage', 25)

    includesubs = request.args.get('includesubs', True)

    limit = request.args.get('limit', None)

    query_all = Dataset.all(order=True)

    if limit:
        query_all = query_all.limit(int(limit))

    numpages = 1
    page = 1
    if page_num:
        total_indicators = query_all.count()
        query_all = query_all.offset(int(page_num) * int(perpage)).limit(
            int(perpage))
        numpages = int(float(total_indicators) / float(perpage)) + 1
        page = page_num

    outputschema = {
        "page": page,
        "numpages": numpages,
        "data": {
            "categories": {
                "total": 0,
                "data": collections.OrderedDict({})
            },
            "subcategories": {
                #"total":0,
                "data": collections.OrderedDict({})
            },
            "sources": {
                "total": 0,
                "data": collections.OrderedDict({})
            },
            "indicators": {
                "total": 0,
                "data": collections.OrderedDict({})
            }
        }
    }


    for indicator in query_all.all():
        if not getattr(indicator, "mapping", None):
            continue
        keyname = indicator.name
        years = getattr(indicator, "years", None)
        if years:
            the_years = years.split(",")
            the_years = map(int, the_years)
            the_years.sort()
        else:
            the_years = []
        dataorg = getattr(indicator, "dataorg", None)
        if not dataorg:
            dataorg = "None"
        else:

            if outputschema['data']['sources']['data'].get(dataorg.label, None):
                outputschema['data']['sources']['data'][
                    dataorg.label]['indicators'].append(indicator.name)
            else:
                outputschema['data']['sources']['data'][dataorg.label] = {
                    'label': dataorg.label,
                    'indicators': [indicator.name]
                }
                outputschema['data']['sources']['total'] += 1

            dataorg = dataorg.label
        tags = getattr(indicator, "tags", [])
        subcategory = "None"
        category = "None"

        for tag in tags:
            if tag.category == "spsd":
                if outputschema['data']['categories']['data'].get(tag.slug_label, None):
                    outputschema['data']['categories']['data'][
                        tag.slug_label]['indicators'].append(indicator.name)
                else:
                    outputschema['data']['categories']['data'][tag.slug_label] = {
                        'label': tag.label,
                        'indicators': [indicator.name]
                        #"subcategories": {}
                    }
                    outputschema['data']['categories']['total'] += 1
                category = tag.slug_label
            elif tag.category == "subspsd":
                outputschema['data']['subcategories']['data'][tag.slug_label] = {
                    'label': tag.label
                }
                subcategory = tag.slug_label


        indicatorschema = {
            "label": getattr(indicator, "label", "No Label"),
            "description": getattr(indicator, "description", "No Description"),
            "source": dataorg,
            "category": category,
            "subcategory": subcategory,
            "years": the_years
        }
        outputschema['data']['indicators']['data'][keyname] = indicatorschema
        outputschema['data']['indicators']['total'] += 1

    #outputschema['data']['indicators'] = list(sorted(outputschema['data']['indicators'].items(), key=lambda x: x))

    return json.dumps(outputschema)
Example #7
def add_import_commands(manager):
    @manager.option('-f',
                    '--file-dir',
                    dest='file_dir',
                    help='File Dir to output the files')
    @manager.command
    def output_logs(**args):
        filedir = args.get("file_dir", None)
        log.info("Using filedir: %s", filedir)
        if not filedir:
            log.warn("Please specify an output dir")
            sys.exit()
        try:
            f = open(os.path.join(filedir, "LogFiles.zip"), 'wb')
        except Exception, e:
            log.warn("Could not open output file : %s", e)
            sys.exit()

        zf = zipfile.ZipFile(f, "w")

        for dataset in Dataset.all():
            if dataset.source and dataset.source.runs.first():
                datalogs = dataset.source.runs.first().records_as_json()
            else:
                log.info("Skipping : %s", dataset.name)
                continue

            if not len(datalogs):
                log.info("No Datalog for : %s", dataset.name)
                zf.writestr(dataset.name + "/loadinglog.csv", "All is well")
                continue

            outputfile = io.BytesIO()
            # figure out the CSV headers from the first log record

            dw = csv.DictWriter(outputfile,
                                delimiter=',',
                                extrasaction='ignore',
                                fieldnames=datalogs[0].keys())
            dw.writeheader()

            for row in datalogs:
                dw.writerow(row)
            zf.writestr(dataset.name + "/loadinglog.csv",
                        outputfile.getvalue())

            #write openrefine output
            preloadvalue = dataset.source.getORFile().getvalue()
            zf.writestr(dataset.name + "/preloadvalue.csv", preloadvalue)

            url = "http://*****:*****@name&format=csv"

            # Fill in your details here to be posted to the login form.
            LOCKDOWN_FORCE = current_app.config.get("LOCKDOWNUSER", False)
            LOCKDOWNUSER = current_app.config.get("LOCKDOWNUSER")
            LOCKDOWNPASSWORD = current_app.config.get("LOCKDOWNPASSWORD")
            if LOCKDOWN_FORCE:
                payload = {
                    'username': LOCKDOWNUSER,
                    'password': LOCKDOWNPASSWORD
                }

                # Use 'with' to ensure the session context is closed after use.
                with requests.Session() as s:
                    try:
                        p = s.post('http://localhost:5000/lockdown',
                                   data=payload)

                        # An authorised request.
                        postloadvalue = s.get(url).content
                    except Exception, e:
                        log.warn("could not get authorized postload value " +
                                 str(e))
            else:

                try:
                    postloadvalue = requests.get(url).content
                except Exception, e:
                    log.warn("Could Not find post load content for " +
                             dataset.name)