Example #1
import pandas as pd  # pd.to_numeric is used below

def geocoding(switch, dataset, modern, historical):
    # if config:
    switch = 'historical'
    metadata = {}
    metadataitem = {}
    # title, units, config, handle, geolist, oecd2webmapper and the helpers
    # loadgeocoder, selectint and load_geocodes are assumed to exist at module level.
    if title:
        metadataitem['title'] = title
        metadataitem['units'] = units

        if switch == 'modern':
            activeindex = modern.index
            coder = modern
            class1 = switch
        else:
            activeindex = historical.index
            coder = historical

        (moderndata, historicaldata) = loadgeocoder(config, dataset, '')
        if switch == 'modern':
            maindata = moderndata
        else:
            # Do conversion to webmapper system
            # historicaldata is expected to be falsy (e.g. None) when absent
            if not historicaldata:
                maindata = moderndata
                webmapperindex = []
                for code in maindata.index:
                    try:
                        webmappercode = oecd2webmapper[int(code)]
                    except (KeyError, ValueError):
                        webmappercode = -1
                    webmapperindex.append(webmappercode)
                maindata.index = webmapperindex
                maindata = maindata[maindata.index > 0]
            else:
                maindata = historicaldata

        # convert_objects() was removed from pandas; apply(pd.to_numeric) is the closest replacement
        maindata = maindata.apply(pd.to_numeric, errors='ignore')
        (cfilter, notint) = selectint(maindata.columns)

        codes = selectint(maindata.index)
        geo = load_geocodes(config, switch, codes, maindata, geolist)
        #for colname in notint:
            #maindata = maindata.drop(colname, axis=1)
        # Drop num if in dataframe
        if '1' in maindata.columns:
            maindata = maindata.drop('1', axis=1)
        #dataframe[pid] = maindata
        metadata[handle] = metadataitem
    return (maindata, metadata, coder)
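A minimal, self-contained sketch of the OECD-to-webmapper remapping used above, with a toy mapping and DataFrame standing in for oecd2webmapper and the real dataset (both of those come from the project's buildgeocoder/loadgeocoder helpers and are only assumed here):

import pandas as pd

# Toy stand-in for the mapping returned by buildgeocoder (assumption for illustration)
oecd2webmapper = {528: 150, 250: 160}

maindata = pd.DataFrame({'1900': [1.0, 2.0, 3.0]}, index=['528', '250', 'unknown'])

webmapperindex = []
for code in maindata.index:
    try:
        webmapperindex.append(oecd2webmapper[int(code)])
    except (KeyError, ValueError):
        webmapperindex.append(-1)  # mark codes the mapping does not know
maindata.index = webmapperindex
maindata = maindata[maindata.index > 0]  # drop unrecognised locations, as in geocoding()
print(maindata)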
Example #2
import re
import pandas as pd

# content2dataframe, buildgeocoder, loadgeocoder, selectint, load_geocodes,
# datasetfilter, dataset_analyzer and create_excel_dataset are project helpers
# assumed to be imported elsewhere in this module.
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    handles = []
    ctrlist = []
    metadata = {}
    #switch = 'historical'

    # Load Geocoder
    (classification, dataset, title,
     units) = content2dataframe(config, config['geocoderhandle'])

    (geocoder, geolist, oecd2webmapper) = buildgeocoder(dataset, config, '')
    (modern, historical) = loadgeocoder(config, dataset, 'geocoder')

    # Default years selection
    coderyears = list(range(1500, 2016))
    # Years selected
    if datafilter['startyear']:
        coderyears = list(range(int(datafilter['startyear']),
                                int(datafilter['endyear'])))

    # Reading dataset
    (class1, dataset, title, units) = content2dataframe(config, handle)
    filetitle = title
    filetitle = re.sub(' ', '_', filetitle)
    filetitle = re.sub(r'[\(\)]', '_', filetitle)
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)

    #return ('test', 'test')

    if switch == 'modern':
        activeindex = modern.index
        coder = modern
        class1 = switch
    else:
        activeindex = historical.index
        coder = historical

    (moderndata, historicaldata) = loadgeocoder(config, dataset, '')
    if switch == 'modern':
        maindata = moderndata
    else:
        # Do conversion to webmapper system if there are no historical data
        isdata = ''
        try:
            if not historicaldata.empty:
                isdata = 'yes'
        except AttributeError:
            # historicaldata is not a DataFrame here (e.g. None), so there is no historical data
            isdata = 'no'

        if isdata == 'no':
            maindata = moderndata
            webmapperindex = []
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except (KeyError, ValueError):
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            # Drop not recognized locations
            maindata = maindata[maindata.index > 0]
        else:
            maindata = historicaldata

    if title:
        metadata['title'] = title
    else:
        metadata['title'] = ''

    if units:
        metadata['units'] = units
    else:
        metadata['units'] = ''

    (cfilter, notint) = selectint(maindata.columns)

    codes = selectint(maindata.index)
    geo = load_geocodes(config, switch, codes, maindata, geolist)
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)

    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        datasubset = maindata

    # Create Excel file out from dataframe
    (yearscolumns, notyears) = selectint(maindata.columns)
    (countryinfo, notcountry) = selectint(maindata.index)

    (finalsubset, icoder, isyear, ctrfilter,
     nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
    #return (finalsubset, fullpath, finalsubset)
    # Apply filter to countries
    if datafilter['ctrlist']:
        tmpcoder = icoder.loc[ctrlist]  # .loc replaces the removed .ix indexer
        icoder = pd.DataFrame(tmpcoder)

    #return (finalsubset.to_html(), 'test')
    if fullpath:
        config['emptyvalues'] = 'no'
        if config['emptyvalues'] == 'no':
            (coderyears, notyears) = selectint(finalsubset.columns)
        datafile = create_excel_dataset(fullpath, icoder, metadata,
                                        icoder.columns, coderyears,
                                        finalsubset, isyear, ctrfilter)
    return (filetitle, fullpath, finalsubset)
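The year selection in dataframe_compiler defaults to 1500-2015 and narrows to the filter's range when 'startyear' is set (the end year is exclusive, as range() makes it). A small sketch of that logic in isolation; the helper name select_coderyears is made up for illustration:

def select_coderyears(datafilter, default_start=1500, default_end=2016):
    """Mirror the coderyears selection above: filter range if given, else the default span."""
    if datafilter.get('startyear'):
        return list(range(int(datafilter['startyear']), int(datafilter['endyear'])))
    return list(range(default_start, default_end))

print(select_coderyears({'startyear': '1800', 'endyear': '1850'})[:3])  # [1800, 1801, 1802]
print(len(select_coderyears({'startyear': ''})))                        # 516 default years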
Example #3
from datasets import loaddataset, countrystats, loaddataset_fromurl, loadgeocoder, treemap
from sys import argv
import pandas as pd
import numpy as np

handles = []
remote = 'on'
handle = "hdl:10622/LIKXCZ"
config = configuration()  # configuration() is assumed to come from the project's own helpers

if remote:
    (classification, dataset) = loaddataset_fromurl(config['apiroot'], config['geocoderhandle'])
else:
    dataset = loaddataset(handles)

(modern, historical) = loadgeocoder(dataset)    
handles = []
handle = 'hdl:10622/DIUBXI'
#handle = 'hdl:10622/GZ7O1K'
handles.append(handle)
if remote:
    (class1, dataset) = loaddataset_fromurl(config['apiroot'], handle)
else:
    dataset = loaddataset(handles)

#modern.ix[76]['country']
#historical.ix[1]
print(class1)
#print dataset.to_html()

res = treemap(dataset)
Example #4
handles = []
config = configuration()
config['remote'] = ''

# Geocoder
handle = config['geocoderhandle']
(classification, dataset, title,
 units) = content2dataframe(config, config['geocoderhandle'])
#if remote:
#    (classification, dataset) = loaddataset_fromurl(config, handle)
#else:
#    dataset = loaddataset(handles)

(geocoder, geolist, oecdlist) = buildgeocoder(dataset, config, '')
#print geocoder
(modern, historical) = loadgeocoder(config, dataset, 'geocoder')

switch = 'historical'
switch = 'modern'
if switch == 'modern':
    activeindex = modern.index
    coder = modern
    class1 = switch
else:
    activeindex = historical.index
    coder = historical

handle = "hdl:10622/DIUBXI"
handle = "hdl:10622/WNGZ4A"
handle = "hdl:10622/GZ7O1K"
Example #5
    # Fragment: these lines continue a function whose beginning (including the
    # start time `a`) is not shown in this example.
    if datafilter['ctrlist']:
        tmpcoder = icoder.loc[ctrlist]  # .loc replaces the removed .ix indexer
        icoder = pd.DataFrame(tmpcoder)
    datafile = create_excel_dataset(fullpath, icoder, metadata[handle],
                                    icoder.columns, coderyears, finalsubset,
                                    isyear, ctrfilter)
    b = datetime.now()
    d = b - a
    print("Time: " + str(d.seconds) + " seconds")
    return datafile

for ffile in onlyfiles:
    pid = 'clio'
    fullpath = mypath + '/' + ffile
    isexcel = re.match(r'(.+)\.xls', ffile)
    if isexcel:
        newfile = isexcel.group(1) + "-historical.xlsx"
        fulloutfile = outdir + "/" + newfile
        print(fulloutfile)
        pid = newfile
        handle = pid
        print(handle)
        (jsonfile, csvfile, tmptitle, tmpunits) = dataextractor(fullpath, path, pid, fileID)
        (dataset, title, units) = compiledataset(csvfile)
        switch = 'historical'
        (maindata, metadata, coder) = geocoding(switch, dataset, modern, historical)
        (moderndata, historicaldata) = loadgeocoder(config, maindata, '')
        maindata = conversion(maindata, moderndata, historicaldata)
        print(metadata[handle]['title'])
        outfile = store_dataset(fulloutfile, maindata, metadata, coder)
        print(outfile)
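
The loop above relies on mypath, outdir, onlyfiles, fileID and the geocoder tables being prepared earlier in the script; they are not shown in this example. One conventional way to build the file list (the directory paths are placeholders):

from os import listdir
from os.path import isfile, join

mypath = '/path/to/excel/input'   # placeholder input directory
outdir = '/path/to/excel/output'  # placeholder output directory

# Plain files only; the .xls match in the loop then picks out the Excel workbooks
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]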

Example #6
from openpyxl.utils import get_column_letter  # moved out of openpyxl.cell in newer openpyxl
import numpy as np

handles = []
remote = 'on'
config = configuration()

# Geocoder
handle = config['geocoderhandle']
if remote:
    (classification, dataset, title, units) = loaddataset_fromurl(config, handle)
else:
    dataset = loaddataset(handles)

(geocoder, geolist, oecd) = buildgeocoder(dataset, config, '')
(modern, historical) = loadgeocoder(config, dataset, 'geocoder')
coderyears = []
for i in range(1500, 2015):
    coderyears.append(i)

# Reading dataset
handle = "hdl:10622/WNGZ4A"
#handle = "hdl:10622/DIUBXI"
handles = []
handles.append(handle)
switch = 'historical'
#switch = 'modern'

if remote:
    (class1, dataset, title, units) = loaddataset_fromurl(config, handle)
else: