Esempio n. 1
0
def store_dataset(fullpath, maindata, metadata, coder):
    """Filter ``maindata`` with a fixed test filter and export it to Excel.

    The filter is hard-coded (start year 1500, exclusive end year 2016, no
    ``ctrlist`` restriction) and the time spent in ``dataset_analyzer`` plus
    ``create_excel_dataset`` is printed.

    Args:
        fullpath: Destination path handed to ``create_excel_dataset``.
        maindata: Source table; its ``columns`` and ``index`` are passed to
            ``selectint`` and the table itself to ``datasetfilter``.
        metadata: Mapping that is indexed with the free name ``handle``.
        coder: Forwarded unchanged to ``dataset_analyzer``.

    Returns:
        Whatever ``create_excel_dataset`` returns.
    """
    # Use case 1.  Other ``ctrlist`` values that were tried here:
    # '528,14,18,67', '528' and '1523'.
    datafilter = {
        'startyear': '1500',
        'endyear': '2016',
        'ctrlist': '',
    }
    # Always bind coderyears so a blank start year cannot cause a NameError.
    coderyears = []
    if datafilter['startyear']:
        # The end year is exclusive (range semantics).
        coderyears = list(range(int(datafilter['startyear']), int(datafilter['endyear'])))
    (datasubset, ctrlist) = datasetfilter(maindata, datafilter)

    (yearscolumns, _notyears) = selectint(maindata.columns)
    # Result is unused; the call is kept because selectint is not visible here.
    (_countryinfo, _notcountry) = selectint(maindata.index)

    started = datetime.now()
    (finalsubset, icoder, isyear, ctrfilter, _nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
    if datafilter['ctrlist']:
        # NOTE(review): DataFrame.ix is deprecated in newer pandas; confirm the
        # index type of icoder before switching to .loc.
        icoder = pd.DataFrame(icoder.ix[ctrlist])
    # NOTE(review): `handle` is neither a parameter nor a local, so it must
    # exist as a module-level global or this raises NameError -- confirm.
    datafile = create_excel_dataset(fullpath, icoder, metadata[handle], icoder.columns, coderyears, finalsubset, isyear, ctrfilter)
    elapsed = datetime.now() - started
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("Time: " + str(elapsed.seconds) + " seconds")
    return datafile
Esempio n. 2
0
def store_dataset(fullpath, maindata, metadata, coder):
    """Filter ``maindata`` with a fixed test filter and export it to Excel.

    The filter is hard-coded (start year 1500, exclusive end year 2016, no
    ``ctrlist`` restriction) and the time spent in ``dataset_analyzer`` plus
    ``create_excel_dataset`` is printed.

    Args:
        fullpath: Destination path handed to ``create_excel_dataset``.
        maindata: Source table; its ``columns`` and ``index`` are passed to
            ``selectint`` and the table itself to ``datasetfilter``.
        metadata: Mapping that is indexed with the free name ``handle``.
        coder: Forwarded unchanged to ``dataset_analyzer``.

    Returns:
        Whatever ``create_excel_dataset`` returns.
    """
    # Use case 1.  Other ``ctrlist`` values that were tried here:
    # '528,14,18,67', '528' and '1523'.
    datafilter = {
        'startyear': '1500',
        'endyear': '2016',
        'ctrlist': '',
    }
    # Always bind coderyears so a blank start year cannot cause a NameError.
    coderyears = []
    if datafilter['startyear']:
        # The end year is exclusive (range semantics).
        coderyears = list(range(int(datafilter['startyear']),
                                int(datafilter['endyear'])))
    (datasubset, ctrlist) = datasetfilter(maindata, datafilter)

    (yearscolumns, _notyears) = selectint(maindata.columns)
    # Result is unused; the call is kept because selectint is not visible here.
    (_countryinfo, _notcountry) = selectint(maindata.index)

    started = datetime.now()
    (finalsubset, icoder, isyear, ctrfilter,
     _nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
    if datafilter['ctrlist']:
        # NOTE(review): DataFrame.ix is deprecated in newer pandas; confirm the
        # index type of icoder before switching to .loc.
        icoder = pd.DataFrame(icoder.ix[ctrlist])
    # NOTE(review): `handle` is neither a parameter nor a local, so it must
    # exist as a module-level global or this raises NameError -- confirm.
    datafile = create_excel_dataset(fullpath, icoder, metadata[handle],
                                    icoder.columns, coderyears, finalsubset,
                                    isyear, ctrfilter)
    elapsed = datetime.now() - started
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("Time: " + str(elapsed.seconds) + " seconds")
    return datafile
Esempio n. 3
0
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    """Load a dataset, align it with a geocoder, filter it and write an Excel file.

    Args:
        config: Settings mapping. ``config['geocoderhandle']`` is read, and
            ``config['emptyvalues']`` is overwritten with ``'no'`` whenever a
            file is written.
        fullpath: Path prefix of the output. When the dataset has a title it
            is expanded to ``<fullpath>/<title>_<switch>.xlsx``.
        handle: Dataset identifier passed to ``content2dataframe``.
        switch: ``'modern'`` selects the modern geocoder and data; any other
            value selects the historical ones, falling back to modern data
            re-indexed through ``oecd2webmapper`` when the historical data
            object has no ``empty`` attribute.
        datafilter: Mapping with ``startyear``, ``endyear`` and ``ctrlist``
            entries. An empty mapping or ``None`` disables filtering.

    Returns:
        Tuple ``(filetitle, fullpath, finalsubset)``.
    """
    ctrlist = []
    metadata = {}
    # An empty dict and None both mean "no filter"; normalising here lets the
    # lookups below use .get() instead of crashing on a missing key.
    datafilter = datafilter or {}

    # Load Geocoder
    (_classification, dataset, title,
     units) = content2dataframe(config, config['geocoderhandle'])

    (_geocoder, geolist, oecd2webmapper) = buildgeocoder(dataset, config, '')
    (modern, historical) = loadgeocoder(config, dataset, 'geocoder')

    # Years selected by the filter (end year exclusive), else the default span.
    if datafilter.get('startyear'):
        coderyears = list(range(int(datafilter['startyear']),
                                int(datafilter['endyear'])))
    else:
        coderyears = list(range(1500, 2016))

    # Reading dataset
    (_class1, dataset, title, units) = content2dataframe(config, handle)
    # Spaces and parentheses become underscores.  A missing title is treated
    # as empty, matching the `if title:` handling further down.
    filetitle = re.sub(r'[ \(\)]', '_', title or '')
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)

    coder = modern if switch == 'modern' else historical

    (moderndata, historicaldata) = loadgeocoder(config, dataset, '')
    if switch == 'modern':
        maindata = moderndata
    elif hasattr(historicaldata, 'empty'):
        # NOTE(review): an *empty* historical table is still used as-is and
        # yields empty output; confirm whether it should also be converted.
        maindata = historicaldata
    else:
        # Do conversion to webmapper system if there are no historical data
        maindata = moderndata
        webmapperindex = []
        for code in maindata.index:
            try:
                webmappercode = oecd2webmapper[int(code)]
            except (KeyError, IndexError, TypeError, ValueError):
                # Unknown or non-numeric code: mark it for removal below.
                webmappercode = -1
            webmapperindex.append(webmappercode)
        maindata.index = webmapperindex
        # Drop not recognized locations
        maindata = maindata[maindata.index > 0]

    metadata['title'] = title if title else ''
    metadata['units'] = units if units else ''

    (_cfilter, notint) = selectint(maindata.columns)

    codes = selectint(maindata.index)
    # The result is not used below; the call is kept because any side effects
    # of load_geocodes are not visible from here.
    load_geocodes(config, switch, codes, maindata, geolist)
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)

    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        datasubset = maindata

    # Create Excel file out from dataframe
    (yearscolumns, _notyears) = selectint(maindata.columns)

    (finalsubset, icoder, isyear, ctrfilter,
     _nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
    # Apply filter to countries
    if datafilter.get('ctrlist'):
        # NOTE(review): DataFrame.ix is deprecated in newer pandas; confirm the
        # index type of icoder before switching to .loc.
        icoder = pd.DataFrame(icoder.ix[ctrlist])

    if fullpath:
        # The setting is forced to 'no', so the exported years are always the
        # integer columns actually present in finalsubset.
        config['emptyvalues'] = 'no'
        (coderyears, _notyears) = selectint(finalsubset.columns)
        create_excel_dataset(fullpath, icoder, metadata,
                             icoder.columns, coderyears,
                             finalsubset, isyear, ctrfilter)
    return (filetitle, fullpath, finalsubset)
Esempio n. 4
0
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    """Load a dataset, align it with a geocoder, filter it and write an Excel file.

    Args:
        config: Settings mapping. ``config["geocoderhandle"]`` is read, and
            ``config["emptyvalues"]`` is overwritten with ``"no"`` whenever a
            file is written.
        fullpath: Path prefix of the output. When the dataset has a title it
            is expanded to ``<fullpath>/<title>_<switch>.xlsx``.
        handle: Dataset identifier passed to ``content2dataframe``.
        switch: ``"modern"`` selects the modern geocoder and data; any other
            value selects the historical ones, falling back to modern data
            re-indexed through ``oecd2webmapper`` when the historical data
            object has no ``empty`` attribute.
        datafilter: Mapping with ``startyear``, ``endyear`` and ``ctrlist``
            entries. An empty mapping or ``None`` disables filtering.

    Returns:
        Tuple ``(filetitle, fullpath, finalsubset)``.
    """
    ctrlist = []
    metadata = {}
    # An empty dict and None both mean "no filter"; normalising here lets the
    # lookups below use .get() instead of crashing on a missing key.
    datafilter = datafilter or {}

    # Load Geocoder
    (_classification, dataset, title, units) = content2dataframe(config, config["geocoderhandle"])

    (_geocoder, geolist, oecd2webmapper) = buildgeocoder(dataset, config, "")
    (modern, historical) = loadgeocoder(config, dataset, "geocoder")

    # Years selected by the filter (end year exclusive), else the default span.
    if datafilter.get("startyear"):
        coderyears = list(range(int(datafilter["startyear"]), int(datafilter["endyear"])))
    else:
        coderyears = list(range(1500, 2016))

    # Reading dataset
    (_class1, dataset, title, units) = content2dataframe(config, handle)
    # Spaces and parentheses become underscores.  A missing title is treated
    # as empty, matching the `if title:` handling further down.
    filetitle = re.sub(r"[ \(\)]", "_", title or "")
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)

    coder = modern if switch == "modern" else historical

    (moderndata, historicaldata) = loadgeocoder(config, dataset, "")
    if switch == "modern":
        maindata = moderndata
    elif hasattr(historicaldata, "empty"):
        # NOTE(review): an *empty* historical table is still used as-is and
        # yields empty output; confirm whether it should also be converted.
        maindata = historicaldata
    else:
        # Do conversion to webmapper system if there are no historical data
        maindata = moderndata
        webmapperindex = []
        for code in maindata.index:
            try:
                webmappercode = oecd2webmapper[int(code)]
            except (KeyError, IndexError, TypeError, ValueError):
                # Unknown or non-numeric code: mark it for removal below.
                webmappercode = -1
            webmapperindex.append(webmappercode)
        maindata.index = webmapperindex
        # Drop not recognized locations
        maindata = maindata[maindata.index > 0]

    metadata["title"] = title if title else ""
    metadata["units"] = units if units else ""

    (_cfilter, notint) = selectint(maindata.columns)

    codes = selectint(maindata.index)
    # The result is not used below; the call is kept because any side effects
    # of load_geocodes are not visible from here.
    load_geocodes(config, switch, codes, maindata, geolist)
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)

    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        datasubset = maindata

    # Create Excel file out from dataframe
    (yearscolumns, _notyears) = selectint(maindata.columns)

    (finalsubset, icoder, isyear, ctrfilter, _nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
    # Apply filter to countries
    if datafilter.get("ctrlist"):
        # NOTE(review): DataFrame.ix is deprecated in newer pandas; confirm the
        # index type of icoder before switching to .loc.
        icoder = pd.DataFrame(icoder.ix[ctrlist])

    if fullpath:
        # The setting is forced to "no", so the exported years are always the
        # integer columns actually present in finalsubset.
        config["emptyvalues"] = "no"
        (coderyears, _notyears) = selectint(finalsubset.columns)
        create_excel_dataset(
            fullpath, icoder, metadata, icoder.columns, coderyears, finalsubset, isyear, ctrfilter
        )
    return (filetitle, fullpath, finalsubset)
Esempio n. 5
0
# Use case: start year 1500, end year 2010, ctrlist restricted to four codes.
datafilter['startyear'] = '1500'
datafilter['endyear'] = '2010'
datafilter['ctrlist'] = '528,14,18,67'
# NOTE(review): other callers unpack datasetfilter() as (datasubset, ctrlist);
# here the whole return value is passed on -- confirm which form this version
# of datasetfilter returns.
datasubset = datasetfilter(maindata, datafilter)

# Create Excel file out from dataframe
fullpath = config['webtest'] + "/subdata_set.xlsx"
(yearscolumns, notyears) = selectint(maindata.columns)
(countryinfo, notcountry) = selectint(maindata.index)
# Placeholder values are used when the dataset has no title or units.
metadata = {
    'title': title if title else 'testtitle',
    'units': units if units else 'testunits',
}

# Time the analysis and the Excel export together.
a = datetime.now()
(finalsubset, icoder, isyear, ctrfilter,
 nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
datafile = create_excel_dataset(fullpath, icoder, metadata, icoder.columns,
                                coderyears, finalsubset, isyear, ctrfilter)
b = datetime.now()
d = b - a
# Parenthesised single-argument prints behave identically on Python 2 and 3.
print("Time: " + str(d.seconds) + " seconds")
print(datafile)
# Spot-check a single cell of the result (row 67, column 1831).
print(finalsubset.ix[67][1831])
Esempio n. 6
0
# Use case 1: start year 1500, end year 2010, ctrlist restricted to four codes.
datafilter = {}
datafilter['startyear'] = '1500'
datafilter['endyear'] = '2010'
datafilter['ctrlist'] = '528,14,18,67'
# NOTE(review): other callers unpack datasetfilter() as (datasubset, ctrlist);
# here the whole return value is passed on -- confirm which form this version
# of datasetfilter returns.
datasubset = datasetfilter(maindata, datafilter)

# Create Excel file out from dataframe
fullpath = config['webtest'] + "/subdata_set.xlsx"
(yearscolumns, notyears) = selectint(maindata.columns)
(countryinfo, notcountry) = selectint(maindata.index)
# Placeholder values are used when the dataset has no title or units.
metadata = {
    'title': title if title else 'testtitle',
    'units': units if units else 'testunits',
}

# Time the analysis and the Excel export together.
a = datetime.now()
(finalsubset, icoder, isyear, ctrfilter, nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
datafile = create_excel_dataset(fullpath, icoder, metadata, icoder.columns, coderyears, finalsubset, isyear, ctrfilter)
b = datetime.now()
d = b - a
# Parenthesised single-argument prints behave identically on Python 2 and 3.
print("Time: " + str(d.seconds) + " seconds")
print(datafile)
# Spot-check a single cell of the result (row 67, column 1831).
print(finalsubset.ix[67][1831])