def main():
    #############################################################################
    # 0.
    #
    # Check if tmp folder exists, otherwise create it
    check_create_folder(settings.tmp_dir)

    # Build the list with countries and states
    admin_areas = get_aa_list()

    for chart in settings.charts:
        ind_source = settings.src_auxiliary + str(settings.current_edition) + '-' + str(chart["id"]) + '.csv'

        # Calculate the global average for this chart, when requested
        global_avg = False
        if "global_average" in chart and chart["global_average"]:
            global_avg = get_avg(chart, ind_source)

        for aa in admin_areas:
            iso = aa.lower()
            for lang in settings.langs:
                # Initialize the dict that will be written to JSON
                json_data = {
                    "name": iso,
                    "iso": iso,
                    "meta": {
                        "title": chart["title"][lang],
                        "label-x": chart["labelx"][lang],
                        "label-y": chart["labely"][lang]
                    },
                    "data": []
                }

                for serie in chart["series"]:
                    if serie["id"] == 'country':
                        # If we're dealing with a country, use the country
                        # name as the label of the serie
                        serie_name = aa
                    else:
                        serie_name = serie["name"][lang]

                    # Initialize the object for the serie
                    serie_to_append = {"name": serie_name, "id": serie["id"], "values": []}

                    # Add a note to the serie. Guard with `in` (as for
                    # global_average above) to avoid a KeyError on charts
                    # without a 'note' key.
                    if "note" in chart and chart["note"]:
                        serie_to_append["note"] = add_note(serie, ind_source, aa)

                    # Generate the actual data
                    serie_to_append["values"] = chart['function'](serie, ind_source, lang, aa, chart["years"], global_avg)
                    json_data["data"].append(serie_to_append)

                # Write the dict to a JSON file
                file_path = settings.exp_aux_json.format(lang=lang, indicator=chart["export"], aa=iso)
                write_json(file_path, json_data)

    # Fully remove the temp directory
    clean_dir(settings.tmp_dir, True)

    print "All done. The auxiliary data has been prepared for use on global-climatescope.org."
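# A minimal sketch of one settings.charts entry that main() above could
# consume. The keys mirror the lookups in main() (id, export, title, labelx,
# labely, years, series, function, plus the optional global_average and note
# flags); every concrete value below is an illustrative assumption, not the
# project's real configuration.
example_chart = {
    "id": 101,                                  # hypothetical indicator id
    "export": 'example-indicator',              # used in the export file name
    "title": {"en": 'Example indicator', "es": 'Indicador de ejemplo'},
    "labelx": {"en": 'Year', "es": 'Año'},
    "labely": {"en": 'Value', "es": 'Valor'},
    "years": [2013, 2014, 2015],
    "global_average": True,
    "note": False,
    "series": [
        {"id": 'country', "name": None},        # label is replaced by the country name
        {"id": 'average', "name": {"en": 'Global average', "es": 'Promedio global'}}
    ],
    # A callable with the signature main() invokes:
    # function(serie, ind_source, lang, aa, years, global_avg)
    "function": None
}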
def download_compranet(years):
    """
    Download Compranet data for a list of years, unzip the files and
    convert the XLS files to CSV.

    :param years: The years for which to download data
    :type years: List

    :returns: None
    """
    tmp_folder = os.path.join(settings.folder_full_cache, 'tmp')
    check_create_folder(tmp_folder)

    for year in years:
        file_name = settings.fn_prefix + year + settings.fn_extension
        src_url = settings.compranet_base_url + file_name

        print "Downloading %s" % file_name
        download(url=src_url, path=tmp_folder)

        file_path = os.path.join(tmp_folder, file_name)
        with zipfile.ZipFile(file_path, 'r') as myzip:
            myzip.extractall(tmp_folder)

    # Convert every extracted Excel file to CSV in the full cache folder
    pattern = os.path.join(tmp_folder, '*.xls*')
    for src_file in list_files(pattern):
        csv_path = os.path.join(settings.folder_full_cache, get_filename(src_file) + '.csv')

        wb = xlrd.open_workbook(src_file)
        sheet = wb.sheet_by_index(0)

        with open(csv_path, 'w') as csvfile:
            writer = unicodecsv.writer(csvfile, encoding='utf-8')
            for rownum in xrange(sheet.nrows):
                writer.writerow(sheet.row_values(rownum))

    remove_folder(tmp_folder)
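# A possible direct invocation of download_compranet(), matching the call
# to download_compranet(settings.years) in main() below. Because the file
# name is built by string concatenation, settings.years is assumed to hold
# year strings ('2013' rather than 2013):
if __name__ == '__main__':
    download_compranet(settings.years)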
def main(args):
    """
    Main function - launches the program.

    :param args: The Parser arguments
    :type args: Parser object

    :returns: List with a message and an exit code
    :example: ["Prepared and cleaned the files from the Compranet site.", 0]
    """
    if args:
        if args.sample:
            source_folder = settings.folder_sample_data
        else:
            # Use cached versions of the source data in CSV format
            source_folder = settings.folder_full_cache
            check_create_folder(source_folder)
            if args.download:
                clean_folder(source_folder)
                download_compranet(settings.years)

        # Check if there are CSV files in the source folder
        pattern = os.path.join(source_folder, '*.csv')
        source_data = list_files(pattern)

        if source_data:
            print "About to clean the data"
            clean_df = clean.clean_csv(source_data)

            print "About to store it in OCDS format"
            ocds.generate_json(clean_df)
        else:
            return ["No source data found. Make sure there is at least one CSV file in " + source_folder, 1]

        return ["Prepared and cleaned the files from the Compranet site.", 0]
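# A minimal argparse setup that would produce the args object main() reads
# above (args.sample and args.download). The description and help texts are
# assumptions, not the project's actual CLI.
import argparse

def build_parser():
    parser = argparse.ArgumentParser(description='Prepare Compranet data')
    parser.add_argument('--sample', action='store_true',
                        help='use the bundled sample data instead of the full cache')
    parser.add_argument('--download', action='store_true',
                        help='(re)download the source data before cleaning')
    return parser

# message, code = main(build_parser().parse_args())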
def main(args):
    """
    Main function - launches the program.
    """
    if args:
        check_create_folder(settings.folder_charts)

        df = pd.DataFrame()

        # Read in the JSON files, flatten the contracts and add them to a DataFrame
        for f in list_files(args.source + '*'):
            df = flatten_contracts(f, df)

        # Parse the date columns, coercing unparseable values to NaT
        df['contract_period_startDate'] = df['contract_period_startDate'].convert_objects(convert_dates='coerce')
        df['tender_publicationDate'] = df['tender_publicationDate'].convert_objects(convert_dates='coerce')
        df['tender_tenderPeriod_startDate'] = df['tender_tenderPeriod_startDate'].convert_objects(convert_dates='coerce')
        df['award_date'] = df['award_date'].convert_objects(convert_dates='coerce')

        # Only keep contracts that fall within the reporting period
        start_date = datetime.strptime(settings.start_date_charts, '%Y-%m-%d')
        end_date = datetime.strptime(settings.end_date_charts, '%Y-%m-%d')
        df = df[(df[settings.main_date_contract] >= start_date) & (df[settings.main_date_contract] <= end_date)]

        # Generate the summary statistics, independent of comparison or slice
        overview_data = chartdata.generate_overview(df)
        with open(os.path.join(settings.folder_charts, 'general.json'), 'w') as outfile:
            json.dump(overview_data, outfile)

        for dimension in settings.dimensions:
            for comparison in settings.comparisons:
                # Each unique combination of dimension + comparison is a 'lense'
                lense_id = dimension + '--' + comparison['id']
                lense = {
                    'metadata': {'id': lense_id},
                    'charts': []
                }

                for chart in settings.charts:
                    if chart['dimension'] == dimension and chart['function']:
                        chart['meta']['data'] = []
                        previous_slice = False
                        d = {}

                        # Generate the chart data
                        for sl in comparison['slices']:
                            sliced_chart = {'id': sl['id'], 'label': sl['label']}

                            # Prep the dataframe, slice it or serve it full
                            if comparison['compare']:
                                sliced_df = slice_df(df, comparison['compare'], sl['field'])
                            else:
                                sliced_df = df

                            if not sliced_df.empty:
                                current_slice = chart['function'](sliced_df)

                                # Append the slice's data & meta-data
                                sliced_chart['data'] = current_slice['data']
                                chart['meta']['data'].append(sliced_chart)

                                # Update the domain based on the slice
                                for axis, func in chart['domain'].items():
                                    if previous_slice:
                                        d[axis] = func(d[axis], current_slice['domain'][axis])
                                    else:
                                        d[axis] = current_slice['domain'][axis]
                                previous_slice = True

                        # Add the domain to the chart. The guard avoids a
                        # KeyError when every slice turned out empty.
                        if previous_slice:
                            for axis in chart['domain']:
                                chart['meta'][axis]['domain'] = d[axis]

                        # Append the chart data
                        lense['charts'].append(chart['meta'])

                file_name = os.path.join(settings.folder_charts, lense_id + '.json')
                with open(file_name, 'w') as outfile:
                    json.dump(lense, outfile)
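# slice_df() is used by main() above but defined elsewhere. A minimal
# implementation consistent with that call site -- keeping only the rows
# whose `column` matches `value` -- could look like this; the real helper
# may apply additional normalization.
def slice_df(df, column, value):
    """Return the subset of df where `column` equals `value`."""
    return df[df[column] == value]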
def generate_json(df):
    """
    Generate OCDS record packages for each month.

    :param df: Dataframe with all the contracts
    :type df: DataFrame

    :returns: None
    """
    check_create_folder(settings.folder_ocds_json)
    check_create_folder(settings.folder_tmp)
    clean_folder(settings.folder_tmp)

    # Group the Compranet data by date
    df['group_date'] = df[settings.grouping_date].convert_objects(convert_dates='coerce')
    grouped_df = df.set_index('group_date').groupby(pd.TimeGrouper(freq='M'))

    # Store the records for each month in a temporary CSV file.
    # The JSON files will be generated from these CSV files, which
    # is much more performant than iterating over the rows in pandas.
    files = []
    for month, records in grouped_df:
        if not records.empty:
            m = month.strftime("%Y%m%d")
            file_name = os.path.join(settings.folder_tmp, m + '.csv')
            files.append(file_name)
            records.to_csv(file_name, index=False)

    # Loop over each CSV file and create an OCDS record package
    for f in files:
        # Store the package meta-data
        # TODO: add the month
        package = {
            "uri": "http://example.com/" + get_filename(f) + '.json',
            "publishedDate": get_filename(f),
            "records": [],
            "publisher": {
                "identifier": "100",
                "name": "Compranet"
            },
            "packages": []
        }

        # Read the file and generate the records
        with open(f, 'rb') as infile:
            data = csv.DictReader(infile)
            ocds_records = {}

            for record in data:
                record_id = record['NUMERO_PROCEDIMIENTO']

                # Add the generic tender data for this record,
                # if it's not there already
                if record_id not in ocds_records:
                    ocds_records[record_id] = get_tender_data(record)

                # The contract and award data needs to be added for each row.
                # OCDS expects a unique ID for every award. NUMERO_EXPEDIENTE
                # is not unique, hence a custom ID.
                award_id = str(record['NUMERO_EXPEDIENTE']) + '-' + str(len(ocds_records[record_id]['awards']) + 1)
                ocds_records[record_id]['awards'].append(get_award_data(record, award_id))
                ocds_records[record_id]['contracts'].append(get_contract_data(record, award_id))

        for key, value in ocds_records.iteritems():
            package['records'].append(value)

        ofn = os.path.join(settings.folder_ocds_json, get_filename(f) + '.json')
        with open(ofn, 'w') as outfile:
            json.dump(package, outfile)
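# get_tender_data(), get_award_data() and get_contract_data() are defined
# elsewhere. From the way generate_json() uses its return value, the dict
# produced by get_tender_data() must at least carry empty 'awards' and
# 'contracts' lists. A hypothetical skeleton consistent with that contract
# (any field beyond those two lists is an assumption) could be:
def get_tender_data_skeleton(record):
    return {
        "ocid": record['NUMERO_PROCEDIMIENTO'],  # assumed identifier field
        "awards": [],
        "contracts": []
    }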