def main():

  #############################################################################
  # 0. Prepare the auxiliary data
  #

  # Check if tmp folder exists, otherwise create it
  check_create_folder(settings.tmp_dir)
  
  # Build the list with countries and states
  admin_areas = get_aa_list()

  for chart in settings.charts:
    ind_source = settings.src_auxiliary + str(settings.current_edition) + '-' + str(chart["id"]) + '.csv'
      
    global_avg = False
    # Calculate the global average for this chart    
    if "global_average" in chart and chart["global_average"]:
      global_avg = get_avg(chart, ind_source)
    
    for aa in admin_areas:
      iso = aa.lower()
      for lang in settings.langs:
        # Initialize the structure that will be written to JSON
        json_data = {
          "name": iso,
          "iso": iso,
          "meta": {
            "title": chart["title"][lang],
            "label-x": chart["labelx"][lang],
            "label-y": chart["labely"][lang]
          },
          "data": []
        }

        for serie in chart["series"]:
          if serie["id"] == 'country':
            # If we're dealing with a country, use the country name as label of serie
            serie_name = aa
          else:
            serie_name = serie["name"][lang]

          # Initialize the object for the serie    
          serie_to_append = {"name": serie_name, "id": serie["id"], "values": []}

          # Add a note to the serie, if the chart specifies one
          if chart.get("note"):
            serie_to_append["note"] = add_note(serie, ind_source, aa)

          # Generate the actual data
          serie_to_append["values"] = chart['function'](serie, ind_source, lang, aa, chart["years"],global_avg)

          json_data["data"].append(serie_to_append)

        # Write the list to a JSON file
        file_path = settings.exp_aux_json.format(lang=lang, indicator=chart["export"], aa=iso)
        write_json(file_path, json_data)
  
  # Fully remove the temp directory
  clean_dir(settings.tmp_dir, True)

  print "All done. The auxiliary data has been prepared for use on global-climatescope.org."
def download_compranet(years):
  """
  Download Compranet data for a list of years, unzip the files and convert 
  the XLS to CSV

  :param years:
    The years for which to download data
  :type years:
    List

  :returns:

  :example:

  """
  
  tmp_folder = os.path.join(settings.folder_full_cache, 'tmp')
  check_create_folder(tmp_folder)

  for year in years:
    file_name = settings.fn_prefix + str(year) + settings.fn_extension
    src_url = settings.compranet_base_url + file_name

    print "Downloading %s" % file_name
    download(url=src_url, path=tmp_folder) 

    file_path = os.path.join(tmp_folder, file_name)
    with zipfile.ZipFile(file_path, 'r') as myzip:
      myzip.extractall(tmp_folder)

  pattern = os.path.join(tmp_folder, '*.xls*')

  for src_file in list_files(pattern):
    csv_path = os.path.join(settings.folder_full_cache, get_filename(src_file) + '.csv')
    wb = xlrd.open_workbook(src_file)
    sheet = wb.sheet_by_index(0)

    # Write every row of the first sheet to a UTF-8 encoded CSV
    # (unicodecsv expects a file handle opened in binary mode)
    with open(csv_path, 'wb') as csvfile:
      writer = unicodecsv.writer(csvfile, encoding='utf-8')
      for rownum in range(sheet.nrows):
        writer.writerow(sheet.row_values(rownum))

  remove_folder(tmp_folder)
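# Example invocation -- a sketch; settings.years is assumed to be a list of
# year strings such as ['2014', '2015']:
if __name__ == '__main__':
  download_compranet(settings.years)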
def main(args):
  """
  Main function - launches the program.
  :param args:
    The Parser arguments
  :type args:
    Parser object
  :returns:
    List  
  :example:
    ["Downloading files from the Compranet site."]
  """
  
  if args:

    if args.sample:
      source_folder = settings.folder_sample_data
    
    else:
      # Use cached versions of the source data in csv format
      source_folder = settings.folder_full_cache      
      check_create_folder(source_folder)
      
      if args.download:
        clean_folder(source_folder)
        download_compranet(settings.years)
        
    # Check if there are CSV files in the source folder
    pattern = os.path.join(source_folder, '*.csv')
    source_data = list_files(pattern)

    if source_data:

      print "About to clean the data"
      clean_df = clean.clean_csv(source_data)
      
      print "About to store it in OCDS format"
      ocds.generate_json(clean_df)

    else:
      return ["No source data found. Make sure there is at least one CSV file in " + source_folder, 1]

    return ["Prepared and cleaned the files from the Compranet site.", 0]
def main(args):
  """
  Main function - launches the program.
  """

  if args:
    check_create_folder(settings.folder_charts)

    df = pd.DataFrame()

    # Read in the JSON files, flatten the contracts and add them to a DataFrame
    for f in list_files(args.source + '*'):
      df = flatten_contracts(f, df)

    # Coerce the date columns to datetimes; unparseable values become NaT
    # (convert_objects is deprecated, so use pd.to_datetime directly)
    df['contract_period_startDate'] = pd.to_datetime(df['contract_period_startDate'], errors='coerce')
    df['tender_publicationDate'] = pd.to_datetime(df['tender_publicationDate'], errors='coerce')
    df['tender_tenderPeriod_startDate'] = pd.to_datetime(df['tender_tenderPeriod_startDate'], errors='coerce')
    df['award_date'] = pd.to_datetime(df['award_date'], errors='coerce')


    # Keep only the contracts that fall between the start and end date
    start_date = datetime.strptime(settings.start_date_charts, '%Y-%m-%d')
    end_date = datetime.strptime(settings.end_date_charts, '%Y-%m-%d')
    df = df[(df[settings.main_date_contract] >= start_date) & (df[settings.main_date_contract] <= end_date)]

    # Generate the summary statistics, independent of comparison or slice
    overview_data = chartdata.generate_overview(df)

    with open(os.path.join(settings.folder_charts, 'general.json'), 'w') as outfile:
      json.dump(overview_data, outfile)

    for dimension in settings.dimensions:
      for comparison in settings.comparisons:

        # Each unique combination of dimension + comparison is a 'lense'
        lense_id = dimension + '--' + comparison['id']
        lense = { 
          'metadata': { 
            'id': lense_id
          },
          'charts': []
        }

        for chart in settings.charts:
          if chart['dimension'] == dimension:
            if chart['function']:
              chart['meta']['data'] = []
         
              previous_slice = False
              d = { }

              # Generate the chart data
              for sl in comparison['slices']:
                sliced_chart = { 'id': sl['id'], 'label': sl['label'] }
                
                # Prep the dataframe, slice it or serve it full
                if comparison['compare']:
                  sliced_df = slice_df(df, comparison['compare'], sl['field'])
                else:
                  sliced_df = df

                if not sliced_df.empty:
                  current_slice = chart['function'](sliced_df)

                  # Append the slice's data & meta-data 
                  sliced_chart['data'] = current_slice['data']
                  chart['meta']['data'].append(sliced_chart)
                  
                  # Update the domain based on the slice
                  for axis, func in chart['domain'].items():
                    if previous_slice:
                      d[axis] = func(d[axis], current_slice['domain'][axis])
                    else:
                      d[axis] = current_slice['domain'][axis]
                    
                  previous_slice = True


              # Add the domain to the chart; skip axes for which no slice
              # produced data
              for axis in chart['domain']:
                if axis in d:
                  chart['meta'][axis]['domain'] = d[axis]
              
            # Append the chart data
            lense['charts'].append(chart['meta'])

        file_name = os.path.join(settings.folder_charts, lense_id + '.json')
        with open(file_name, 'w') as outfile:
          json.dump(lense, outfile)
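# slice_df is defined elsewhere; the call above passes the full DataFrame,
# a column name (comparison['compare']) and a value to match (sl['field']).
# A hypothetical sketch of the assumed behavior, not the project's actual
# implementation:
def slice_df(df, column, value):
  # Keep only the rows whose `column` equals the slice's value
  return df[df[column] == value]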
def generate_json(df):
  """
  Generate OCDS record packages for each month

  :param df:
    Dataframe with all the contracts
  :type args:
    DataFrame

  :returns:
      
  :example:

  """

  check_create_folder(settings.folder_ocds_json)
  check_create_folder(settings.folder_tmp)
  clean_folder(settings.folder_tmp)

  # Group the Compranet data by month
  # (convert_objects and pd.TimeGrouper are deprecated)
  df['group_date'] = pd.to_datetime(df[settings.grouping_date], errors='coerce')
  grouped_df = df.set_index('group_date').groupby(pd.Grouper(freq='M'))

  # Store the records for each month in a temporary CSV file
  # The JSON files will be generated from these CSV files, which
  # is much more performant than iterating over the rows in pandas
  files = []
  for month, records in grouped_df:
    if not records.empty:
      m = month.strftime("%Y%m%d")
      file_name = os.path.join(settings.folder_tmp, m + '.csv')
      files.append(file_name)
      records.to_csv(file_name, index=False)

  # Loop over each CSV file and create an OCDS record package
  for f in files:

    # Store the package meta-data
    # TODO: add the month
    package = {
      "uri": os.path.join("http://example.com/" + get_filename(f) + '.json'),
      "publishedDate": get_filename(f),
      "records": [],
      "publisher": {
        "identifier": "100",
        "name": "Compranet"
      },
      "packages": []
    }

    # Read the file and generate the records
    with open(f, 'r') as infile:
      data = csv.DictReader(infile)

      ocds_records = {}

      for record in data:
        record_id = record['NUMERO_PROCEDIMIENTO']

        # Add the generic tender data for this record,
        # if it's not there already
        if record_id not in ocds_records:
          ocds_records[record_id] = get_tender_data(record)

        # The contract and award data needs to be added for each row

        # OCDS expects a unique ID for every award. NUMERO_EXPEDIENTE is not unique, hence
        # a custom ID
        award_id = str(record['NUMERO_EXPEDIENTE']) + '-' + str(len(ocds_records[record_id]['awards']) + 1)
        
        ocds_records[record_id]['awards'].append(get_award_data(record, award_id))
        ocds_records[record_id]['contracts'].append(get_contract_data(record, award_id))

      for value in ocds_records.values():
        package['records'].append(value)

    ofn = os.path.join(settings.folder_ocds_json, get_filename(f) + '.json')
    with open(ofn, 'w') as outfile:
      json.dump(package, outfile)
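# get_tender_data, get_award_data and get_contract_data are defined
# elsewhere. From the usage above, get_tender_data must seed a record with
# empty 'awards' and 'contracts' lists; the field mapping below is an
# assumption, not the project's actual implementation:
def get_tender_data(record):
  return {
    "ocid": record['NUMERO_PROCEDIMIENTO'],
    "tender": {
      "id": record['NUMERO_PROCEDIMIENTO']
    },
    "awards": [],
    "contracts": []
  }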