def create():
    """Aggregate per-company indicator data into one JSON file per indicator.

    Reads overview.json for the company list, loads each company's
    <id>.json asset, then for every survey question writes
    assets/indicators/<id>.json holding that indicator's data across all
    companies, sorted by company id.
    """
    # Reset the assets/indicators output directory before writing.
    __create_or_delete__(path.assets('indicators'))
    overview = io.read_json(path.assets('overview.json'))
    # Start by compiling a giant data structure of every company,
    # keyed by the company id from the overview.
    companies = {}
    for d in overview:
        company_data = io.read_json(path.assets('%s.json' % d['id']))
        companies[d['id']] = company_data
    # Now aggregate the data by indicator id using the survey data
    survey = io.read_json(path.assets('survey.json'))
    for item in survey:
        indicator_id = item['id'].lower()
        indicator_data = {
            'id': item['id'],
            'name': item['name'],
            # NOTE(review): 'follow' presumably flags a follow-up question
            # clause -- confirm against the survey schema.
            'follow': item['follow'],
            'companies': []
        }
        # Progress/debug output while building.
        print indicator_id, item['follow']
        for company_id, company in companies.iteritems():
            # Entries in this company's data matching the indicator
            # (case-insensitive id comparison).
            company_data = [i for i in company
                            if indicator_id == i['id'].lower()]
            if len(company_data) > 1:
                print 'Found too many company matches for', indicator_id
            # This might be an indicator that doesn't apply
            if not len(company_data):
                continue
            company_data = company_data[0]
            # Substring match against overview ids; warn (but proceed with
            # the first hit) if the match is not unique.
            company_overview = [c for c in overview if company_id in c['id']]
            if len(company_overview) != 1:
                print 'Weirdness finding company from company overview'
            company_overview = company_overview[0]
            # 'telco' appears to be a 'true'/'false' string, not a bool --
            # TODO confirm; 'false' means an internet company here.
            company_type = 'Telecommunications'
            if 'false' in company_overview['telco']:
                company_type = 'Internet'
            indicator_data['companies'].append({
                'name': company_overview['name'],
                'id': company_overview['id'],
                'display': company_overview['display'],
                'score': company_data['score'],
                'type': company_type,
                'levels': company_data['levels'],
                'services': company_data['services']
            })
        # Deterministic output ordering by company id.
        indicator_data['companies'] = sorted(indicator_data['companies'],
                                             key=lambda c: c['id'])
        io.write_json(path.assets('indicators/%s.json' % indicator_id),
                      indicator_data)
def create_indicator_scores(): companies = io.read_json(path.assets("services.json")) survey = io.read_json(path.assets("survey.json")) indicator_data = [] for i in survey: indicator_id = i["id"].lower() scores = {} levels = {} for c in companies: c_name = c["display"] c_overall = c["overall"] if indicator_id in c_overall: scores[c_name] = c_overall[indicator_id] else: print "no %s in %s" % (indicator_id, " ".join(c_overall)) print i["name"] indicator_data.append({"id": indicator_id, "scores": scores, "text": i["text"], "name": i["name"]}) io.write_json(path.assets("indicator-overview.json"), indicator_data)
def create(): __create_or_delete__(path.assets('indicators')) overview = io.read_json(path.assets('overview.json')) # Start by compiling a giant data structure of every company companies = {} for d in overview: company_data = io.read_json(path.assets('%s.json' % d['id'])) companies[d['id']] = company_data # Now aggregate the data by indicator id using the survey data survey = io.read_json(path.assets('survey.json')) for item in survey: indicator_id = item['id'].lower() indicator_data = { 'id': item['id'], 'name': item['name'], 'follow': item['follow'], 'companies': [] } print indicator_id, item['follow'] for company_id, company in companies.iteritems(): company_data = [ i for i in company if indicator_id == i['id'].lower() ] if len(company_data) > 1: print 'Found too many company matches for', indicator_id # This might be an indicator that doesn't apply if not len(company_data): continue company_data = company_data[0] company_overview = [c for c in overview if company_id in c['id']] if len(company_overview) != 1: print 'Weirdness finding company from company overview' company_overview = company_overview[0] company_type = 'Telecommunications' if 'false' in company_overview['telco']: company_type = 'Internet' indicator_data['companies'].append({ 'name': company_overview['name'], 'id': company_overview['id'], 'display': company_overview['display'], 'score': company_data['score'], 'type': company_type, 'levels': company_data['levels'], 'services': company_data['services'] }) indicator_data['companies'] = sorted(indicator_data['companies'], key=lambda c: c['id']) io.write_json(path.assets('indicators/%s.json' % indicator_id), indicator_data)
def create(filename): # Create a dictionary where properties are company names overview = io.read_json(path.assets('overview.json')) companies = [name.snake_case(item['display']) for item in overview] company_dict = {} for c in companies: company_dict[c] = -1 # Now use that dictionary to save the index of those company names. raw = io.read_csv(path.raw(filename)) raw_header = raw[0] for idx, item in enumerate(raw_header): snake_header = name.snake_case(item) if snake_header in company_dict: company_dict[snake_header] = idx # This should be 0 if we've matched every company if not_all_found(company_dict.values()): print 'Not all companies accounted for in services overview csv' # This is where we check a ref file, or create one ref_path = path.ref('service-column-mapping.json') if os.path.isfile(ref_path): ref = io.read_json(ref_path) else: ref = [name.snake_case(row[0]) for row in raw[1:] if row[0] != ''] io.write_json(ref_path, ref) # Create a dictionary matching row number fo the indicator indicator_dict = {} for indic in ref: indicator_dict[indic] = -1 for idx, row in enumerate(raw): indicator = name.snake_case(row[0]) if indicator in indicator_dict: indicator_dict[indicator] = idx if not_all_found(indicator_dict.values()): print 'Not all indicators accounted for in services overview csv' # Baselines tel = 'telco' net = 'internet company' output = [] # Get a slice of all the columns that encompass each company stops = sorted(idx for idx in company_dict.values()) for idx, stop in enumerate(stops): next_stop = stops[idx+1] if idx + 1 < len(stops) else len(raw_header) company_range = [item[stop:next_stop] for item in raw] company = { 'display': company_range[0][0], 'name': name.filename(company_range[0][0]) } # The second item in the first row *should* be the type header_type = company_range[0][1].lower() if header_type not in [tel, net]: print 'No company type found. 
Instead, saw %s' % header_type company['type'] = header_type # The second row contains the service names service_names = [item for item in company_range[1]] services = [] for column_number, service_name in enumerate(service_names): # Get each indicator value for each service using # the indicator mapping we defined earlier scores = {} for indicator_name, row_number in indicator_dict.iteritems(): cell = company_range[row_number][column_number] scores[indicator_name] = company_range[row_number][column_number] # The first 'service' is actually just the overall # Do some spreadsheet format-checking here if column_number == 0: total = scores['total'] if not len(total): print 'No weighted total for %s %s' % (service_name, company['name']) if 'overall' not in service_name: print 'Service %s != "overall"' % service_name company['overall'] = scores # The second 'service' is usually the group score; # No need to save this, we don't use it here. elif column_number == 1 and 'group' in service_name: continue # Otherwise, call it a service. else: service = { 'name': service_name, 'scores': scores } # Get service type if it's available service_type = company_range[0][column_number] if len(service_type): service['type'] = service_type services.append(service) company['services'] = services output.append(company) io.write_json(path.assets('services.json'), output)
def create(filename):
    """Parse one company's raw survey-response CSV into <company>.json.

    Splits the sheet into per-indicator row ranges (via the row mapping in
    ref/service-column-mapping.json), splits each range on empty rows into
    response / score / level-score / indicator-score sections, and emits
    one record per indicator with per-service metadata and per-level
    responses and scores.

    :param filename: CSV file name under raw/companies/ (a 4-character
        extension such as ".csv" is assumed and stripped via filename[:-4]).
    """
    company_name = name.filename(filename[:-4])
    all_services = io.read_json(path.assets('services.json'))
    # Locate this company's entry in the aggregated services data
    # (substring match, dots stripped from the stored name).
    service_data = [item for item in all_services
                    if (company_name in item['name'].replace('.', ''))]
    if len(service_data) != 1:
        print 'Weird number of services found', len(service_data)
    service_data = service_data[0]
    # Create a mapping dictionary of just indicator names, each mapped
    # to -1; is_number(item[1:]) filters ref entries down to ids shaped
    # like a letter followed by a number.
    ref = io.read_json(path.ref('service-column-mapping.json'))
    indicator_dict = {}
    for item in ref:
        if is_number(item[1:]):
            indicator_dict[item] = -1
    # Map the indicator to the proper rows of this company's CSV.
    raw = io.read_csv(path.raw('companies/' + filename))
    for idx, row in enumerate(raw):
        indicator = row[0].lower()
        if indicator in indicator_dict:
            indicator_dict[indicator] = idx
    # Use the survey data to map possible responses to position.
    survey = io.read_json(path.assets('survey.json'))
    if not_all_found(indicator_dict.values()):
        print 'Not all indicators accounted for in services overview csv'
    all_indicators = []
    # Get a slice of all the rows that encompass each indicator.
    stops = sorted(idx for idx in indicator_dict.values())
    for idx, stop in enumerate(stops):
        next_stop = stops[idx + 1] if idx + 1 < len(stops) else len(raw) + 1
        indicator_range = raw[stop:next_stop]
        # Divide that slice by empty rows.
        split = array.slice_arr(indicator_range, array.is_empty_row)
        # The first slice contains consolidated answers, comments, sources.
        responses = split.pop(0)
        # The first row of responses is the indicator name followed by
        # service categories.
        header = [item for item in responses.pop(0) if len(item)]
        indicator_name = header[0]
        # Find the survey question we're looking for.
        survey_item = ([item for item in survey
                        if item['id'].lower() == indicator_name.lower()])
        if len(survey_item) != 1:
            print 'Too many items in survey.json for this indicator'
            print indicator_name
            print survey_item
        indicator_data = {'id': indicator_name, 'services': [], 'levels': []}
        # Check if this indicator is valid before continuing; an N/A
        # marker means we skip it entirely.
        if len(responses) == 1 and 'this indicator is n/a' in responses[0][0].lower():
            continue
        else:
            # Question scores follow the response text in the split array.
            scores = split.pop(0)
            # ..followed by the overall indicator score. NOTE: the pop only
            # happens when the label matches; otherwise split is unchanged.
            indicator_score = split.pop(-1)[0][1] if (
                'indicator score' in split[-1][0][0].lower()) else []
            if not len(indicator_score):
                print '\nIndicator score not found in %s' % header[0]
                print split, '\n'
            else:
                indicator_data['score'] = indicator_score
            # ..and the same conditional pop for the overall service scores.
            level_scores = split.pop(-1)[0] if (
                'level score' in split[-1][0][0].lower()) else []
            if not len(level_scores):
                print '\nService score not found in %s' % header[0]
                print split, '\n'
            # Determine the comments and sources location (last two rows
            # of the responses slice) and sanity-check their labels.
            comments = responses.pop(-2)
            sources = responses.pop(-1)
            if ('comments' not in comments[0].lower()
                    or 'sources' not in sources[0].lower()):
                print 'Comments not found in %s' % comments[0]
                print 'Sources not found in %s' % sources[0]
            # Some question texts include an if-not-then clause, which
            # throws off the count between the text and the score.
            # Record it and then delete the row (delete-then-break keeps
            # the enumerate safe).
            indicator_data['follow'] = 0
            for idx, row in enumerate(responses):
                if 'continue with B' in row[0] and len(set(row[1:])) == 1:
                    indicator_data['follow'] = 1
                    del responses[idx]
                    break
            if len(responses) != len(scores):
                print 'Length of responses and scores not matching'
                print len(responses), len(scores)
            # Save level responses and level positions; determine whether
            # this question has custom answers.
            survey_levels = survey_item[0]['levels']
            for idx, level in enumerate(responses):
                level_data = []
                # Assume anything longer than 25 characters, aka
                # "no/insufficient evidence", is a custom response.
                custom = 0
                survey_options = survey_levels[idx]['responses']
                for option in survey_options:
                    if len(option) > 25:
                        custom = 1
                for level_idx, level_response in enumerate(level):
                    # First level index (the row label) is useless; skip
                    # empty cells too.
                    if level_idx == 0 or not len(level_response):
                        continue
                    if len(header) <= level_idx:
                        print 'No header available, this will break'
                    service = header[level_idx]
                    # Exclude group scores / operating company from
                    # indicators that don't need them.
                    if (('(group)' in service
                         or '(operating company)' in service)
                            and exclude_service(indicator_name)):
                        continue
                    # Shim issues where the response includes too much text.
                    if len(level_response) > 25 and "no/insufficient" == level_response[:15]:
                        level_response = "no/insufficient evidence"
                    # Only add to the services list on the first level;
                    # otherwise we would add each service repeatedly.
                    if idx == 0:
                        if 'operating company' in service.lower():
                            service_type = 'operating company'
                        elif 'group' in service.lower():
                            service_type = 'group'
                        else:
                            matching_service = [
                                item for item in service_data['services']
                                if (item['name'].lower() in service.lower())]
                            if len(matching_service) == 1 and 'type' in matching_service[0]:
                                service_type = matching_service[0]['type']
                            else:
                                service_type = ''
                        indicator_data['services'].append({
                            'name': scrub_service_name(service),
                            'type': service_type,
                            'comments': comments[level_idx],
                            'sources': sources[level_idx],
                            'score': level_scores[level_idx]
                        })
                    level_data.append({
                        'response': level_response,
                        'score': scores[idx][level_idx]
                    })
                indicator_data['custom'] = custom
                indicator_data['levels'].append({
                    'scores': level_data,
                    'text': survey_levels[idx]['text']
                })
            all_indicators.append(indicator_data)
    io.write_json(path.assets(company_name + '.json'), all_indicators)
def create(filename): # Create a dictionary where properties are company names overview = io.read_json(path.assets('overview.json')) companies = [name.snake_case(item['display']) for item in overview] company_dict = {} for c in companies: company_dict[c] = -1 # Now use that dictionary to save the index of those company names. raw = io.read_csv(path.raw(filename)) raw_header = raw[0] for idx, item in enumerate(raw_header): snake_header = name.snake_case(item) if snake_header in company_dict: company_dict[snake_header] = idx # This should be 0 if we've matched every company if not_all_found(company_dict.values()): print 'Not all companies accounted for in services overview csv' # This is where we check a ref file, or create one ref_path = path.ref('service-column-mapping.json') if os.path.isfile(ref_path): ref = io.read_json(ref_path) else: ref = [name.snake_case(row[0]) for row in raw[1:] if row[0] != ''] io.write_json(ref_path, ref) # Create a dictionary matching row number fo the indicator indicator_dict = {} for indic in ref: indicator_dict[indic] = -1 for idx, row in enumerate(raw): indicator = name.snake_case(row[0]) if indicator in indicator_dict: indicator_dict[indicator] = idx if not_all_found(indicator_dict.values()): print 'Not all indicators accounted for in services overview csv' # Baselines tel = 'telco' net = 'internet company' output = [] # Get a slice of all the columns that encompass each company stops = sorted(idx for idx in company_dict.values()) for idx, stop in enumerate(stops): next_stop = stops[idx + 1] if idx + 1 < len(stops) else len(raw_header) company_range = [item[stop:next_stop] for item in raw] company = { 'display': company_range[0][0], 'name': name.filename(company_range[0][0]) } # The second item in the first row *should* be the type header_type = company_range[0][1].lower() if header_type not in [tel, net]: print 'No company type found. 
Instead, saw %s' % header_type company['type'] = header_type # The second row contains the service names service_names = [item for item in company_range[1]] services = [] for column_number, service_name in enumerate(service_names): # Get each indicator value for each service using # the indicator mapping we defined earlier scores = {} for indicator_name, row_number in indicator_dict.iteritems(): cell = company_range[row_number][column_number] scores[indicator_name] = company_range[row_number][ column_number] # The first 'service' is actually just the overall # Do some spreadsheet format-checking here if column_number == 0: total = scores['total'] if not len(total): print 'No weighted total for %s %s' % (service_name, company['name']) if 'overall' not in service_name: print 'Service %s != "overall"' % service_name company['overall'] = scores # The second 'service' is usually the group score; # No need to save this, we don't use it here. elif column_number == 1 and 'group' in service_name: continue # Otherwise, call it a service. else: service = {'name': service_name, 'scores': scores} # Get service type if it's available service_type = company_range[0][column_number] if len(service_type): service['type'] = service_type services.append(service) company['services'] = services output.append(company) io.write_json(path.assets('services.json'), output)
def create(filename):
    """Parse one company's raw survey-response CSV into <company>.json.

    Splits the sheet into per-indicator row ranges (via the row mapping in
    ref/service-column-mapping.json), splits each range on empty rows into
    response / score / level-score / indicator-score sections, and emits
    one record per indicator with per-service metadata and per-level
    responses and scores.

    :param filename: CSV file name under raw/companies/ (a 4-character
        extension such as ".csv" is assumed and stripped via filename[:-4]).
    """
    company_name = name.filename(filename[:-4])
    all_services = io.read_json(path.assets('services.json'))
    # Locate this company's entry in the aggregated services data
    # (substring match, dots stripped from the stored name).
    service_data = [
        item for item in all_services
        if (company_name in item['name'].replace('.', ''))
    ]
    if len(service_data) != 1:
        print 'Weird number of services found', len(service_data)
    service_data = service_data[0]
    # Create a mapping dictionary of just indicator names, each mapped
    # to -1; is_number(item[1:]) filters ref entries down to ids shaped
    # like a letter followed by a number.
    ref = io.read_json(path.ref('service-column-mapping.json'))
    indicator_dict = {}
    for item in ref:
        if is_number(item[1:]):
            indicator_dict[item] = -1
    # Map the indicator to the proper rows of this company's CSV.
    raw = io.read_csv(path.raw('companies/' + filename))
    for idx, row in enumerate(raw):
        indicator = row[0].lower()
        if indicator in indicator_dict:
            indicator_dict[indicator] = idx
    # Use the survey data to map possible responses to position.
    survey = io.read_json(path.assets('survey.json'))
    if not_all_found(indicator_dict.values()):
        print 'Not all indicators accounted for in services overview csv'
    all_indicators = []
    # Get a slice of all the rows that encompass each indicator.
    stops = sorted(idx for idx in indicator_dict.values())
    for idx, stop in enumerate(stops):
        next_stop = stops[idx + 1] if idx + 1 < len(stops) else len(raw) + 1
        indicator_range = raw[stop:next_stop]
        # Divide that slice by empty rows.
        split = array.slice_arr(indicator_range, array.is_empty_row)
        # The first slice contains consolidated answers, comments, sources.
        responses = split.pop(0)
        # The first row of responses is the indicator name followed by
        # service categories.
        header = [item for item in responses.pop(0) if len(item)]
        indicator_name = header[0]
        # Find the survey question we're looking for.
        survey_item = ([
            item for item in survey
            if item['id'].lower() == indicator_name.lower()
        ])
        if len(survey_item) != 1:
            print 'Too many items in survey.json for this indicator'
            print indicator_name
            print survey_item
        indicator_data = {'id': indicator_name, 'services': [], 'levels': []}
        # Check if this indicator is valid before continuing; an N/A
        # marker means we skip it entirely.
        if len(responses
               ) == 1 and 'this indicator is n/a' in responses[0][0].lower():
            continue
        else:
            # Question scores follow the response text in the split array.
            scores = split.pop(0)
            # ..followed by the overall indicator score. NOTE: the pop only
            # happens when the label matches; otherwise split is unchanged.
            indicator_score = split.pop(-1)[0][1] if (
                'indicator score' in split[-1][0][0].lower()) else []
            if not len(indicator_score):
                print '\nIndicator score not found in %s' % header[0]
                print split, '\n'
            else:
                indicator_data['score'] = indicator_score
            # ..and the same conditional pop for the overall service scores.
            level_scores = split.pop(-1)[0] if (
                'level score' in split[-1][0][0].lower()) else []
            if not len(level_scores):
                print '\nService score not found in %s' % header[0]
                print split, '\n'
            # Determine the comments and sources location (last two rows
            # of the responses slice) and sanity-check their labels.
            comments = responses.pop(-2)
            sources = responses.pop(-1)
            if ('comments' not in comments[0].lower()
                    or 'sources' not in sources[0].lower()):
                print 'Comments not found in %s' % comments[0]
                print 'Sources not found in %s' % sources[0]
            # Some question texts include an if-not-then clause, which
            # throws off the count between the text and the score.
            # Record it and then delete the row (delete-then-break keeps
            # the enumerate safe).
            indicator_data['follow'] = 0
            for idx, row in enumerate(responses):
                if 'continue with B' in row[0] and len(set(row[1:])) == 1:
                    indicator_data['follow'] = 1
                    del responses[idx]
                    break
            if len(responses) != len(scores):
                print 'Length of responses and scores not matching'
                print len(responses), len(scores)
            # Save level responses and level positions; determine whether
            # this question has custom answers.
            survey_levels = survey_item[0]['levels']
            for idx, level in enumerate(responses):
                level_data = []
                # Assume anything longer than 25 characters, aka
                # "no/insufficient evidence", is a custom response.
                custom = 0
                survey_options = survey_levels[idx]['responses']
                for option in survey_options:
                    if len(option) > 25:
                        custom = 1
                for level_idx, level_response in enumerate(level):
                    # First level index (the row label) is useless; skip
                    # empty cells too.
                    if level_idx == 0 or not len(level_response):
                        continue
                    if len(header) <= level_idx:
                        print 'No header available, this will break'
                    service = header[level_idx]
                    # Exclude group scores / operating company from
                    # indicators that don't need them.
                    if (('(group)' in service
                         or '(operating company)' in service)
                            and exclude_service(indicator_name)):
                        continue
                    # Shim issues where the response includes too much text.
                    if len(level_response
                           ) > 25 and "no/insufficient" == level_response[:15]:
                        level_response = "no/insufficient evidence"
                    # Only add to the services list on the first level;
                    # otherwise we would add each service repeatedly.
                    if idx == 0:
                        if 'operating company' in service.lower():
                            service_type = 'operating company'
                        elif 'group' in service.lower():
                            service_type = 'group'
                        else:
                            matching_service = [
                                item for item in service_data['services']
                                if (item['name'].lower() in service.lower())
                            ]
                            if len(matching_service
                                   ) == 1 and 'type' in matching_service[0]:
                                service_type = matching_service[0]['type']
                            else:
                                service_type = ''
                        indicator_data['services'].append({
                            'name': scrub_service_name(service),
                            'type': service_type,
                            'comments': comments[level_idx],
                            'sources': sources[level_idx],
                            'score': level_scores[level_idx]
                        })
                    level_data.append({
                        'response': level_response,
                        'score': scores[idx][level_idx]
                    })
                indicator_data['custom'] = custom
                indicator_data['levels'].append({
                    'scores': level_data,
                    'text': survey_levels[idx]['text']
                })
            all_indicators.append(indicator_data)
    io.write_json(path.assets(company_name + '.json'), all_indicators)