Exemple #1
0
def create():

    __create_or_delete__(path.assets('indicators'))

    overview = io.read_json(path.assets('overview.json'))

    # Start by compiling a giant data structure of every company
    companies = {}
    for d in overview:
        company_data = io.read_json(path.assets('%s.json' % d['id']))
        companies[d['id']] = company_data

    # Now aggregate the data by indicator id using the survey data
    survey = io.read_json(path.assets('survey.json'))
    for item in survey:
        indicator_id = item['id'].lower()
        indicator_data = {
            'id': item['id'],
            'name': item['name'],
            'follow': item['follow'],
            'companies': []
        }

        print indicator_id, item['follow']

        for company_id, company in companies.iteritems():
            company_data = [i for i in company if indicator_id == i['id'].lower()]
            if len(company_data) > 1:
                print 'Found too many company matches for', indicator_id

            # This might be an indicator that doesn't apply
            if not len(company_data):
                continue

            company_data = company_data[0]
            company_overview = [c for c in overview if company_id in c['id']]

            if len(company_overview) != 1:
                print 'Weirdness finding company from company overview'
            company_overview = company_overview[0]

            company_type = 'Telecommunications'
            if 'false' in company_overview['telco']:
                company_type = 'Internet'

            indicator_data['companies'].append({
                'name': company_overview['name'],
                'id': company_overview['id'],
                'display': company_overview['display'],
                'score': company_data['score'],
                'type': company_type,
                'levels': company_data['levels'],
                'services': company_data['services']
            })


        indicator_data['companies'] = sorted(indicator_data['companies'], key=lambda c: c['id'])
        io.write_json(path.assets('indicators/%s.json' % indicator_id), indicator_data)
def create_indicator_scores():
    companies = io.read_json(path.assets("services.json"))
    survey = io.read_json(path.assets("survey.json"))

    indicator_data = []
    for i in survey:
        indicator_id = i["id"].lower()
        scores = {}
        levels = {}

        for c in companies:
            c_name = c["display"]
            c_overall = c["overall"]
            if indicator_id in c_overall:
                scores[c_name] = c_overall[indicator_id]
            else:
                print "no %s in %s" % (indicator_id, " ".join(c_overall))

        print i["name"]
        indicator_data.append({"id": indicator_id, "scores": scores, "text": i["text"], "name": i["name"]})
    io.write_json(path.assets("indicator-overview.json"), indicator_data)
def companies():
    """Write a Markdown stub with front matter for every company.

    One ``<name>.md`` file is produced per entry of ``overview.json``; the
    front matter repeats every overview field, with the four score fields
    rounded to whole numbers, followed by placeholder description, website
    and body text.
    """
    __create_or_delete__(path.companies("")[:-1])
    rounded_keys = ["total", "commitment", "privacy", "freedom"]
    for entry in io.read_json(path.assets("overview.json")):
        md_name = (entry["name"] + ".md").lower().replace("&", "")
        entity = md_name[:-3]  # file name without the ".md" suffix
        with open(path.companies(md_name), "w") as out:
            out.write("---\n")
            out.write("entity: %s\n" % entity)

            out.write("\nlayout: company\n\n")

            for field, raw in entry.iteritems():
                shown = int(round(float(raw))) if field in rounded_keys else raw
                out.write("%s: %s\n" % (field, shown))

            out.write("\ndescription: one-line description\n")
            out.write("website: http://example.com\n---\n\n")
            out.write("Paragraph describing this company\n")
Exemple #4
0
def companies():
    """Generate the per-company Markdown files from ``overview.json``.

    Each file starts with a front-matter section listing the entity name,
    the ``company`` layout and every overview field (score fields are
    rounded to integers), and ends with placeholder text to be edited by
    hand.
    """
    __create_or_delete__(path.companies('')[:-1])
    overview = io.read_json(path.assets('overview.json'))
    for record in overview:
        # Lower-case the name and drop ampersands to get the file stem.
        stem = record['name'].lower().replace('&', '')
        target = path.companies(stem + '.md')
        with open(target, 'w') as handle:
            handle.write('---\n')
            handle.write('entity: %s\n' % stem)

            handle.write('\nlayout: company\n\n')

            for key, value in record.iteritems():
                if key not in ('total', 'commitment', 'privacy', 'freedom'):
                    handle.write('%s: %s' % (key, value))
                else:
                    # Scores are written as whole numbers.
                    handle.write('%s: %s' % (key, int(round(float(value)))))
                handle.write('\n')

            handle.write('\ndescription: one-line description\n')
            handle.write('website: http://example.com')
            handle.write('\n---\n\n')
            handle.write('Paragraph describing this company\n')
def indicators():
    """Write one Markdown front-matter file per survey indicator.

    For every entry of ``survey.json`` a ``<indicator id>.md`` file is
    written containing its sort position, entity id, category (derived from
    the first letter of the id), name/text/id, the list of levels and, where
    ``custom-questions.json`` has an entry for the indicator or one of its
    levels, the custom answer choices with their scores.
    """
    __create_or_delete__(path.indicators("")[:-1])
    custom = io.read_json(path.assets("custom-questions.json"))
    survey = io.read_json(path.assets("survey.json"))
    for idx, indicator in enumerate(survey):
        indicator_id = indicator["id"].lower()

        # Determine if it's in the list of custom indicators
        is_custom = indicator_id in custom

        filename = indicator_id + ".md"
        with open(path.indicators(filename), "w") as f:
            f.write("---\n")
            f.write("sort: %s\n" % idx)
            f.write("entity: %s\n" % filename[:-3])
            f.write("entity_type: indicator\n")

            # Ids starting with "c" / "f" map to commitment / freedom of
            # expression; everything else is filed under privacy.
            first_letter = filename[0]
            if first_letter == "c":
                f.write("category_ref: commitment\n")
                f.write("category: Commitment\n")
            elif first_letter == "f":
                f.write("category_ref: freedom-of-expression\n")
                f.write("category: Freedom of Expression\n")
            else:
                f.write("category_ref: privacy\n")
                f.write("category: Privacy\n")

            f.write("\nlayout: indicator\n\n")

            # text, id, name
            for item in ["name", "text", "id"]:
                f.write("%s: %s\n" % (item, indicator[item].encode("UTF-8")))

            # possible answers
            f.write("\nlevels:\n")
            # Iterate levels directly: re-using ``idx`` here would clobber
            # the outer sort index.
            for resp in indicator["levels"]:

                # first, just make sure we write level text
                if resp["text"] != 0:
                    f.write('  - text: "%s"\n' % resp["text"].encode("UTF-8"))
                    f.write("    id: %s\n" % resp["id"].encode("UTF-8"))

                # this is really only for c1.b at this point.
                level_id = resp["id"].lower()
                if not is_custom and level_id in custom:
                    f.write("    choices:\n")
                    for a in custom[level_id]:
                        f.write('      - text: "%s"\n' % a["text"].encode("UTF-8"))
                        f.write('        score: "%s"\n' % a["score"])

            f.write("\nchoices:\n")
            if is_custom:
                for a in custom[indicator_id]:
                    f.write('  - text: "%s"\n' % a["text"].encode("UTF-8"))
                    f.write('    score: "%s"\n' % a["score"])

            f.write("\n---\n\n")
Exemple #6
0
def create():

    __create_or_delete__(path.assets('indicators'))

    overview = io.read_json(path.assets('overview.json'))

    # Start by compiling a giant data structure of every company
    companies = {}
    for d in overview:
        company_data = io.read_json(path.assets('%s.json' % d['id']))
        companies[d['id']] = company_data

    # Now aggregate the data by indicator id using the survey data
    survey = io.read_json(path.assets('survey.json'))
    for item in survey:
        indicator_id = item['id'].lower()
        indicator_data = {
            'id': item['id'],
            'name': item['name'],
            'follow': item['follow'],
            'companies': []
        }

        print indicator_id, item['follow']

        for company_id, company in companies.iteritems():
            company_data = [
                i for i in company if indicator_id == i['id'].lower()
            ]
            if len(company_data) > 1:
                print 'Found too many company matches for', indicator_id

            # This might be an indicator that doesn't apply
            if not len(company_data):
                continue

            company_data = company_data[0]
            company_overview = [c for c in overview if company_id in c['id']]

            if len(company_overview) != 1:
                print 'Weirdness finding company from company overview'
            company_overview = company_overview[0]

            company_type = 'Telecommunications'
            if 'false' in company_overview['telco']:
                company_type = 'Internet'

            indicator_data['companies'].append({
                'name':
                company_overview['name'],
                'id':
                company_overview['id'],
                'display':
                company_overview['display'],
                'score':
                company_data['score'],
                'type':
                company_type,
                'levels':
                company_data['levels'],
                'services':
                company_data['services']
            })

        indicator_data['companies'] = sorted(indicator_data['companies'],
                                             key=lambda c: c['id'])
        io.write_json(path.assets('indicators/%s.json' % indicator_id),
                      indicator_data)
Exemple #7
0
 def setUp(self):
     """Load data with ``read_json()`` and keep it on ``self.data``."""
     # NOTE(review): presumably a unittest fixture hook; the enclosing class
     # is not visible here, so confirm before relying on that.
     self.data = read_json()
Exemple #8
0
import json
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pymongo import MongoClient

sys.path.insert(0, str(Path(__file__).parent.parent / "etc"))

from lib.io import read_json

# Create a MongoDB client for the server at localhost:9999 and bind the
# ``users`` collection of the ``tinder`` database.
client = MongoClient('mongodb://localhost:9999')
db = client.tinder
users = db.users

# Load the object data from disk.
# NOTE(review): presumably a mapping of object label -> occurrence count;
# confirm against whatever writes data/objects.json.
objects = read_json('data/objects.json')

# One row per key of ``objects`` with a single 'count' column, ordered from
# the highest count to the lowest.
data = pd.DataFrame \
    .from_dict(objects, orient='index', columns=['count']) \
    .sort_values(by='count', ascending=False)

# Remove common features
common_features = [
    'Forehead',
    'Chin',
    'Jaw',
    'Sleeve',
    'Hairstyle',
    'Neck',
    'Gesture',
    'Eyebrow',
    'Nose',
Exemple #9
0
def create(filename):

    # Create a dictionary where properties are company names
    overview = io.read_json(path.assets('overview.json'))
    companies = [name.snake_case(item['display']) for item in overview]
    company_dict = {}
    for c in companies:
        company_dict[c] = -1

    # Now use that dictionary to save the index of those company names.
    raw = io.read_csv(path.raw(filename))
    raw_header = raw[0]
    for idx, item in enumerate(raw_header):
        snake_header = name.snake_case(item)
        if snake_header in company_dict:
            company_dict[snake_header] = idx

    # This should be 0 if we've matched every company
    if not_all_found(company_dict.values()):
        print 'Not all companies accounted for in services overview csv'

    # This is where we check a ref file, or create one
    ref_path = path.ref('service-column-mapping.json')
    if os.path.isfile(ref_path):
        ref = io.read_json(ref_path)
    else:
        ref = [name.snake_case(row[0]) for row in raw[1:] if row[0] != '']
        io.write_json(ref_path, ref)

    # Create a dictionary matching row number fo the indicator
    indicator_dict = {}
    for indic in ref:
        indicator_dict[indic] = -1
    for idx, row in enumerate(raw):
        indicator = name.snake_case(row[0])
        if indicator in indicator_dict:
            indicator_dict[indicator] = idx

    if not_all_found(indicator_dict.values()):
        print 'Not all indicators accounted for in services overview csv'

    # Baselines
    tel = 'telco'
    net = 'internet company'

    output = []

    # Get a slice of all the columns that encompass each company
    stops = sorted(idx for idx in company_dict.values())
    for idx, stop in enumerate(stops):
        next_stop = stops[idx+1] if idx + 1 < len(stops) else len(raw_header)
        company_range = [item[stop:next_stop] for item in raw]
        company = {
            'display': company_range[0][0],
            'name': name.filename(company_range[0][0])
        }

        # The second item in the first row *should* be the type
        header_type = company_range[0][1].lower()
        if header_type not in [tel, net]:
            print 'No company type found. Instead, saw %s' % header_type
        company['type'] = header_type

        # The second row contains the service names
        service_names = [item for item in company_range[1]]
        services = []
        for column_number, service_name in enumerate(service_names):

            # Get each indicator value for each service using
            # the indicator mapping we defined earlier
            scores = {}
            for indicator_name, row_number in indicator_dict.iteritems():
                cell = company_range[row_number][column_number]
                scores[indicator_name] = company_range[row_number][column_number]

            # The first 'service' is actually just the overall
            # Do some spreadsheet format-checking here
            if column_number == 0:
                total = scores['total']
                if not len(total):
                    print 'No weighted total for %s %s' % (service_name, company['name'])
                if 'overall' not in service_name:
                    print 'Service %s != "overall"' % service_name
                company['overall'] = scores

            # The second 'service' is usually the group score;
            # No need to save this, we don't use it here.
            elif column_number == 1 and 'group' in service_name:
                continue

            # Otherwise, call it a service.
            else:
                service = {
                    'name': service_name,
                    'scores': scores
                }

                # Get service type if it's available
                service_type = company_range[0][column_number]
                if len(service_type):
                    service['type'] = service_type
                services.append(service)

        company['services'] = services
        output.append(company)

    io.write_json(path.assets('services.json'), output)
Exemple #10
0
        required=False,
        const=True,
        help="A boolean indicating whether or not to submit images to the cloud "
            "computer vision service.")

    args = parser.parse_args()

    if args.download_file:
        assert not args.data_export, "You should select either the --download-from-tinder or --data-export [file_path] options, but not both."
        logging.info(authverif())
        recs = get_recommendations()
        write_json(recs, 'data/recommendations.json')

    else:
        assert args.data_export, "You must specify either a --download-file or a --data-export"
        recs = read_json(args.data_export)

    assert recs['meta']['status'] == 200, "Something went wrong"
    results = []

    for record in recs['data']['results']:
        results.append(flatten(record))

    try:
        with MongoClient('mongodb://localhost:9999') as client:
            db = client.tinder
            users = db.users

            for result in results:
                logging.debug(f"Upserting {result} to the database")
                # Update the database
Exemple #11
0
def create(filename):
    """Convert one company's raw CSV into ``<company_name>.json`` in assets.

    Reads ``services.json`` (to look up service types), the
    ``service-column-mapping.json`` ref file (to know which indicators to
    look for), the raw CSV at ``companies/<filename>`` and ``survey.json``.
    The CSV is cut into one row range per indicator; each range is split on
    empty rows into responses, per-level scores, level scores and the
    indicator score. The resulting list of indicator dicts is written with
    ``io.write_json``. Format anomalies are printed, not raised.

    ``filename`` is the CSV file name; its last four characters are dropped
    to derive the company name (presumably the ``.csv`` suffix).
    """

    company_name = name.filename(filename[:-4])

    all_services = io.read_json(path.assets('services.json'))
    service_data = [item for item in all_services if (company_name
        in item['name'].replace('.', ''))]

    if len(service_data) != 1:
        print 'Weird number of services found', len(service_data)

    # NOTE(review): raises IndexError when no company matched above.
    service_data = service_data[0]

    # Create a mapping dictionary of just indicator names
    # each mapped to -1
    ref = io.read_json(path.ref('service-column-mapping.json'))
    indicator_dict = {}
    for item in ref:
        # Keep only ref entries that are a single character followed by a number
        if is_number(item[1:]):
            indicator_dict[item] = -1


    # Map the indicator to the proper rows
    raw = io.read_csv(path.raw('companies/' + filename))
    for idx, row in enumerate(raw):
        indicator = row[0].lower()
        if indicator in indicator_dict:
            indicator_dict[indicator] = idx

    # Use the survey data to map possible responses to position
    survey = io.read_json(path.assets('survey.json'))

    if not_all_found(indicator_dict.values()):
        print 'Not all indicators accounted for in services overview csv'

    all_indicators = []

    # Get a slice of all the rows that encompass each indicator
    stops = sorted(idx for idx in indicator_dict.values())
    for idx, stop in enumerate(stops):
        next_stop = stops[idx+1] if idx + 1 < len(stops) else len(raw) + 1
        indicator_range = raw[stop:next_stop]

        # Divide that slice by empty rows
        split = array.slice_arr(indicator_range, array.is_empty_row)

        # The first slice contains consolidated answers,
        # comments, and sources.
        responses = split.pop(0)

        # The first row of responses is indicator name followed by
        # service categories
        header = [item for item in responses.pop(0) if len(item)]

        indicator_name = header[0]

        # Find the survey question we're looking for
        survey_item = ([item for item in survey
            if item['id'].lower() == indicator_name.lower()])

        # NOTE(review): this also fires when *no* survey item matched, in
        # which case survey_item[0] further down raises IndexError.
        if len(survey_item) != 1:
            print 'Too many items in survey.json for this indicator'
            print indicator_name
            print survey_item

        indicator_data = {
            'id': indicator_name,
            'services': [],
            'levels': []
        }

        # Check if this indicator is valid before continuing
        if len(responses) == 1 and 'this indicator is n/a' in responses[0][0].lower():
            # n/a indicators are skipped entirely: nothing is appended for them
            continue
        else:

            # question scores follow the response text in the split array
            scores = split.pop(0)

            # ..followed by the overall indicator score (verify this)
            # The last remaining chunk is only consumed when its first cell
            # carries the expected label.
            indicator_score = split.pop(-1)[0][1] if ('indicator score'
                in split[-1][0][0].lower()) else []
            if not len(indicator_score):
                print '\nIndicator score not found in %s' % header[0]
                print split, '\n'
            else:
                indicator_data['score'] = indicator_score

            # ..and the same for the overall service scores
            level_scores = split.pop(-1)[0] if ('level score'
                in split[-1][0][0].lower()) else []
            if not len(level_scores):
                print '\nService score not found in %s' % header[0]
                print split, '\n'

            # Determine the comments and sources location
            # (the last two rows of the responses chunk, in that order)
            comments = responses.pop(-2)
            sources = responses.pop(-1)

            if ('comments' not in comments[0].lower() or
                'sources' not in sources[0].lower()):
                print 'Comments not found in %s' % comments[0]
                print 'Sources not found in %s' % sources[0]

            # Some question text include an if-not-then clause,
            # which throws off the count between the text and the score.
            # Record it and then delete the row.
            indicator_data['follow'] = 0
            # NOTE: ``idx`` is re-used here and below; the outer loop's idx
            # has already been consumed when computing next_stop.
            for idx, row in enumerate(responses):
                if 'continue with B' in row[0] and len(set(row[1:])) == 1:
                    indicator_data['follow'] = 1
                    del responses[idx]
                    break

            if len(responses) != len(scores):
                print 'Length of responses and scores not matching'
                print len(responses), len(scores)


            # Save level responses, and level positions
            # Determine if this question has custom answers
            survey_levels = survey_item[0]['levels']
            for idx, level in enumerate(responses):
                level_data = []

                # Assume anything longer than 25 characters,
                # aka "no/insufficient evidence", is a custom response
                custom = 0
                survey_options = survey_levels[idx]['responses']
                for option in survey_options:
                    if len(option) > 25:
                        custom = 1

                for level_idx, level_response in enumerate(level):

                    # First level index is useless.
                    if level_idx == 0 or not len(level_response):
                        continue

                    if len(header) <= level_idx:
                        print 'No header available, this will break'

                    service = header[level_idx]

                    # Exclude group scores, operating company
                    # from indicators that don't need them
                    if (('(group)' in service or '(operating company)' in service )
                            and exclude_service(indicator_name)):
                        continue

                    # Shim issues where the response includes too much text.
                    if len(level_response) > 25 and "no/insufficient" == level_response[:15]:
                        level_response = "no/insufficient evidence"


                    # Only add to the services list if we're on the first level.
                    # Otherwise each service would be added once per level.
                    if idx == 0:

                        if 'operating company' in service.lower():
                            service_type = 'operating company'
                        elif 'group' in service.lower():
                            service_type = 'group'
                        else:
                            # Look the type up in services.json by name containment
                            matching_service = [item for item in service_data['services'] if (
                                item['name'].lower() in service.lower())]
                            if len(matching_service) == 1 and 'type' in matching_service[0]:
                                service_type = matching_service[0]['type']
                            else:
                                service_type = ''

                        indicator_data['services'].append({
                            'name': scrub_service_name(service),
                            'type': service_type,
                            'comments': comments[level_idx],
                            'sources': sources[level_idx],
                            'score': level_scores[level_idx]
                        })

                    level_data.append({
                        'response': level_response,
                        'score': scores[idx][level_idx]
                    })


                # Overwritten on every level, so the last level's value wins
                indicator_data['custom'] = custom
                indicator_data['levels'].append({
                    'scores': level_data,
                    'text': survey_levels[idx]['text']
                })

        all_indicators.append(indicator_data)

    io.write_json(path.assets(company_name + '.json'), all_indicators)
Exemple #12
0
def create(filename):

    # Create a dictionary where properties are company names
    overview = io.read_json(path.assets('overview.json'))
    companies = [name.snake_case(item['display']) for item in overview]
    company_dict = {}
    for c in companies:
        company_dict[c] = -1

    # Now use that dictionary to save the index of those company names.
    raw = io.read_csv(path.raw(filename))
    raw_header = raw[0]
    for idx, item in enumerate(raw_header):
        snake_header = name.snake_case(item)
        if snake_header in company_dict:
            company_dict[snake_header] = idx

    # This should be 0 if we've matched every company
    if not_all_found(company_dict.values()):
        print 'Not all companies accounted for in services overview csv'

    # This is where we check a ref file, or create one
    ref_path = path.ref('service-column-mapping.json')
    if os.path.isfile(ref_path):
        ref = io.read_json(ref_path)
    else:
        ref = [name.snake_case(row[0]) for row in raw[1:] if row[0] != '']
        io.write_json(ref_path, ref)

    # Create a dictionary matching row number fo the indicator
    indicator_dict = {}
    for indic in ref:
        indicator_dict[indic] = -1
    for idx, row in enumerate(raw):
        indicator = name.snake_case(row[0])
        if indicator in indicator_dict:
            indicator_dict[indicator] = idx

    if not_all_found(indicator_dict.values()):
        print 'Not all indicators accounted for in services overview csv'

    # Baselines
    tel = 'telco'
    net = 'internet company'

    output = []

    # Get a slice of all the columns that encompass each company
    stops = sorted(idx for idx in company_dict.values())
    for idx, stop in enumerate(stops):
        next_stop = stops[idx + 1] if idx + 1 < len(stops) else len(raw_header)
        company_range = [item[stop:next_stop] for item in raw]
        company = {
            'display': company_range[0][0],
            'name': name.filename(company_range[0][0])
        }

        # The second item in the first row *should* be the type
        header_type = company_range[0][1].lower()
        if header_type not in [tel, net]:
            print 'No company type found. Instead, saw %s' % header_type
        company['type'] = header_type

        # The second row contains the service names
        service_names = [item for item in company_range[1]]
        services = []
        for column_number, service_name in enumerate(service_names):

            # Get each indicator value for each service using
            # the indicator mapping we defined earlier
            scores = {}
            for indicator_name, row_number in indicator_dict.iteritems():
                cell = company_range[row_number][column_number]
                scores[indicator_name] = company_range[row_number][
                    column_number]

            # The first 'service' is actually just the overall
            # Do some spreadsheet format-checking here
            if column_number == 0:
                total = scores['total']
                if not len(total):
                    print 'No weighted total for %s %s' % (service_name,
                                                           company['name'])
                if 'overall' not in service_name:
                    print 'Service %s != "overall"' % service_name
                company['overall'] = scores

            # The second 'service' is usually the group score;
            # No need to save this, we don't use it here.
            elif column_number == 1 and 'group' in service_name:
                continue

            # Otherwise, call it a service.
            else:
                service = {'name': service_name, 'scores': scores}

                # Get service type if it's available
                service_type = company_range[0][column_number]
                if len(service_type):
                    service['type'] = service_type
                services.append(service)

        company['services'] = services
        output.append(company)

    io.write_json(path.assets('services.json'), output)
Exemple #13
0
def indicators():
    """Emit a Markdown front-matter file for each indicator in the survey.

    Every file records the indicator's position, id, category, name and
    text, its levels, and any custom answer choices found in
    ``custom-questions.json`` (either per level or for the indicator as a
    whole).
    """
    # Front-matter lines per category, selected by the id's first letter;
    # anything that is neither "c" nor "f" falls back to privacy.
    category_lines = {
        'c': ('category_ref: commitment\n',
              'category: Commitment\n'),
        'f': ('category_ref: freedom-of-expression\n',
              'category: Freedom of Expression\n'),
    }
    fallback_lines = ('category_ref: privacy\n', 'category: Privacy\n')

    __create_or_delete__(path.indicators('')[:-1])
    custom = io.read_json(path.assets('custom-questions.json'))
    survey = io.read_json(path.assets('survey.json'))
    for position, indicator in enumerate(survey):
        indicator_id = indicator['id'].lower()
        is_custom = indicator_id in custom

        filename = indicator_id + '.md'
        with open(path.indicators(filename), 'w') as f:
            emit = f.write
            emit('---\n')
            emit('sort: %s\n' % position)
            emit('entity: %s\n' % filename[:-3])
            emit('entity_type: indicator\n')

            for line in category_lines.get(filename[0], fallback_lines):
                emit(line)

            emit('\nlayout: indicator\n\n')

            # name, text and id, in that order
            for field in ('name', 'text', 'id'):
                emit('%s: %s\n' % (field, indicator[field].encode('UTF-8')))

            # possible answers
            emit('\nlevels:\n')
            for level in indicator['levels']:

                # only levels carrying text get a text/id entry
                if level['text'] != 0:
                    emit('  - text: "%s"\n' % level['text'].encode('UTF-8'))
                    emit('    id: %s\n' % level['id'].encode('UTF-8'))

                # level-specific custom choices (only when the indicator
                # itself is not custom)
                if is_custom or level['id'].lower() not in custom:
                    continue
                emit('    choices:\n')
                for answer in custom[level['id'].lower()]:
                    emit('      - text: "%s"\n' %
                         answer['text'].encode('UTF-8'))
                    emit('        score: "%s"\n' % answer['score'])

            emit('\nchoices:\n')
            if is_custom:
                for answer in custom[indicator_id]:
                    emit('  - text: "%s"\n' % answer['text'].encode('UTF-8'))
                    emit('    score: "%s"\n' % answer['score'])

            emit('\n---\n\n')
Exemple #14
0
def create(filename):

    company_name = name.filename(filename[:-4])

    all_services = io.read_json(path.assets('services.json'))
    service_data = [
        item for item in all_services
        if (company_name in item['name'].replace('.', ''))
    ]

    if len(service_data) != 1:
        print 'Weird number of services found', len(service_data)

    service_data = service_data[0]

    # Create a mapping dictionary of just indicator names
    # each mapped to -1
    ref = io.read_json(path.ref('service-column-mapping.json'))
    indicator_dict = {}
    for item in ref:
        if is_number(item[1:]):
            indicator_dict[item] = -1

    # Map the indicator to the proper rows
    raw = io.read_csv(path.raw('companies/' + filename))
    for idx, row in enumerate(raw):
        indicator = row[0].lower()
        if indicator in indicator_dict:
            indicator_dict[indicator] = idx

    # Use the survey data to map possible responses to position
    survey = io.read_json(path.assets('survey.json'))

    if not_all_found(indicator_dict.values()):
        print 'Not all indicators accounted for in services overview csv'

    all_indicators = []

    # Get a slice of all the rows that encompass each company
    stops = sorted(idx for idx in indicator_dict.values())
    for idx, stop in enumerate(stops):
        next_stop = stops[idx + 1] if idx + 1 < len(stops) else len(raw) + 1
        indicator_range = raw[stop:next_stop]

        # Divide that slice by empty rows
        split = array.slice_arr(indicator_range, array.is_empty_row)

        # The first slice contains consolidated answers,
        # comments, and sources.
        responses = split.pop(0)

        # The first row of responses is indicator name followed by
        # service categories
        header = [item for item in responses.pop(0) if len(item)]

        indicator_name = header[0]

        # Find the survey question we're looking for
        survey_item = ([
            item for item in survey
            if item['id'].lower() == indicator_name.lower()
        ])

        if len(survey_item) != 1:
            print 'Too many items in survey.json for this indicator'
            print indicator_name
            print survey_item

        indicator_data = {'id': indicator_name, 'services': [], 'levels': []}

        # Check if this indicator is valid before continuing
        if len(responses
               ) == 1 and 'this indicator is n/a' in responses[0][0].lower():
            continue
        else:

            # question scores follow the response text in the split array
            scores = split.pop(0)

            # ..followed by the overall indicator score (verify this)
            indicator_score = split.pop(-1)[0][1] if (
                'indicator score' in split[-1][0][0].lower()) else []
            if not len(indicator_score):
                print '\nIndicator score not found in %s' % header[0]
                print split, '\n'
            else:
                indicator_data['score'] = indicator_score

            # ..and the same for the overall service scores
            level_scores = split.pop(-1)[0] if (
                'level score' in split[-1][0][0].lower()) else []
            if not len(level_scores):
                print '\nService score not found in %s' % header[0]
                print split, '\n'

            # Determine the comments and sources location
            comments = responses.pop(-2)
            sources = responses.pop(-1)

            if ('comments' not in comments[0].lower()
                    or 'sources' not in sources[0].lower()):
                print 'Comments not found in %s' % comments[0]
                print 'Sources not found in %s' % sources[0]

            # Some question text include an if-not-then clause,
            # which throws off the count between the text and the score.
            # Record it and then delete the row.
            indicator_data['follow'] = 0
            for idx, row in enumerate(responses):
                if 'continue with B' in row[0] and len(set(row[1:])) == 1:
                    indicator_data['follow'] = 1
                    del responses[idx]
                    break

            if len(responses) != len(scores):
                print 'Length of responses and scores not matching'
                print len(responses), len(scores)

            # Save level responses, and level positions
            # Determine if this question has custom answers
            survey_levels = survey_item[0]['levels']
            for idx, level in enumerate(responses):
                level_data = []

                # Assume anything longer than 25 characters,
                # aka "no/insufficient evidence", is a custom response
                custom = 0
                survey_options = survey_levels[idx]['responses']
                for option in survey_options:
                    if len(option) > 25:
                        custom = 1

                for level_idx, level_response in enumerate(level):

                    # First level index is useless.
                    if level_idx == 0 or not len(level_response):
                        continue

                    if len(header) <= level_idx:
                        print 'No header available, this will break'

                    service = header[level_idx]

                    # Exclude group scores, operating company
                    # from indicators that don't need them
                    if (('(group)' in service
                         or '(operating company)' in service)
                            and exclude_service(indicator_name)):
                        continue

                    # Shim issues where the response includes too much text.
                    if len(level_response
                           ) > 25 and "no/insufficient" == level_response[:15]:
                        level_response = "no/insufficient evidence"

                    # Only add to the services list if we're on the first level.
                    # Other, we add too many
                    if idx == 0:

                        if 'operating company' in service.lower():
                            service_type = 'operating company'
                        elif 'group' in service.lower():
                            service_type = 'group'
                        else:
                            matching_service = [
                                item for item in service_data['services']
                                if (item['name'].lower() in service.lower())
                            ]
                            if len(matching_service
                                   ) == 1 and 'type' in matching_service[0]:
                                service_type = matching_service[0]['type']
                            else:
                                service_type = ''

                        indicator_data['services'].append({
                            'name':
                            scrub_service_name(service),
                            'type':
                            service_type,
                            'comments':
                            comments[level_idx],
                            'sources':
                            sources[level_idx],
                            'score':
                            level_scores[level_idx]
                        })

                    level_data.append({
                        'response': level_response,
                        'score': scores[idx][level_idx]
                    })

                indicator_data['custom'] = custom
                indicator_data['levels'].append({
                    'scores':
                    level_data,
                    'text':
                    survey_levels[idx]['text']
                })

        all_indicators.append(indicator_data)

    io.write_json(path.assets(company_name + '.json'), all_indicators)