def main(root):
    """Import admissions figures from every ``*.pdf.html`` file in *root*.

    Each matching file is parsed with ``Parser``. For every institution in
    the parsed data, acceptance and enrollment rates are derived with
    ``derive_rate`` and a ``PublicAdmissions`` row for the parser's year is
    created or updated through ``create_or_update``. One progress line is
    printed per institution.

    Args:
        root: Directory whose direct children are searched for files
            matching ``*.pdf.html``.
    """
    # The matcher does not depend on the file being processed, so build it
    # once instead of once per file.
    matcher = InstitutionFuzzyMatcher()
    pattern = os.path.join(root, '*.pdf.html')

    for path in glob.glob(pattern):
        parser = Parser(path)
        parser.parse()

        # parser.data maps an institution name to its admissions counts;
        # the 'applied', 'accepted' and 'enrolled' keys are read below.
        for name, data in parser.data.items():
            # Derive acceptance and enrollment rates
            acceptance_rate = derive_rate(data['accepted'], data['applied'])
            enrollment_rate = derive_rate(data['enrolled'], data['accepted'])

            # Resolve the parsed name to an institution object.
            # NOTE(review): assumes match() always returns an object with a
            # ``name`` attribute -- confirm how unmatched names are handled.
            institution = matcher.match(name)

            # Create or update institution admissions for this year
            defaults = {
                'year_type': 'fall',
                'number_of_applicants': data['applied'],
                'number_admitted': data['accepted'],
                'number_admitted_who_enrolled': data['enrolled'],
                'percent_of_applicants_admitted': acceptance_rate,
                'percent_of_admitted_who_enrolled': enrollment_rate,
            }
            obj, row_count = create_or_update(
                PublicAdmissions.objects,
                institution=institution,
                year=parser.year,
                defaults=defaults)

            # A truthy first element is reported as a creation, anything
            # else as an update.
            if obj:
                print('created %s %d admissions...' % (
                    institution.name, parser.year))
            else:
                print('updated %s %d admissions...' % (
                    institution.name, parser.year))
def main(path, year=2011):
    """Import 6-year bachelor graduation rates from the JSON file at *path*.

    The file is decoded as ISO-8859-1 and parsed as JSON. The ``"text"``
    entries of the first top-level element are fed to ``Parser`` one at a
    time. Every ``(name, bachelor_6yr)`` pair the parser yields is matched to
    an institution and stored on ``PublicGraduationRates`` for *year* through
    ``create_or_update``. One progress line is printed per institution.

    Args:
        path: Path to the JSON file to import.
        year: Year the rates are recorded under. Defaults to 2011, the value
            that was previously hard-coded.
    """
    # Parse 6-year graduation rates.
    # Read bytes and decode explicitly; the ``with`` block closes the file
    # handle even if reading or decoding fails.
    with open(path, "rb") as source:
        json_text = source.read().decode("iso-8859-1")
    page_data = json.loads(json_text)[0]

    parser = Parser()
    for el in page_data["text"]:
        parser.feed(el)

    # Match institutions by name and create or update
    matcher = InstitutionFuzzyMatcher()
    for name, bachelor_6yr in parser.iter_results():
        institution = matcher.match(name)
        defaults = dict(bachelor_6yr=bachelor_6yr)
        obj, row_count = create_or_update(
            PublicGraduationRates.objects,
            institution=institution,
            year=year,
            defaults=defaults
        )
        # A truthy first element is reported as a creation, anything else as
        # an update.
        if obj:
            print("created %s graduation rates..." % institution.name)
        else:
            print("updated %s graduation rates..." % institution.name)
Exemple #3
0
def main(path, year=2011):
    """Load 6-year bachelor graduation rates from a JSON file into the DB.

    The file at *path* is decoded as ISO-8859-1 and parsed as JSON. Each item
    under the ``'text'`` key of the first top-level element is passed to
    ``Parser.feed``. The parser's ``(name, bachelor_6yr)`` results are then
    matched to institutions and written to ``PublicGraduationRates`` for
    *year* via ``create_or_update``, printing one status line per result.

    Args:
        path: Location of the JSON file.
        year: Year to store the rates under. Defaults to 2011, which was
            previously a hard-coded literal.
    """
    # Parse 6-year graduation rates.
    # The context manager guarantees the file is closed; decoding the raw
    # bytes keeps the explicit ISO-8859-1 handling.
    with open(path, 'rb') as handle:
        json_text = handle.read().decode('iso-8859-1')
    page_data = json.loads(json_text)[0]

    parser = Parser()
    for el in page_data['text']:
        parser.feed(el)

    # Match institutions by name and create or update
    matcher = InstitutionFuzzyMatcher()
    for name, bachelor_6yr in parser.iter_results():
        institution = matcher.match(name)
        defaults = dict(bachelor_6yr=bachelor_6yr)
        obj, row_count = create_or_update(PublicGraduationRates.objects,
                                          institution=institution,
                                          year=year,
                                          defaults=defaults)
        # Truthy ``obj`` is reported as "created", otherwise "updated".
        if obj:
            print("created %s graduation rates..." % institution.name)
        else:
            print("updated %s graduation rates..." % institution.name)