Esempio n. 1
0
    def handle(self, *args, **options):
        """Import scholarship records from a JSON file, skipping duplicates.

        Expects exactly one positional argument: the path to a JSON file
        containing a collection of scholarship records. For each record the
        keys ``title``, ``awards``, ``deadlines``, ``applicationRequirements``,
        ``uses``, ``contactInfo``, ``sponsor`` and ``url`` are read and mapped
        onto a new ``Scholarship``. The model is saved unless
        ``self._is_duplicate`` reports it as a duplicate. The title, the
        save/duplicate decision and a running progress line are printed for
        every record.

        If the argument count is wrong, a usage line is printed and the
        method returns without doing anything.

        Raises:
            Exception: if the output of ``self._clean_html`` for the contact
                info or the requirements contains the substring ``<script>``.
        """
        # Single-argument print(...) behaves the same as the Python 2 print
        # statement and also parses under Python 3.
        if len(args) != 1:
            print('usage: python manage.py json_import filename.json')
            return
        file_path = args[0]
        # Context manager guarantees the file handle is released, even when
        # JSON parsing raises.
        with open(file_path) as f:
            scholarships = json.load(f)
        total = len(scholarships)
        dupe_count = 0
        for i, scholarship in enumerate(scholarships, 1):
            print(scholarship['title'])
            award_amount = self._extract_max_reward(scholarship['awards'])
            deadline = self._extract_deadline(scholarship['deadlines'])
            requirements = self._clean_html(scholarship['applicationRequirements'])
            uses = scholarship['uses']
            high_school_eligible = self._is_high_school_eligible(uses)
            undergraduate_eligible = self._is_undergraduate_eligible(uses)
            graduate_eligible = self._is_graduate_eligible(uses)
            is_essay_required = self._is_essay_required(requirements)
            contact_info = self._clean_html(scholarship['contactInfo'])
            # Abort the whole import rather than store markup with a script tag.
            # NOTE(review): only the exact substring '<script>' is detected;
            # tags with attributes or different casing are not caught.
            if '<script>' in contact_info or '<script>' in requirements:
                raise Exception('Found a script tag in the html!')

            scholarship_model = Scholarship()
            scholarship_model.title = scholarship['title']
            scholarship_model.amount_usd = award_amount
            scholarship_model.essay_required = is_essay_required
            scholarship_model.organization = scholarship['sponsor']
            scholarship_model.street_address = contact_info
            scholarship_model.third_party_url = scholarship['url']
            scholarship_model.high_school_eligible = high_school_eligible
            scholarship_model.undergrad_eligible = undergraduate_eligible
            scholarship_model.graduate_eligible = graduate_eligible
            scholarship_model.description = requirements
            scholarship_model.deadline = deadline

            if self._is_duplicate(scholarship_model):
                dupe_count += 1
                print('duplicate. not saving.')
            else:
                print('saving')
                scholarship_model.save()

            print('{} / {} duplicates: {}'.format(i, total, dupe_count))
    def handle(self, *args, **options):
        """Import scholarship records from a JSON file, skipping duplicates.

        Expects exactly one positional argument: the path to a JSON file
        containing a collection of scholarship records. For each record the
        keys ``title``, ``amount``, ``deadline``, ``eligibility``,
        ``application_overview``, ``purpose``, ``background``,
        ``provider_name`` and ``third_party_url`` are read and mapped onto a
        new ``Scholarship``. Every model is marked ``high_school_eligible``.
        The model is saved unless ``self._is_duplicate`` reports it as a
        duplicate. The title, the save/duplicate decision and a running
        progress line are printed for every record.

        If the argument count is wrong, a usage line is printed and the
        method returns without doing anything.

        Raises:
            Exception: if the combined requirements or description text
                contains the substring ``<script>``.
        """
        # Single-argument print(...) behaves the same as the Python 2 print
        # statement and also parses under Python 3.
        if len(args) != 1:
            print("usage: python manage.py zinch_json_import filename.json")
            return
        file_path = args[0]
        # Context manager guarantees the file handle is released, even when
        # JSON parsing raises.
        with open(file_path) as f:
            scholarships = json.load(f)
        total = len(scholarships)
        dupe_count = 0
        for i, scholarship in enumerate(scholarships, 1):
            print(scholarship["title"])
            award_amount = self._extract_award_amount(scholarship["amount"])
            deadline = self._extract_deadline(scholarship["deadline"])
            # Two source fields are joined with an HTML line break for each
            # of the stored text fields.
            requirements = scholarship["eligibility"] + "<br />" + scholarship["application_overview"]
            description = scholarship["purpose"] + "<br />" + scholarship["background"]
            # An essay mention in either text counts.
            is_essay_required = (
                self._is_essay_required(requirements)
                or self._is_essay_required(description)
            )
            # Abort the whole import rather than store markup with a script tag.
            # NOTE(review): only the exact substring "<script>" is detected;
            # tags with attributes or different casing are not caught.
            if "<script>" in requirements or "<script>" in description:
                raise Exception("Found a script tag in the html!")

            scholarship_model = Scholarship()
            scholarship_model.title = scholarship["title"]
            scholarship_model.amount_usd = award_amount
            scholarship_model.essay_required = is_essay_required
            scholarship_model.organization = scholarship["provider_name"]
            scholarship_model.third_party_url = scholarship["third_party_url"]
            scholarship_model.high_school_eligible = True
            scholarship_model.description = description
            scholarship_model.additional_requirements = requirements
            scholarship_model.deadline = deadline

            if self._is_duplicate(scholarship_model):
                dupe_count += 1
                print("duplicate. not saving.")
            else:
                print("saving")
                scholarship_model.save()

            print("{} / {} duplicates: {}".format(i, total, dupe_count))