def handle(self, *args, **options):
    """Import scholarships from a JSON file into ``Scholarship`` records.

    Expects exactly one positional argument: the path of a JSON file that
    decodes to an iterable of scholarship dicts. Each dict is read for the
    keys ``title``, ``awards``, ``deadlines``, ``applicationRequirements``,
    ``uses``, ``contactInfo``, ``sponsor`` and ``url``. A record that
    ``self._is_duplicate`` reports as a duplicate is counted but not saved;
    every other record is saved.

    With any other number of arguments a usage line is printed and nothing
    is imported.

    Raises:
        Exception: if the cleaned contact info or requirements contain the
            literal substring ``<script>``.
    """
    if len(args) != 1:
        print('usage: python manage.py json_import filename.json')
        return

    file_path = args[0]
    # Use a context manager so the file handle is released even when the
    # JSON is malformed and decoding raises.
    with open(file_path) as f:
        scholarships = json.load(f)

    total = len(scholarships)
    dupe_count = 0
    for i, scholarship in enumerate(scholarships, 1):
        print(scholarship['title'])

        uses = scholarship['uses']
        award_amount = self._extract_max_reward(scholarship['awards'])
        deadline = self._extract_deadline(scholarship['deadlines'])
        requirements = self._clean_html(scholarship['applicationRequirements'])
        high_school_eligible = self._is_high_school_eligible(uses)
        undergraduate_eligible = self._is_undergraduate_eligible(uses)
        graduate_eligible = self._is_graduate_eligible(uses)
        is_essay_required = self._is_essay_required(requirements)
        contact_info = self._clean_html(scholarship['contactInfo'])

        # Abort the whole import rather than store markup that still carries
        # a script tag. This is an exact-substring check only.
        if '<script>' in contact_info or '<script>' in requirements:
            raise Exception('Found a script tag in the html!')

        scholarship_model = Scholarship()
        scholarship_model.title = scholarship['title']
        scholarship_model.amount_usd = award_amount
        scholarship_model.essay_required = is_essay_required
        scholarship_model.organization = scholarship['sponsor']
        scholarship_model.street_address = contact_info
        scholarship_model.third_party_url = scholarship['url']
        scholarship_model.high_school_eligible = high_school_eligible
        scholarship_model.undergrad_eligible = undergraduate_eligible
        scholarship_model.graduate_eligible = graduate_eligible
        scholarship_model.description = requirements
        scholarship_model.deadline = deadline

        if self._is_duplicate(scholarship_model):
            dupe_count += 1
            print('duplicate. not saving.')
        else:
            print('saving')
            scholarship_model.save()

        # NOTE(review): treated as a per-record progress line
        # ("processed / total duplicates: n") -- confirm it was not meant
        # to run once after the loop.
        print('{} / {} duplicates: {}'.format(i, total, dupe_count))
def handle(self, *args, **options):
    """Import scholarships from a JSON file into ``Scholarship`` records.

    Expects exactly one positional argument: the path of a JSON file that
    decodes to an iterable of scholarship dicts. Each dict is read for the
    keys ``title``, ``amount``, ``deadline``, ``eligibility``,
    ``application_overview``, ``purpose``, ``background``,
    ``provider_name`` and ``third_party_url``. A record that
    ``self._is_duplicate`` reports as a duplicate is counted but not saved;
    every other record is saved.

    With any other number of arguments a usage line is printed and nothing
    is imported.

    Raises:
        Exception: if the assembled requirements or description contain
            the literal substring ``<script>``.
    """
    if len(args) != 1:
        print("usage: python manage.py zinch_json_import filename.json")
        return

    file_path = args[0]
    # Use a context manager so the file handle is released even when the
    # JSON is malformed and decoding raises.
    with open(file_path) as f:
        scholarships = json.load(f)

    total = len(scholarships)
    dupe_count = 0
    for i, scholarship in enumerate(scholarships, 1):
        print(scholarship["title"])

        award_amount = self._extract_award_amount(scholarship["amount"])
        deadline = self._extract_deadline(scholarship["deadline"])
        # Two source fields are joined with an HTML line break into each
        # stored text field.
        requirements = (
            scholarship["eligibility"]
            + "<br />"
            + scholarship["application_overview"]
        )
        description = (
            scholarship["purpose"] + "<br />" + scholarship["background"]
        )
        is_essay_required = (
            self._is_essay_required(requirements)
            or self._is_essay_required(description)
        )

        # Abort the whole import rather than store markup that still carries
        # a script tag. This is an exact-substring check only.
        if "<script>" in requirements or "<script>" in description:
            raise Exception("Found a script tag in the html!")

        scholarship_model = Scholarship()
        scholarship_model.title = scholarship["title"]
        scholarship_model.amount_usd = award_amount
        scholarship_model.essay_required = is_essay_required
        scholarship_model.organization = scholarship["provider_name"]
        scholarship_model.third_party_url = scholarship["third_party_url"]
        # Every record from this feed is marked high-school eligible.
        scholarship_model.high_school_eligible = True
        scholarship_model.description = description
        scholarship_model.additional_requirements = requirements
        scholarship_model.deadline = deadline

        if self._is_duplicate(scholarship_model):
            dupe_count += 1
            print("duplicate. not saving.")
        else:
            print("saving")
            scholarship_model.save()

        # NOTE(review): treated as a per-record progress line
        # ("processed / total duplicates: n") -- confirm it was not meant
        # to run once after the loop.
        print("{} / {} duplicates: {}".format(i, total, dupe_count))