def scrape(cookie):
    """Rebuild the Student table from the directory page fetched with *cookie*.

    The page is downloaded and parsed, every existing ``Student`` row is
    deleted, and one new ``Student`` is staged per ``student_container``
    element. All changes are committed together at the end.

    The loose text nodes of the second ``student_info`` block are consumed
    in this order: an optional leading room string, an optional trailing
    birthday, an optional trailing major, and whatever remains is treated as
    the address lines. If the text nodes run out part-way through, the
    remaining fields are simply left unset.
    """
    page = get_tree(get_html(cookie))
    cards = page.find_all('div', {'class': 'student_container'})

    # Clear all students
    Student.query.delete()

    for card in cards:
        student = Student()

        heading = card.find('h5', {'class': 'yalehead'})
        student.surname, student.forename = clean_name(heading.text)

        year_box = card.find('div', {'class': 'student_year'})
        student.year = clean_year(year_box.text)

        pronoun_box = card.find('div', {'class': 'student_info_pronoun'})
        student.pronoun = pronoun_box.text

        blocks = card.find_all('div', {'class': 'student_info'})
        student.college = blocks[0].text.replace(' College', '')

        try:
            link = blocks[1].find('a')
            student.email = link.text
        except AttributeError:
            # No <a> inside the block: fall back to a derived address.
            student.email = guess_email(student)

        # Only the block's own text nodes, not text nested in child tags.
        extras = blocks[1].find_all(text=True, recursive=False)
        try:
            if RE_ROOM.match(extras[0]):
                room_text = extras.pop(0)
            else:
                room_text = None
            if room_text:
                parts = RE_ROOM.search(room_text).groups()
                (student.building_code,
                 student.entryway,
                 student.floor,
                 student.suite,
                 student.room) = parts

            if RE_BIRTHDAY.match(extras[-1]):
                student.birthday = extras.pop()
            else:
                student.birthday = None

            if extras[-1] in MAJORS:
                student.major = extras.pop()
            else:
                student.major = None

            student.address = ', '.join(extras)
            student.state = parse_address(extras)
        except IndexError:
            # Ran out of text nodes; keep whatever was parsed so far.
            pass

        db.session.add(student)

    db.session.commit()
    print('Done.')
def handle(self, *args, **options):
    """Populate the database with ``options['len']`` fake students.

    For every student a new ``Book`` with a random UUID title is saved, the
    ``Subject`` titled ``'HTML'`` is attached, and the student is added to a
    ``Teacher`` that is fetched or created by a randomly generated name.

    Args:
        *args: Positional command arguments (unused).
        **options: Parsed command options; ``'len'`` is the number of
            students to create.
    """
    rounds = range(options['len'])
    if not rounds:
        # Nothing to create; do not touch the Subject table either.
        return

    faker = Faker()

    # Same row for every student, so look it up once instead of per loop.
    subject, _subject_created = Subject.objects.get_or_create(title='HTML')

    # Bounds for generated ages. random_number(digits=None) picks a random
    # digit count (up to 9), which produced ages in the hundreds of millions.
    min_age, max_age = 18, 99

    for _ in rounds:
        book = Book()
        # Store the title as text rather than as a uuid.UUID instance.
        book.title = str(uuid.uuid4())
        book.save()

        student = Student()
        student.name = faker.first_name()
        student.surname = faker.last_name()
        student.age = faker.random_int(min=min_age, max=max_age)
        student.address = faker.address()
        student.birthday = faker.date()
        student.description = faker.text()
        student.email = faker.email()
        student.book = book
        student.subject = subject
        student.save()

        teacher, _teacher_created = Teacher.objects.get_or_create(
            name=faker.name())
        teacher.students.add(student)
        # NOTE(review): add() already persists the relation; save() is kept
        # so any save-time hooks on Teacher keep running as before.
        teacher.save()