Ejemplo n.º 1
0
def scrape(cookie):
    """Rebuild the Student table from the scraped directory page.

    The page is fetched with ``get_html(cookie)`` and parsed with
    ``get_tree``. All existing ``Student`` rows are deleted, one new
    ``Student`` is added to the session per ``student_container`` div, and
    the session is committed once after the loop.

    Args:
        cookie: Forwarded unchanged to ``get_html`` (presumably a session
            cookie for the directory site -- confirm against ``get_html``).
    """
    page = get_tree(get_html(cookie))
    cards = page.find_all('div', {'class': 'student_container'})

    # Wipe the existing rows; they are re-created from the page below.
    Student.query.delete()
    for card in cards:
        record = Student()

        name_tag = card.find('h5', {'class': 'yalehead'})
        record.surname, record.forename = clean_name(name_tag.text)

        year_tag = card.find('div', {'class': 'student_year'})
        record.year = clean_year(year_tag.text)

        pronoun_tag = card.find('div', {'class': 'student_info_pronoun'})
        record.pronoun = pronoun_tag.text

        details = card.find_all('div', {'class': 'student_info'})
        record.college = details[0].text.replace(' College', '')
        contact = details[1]

        # No <a> tag means find() returns None and .text raises
        # AttributeError; fall back to a guessed address in that case.
        try:
            record.email = contact.find('a').text
        except AttributeError:
            record.email = guess_email(record)

        # Loose text nodes that are direct children of the second info div.
        fragments = contact.find_all(text=True, recursive=False)
        try:
            # An optional room string leads the list.
            if RE_ROOM.match(fragments[0]):
                room = fragments.pop(0)
            else:
                room = None
            if room:
                room_parts = RE_ROOM.search(room).groups()
                (record.building_code, record.entryway, record.floor,
                 record.suite, record.room) = room_parts

            # Birthday, then major, are peeled off the tail when recognised.
            if RE_BIRTHDAY.match(fragments[-1]):
                record.birthday = fragments.pop()
            else:
                record.birthday = None

            if fragments[-1] in MAJORS:
                record.major = fragments.pop()
            else:
                record.major = None

            # Whatever is left is joined as the address.
            record.address = ', '.join(fragments)
            record.state = parse_address(fragments)
        except IndexError:
            # The fragment list ran out; fields not reached yet are simply
            # left unassigned on this record.
            pass

        db.session.add(record)

    db.session.commit()
    print('Done.')
Ejemplo n.º 2
0
    def handle(self, *args, **options):
        """Populate the database with ``options['len']`` fake students.

        Each iteration saves a new ``Book`` titled with a random UUID,
        fetches or creates the ``Subject`` titled 'HTML', saves a ``Student``
        filled with Faker data and linked to both, and then attaches that
        student to a ``Teacher`` fetched or created by a Faker-generated name.
        """
        fake = Faker()
        for _ in range(options['len']):
            book = Book()
            book.title = uuid.uuid4()
            book.save()

            # get_or_create yields an (object, created) pair; only the
            # object is needed here.
            subject = Subject.objects.get_or_create(title='HTML')[0]

            student = Student()
            # Values are generated in this order and then assigned in the
            # same order (dicts preserve insertion order).
            # NOTE(review): random_number(digits=None) appears to pick the
            # digit count at random, so "age" may be very large -- confirm
            # this is intended for the age field.
            profile = {
                'name': fake.first_name(),
                'surname': fake.last_name(),
                'age': fake.random_number(digits=None),
                'address': fake.address(),
                'birthday': fake.date(),
                'description': fake.text(),
                'email': fake.email(),
                'book': book,
                'subject': subject,
            }
            for attr, value in profile.items():
                setattr(student, attr, value)
            student.save()

            teacher = Teacher.objects.get_or_create(name=fake.name())[0]
            teacher.students.add(student)
            teacher.save()