def test_unify_subject_names(self):
    """
    Check that unify_subject_name reduces a subject to its first word.

    Both hyphens and spaces act as word separators in the sample inputs.
    """
    # (raw subject, expected unified name) pairs, checked in order.
    cases = (
        ('cs-ai', 'cs'),
        ('social science', 'social'),
        ('physical-education stuff', 'physical'),
        ('beatles pop-song', 'beatles'),
    )
    for raw_name, expected in cases:
        self.assertEqual(unify_subject_name(raw_name), expected)
def create_course(item, provider):
    """
    Create a course in the database from one parsed provider entry.

    ``item`` is the parsed markup node for a single course (it must offer
    ``find``/``find_all`` and expose its first link as ``item.a``);
    ``provider`` is stored on the created Course as its provider.

    The subject is read from the ``ribbon-content`` div, the title from the
    link inside the ``course-body`` div, and the instructor and description
    from the first and second ``<p>`` elements respectively.
    """
    ribbon = item.find('div', class_='ribbon-content')
    # subjects are stored lowercase in the DB
    subject_name = unify_subject_name(ribbon.text.strip().lower())

    body = item.find('div', class_='course-body')
    course_title = body.a.text

    paragraphs = item.find_all('p')
    instructor_name = paragraphs[0].text.strip()
    course_description = paragraphs[1].text.strip()

    course_url = item.a.attrs['href']

    course, _course_created = Course.objects.get_or_create(
        name=course_title,
        url=course_url,
        description=course_description,
        instructor=instructor_name,
        provider=provider,
    )
    subject_row, _subject_created = Subject.objects.get_or_create(
        name=subject_name,
    )
    course.subjects.add(subject_row)
    course.save()
def add_courses(json_dict):
    """
    Store every course from the decoded Coursera JSON in the database.

    Each entry of ``json_dict`` is read through the keys ``short_name``,
    ``name``, ``short_description``, ``instructor``, ``universities`` and
    ``category-ids``.  The course is attached to the 'Coursera' provider,
    its source is the first listed university, and every category id is
    turned into a Subject via unify_subject_name.
    """
    provider, _ = Provider.objects.get_or_create(name='Coursera')

    for entry in json_dict:
        course_url = 'https://www.coursera.org/course/' + entry['short_name']
        course_row, _ = Course.objects.get_or_create(
            name=entry['name'],
            description=entry['short_description'],
            instructor=entry['instructor'],
            url=course_url,
        )
        course_row.provider = provider

        # Only the first university is recorded as the course source.
        first_university = entry['universities'][0]['name']
        source_row, _ = Source.objects.get_or_create(name=first_university)
        course_row.source = source_row
        course_row.save()

        for category in entry['category-ids']:
            subject_row, _ = Subject.objects.get_or_create(
                name=unify_subject_name(category),
            )
            course_row.subjects.add(subject_row)
        course_row.save()
def run():
    """
    Main function: store all scraped Udacity courses in the database.

    Fetches the course mapping from get_all_courses(), then for each course
    gets or creates a Course (keyed on name, description, instructor and
    url), attaches it to the 'Udacity' provider, and links one Subject per
    entry of the course's ``subj`` list (normalised by unify_subject_name).
    """
    all_courses = get_all_courses()
    udacity_provider, _created = Provider.objects.get_or_create(name='Udacity')

    # Only the values are used; .values() exists on both Python 2 and 3,
    # whereas dict.iteritems() is Python 2 only.
    for course in all_courses.values():
        c, _created = Course.objects.get_or_create(
            name=course['name'],
            description=course['desc'],
            instructor=course['instr'],
            url=course['url'],
        )
        c.provider = udacity_provider
        # source university not easily available in udacity, so c.source
        # is left unset here
        c.save()

        for subject_name in course['subj']:
            better_subject_name = unify_subject_name(subject_name)
            subject, _created = Subject.objects.get_or_create(
                name=better_subject_name,
            )
            c.subjects.add(subject)
        c.save()
def add_to_django():
    """
    Adds the data from the lists into appropriate fields of the database.

    Reads the parallel lists ``title_list``, ``desc_list``, ``int_list``,
    ``url_list``, ``uni_list`` and ``sub_list`` (defined outside this
    function); position ``i`` in each list describes the same course.
    Every course is attached to the 'edX' provider, given a Source named
    after its university, and linked to one Subject whose name is
    normalised by unify_subject_name.

    Raises:
        IndexError: if any of the other lists is shorter than title_list.
    """
    edx_provider, _created = Provider.objects.get_or_create(name='edX')

    for i, title in enumerate(title_list):
        try:
            print('Adding ' + title + ' course: ' + str(i))
        except Exception:
            # Progress output is best-effort: a title that cannot be
            # concatenated or printed must not abort the import.  Catching
            # Exception (not a bare except) still lets KeyboardInterrupt
            # and SystemExit stop the run.
            pass

        c, _created = Course.objects.get_or_create(
            name=title,
            description=desc_list[i],
            instructor=int_list[i],
            url=url_list[i],
        )
        c.provider = edx_provider
        c.source, _created = Source.objects.get_or_create(name=uni_list[i])
        c.save()

        better_subject_name = unify_subject_name(sub_list[i])
        subject, _created = Subject.objects.get_or_create(
            name=better_subject_name,
        )
        c.subjects.add(subject)
        c.save()

    print('Done!')
    print(str(len(title_list)) + ' courses added!')