def parse_queens_textbooks():
    """Import Queen's textbooks and link them to their course offerings.

    For every record yielded by ``main(True)`` that carries an ``isbn_13``:

    1. look the ISBN up with ``amazon_textbook_fields``;
    2. create or update the matching ``Textbook`` row;
    3. for every offering of every course listed under ``courses``, add a
       ``QueensLink`` unless that offering is already linked to the ISBN.

    Records without an ISBN, without Amazon data, or whose Amazon title is
    ``'Cannot Be Found'`` are skipped. Progress is reported on stdout.

    NOTE(review): course codes are resolved with ``objects.get``, so an
    unknown code raises and aborts the whole run -- confirm this is intended.
    """
    for tb in main(True):
        if 'isbn_13' not in tb:
            continue

        isbn = tb['isbn_13']
        # A missing or None 'required' entry counts as "not required".
        required = bool(tb.get('required', False))

        tb_info = amazon_textbook_fields(isbn)
        # NOTE(review): this check reads the lower-case 'title' key while the
        # mapping below reads 'Title' -- confirm which key the helper returns.
        if not tb_info or tb_info['title'] == 'Cannot Be Found':
            continue

        tb_data = {
            'detail_url': tb_info['DetailPageURL'],
            'image_url': tb_info["ImageURL"],
            'author': tb_info["Author"],
            'title': tb_info["Title"],
        }
        tb_obj, created = Textbook.objects.update_or_create(
            isbn=isbn, defaults=tb_data)
        if created:
            # Parenthesised single-argument print works on Python 2 and 3.
            print("New textbook: {0} by {1}".format(
                tb_data['title'], tb_data['author']))

        courses = [QueensCourse.objects.get(code=c)
                   for c in tb.get('courses', [])]
        for course in courses:
            offerings = QueensCourseOffering.objects.filter(course=course)
            for offering in offerings:
                # TODO: use update_or_create??
                # exists() asks the database for a yes/no answer instead of
                # loading every matching textbook just to test truthiness.
                if offering.textbooks.filter(isbn=isbn).exists():
                    continue
                new_link = QueensLink(courseoffering=offering,
                                      textbook=tb_obj,
                                      is_required=required)
                new_link.save()
                print("--Saved to section {0}".format(offering.section_code))
def make_textbook(self, is_required, isbn_number, course_code, section_code):
    """Create or update a textbook and link it to a course's sections.

    Args:
        is_required: stored as ``is_required`` on every ``TextbookLink``.
        isbn_number: ISBN used both for the Amazon lookup (as a string) and
            as the ``Textbook`` lookup key.
        course_code: the first ``Course`` of ``self.school`` whose code
            contains this value is used.
        section_code: only sections of that course whose ``meeting_section``
            equals this value are linked.

    Side effects:
        Adds 1 to ``self.create_count`` when a new ``Textbook`` row is
        created and 1 to ``self.identified_count`` when an existing one is
        reused. Prints progress to stdout. Returns ``None`` early when the
        course does not exist or Amazon returns no data.
    """
    # Commented out ingestor for now.
    # Update/Create textbook.
    # self.ingestor['isbn'] = isbn_number
    # self.ingestor.update(amazon_textbook_fields(str(isbn_number)))
    # self.ingestor.ingest_textbook()
    #
    # Link to all course offerings.
    # self.ingestor['school'] = self.school
    # self.ingestor['course_code'] = course_code
    # self.ingestor['section_code'] = section_code
    # self.ingestor['term'] = self.term
    # self.ingestor['year'] = self.year
    # self.ingestor['isbn'] = isbn_number
    # self.ingestor['required'] = is_required
    # self.ingestor.ingest_textbook_link()

    # Before DB schema update, use old code:
    try:
        course = Course.objects.filter(code__contains=course_code,
                                       school=self.school)[0]
    except IndexError:
        print("index error (course does not exist): " + course_code)
        return
    print(course)

    sections = Section.objects.filter(course=course,
                                      meeting_section=section_code)

    textbook_data = amazon_textbook_fields(str(isbn_number))
    if not textbook_data:
        print("No such textbook on Amazon")
        return

    # update/create textbook
    textbook, textbook_created = Textbook.objects.update_or_create(
        isbn=isbn_number, defaults=textbook_data)
    self.create_count += int(textbook_created)

    # link to all course offerings
    # The link's own "created" flag is deliberately discarded: rebinding the
    # textbook flag here made the report below describe the last link.
    for section in sections:
        TextbookLink.objects.update_or_create(is_required=is_required,
                                              section=section,
                                              textbook=textbook)

    # print results
    if textbook_created:
        message = "Textbook created: "
    else:
        self.identified_count += 1
        message = "Textbook found, not created: "
    try:
        print(message + str(textbook.title))
    except UnicodeEncodeError:
        # Best-effort console output: a title that cannot be encoded must
        # not abort the import.
        pass
def make_textbook(self, is_required, isbn_number, course_code, section_code):
    """Upsert one textbook and attach it to the matching course sections.

    The course is the first ``Course`` belonging to ``self.school`` whose
    code contains ``course_code``; the sections are those of that course
    whose ``meeting_section`` equals ``section_code``. Textbook details come
    from ``amazon_textbook_fields(str(isbn_number))`` and are written as the
    ``defaults`` of ``Textbook.objects.update_or_create``.

    Args:
        is_required: value stored on each ``TextbookLink``.
        isbn_number: ISBN of the textbook.
        course_code: (partial) course code to search for.
        section_code: meeting-section identifier to link against.

    Side effects:
        ``self.create_count`` grows by one for a newly created textbook,
        ``self.identified_count`` by one for an already known textbook.
        Status lines are printed to stdout. Nothing is written when the
        course is missing or Amazon has no data for the ISBN.
    """
    # Commented out ingestor for now.
    # Update/Create textbook.
    # self.ingestor['isbn'] = isbn_number
    # self.ingestor.update(amazon_textbook_fields(str(isbn_number)))
    # self.ingestor.ingest_textbook()
    #
    # Link to all course offerings.
    # self.ingestor['school'] = self.school
    # self.ingestor['course_code'] = course_code
    # self.ingestor['section_code'] = section_code
    # self.ingestor['term'] = self.term
    # self.ingestor['year'] = self.year
    # self.ingestor['isbn'] = isbn_number
    # self.ingestor['required'] = is_required
    # self.ingestor.ingest_textbook_link()

    # Before DB schema update, use old code:
    matching_courses = Course.objects.filter(code__contains=course_code,
                                             school=self.school)
    try:
        course = matching_courses[0]
    except IndexError:
        print("index error (course does not exist): " + course_code)
        return
    print(course)

    sections = Section.objects.filter(course=course,
                                      meeting_section=section_code)

    textbook_data = amazon_textbook_fields(str(isbn_number))
    if not textbook_data:
        print("No such textbook on Amazon")
        return

    # update/create textbook
    textbook, is_new_textbook = Textbook.objects.update_or_create(
        isbn=isbn_number,
        defaults=textbook_data,
    )
    self.create_count += int(is_new_textbook)

    # link to all course offerings
    for section in sections:
        # Do not rebind the textbook's "created" flag (or the loop variable)
        # with the link's result; the report below is about the textbook.
        TextbookLink.objects.update_or_create(
            is_required=is_required,
            section=section,
            textbook=textbook,
        )

    # print results
    if not is_new_textbook:
        self.identified_count += 1
    prefix = ("Textbook created: " if is_new_textbook
              else "Textbook found, not created: ")
    try:
        print(prefix + str(textbook.title))
    except UnicodeEncodeError:
        # Console output is best-effort; an unencodable title is ignored.
        pass
def _parse_material(self, material):
    """Ingest every textbook listed in one course-material group.

    The group's ``id`` attribute (``material-group_<STATUS>``) decides whether
    its books are required: only the status ``REQUIRED`` sets
    ``self.ingestor['required']`` to ``True``. Each ``<ul>`` inside the group
    is treated as one book whose ISBN lives in ``<span id="materialISBN">``,
    prefixed by a ``<strong>`` label that is removed before reading the text.

    Books without an ISBN span or with an empty ISBN are skipped. When
    ``amazon_textbook_fields`` returns ``None`` the descriptive fields are
    reset to ``None`` so values from the previous book do not leak into this
    one.

    Args:
        material: parsed HTML element for the material group; must support
            ``material['id']`` and ``find_all``.
    """
    required = re.match('material-group_(.*)', material['id']).group(1)
    self.ingestor['required'] = required == 'REQUIRED'

    for book in material.find_all('ul'):
        isbn_span = book.find('span', id='materialISBN')
        if isbn_span is None:
            # No ISBN element: nothing to look up or link.
            continue

        # Drop the leading <strong> label so only the number remains.
        label = isbn_span.find('strong')
        if label is not None:
            label.extract()

        isbn = isbn_span.text.strip()
        if not isbn:
            continue
        self.ingestor['isbn'] = str(isbn)

        amazon_fields = amazon_textbook_fields(isbn)
        if amazon_fields is None:
            # Clear the fields left over from the previous book.
            amazon_fields = {
                'detail_url': None,
                'image_url': None,
                'author': None,
                'title': None,
            }
        self.ingestor.update(amazon_fields)

        self.ingestor.ingest_textbook()
        self.ingestor.ingest_textbook_link()
def _parse_material(self, material):
    """Feed the books of a single material group into the ingestor.

    ``material["id"]`` has the form ``material-group_<STATUS>``; the books
    are flagged as required only when ``<STATUS>`` is ``REQUIRED``. Every
    ``<ul>`` in the group describes one book. Its ISBN is the text of
    ``<span id="materialISBN">`` once the ``<strong>`` label inside the span
    has been removed.

    A book is skipped when it has no ISBN span or the ISBN text is empty.
    If ``amazon_textbook_fields`` yields ``None``, the descriptive ingestor
    fields are set to ``None`` before ingesting, so data from an earlier
    book is not reused.

    Args:
        material: parsed HTML element of the group; it is indexed with
            ``"id"`` and searched with ``find_all``.
    """
    status = re.match("material-group_(.*)", material["id"]).group(1)
    self.ingestor["required"] = status == "REQUIRED"

    empty_fields = {
        "detail_url": None,
        "image_url": None,
        "author": None,
        "title": None,
    }

    for book in material.find_all("ul"):
        isbn_tag = book.find("span", id="materialISBN")
        if isbn_tag is None:
            continue  # book entry carries no ISBN at all

        strong_label = isbn_tag.find("strong")
        if strong_label is not None:
            strong_label.extract()  # strip the label, keep the number

        isbn = isbn_tag.text.strip()
        if not isbn:
            continue

        self.ingestor["isbn"] = str(isbn)
        fields = amazon_textbook_fields(isbn)
        # None means "no Amazon data": overwrite stale values explicitly.
        self.ingestor.update(empty_fields if fields is None else fields)
        self.ingestor.ingest_textbook()
        self.ingestor.ingest_textbook_link()
def _parse_textbooks(self, soup):
    """Extract textbook ISBNs from a parsed page and ingest them.

    ISBN spans and status spans are located by their element ids and paired
    positionally. Each pair becomes ``{'isbn': <digits only>, 'required':
    <bool>}``, where ``required`` is true when the status text starts with
    ``R``/``r``.

    When ``self.textbooks`` is truthy, every entry whose ISBN has 10 or 13
    digits is enriched with ``amazon_textbook_fields``, pushed through
    ``self.ingestor.ingest_textbook()``, and recorded as a ``textbook_link``
    entry in ``self.ingestor['textbooks']``.

    Args:
        soup: parsed page supporting ``find_all(name, id=<compiled regex>)``.
    """
    # BUG: nothing guarantees that the two searches return their spans in
    # matching order, so an ISBN may be paired with the wrong status.
    isbn_spans = soup.find_all(
        'span',
        id=re.compile(r'DERIVED_SSR_TXB_SSR_TXBDTL_ISBN\$\d*')
    )
    status_spans = soup.find_all(
        'span',
        id=re.compile(r'DERIVED_SSR_TXB_SSR_TXB_STATDESCR\$\d*')
    )

    # Remove extra characters from isbn and transform Required into boolean.
    # Built as a real list of dicts so this works on Python 2 and 3 alike;
    # slicing ([:1]) tolerates an empty status text.
    textbooks = [
        {
            'isbn': ''.join(ch for ch in isbn_span.text if ch.isdigit()),
            'required': status_span.text[:1].upper() == 'R',
        }
        for isbn_span, status_span in zip(isbn_spans, status_spans)
    ]

    # Create textbooks.
    if not self.textbooks:
        return

    for textbook in textbooks:
        if len(textbook['isbn']) not in (10, 13):
            continue  # NOTE: might skip some malformed-isbn values

        amazon_fields = amazon_textbook_fields(textbook['isbn'])
        if amazon_fields is None:
            # Make sure to clear ingestor from prev (temp fix)
            amazon_fields = {
                'detail_url': None,
                'image_url': None,
                'author': None,
                'title': None,
            }
        textbook.update(amazon_fields)

        self.ingestor.update(textbook)
        self.ingestor.ingest_textbook()
        self.ingestor.setdefault('textbooks', []).append({
            'kind': 'textbook_link',
            'isbn': textbook['isbn'],
            'required': textbook['required'],
        })