def books_on(bookstore_url_from_tritonlink):
    """Build the book list for a course section from its UCSD Bookstore page.

    :param bookstore_url_from_tritonlink: UCSD Bookstore website URL for a course section
    :type bookstore_url_from_tritonlink: string
    :rtype: :class:`BookList`
    """
    # Only the first occurrence of "https" in the URL is rewritten to "http".
    plain_url = bookstore_url_from_tritonlink.replace("https", "http", 1)
    page, _fetched_url = make_tree4url()(plain_url)
    result = BookList()
    cells = _skipping_availability_side_headers(book_cells(page))
    # Cells are consumed six at a time: the first five are read as text,
    # the last one is kept as an element because it carries the pricing markup.
    for row in grouper(6, cells):
        if config.LACK_BOOK_LIST in row[0].text:
            # No book list
            return BookList(unknown=True)
        _sections, _instructor, requirement_code, author, title_and_isbn = [
            cell.text for cell in row[:5]]
        availability_cell = row[-1]
        is_required = requirement_code == config.REQUIRED_BOOK_CODE
        # Split on the LAST ", " only, since titles may contain commas, e.g.
        # Principles Of General Chemistry, 2 Edition, 9780077470500
        title, isbn = title_and_isbn.rsplit(", ", 1)
        if config.NO_TEXTBOOK_REQUIRED in title:
            return BookList(required=[])
        if config.AS_SOFT_RESERVES in title:
            result.as_soft_reserves = True
            continue
        discounts = discounted_price(availability_cell)
        if not discounts:
            # No discount markup: the cell text is split into exactly two
            # lines, the first for new copies and the second for used ones.
            new_text, used_text = availability_cell.text.split("\n")
            new_price = _availability2price(new_text)
        else:
            discount = discounts[0]
            if config.IN_STOCK in availability_cell.text:
                # New Books, Not in Stock*, Retail Price: $65.70, Discounted Price: <FONT COLOR="#008000">$21.03</FONT>
                new_price = Decimal(discount.text[1:])  # remove dollar sign
            else:
                new_price = NaN
            # Whatever follows the discount element is parsed as the used price below.
            used_text = discount.tail
        used_price = _availability2price(used_text)
        result.add_book(Book(isbn, new_price, used_price, title, author), is_required)
    return result
def books_on(bookstore_url_from_tritonlink):
    """Build the book list for a course section from its UCSD Bookstore page.

    :param bookstore_url_from_tritonlink: UCSD Bookstore website URL for a course section
    :type bookstore_url_from_tritonlink: string
    :rtype: :class:`BookList`
    """
    # Only the first occurrence of "https" in the URL is rewritten to "http".
    plain_url = bookstore_url_from_tritonlink.replace("https", "http", 1)
    page, _fetched_url = make_tree4url()(plain_url)
    result = BookList()
    cells = _skipping_availability_side_headers(book_cells(page))
    # Cells are consumed six at a time: the first five are read as text,
    # the last one is kept as an element because it carries the pricing markup.
    for row in grouper(6, cells):
        if config.LACK_BOOK_LIST in row[0].text:
            # No book list
            return BookList(unknown=True)
        _sections, _instructor, requirement_code, author, title_and_isbn = [
            cell.text for cell in row[:5]]
        availability_cell = row[-1]
        is_required = requirement_code == config.REQUIRED_BOOK_CODE
        # Split on the LAST ", " only, since titles may contain commas, e.g.
        # Principles Of General Chemistry, 2 Edition, 9780077470500
        title, isbn = title_and_isbn.rsplit(", ", 1)
        if config.NO_TEXTBOOK_REQUIRED in title:
            return BookList(required=[])
        if config.AS_SOFT_RESERVES in title:
            result.as_soft_reserves = True
            continue
        discounts = discounted_price(availability_cell)
        if not discounts:
            # No discount markup: the cell text is split into exactly two
            # lines, the first for new copies and the second for used ones.
            new_text, used_text = availability_cell.text.split("\n")
            new_price = _availability2price(new_text)
        else:
            discount = discounts[0]
            if config.IN_STOCK in availability_cell.text:
                # New Books, Not in Stock*, Retail Price: $65.70, Discounted Price: <FONT COLOR="#008000">$21.03</FONT>
                new_price = Decimal(discount.text[1:])  # remove dollar sign
            else:
                new_price = NaN
            # Whatever follows the discount element is parsed as the used price below.
            used_text = discount.tail
        used_price = _availability2price(used_text)
        result.add_book(Book(isbn, new_price, used_price, title, author), is_required)
    return result
from contextlib import closing as _closing from time import sleep as _sleep from decimal import Decimal from collections import namedtuple as _namedtuple #, OrderedDict from urlparse import urljoin as _urljoin from urllib import urlencode as _urlencode import triton_scraper.config from triton_scraper.util import RELATIVE_PREFIX, XPath from triton_scraper.fetchparse import make_tree4url from lxml import etree CAPE_SEARCH_URL = "http://www.cape.ucsd.edu/stats.html" _tree4url = make_tree4url() def url2tree(url): tree, _url = _tree4url(url) return tree #FIXME: enable # self_cape = XPath(RELATIVE_PREFIX+"/div[@align='right' and text() = 'SelfCAPE']") search_forms = XPath(RELATIVE_PREFIX + "/form[@name='searchQuery']") select_elements = XPath(RELATIVE_PREFIX + "/select") def _search_form_and_select_tag():
from contextlib import closing as _closing
from time import sleep as _sleep
from decimal import Decimal
from collections import namedtuple as _namedtuple#, OrderedDict
from urlparse import urljoin as _urljoin
from urllib import urlencode as _urlencode

import triton_scraper.config
from triton_scraper.util import RELATIVE_PREFIX, XPath
from triton_scraper.fetchparse import make_tree4url
from lxml import etree

CAPE_SEARCH_URL = "http://www.cape.ucsd.edu/stats.html"

# Single module-wide fetcher shared by every url2tree() call.
_tree4url = make_tree4url()


def url2tree(url):
    """Fetch *url* through the module-level fetcher and return only the tree.

    The fetcher returns a pair; its second item is discarded here.
    """
    parsed, _ignored_url = _tree4url(url)
    return parsed


#FIXME: enable
# self_cape = XPath(RELATIVE_PREFIX+"/div[@align='right' and text() = 'SelfCAPE']")
search_forms = XPath(RELATIVE_PREFIX+"/form[@name='searchQuery']")
select_elements = XPath(RELATIVE_PREFIX+"/select")


def _search_form_and_select_tag():
    """Return ``(form, select)`` taken from the page at CAPE_SEARCH_URL.

    ``form`` is the first match of ``search_forms`` on the fetched tree and
    ``select`` is the first match of ``select_elements`` within that form.
    Indexing a missing match raises, exactly as plain ``[0]`` would.
    """
    page = url2tree(CAPE_SEARCH_URL)
    query_form = search_forms(page)[0]
    return query_form, select_elements(query_form)[0]
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

"""
This module fetches and holds data on TritonLink course restriction codes.

:copyright: (c) 2010 by Christopher Rebert.
:license: MIT, see :file:`LICENSE.txt` for more details.
"""

from triton_scraper.util import *
from triton_scraper.config import RESTRICTION_CODE_URL as _RESTRICTION_CODE_URL
from triton_scraper.fetchparse import make_tree4url

restriction_codes_and_descriptions = XPath(RELATIVE_PREFIX+"/tr[not(@bgcolor)]/td/text()")

# The code table is fetched once, at import time.
tree, _url = make_tree4url()(_RESTRICTION_CODE_URL)
# The matched texts are paired up as (code, description).
# if .strip() needed to ignore blank row
_CODE2DESCRIPTION = dict(
    pair
    for pair in grouper(2, restriction_codes_and_descriptions(tree))
    if pair[0].strip()
)
# Drop the temporaries so they do not linger in the module namespace.
del tree, _url


def restriction_code2description(code):
    """Return the description recorded for *code*.

    When *code* is not in the fetched table, the code itself is returned
    wrapped in double quotes instead of raising.
    """
    if code in _CODE2DESCRIPTION:
        return _CODE2DESCRIPTION[code]
    # The webpage listing the codes is outdated/non-exhaustive
    return '"%s"' % code
def __init__(self):
    """Takes no arguments.

    Stores the object returned by ``make_tree4url()`` as ``self._tree4url``.
    """
    tree_fetcher = make_tree4url()
    self._tree4url = tree_fetcher
self.code = code #: Descriptive name of building #: :type: string self.name = name #: Area of UCSD campus that building is located in (e.g. "University Center") #: :type: string self.area = area __FORMAT = "{0.name} ({0.code}) in {0.area}" def __str__(self): return self.__FORMAT.format(self) # Initialize Building._CODE2OBJ tree = make_tree4url()(BUILDING_CODE_URL)[0] for quadruple in grouper(4, building_info_table_texts(tree)): code, name, area, _map_num = (s.strip() for s in quadruple) Building._CODE2OBJ[code] = Building(code, name, area) del quadruple, tree, code, name, area, _map_num # Begin total HACKS Building._CODE2OBJ["LEDDN"] = Building._CODE2OBJ[ "LEDDN AUD"] # Dammit TritonLink, "AUD" isn't a room! Building._CODE2OBJ["CPMC"] = Building( "CPMC", "Conrad Prebys Music Center", "Sixth") # TritonLink is outdated. Sixth is a guess. Building._CODE2OBJ["OTRSN"] = Building("OTRSN", "Otterson Hall (i.e. Rady School)", "Roosevelt") # TritonLink is outdated Building._CODE2OBJ["TM102"] = Building(
# THE SOFTWARE.

"""
This module fetches and holds data on TritonLink course restriction codes.

:copyright: (c) 2010 by Christopher Rebert.
:license: MIT, see :file:`LICENSE.txt` for more details.
"""

from triton_scraper.util import *
from triton_scraper.config import RESTRICTION_CODE_URL as _RESTRICTION_CODE_URL
from triton_scraper.fetchparse import make_tree4url

restriction_codes_and_descriptions = XPath(RELATIVE_PREFIX + "/tr[not(@bgcolor)]/td/text()")

# The code table is fetched once, at import time.
tree, _url = make_tree4url()(_RESTRICTION_CODE_URL)
# The matched texts are paired up as (code, description).
# if .strip() needed to ignore blank row
_CODE2DESCRIPTION = dict(
    pair
    for pair in grouper(2, restriction_codes_and_descriptions(tree))
    if pair[0].strip()
)
# Drop the temporaries so they do not linger in the module namespace.
del tree, _url


def restriction_code2description(code):
    """Return the description recorded for *code*.

    When *code* is not in the fetched table, the code itself is returned
    wrapped in double quotes instead of raising.
    """
    if code in _CODE2DESCRIPTION:
        return _CODE2DESCRIPTION[code]
    # The webpage listing the codes is outdated/non-exhaustive
    return '"%s"' % code
#: UCSD building code (e.g. "CSB") #: :type: string self.code = code #: Descriptive name of building #: :type: string self.name = name #: Area of UCSD campus that building is located in (e.g. "University Center") #: :type: string self.area = area __FORMAT = "{0.name} ({0.code}) in {0.area}" def __str__(self): return self.__FORMAT.format(self) # Initialize Building._CODE2OBJ tree = make_tree4url()(BUILDING_CODE_URL)[0] for quadruple in grouper(4, building_info_table_texts(tree)): code, name, area, _map_num = (s.strip() for s in quadruple) Building._CODE2OBJ[code] = Building(code, name, area) del quadruple, tree, code, name, area, _map_num # Begin total HACKS Building._CODE2OBJ["LEDDN"] = Building._CODE2OBJ["LEDDN AUD"] # Dammit TritonLink, "AUD" isn't a room! Building._CODE2OBJ["CPMC"] = Building("CPMC", "Conrad Prebys Music Center", "Sixth") # TritonLink is outdated. Sixth is a guess. Building._CODE2OBJ["OTRSN"] = Building("OTRSN", "Otterson Hall (i.e. Rady School)", "Roosevelt") # TritonLink is outdated Building._CODE2OBJ["TM102"] = Building("TM102", "TM102", "TM102") # Mystery building not in building code index Building._CODE2OBJ["MYR-A"] = Building("MYR-A", '"MYR-A"', "MYR-A") # Mystery building not in building code index Building._CODE2OBJ["SPIES"] = Building("SPIES", "SPIES", "SIO") # Mystery building not in building code index. SIO is a guess. # Less confusing Building._CODE2OBJ["CSB"].code = "CogSci (a.k.a. CSB)" # CSB != CompSci Bldg # Reflect situation on the ground :-)