Python make_tree4urlの例、triton_scraper.fetchparse.make_tree4url Pythonの例

コード例 #1

0

ファイルを表示

ファイル: bookstore.py プロジェクト: yiming-cai/TritonScraper

def books_on(bookstore_url_from_tritonlink):
    """Build the book list for one course section from its UCSD Bookstore page.

    The first "https" in the URL is rewritten to "http" before fetching.
    The page's book cells are then consumed six at a time, one group per
    book.

    :param bookstore_url_from_tritonlink: UCSD Bookstore website URL for a course section
    :type bookstore_url_from_tritonlink: string
    :rtype: :class:`BookList`
    """
    plain_url = bookstore_url_from_tritonlink.replace("https", "http", 1)
    fetch = make_tree4url()
    tree, _url = fetch(plain_url)
    books = BookList()
    cells = _skipping_availability_side_headers(book_cells(tree))
    for row in grouper(6, cells):
        if config.LACK_BOOK_LIST in row[0].text:
            # The page says there is no book list for this section.
            return BookList(unknown=True)
        texts = [cell.text for cell in row[:5]]
        _sections, _instructor, required_code, author, title_and_isbn = texts
        availability = row[-1]
        is_required = required_code == config.REQUIRED_BOOK_CODE
        # Cell looks like
        # "Principles Of General Chemistry, 2 Edition, 9780077470500";
        # only the last ", " separates the title from the ISBN.
        title, isbn = title_and_isbn.rsplit(", ", 1)
        if config.NO_TEXTBOOK_REQUIRED in title:
            return BookList(required=[])
        if config.AS_SOFT_RESERVES in title:
            books.as_soft_reserves = True
            continue
        discount_nodes = discounted_price(availability)
        if not discount_nodes:
            # No discounted price: the cell text holds the new and used
            # availability on two lines.
            new_text, used_text = availability.text.split("\n")
            new_price = _availability2price(new_text)
        else:
            node = discount_nodes[0]
            if config.IN_STOCK in availability.text:
                # Markup looks like
                # New Books, Not in Stock*, Retail Price: $65.70, Discounted Price: <FONT COLOR="#008000">$21.03</FONT>
                # so drop the leading dollar sign from the element text.
                new_price = Decimal(node.text[1:])
            else:
                new_price = NaN
            # The used-copy text trails the discount element.
            used_text = node.tail
        used_price = _availability2price(used_text)
        books.add_book(Book(isbn, new_price, used_price, title, author),
                       is_required)
    return books

コード例 #2

0

ファイルを表示

ファイル: bookstore.py プロジェクト: cvrebert/TritonScraper

def books_on(bookstore_url_from_tritonlink):
    """Return the books listed on a course section's UCSD Bookstore page.

    The page is requested with the first "https" in the URL replaced by
    "http". Book cells are read in groups of six, one group per book.

    :param bookstore_url_from_tritonlink: UCSD Bookstore website URL for a course section
    :type bookstore_url_from_tritonlink: string
    :rtype: :class:`BookList`
    """
    http_url = bookstore_url_from_tritonlink.replace("https", "http", 1)
    page_tree, _url = make_tree4url()(http_url)
    result = BookList()
    cell_stream = _skipping_availability_side_headers(book_cells(page_tree))
    for group in grouper(6, cell_stream):
        first_cell = group[0]
        if config.LACK_BOOK_LIST in first_cell.text:
            # No book list is published for this section.
            return BookList(unknown=True)
        leading_texts = [cell.text for cell in group[:5]]
        (_sections, _instructor, required_flag, author,
         title_comma_isbn) = leading_texts
        availability = group[-1]
        book_is_required = required_flag == config.REQUIRED_BOOK_CODE
        # e.g. "Principles Of General Chemistry, 2 Edition, 9780077470500":
        # split on the last ", " only, since titles may contain commas.
        title, isbn = title_comma_isbn.rsplit(", ", 1)
        if config.NO_TEXTBOOK_REQUIRED in title:
            return BookList(required=[])
        if config.AS_SOFT_RESERVES in title:
            result.as_soft_reserves = True
            continue
        matches = discounted_price(availability)
        if matches:
            first_match = matches[0]
            in_stock = config.IN_STOCK in availability.text
            # Markup example:
            # New Books, Not in Stock*, Retail Price: $65.70, Discounted Price: <FONT COLOR="#008000">$21.03</FONT>
            # text[1:] removes the dollar sign.
            new_price = Decimal(first_match.text[1:]) if in_stock else NaN
            used_raw = first_match.tail
        else:
            new_raw, used_raw = availability.text.split("\n")
            new_price = _availability2price(new_raw)
        used_price = _availability2price(used_raw)
        book = Book(isbn, new_price, used_price, title, author)
        result.add_book(book, book_is_required)
    return result

コード例 #3

0

ファイルを表示

ファイル: cape.py プロジェクト: yiming-cai/TritonScraper

from contextlib import closing as _closing
from time import sleep as _sleep
from decimal import Decimal
from collections import namedtuple as _namedtuple  #, OrderedDict
from urlparse import urljoin as _urljoin
from urllib import urlencode as _urlencode

import triton_scraper.config
from triton_scraper.util import RELATIVE_PREFIX, XPath
from triton_scraper.fetchparse import make_tree4url
from lxml import etree

# URL that this module uses as the CAPE search page.
CAPE_SEARCH_URL = "http://www.cape.ucsd.edu/stats.html"

# Fetcher created once at import time; calling it with a URL yields a
# (tree, url) pair (see url2tree below).
_tree4url = make_tree4url()


def url2tree(url):
    """Fetch ``url`` with the module-level fetcher and return only the tree.

    The fetcher returns a two-item result; the second item is discarded.
    """
    fetched = _tree4url(url)
    parsed_tree, _final_url = fetched
    return parsed_tree


# FIXME: enable the SelfCAPE matcher below (currently commented out)
# self_cape = XPath(RELATIVE_PREFIX+"/div[@align='right' and text() = 'SelfCAPE']")

# Matcher for <form name="searchQuery"> elements; the expression is
# RELATIVE_PREFIX followed by the form step.
search_forms = XPath(RELATIVE_PREFIX + "/form[@name='searchQuery']")
# Matcher for <select> elements; the expression is RELATIVE_PREFIX followed
# by the select step.
select_elements = XPath(RELATIVE_PREFIX + "/select")


def _search_form_and_select_tag():

コード例 #4

0

ファイルを表示

ファイル: cape.py プロジェクト: cvrebert/TritonScraper

from contextlib import closing as _closing
from time import sleep as _sleep
from decimal import Decimal
from collections import namedtuple as _namedtuple#, OrderedDict
from urlparse import urljoin as _urljoin
from urllib import urlencode as _urlencode

import triton_scraper.config
from triton_scraper.util import RELATIVE_PREFIX, XPath
from triton_scraper.fetchparse import make_tree4url
from lxml import etree

# URL that this module uses as the CAPE search page
# (fetched by _search_form_and_select_tag).
CAPE_SEARCH_URL = "http://www.cape.ucsd.edu/stats.html"

# Fetcher created once at import time; calling it with a URL yields a
# (tree, url) pair (see url2tree).
_tree4url = make_tree4url()
def url2tree(url):
    """Return the tree the module-level fetcher produces for ``url``.

    The fetcher's result is a two-item pair; its second item is ignored.
    """
    pair = _tree4url(url)
    document, _resolved_url = pair
    return document

# FIXME: enable the SelfCAPE matcher below (currently commented out)
# self_cape = XPath(RELATIVE_PREFIX+"/div[@align='right' and text() = 'SelfCAPE']")

# Matcher for <form name="searchQuery"> elements; the expression is
# RELATIVE_PREFIX followed by the form step.
search_forms = XPath(RELATIVE_PREFIX+"/form[@name='searchQuery']")
# Matcher for <select> elements; the expression is RELATIVE_PREFIX followed
# by the select step.
select_elements = XPath(RELATIVE_PREFIX+"/select")
def _search_form_and_select_tag():
    """Fetch the CAPE search page and return ``(form, select)``.

    ``form`` is the first match of ``search_forms`` on the page. ``select``
    is the first match of ``select_elements`` evaluated against that form.
    """
    page = url2tree(CAPE_SEARCH_URL)
    search_form = search_forms(page)[0]
    return search_form, select_elements(search_form)[0]

コード例 #5

0

ファイルを表示

ファイル: restriction_codes.py プロジェクト: cvrebert/TritonScraper

# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

"""
This module fetches and holds data on TritonLink course restriction codes.

:copyright: (c) 2010 by Christopher Rebert.
:license: MIT, see :file:`LICENSE.txt` for more details.
"""

from triton_scraper.util import *
from triton_scraper.config import RESTRICTION_CODE_URL as _RESTRICTION_CODE_URL
from triton_scraper.fetchparse import make_tree4url

# Matcher for the text of every <td> inside <tr> rows that have no bgcolor
# attribute; the expression is RELATIVE_PREFIX followed by that path.
restriction_codes_and_descriptions = XPath(RELATIVE_PREFIX+"/tr[not(@bgcolor)]/td/text()")

# The restriction-code page is fetched at import time.
tree, _url = make_tree4url()(_RESTRICTION_CODE_URL)
# Cell texts are taken two at a time as (code, description) pairs.
# The `if code.strip()` filter is needed to ignore a blank row.
_CODE2DESCRIPTION = dict((code, desc) for code, desc in grouper(2, restriction_codes_and_descriptions(tree)) if code.strip())
# Remove the temporaries so they do not linger in the module namespace.
del tree, _url

def restriction_code2description(code):
    """Return the description recorded for ``code``.

    Codes missing from the fetched table are returned wrapped in double
    quotes, because the webpage listing the codes is
    outdated/non-exhaustive.
    """
    try:
        description = _CODE2DESCRIPTION[code]
    except KeyError:
        return '"%s"' % code
    else:
        return description

コード例 #6

0

ファイルを表示

ファイル: browser.py プロジェクト: yiming-cai/TritonScraper

 def __init__(self):
     """Takes no arguments.

     Stores the object returned by ``make_tree4url()`` on the instance as
     ``_tree4url``.
     """
     self._tree4url = make_tree4url()

コード例 #7

0

ファイルを表示

ファイル: locations.py プロジェクト: yiming-cai/TritonScraper

        self.code = code
        #: Descriptive name of building
        #: :type: string
        self.name = name
        #: Area of UCSD campus that building is located in (e.g. "University Center")
        #: :type: string
        self.area = area

    __FORMAT = "{0.name} ({0.code}) in {0.area}"

    def __str__(self):
        """Render this object through the class's private format template."""
        template = self.__FORMAT
        return template.format(self)


# Initialize Building._CODE2OBJ
# The building-code page is fetched at import time; only the tree (item 0 of
# the fetcher's result) is kept.
tree = make_tree4url()(BUILDING_CODE_URL)[0]
# Table texts are consumed four at a time: code, name, area, map number.
for quadruple in grouper(4, building_info_table_texts(tree)):
    # The map number is unpacked but not stored on the Building.
    code, name, area, _map_num = (s.strip() for s in quadruple)
    Building._CODE2OBJ[code] = Building(code, name, area)
# Drop loop temporaries from the module namespace.
# NOTE(review): if the table yields no rows, the loop variables are never
# bound and this `del` raises NameError.
del quadruple, tree, code, name, area, _map_num

# Begin total HACKS
# Manual additions and aliases layered on top of the scraped table.
# "LEDDN" becomes an alias for the existing "LEDDN AUD" entry (same object).
Building._CODE2OBJ["LEDDN"] = Building._CODE2OBJ[
    "LEDDN AUD"]  # Dammit TritonLink, "AUD" isn't a room!
Building._CODE2OBJ["CPMC"] = Building(
    "CPMC", "Conrad Prebys Music Center",
    "Sixth")  # TritonLink is outdated. Sixth is a guess.
Building._CODE2OBJ["OTRSN"] = Building("OTRSN",
                                       "Otterson Hall (i.e. Rady School)",
                                       "Roosevelt")  # TritonLink is outdated
Building._CODE2OBJ["TM102"] = Building(

コード例 #8

0

ファイルを表示

ファイル: restriction_codes.py プロジェクト: yiming-cai/TritonScraper

# THE SOFTWARE.
"""
This module fetches and holds data on TritonLink course restriction codes.

:copyright: (c) 2010 by Christopher Rebert.
:license: MIT, see :file:`LICENSE.txt` for more details.
"""

from triton_scraper.util import *
from triton_scraper.config import RESTRICTION_CODE_URL as _RESTRICTION_CODE_URL
from triton_scraper.fetchparse import make_tree4url

# Matcher for the text of every <td> inside <tr> rows that have no bgcolor
# attribute; the expression is RELATIVE_PREFIX followed by that path.
restriction_codes_and_descriptions = XPath(RELATIVE_PREFIX +
                                           "/tr[not(@bgcolor)]/td/text()")

# The restriction-code page is fetched at import time.
tree, _url = make_tree4url()(_RESTRICTION_CODE_URL)
# Cell texts are taken two at a time as (code, description) pairs.
# The `if code.strip()` filter is needed to ignore a blank row.
_CODE2DESCRIPTION = dict(
    (code, desc)
    for code, desc in grouper(2, restriction_codes_and_descriptions(tree))
    if code.strip())
# Remove the temporaries so they do not linger in the module namespace.
del tree, _url


def restriction_code2description(code):
    """Look up the description recorded for a restriction ``code``.

    An unknown code is returned wrapped in double quotes, because the
    webpage listing the codes is outdated/non-exhaustive.
    """
    try:
        found = _CODE2DESCRIPTION[code]
    except KeyError:
        return '"%s"' % code
    else:
        return found

コード例 #9

0

ファイルを表示

ファイル: locations.py プロジェクト: cvrebert/TritonScraper

        #: UCSD building code (e.g. "CSB")
        #: :type: string
        self.code = code
        #: Descriptive name of building
        #: :type: string
        self.name = name
        #: Area of UCSD campus that building is located in (e.g. "University Center")
        #: :type: string
        self.area = area
    
    __FORMAT = "{0.name} ({0.code}) in {0.area}"
    def __str__(self):
        """Render this object through the class's private format template."""
        template = self.__FORMAT
        return template.format(self)

# Initialize Building._CODE2OBJ
# The building-code page is fetched at import time; only the tree (item 0 of
# the fetcher's result) is kept.
tree = make_tree4url()(BUILDING_CODE_URL)[0]
# Table texts are consumed four at a time: code, name, area, map number.
for quadruple in grouper(4, building_info_table_texts(tree)):
    # The map number is unpacked but not stored on the Building.
    code, name, area, _map_num = (s.strip() for s in quadruple)
    Building._CODE2OBJ[code] = Building(code, name, area)
# Drop loop temporaries from the module namespace.
# NOTE(review): if the table yields no rows, the loop variables are never
# bound and this `del` raises NameError.
del quadruple, tree, code, name, area, _map_num

# Begin total HACKS
# Manual additions and aliases layered on top of the scraped table.
# "LEDDN" becomes an alias for the existing "LEDDN AUD" entry (same object).
Building._CODE2OBJ["LEDDN"] = Building._CODE2OBJ["LEDDN AUD"] # Dammit TritonLink, "AUD" isn't a room!
Building._CODE2OBJ["CPMC"] = Building("CPMC", "Conrad Prebys Music Center", "Sixth") # TritonLink is outdated. Sixth is a guess.
Building._CODE2OBJ["OTRSN"] = Building("OTRSN", "Otterson Hall (i.e. Rady School)", "Roosevelt") # TritonLink is outdated
Building._CODE2OBJ["TM102"] = Building("TM102", "TM102", "TM102") # Mystery building not in building code index
Building._CODE2OBJ["MYR-A"] = Building("MYR-A", '"MYR-A"', "MYR-A") # Mystery building not in building code index
Building._CODE2OBJ["SPIES"] = Building("SPIES", "SPIES", "SIO") # Mystery building not in building code index. SIO is a guess.
# Less confusing: relabel the `code` attribute of the existing CSB entry in
# place; the dictionary key stays "CSB".
Building._CODE2OBJ["CSB"].code = "CogSci (a.k.a. CSB)" # CSB != CompSci Bldg
#       Reflect situation on the ground :-)