Esempio n. 1
0
def getListOfBases():
    """
    Download the list of bases offered at the Aleph main page.

    This function is here mainly for purposes of unittest.

    The ``base-list`` page is downloaded and lowercased, then every ``<a>``
    element whose ``href`` mentions ``local_base`` is inspected and the value
    of its first ``local_base=XXX`` section is collected. Links which mention
    ``local_base`` but carry no ``local_base=`` section are skipped.

    Returns:
        list of str: Unique (lowercased) bases as they are used as URL
                     parameters in links at Aleph main page. Order of the
                     items is not defined.
    """
    downer = Downloader()
    data = downer.download(ALEPH_URL + "/F/?func=file&file_name=base-list")
    dom = dhtmlparser.parseString(data.lower())

    bases = set()  # set is used to get rid of duplicates
    for link in dom.find("a"):
        # only links containing local_base in their href are interesting
        if "href" not in link.params:
            continue

        href = link.params["href"]
        if "local_base" not in href:
            continue

        # first "?" is handled as "&", so the query can be split by & - only
        # XXX from link.tld/..&local_base=XXX is needed
        for section in href.replace("?", "&", 1).split("&"):
            if "local_base=" in section:
                bases.add(section.split("=")[1].strip())
                break  # only first matching section of the link is used

    return list(bases)
Esempio n. 2
0
def login(username, password, http_proxy = None):
	"""
	Log given user into spotify.

	Login page is downloaded first, value of its `utm-keywords` input is
	read and sent back together with the credentials to the login endpoint.

	According to the original author, this is useful because users from
	unsupported countries have to login thru IP from supported country every
	~two weeks, or their account is frozen until they do so.

	Function supports http_proxy parameter in format "http://server:port".

	Raise:
	 - SpotifierException if the server response has the "error" field set
	   (message from the "msg" field is used as description).
	"""
	downloader = Downloader(http_proxy = http_proxy)

	# login page contains the `utm-keywords` input, which is sent back
	login_page = downloader.download(
		"https://www.spotify.com/us/login/?forward_url=%2Fus%2F",
	)
	page_dom = html.parseString(login_page)
	keywords_input = page_dom.find("input", {"name": "utm-keywords"})[0]

	form_data = {
		"referrer": "",
		"utm-keywords": keywords_input.params["value"],
		"user_name": username,
		"password": password
	}

	# response of the login endpoint is parsed as JSON
	response = json.loads(
		downloader.download(
			"https://www.spotify.com/us/xhr/json/login.php",
			post = form_data,
		)
	)

	if response["error"]:
		raise SpotifierException(response["msg"])
Esempio n. 3
0
def downloadMARCXML(doc_id, library, base="nkc"):
    """
    Download MARC XML document with given `doc_id` from given `library`.

    Downloaded data are parsed and checked for ``<error>`` elements nested in
    ``<login>`` or ``<ill-get-doc>`` elements. If there is none, the data are
    returned exactly as they were downloaded.

    Args:
        doc_id (DocumentID): You will get this from :func:`getDocumentIDs`.
        library (str): "``NKC01``" in our case, but don't worry,
                   :func:`getDocumentIDs` adds library specification into
                   :class:`DocumentID` named tuple.
        base (str, default "nkc"): Accepted, but not used by this function.

    Returns:
        str: MARC XML unicode string.

    Raises:
        LibraryNotFoundException: If ``<login>`` contains ``<error>``.
        DocumentNotFoundException: If ``<ill-get-doc>`` contains ``<error>``.
    """
    downer = Downloader()

    doc_url = ALEPH_URL + Template(DOC_URL_TEMPLATE).substitute(
        DOC_ID=doc_id,
        LIBRARY=library
    )
    data = downer.download(doc_url)

    dom = dhtmlparser.parseString(data)

    # <error> inside of <login> is reported as bad library
    login_tags = dom.find("login")
    library_errors = login_tags[0].find("error") if login_tags else []
    if library_errors:
        details = "\n".join(err.getContent() for err in library_errors)
        raise LibraryNotFoundException(
            "Can't download document doc_id: '" + str(doc_id) + "' " +
            "(probably bad library: '" + library + "')!\nMessage: " +
            details
        )

    # <error> inside of <ill-get-doc> means that document was not found
    doc_tags = dom.find("ill-get-doc")
    document_errors = doc_tags[0].find("error") if doc_tags else []
    if document_errors:
        raise DocumentNotFoundException(
            "\n".join(err.getContent() for err in document_errors)
        )

    # no error found - data is MARC XML of document with given doc_id
    return data
Esempio n. 4
0
def downloadMARCOAI(doc_id, base):
    """
    Download MARC OAI document with given `doc_id` from given (logical) `base`.

    Funny part is, that some documents can be obtained only with this function
    in their full text.

    Args:
        doc_id (str): You will get this from :func:`getDocumentIDs`.
        base (str): Base from which you want to download Aleph document.
                    This seems to be duplicite with :func:`searchInAleph`
                    parameters, but it's just something Aleph's X-Services
                    wants, so ..

    Returns:
        str: MARC XML Unicode string (downloaded data, if they contain no
             ``<error>`` element).

    Raises:
        DocumentNotFoundException: If content of the first ``<error>``
                                   contains "Error reading document".
        InvalidAlephBaseException: For any other ``<error>`` content.
    """
    downer = Downloader()

    url_params = Template(OAI_DOC_URL_TEMPLATE).substitute(
        DOC_ID=doc_id,
        BASE=base
    )
    data = downer.download(ALEPH_URL + url_params)

    # no <error> element means that document was downloaded successfully
    errors = dhtmlparser.parseString(data).find("error")
    if not errors:
        return data

    # only the first <error> is used to decide what went wrong
    message = errors[0].getContent()
    if "Error reading document" not in message:
        raise InvalidAlephBaseException(
            message + "\n" +
            "The base you are trying to access probably doesn't exist."
        )

    raise DocumentNotFoundException(str(message))
Esempio n. 5
0
def downloadRecords(search_result, from_doc=1):
    """
    Download at most `MAX_RECORDS` documents from `search_result` starting
    from `from_doc`.

    Args:
        search_result (dict): returned from :func:`searchInAleph`. Keys
                      ``set_number`` and ``no_records`` are used.
        from_doc (int, default 1): Start from document number `from_doc`.

    Returns:
        list: List of XML strings with documents in MARC OAI. Empty list is
              returned when there is no ``set_number`` in `search_result`.
    """
    # nothing can be downloaded without the set number - this is checked
    # first, so the Downloader is not created when it is not needed
    if "set_number" not in search_result:
        return []

    downer = Downloader()

    # set numbers should be probably aligned to some length
    set_number = str(search_result["set_number"]).rjust(6, "0")

    no_records = search_result["no_records"]

    # download records from_doc .. no_records, but never more than MAX_RECORDS
    records = []
    for cnt in range(min(no_records, MAX_RECORDS)):
        doc_number = from_doc + cnt

        if doc_number > no_records:
            break

        records.append(
            downer.download(
                ALEPH_URL + Template(RECORD_URL_TEMPLATE).substitute(
                    SET_NUM=set_number,
                    RECORD_NUM=doc_number,
                )
            )
        )

    return records
Esempio n. 6
0
def getDocumentIDs(aleph_search_result, number_of_docs=-1):
    """
    Get IDs, which can be used as parameters for other functions.

    Args:
        aleph_search_result (dict): returned from :func:`searchInAleph`
        number_of_docs (int, optional): how many :class:`DocumentID` from set
                          given by `aleph_search_result` should be returned.
                          Default -1 for all of them (any value <= 0 means
                          ``no_entries`` from `aleph_search_result`).

    Returns:
        list: :class:`DocumentID` named tuples to given `aleph_search_result`.
              Duplicate document numbers within one ``<ill-get-set>`` are
              returned only once, in the order in which they first appeared.
              Empty list is returned when there is no ``set_number`` in
              `aleph_search_result`.

    Raises:
        AlephException: If Aleph returns unknown format of data.

    Note:
        Returned :class:`DocumentID` can be used as parameters to
        :func:`downloadMARCXML`.
    """
    # nothing can be fetched without the set number - this is checked first,
    # so the Downloader is not created when it is not needed
    if "set_number" not in aleph_search_result:
        return []

    downer = Downloader()

    # set numbers should be probably aligned to some length
    set_number = str(aleph_search_result["set_number"]).rjust(6, "0")

    # limit number of fetched documents, if -1, download all
    if number_of_docs <= 0:
        number_of_docs = aleph_search_result["no_entries"]

    # download data about given set
    set_data = downer.download(
        ALEPH_URL + Template(SET_URL_TEMPLATE).substitute(
            SET_NUMBER=set_number,
            NUMBER_OF_DOCS=number_of_docs,
        )
    )

    # parse data
    dom = dhtmlparser.parseString(set_data)
    set_data = dom.find("ill-get-set")

    # there should be at least one <ill-get-set> field
    if len(set_data) <= 0:
        raise AlephException("Aleph didn't returned set data.")

    ids = []
    for library in set_data:
        documents = _alephResultToDict(library)

        if "error" in documents:
            raise AlephException("getDocumentIDs: " + documents["error"])

        doc_numbers = documents["doc-number"]

        # single record is not wrapped in list
        if not isinstance(doc_numbers, list):
            ids.append(
                DocumentID(
                    doc_numbers,
                    documents["set-library"],
                    aleph_search_result["base"]
                )
            )
            continue

        # convert all unique document records to DocumentID named tuples;
        # `seen` is used instead of plain set(), so the order is preserved
        seen = set()
        for doc_number in doc_numbers:
            if doc_number in seen:
                continue
            seen.add(doc_number)

            ids.append(
                DocumentID(
                    doc_number,
                    documents["set-library"],
                    aleph_search_result["base"]
                )
            )

    return ids
Esempio n. 7
0
def searchInAleph(base, phrase, considerSimilar, field):
    """
    Send request to the aleph search engine.

    Request itself is pretty useless, but it can be later used as parameter
    for :func:`getDocumentIDs`, which can fetch records from Aleph.

    Args:
        base (str): which database you want to use
        phrase (str): what do you want to search (it is URL-encoded before
                      it is sent)
        considerSimilar (bool): fuzzy search, which is not working at all, so
                               don't use it
        field (str): where you want to look (see: :attr:`VALID_ALEPH_FIELDS`)

    Returns:
        dictionary: consisting from following fields:

            | error (optional): present if there was some form of error
            | no_entries (int): number of entries that can be fetch from aleph
            | no_records (int): no idea what is this, but it is always >= than
                                `no_entries`
            | set_number (int): important - something like ID of your request
            | session-id (str): used to count users for licensing purposes
            | base (str): value of the `base` parameter, added by this
                          function

    Example:
      Returned dict::

        {
         'session-id': 'YLI54HBQJESUTS678YYUNKEU4BNAUJDKA914GMF39J6K89VSCB',
         'set_number': 36520,
         'no_records': 1,
         'no_entries': 1
        }

    Raises:
        AlephException: if Aleph doesn't return any information, or if it
                        returns other error than "empty set"
        InvalidAlephFieldException: if specified field is not valid
    """
    downer = Downloader()

    if field.lower() not in VALID_ALEPH_FIELDS:
        raise InvalidAlephFieldException("Unknown field '" + field + "'!")

    query = Template(SEARCH_URL_TEMPLATE).substitute(
        PHRASE=quote_plus(phrase),  # urlencode phrase
        BASE=base,
        FIELD=field,
        SIMILAR="Y" if considerSimilar else "N"
    )
    response = downer.download(ALEPH_URL + query)

    # whole answer is expected to be stored in <find> element
    find_tags = dhtmlparser.parseString(response).find("find")
    if not find_tags:
        raise AlephException("Aleph didn't returned any information.")

    # convert aleph result into dictionary and remember the used base
    result = _alephResultToDict(find_tags[0])
    result["base"] = base

    if "error" in result:
        # "empty set" is the only error, which is not treated as failure
        if result["error"] != "empty set":
            raise AlephException(result["error"])

        result["no_entries"] = 0  # empty set have 0 entries

    return result
Esempio n. 8
0
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Interpreter version: python 2.7
#
# Imports =====================================================================
from collections import namedtuple

import dhtmlparser  # pip install -U dhtmlparser
from httpkie import Downloader  # pip install -U httpkie

# Variables ===================================================================
# Module-level httpkie Downloader instance; get_table() uses it to fetch the
# page.
downloader = Downloader()
# Named tuple type with three fields: name, mips and year.
MIPSInfo = namedtuple("MIPSInfo", "name mips year")


# Functions & classes =========================================================
def get_table():
    """
    Download the "Instructions per second" article from the english
    Wikipedia and pick the first ``<table>`` element with class
    ``wikitable sortable`` from it.

    Returns:
        obj: First matching element as returned by dhtmlparser's ``.find()``.
             Indexing with ``[0]`` fails if nothing was matched.
    """
    url = "https://en.wikipedia.org/wiki/Instructions_per_second"
    dom = dhtmlparser.parseString(downloader.download(url))

    tables = dom.find("table", {"class": "wikitable sortable"})

    return tables[0]


def parse_table():
    for tr in get_table().find("tr"):
        tds = tr.find("td")

        if not tds:
Esempio n. 9
0
def register(username, password, email, gender, date_of_birth_ts, http_proxy = None):
	"""
	Register new account, raise proper exceptions if there is a problem:
	 - InvalidUsernameException is raised when the username availability
	   check doesn't answer "true"
	 - InvalidPasswordException is raised when password is shorter than the
	   `data-rule-minlength` of the password input in the signup form
	 - EmailTakenException is raised when the email availability check
	   doesn't answer "true"
	 - InvalidGenderException is raised when gender parameter is not
	   "male"/"female" (case insensitive)
	 - SpotifierException is raised in other cases (see .value for details
	   from server)

	Checks are performed in the order in which they are listed above.

	Email is not verified, so you can use pretty much everything.

	Beware of date_of_birth_ts timestamp - spotify won't let you register too
	much young accounts, so in case of trouble, try subtracting 567648000 for
	18 years. Timestamp is converted to day/month/year using local time.

	Function supports http_proxy parameter in format "http://server:port".
	"""
	d = Downloader(http_proxy = http_proxy)
	d.download(  # cookies
		"https://www.spotify.com/us/login/?forward_url=%2Fus%2F",
	)
	# signup form is the source of minimal password length and form_token
	dom = html.parseString(
		d.download("https://www.spotify.com/us/signup/?forward_url=%2Fus%2F"),
	)

	# check username
	valid_username = d.download(
		"https://www.spotify.com/us/xhr/json/isUsernameAvailable.php",
		get = {"username": username}
	)
	if valid_username.strip() != "true":
		raise InvalidUsernameException(
			"Username '" + username + "' is invalid or already in use!"
		)

	# check password length - password which is exactly as long as the
	# minimal length is valid, so only shorter passwords are refused
	password_input = dom.find("input", {"name": "password"})[0]
	min_password_len = int(password_input.params["data-rule-minlength"])
	if len(password) < min_password_len:
		raise InvalidPasswordException("Password is too short.")

	# check email
	valid_email = d.download(
		"https://www.spotify.com/us/xhr/json/isEmailAvailable.php",
		get = {"email": email}
	)
	if valid_email.strip() != "true":
		raise EmailTakenException("Email is already used!")

	# form expects date of birth split to three fields
	day, month, year = time.strftime(
		"%d %m %Y", time.localtime(int(date_of_birth_ts))
	).split()

	gender = gender.lower()
	if gender not in ("male", "female"):
		raise InvalidGenderException(
			"Spotify doesn't support '" + gender + "' as gender!"
		)

	reg_form = {
		"form_token":    dom.find("input", {"name": "form_token"})[0].params["value"],
		"creation_flow": "",
		"forward_url":   "/us/",
		"username":      username,
		"password":      password,
		"email":         email,
		"confirm_email": email,
		"gender":        gender,
		"dob_month":     month,
		"dob_day":       day,
		"dob_year":      year,
		"signup_pre_tick_eula": "true",
	}

	data = d.download(
		"https://www.spotify.com/us/xhr/json/sign-up-for-spotify.php",
		post = reg_form,
	)

	# any other status than 1 is reported as failure, with one line of
	# description for each item of "errors"
	jdata = json.loads(data)
	if jdata["status"] != 1:
		errors = [
			field + ": " + details["message"]
			for field, details in jdata["errors"].items()
		]
		raise SpotifierException(
			jdata["message"] + "\n" +
			"\n".join(errors)
		)