def login(username, password, http_proxy=None):
    """
    Log into Spotify with the given credentials.

    This is useful because users from unsupported countries have to log in
    through an IP address from a supported country roughly every two weeks,
    or their account is frozen until they do so.

    Args:
        username (str): Name of the Spotify account.
        password (str): Password of the account.
        http_proxy (str, optional): Proxy in format "http://server:port".
            Passed to the :class:`Downloader`. Default None.

    Raises:
        SpotifierException: If the server's JSON reply has a truthy
            ``error`` field; the exception carries the reply's ``msg``.
    """
    downloader = Downloader(http_proxy=http_proxy)

    # The login page contains the `utm-keywords` input, whose value has to
    # be posted back together with the credentials.
    login_page = downloader.download(
        "https://www.spotify.com/us/login/?forward_url=%2Fus%2F",
    )
    dom = html.parseString(login_page)
    utm_input = dom.find("input", {"name": "utm-keywords"})[0]

    response = downloader.download(
        "https://www.spotify.com/us/xhr/json/login.php",
        post={
            "referrer": "",
            "utm-keywords": utm_input.params["value"],
            "user_name": username,
            "password": password,
        },
    )

    reply = json.loads(response)
    if reply["error"]:
        raise SpotifierException(reply["msg"])
def getListOfBases():
    """
    Return the bases linked from the Aleph base-list page.

    This function is here mainly for purposes of unittest.

    The page is lowercased before parsing, so the returned bases are
    lowercase as well.

    Returns:
        list of str: Unique bases as they are used in the ``local_base``
        URL parameter of links on the page. The order is not defined.
    """
    downer = Downloader()
    data = downer.download(ALEPH_URL + "/F/?func=file&file_name=base-list")
    dom = dhtmlparser.parseString(data.lower())

    bases = set()  # set -> each base is reported only once
    for link in dom.find("a"):
        # only links which mention local_base in their href are interesting
        if "href" not in link.params:
            continue

        href = link.params["href"]
        if "local_base" not in href:
            continue

        # split link.tld/..?a=b&local_base=XXX into its parameter sections;
        # the first `?` is turned into `&`, so the first parameter is
        # separated from the path too
        sections = href.replace("?", "&", 1).split("&")

        # Take the first section holding the base. Don't index into the
        # result of filter(), which is an iterator on Python 3. Links without
        # a real `local_base=` section are skipped.
        for section in sections:
            if "local_base=" in section:
                bases.add(section.split("=")[1].strip())
                break

    return list(bases)
def downloadMARCXML(doc_id, library, base="nkc"):
    """
    Download MARC XML document with given `doc_id` from given `library`.

    Args:
        doc_id (DocumentID): You will get this from :func:`getDocumentIDs`.
        library (str): "``NKC01``" in our case, but don't worry,
            :func:`getDocumentIDs` adds library specification into
            :class:`DocumentID` named tuple.
        base (str, optional): Not used by this function. Default "nkc".

    Returns:
        str: Downloaded data, exactly as returned by the downloader.

    Raises:
        LibraryNotFoundException: If the reply contains ``<error>`` inside
            of the first ``<login>`` element.
        DocumentNotFoundException: If the reply contains ``<error>`` inside
            of the first ``<ill-get-doc>`` element.
    """
    downloader = Downloader()

    doc_url = ALEPH_URL + Template(DOC_URL_TEMPLATE).substitute(
        DOC_ID=doc_id,
        LIBRARY=library
    )
    data = downloader.download(doc_url)
    dom = dhtmlparser.parseString(data)

    def errors_in(tag_name):
        # <error> elements from the first `tag_name` element (if any)
        containers = dom.find(tag_name)
        if not containers:
            return []

        return containers[0].find("error")

    # errors reported in <login> mean (probably) bad library
    login_errors = errors_in("login")
    if login_errors:
        message = "Can't download document doc_id: '" + str(doc_id) + "' "
        message += "(probably bad library: '" + library + "')!\nMessage: "
        message += "\n".join([err.getContent() for err in login_errors])

        raise LibraryNotFoundException(message)

    # errors reported in <ill-get-doc> mean that document wasn't found
    doc_errors = errors_in("ill-get-doc")
    if doc_errors:
        raise DocumentNotFoundException(
            "\n".join([err.getContent() for err in doc_errors])
        )

    return data  # MARC XML of the document with given doc_id
def downloadMARCOAI(doc_id, base):
    """
    Download MARC OAI document with given `doc_id` from given (logical)
    `base`.

    Funny part is, that some documents can be obtained only with this
    function in their full text.

    Args:
        doc_id (str): You will get this from :func:`getDocumentIDs`.
        base (str): Base from which you want to download Aleph document.
            This seems to duplicate the :func:`searchInAleph` parameters,
            but it's just something Aleph's X-Services wants.

    Returns:
        str: Downloaded data, exactly as returned by the downloader.

    Raises:
        DocumentNotFoundException: If the first ``<error>`` element
            contains the text "Error reading document".
        InvalidAlephBaseException: For any other ``<error>`` element.
    """
    downloader = Downloader()

    oai_url = ALEPH_URL + Template(OAI_DOC_URL_TEMPLATE).substitute(
        DOC_ID=doc_id,
        BASE=base
    )
    data = downloader.download(oai_url)

    errors = dhtmlparser.parseString(data).find("error")
    if not errors:  # no <error> element -> document is ok
        return data

    error_text = errors[0].getContent()
    if "Error reading document" in error_text:
        raise DocumentNotFoundException(str(error_text))

    raise InvalidAlephBaseException(
        error_text + "\n" +
        "The base you are trying to access probably doesn't exist."
    )
def downloadRecords(search_result, from_doc=1):
    """
    Download at most `MAX_RECORDS` documents from `search_result`, starting
    from the document number `from_doc`.

    Args:
        search_result (dict): Returned from :func:`searchInAleph`. The keys
            ``set_number`` and ``no_records`` are used.
        from_doc (int, default 1): Number of the first downloaded document.

    Returns:
        list: Downloaded records, one item for each document number. Empty
        list if there is no ``set_number`` in `search_result`.
    """
    downloader = Downloader()

    if "set_number" not in search_result:
        return []

    # set numbers are aligned to six characters with leading zeros
    set_number = str(search_result["set_number"]).rjust(6, "0")

    # Number of documents to fetch is limited by MAX_RECORDS, by the size of
    # the set and by the number of documents remaining after `from_doc`.
    total = search_result["no_records"]
    count = min(total, MAX_RECORDS, total - from_doc + 1)

    return [
        downloader.download(
            ALEPH_URL + Template(RECORD_URL_TEMPLATE).substitute(
                SET_NUM=set_number,
                RECORD_NUM=doc_number,
            )
        )
        for doc_number in range(from_doc, from_doc + count)
    ]
def getDocumentIDs(aleph_search_result, number_of_docs=-1):
    """
    Get IDs, which can be used as parameters for other functions.

    Args:
        aleph_search_result (dict): Returned from :func:`searchInAleph`.
        number_of_docs (int, optional): How many :class:`DocumentID` from
            the set given by `aleph_search_result` should be returned.
            Zero or a negative number (default -1) means all of them, i.e.
            ``aleph_search_result["no_entries"]``.

    Returns:
        list: :class:`DocumentID` named tuples for given
        `aleph_search_result`. Empty list if there is no ``set_number`` in
        `aleph_search_result`.

    Raises:
        AlephException: If there is no ``<ill-get-set>`` element in the
            reply, or if Aleph reports an error.

    Note:
        Returned :class:`DocumentID` can be used as parameters to
        :func:`downloadMARCXML`.
    """
    downloader = Downloader()

    if "set_number" not in aleph_search_result:
        return []

    # set numbers are aligned to six characters with leading zeros
    set_number = str(aleph_search_result["set_number"]).rjust(6, "0")

    # non-positive limit means "fetch everything"
    if number_of_docs <= 0:
        number_of_docs = aleph_search_result["no_entries"]

    # download and parse data about given set
    set_url = ALEPH_URL + Template(SET_URL_TEMPLATE).substitute(
        SET_NUMBER=set_number,
        NUMBER_OF_DOCS=number_of_docs,
    )
    dom = dhtmlparser.parseString(downloader.download(set_url))

    # there should be at least one <ill-get-set> element
    libraries = dom.find("ill-get-set")
    if not libraries:
        raise AlephException("Aleph didn't returned set data.")

    ids = []
    for library in libraries:
        documents = _alephResultToDict(library)

        if "error" in documents:
            raise AlephException("getDocumentIDs: " + documents["error"])

        doc_numbers = documents["doc-number"]

        # single record is stored directly, not wrapped in the list
        if not isinstance(doc_numbers, list):
            ids.append(
                DocumentID(
                    doc_numbers,
                    documents["set-library"],
                    aleph_search_result["base"]
                )
            )
            continue

        # set() drops duplicate document numbers (order is not preserved)
        ids.extend([
            DocumentID(
                doc_number,
                documents["set-library"],
                aleph_search_result["base"]
            )
            for doc_number in set(doc_numbers)
        ])

    return ids
def searchInAleph(base, phrase, considerSimilar, field):
    """
    Send request to the Aleph search engine.

    The result itself is not very useful, but it can be later used as a
    parameter for :func:`getDocumentIDs`, which can fetch records from
    Aleph.

    Args:
        base (str): Which database you want to use.
        phrase (str): What you want to search for.
        considerSimilar (bool): Fuzzy search, which is reported as not
            working at all, so don't use it.
        field (str): Where you want to look (see:
            :attr:`VALID_ALEPH_FIELDS`).

    Returns:
        dict: Aleph's ``<find>`` reply converted to a dictionary, with the
        ``base`` key added. According to the original notes, it consists of:

        | error (optional): present if there was some form of error
        | no_entries (int): number of entries that can be fetched from Aleph
        | no_records (int): meaning unclear, always >= `no_entries`
        | set_number (int): important - something like ID of your request
        | session-id (str): used to count users for licensing purposes

        For the "empty set" error, the dictionary is returned with
        ``no_entries`` set to 0.

    Example:
        Returned dict::

            {
                'session-id': 'YLI54HBQJESUTS678YYUNKEU4BNAUJDKA914GMF39J6K89VSCB',
                'set_number': 36520,
                'no_records': 1,
                'no_entries': 1
            }

    Raises:
        AlephException: If Aleph doesn't return any information, or if it
            reports an error other than "empty set".
        InvalidAlephFieldException: If specified field is not valid.
    """
    downloader = Downloader()

    if field.lower() not in VALID_ALEPH_FIELDS:
        raise InvalidAlephFieldException("Unknown field '" + field + "'!")

    similar_flag = "N"
    if considerSimilar:
        similar_flag = "Y"

    search_url = ALEPH_URL + Template(SEARCH_URL_TEMPLATE).substitute(
        PHRASE=quote_plus(phrase),  # urlencode phrase
        BASE=base,
        FIELD=field,
        SIMILAR=similar_flag
    )
    dom = dhtmlparser.parseString(downloader.download(search_url))

    # whole reply is expected to be wrapped in the <find> element
    find_tags = dom.find("find")
    if not find_tags:
        raise AlephException("Aleph didn't returned any information.")

    # convert aleph result into dictionary and remember the base
    result = _alephResultToDict(find_tags[0])
    result["base"] = base

    if "error" in result:
        # only the "empty set" error is tolerated
        if result["error"] != "empty set":
            raise AlephException(result["error"])

        result["no_entries"] = 0  # empty set has 0 entries

    return result
def register(username, password, email, gender, date_of_birth_ts,
             http_proxy=None):
    """
    Register new Spotify account.

    Email is not verified, so you can use pretty much anything.

    Beware of the `date_of_birth_ts` timestamp - Spotify won't let you
    register accounts that are too young, so in case of trouble, try
    subtracting 567648000 (18 years).

    Args:
        username (str): Requested user name.
        password (str): Password; has to be at least as long as the
            ``data-rule-minlength`` attribute of the signup form's password
            input.
        email (str): Email address of the new account.
        gender (str): "male" or "female" (case insensitive).
        date_of_birth_ts (int): Timestamp of the date of birth. It is
            converted to day / month / year in the local timezone.
        http_proxy (str, optional): Proxy in format "http://server:port".
            Default None.

    Raises:
        InvalidUsernameException: If the server doesn't report the username
            as available.
        InvalidPasswordException: If the password is shorter than the
            minimal length required by the signup form.
        EmailTakenException: If the server doesn't report the email as
            available.
        InvalidGenderException: If `gender` is not "male" / "female".
        SpotifierException: In other cases, when the registration is
            refused; the exception contains details from the server.
    """
    d = Downloader(http_proxy=http_proxy)

    # the login page is downloaded only to obtain cookies
    d.download(
        "https://www.spotify.com/us/login/?forward_url=%2Fus%2F",
    )

    dom = html.parseString(
        d.download("https://www.spotify.com/us/signup/?forward_url=%2Fus%2F"),
    )

    # check username
    valid_username = d.download(
        "https://www.spotify.com/us/xhr/json/isUsernameAvailable.php",
        get={"username": username}
    )
    if valid_username.strip() != "true":
        raise InvalidUsernameException(
            "Username '" + username + "' is invalid or already in use!"
        )

    # Check password length. The limit is a *minimal* length, so a password
    # of exactly that length is still valid and only shorter ones are
    # rejected.
    password_input = dom.find("input", {"name": "password"})[0]
    min_password_len = int(password_input.params["data-rule-minlength"])
    if len(password) < min_password_len:
        raise InvalidPasswordException("Password is too short.")

    # check email
    valid_email = d.download(
        "https://www.spotify.com/us/xhr/json/isEmailAvailable.php",
        get={"email": email}
    )
    if valid_email.strip() != "true":
        raise EmailTakenException("Email is already used!")

    # the form wants the date of birth split into separate fields
    day, month, year = time.strftime(
        "%d %m %Y",
        time.localtime(int(date_of_birth_ts))
    ).split()

    gender = gender.lower()
    if gender not in ("male", "female"):
        raise InvalidGenderException(
            "Spotify doesn't support '" + gender + "' as gender!"
        )

    # form_token comes from the signup page downloaded above
    form_token = dom.find("input", {"name": "form_token"})[0]
    reg_form = {
        "form_token": form_token.params["value"],
        "creation_flow": "",
        "forward_url": "/us/",
        "username": username,
        "password": password,
        "email": email,
        "confirm_email": email,
        "gender": gender,
        "dob_month": month,
        "dob_day": day,
        "dob_year": year,
        "signup_pre_tick_eula": "true",
    }

    data = d.download(
        "https://www.spotify.com/us/xhr/json/sign-up-for-spotify.php",
        post=reg_form,
    )

    jdata = json.loads(data)
    if jdata["status"] != 1:
        # collect the per-field messages into one readable report
        errors = [
            field + ": " + jdata["errors"][field]["message"]
            for field in jdata["errors"]
        ]

        raise SpotifierException(
            jdata["message"] + "\n" + "\n".join(errors)
        )