Esempio n. 1
0
def get_url_from_mapper(filters):
    """
    Ask Gratka's URL mapper for the URL that corresponds to the supplied key-value pairs.

    Every pair is sent as one text field of a hand-built ``multipart/form-data`` body. A key whose value is a
    list is sent once per element; all values are converted with ``str()``.

    :param filters: dict mapping filter names to a value or a list of values, see
                    :meth:`gratka.category.get_category` for reference
    :return: the ``redirectUrl`` entry of the mapper's JSON response, as string
    """
    url = "http://www.gratka.pl/mapper/"
    # Boundary as announced in the content-type header; inside the body every delimiter line is "--" + boundary.
    boundary = "----WebKitFormBoundary7MA4YWxkTrZu0gW"

    # Flatten {key: value | [values]} into an ordered list of (name, text) form fields.
    fields = []
    for key, value in filters.items():
        values = value if isinstance(value, list) else [value]
        fields.extend((key, str(item)) for item in values)

    # NOTE: names and values are inserted verbatim (no escaping), exactly as before.
    part_template = "--{0}\r\nContent-Disposition: form-data; name=\"{1}\"\r\n\r\n{2}\r\n"
    payload = "".join(part_template.format(boundary, name, text) for name, text in fields)
    # A multipart body has to end with the closing delimiter ("--" + boundary + "--"); it was missing before,
    # which left the body malformed.
    payload += "--{0}--\r\n".format(boundary)

    headers = {
        'content-type': "multipart/form-data; boundary={0}".format(boundary),
        'cache-control': "no-cache",
        'User-Agent': get_random_user_agent()
    }
    response = requests.request("POST",
                                url,
                                data=payload.encode("utf-8"),
                                headers=headers)
    return json.loads(response.text)["redirectUrl"]
Esempio n. 2
0
def get_url_for_filters(payload):
    """ Submits the given payload to the trojmiasto.pl search engine and reads the result url from the answer

    The payload is POSTed to ``SEARCH_URL``. The url is the ``value`` attribute of the node located two siblings
    after the first ``option`` inside the element with class ``nice-select-tsi``.

    :param payload: Tuple of tuples containing POST key and argument
    :type payload: tuple
    :return: Url generated by trojmiasto.pl search engine
    :rtype: str
    """
    request_headers = {'User-Agent': get_random_user_agent()}
    search_page = requests.post(SEARCH_URL, data=payload, headers=request_headers)
    soup = BeautifulSoup(search_page.content, "html.parser")

    select_box = soup.find(class_="nice-select-tsi")
    first_option = select_box.find("option")
    # Two hops: presumably over a whitespace text node to the next <option> - confirm against the live markup.
    target_option = first_option.next_sibling.next_sibling
    return target_option.attrs["value"]
Esempio n. 3
0
def get_content_for_url(url):
    """ Performs a GET request for the given url

    The request carries the User-Agent supplied by ``get_random_user_agent()``.

    NOTE(review): this function was previously documented as caching the response in /var/temp when the
    environmental variable DEBUG is True. Nothing in this body does that - presumably it is handled by a wrapper
    outside this function; confirm.

    :param url: Website url
    :type url: str
    :return: Response for requested url
    :raises requests.HTTPError: if the response has an error status (raised by ``raise_for_status``)
    """
    request_headers = {'User-Agent': get_random_user_agent()}
    page = requests.get(url, headers=request_headers)
    page.raise_for_status()
    return page
Esempio n. 4
0
def get_content_from_source(url):
    """ Performs a GET request for the given url and hands back the raw body
    The request carries the User-Agent supplied by ``get_random_user_agent()``. An error status is logged as a
    warning and swallowed instead of being raised.
    NOTE(review): this function was previously documented as caching the response in /var/temp when the
    environmental variable DEBUG is True. Nothing in this body does that - presumably it is handled by a wrapper
    outside this function; confirm.
    :param url: Website url
    :type url: str
    :return: Content of the response, or None when the response has an error status
    """
    request_headers = {'User-Agent': get_random_user_agent()}
    response = requests.get(url, headers=request_headers)
    try:
        response.raise_for_status()
    except requests.HTTPError as error:
        log.warning('Request for {0} failed. Error: {1}'.format(url, error))
        return None
    else:
        return response.content
Esempio n. 5
0
def get_offer_phone_numbers(offer_id, cookie, csrf_token):
    """
    Asks the OtoDom contact endpoint for the phone number(s) attached to an offer.

    The CSRF token is sent as a form-urlencoded POST body and the cookie as a request header. A 404 answer is
    treated as "no phone numbers"; any other answer is parsed as JSON.

    :param offer_id: string, taken from context, see the return section of :meth:`scrape.category.get_category` for
                    reference
    :param cookie: string, see :meth:`scrape.utils.get_cookie_from` for reference
    :param csrf_token: string, see :meth:`scrape.utils.get_csrf_token` for reference
    :rtype: list(string)
    :return: An empty list on HTTP 404, otherwise the ``value`` entry of the JSON answer - per the endpoint's
             earlier documentation a list of phone numbers as strings (no spaces, no '+48'); the exact format
             cannot be verified from this code
    """
    endpoint = "https://www.otodom.pl/ajax/misc/contact/phone/{0}/".format(offer_id)
    form_body = "CSRFToken={0}".format(csrf_token)
    request_headers = {
        'cookie': "{0}".format(cookie),
        'content-type': "application/x-www-form-urlencoded",
        'User-Agent': get_random_user_agent()
    }

    reply = requests.request("POST", endpoint, data=form_body, headers=request_headers)
    offer_missing = reply.status_code == 404
    return [] if offer_missing else json.loads(reply.text)["value"]
Esempio n. 6
0
def get_response_for_url(url):
    """
    Performs a GET request for the given url, using the User-Agent supplied by ``get_random_user_agent()``.
    The status of the response is not checked here.

    :param url: an url, most likely from the :meth:`gratka.utils.get_url` method
    :return: a requests.response object
    """
    request_headers = {'User-Agent': get_random_user_agent()}
    response = requests.get(url, headers=request_headers)
    return response