def __init__(self, name, **kwargs):
    """Initialise the scraper and select the first URL target.

    Args:
        name: Forwarded unchanged to ``BaseScraper.__init__``.
        **kwargs: Forwarded to ``BaseScraper.__init__``. Two keys are also
            read here:
            ``url_targets`` -- iterable of ``(url_ext, target)`` pairs;
            ``url`` -- base URL to which the first extension is appended.

    Raises:
        KeyError: If ``url_targets`` or ``url`` is absent from ``kwargs``.
        IndexError: If ``url_targets`` is empty.
    """
    BaseScraper.__init__(self, name, **kwargs)
    self._setup_logger(__name__)

    # Take a private copy before popping: consuming the caller's own list
    # would silently drain the configuration object that was passed in and
    # break any second scraper built from the same settings.
    self.url_targets = list(kwargs['url_targets'])
    self.base_url = kwargs['url']

    # The first pair becomes the active target; the rest stay queued.
    self.url_ext, self.target = self.url_targets.pop(0)
    self.apiurl = self.base_url + self.url_ext
def __init__(self, name, **kwargs):
    """Initialise the GitHub organisation scraper state and request params.

    Args:
        name: Forwarded unchanged to ``BaseScraper.__init__``.
        **kwargs: Forwarded to ``BaseScraper.__init__``. The ``orgs`` key
            (iterable of organisation names) is also read here.

    Raises:
        KeyError: If ``orgs`` is absent from ``kwargs``.
        IndexError: If ``orgs`` is empty.

    Notes:
        ``self.last_date`` and ``self.auth`` are read but not assigned in
        this method; presumably ``BaseScraper.__init__`` sets them --
        TODO confirm.
    """
    BaseScraper.__init__(self, name, **kwargs)
    self._setup_logger(__name__)

    self.orgurl = 'https://api.github.com/orgs/'
    # Private copy so popping does not drain the caller's configuration list.
    self.orgs = list(kwargs['orgs'])
    self.org = self.orgs.pop(0)
    self.repos = []
    self.repo = {}
    self.needs_setup = True

    params = {'per_page': 100}
    if self.last_date:
        params['since'] = self.last_date.isoformat()

    # Define the OAuth suffix on every path so that later attribute access
    # cannot raise AttributeError when no credentials were supplied.
    self.oauth_params = ''
    if self.auth:
        client_id, client_secret = self.auth[0], self.auth[1]
        self.params = {
            'client_id': client_id,
            'client_secret': client_secret,
        }
        self.oauth_params = '?client_id={id}&client_secret={secret}'.format(
            id=client_id, secret=client_secret)
        self.params.update(params)
    else:
        self.params = params
        # Normalise any falsy auth value to None.
        self.auth = None

    # Without credentials this is the bare rate-limit endpoint.
    self.ratelimit_url = 'https://api.github.com/rate_limit' + self.oauth_params
    self.remaining_requests = self.get_remaining_requests()
def __init__(self, mode):
    """Configure the book-hotel-beds.com scraper, then run the base initialiser.

    Args:
        mode: Stored verbatim as ``self.mode``; its meaning is defined
            outside this method.
    """
    # Site identity.
    self.source = 'book-hotel-beds.com'
    self.url = 'http://www.book-hotel-beds.com/'
    self.currency = 'GTQ'

    # Run configuration; ``cities`` is resolved from the enclosing scope.
    self.mode = mode
    self.cities = cities
    self.banners = []

    # Delegation happens last, after every attribute above is in place.
    BaseScraper.__init__(self)
def __init__(self, mode):
    """Prepare the lodebernal.com scraper settings and hand off to ``BaseScraper``.

    Args:
        mode: Kept verbatim on the instance as ``self.mode``.
    """
    self.source = 'lodebernal.com'
    self.url = 'http://www.lodebernal.com/'
    self.currency = 'GTQ'

    self.mode = mode
    # Everything in the outer-scope ``cities`` sequence except its first entry.
    self.cities = cities[1:]
    self.banners = []

    # Base initialiser runs only after the attributes above are assigned.
    BaseScraper.__init__(self)
def __init__(self, mode):
    """Set the bestday.com scraper attributes and call the base initialiser.

    Args:
        mode: Stored unchanged as ``self.mode``.
    """
    # Per-site constants.
    self.source = 'bestday.com'
    self.url = 'https://www.bestday.com/Hotels/'
    self.currency = 'USD'

    # Per-run values; ``cities`` comes from the enclosing scope.
    self.mode = mode
    self.cities = cities
    self.banners = []

    BaseScraper.__init__(self)
def __init__(self, mode):
    """Configure the elconventoantigua.com scraper and delegate to ``BaseScraper``.

    Args:
        mode: Stored unchanged as ``self.mode``.
    """
    self.source = 'elconventoantigua.com'
    self.url = 'http://www.elconventoantigua.com/suites-convento-boutique-hotel'
    self.currency = 'USD'

    self.mode = mode
    # Drops the first entry of the outer-scope ``cities`` sequence.
    self.cities = cities[1:]
    self.banners = []

    # Called last so all attributes above exist when the base class runs.
    BaseScraper.__init__(self)
def __init__(self, mode):
    """Configure the banguat.gob.gt scraper and call the base initialiser.

    Args:
        mode: Stored unchanged as ``self.mode``.
    """
    self.mode = mode
    # Every backslash is escaped explicitly. The previous literal contained
    # a bare "\h", which is an invalid escape sequence: it only worked by
    # accident and triggers a warning on current Python versions. The
    # resulting path string is unchanged.
    self.log_path = 'C:\\users\\indisersa\\Desktop\\hotels\\logs\\banguat.log'
    self.url = 'http://banguat.gob.gt/default.asp'
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:39.0) Gecko/20100101 Firefox/39.0'
    }
    # Delegation happens last, after the attributes above are assigned.
    BaseScraper.__init__(self)
def __init__(self, name, **kwargs):
    """Initialise the wiki scraper with its spaces and starting counters.

    Args:
        name: Forwarded unchanged to ``BaseScraper.__init__``.
        **kwargs: Forwarded to ``BaseScraper.__init__``; the ``spaces`` key
            is also stored as ``self.spaces``.

    Raises:
        KeyError: If ``spaces`` is absent from ``kwargs``.
    """
    BaseScraper.__init__(self, name, **kwargs)
    self._setup_logger(__name__)

    self.spaces = kwargs['spaces']
    self.base_url = "https://wiki.mongodb.com/rest/prototype/1/"
    self.needs_setup = True

    # Page bookkeeping attributes start at their initial values.
    self.page = 0
    self.page_id = -1
    self.page_ids = []
def __init__(self, name, **kwargs):
    """Initialise the scraper from the ``kinds`` and ``siteurl`` settings.

    Args:
        name: Forwarded unchanged to ``BaseScraper.__init__``.
        **kwargs: Forwarded to ``BaseScraper.__init__``; ``kinds`` is stored
            as ``self.kinds`` and ``siteurl`` as ``self.apiurl``.

    Raises:
        KeyError: If ``kinds`` or ``siteurl`` is absent from ``kwargs``.
    """
    BaseScraper.__init__(self, name, **kwargs)
    self._setup_logger(__name__)

    # ``kinds`` is looked up before ``siteurl``, matching the original order.
    self.kinds = kwargs['kinds']
    self.apiurl = kwargs['siteurl']

    # Working state starts empty.
    self.kind = ''
    self.urlexts = []
    self.needs_setup = True
def __init__(self, mode):
    """Configure the marriott.com scraper and delegate to ``BaseScraper``.

    Args:
        mode: Stored unchanged as ``self.mode``.
    """
    # Per-site constants.
    self.source = 'marriott.com'
    self.url = 'https://www.marriott.com/hotels/hotel-rooms/guacy-courtyard-guatemala-city/'
    self.currency = 'USD'

    # XPath expressions; the first one carries a ``{}`` placeholder.
    self.checkin_checkout_element = './/div[@aria-label="{}"]'
    self.further_element = './/div[@title="Next month"]'
    self.banners = ['.//button[contains(@class, "close")]']

    self.mode = mode
    # Only the first entry of the outer-scope ``cities`` sequence is kept.
    self.cities = cities[:1]

    BaseScraper.__init__(self)
def __init__(self, mode):
    """Configure the radisson.com scraper and delegate to ``BaseScraper``.

    Args:
        mode: Stored unchanged as ``self.mode``.
    """
    self.source = 'radisson.com'
    self.url = 'https://www.radisson.com/'
    self.currency = 'GTQ'

    self.mode = mode
    # Only the first entry of the outer-scope ``cities`` sequence is kept.
    self.cities = cities[:1]

    cookie_commit_xpath = (
        './/div[@class="cookieControl"]/div/div/table/tbody/tr/td/a[@class="commit"]'
    )
    self.banners = [cookie_commit_xpath]

    # Base initialiser runs after all attributes above are assigned.
    BaseScraper.__init__(self)
def __init__(self, name, **kwargs):
    """Initialise the mail scraper: labels, compiled patterns and fetch settings.

    Args:
        name: Forwarded unchanged to ``BaseScraper.__init__``.
        **kwargs: Forwarded to ``BaseScraper.__init__``; the ``labels`` key
            is also stored as ``self.labels``.

    Raises:
        KeyError: If ``labels`` is absent from ``kwargs``.
    """
    BaseScraper.__init__(self, name, **kwargs)
    self._setup_logger(__name__)
    self.labels = kwargs['labels']

    # Matches a leading "[group]" followed by whitespace and the remainder.
    self.group_re = re.compile(r'^\[(.*)\]\s+(.*)')
    self.subject_re = re.compile(r'([\[\(] *)?(\bRE|FWD?) *([-:;)\]][ :;\])-]*|$)|\]+ *$', re.IGNORECASE | re.MULTILINE)
    # Raw string: the previous plain literal relied on the invalid escape
    # sequences "\(" and "\d" being passed through, which emits a warning
    # on current Python versions. The compiled pattern is unchanged.
    self.header_re = re.compile(r".*\(X-GM-THRID (\d+) X-GM-MSGID (\d+)")

    self.message_parts = '(RFC822 X-GM-MSGID X-GM-THRID)'
    self.size = 100
def __init__(self, name, **kwargs):
    """Set up labels, compiled regular expressions and fetch settings.

    Args:
        name: Forwarded unchanged to ``BaseScraper.__init__``.
        **kwargs: Forwarded to ``BaseScraper.__init__``; the ``labels`` key
            is also stored as ``self.labels``.

    Raises:
        KeyError: If ``labels`` is absent from ``kwargs``.
    """
    BaseScraper.__init__(self, name, **kwargs)
    self._setup_logger(__name__)
    self.labels = kwargs['labels']

    # Captures the bracketed prefix and the text that follows it.
    self.group_re = re.compile(r'^\[(.*)\]\s+(.*)')
    self.subject_re = re.compile(
        r'([\[\(] *)?(\bRE|FWD?) *([-:;)\]][ :;\])-]*|$)|\]+ *$',
        re.IGNORECASE | re.MULTILINE)
    # Written as a raw string so "\(" and "\d" reach the regex engine
    # directly instead of being invalid string escapes (which warn on
    # current Python versions). The pattern text itself is identical.
    self.header_re = re.compile(r".*\(X-GM-THRID (\d+) X-GM-MSGID (\d+)")

    self.message_parts = '(RFC822 X-GM-MSGID X-GM-THRID)'
    self.size = 100
def __init__(self, mode):
    """Configure the booking.com scraper and delegate to ``BaseScraper``.

    Args:
        mode: Stored unchanged as ``self.mode``.
    """
    self.source = 'booking.com'
    self.url = 'https://www.booking.com/'
    self.currency = 'GTQ'

    self.mode = mode
    self.cities = cities

    # NOTE(review): the second entry is an empty string -- confirm that the
    # code consuming ``banners`` tolerates an empty XPath expression.
    close_xpath = './/div[contains(@class, "close")]'
    self.banners = [close_xpath, '']

    BaseScraper.__init__(self)
def __init__(self, mode):
    """Configure the expedia.com scraper and delegate to ``BaseScraper``.

    Args:
        mode: Stored unchanged as ``self.mode``.
    """
    # Per-site constants.
    self.source = 'expedia.com'
    self.url = 'https://www.expedia.com/Hotels'
    self.currency = 'USD'

    # Per-run values; ``cities`` comes from the enclosing scope.
    self.mode = mode
    self.cities = cities

    self.banners = [
        './/span[contains(@class, "icon-close")]',
        './/div[@class="hero-banner-box cf"]',
    ]

    # Base initialiser runs after all attributes above are assigned.
    BaseScraper.__init__(self)
def __init__(self, mode):
    """Configure the us.despegar.com scraper and delegate to ``BaseScraper``.

    Args:
        mode: Stored unchanged as ``self.mode``.
    """
    self.source = 'us.despegar.com'
    self.url = 'https://www.us.despegar.com/hotels/'
    self.currency = 'USD'

    self.mode = mode
    self.cities = cities

    # XPath expressions, kept in their original order.
    self.banners = [
        './/i[@class="nevo-modal-close nevo-icon-close"]',
        './/span[contains(@class, "eva-close")]',
        './/span[contains(@class, "as-login-close")]',
    ]

    BaseScraper.__init__(self)
def __init__(self, mode):
    """Configure the hotels.com scraper and delegate to ``BaseScraper``.

    Args:
        mode: Stored unchanged as ``self.mode``.
    """
    # Per-site constants.
    self.source = 'hotels.com'
    self.url = 'https://www.hotels.com/?pos=HCOM_US&locale=en_US'
    self.currency = 'USD'

    # Per-run values; ``cities`` comes from the enclosing scope.
    self.mode = mode
    self.cities = cities

    # XPath expressions, kept in their original order.
    self.banners = [
        './/button[contains(@class, "close")]',
        './/div[@class="widget-query-group widget-query-occupancy"]',
        './/div/span[@class="title"][contains(text(), "Save an extra")]/following-sibling::span[@class="close-button"]',
        './/span[contains(@class, "close")]',
        './/button[contains(@class, "cta")]',
    ]

    # Base initialiser runs after all attributes above are assigned.
    BaseScraper.__init__(self)
def __init__(self, name, **kwargs):
    """Initialise the Jira search scraper state and default query parameters.

    Args:
        name: Forwarded unchanged to ``BaseScraper.__init__``.
        **kwargs: Forwarded to ``BaseScraper.__init__``; the ``skip`` key is
            also stored as ``self.skip``.

    Raises:
        KeyError: If ``skip`` is absent from ``kwargs``.
    """
    BaseScraper.__init__(self, name, **kwargs)
    self._setup_logger(__name__)

    self.skip = kwargs['skip']
    self.apiurl = 'https://jira.mongodb.org/rest/api/2/search/'

    # One value feeds both ``self.limit`` and the ``maxResults`` parameter.
    batch_size = 100
    self.limit = batch_size
    self.params = {
        'startAt': 0,
        'maxResults': batch_size,
        'fields': 'key,summary,description,comment,status',
    }

    # Progress counters and working state start empty.
    self.total = 0
    self.processed = 0
    self.pkeys = []
    self.project = ""
    self.needs_setup = True
def __init__(self, name, **kwargs):
    """Initialise the StackExchange search scraper and its query parameters.

    Args:
        name: Forwarded unchanged to ``BaseScraper.__init__``.
        **kwargs: Forwarded to ``BaseScraper.__init__``; the ``tags`` key is
            also stored as ``self.tags``.

    Raises:
        KeyError: If ``tags`` is absent from ``kwargs``.
        IndexError: If ``tags`` is empty.
    """
    BaseScraper.__init__(self, name, **kwargs)
    self._setup_logger(__name__)

    self.apiurl = "https://api.stackexchange.com/2.1/search"
    self.tags = kwargs['tags']
    self.tag = 0  # index into ``self.tags`` of the tag being queried

    query = {
        'site': 'stackoverflow',
        'tagged': self.tags[self.tag],
        'filter': '!*1Klotvkqr2dciMbX*Qdafx4aenCPiyZAdUE1x(1w',
        'page': 1,
        'order': 'asc',
        'pagesize': 100,
    }

    if not self.last_date:
        query['sort'] = 'creation'
    else:
        # NOTE(review): ``mktime`` treats the time tuple as local time --
        # confirm that ``last_date`` is expressed in local time.
        since_epoch = int(mktime(self.last_date.timetuple()))
        query['sort'] = 'activity'
        query['min'] = since_epoch

    self.params = query
def __init__(self, name, **kwargs):
    """Set up the StackExchange search endpoint, tags and request parameters.

    Args:
        name: Forwarded unchanged to ``BaseScraper.__init__``.
        **kwargs: Forwarded to ``BaseScraper.__init__``; the ``tags`` key is
            also stored as ``self.tags``.

    Raises:
        KeyError: If ``tags`` is absent from ``kwargs``.
        IndexError: If ``tags`` is empty.
    """
    BaseScraper.__init__(self, name, **kwargs)
    self._setup_logger(__name__)

    self.apiurl = "https://api.stackexchange.com/2.1/search"
    self.tags = kwargs['tags']
    self.tag = 0  # position of the current tag within ``self.tags``

    self.params = {
        'site': 'stackoverflow',
        'tagged': self.tags[self.tag],
        'filter': '!*1Klotvkqr2dciMbX*Qdafx4aenCPiyZAdUE1x(1w',
        'page': 1,
        'order': 'asc',
        'pagesize': 100,
    }

    if self.last_date:
        # NOTE(review): ``mktime`` interprets the tuple as local time --
        # verify that matches how ``last_date`` is produced.
        extra = {
            'sort': 'activity',
            'min': int(mktime(self.last_date.timetuple())),
        }
    else:
        extra = {'sort': 'creation'}
    self.params.update(extra)
def __init__(self):
    """Delegate construction entirely to ``BaseScraper.__init__``."""
    # No attributes are set in this method.
    BaseScraper.__init__(self)
def __init__(self, name, **kwargs):
    """Initialise the employee API scraper with its endpoint and parameters.

    Args:
        name: Forwarded unchanged to ``BaseScraper.__init__``.
        **kwargs: Forwarded unchanged to ``BaseScraper.__init__``.
    """
    BaseScraper.__init__(self, name, **kwargs)
    self._setup_logger(__name__)

    # Fixed endpoint and the single query parameter stored with it.
    self.params = {'expand': 'team'}
    self.apiurl = 'https://corp.mongodb.com/api/employee'
def __init__(self, name, **kwargs):
    """Set up the employee API endpoint and its default query parameters.

    Args:
        name: Forwarded unchanged to ``BaseScraper.__init__``.
        **kwargs: Forwarded unchanged to ``BaseScraper.__init__``.
    """
    BaseScraper.__init__(self, name, **kwargs)
    self._setup_logger(__name__)

    endpoint = "https://corp.mongodb.com/api/employee"
    query = {"expand": "team"}

    self.apiurl = endpoint
    self.params = query
def __init__(self):
    """Set the carmax.com cars base URL and a zeroed counter, then delegate."""
    self.base_url = 'https://www.carmax.com/cars/'
    self.counter = 0

    # Base initialiser runs after the attributes above are assigned.
    BaseScraper.__init__(self)
def __init__(self):
    """Set the carmax.com research base URL and a zeroed count, then delegate."""
    self.base_url = 'https://carmax.com/research/'
    self.count = 0

    # Base initialiser runs after the attributes above are assigned.
    BaseScraper.__init__(self)
def __init__(self):
    """Point the scraper at one fixed carmax.com research page, then delegate."""
    research_page = 'https://www.carmax.com/research/acura/ilx-hybrid/2013'
    self.base_url = research_page

    # Base initialiser runs after ``base_url`` is assigned.
    BaseScraper.__init__(self)