Example #1
0
 def __init__(self, name, **kwargs):
     """Initialise the scraper and select the first URL target.

     Args:
         name: Passed through to ``BaseScraper.__init__``.
         **kwargs: Passed through to ``BaseScraper.__init__``. Must
             contain ``url`` (the base URL string) and ``url_targets``
             (a non-empty sequence of ``(url_ext, target)`` pairs).

     Raises:
         KeyError: If ``url_targets`` or ``url`` is missing.
         IndexError: If ``url_targets`` is empty.
     """
     BaseScraper.__init__(self, name, **kwargs)
     self._setup_logger(__name__)
     # Work on a private copy: the pop() below must not consume entries
     # from the list object owned by the caller's configuration.
     self.url_targets = list(kwargs['url_targets'])
     self.base_url = kwargs['url']
     # The first pair becomes the active target; the rest stay queued.
     self.url_ext, self.target = self.url_targets.pop(0)
     self.apiurl = self.base_url + self.url_ext
Example #2
0
 def __init__(self, name, **kwargs):
     """Set up the scraper and make the first URL target the active one.

     Args:
         name: Forwarded to ``BaseScraper.__init__``.
         **kwargs: Forwarded to ``BaseScraper.__init__``. ``url_targets``
             must be a non-empty sequence of ``(url_ext, target)`` pairs
             and ``url`` the base URL string they are appended to.

     Raises:
         KeyError: If ``url_targets`` or ``url`` is not supplied.
         IndexError: If ``url_targets`` has no entries.
     """
     BaseScraper.__init__(self, name, **kwargs)
     self._setup_logger(__name__)
     # Copy first so that removing the head entry below leaves the
     # caller's own list untouched.
     self.url_targets = list(kwargs['url_targets'])
     self.base_url = kwargs['url']
     self.url_ext, self.target = self.url_targets.pop(0)
     # Full endpoint for the active target.
     self.apiurl = self.base_url + self.url_ext
Example #3
0
    def __init__(self, name, **kwargs):
        """Initialise the GitHub organisation scraper.

        Args:
            name: Passed through to ``BaseScraper.__init__``.
            **kwargs: Passed through to ``BaseScraper.__init__``. Must
                contain ``orgs``, a non-empty sequence; its first entry
                becomes ``self.org``.

        Raises:
            KeyError: If ``orgs`` is missing.
            IndexError: If ``orgs`` is empty.
        """
        BaseScraper.__init__(self, name, **kwargs)
        self._setup_logger(__name__)
        self.orgurl = 'https://api.github.com/orgs/'
        # Copy so that pop() below does not consume the caller's list.
        self.orgs = list(kwargs['orgs'])
        self.org = self.orgs.pop(0)
        self.repos = []
        self.repo = {}

        self.needs_setup = True

        # Defaults for the unauthenticated case, so both attributes exist
        # by the time get_remaining_requests() is called below.
        self.oauth_params = ''
        self.ratelimit_url = 'https://api.github.com/rate_limit'

        # self.last_date and self.auth are not assigned in this method;
        # presumably BaseScraper.__init__ sets them -- confirm.
        params = {'per_page': 100}
        if self.last_date:
            params['since'] = self.last_date.isoformat()
        if self.auth:
            self.params = {
                'client_id': self.auth[0],
                'client_secret': self.auth[1]
            }
            # NOTE(review): the credentials are placed in the query string
            # without URL-encoding, and GitHub has deprecated query-parameter
            # authentication -- confirm this still works against the API.
            self.oauth_params = '?client_id={id}&client_secret={secret}'.format(
                id=self.auth[0], secret=self.auth[1])
            self.ratelimit_url = 'https://api.github.com/rate_limit' + self.oauth_params
            self.params.update(params)
        else:
            self.params = params
        # The credentials now live in self.params / self.oauth_params.
        # NOTE(review): presumably cleared so the base class does not send
        # them a second time -- confirm.
        self.auth = None

        self.remaining_requests = self.get_remaining_requests()
 def __init__(self, mode):
     """Store the book-hotel-beds.com settings, then run the base initialiser.

     Args:
         mode: Stored unchanged on ``self.mode``.
     """
     # Site identity.
     self.source = 'book-hotel-beds.com'
     self.url = 'http://www.book-hotel-beds.com/'
     # Run settings; ``cities`` is defined outside this method.
     self.mode = mode
     self.cities = cities
     self.currency = 'GTQ'
     self.banners = []
     # The base initialiser runs last, once every attribute above is set.
     BaseScraper.__init__(self)
Example #5
0
 def __init__(self, mode):
     """Store the lodebernal.com settings, then run the base initialiser.

     Args:
         mode: Stored unchanged on ``self.mode``.
     """
     self.mode = mode
     self.source = 'lodebernal.com'
     self.url = 'http://www.lodebernal.com/'
     self.currency = 'GTQ'
     # Every entry of ``cities`` (defined outside this method) except the first.
     self.cities = cities[1:]
     self.banners = []
     # Called last, after the attributes above are in place.
     BaseScraper.__init__(self)
Example #6
0
 def __init__(self, mode):
     """Store the bestday.com settings, then run the base initialiser.

     Args:
         mode: Stored unchanged on ``self.mode``.
     """
     # What to scrape.
     self.source = 'bestday.com'
     self.url = 'https://www.bestday.com/Hotels/'
     self.cities = cities  # full ``cities`` object defined outside this method
     # How to scrape it.
     self.mode = mode
     self.currency = 'USD'
     self.banners = []
     # Base initialiser is invoked only after all attributes are assigned.
     BaseScraper.__init__(self)
 def __init__(self, mode):
     """Store the elconventoantigua.com settings, then run the base initialiser.

     Args:
         mode: Stored unchanged on ``self.mode``.
     """
     self.source = 'elconventoantigua.com'
     self.url = 'http://www.elconventoantigua.com/suites-convento-boutique-hotel'
     self.currency = 'USD'
     self.banners = []
     self.mode = mode
     # ``cities`` comes from outside this method; its first entry is dropped.
     self.cities = cities[1:]
     # Runs last so the attributes above already exist when it executes.
     BaseScraper.__init__(self)
Example #8
0
 def __init__(self, mode):
     """Store the banguat.gob.gt settings, then run the base initialiser.

     Args:
         mode: Stored unchanged on ``self.mode``.
     """
     self.mode = mode
     # Every backslash is escaped explicitly. The previous literal had a
     # bare "\h", which is an invalid escape sequence that Python warns
     # about; the resulting path string is unchanged.
     self.log_path = 'C:\\users\\indisersa\\Desktop\\hotels\\logs\\banguat.log'
     self.url = 'http://banguat.gob.gt/default.asp'
     # Headers dictionary holding a fixed desktop-browser User-Agent string.
     self.headers = {
         'User-Agent':
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:39.0) Gecko/20100101 Firefox/39.0'
     }
     # Base initialiser runs last, after the attributes above are set.
     BaseScraper.__init__(self)
Example #9
0
 def __init__(self, name, **kwargs):
     """Initialise the wiki scraper state.

     Args:
         name: Forwarded to ``BaseScraper.__init__``.
         **kwargs: Forwarded to ``BaseScraper.__init__``; must contain
             ``spaces``, which is stored on ``self.spaces``.

     Raises:
         KeyError: If ``spaces`` is missing.
     """
     BaseScraper.__init__(self, name, **kwargs)
     self._setup_logger(__name__)
     self.spaces = kwargs['spaces']
     self.base_url = "https://wiki.mongodb.com/rest/prototype/1/"
     # Paging state starts empty.
     self.page_ids = []
     self.page_id = -1
     self.page = 0
     self.needs_setup = True
Example #10
0
 def __init__(self, name, **kwargs):
     """Run the base initialiser and reset the wiki paging attributes.

     Args:
         name: Passed through to ``BaseScraper.__init__``.
         **kwargs: Passed through to ``BaseScraper.__init__``. The
             ``spaces`` entry is required and kept on ``self.spaces``.

     Raises:
         KeyError: If ``spaces`` is not supplied.
     """
     BaseScraper.__init__(self, name, **kwargs)
     self._setup_logger(__name__)
     self.spaces = kwargs['spaces']
     # Flag and endpoint.
     self.needs_setup = True
     self.base_url = "https://wiki.mongodb.com/rest/prototype/1/"
     # Page counter, the collected page ids, and the current page id.
     self.page, self.page_ids, self.page_id = 0, [], -1
Example #11
0
    def __init__(self, name, **kwargs):
        """Initialise the scraper from the ``kinds`` and ``siteurl`` options.

        Args:
            name: Forwarded to ``BaseScraper.__init__``.
            **kwargs: Forwarded to ``BaseScraper.__init__``. ``kinds`` is
                stored on ``self.kinds`` and ``siteurl`` on ``self.apiurl``.

        Raises:
            KeyError: If ``kinds`` or ``siteurl`` is missing.
        """
        BaseScraper.__init__(self, name, **kwargs)
        self._setup_logger(__name__)
        # Read both required options (``kinds`` first) before storing either.
        requested_kinds, site_url = kwargs['kinds'], kwargs['siteurl']
        self.apiurl = site_url
        self.kinds = requested_kinds

        # No kind is selected yet and no URL extensions are collected.
        self.kind = ''
        self.urlexts = []
        self.needs_setup = True
Example #12
0
 def __init__(self, mode):
     """Store the marriott.com settings, then run the base initialiser.

     Args:
         mode: Stored unchanged on ``self.mode``.
     """
     self.mode = mode
     self.source = 'marriott.com'
     self.currency = 'USD'
     self.url = 'https://www.marriott.com/hotels/hotel-rooms/guacy-courtyard-guatemala-city/'
     # At most the first entry of ``cities`` (defined outside this method).
     self.cities = cities[:1]

     # XPath expressions; the first has a ``{}`` placeholder left unformatted here.
     self.checkin_checkout_element = './/div[@aria-label="{}"]'
     self.further_element = './/div[@title="Next month"]'
     self.banners = [
         './/button[contains(@class, "close")]',
     ]
     # Base initialiser runs last, after the attributes above are set.
     BaseScraper.__init__(self)
Example #13
0
 def __init__(self, mode):
     """Store the radisson.com settings, then run the base initialiser.

     Args:
         mode: Stored unchanged on ``self.mode``.
     """
     self.source = 'radisson.com'
     self.url = 'https://www.radisson.com/'
     self.currency = 'GTQ'
     self.mode = mode
     # ``cities`` is defined outside this method; only its head slice is kept.
     self.cities = cities[:1]
     # Single XPath expression for the cookie-control element.
     cookie_commit_xpath = './/div[@class="cookieControl"]/div/div/table/tbody/tr/td/a[@class="commit"]'
     self.banners = [cookie_commit_xpath]
     # Called last, once every attribute above exists.
     BaseScraper.__init__(self)
Example #14
0
 def __init__(self, name, **kwargs):
     """Initialise the mail scraper and compile its regular expressions.

     Args:
         name: Passed through to ``BaseScraper.__init__``.
         **kwargs: Passed through to ``BaseScraper.__init__``; must
             contain ``labels``, which is stored on ``self.labels``.

     Raises:
         KeyError: If ``labels`` is missing.
     """
     BaseScraper.__init__(self, name, **kwargs)
     self._setup_logger(__name__)
     self.labels = kwargs['labels']
     # Captures a leading "[...]" tag and the text that follows it.
     self.group_re = re.compile(r'^\[(.*)\]\s+(.*)')
     # Matches RE / FW / FWD markers (any case) and trailing "]" runs.
     self.subject_re = re.compile(r'([\[\(] *)?(\bRE|FWD?) *([-:;)\]][ :;\])-]*|$)|\]+ *$',
                                  re.IGNORECASE | re.MULTILINE)
     # Raw string: "\(" and "\d" are regex escapes, not string escapes. The
     # non-raw form relied on invalid string escape sequences that Python
     # warns about. The compiled pattern is unchanged.
     self.header_re = re.compile(r".*\(X-GM-THRID (\d+) X-GM-MSGID (\d+)")
     self.message_parts = '(RFC822 X-GM-MSGID X-GM-THRID)'
     self.size = 100
Example #15
0
 def __init__(self, name, **kwargs):
     """Set up the mail scraper: labels, compiled patterns and fetch settings.

     Args:
         name: Forwarded to ``BaseScraper.__init__``.
         **kwargs: Forwarded to ``BaseScraper.__init__``. The ``labels``
             entry is required and kept on ``self.labels``.

     Raises:
         KeyError: If ``labels`` is not supplied.
     """
     BaseScraper.__init__(self, name, **kwargs)
     self._setup_logger(__name__)
     self.labels = kwargs['labels']
     # Leading "[...]" tag followed by whitespace and the remaining text.
     self.group_re = re.compile(r'^\[(.*)\]\s+(.*)')
     # RE / FW / FWD markers (case-insensitive) and trailing "]" runs.
     self.subject_re = re.compile(
         r'([\[\(] *)?(\bRE|FWD?) *([-:;)\]][ :;\])-]*|$)|\]+ *$',
         re.IGNORECASE | re.MULTILINE)
     # Written as a raw string so that "\(" and "\d" reach the regex engine
     # without going through (invalid) string escape processing. The
     # pattern itself is the same as before.
     self.header_re = re.compile(r".*\(X-GM-THRID (\d+) X-GM-MSGID (\d+)")
     self.message_parts = '(RFC822 X-GM-MSGID X-GM-THRID)'
     self.size = 100
Example #16
0
 def __init__(self, mode):
     """Store the booking.com settings, then run the base initialiser.

     Args:
         mode: Stored unchanged on ``self.mode``.
     """
     self.mode = mode
     self.source = 'booking.com'
     self.url = 'https://www.booking.com/'
     self.cities = cities  # defined outside this method
     self.currency = 'GTQ'
     # NOTE(review): the second entry is an empty string -- confirm that the
     # code consuming ``banners`` tolerates an empty XPath expression.
     close_div_xpath = './/div[contains(@class, "close")]'
     self.banners = [close_div_xpath, '']
     # Base initialiser runs last, after the attributes above are set.
     BaseScraper.__init__(self)
Example #17
0
 def __init__(self, mode):
     """Store the expedia.com settings, then run the base initialiser.

     Args:
         mode: Stored unchanged on ``self.mode``.
     """
     # Site identity.
     self.source = 'expedia.com'
     self.url = 'https://www.expedia.com/Hotels'
     # Run settings; ``cities`` is defined outside this method.
     self.currency = 'USD'
     self.mode = mode
     self.cities = cities
     # Two XPath expressions, kept in this order.
     self.banners = ['.//span[contains(@class, "icon-close")]',
                     './/div[@class="hero-banner-box cf"]']
     # Called last, once every attribute above exists.
     BaseScraper.__init__(self)
Example #18
0
 def __init__(self, mode):
     """Store the us.despegar.com settings, then run the base initialiser.

     Args:
         mode: Stored unchanged on ``self.mode``.
     """
     self.source = 'us.despegar.com'
     self.url = 'https://www.us.despegar.com/hotels/'
     self.mode = mode
     self.currency = 'USD'
     self.cities = cities  # defined outside this method
     # Three XPath expressions, built up in their original order.
     self.banners = []
     self.banners.append('.//i[@class="nevo-modal-close nevo-icon-close"]')
     self.banners.append('.//span[contains(@class, "eva-close")]')
     self.banners.append('.//span[contains(@class, "as-login-close")]')
     # Base initialiser runs last, after the attributes above are set.
     BaseScraper.__init__(self)
Example #19
0
 def __init__(self, mode):
     """Store the hotels.com settings, then run the base initialiser.

     Args:
         mode: Stored unchanged on ``self.mode``.
     """
     self.source = 'hotels.com'
     self.url = 'https://www.hotels.com/?pos=HCOM_US&locale=en_US'
     self.currency = 'USD'
     self.cities = cities  # defined outside this method
     self.mode = mode
     # Five XPath expressions; their order is preserved.
     save_extra_close_xpath = './/div/span[@class="title"][contains(text(), "Save an extra")]/following-sibling::span[@class="close-button"]'
     self.banners = [
         './/button[contains(@class, "close")]',
         './/div[@class="widget-query-group widget-query-occupancy"]',
         save_extra_close_xpath,
         './/span[contains(@class, "close")]',
         './/button[contains(@class, "cta")]',
     ]
     # Called last, once every attribute above exists.
     BaseScraper.__init__(self)
Example #20
0
 def __init__(self, name, **kwargs):
     """Initialise the Jira search scraper state.

     Args:
         name: Forwarded to ``BaseScraper.__init__``.
         **kwargs: Forwarded to ``BaseScraper.__init__``; must contain
             ``skip``, which is stored on ``self.skip``.

     Raises:
         KeyError: If ``skip`` is missing.
     """
     BaseScraper.__init__(self, name, **kwargs)
     self._setup_logger(__name__)
     self.skip = kwargs['skip']
     self.apiurl = 'https://jira.mongodb.org/rest/api/2/search/'

     # Progress bookkeeping starts from zero / empty.
     self.project = ""
     self.pkeys = []
     self.total = 0
     self.processed = 0
     self.needs_setup = True

     # ``limit`` is also used as the ``maxResults`` query parameter.
     self.limit = 100
     self.params = dict(
         startAt=0,
         maxResults=self.limit,
         fields='key,summary,description,comment,status',
     )
Example #21
0
 def __init__(self, name, **kwargs):
     """Run the base initialiser and prepare the Jira query parameters.

     Args:
         name: Passed through to ``BaseScraper.__init__``.
         **kwargs: Passed through to ``BaseScraper.__init__``. The
             ``skip`` entry is required and kept on ``self.skip``.

     Raises:
         KeyError: If ``skip`` is not supplied.
     """
     BaseScraper.__init__(self, name, **kwargs)
     self._setup_logger(__name__)
     self.skip = kwargs['skip']

     page_size = 100
     self.limit = page_size
     self.apiurl = 'https://jira.mongodb.org/rest/api/2/search/'
     # Query parameters, inserted in the order startAt, maxResults, fields.
     self.params = {'startAt': 0}
     self.params['maxResults'] = page_size
     self.params['fields'] = 'key,summary,description,comment,status'

     # Counters and collected project keys start out empty.
     self.total = self.processed = 0
     self.pkeys = []
     self.project = ""
     self.needs_setup = True
Example #22
0
 def __init__(self, name, **kwargs):
     """Initialise the Stack Exchange search scraper for the first tag.

     Args:
         name: Forwarded to ``BaseScraper.__init__``.
         **kwargs: Forwarded to ``BaseScraper.__init__``; must contain
             ``tags``, an indexable collection with at least one entry.

     Raises:
         KeyError: If ``tags`` is missing.
         IndexError: If ``tags`` is an empty sequence.
     """
     BaseScraper.__init__(self, name, **kwargs)
     self._setup_logger(__name__)
     self.apiurl = "https://api.stackexchange.com/2.1/search"
     self.tags = kwargs['tags']
     # Index into ``self.tags`` of the tag currently being queried.
     self.tag = 0
     self.params = {
         'site': 'stackoverflow',
         'tagged': self.tags[self.tag],
         'filter': '!*1Klotvkqr2dciMbX*Qdafx4aenCPiyZAdUE1x(1w',
         'page': 1,
         'order': 'asc',
         'pagesize': 100
     }
     # ``self.last_date`` is not assigned here; presumably the base
     # initialiser provides it -- confirm.
     if not self.last_date:
         self.params['sort'] = 'creation'
     else:
         # Convert the stored date to an integer timestamp via mktime.
         since_epoch = int(mktime(self.last_date.timetuple()))
         self.params['sort'] = 'activity'
         self.params['min'] = since_epoch
Example #23
0
 def __init__(self, name, **kwargs):
     """Run the base initialiser and build the Stack Exchange query parameters.

     Args:
         name: Passed through to ``BaseScraper.__init__``.
         **kwargs: Passed through to ``BaseScraper.__init__``. ``tags``
             is required; its first entry is used as the ``tagged`` value.

     Raises:
         KeyError: If ``tags`` is not supplied.
         IndexError: If ``tags`` is an empty sequence.
     """
     BaseScraper.__init__(self, name, **kwargs)
     self._setup_logger(__name__)
     self.apiurl = "https://api.stackexchange.com/2.1/search"
     self.tags = kwargs['tags']
     self.tag = 0  # position of the active tag within ``self.tags``
     first_tag = self.tags[self.tag]
     self.params = dict([
         ('site', 'stackoverflow'),
         ('tagged', first_tag),
         ('filter', '!*1Klotvkqr2dciMbX*Qdafx4aenCPiyZAdUE1x(1w'),
         ('page', 1),
         ('order', 'asc'),
         ('pagesize', 100),
     ])
     # ``self.last_date`` is presumably set by the base initialiser -- confirm.
     if self.last_date:
         # With a last date: sort by activity, bounded below by that date's
         # integer timestamp.
         lower_bound = int(mktime(self.last_date.timetuple()))
         ordering = {'sort': 'activity', 'min': lower_bound}
     else:
         ordering = {'sort': 'creation'}
     self.params.update(ordering)
Example #24
0
 def __init__(self):
     """Delegate construction to ``BaseScraper.__init__``; no extra state is set here."""
     BaseScraper.__init__(self)
Example #25
0
 def __init__(self, name, **kwargs):
     """Initialise the employee-API scraper.

     Args:
         name: Forwarded to ``BaseScraper.__init__``.
         **kwargs: Forwarded to ``BaseScraper.__init__``.
     """
     BaseScraper.__init__(self, name, **kwargs)
     self._setup_logger(__name__)
     # Fixed query parameters and endpoint.
     self.params = dict(expand='team')
     self.apiurl = 'https://corp.mongodb.com/api/employee'
Example #26
0
 def __init__(self, name, **kwargs):
     """Run the base initialiser, then set the employee endpoint and its parameters.

     Args:
         name: Passed through to ``BaseScraper.__init__``.
         **kwargs: Passed through to ``BaseScraper.__init__``.
     """
     BaseScraper.__init__(self, name, **kwargs)
     self._setup_logger(__name__)
     endpoint = "https://corp.mongodb.com/api/employee"
     query = {"expand": "team"}
     self.apiurl, self.params = endpoint, query
Example #27
0
 def __init__(self):
     """Set the starting counter and base URL, then run the base initialiser."""
     self.base_url = 'https://www.carmax.com/cars/'
     self.counter = 0
     # Called last, after both attributes above are assigned.
     BaseScraper.__init__(self)
Example #28
0
 def __init__(self):
     """Set the base URL and a zeroed count, then run the base initialiser."""
     self.base_url = 'https://carmax.com/research/'
     self.count = 0
     # The base initialiser runs only once both attributes exist.
     BaseScraper.__init__(self)
Example #29
0
 def __init__(self):
     """Set the fixed page URL, then run the base initialiser."""
     # Assigned before the base initialiser is called.
     self.base_url = 'https://www.carmax.com/research/acura/ilx-hybrid/2013'
     BaseScraper.__init__(self)