Beispiel #1
0
class LyricsdotcomBrowser(PagesBrowser):
    """Browser for lyrics.com: song/artist search and lyrics retrieval."""
    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'http://www.lyrics.com'
    # Raw strings: the regex escapes (\? and \d) must reach the regex engine
    # untouched instead of being parsed as (invalid) Python string escapes.
    search = URL(r'/serp.php\?st=(?P<pattern>.*)&qtype=(?P<criteria>1|2)',
                 SearchPage)
    songLyrics = URL(r'/lyric/(?P<id>\d*)', LyricsPage)
    artistsong = URL(r'/artist/(?P<id>.*)', ArtistPages)

    def iter_lyrics(self, criteria, pattern):
        """Yield the songs found when searching for *pattern*.

        With ``criteria == 'song'`` the songs listed on the search page are
        yielded.  With ``criteria == 'artist'`` each artist listed on the
        search page is visited in turn and that artist's songs are yielded.
        Any other value of *criteria* yields nothing.
        """
        if criteria == 'song':
            # qtype=1 is the value this browser sends for a song search.
            self.search.go(pattern=pattern, criteria=1)
            assert self.search.is_here()
            for song in self.page.iter_lyrics():
                yield song
        elif criteria == 'artist':
            # qtype=2 is the value this browser sends for an artist search.
            self.search.go(pattern=pattern, criteria=2)
            assert self.search.is_here()
            for artist in self.page.iter_artists():
                for song in self.artistsong.go(id=artist.id).iter_lyrics():
                    yield song

    def get_lyrics(self, id):
        """Open the lyrics page for song *id* and return what it parses."""
        return self.songLyrics.go(id=id).get_lyrics()
Beispiel #2
0
class ParolesmaniaBrowser(PagesBrowser):
    """Browser for parolesmania.com: song/artist search and lyrics retrieval."""
    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'http://www.parolesmania.com/'
    # Raw strings: regex escapes such as \? and \. are not valid Python
    # string escapes and must be passed through verbatim.
    searchSong = URL(r'recherche.php\?c=title&k=(?P<pattern>[^/]*).*',
                     SearchSongPage)
    searchArtist = URL(r'recherche.php\?c=artist&k=(?P<pattern>[^/]*).*',
                       SearchArtistPage)
    songLyrics = URL(
        r'paroles_(?P<artistid>[^/]*)/paroles_(?P<songid>[^/]*)\.html',
        LyricsPage)
    artistSongs = URL(r'paroles_(?P<artistid>[^/]*)\.html', ArtistSongsPage)

    def iter_lyrics(self, criteria, pattern):
        """Return an iterable of songs matching *pattern*.

        :param criteria: ``'artist'`` or ``'song'``; for any other value the
            method returns ``None``.
        :param pattern: search text substituted into the search URL.
        """
        if criteria == 'artist':
            artist_ids = self.searchArtist.go(pattern=pattern).get_artist_ids()
            # we just take the 3 first artists to avoid too many page loadings
            # The artist pages are opened right away (as a list, not lazily),
            # in the same order as before.
            per_artist = [self.artistSongs.go(artistid=aid).iter_lyrics()
                          for aid in artist_ids[:3]]
            return itertools.chain.from_iterable(per_artist)
        elif criteria == 'song':
            return self.searchSong.go(pattern=pattern).iter_lyrics()

    def get_lyrics(self, id):
        """Return the lyrics for *id*, or ``None`` if the page is not found.

        :param id: string of the form ``'<artistid>|<songid>'``.
        """
        ids = id.split('|')
        try:
            self.songLyrics.go(artistid=ids[0], songid=ids[1])
            return self.page.get_lyrics()
        except BrowserHTTPNotFound:
            return None
Beispiel #3
0
class LyricsplanetBrowser(PagesBrowser):
    """Browser for lyricsplanet.com: song/artist search and lyrics retrieval."""
    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'http://www.lyricsplanet.com/'
    # Raw strings: \. and \? are regex escapes, not Python string escapes.
    home = URL('$', HomePage)
    search = URL(r'search\.php$', SearchPage)
    artist = URL(r'search\.php\?field=artisttitle&value=(?P<artistid>[^/]*)$',
                 ArtistPage)
    lyrics = URL(r'lyrics\.php\?id=(?P<songid>[^/]*)$', LyricsPage)

    def iter_lyrics(self, criteria, pattern):
        """Run a search from the home page and return an iterable of songs.

        :param criteria: ``'song'`` or ``'artist'``; for any other value the
            search is still submitted but the method returns ``None``.
        :param pattern: search text handed to the home page's search helper.
        """
        self.home.stay_or_go()
        assert self.home.is_here()
        self.page.search_lyrics(criteria, pattern)
        assert self.search.is_here()
        if criteria == 'song':
            return self.page.iter_song_lyrics()
        elif criteria == 'artist':
            artist_ids = self.page.get_artist_ids()
            # we just take the 3 first artists to avoid too many page loadings
            # Each artist page is opened right away, in order (list, not lazy).
            per_artist = [self.artist.go(artistid=aid).iter_lyrics()
                          for aid in artist_ids[:3]]
            return itertools.chain.from_iterable(per_artist)

    def get_lyrics(self, id):
        """Return the lyrics for song *id*, or ``None`` on HTTP not-found."""
        try:
            self.lyrics.go(songid=id)
            return self.page.get_lyrics()
        except BrowserHTTPNotFound:
            return None
Beispiel #4
0
class IpinfodbBrowser(PagesBrowser):
    """Browser that asks ipinfodb.com for the location of an IP address."""
    BASEURL = 'https://ipinfodb.com/'
    PROFILE = Firefox()
    TIMEOUT = 30

    home = URL('$', LocationPage)

    def get_location(self, ipaddr):
        """Request the home URL with *ipaddr* as the ``ip`` data field.

        Returns whatever the resulting page's ``get_location()`` produces.
        """
        payload = {'ip': ipaddr}
        self.home.go(data=payload)
        location = self.page.get_location()
        return location
Beispiel #5
0
class EnsapBrowser(LoginBrowser):
    """Browser for ensap.gouv.fr: login, subscription and per-year documents."""
    BASEURL = 'https://ensap.gouv.fr'
    PROFILE = Firefox()

    loginp = URL('/web/views/contenus/accueilnonconnecte.html', LoginPage)
    loginvalidity = URL('/authentification', LoginValidityPage)
    authp = URL('/prive/initialiserhabilitation/v1', LoginControlPage)
    homep = URL('/prive/accueilconnecte/v1', HomePage)
    # Raw string: \d is a regex escape, not a valid Python string escape.
    documents = URL(r'/prive/remuneration/v1/(?P<year>\d+)', DocumentsPage)
    listyears = URL('/prive/listeanneeremuneration/v1', ListYear)
    # Set to True once do_login() has completed successfully.
    logged = False
    # Last known XSRF token, sent back in the X-XSRF-TOKEN request header.
    token = None

    def do_login(self):
        """Authenticate with ``self.username`` / ``self.password``.

        Returns ``True`` immediately when already logged in, ``None``
        after a fresh successful login.

        :raises BrowserIncorrectPassword: when the validity page reports
            that the login failed.
        """
        self.logger.debug('call Browser.do_login')
        if self.logged:
            return True

        self.loginp.stay_or_go()
        self.loginvalidity.go(data={
            "identifiant": self.username,
            "secret": self.password
        })
        if not self.page.check_logged():
            raise BrowserIncorrectPassword()
        # An empty JSON body is posted; the resulting page provides the
        # first XSRF token.
        self.authp.go(data="{}", headers={'Content-Type': 'application/json'})
        self.token = self.page.get_xsrf()
        self.logged = True

    @need_login
    def iter_documents(self, subscription):
        """Yield the documents of every listed year, most recent year first.

        *subscription* is accepted for interface compatibility but is not
        used by this implementation.
        """
        self.listyears.go()
        years = self.page.get_years()
        # use reverse order of list to get recent documents first
        for year in years[::-1]:
            self.documents.stay_or_go(year=year,
                                      headers={"X-XSRF-TOKEN": self.token})
            # Pick up the token from the cookie jar for the next request.
            self.token = self.session.cookies.get("XSRF-TOKEN")
            for doc in self.page.iter_documents():
                yield doc

    @need_login
    def iter_subscription(self):
        """Open the logged-in home page and return its subscriptions."""
        self.homep.stay_or_go(headers={"X-XSRF-TOKEN": self.token})
        self.token = self.session.cookies.get("XSRF-TOKEN")
        return self.page.iter_subscription()

    @need_login
    def get_document(self, id):
        """Return the document whose id is *id*.

        The lookup is delegated to ``find_object`` with a
        ``DocumentNotFound`` instance as its ``error`` argument.
        """
        return find_object(self.iter_documents(None),
                           id=id,
                           error=DocumentNotFound())
Beispiel #6
0
class ColissimoBrowser(PagesBrowser):
    """Browser for parcel tracking on colissimo.fr."""
    BASEURL = 'http://www.colissimo.fr'
    PROFILE = Firefox()

    # Raw string: \? is a regex escape, not a valid Python string escape.
    tracking_url = URL(r'/portail_colissimo/suivre.do\?colispart=(?P<_id>.*)',
                       TrackingPage)

    def get_tracking_info(self, _id):
        """Return the list of tracking events for parcel *_id*.

        :raises ParcelNotFound: when the page yields no event; the message
            embeds the error text reported by the page.
        """
        self.tracking_url.stay_or_go(_id=_id)
        events = list(self.page.iter_infos())
        if not events:
            error = self.page.get_error()
            raise ParcelNotFound(u"Parcel not found: {}".format(error))
        return events
Beispiel #7
0
class ColissimoBrowser(PagesBrowser):
    """Browser for parcel tracking through the laposte.fr tracking tool."""
    BASEURL = 'https://www.laposte.fr'
    PROFILE = Firefox()

    # Raw string: \? is a regex escape, not a valid Python string escape.
    tracking_url = URL(
        r'/particulier/outils/suivre-vos-envois\?code=(?P<_id>.*)',
        TrackingPage)

    def get_tracking_info(self, _id):
        """Return the list of tracking events for parcel *_id*.

        :raises ParcelNotFound: when the page yields no event; the message
            embeds the error text reported by the page.
        """
        self.tracking_url.stay_or_go(_id=_id)
        events = list(self.page.iter_infos())
        if not events:
            error = self.page.get_error()
            raise ParcelNotFound(u"Parcel not found: {}".format(error))
        return events
Beispiel #8
0
class HybrideBrowser(PagesBrowser):
    """Browser for the lhybride.org programme and event pages."""
    BASEURL = 'https://www.lhybride.org/'
    PROFILE = Firefox()

    program_page = URL('programmation/a-venir.html', ProgramPage)
    event_page = URL('programmation/item/(?P<_id>.*)', EventPage)

    def list_events(self, date_from, date_to=None, city=None, categories=None):
        """Open the programme page and return its events.

        All four arguments are forwarded unchanged as keyword arguments to
        the page's ``list_events``.
        """
        filters = {
            'date_from': date_from,
            'date_to': date_to,
            'city': city,
            'categories': categories,
        }
        program = self.program_page.go()
        return program.list_events(**filters)

    def get_event(self, _id, event=None):
        """Open the page of event *_id* and return ``get_event(obj=event)``."""
        page = self.event_page.go(_id=_id)
        return page.get_event(obj=event)
Beispiel #9
0
class IpinfodbBrowser(PagesBrowser):
    """Browser that looks up an IP address via the ipinfodb.com search form."""
    BASEURL = 'https://ipinfodb.com/'
    PROFILE = Firefox()
    TIMEOUT = 30

    home = URL('$', HomePage)
    search = URL('ip_locator.php', LocationPage)

    def get_location(self, ipaddr):
        """Return the location found for *ipaddr*.

        The home page is opened, its ``search`` helper is called with the
        address, and the current page's ``get_location()`` result is
        returned.  ``None`` is returned when ``BrowserHTTPNotFound`` is
        raised along the way.
        """
        try:
            self.home.go()
            self.page.search(ipaddr)
            return self.page.get_location()
        except BrowserHTTPNotFound:
            return None
Beispiel #10
0
class EnsapBrowser(LoginBrowser):
    """Browser for ensap.gouv.fr: login, subscription and documents."""
    BASEURL = 'https://ensap.gouv.fr'
    PROFILE = Firefox()

    loginp = URL('/web/views/contenus/accueilnonconnecte.html', LoginPage)
    loginvalidity = URL('/authentification', LoginValidityPage)
    authp = URL('/prive/initialiserhabilitation/v1', LoginControlPage)
    homep = URL('/prive/accueilconnecte/v1', HomePage)
    documents = URL('/prive/remuneration/v1', DocumentsPage)
    # Becomes True after a successful do_login().
    logged = False
    # Last known XSRF token, echoed back in the X-XSRF-TOKEN header.
    token = None

    def do_login(self):
        """Log in with ``self.username`` / ``self.password``.

        Returns ``True`` straight away when a login already happened,
        ``None`` after a fresh successful login.

        :raises BrowserIncorrectPassword: when the validity page says the
            login did not succeed.
        """
        self.logger.debug('call Browser.do_login')
        if self.logged:
            return True

        self.loginp.stay_or_go()
        credentials = {
            "identifiant": self.username,
            "secret": self.password
        }
        self.loginvalidity.go(data=credentials)
        if not self.page.check_logged():
            raise BrowserIncorrectPassword()

        self.authp.go(data={"": ""})
        self.token = self.page.get_xsrf()
        self.logged = True

    @need_login
    def iter_documents(self, subscription):
        """Open the documents page and return its documents.

        *subscription* is not used by this implementation.
        """
        xsrf_header = {"X-XSRF-TOKEN": self.token}
        self.documents.stay_or_go(headers=xsrf_header)
        # Refresh the token from the cookie jar for the next request.
        self.token = self.session.cookies.get("XSRF-TOKEN")
        return self.page.iter_documents()

    @need_login
    def iter_subscription(self):
        """Open the logged-in home page and return its subscriptions."""
        xsrf_header = {"X-XSRF-TOKEN": self.token}
        self.homep.stay_or_go(headers=xsrf_header)
        self.token = self.session.cookies.get("XSRF-TOKEN")
        return self.page.iter_subscription()

    @need_login
    def get_document(self, id):
        """Return the document whose id is *id*.

        The lookup is delegated to ``find_object`` with a
        ``DocumentNotFound`` instance as its ``error`` argument.
        """
        candidates = self.iter_documents(None)
        return find_object(candidates, id=id, error=DocumentNotFound())
Beispiel #11
0
class CpasbienBrowser(PagesBrowser):
    """Browser for cpasbien: torrent search and torrent detail pages."""
    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'http://www.cpasbien.cm/'
    # NOTE(review): the '.' before 'html' below is an unescaped regex
    # wildcard; left untouched so matching behaviour does not change.
    search = URL('recherche/(?P<pattern>.*).html,trie-seeds-d', SearchPage)
    # Raw string: \. is a regex escape, not a valid Python string escape.
    torrent = URL(r'dl-torrent/(?P<id>.*)\.html', TorrentPage)

    def iter_torrents(self, pattern):
        """Open the search URL for *pattern* and return the page's torrents."""
        self.search.go(pattern=pattern)
        return self.page.iter_torrents()

    def get_torrent(self, fullid):
        """Return the torrent for *fullid*, or ``None`` on HTTP not-found."""
        try:
            self.torrent.go(id=fullid)
            return self.page.get_torrent()
        except BrowserHTTPNotFound:
            return None
Beispiel #12
0
class ParolesmusiqueBrowser(PagesBrowser):
    """Browser for paroles-musique.com: song/artist search and lyrics."""
    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'http://www.paroles-musique.com/'
    home = URL('$', HomePage)
    songResults = URL('lyrics-paroles-0-.*,0.php', SongResultsPage)
    artistResults = URL('lyrics-paroles-.*-0,0.php', ArtistResultsPage)
    songLyrics = URL('paroles-(?P<songid>.*,p[0-9]*)', SonglyricsPage)
    artistSongs = URL('paroles-(?P<artistid>.*,a[0-9]*)', ArtistSongsPage)

    def iter_lyrics(self, criteria, pattern):
        """Search from the home page and return an iterable of songs.

        :param criteria: ``'song'`` or ``'artist'``; for any other value the
            search is still submitted but ``None`` is returned.
        :param pattern: search text handed to the home page's search helper.
        """
        self.home.stay_or_go()
        assert self.home.is_here()
        self.page.search_lyrics(criteria, pattern)

        if criteria == 'song':
            assert self.songResults.is_here()
            return self.page.iter_lyrics()

        if criteria != 'artist':
            return None

        assert self.artistResults.is_here()
        found_artists = self.page.get_artist_ids()
        songs = []
        # we just take the 3 first artists to avoid too many page loadings
        for artist_id in found_artists[:3]:
            artist_page = self.artistSongs.go(artistid=artist_id)
            songs = itertools.chain(songs, artist_page.iter_lyrics())
        return songs

    def get_lyrics(self, id):
        """Return the lyrics for song *id*, or ``None`` on HTTP not-found."""
        try:
            self.songLyrics.go(songid=id)
            return self.page.get_lyrics()
        except BrowserHTTPNotFound:
            return None
Beispiel #13
0
class KickassBrowser(PagesBrowser):
    """Browser for kat.cr: torrent search and torrent detail pages."""
    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'https://kat.cr/'
    # Raw strings: \? and \. are regex escapes, not Python string escapes.
    search = URL(r'usearch/(?P<pattern>.*)/\?field=seeders&sorder=desc',
                 SearchPage)
    torrent = URL('torrent-t(?P<id>.*).html', r'.*-t[0-9]*\.html', TorrentPage)

    def iter_torrents(self, pattern):
        """Open the search URL for *pattern* and return the page's torrents."""
        self.search.go(pattern=pattern)
        return self.page.iter_torrents()

    def get_torrent(self, fullid):
        """Return the torrent for *fullid*, or ``None`` on HTTP not-found."""
        try:
            self.torrent.go(id=fullid)
            return self.page.get_torrent()
        except BrowserHTTPNotFound:
            return None
Beispiel #14
0
class LyricsmodeBrowser(PagesBrowser):
    """Browser for lyricsmode.com: search and lyrics retrieval."""
    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'http://www.lyricsmode.com/'
    # Raw strings: \. and \? are regex escapes, not Python string escapes.
    search = URL(r'search\.php\?search=(?P<pattern>[^&/]*)$',
                 SearchPage)
    songLyrics = URL(
        r'lyrics/(?P<letterid>[^/]*)/(?P<artistid>[^/]*)/(?P<songid>[^/]*)\.html$',
        LyricsPage)

    def iter_lyrics(self, criteria, pattern):
        """Search for *pattern* and return the songs of the result page.

        *criteria* is accepted for interface compatibility but is not used:
        the same search URL is requested whatever its value.
        """
        return self.search.go(pattern=pattern).iter_lyrics()

    def get_lyrics(self, id):
        """Return the lyrics for *id*, or ``None`` on HTTP not-found.

        :param id: string of the form ``'<letterid>|<artistid>|<songid>'``.
        """
        subid = id.split('|')
        try:
            self.songLyrics.go(letterid=subid[0],
                               artistid=subid[1],
                               songid=subid[2])
            return self.page.get_lyrics()
        except BrowserHTTPNotFound:
            return None
Beispiel #15
0
class LyricsdotcomBrowser(PagesBrowser):
    """Browser for lyrics.com (search.php variant): search and lyrics."""
    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'http://www.lyrics.com/'
    # Raw strings: \. and \? are regex escapes, not Python string escapes.
    search = URL(
        r'search\.php\?keyword=(?P<pattern>[^&]*)&what=all&search_btn=Search',
        SearchPage)
    songLyrics = URL(r'(?P<id>[^/]*-lyrics-[^/]*)\.html$', LyricsPage)

    def iter_lyrics(self, criteria, pattern):
        """Search for *pattern* and return the songs of the result page.

        *criteria* is accepted for interface compatibility but is not used.
        """
        self.search.go(pattern=pattern)
        assert self.search.is_here()
        return self.page.iter_lyrics()

    def get_lyrics(self, id):
        """Return the lyrics for *id*, or ``None`` on HTTP not-found.

        Only the part of *id* before the first ``'|'`` is used to build the
        lyrics URL.
        """
        real_id = id.split('|')[0]
        try:
            self.songLyrics.go(id=real_id)
            return self.page.get_lyrics()
        except BrowserHTTPNotFound:
            return None
Beispiel #16
0
class Paroles2chansonsBrowser(PagesBrowser):
    """Browser for paroles2chansons.lemonde.fr: search and lyrics."""
    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'http://paroles2chansons.lemonde.fr/'
    home = URL('$', HomePage)
    search = URL('search', SearchPage)
    artist = URL('paroles-(?P<artistid>[^/]*)$', ArtistPage)
    # Raw string: \. is a regex escape, not a valid Python string escape.
    lyrics = URL(r'paroles-(?P<artistid>[^/]*)/paroles-(?P<songid>[^/]*)\.html',
                 LyricsPage)

    def iter_lyrics(self, criteria, pattern):
        """Search from the home page and return an iterable of songs.

        :param criteria: ``'song'`` or ``'artist'``; for any other value the
            search is still submitted but the method returns ``None``.
        :param pattern: search text handed to the home page's search helper.
        """
        self.home.stay_or_go()
        assert self.home.is_here()
        self.page.search_lyrics(pattern)
        assert self.search.is_here()
        if criteria == 'song':
            return self.page.iter_song_lyrics()
        elif criteria == 'artist':
            artist_ids = self.page.get_artist_ids()
            # we just take the 3 first artists to avoid too many page loadings
            # Each artist page is opened right away, in order (list, not lazy).
            per_artist = [self.artist.go(artistid=aid).iter_lyrics()
                          for aid in artist_ids[:3]]
            return itertools.chain.from_iterable(per_artist)

    def get_lyrics(self, id):
        """Return the lyrics for *id*, or ``None`` on HTTP not-found.

        :param id: string of the form ``'<artistid>|<songid>'``.
        """
        ids = id.split('|')
        try:
            self.lyrics.go(artistid=ids[0], songid=ids[1])
            return self.page.get_lyrics()
        except BrowserHTTPNotFound:
            return None
Beispiel #17
0
 def get_resume(self, film_id):
     """Return the resume fetched from the JSON page for *film_id*.

     Only the last ``'/'``-separated segment of *film_id* is used as the
     page id.  The Firefox profile is re-applied afterwards even when the
     request or the parsing raises.
     """
     # presumably switches the session headers to request JSON -- the helper
     # is defined outside this view; verify against its definition.
     self.set_json_header()
     try:
         _id = film_id.split('/')[-1]
         return self.json_page.go(_id=_id).get_resume()
     finally:
         # Always restore the regular profile so that a failure here does
         # not leave the JSON header setup active for later requests.
         self.set_profile(Firefox())
Beispiel #18
0
class LogicimmoBrowser(PagesBrowser):
    """Browser for logic-immo.com: city lookup, housing search and details."""
    BASEURL = 'https://www.logic-immo.com/'
    PROFILE = Firefox()
    # Raw string: \? is a regex escape, not a valid Python string escape.
    city = URL(
        r'asset/t9/getLocalityT9.php\?site=fr&lang=fr&json=%22(?P<pattern>.*)%22',
        CitiesPage)
    search = URL(
        '(?P<type>location-immobilier|vente-immobilier|recherche-colocation)-(?P<cities>.*)/options/(?P<options>.*)',
        SearchPage)
    housing = URL('detail-(?P<_id>.*).htm', HousingPage)
    phone = URL('(?P<urlcontact>.*)', PhonePage)

    # Post type -> first path segment of the search URL.
    TYPES = {
        POSTS_TYPES.RENT: 'location-immobilier',
        POSTS_TYPES.SALE: 'vente-immobilier',
        POSTS_TYPES.SHARING: 'recherche-colocation',
        POSTS_TYPES.FURNISHED_RENT: 'location-immobilier',
        POSTS_TYPES.VIAGER: 'vente-immobilier'
    }

    # House type -> id used in the ``groupprptypesids`` search option.
    RET = {
        HOUSE_TYPES.HOUSE: '2',
        HOUSE_TYPES.APART: '1',
        HOUSE_TYPES.LAND: '3',
        HOUSE_TYPES.PARKING: '10',
        HOUSE_TYPES.OTHER: '14'
    }

    def __init__(self, *args, **kwargs):
        """Create the browser and mark every request as an XMLHttpRequest."""
        super(LogicimmoBrowser, self).__init__(*args, **kwargs)
        self.session.headers['X-Requested-With'] = 'XMLHttpRequest'

    def get_cities(self, pattern):
        """Return the cities matching *pattern*; ``None`` if it is empty."""
        if pattern:
            return self.city.go(pattern=pattern).get_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max,
                        cost_min, cost_max, house_types):
        """Run a housing search and return the matching results.

        The criteria are encoded as ``key=value`` options joined with
        ``'/'`` into the search URL.  Minimum price and area default to
        ``'0'``; the maximum bounds and the room filter are only added when
        given.

        :raises TypeNotSupported: when *type* is not a key of ``TYPES``.
        """
        if type not in self.TYPES:
            raise TypeNotSupported()

        options = []

        if type == POSTS_TYPES.VIAGER:
            # Viager searches always use this single property-type id.
            ret = ['15']
        else:
            # House types without an entry in RET are silently skipped.
            ret = [self.RET[house_type] for house_type in house_types
                   if house_type in self.RET]

        if ret:
            options.append('groupprptypesids=%s' % ','.join(ret))

        if type == POSTS_TYPES.FURNISHED_RENT:
            options.append('searchoptions=4')

        options.append('pricemin=%s' % (cost_min or '0'))

        if cost_max:
            options.append('pricemax=%s' % cost_max)

        options.append('areamin=%s' % (area_min or '0'))

        if area_max:
            options.append('areamax=%s' % area_max)

        if nb_rooms:
            # Every count from nb_rooms up to 6 is listed; sharing searches
            # filter on bedrooms, all others on rooms.
            rooms_key = ('nbbedrooms' if type == POSTS_TYPES.SHARING
                         else 'nbrooms')
            rooms = ','.join(str(i) for i in range(nb_rooms, 7))
            options.append('%s=%s' % (rooms_key, rooms))

        self.search.go(type=self.TYPES.get(type, 'location-immobilier'),
                       cities=cities,
                       options='/'.join(options))

        if type == POSTS_TYPES.SHARING:
            return self.page.iter_sharing()

        return self.page.iter_housings(query_type=type)

    def get_housing(self, _id, housing=None):
        """Open the detail page of *_id* and return its housing object."""
        return self.housing.go(_id=_id).get_housing(obj=housing)

    def get_phone(self, _id):
        """Return the contact phone for housing *_id*.

        Only ids starting with ``'location'`` or ``'vente'`` are handled;
        ``None`` is returned for any other id.
        """
        if not _id.startswith(('location', 'vente')):
            return None
        urlcontact, params = self.housing.stay_or_go(
            _id=_id).get_phone_url_datas()
        return self.phone.go(urlcontact=urlcontact,
                             params=params).get_phone()