def __init__(self, media, year):
    """Prepare the base URL, request headers and query payload.

    Args:
        media: Media type to query; must be "movie", "episode" or "series".
        year: Value stored under the "y" key of the query payload.

    Raises:
        ValueError: If ``media`` is not one of the accepted media types.
    """
    self.baseurl = "http://www.omdbapi.com/?"
    chrome_agent = spoof().browser("Chrome", 0)
    self.headers = {"user-agent": chrome_agent}

    # Guard clause: reject unknown media types before building the payload.
    accepted = ("movie", "episode", "series")
    if media not in accepted:
        raise ValueError("invalid mediatype %s" % media)

    self.payload = {"type": media, "v": 1, "r": "json", "y": year}
def listings(self):
    """Scrape the listing page at ``self.url`` and return the collected movies.

    The page is fetched with a spoofed Chrome user agent and parsed with
    html5lib. The text of every ``<a class="visual-title dark">`` element is
    taken as a listing title, and duplicates are dropped with ``np.unique``.
    Titles containing "3D" or "Anniversary" are skipped. A trailing
    " (year)" suffix, when present, is split off into the "year" field.

    Returns:
        ``m.movies`` of a ``data.Data(mediatype="movie")`` instance after
        ``collect`` has been called with the parsed listing dictionaries.
    """
    req = requests.get(
        self.url, headers={"user-agent": spoof().browser("Chrome", 0)})
    soup = bsoup(req.text, "html5lib")
    titles = soup.findAll("a", {"class": "visual-title dark"})
    ttls = np.unique([title.get_text().strip() for title in titles])

    dicts = []
    for t in ttls:
        # 3D and anniversary re-releases are deliberately left out.
        if "3D" in t or "Anniversary" in t:
            continue

        # Split on the LAST " (" only, so a title that itself contains
        # parentheses (e.g. "A (B) (2016)") no longer breaks the two-value
        # unpacking. When no " (" is present the whole text is the title.
        head, sep, tail = t.rpartition(" (")
        if sep:
            ttl, yr = head, tail.replace(")", "")
        else:
            ttl, yr = t, ""

        dicts.append({
            "title": ttl,
            "year": yr,
            "rank": "",
            "watched": None
        })

    m = data.Data(mediatype="movie")
    m.collect(args=dicts)
    return m.movies
'''
Created on Jan 9, 2017

@author: dysmas
'''
import os

from useragentx.useragent import spoof


def fullpath(filename):
    """Return the absolute path of ``filename`` relative to this module's directory."""
    module_dir = os.path.dirname(__file__)
    joined = os.path.join(module_dir, filename)
    return os.path.abspath(joined)


# Computed once at import time from spoof().browser("Chrome", 0).
useragent = spoof().browser("Chrome", 0)
def __init__(self, title, year):
    """Store the title and year, then prepare the request header and URL.

    Args:
        title: Stored as ``self.title``.
        year: Stored as ``self.year``.
    """
    self.title = title
    self.year = year
    chrome_agent = spoof().browser("Chrome", 0)
    self.header = {"user-agent": chrome_agent}
    self.baseurl = "http://www.rogerebert.com/reviews/"
    # Built last: _build_url() runs after the attributes above are set.
    self.url = self._build_url()
def __init__(self, ID):
    """Build the IMDb title URL for ``ID`` and the request headers.

    Args:
        ID: Identifier substituted into ``http://www.imdb.com/title/%s``.
    """
    title_url = 'http://www.imdb.com/title/%s' % ID
    self.url = title_url
    chrome_agent = spoof().browser("Chrome", 0)
    self.headers = {"user-agent": chrome_agent}
def __init__(self, mediatype, dbfile=fullpath("data/data.db")):
    """Record the media type, open the database and start an empty movie list.

    Args:
        mediatype: Stored as ``self.mediatype``.
        dbfile: Path passed to ``Database``; the default is computed once,
            when the function is defined, via ``fullpath("data/data.db")``.
    """
    self.mediatype = mediatype
    firefox_agent = spoof().browser("Firefox", 0)
    self.req_headers = {'user-agent': firefox_agent}
    self.dbfile = dbfile
    self.db = Database(dbfile=self.dbfile)
    self.movies = list()