imdb_id = url.split("/")[-2] image.retrieve(image_url, "../www/static/images/posters/%s.png" % imdb_id) cursor.execute( "INSERT IGNORE INTO movies (imdb_id, name, image_url, plot, created_time)" "VALUES (%s, %s, %s, %s, %s)", (imdb_id, name.encode("utf-8"), image_url, plot.encode("utf-8"), datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) if cursor.lastrowid != 0: self.distributor.add_task(DetailScrapeTask, { "url": url, "imdb_id": imdb_id, "id": cursor.lastrowid }) yield None if __name__ == "__main__": logging.basicConfig( level=logging.DEBUG, filename="logs/imdb_nowplaying.log", filemode="w", format="%(asctime)s\t%(levelname)s\t%(threadName)s\t%(message)s") distributor = ThreadDistributor(1) distributor.add_task(NowPlayingScrapeTask, "http://www.imdb.com/nowplaying/") distributor.run()
# Module-level connection and cursor used by the task defined below.
db_connect = MySQLdb.connect(DB["host"], DB["user"], DB["passwd"], DB["name"])
cursor = db_connect.cursor()


class InitializeTask(Task):
    """Look up the movies that were added recently.

    ``run`` selects the ``id`` and ``name`` of every row in ``movies`` whose
    ``created_time`` is at most ``TIME_TH`` days before the database's
    ``NOW()``, prints the fetched rows, and then yields ``None`` once.
    """

    def run(self):
        # Query parameters are passed as a one-element tuple, the form the
        # DB-API ``execute`` call expects, rather than as a bare scalar.
        cursor.execute(
            "SELECT `id`, `name` FROM movies "
            "WHERE DATEDIFF(NOW(), created_time) <= %s",
            (TIME_TH,))
        movies = cursor.fetchall()
        # With a single argument the parenthesised form prints exactly what
        # the Python 2 print statement would.
        print(movies)
        yield None


if __name__ == "__main__":
    logging.basicConfig(
        level=logging.DEBUG,
        filename="logs/tweets_error.log",
        # "w" truncates the log file at the start of each run.
        filemode="w",
        format="%(asctime)s\t%(levelname)s\t(%(threadName)-10s)\t%(message)s"
    )

    distributor = ThreadDistributor(N_THREADS)
    distributor.add_task(InitializeTask)
    try:
        distributor.run()
    finally:
        # Release the cursor and its connection even when the run fails.
        cursor.close()
        db_connect.close()