def __init__(self, id, buffer):
    """Initialise the object from a page buffer and parse it immediately.

    :param id: identifier stored as ``self.id``.
    :param buffer: raw page content; kept as ``self.buffer`` and passed to
        ``fromstring`` to build ``self.html``.

    After the fields are reset, the constructor computes ``self.full_id``
    via ``get_full_id()``, logs it, creates the ``kinopoisk`` file cache and
    the network manager, and finally calls ``self.parse()``.
    """
    # Inputs first: get_full_id() and parse() run later in this constructor
    # and may read any of these.
    self.buffer = buffer
    self.html = fromstring(buffer)
    self.id = id

    # Scalar fields start out unset (presumably filled in by parse()).
    self.alternative_title = self.slogan = None
    self.length = self.year = None
    self.rating_kinopoisk = self.rating_imdb = self.rating_critics = None
    self.rating_mpaa = self.age_restriction = None
    self.world_premiere = self.production_status = None

    # Collection fields each get their own fresh, empty list.
    self.countries = []
    self.countries_to_save = []
    self.persons = []
    self.cast = []
    self.ratings = []
    self.genres = []
    self.premieres = []
    self.dates = []
    self.boxes = []

    self.full_id = self.get_full_id()
    logger.info('Full ID = %s' % self.full_id)

    # Cache location comes from the CACHE_PATH environment variable
    # (None when the variable is not set).
    cache_dir = os.environ.get('CACHE_PATH')
    self.cache = FileCache(namespace='kinopoisk', path=cache_dir)
    self.net = NetworkManager()

    self.parse()
def __init__(self):
    """Parse command-line options and create the cache and network helpers.

    The parsed options are stored in ``self.args``; the only option is the
    optional ``--url``.
    """
    cli = argparse.ArgumentParser(description='Parse database export script')
    cli.add_argument(
        '--url',
        type=str,
        required=False,
        help='Process only this URL',
    )
    self.args = cli.parse_args()

    # Cache location comes from the CACHE_PATH environment variable
    # (None when the variable is not set).
    cache_dir = os.environ.get('CACHE_PATH')
    self.cache = FileCache(namespace='germany-cities', path=cache_dir)
    self.net = NetworkManager()
def __init__(self, id):
    """Initialise the object for ``id``, then fetch and parse its page.

    :param id: identifier stored as ``self.id`` and substituted into the
        ``<base>/name/<id>/`` URL that is fetched.

    The page is obtained through ``self.get_page`` and handed to
    ``self.parse``; both are called from this constructor.
    """
    self.id = id

    # All descriptive fields start out unset (presumably filled by parse()).
    self.name = self.alternative_name = None
    self.birth_date = self.birth_place = None
    self.death_date = self.death_place = None
    self.growth = None

    # Cache location comes from the CACHE_PATH environment variable
    # (None when the variable is not set).
    cache_dir = os.environ.get('CACHE_PATH')
    self.cache = FileCache(namespace='kinopoisk', path=cache_dir)
    self.net = NetworkManager()

    # self.base is not set here; it is expected to be provided by the class.
    url = '%s/name/%s/' % (self.base, id)
    self.parse(self.get_page(url))
def __init__(self):
    """Parse command-line options, create helpers and connect to the database.

    Parsed options are stored in ``self.args``. The constructor then creates
    the ``kinopoisk`` file cache and the network manager, opens the database
    connection with ``db.connect(config.dsn)`` and, when ``--year`` was
    given, forwards it to ``self.set_year``.
    """
    parser = argparse.ArgumentParser(description='kinopoisk.ru parser')
    parser.add_argument('--year', type=int, help='Year to process')
    parser.add_argument('--hostname', type=str, help='Hostname',
                        required=False, default=gethostname())
    parser.add_argument('--film-id', type=int, help='Film ID')
    parser.add_argument('--sleep-time', type=int,
                        help='Max sleep time between requests', default=20)
    parser.add_argument('--total', required=False, default=False,
                        action='store_true')
    parser.add_argument('--read-only', required=False, default=False,
                        action='store_true')
    parser.add_argument('--update', required=False, default=False,
                        action='store_true')
    parser.add_argument('--start-page', required=False, default=1, type=int)
    parser.add_argument('--persons', required=False, default=False,
                        action='store_true')
    parser.add_argument('--from-id', required=False, default=1, type=int)
    # --to-id is converted to int like --from-id. Without an explicit type
    # argparse hands the value over as a string, so the two bounds would
    # have different types. The default stays None when the option is
    # omitted.
    parser.add_argument('--to-id', required=False, default=None, type=int)
    self.args = parser.parse_args()

    # Cache location comes from the CACHE_PATH environment variable
    # (None when the variable is not set).
    self.cache = FileCache(namespace='kinopoisk',
                           path=os.environ.get('CACHE_PATH'))
    self.net = NetworkManager()

    # Initialization of database connection
    db.connect(config.dsn)

    if self.args.year is not None:
        self.set_year(self.args.year)
def __init__(self):
    """Configure logging, parse command-line options and open the database.

    Logging goes to stdout at INFO level. After the base-class constructor
    has run, extra options are registered on ``self.parser`` and the parsed
    result is stored in ``self.args``. The constructor then creates the
    network manager and the ``varlamov.ru`` file cache, and opens a psycopg2
    connection (``self.conn``) switched to autocommit isolation level.
    """
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=log_format, level=logging.INFO,
                        stream=sys.stdout)
    logger.setLevel(logging.INFO)
    logging.getLogger('cache').setLevel(logging.INFO)

    # self.parser is not created here; presumably the base class provides it.
    super(App, self).__init__()

    add_option = self.parser.add_argument
    add_option('--post', type=str, help='Post to parse')
    add_option('--from-year', type=int, default=2006,
               help='Year to start parse from')
    add_option('--from-month', type=int, default=1,
               help='Month to start parse from')
    add_option('--update', action='store_true', default=False,
               help='Do not use cache to construct post list')
    # The default DSN is the module-level name `db`.
    add_option('--db', type=str, default=db, help='Database DSN')
    add_option('--image', type=str, help='Process one image and exit')
    self.args = self.parser.parse_args()

    self.net = NetworkManager()
    self.cache = FileCache(path=cache_path, namespace='varlamov.ru')

    self.conn = psycopg2.connect(self.args.db)
    autocommit = psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT
    self.conn.set_isolation_level(autocommit)
def __init__(self):
    """Create the ``russian-cities`` file cache and the network manager."""
    # Cache location comes from the CACHE_PATH environment variable
    # (None when the variable is not set).
    cache_dir = os.environ.get('CACHE_PATH')
    self.cache = FileCache(namespace='russian-cities', path=cache_dir)
    self.net = NetworkManager()