Esempio n. 1
0
 def test_places(self, all_places):
     """Log the foreign title and the link text of the first place.

     Debug helper: reads the ``span.text-grey`` and ``a.all`` elements of
     ``all_places[0]`` and writes their text to the log.
     """
     first_place = all_places[0]
     title_span = first_place.find('span', {'class': 'text-grey'})
     foreign_title = title_span.get_text()
     link = first_place.find('a', {'class': 'all'})
     logger.info(TAG, foreign_title)
     logger.info(TAG, link.text)
Esempio n. 2
0
 def get_page_source(self, url):
     """Load ``url`` in a new browser session and return the page HTML.

     A session is opened with ``init_session()`` before navigating, and
     ``close()`` is always called afterwards -- also when navigation or
     reading ``page_source`` raises -- so a failed load no longer skips
     the cleanup step.
     """
     logger.info(TAG, 'browser load url: {0}'.format(url))
     self.init_session()
     try:
         self.driver.get(url)
         return self.driver.page_source
     finally:
         # Runs on success and on error alike.
         self.close()
Esempio n. 3
0
 def save_img(self, url, film_id):
     """Download ``url`` and write the response body to the image cache.

     The file is written in binary mode to
     ``cache/images/<film_id>_800x600.<IMG_FILE_FORMAT>``.
     """
     logger.info(TAG, 'save img url: {0}'.format(url))
     payload = requests.get(url).content
     target_path = 'cache/images/{0}_800x600.{1}'.format(
         film_id, IMG_FILE_FORMAT)
     with open(target_path, 'wb') as out_file:
         out_file.write(payload)
Esempio n. 4
0
    def clear_db(self):
        """Delete every ``*.<SQLITE_DB_EXT>`` file inside ``DB_FOLDER``.

        The process working directory is switched to ``DB_FOLDER`` while
        the files are listed and removed, and is set to ``self.work_dir``
        afterwards -- also when listing or removing a file raises.
        """
        os.chdir(DB_FOLDER)
        try:
            db_names = glob.glob("*.{}".format(SQLITE_DB_EXT))
            logger.info(TAG, db_names)
            for db in db_names:
                os.remove(db)
        finally:
            # Never leave the process stranded inside DB_FOLDER.
            os.chdir(self.work_dir)
Esempio n. 5
0
    def get_page_source(self, url, is_wall=False, film_id=None):
        """Return the HTML for ``url``, using the film cache when possible.

        Without a ``film_id`` the page is fetched through ``self.browser``
        and returned as is.  With a ``film_id`` a cached copy is returned
        if ``check_cached_film`` reports one; otherwise the page is
        fetched, passed to ``cache_page`` and returned.

        ``is_wall`` is accepted but not read by this method.
        """
        if film_id is None:
            return self.browser.get_page_source(url)

        if self.check_cached_film(film_id):
            return self.get_cached_film(film_id)

        page = self.browser.get_page_source(url)
        logger.info(TAG, 'caching page: {0}'.format(film_id))
        self.cache_page(page, film_id)
        return page
Esempio n. 6
0
 def download_images(self):
     """Load the wall page of every top-250 film and process its image.

     For each film in ``self.top_250_films`` the page returned by
     ``load_film_wall`` is handed to ``process_img``.  Films whose wall
     page is ``None`` are logged and skipped.
     """
     logger.info(
         TAG, 'self.top_250_films len: {0}'.format(len(self.top_250_films)))
     for film in self.top_250_films:
         text = self.load_film_wall(film.film_id)
         # Identity check: None is a singleton and must not go through __eq__.
         if text is None:
             logger.info(TAG, 'wall page is null')
             continue
         self.process_img(text, film)
Esempio n. 7
0
    def get_stylized_images_names(self):
        """Return the shuffled base names of the stylized images.

        Lists the ``*.<IMG_FILE_FORMAT>`` files in
        ``FULL_STYLIZED_IMAGES_PATH``, strips the trailing extension from
        each name, shuffles the list in place and returns it.  The names
        and their count are logged.

        The working directory is switched to the images folder while
        listing and is set to ``self.work_dir`` afterwards, also when an
        error occurs in between.
        """
        suffix = '.{}'.format(IMG_FILE_FORMAT)
        os.chdir(FULL_STYLIZED_IMAGES_PATH)
        try:
            # Strip only the trailing extension: str.replace would also
            # drop any earlier ".<ext>" occurrence inside the file name.
            images_names = [
                name[:-len(suffix)] if name.endswith(suffix) else name
                for name in glob.glob("*.{}".format(IMG_FILE_FORMAT))
            ]
            shuffle(images_names)

            logger.info(TAG, images_names)
            logger.info(TAG, len(images_names))
        finally:
            os.chdir(self.work_dir)
        return images_names
Esempio n. 8
0
    def resize(self, basewidth):
        """Resize the stylized images into ``RESIZED_IMAGES_FOLDER``.

        Every ``*.<IMG_FILE_FORMAT>`` file found in
        ``FULL_STYLIZED_IMAGES_PATH`` is passed, together with
        ``basewidth``, to the module-level ``resize`` function, which
        receives the input path and the output path of the same file name.
        In test mode only the first image is processed.

        The final log line reports the number of images actually resized
        (previously it reported the number of files found, which was wrong
        in test mode).
        """
        os.chdir(FULL_STYLIZED_IMAGES_PATH)
        try:
            images = glob.glob('*.{}'.format(IMG_FILE_FORMAT))
        finally:
            # Restore the working directory even if listing fails.
            os.chdir(self.work_dir)

        resized_count = 0
        for image in images:
            image_in = '{0}/{1}'.format(FULL_STYLIZED_IMAGES_PATH, image)
            image_out = '{0}/{1}'.format(RESIZED_IMAGES_FOLDER, image)
            # Resolves to the module-level resize(), not this method.
            resize(image_in, image_out, basewidth)
            resized_count += 1
            logger.info(TAG, image_out)
            if self.test_mode:
                break

        logger.info(TAG, 'Success resized {} images'.format(resized_count))
Esempio n. 9
0
 def process_films(self, films):
     """Insert or update one puzzle row per film, numbering from 1.

     For each film a JSON payload is built with ``createJson`` and the
     next row is read from ``self.cur``.  When the cursor has no more
     rows a new puzzle is inserted under the running number; otherwise
     the fetched row (identified by ``row[0]``) is updated.
     ``update_levels`` is called for every film with the fetched row
     (possibly ``None``) and the running number.

     NOTE(review): this presumably relies on ``self.cur`` holding a
     pending SELECT over the puzzles table -- confirm against the caller.
     """
     for i, film in enumerate(films, 1):
         # Named film_json so the json module name is not shadowed here.
         film_json = self.createJson(film)
         row = self.cur.fetchone()
         if row is None:
             self.insert_puzzle(i, self.get_film_title(film), film_json, 0)
             logger.info(
                 TAG, 'insert {0} film with lang {1}'.format(i, self.lang))
         else:
             self.update_puzzle(
                 self.get_film_title(film), film_json, 0, row[0])
             logger.info(
                 TAG, 'update {0} film with lang {1}'.format(i, self.lang))
         self.update_levels(row, i)
Esempio n. 10
0
 def update_country(self, film_id, country):
     """Set the ``country`` field of the puzzle whose JSON has ``film_id``.

     Scans the ``puzzles`` table row by row, decoding column 2 as JSON.
     The first row whose ``film_id`` matches gets its ``country``
     replaced and is written back through ``update_puzzle``.  When no
     row matches, 'film not found' is logged.

     Fixes over the previous version: the scan stops cleanly at the end
     of the table instead of indexing into ``None``; the not-found
     message is logged only when nothing matched; the cursor is closed
     even when an error occurs.
     """
     c = self.con.cursor()
     found = False
     try:
         c.execute("SELECT * FROM puzzles")
         row = c.fetchone()
         while row is not None:
             parsed_string = json.loads(row[2])
             if parsed_string['film_id'] == film_id:
                 parsed_string['country'] = country
                 # NOTE(review): str() of encoded bytes yields "b'...'" on
                 # Python 3; kept as is because update_puzzle's expected
                 # input type is not visible here -- confirm.
                 result = json.dumps(parsed_string,
                                     ensure_ascii=False).encode('utf8')
                 self.update_puzzle(row[1], str(result), 0, row[0])
                 found = True
                 break
             row = c.fetchone()
         if not found:
             logger.info(TAG, 'film not found')
     finally:
         c.close()
Esempio n. 11
0
    def __init__(self, lang='ru'):
        """Open the per-language puzzle database and prepare the cursor.

        Records the current working directory as ``base_dir``, makes sure
        ``DB_FOLDER`` and ``CSV_OUTPUT_FOLDER`` exist, connects to
        ``<base_dir>/<DB_FOLDER>/<DB_FILE_NAME>_<lang>.<SQLITE_DB_EXT>``,
        runs ``create_db`` and leaves ``self.cur`` with a
        ``SELECT * FROM puzzles`` executed on it.
        """
        self.base_dir = os.getcwd()
        self.lang = lang
        logger.info(TAG, self.base_dir)

        for folder in (DB_FOLDER, CSV_OUTPUT_FOLDER):
            check_folder_and_create(folder)

        relative_db_path = "{0}/{1}_{2}.{3}".format(
            DB_FOLDER, DB_FILE_NAME, lang, SQLITE_DB_EXT)
        self.con = lite.connect(os.path.join(self.base_dir, relative_db_path))

        with self.con:
            self.cur = self.con.cursor()
            self.create_db(self.con)
            self.cur.execute("SELECT * FROM puzzles")

        self.mLevel = 1
        self.level_pack_count = 40
Esempio n. 12
0
 def process_img(self, text, film):
     """Find the first wallpaper link on a wall page and save the image.

     Looks for the ``fotos fotos2`` table, falling back to
     ``fotos fotos1``.  If neither exists the film is logged as having
     no wallpapers.  Otherwise the first ``<a href>`` in the table is
     resolved through ``getImageUrl``, stored on ``film.img_url``,
     downloaded via ``save_img`` and recorded via ``save_img_url``.
     """
     soup = BeautifulSoup(text)
     wall_table = soup.find('table', {'class': 'fotos fotos2'})
     if wall_table is None:
         wall_table = soup.find('table', {'class': 'fotos fotos1'})
     if wall_table is None:
         # The page has no wallpapers table at all.
         logger.info(
             TAG, 'film {0} not have wallpapers'.format(film.film_id))
         return

     links = wall_table.find_all('a', href=True)
     if not links:
         return

     # Only the first link is used (original note: the 800x600 wallpaper).
     href = links[0].get('href')
     resolved = self.getImageUrl(href)
     film.img_url = '{0}{1}'.format('http://', resolved)
     self.save_img(film.img_url, film.film_id)
     self.save_img_url(film)
Esempio n. 13
0
    def setup_top_250_films_ids(self, film_ids_list=None):
        """Build ``self.top_250_films`` from the top-250 page.

        The page is fetched with ``get_page_content``, cached under
        'top250' and parsed.  Each ``<tr>`` whose id matches
        ``top250_place_[0-9]`` yields a film id (third segment of the
        link's href).  Films are skipped when ``film_ids_list`` is given
        and does not contain the id, or when ``retrive_country`` returns
        ``None``.  In test mode the scan stops after two accepted films.
        Finally the list is shuffled and passed to ``arrange_films_to_db``.
        """
        text = self.get_page_content(TOP_250)
        self.cache_page(text, 'top250')
        soup = BeautifulSoup(text)

        place_rows = soup.find_all(
            'tr', {'id': re.compile("top250_place_[0-9]")})

        logger.info(TAG, 'Films count: {}'.format(len(place_rows)))
        accepted = 0
        for place_row in place_rows:
            title_span = place_row.find('span', {'class': 'text-grey'})
            foreign_title = None
            if title_span is not None:
                foreign_title = title_span.get_text()
            link = place_row.find('a', {'class': 'all'})

            # href segments are split on '/'; the id is the third piece.
            film_id = link.get('href').replace('/', ' ').split(' ')[2]

            if film_ids_list is not None and film_id not in film_ids_list:
                continue

            country = self.retrive_country(film_id)
            if country is None:
                continue

            film = Film(film_id, "", self.clear_film_title(link.text),
                        country.lower())
            if foreign_title is not None:
                film.is_eng = True
                film.foreign_title = foreign_title
            logger.info(TAG, 'film_id: {0}'.format(film.film_id))
            self.top_250_films.append(film)
            logger.info(TAG, 'film proceed: {0}'.format(accepted))
            accepted += 1

            if self.test_mode and accepted == 2:
                break

        shuffle(self.top_250_films)
        arrange_films_to_db(self.top_250_films)
Esempio n. 14
0
    def init_workspace(self):
        """Record the torch working paths and list the available models.

        Ensures ``<cwd>/torch/models`` exists, stores ``<cwd>/torch/`` as
        ``act_dir`` and the current directory as ``work_dir``, then changes
        into the models folder and collects the ``*.t7`` file names into
        ``self.models``.  A hint is logged when the Lua script is missing
        from ``act_dir``.

        NOTE(review): the working directory is left inside the models
        folder when this method returns -- confirm that this is intended.
        """
        models_dir = os.getcwd() + '/torch/models'
        check_folder_and_create(models_dir)

        current_dir = os.getcwd()
        self.act_dir = current_dir + '/torch/'
        self.work_dir = current_dir
        os.chdir(self.act_dir + 'models')

        self.models = glob.glob("*.t7")

        logger.info(TAG, self.models)
        logger.info(TAG, len(self.models))

        self.lua_script = 'fast_neural_style.lua'

        script_path = '{0}{1}'.format(self.act_dir, self.lua_script)
        if os.path.exists(script_path):
            return
        logger.info(
            TAG,
            'Please put {0} to folder {1}'.format(self.lua_script,
                                                  self.act_dir))
Esempio n. 15
0
 def get_page_source(self, url):
     """Log the URL about to be loaded, prefixed with ``self.PRESIX``."""
     message = '{0} load url: {1}'.format(self.PRESIX, url)
     logger.info(TAG, message)
Esempio n. 16
0
    def get_json_film_info_with_id(self, film_id):
        """Scrape the film page for ``film_id`` into UTF-8 encoded JSON.

        Returns ``None`` (after logging) when ``load_film`` yields no page.
        Otherwise reads the title and foreign title from the header block
        and selected rows of the ``info`` table, and returns the collected
        fields as ``json.dumps(..., ensure_ascii=False).encode('utf8')``.

        Table rows are picked by position (0, 1, 2, 3, 10, 11, 19); rows
        at other positions -- including any beyond the last mapped one --
        are ignored.  For rows flagged as links the text of the first
        ``<a>`` in the second cell is used, otherwise the cell's own text.

        NOTE(review): the output key 'forreign_title' is misspelled but is
        kept because consumers of this JSON may depend on it.
        """
        html = self.load_film(film_id)
        # Check before parsing: the not-found branch must be reached
        # without handing None to the HTML parser.
        if html is None:
            logger.info(TAG, 'film with id {0} not found'.format(film_id))
            return None

        soup = BeautifulSoup(html)

        # attrs are given as dicts, like the other lookups in this method
        # (the previous set literals were a typo for {'class': ...}).
        film_header = soup.find(
            'div', {'class': 'feature_film_background country_num1'})
        title_h1 = film_header.find('h1', {'class': 'moviename-big'})
        foreign_title_span = film_header.find('span')

        title = title_h1.get_text()
        foreign_title = foreign_title_span.get_text()

        film_table = soup.find('table', {'class': 'info'})
        all_tr = film_table.find_all('tr')

        # Row position -> (output key, value is the first link's text).
        row_fields = {
            0: ('year', True),
            1: ('country', True),
            2: ('tagline_ru', False),
            3: ('director', True),
            10: ('genre', True),
            11: ('dollar', True),
            19: ('time', False),
        }

        film_json = {}
        for i, tr in enumerate(all_tr):
            field = row_fields.get(i)
            if field is None:
                continue
            key, is_a = field
            td = tr.find_all('td')[1]
            film_json[key] = (td.find_all('a')[0].get_text()
                              if is_a else td.get_text())

        film_json['title'] = title
        film_json['forreign_title'] = foreign_title
        film_json['film_id'] = str(film_id)

        return json.dumps(film_json, ensure_ascii=False).encode('utf8')
Esempio n. 17
0
 def load_film(self, film_id, add=''):
     """Return the page source of ``BASE_URL/film/<film_id><add>``.

     The request goes through ``get_page_source`` with ``is_wall=True``
     and the same ``film_id``.
     """
     logger.info(TAG, 'load film id: {0}'.format(film_id))
     film_url = '%s/film/%s%s' % (BASE_URL, film_id, add)
     page = self.get_page_source(film_url, is_wall=True, film_id=film_id)
     return page
Esempio n. 18
0
 def save_img_url(self, film):
     """Append ``"<film_id> <img_url>"`` as a line to cache/img_urls.txt."""
     entry = '{0} {1}'.format(film.film_id, film.img_url)
     logger.info(TAG, 'save img url: {0}'.format(entry))
     line = '{0}\n'.format(entry)
     with open('cache/img_urls.txt', 'a') as url_log:
         url_log.write(line)
Esempio n. 19
0
 def test_setup_all(self):
     """Log the ``film_id`` of every entry in ``self.top_250_films``."""
     for entry in self.top_250_films:
         film_id = entry.film_id
         logger.info(TAG, film_id)