def crawl_by_title(movie_name, verbose, year=None, parent_pbar=None):
    """Extracts a movie profile from Metacritic and saves it to disk.

    Parameters
    ----------
    movie_name : str
        The title of the movie to crawl.
    verbose : bool
        If True, progress messages are emitted.
    year : int, optional
        Release year, used to disambiguate same-titled movies.
    parent_pbar : tqdm.tqdm, optional
        A parent progress bar; when given, messages are shown as its
        description instead of being printed.

    Returns
    -------
    _result
        EXIST if the profile file is already on disk, SUCCESS after a
        successful crawl-and-save, FAILURE on any exception.
    """
    def _print(msg):
        # Route messages through the parent progress bar when one exists so
        # output does not interleave with its rendering.
        if verbose:
            if parent_pbar is not None:
                parent_pbar.set_description(msg)
                parent_pbar.refresh()
                sys.stdout.flush()
                # NOTE: the original also called `tqdm()` here, which created
                # a spurious empty progress bar on every message; removed.
            else:
                print(msg)
    os.makedirs(METACRITIC_DIR_PATH, exist_ok=True)
    file_name = _parse_name_for_file_name(movie_name) + ".json"
    file_path = os.path.join(METACRITIC_DIR_PATH, file_name)
    if os.path.isfile(file_path):
        _print('{} already processed'.format(movie_name))
        return _result.EXIST
    try:
        props = get_metacritic_movie_properties(movie_name, year)
        # Prefix keys so Metacritic fields don't collide with other sources
        # when profiles are later merged.
        props = {'mc_' + key: val for key, val in props.items()}
        with open(file_path, 'w+') as json_file:
            json.dump(props, json_file, indent=2, sort_keys=True)
        _print("Done saving a profile for {}.".format(movie_name))
        return _result.SUCCESS
    except Exception:
        _print("Extracting a profile for {} failed".format(movie_name))
        # traceback.print_exc()
        return _result.FAILURE
def crawl_by_title(movie_name, verbose, year=None, parent_pbar=None):
    """Extracts a movie profile from IMDB and saves it to disk.

    Parameters
    ----------
    movie_name : str
        The title of the movie to crawl.
    verbose : bool
        If True, progress messages are emitted.
    year : int, optional
        Release year, used to disambiguate same-titled movies.
    parent_pbar : tqdm.tqdm, optional
        A parent progress bar; when given, messages are shown as its
        description instead of being printed.

    Returns
    -------
    _result
        EXIST if the profile file is already on disk, SUCCESS after a
        successful crawl-and-save, FAILURE on any exception.
    """
    def _print(msg):
        # Route messages through the parent progress bar when one exists so
        # output does not interleave with its rendering.
        if verbose:
            if parent_pbar is not None:
                parent_pbar.set_description(msg)
                parent_pbar.refresh()
                sys.stdout.flush()
                # NOTE: the original also called `tqdm()` here, which created
                # a spurious empty progress bar on every message; removed.
            else:
                print(msg)
    os.makedirs(_IMDB_DIR_PATH, exist_ok=True)
    file_name = _parse_name_for_file_name(movie_name) + '.json'
    file_path = os.path.join(_IMDB_DIR_PATH, file_name)
    if os.path.isfile(file_path):
        _print('{} already processed'.format(movie_name))
        return _result.EXIST
    try:
        props = crawl_movie_profile(movie_name, year)
        with open(file_path, 'w+') as json_file:
            json.dump(props, json_file, indent=2)
        _print("Done saving a profile for {}.".format(movie_name))
        return _result.SUCCESS
    except Exception:
        _print("Extracting a profile for {} failed".format(movie_name))
        # traceback.print_exc()
        return _result.FAILURE
def test_dump_monkey(self):
    """Testing dumps of monkey types."""
    try:
        _build_test_dirs()
        pet = TestDump._Monkey("Johnny", 54)
        payload = {"my_pet": pet}
        # Round-trip the custom type through the file using the
        # monkey-specific encoder/decoder hooks.
        with open(_TEST_FILE, 'w+') as fileobj:
            morejson.dump(
                payload, fileobj,
                default=TestDump._monkey_default_encoder)
        with open(_TEST_FILE, 'r') as fileobj:
            loaded = morejson.load(
                fileobj, object_hook=TestDump._monkey_object_hook)
        self.assertEqual(payload, loaded)
    finally:
        _dismantle_test_dirs()
def build_united_profiles(verbose):
    """Build movie profiles with data from all resources."""
    os.makedirs(_UNITED_DIR_PATH, exist_ok=True)
    profile_names = sorted(_prof_names_in_all_resources())
    if verbose:
        print("Building movie profiles with data from all resources.")
        profile_names = tqdm(profile_names)
    for profile_name in profile_names:
        fname = profile_name + '.json'
        # Start from the IMDB profile, then overlay the Metacritic one;
        # on key collisions the Metacritic value wins.
        with open(os.path.join(_IMDB_DIR_PATH, fname), 'r') as src:
            merged = json.load(src)
        with open(os.path.join(_METACRITIC_DIR_PATH, fname), 'r') as src:
            merged.update(json.load(src))
        with open(os.path.join(_UNITED_DIR_PATH, fname), 'w+') as dest:
            json.dump(merged, dest, indent=2, sort_keys=True)
def test_regular_dump(self):
    """Testing dump and load of regular types."""
    try:
        _build_test_dirs()
        payload = {
            'array': [1, 2, 3],
            'string': 'trololo',
            'int': 1,
            'float': 4.32,
            'true': True,
            'false': False,
            'null': None,
        }
        with open(_TEST_FILE, 'w+') as fileobj:
            morejson.dump(payload, fileobj)
        # Plain JSON types must be readable back by the stdlib json module.
        with open(_TEST_FILE, 'r') as fileobj:
            loaded = json.load(fileobj)
        self.assertEqual(payload, loaded)
    finally:
        _dismantle_test_dirs()
def test_dumps_complex(self):
    """Testing dump and load of complex types."""
    try:
        _build_test_dirs()
        payload = {
            'complex1': complex(1, 34.2),
            'complex2': complex(-98.213, 91823),
            'array': [1, 2, 3],
            'string': 'trololo',
            'int': 1,
            'float': 4.32,
            'true': True,
            'false': False,
            'null': None,
        }
        # Complex numbers must survive a morejson round trip to disk.
        with open(_TEST_FILE, 'w+') as fileobj:
            morejson.dump(payload, fileobj)
        with open(_TEST_FILE, 'r') as fileobj:
            loaded = morejson.load(fileobj)
        self.assertEqual(payload, loaded)
    finally:
        _dismantle_test_dirs()
def test_dumps_timedelta(self):
    """Testing dump and load of timedelta types."""
    try:
        _build_test_dirs()
        payload = {
            'timedelta1': datetime.timedelta(days=392),
            'timedelta2': datetime.timedelta(weeks=2, hours=23),
            'timedelta3': datetime.timedelta(microseconds=27836),
            'array': [1, 2, 3],
            'string': 'trololo',
            'int': 1,
            'float': 4.32,
            'true': True,
            'false': False,
            'null': None,
        }
        # Timedelta values must survive a morejson round trip to disk.
        with open(_TEST_FILE, 'w+') as fileobj:
            morejson.dump(payload, fileobj)
        with open(_TEST_FILE, 'r') as fileobj:
            loaded = morejson.load(fileobj)
        self.assertEqual(payload, loaded)
    finally:
        _dismantle_test_dirs()
def test_dumps_datetime_with_fold(self):
    """Testing dump and load of datetime types."""
    # The `fold` attribute was added to datetime in Python 3.6, so skip on
    # older interpreters. The original guard (`major < 3 or minor < 6`)
    # was wrong logic: it would also skip on any future major version with
    # minor < 6 (e.g. 4.0). Tuple comparison handles versions correctly.
    if sys.version_info < (3, 6):
        return
    try:
        _build_test_dirs()
        dt = datetime.datetime(year=2012, month=10, day=10, fold=1)
        dicti = {
            'datetime': dt,
            'array': [1, 2, 3],
            'string': 'trololo',
            'int': 1,
            'float': 4.32,
            'true': True,
            'false': False,
            'null': None,
        }
        with open(_TEST_FILE, 'w+') as fileobj:
            morejson.dump(dicti, fileobj)
        with open(_TEST_FILE, 'r') as fileobj:
            self.assertEqual(dicti, morejson.load(fileobj))
    finally:
        _dismantle_test_dirs()