def get_posts_by_tag_name(tagname, num=None, path=None):
    """Download posts for a hashtag, dump them to JSON and return them.

    The tag is refreshed through an ``Agent``, its media are fetched in
    batches of at most 50, every fetched post is refreshed individually,
    and the collected posts are serialized to
    ``<path>/tag__<tagname>__last_posts.json``. The JSON text is also
    printed to stdout.

    Args:
        tagname: Name of the tag to query.
        num: Number of posts to request. When ``None``, the tag's own
            ``media_count`` is used.
        path: Output directory. When ``None``, ``./data/tag__<tagname>``
            is used. The directory is created if it does not exist.

    Returns:
        A set of the ``Media`` objects that were fetched.
    """
    agent = Agent()
    # Update and then use the very same Tag object, so the function does not
    # depend on Tag(...) handing back a shared instance on a second call.
    tag = Tag(tagname)
    agent.update(tag)

    media_count = tag.media_count if num is None else num
    limit = 50
    batch_num = math.ceil(media_count / limit)

    media = set()
    pointer = None
    for i in range(batch_num):
        if i == batch_num - 1:
            # The final batch only asks for the remainder.
            count = media_count - limit * (batch_num - 1)
        else:
            count = limit
        batch_media, pointer = agent.get_media(tag, pointer=pointer, count=count)

        for j, item in enumerate(batch_media):
            print("Getting media: " + str(i * limit + j + 1) + " / " + str(media_count))
            post = Media(item.code)
            agent.update(post)
            media.add(post)

        if pointer is None:
            # No further pages. Another request would be issued with
            # pointer=None, exactly like the very first request, and would
            # refetch posts that were already collected.
            break

    media_posts = {}
    for i, item in enumerate(media):
        # Work on a shallow copy so the returned Media objects keep their
        # original attribute values.
        post_info = copy.copy(item)
        post_info.likes = dict(post_info.likes)
        post_info.comments = dict(post_info.comments)
        post_info.location = str(post_info.location)
        media_posts[i] = post_info.__dict__

    media_json = json.dumps({"posts": media_posts}, indent=2)
    print(media_json)

    if path is None:
        path = './data/tag__' + tagname
    pathlib.Path(path).mkdir(parents=True, exist_ok=True)

    filename = path + '/tag__' + tagname + '__last_posts.json'
    with open(filename, 'w', newline='', encoding='utf8') as f:
        f.write(media_json)

    return media
def test_get_media_tag_long(count, name):
    """Request ``count`` media of tag ``name`` in a single call.

    Checks that the number of returned items is the smaller of ``count``
    and the tag's ``media_count``, and that the returned pointer is
    ``None`` exactly when ``media_count <= count``. The ``Tag`` and
    ``Media`` caches are cleared afterwards.
    """
    agent = Agent()
    target = Tag(name)

    items, next_pointer = agent.get_media(target, count=count)

    expected_len = min(target.media_count, count)
    assert expected_len == len(items)

    fits_in_one_call = target.media_count <= count
    assert (next_pointer is None) == fits_in_one_call

    Tag.clear_cache()
    Media.clear_cache()
def test_get_media_location_long(count, id):
    """Request ``count`` media of the location ``id`` in a single call.

    Checks that the number of returned items is the smaller of ``count``
    and the location's ``media_count``, and that the returned pointer is
    ``None`` exactly when ``media_count <= count``. The ``Location`` and
    ``Media`` caches are cleared afterwards.
    """
    agent = Agent()
    target = Location(id)

    items, next_pointer = agent.get_media(target, count=count)

    expected_len = min(target.media_count, count)
    assert expected_len == len(items)

    fits_in_one_call = target.media_count <= count
    assert (next_pointer is None) == fits_in_one_call

    Location.clear_cache()
    Media.clear_cache()
def test_get_media_account(count, username):
    """Request ``count`` media of account ``username`` in a single call.

    Checks that the number of returned items is the smaller of ``count``
    and the account's ``media_count``, and that the returned pointer is
    ``None`` exactly when ``media_count <= count``. The ``Account`` and
    ``Media`` caches are cleared afterwards.
    """
    agent = Agent()
    target = Account(username)

    items, next_pointer = agent.get_media(target, count=count)

    expected_len = min(target.media_count, count)
    assert expected_len == len(items)

    fits_in_one_call = target.media_count <= count
    assert (next_pointer is None) == fits_in_one_call

    Account.clear_cache()
    Media.clear_cache()
def test_get_media_tag_pointer(count, name):
    """Page through the media of tag ``name`` with ``count`` calls.

    Each call passes the pointer returned by the previous one. After the
    last call the pointer must be ``None`` exactly when the tag's
    ``media_count <= count``. The ``Tag`` and ``Media`` caches are
    cleared afterwards.
    """
    agent = Agent()
    target = Tag(name)

    collected = []
    cursor = None
    for _ in range(count):
        page, cursor = agent.get_media(target, pointer=cursor)
        collected.extend(page)

    exhausted = cursor is None
    assert exhausted == (target.media_count <= count)

    Tag.clear_cache()
    Media.clear_cache()
def test_get_media_location_pointer(count, id):
    """Page through the media of location ``id`` with ``count`` calls.

    Each call passes the pointer returned by the previous one. After the
    last call the pointer must be ``None`` exactly when the location's
    ``media_count <= count``. The ``Location`` and ``Media`` caches are
    cleared afterwards.
    """
    agent = Agent()
    target = Location(id)

    collected = []
    cursor = None
    for _ in range(count):
        page, cursor = agent.get_media(target, pointer=cursor)
        collected.extend(page)

    exhausted = cursor is None
    assert exhausted == (target.media_count <= count)

    Location.clear_cache()
    Media.clear_cache()
def test_get_media_account_pointer(count, username):
    """Page through the media of account ``username`` with ``count`` calls.

    Each call passes the pointer returned by the previous one. After the
    last call the pointer must be ``None`` exactly when the account's
    ``media_count <= count``. The ``Account`` and ``Media`` caches are
    cleared afterwards.
    """
    agent = Agent()
    target = Account(username)

    collected = []
    cursor = None
    for _ in range(count):
        page, cursor = agent.get_media(target, pointer=cursor)
        collected.extend(page)

    exhausted = cursor is None
    assert exhausted == (target.media_count <= count)

    Account.clear_cache()
    Media.clear_cache()
def has_post(username):
    """Return True when a default media request for ``username`` is non-empty.

    A single ``get_media`` call is made for the account with default
    arguments; the returned pointer is ignored.
    """
    first_page, _ = Agent().get_media(Account(username))
    return len(first_page) > 0
#VIP = 'mariasharapova' #VIP = 'katemakarova1' FRIENDS = 20 # Количество друзей с наибольшей перепиской agent = Agent() #exception_manager.repeats = 10 # Количество повторов при ошибке - поставил в библиотеке for vip in VIPS: print(datetime.now().strftime("%H:%M:%S"), vip, '------------------------------') account = Account(vip) all_medias_codes = [] all_medias = [] try: medias, pointer = agent.get_media(account) except Exception as e: print(datetime.now().strftime("%H:%M:%S"), 'Сбой') medias, pointer = agent.get_media(account) for media in medias: all_medias.append(media) all_medias_codes.append(media.code) loaded = False while pointer: sleep(round(random() * 2)) try: medias, pointer = agent.get_media(account, count=50, pointer=pointer) except Exception as e: print(datetime.now().strftime("%H:%M:%S"), 'Сбой')