def test_update_tag(name):
    """Update the tag called ``name`` through a credential-less ``Agent``.

    The test contains no assertions; it passes as long as the update call
    does not raise. The ``Tag`` cache is cleared afterwards.
    """
    agent = Agent()
    target = Tag(name)
    # The value returned by update() is not inspected by this test.
    agent.update(target)
    Tag.clear_cache()
def setup_function():
    """Clear the class-level caches of all six entity classes, in order."""
    for entity in (Account, Comment, Location, Media, Story, Tag):
        entity.clear_cache()
def test_update_tag(login, password, name):
    """Update the tag called ``name`` through an ``AgentAccount``.

    The account is built from ``login`` and ``password``. There are no
    assertions; the test passes when the update call does not raise. The
    ``Tag`` cache is cleared afterwards.
    """
    account = AgentAccount(login, password)
    target = Tag(name)
    # The value returned by update() is not inspected by this test.
    account.update(target)
    Tag.clear_cache()
def setup_function():
    """Clear entity caches and optionally pause for a random delay.

    When ``anon["global_delay"]`` is not ``None`` it is read as a mapping
    with optional ``"min"`` (default 0) and ``"max"`` (default 120) keys,
    and the function sleeps for ``random() * (max - min) + min``.
    """
    Account.clear_cache()
    Media.clear_cache()
    Location.clear_cache()
    Tag.clear_cache()

    # Read the setting once instead of indexing the config three times.
    delay_cfg = anon["global_delay"]
    if delay_cfg is not None:
        min_delay = delay_cfg.get("min", 0)
        max_delay = delay_cfg.get("max", 120)
        # Assumes `random` is random.random (uniform in [0, 1)) — TODO confirm.
        sleep(random() * (max_delay - min_delay) + min_delay)
def get_posts_by_tag_name(tagname, num=None, path=None):
    """Collect media posted under a tag, dump them to JSON and return them.

    Args:
        tagname: Name of the tag to query.
        num: Number of media to request. When ``None`` the tag's own
            ``media_count`` is used.
        path: Output directory. Defaults to ``./data/tag__<tagname>``; it is
            created (with parents) when missing.

    Returns:
        The set of ``Media`` objects that were fetched and updated.

    Side effects:
        Prints progress and the resulting JSON, and writes the JSON to
        ``<path>/tag__<tagname>__last_posts.json``.
    """
    agent = Agent()
    # Update the very object we keep, rather than relying on a second
    # Tag(tagname) call handing back the same (cached) instance.
    tag = Tag(tagname)
    agent.update(tag)

    media_count = tag.media_count if num is None else num
    limit = 50  # number of media requested per get_media call
    batch_num = math.ceil(media_count / limit)

    media = set()
    pointer = None
    for i in range(batch_num):
        # The last batch only asks for what is still missing from media_count.
        count = media_count - limit * i if i == batch_num - 1 else limit
        batch_media, pointer = agent.get_media(tag, pointer=pointer, count=count)
        for j, item in enumerate(batch_media):
            print(f"Getting media: {i * limit + j + 1} / {media_count}")
            post = Media(item.code)
            agent.update(post)
            media.add(post)
        if pointer is None:
            # NOTE(review): a None pointer is taken to mean "no further page";
            # passing it on would request the first page again. Confirm
            # against the agent's pagination contract.
            break

    media_posts = {}
    for index, item in enumerate(media):
        # Convert fields on a shallow copy so the objects in `media` are
        # returned with their original attribute values.
        post_info = copy.copy(item)
        post_info.likes = dict(post_info.likes)
        post_info.comments = dict(post_info.comments)
        post_info.location = str(post_info.location)
        media_posts[index] = post_info.__dict__

    media_json = json.dumps({"posts": media_posts}, indent=2)
    print(media_json)

    out_dir = pathlib.Path('./data/tag__' + tagname if path is None else path)
    out_dir.mkdir(parents=True, exist_ok=True)
    filename = out_dir / ('tag__' + tagname + '__last_posts.json')
    with open(filename, 'w', newline='', encoding='utf8') as f:
        f.write(media_json)

    return media
def test_get_media_tag_long(login, password, count, name):
    """Request ``count`` media of a tag through an ``AgentAccount``.

    Checks that the number of returned media equals
    ``min(tag.media_count, count)`` and that the returned pointer is ``None``
    exactly when ``tag.media_count <= count``. Clears the ``Tag`` and
    ``Media`` caches afterwards.
    """
    account = AgentAccount(login, password)
    target = Tag(name)
    medias, next_pointer = account.get_media(target, count=count)

    expected_total = min(target.media_count, count)
    assert expected_total == len(medias)

    everything_fetched = target.media_count <= count
    assert (next_pointer is None) == everything_fetched

    Tag.clear_cache()
    Media.clear_cache()
def test_get_media_tag_long(count, name):
    """Request ``count`` media of a tag through a credential-less ``Agent``.

    Checks that the number of returned media equals
    ``min(tag.media_count, count)`` and that the returned pointer is ``None``
    exactly when ``tag.media_count <= count``. Clears the ``Tag`` and
    ``Media`` caches afterwards.
    """
    agent = Agent()
    target = Tag(name)
    medias, next_pointer = agent.get_media(target, count=count)

    expected_total = min(target.media_count, count)
    assert expected_total == len(medias)

    everything_fetched = target.media_count <= count
    assert (next_pointer is None) == everything_fetched

    Tag.clear_cache()
    Media.clear_cache()
def test_get_media_tag_pointer(count, name):
    """Page through a tag's media ``count`` times with a credential-less ``Agent``.

    Each call passes the pointer returned by the previous one. Afterwards
    the final pointer must be ``None`` exactly when
    ``tag.media_count <= count``. Clears the ``Tag`` and ``Media`` caches.
    """
    agent = Agent()
    target = Tag(name)
    collected = []
    cursor = None
    for _ in range(count):
        page, cursor = agent.get_media(target, pointer=cursor)
        collected += page

    # NOTE(review): `count` is the number of get_media calls here, yet it is
    # compared against tag.media_count — confirm this is intended.
    everything_fetched = target.media_count <= count
    assert (cursor is None) == everything_fetched

    Tag.clear_cache()
    Media.clear_cache()
def test_get_media_tag_pointer(login, password, count, name):
    """Page through a tag's media ``count`` times with an ``AgentAccount``.

    Each call passes the pointer returned by the previous one. Afterwards
    the final pointer must be ``None`` exactly when
    ``tag.media_count <= count``. Clears the ``Account``, ``Media`` and
    ``Tag`` caches.
    """
    account = AgentAccount(login, password)
    target = Tag(name)
    collected = []
    cursor = None
    for _ in range(count):
        page, cursor = account.get_media(target, pointer=cursor)
        collected += page

    # NOTE(review): `count` is the number of get_media calls here, yet it is
    # compared against tag.media_count — confirm this is intended.
    everything_fetched = target.media_count <= count
    assert (cursor is None) == everything_fetched

    Account.clear_cache()
    Media.clear_cache()
    Tag.clear_cache()
async def test_async_update_tag(async_agent, settings, name):
    """Update a tag through the async agent and check it was populated.

    Asserts that the update call returned something, that ``tag.name`` and
    ``tag.media_count`` are set, and that ``tag.top_posts`` is truthy.
    """
    tag = Tag(name)
    data = await async_agent.update(tag, settings=settings)

    assert data is not None
    assert tag.name is not None
    assert tag.media_count is not None
    assert tag.top_posts
async def test_async_get_media_tag(async_agent, delay, settings, count, name):
    """Request ``count`` media of a tag through the async agent.

    Checks that the number of returned media equals
    ``min(tag.media_count, count)`` and that the returned pointer is ``None``
    exactly when ``tag.media_count <= count``.
    """
    target = Tag(name)
    medias, next_pointer = await async_agent.get_media(
        target,
        count=count,
        delay=delay,
        settings=settings,
    )

    expected_total = min(target.media_count, count)
    assert expected_total == len(medias)

    everything_fetched = target.media_count <= count
    assert (next_pointer is None) == everything_fetched
def test_get_media_tag_pointer(agent, delay, settings, count, name):
    """Page through a tag's media ``count`` times, sleeping between calls.

    Each call passes the pointer returned by the previous one and is
    followed by ``sleep(delay)``. Afterwards the final pointer must be
    ``None`` exactly when the number of collected media equals
    ``tag.media_count``.
    """
    target = Tag(name)
    collected = []
    cursor = None
    for _ in range(count):
        page, cursor = agent.get_media(target, pointer=cursor, settings=settings)
        sleep(delay)
        collected += page

    all_collected = target.media_count == len(collected)
    assert (cursor is None) == all_collected
def get_tag_info(tagname, path=None):
    """Fetch a tag's information, write it to a JSON file and return the tag.

    Args:
        tagname: Name of the tag to query.
        path: Output directory. Defaults to ``./data/tag__<tagname>``; it is
            created (with parents) when missing.

    Returns:
        The updated ``Tag`` object.

    Side effects:
        Writes ``<path>/tag__<tagname>__tag_info.json``.
    """
    agent = Agent()
    # Update the very object we return, rather than relying on a second
    # Tag(tagname) call handing back the same (cached) instance.
    tag = Tag(tagname)
    agent.update(tag)

    # Convert `media` on a shallow copy so the returned tag keeps its
    # original attribute value.
    tag_info = copy.copy(tag)
    tag_info.media = dict(tag_info.media)
    tag_json = json.dumps({"tag": tag_info.__dict__}, indent=2)

    out_dir = pathlib.Path('./data/tag__' + tagname if path is None else path)
    out_dir.mkdir(parents=True, exist_ok=True)
    filename = out_dir / ('tag__' + tagname + '__tag_info.json')
    with open(filename, 'w', newline='', encoding='utf8') as f:
        f.write(tag_json)

    return tag
def get_comments_by_tag(word, numberOfPost):
    """Save the preprocessed comments of posts found under a tag.

    For each post returned for the tag ``word``, every comment text is run
    through ``preprocess_text``; non-empty results are written one per line
    to ``<word>/<media>.txt``. Posts with no remaining comments produce no
    file.

    Args:
        word: Tag name; also used as the output directory name.
        numberOfPost: Number of posts to request for the tag.

    Returns:
        The number of posts for which a comment file was written.
    """
    # Create the output directory; an already existing one is fine.
    os.makedirs("{}".format(word), exist_ok=True)

    tag = Tag(word)
    # get_media returns a tuple whose first element holds the posts.
    medias, _ = agent.get_media(obj=tag, count=numberOfPost)

    counter = 0
    for media in medias:
        # update() returns the post data, including its comment edges.
        post = agent.update(obj=Media(media))
        cleaned = (
            preprocess_text(str(edge["node"]["text"]))
            for edge in post["edge_media_to_comment"]["edges"]
        )
        lines = [text for text in cleaned if text != ""]
        if not lines:
            continue

        counter += 1
        with open("{}/{}.txt".format(word, media), "w", encoding="utf-8") as f:
            f.writelines(text + "\n" for text in lines)

    return counter
def test_clear_cache_tag():
    """After ``Tag.clear_cache()`` the ``Tag._cache`` mapping must be empty."""
    # Instantiate a tag first; presumably this puts an entry into the cache.
    created = Tag("test")
    Tag.clear_cache()
    assert Tag._cache == {}
def test_clear_cache_tag(id):
    """Creating a ``Tag`` registers it in ``Tag.cache``; clearing empties it."""
    instance = Tag(id)
    expected_before = {id: instance}
    assert Tag.cache == expected_before

    Tag.clear_cache()
    assert Tag.cache == {}