def get_posts(hashtag):
    """Collect every page of posts for *hashtag* and append each page's
    posts, users and links as JSON to per-day files.

    Retries API calls indefinitely on failure (20 s back-off); relies on
    module globals ``parler``, ``models`` and ``filename``.
    """
    print("Starting collection of posts with hashtag %s" % hashtag)
    # Random pre-delay so concurrent workers don't hit the API in lockstep.
    time.sleep(random.randint(1, 20))

    # Fetch the first page, retrying on any transient failure.
    while True:
        try:
            data = parler.hashtags_feed(hashtag, 100, cursor="")
            feed = models.FeedSchema().load(data)
            break
        except Exception:
            # was a bare `except:` — narrowed so SystemExit/KeyboardInterrupt
            # are not swallowed
            time.sleep(20)

    while not feed.last:
        time.sleep(random.randint(1, 20))
        today = str(datetime.date.today())
        with open(filename % (hashtag, today, "posts"), mode="a", encoding="utf-8") as posts:
            json.dump(feed.items, posts)
        with open(filename % (hashtag, today, "users"), mode="a", encoding="utf-8") as users:
            json.dump(feed.users, users)
        with open(filename % (hashtag, today, "links"), mode="a", encoding="utf-8") as links:
            json.dump(feed.links, links)
        # Fetch the next page, again retrying until it succeeds.
        while True:
            try:
                data = parler.hashtags_feed(hashtag, 100, cursor=feed.next)
                feed = models.FeedSchema().load(data)
                break
            except Exception:
                time.sleep(20)
def user_items(username, expiration=None, delete_active=False):
    """Retrieve posts and comments created by username.

    For each item type, pages through ``parler.created_items`` (up to 20
    pages) and passes every item to ``process_item``.

    :param username: account whose created items are fetched
    :param expiration: forwarded to ``process_item`` (semantics defined there)
    :param delete_active: forwarded to ``process_item``
    """
    for item_type in ['post', 'comment']:
        data = parler.created_items(item_type, username, limit=100, cursor="")
        if not data[item_type + 's']:
            # BUG FIX: was `break`, which aborted the whole loop — a user with
            # no posts never had their comments fetched. `continue` matches the
            # original comment ("skip processing if no posts/comments").
            continue
        feed = models.FeedSchema().load(data)
        count = 0
        while count < 20:
            try:
                for item in feed.items:
                    process_item(item_type, item, expiration, delete_active)
                if feed.last:
                    break
                more_items = feed.next
                data = parler.created_items(item_type, username, limit=100,
                                            cursor=more_items)
                feed = models.FeedSchema().load(data)
                count += 1
            except Exception:
                # was a bare `except:` — keep best-effort retry, but don't
                # swallow SystemExit/KeyboardInterrupt
                traceback.print_exc()
                time.sleep(5)
            finally:
                # gentle rate limit between pages
                time.sleep(1)
def get_posts(hashtag):
    """Collect every page of posts for *hashtag* and append posts, users and
    links to per-day CSV files (headers are written with the first page only).

    Exits the process on a ``TypeError`` (malformed/unauthorized API
    response); any other error is retried after ``max_sleep_limit`` seconds.
    """
    print("Starting collection of posts with hashtag %s" % hashtag)
    time.sleep(random.randint(1, max_sleep_limit))

    # Fetch the first page, retrying on transient errors.
    while True:
        try:
            data = parler.hashtags_feed(hashtag, 100, cursor="")
            feed = models.FeedSchema().load(data)
            break
        except Exception as e:
            # BUG FIX: was `e.__class__.__name__ == "TypeError:"` — a class
            # __name__ never contains a colon, so the exit never triggered.
            if isinstance(e, TypeError):
                sys.exit()
            time.sleep(max_sleep_limit)

    today = str(datetime.date.today())
    with open(filename % (hashtag, today, "posts"), mode="a", encoding="utf-8") as posts:
        exputils.writetocsv(posts, feed.items, insert_headers=True)
    with open(filename % (hashtag, today, "users"), mode="a", encoding="utf-8") as users:
        exputils.writetocsv(users, feed.users, insert_headers=True)
    with open(filename % (hashtag, today, "links"), mode="a", encoding="utf-8") as links:
        exputils.writetocsv(links, feed.links, insert_headers=True)

    while True:
        if feed.last:
            logging.info("Exiting, all done.")
            break
        # BUG FIX: randint requires integer bounds; `/ 2` produced a float.
        time.sleep(random.randint(max_sleep_limit // 2, max_sleep_limit))
        try:
            data = parler.hashtags_feed(hashtag, 100, cursor=feed.next)
            feed = models.FeedSchema().load(data)
        except Exception as e:
            if isinstance(e, TypeError):
                sys.exit()
            time.sleep(max_sleep_limit)
        finally:
            logging.info("Writing to file...")
            today = str(datetime.date.today())
            with open(filename % (hashtag, today, "posts"), mode="a", encoding="utf-8") as posts:
                exputils.writetocsv(posts, feed.items, insert_headers=False)
            with open(filename % (hashtag, today, "users"), mode="a", encoding="utf-8") as users:
                exputils.writetocsv(users, feed.users, insert_headers=False)
            with open(filename % (hashtag, today, "links"), mode="a", encoding="utf-8") as links:
                exputils.writetocsv(links, feed.links, insert_headers=False)
def get_posts(username, output_dir):
    """Collect every page of posts by *username*, appending posts and links
    to per-day CSVs under *output_dir* and collecting each post's comment
    thread via ``get_comments``.

    Exits the process on a ``TypeError`` (malformed/unauthorized API
    response); other errors are retried after ``max_sleep_limit`` seconds.
    """
    logging.info("Starting collection of posts from user %s" % username)

    # Resolve the user id and fetch the first page, retrying on failure.
    while True:
        try:
            userdetails = parler.profile(username)
            data = parler.user_feed(userdetails.get("_id"), 100, cursor="")
            feed = models.FeedSchema().load(data)
            break
        except Exception as e:
            # BUG FIX: was `e.__class__.__name__ == "TypeError:"` — the
            # trailing colon meant this comparison could never be true.
            if isinstance(e, TypeError):
                sys.exit()
            time.sleep(max_sleep_limit)

    today = str(datetime.date.today())
    with open(filename % (output_dir, username, today, "posts"), mode="a", encoding="utf-8") as posts:
        exputils.writetocsv(posts, feed.items, insert_headers=True)
    with open(filename % (output_dir, username, today, "links"), mode="a", encoding="utf-8") as links:
        exputils.writetocsv(links, feed.links, insert_headers=True)
    for thread in feed.items:
        get_comments(username=username, thread_id=thread.get("Id"),
                     output_dir=output_dir)

    while True:
        logging.info("is last? %s", feed.last)
        if feed.last:
            logging.info("Exiting, all done.")
            break
        time.sleep(random.randint(1, max_sleep_limit))
        try:
            data = parler.user_feed(
                userdetails.get("_id"), 100, cursor=feed.next)
            feed = models.FeedSchema().load(data)
        except Exception as e:
            traceback.print_exc()
            if isinstance(e, TypeError):
                sys.exit()
            time.sleep(max_sleep_limit)
        finally:
            today = str(datetime.date.today())
            with open(filename % (output_dir, username, today, "posts"), mode="a", encoding="utf-8") as posts:
                exputils.writetocsv(posts, feed.items)
            with open(filename % (output_dir, username, today, "links"), mode="a", encoding="utf-8") as links:
                exputils.writetocsv(links, feed.links)
            for thread in feed.items:
                get_comments(username=username, thread_id=thread.get("Id"),
                             output_dir=output_dir)
def get_comments(username, thread_id, output_dir):
    """Collect every page of comments on *thread_id*, appending the
    commenting users and comment links to per-thread, per-day CSVs.

    Exits the process on a ``TypeError`` (malformed/unauthorized API
    response); other fetch errors are retried after ``max_sleep_limit``
    seconds.
    """

    def _write_page(page):
        # One CSV pair per (username, thread); rows are appended page by page.
        logging.info("Writing to file...")
        today = str(datetime.date.today())
        prefix = username + "_" + thread_id
        with open(filename % (output_dir, prefix, today, "users"), mode="a", encoding="utf-8") as users:
            exputils.writetocsv(users, page.users, insert_headers=False)
        # NOTE(review): this writes page.links into the "comments" file —
        # page.items would be the comment bodies; confirm this is intended.
        with open(filename % (output_dir, prefix, today, "comments"), mode="a", encoding="utf-8") as links:
            exputils.writetocsv(links, page.links, insert_headers=False)

    data = parler.comments(thread_id, cursor="")
    feed = models.FeedSchema().load(data)
    # BUG FIX: the original never wrote the first page — `feed` was replaced
    # by the next fetch inside the loop before any write happened.
    _write_page(feed)

    while True:
        if feed.last:
            logging.info("Exiting, all done.")
            break
        try:
            data = parler.comments(thread_id, cursor=feed.next)
            feed = models.FeedSchema().load(data)
        except Exception as e:
            # BUG FIX: was `e.__class__.__name__ == "TypeError:"`, which can
            # never match because of the trailing colon.
            if isinstance(e, TypeError):
                sys.exit()
            time.sleep(max_sleep_limit)
        else:
            # BUG FIX: writing moved out of `finally` so a failed fetch no
            # longer re-appends the previous (already written) page.
            _write_page(feed)
        finally:
            # BUG FIX: randint requires integer bounds; `/ 2` gave a float.
            time.sleep(random.randint(max_sleep_limit // 2, max_sleep_limit))
def _comments_as_posts(data):
    """FeedSchema only understands a "posts" key; rename a "comments"
    payload in place so comments parse with the same schema."""
    if "comments" in data:
        data["posts"] = data["comments"].copy()
        del data["comments"]


def user_items(username, item_types, expiration=None, delete_active=False):
    """Retrieve posts and comments created by username.

    Pages through ``parler.created_items`` (up to 20 pages per item type)
    and hands every item to ``process_item``.

    :param username: account whose created items are fetched
    :param item_types: iterable of types to fetch; falsy means both
        ``'post'`` and ``'comment'``
    :param expiration: forwarded to ``process_item``
    :param delete_active: forwarded to ``process_item``
    """
    if not item_types:
        item_types = ['post', 'comment']
    for item_type in item_types:
        data = parler.created_items(item_type, username, limit=100, cursor="")
        item_key = item_type + 's'
        if not data.get(item_key):
            print('*** no', item_key, 'available for', username)
            continue  # skip processing if no posts/comments
        print('***', item_key, 'for', username)
        _comments_as_posts(data)
        feed = models.FeedSchema().load(data)
        count = 0
        while count < 20:
            try:
                for item in feed.items:
                    process_item(item_type, item, expiration, delete_active)
                if feed.last:
                    break
                data = parler.created_items(item_type, username, limit=100,
                                            cursor=feed.next)
                _comments_as_posts(data)
                feed = models.FeedSchema().load(data)
                count += 1
            except Exception:
                # was a bare `except:` — narrowed so SystemExit and
                # KeyboardInterrupt still propagate.
                # NOTE(review): `count` is not advanced on failure, so a
                # persistent error retries the same page forever — confirm
                # that is intended.
                traceback.print_exc()
                time.sleep(5)
            finally:
                time.sleep(1)
import sys
import signal

sys.path.insert(1, os.path.join(sys.path[0], ".."))
from Parler import Parler, models

# NOTE(review): os, time, json and load_dotenv are used in this script but
# not imported in this chunk — presumably imported elsewhere; verify.
load_dotenv(dotenv_path=".parler.env")

parler = Parler(jst=os.getenv("JST"), mst=os.getenv("MST"), debug=False)
interval = 2

# Output path: default, or first CLI argument when given.
filename = "data/stopthesteal_11_09-10.json"
if len(sys.argv) > 1:
    filename = sys.argv[1]

# Page through the #stopthesteal hashtag feed, appending each page's posts
# as a JSON document to `filename`.
data = parler.hashtags_feed("stopthesteal", 100, cursor="")
feed = models.FeedSchema().load(data)
while not feed.last:
    time.sleep(interval)
    with open(filename, mode="a") as posts:
        json.dump(feed.items, posts)
    # Retry each page fetch until it succeeds; the original did a single
    # unguarded retry after a bare `except:`, so a second failure crashed.
    while True:
        try:
            data = parler.hashtags_feed("stopthesteal", 100, cursor=feed.next)
            feed = models.FeedSchema().load(data)
            break
        except Exception:
            time.sleep(20)

# Concatenated JSON documents in append mode need post-processing.
print("File needs cleaning up....")
def get_posts(search_hit):
    """Collect every page of posts for *search_hit* — a hashtag, or a user
    feed when the hit starts with '@' — appending posts/users/links CSVs
    and downloading Parler-hosted media via ``save_file``.
    """

    def _fetch_page(cursor):
        # User feed for "@name" hits, hashtag feed otherwise.
        if "@" in search_hit:
            userdetails = parler.profile(search_hit.replace("@", ""))
            data = parler.user_feed(userdetails.get("_id"), 100, cursor=cursor)
        else:
            data = parler.hashtags_feed(search_hit, 100, cursor=cursor)
        return models.FeedSchema().load(data)

    def _write_page(page, insert_headers):
        today = str(datetime.date.today())
        with open(filename % (search_hit, today, "posts"), mode="a") as posts:
            exputils.writetocsv(posts, page.items, insert_headers=insert_headers)
        with open(filename % (search_hit, today, "users"), mode="a") as users:
            exputils.writetocsv(users, page.users, insert_headers=insert_headers)
        with open(filename % (search_hit, today, "links"), mode="a") as links:
            exputils.writetocsv(links, page.links, insert_headers=insert_headers)

    def _save_media(page):
        for link in page.links:
            long_url = link.get("Long")
            if "image-cdn.parler.com" in long_url:  # hosted by parler!
                save_file(long_url, link.get("Id"))
            if "video.parler.com" in long_url:  # hosted by parler!
                save_file(long_url, link.get("Id"))

    print("Starting collection of posts with hashtag %s" % search_hit)
    time.sleep(random.randint(1, max_sleep_limit))

    # First page, retried until it succeeds.
    while True:
        try:
            feed = _fetch_page("")
            break
        except Exception:
            traceback.print_exc()
            time.sleep(max_sleep_limit)
    _write_page(feed, insert_headers=True)
    _save_media(feed)

    while True:
        if feed.last:
            logging.info("Exiting, all done.")
            break
        # BUG FIX: randint requires integer bounds; `/ 2` produced a float.
        time.sleep(random.randint(max_sleep_limit // 2, max_sleep_limit))
        try:
            # BUG FIX: the original pagination loop inverted the "@" test
            # (hashtag hits went to the user feed and vice versa) AND always
            # passed cursor="", so it refetched the first page forever.
            feed = _fetch_page(feed.next)
        except Exception:
            traceback.print_exc()
            time.sleep(max_sleep_limit)
        finally:
            logging.info("Writing to file...")
            _write_page(feed, insert_headers=False)
            _save_media(feed)