def media_scraper(session, site_name, only_links, link, locations, directory,
                  post_count, username, api_type, app_token):
    """Collect media results for a user, one entry per scrape location.

    For every entry in *locations* this builds the list of paginated API
    links appropriate for *api_type* ("Posts", "Messages", "Stories" or
    "Highlights"), fetches them in parallel via ``scrape_array``, and
    collects the formatted results.

    Args:
        session: authenticated requests session used for all API calls.
        site_name: name of the site being scraped (used for directories).
        only_links: unused here; kept for interface compatibility.
        link: base API link containing an ``offset=0`` query parameter.
        locations: iterable of (media_type, alt_names) pairs.
        directory: root download directory (created on demand).
        post_count: total number of posts, used to compute page count.
        username: account being scraped.
        api_type: one of "Posts", "Messages", "Stories", "Highlights".
        app_token: site app token appended to constructed links.

    Returns:
        ``[media_set, directory]`` where ``media_set`` is the list of
        per-location result dicts produced by ``format_media_set``.
    """
    separator = " | "
    media_set = []
    original_link = link
    for location in locations:
        link = original_link
        print("Scraping [" + str(separator.join(location[1])) +
              "]. Should take less than a minute.")
        array = format_directory(
            j_directory, site_name, username, location[0], api_type)
        location_directory = array[2][0][1]
        metadata_directory = array[1]
        directories = array[2] + [location[1]]
        offset_array = []
        if api_type == "Posts":
            # Posts can be paged eagerly: one link per page of up to 100.
            for page in range(math.ceil(post_count / 100)):
                offset_array.append(
                    link.replace("offset=0", "offset=" + str(page * 100)))
        if api_type == "Messages":
            # Messages must be walked sequentially: each response tells us
            # whether another page exists.
            offset_count = 0
            while True:
                y = json_request(session, link)
                if "list" not in y or not y["list"]:
                    break
                offset_array.append(link)
                if not y["hasMore"]:
                    break
                next_offset = offset_count + 100
                link = link.replace("offset=" + str(offset_count),
                                    "offset=" + str(next_offset))
                offset_count = next_offset
        if api_type == "Stories":
            offset_array.append(link)
        if api_type == "Highlights":
            r = json_request(session, link)
            if "error" in r:
                # API error: abandon remaining locations (original behavior).
                break
            for item in r:
                link2 = ("https://onlyfans.com/api2/v2/stories/highlights/" +
                         str(item["id"]) + "?app-token=" + app_token + "")
                offset_array.append(link2)
        # Context manager terminates the pool; the original leaked one
        # ThreadPool per location.
        with ThreadPool() as pool:
            x = pool.starmap(scrape_array, product(
                offset_array, [session], [directories], [username],
                [api_type]))
        results = format_media_set(location[0], x)
        if results["valid"]:
            os.makedirs(directory, exist_ok=True)
            os.makedirs(location_directory, exist_ok=True)
            if export_metadata:
                os.makedirs(metadata_directory, exist_ok=True)
                archive_directory = metadata_directory + location[0]
                export_archive(results, archive_directory)
        media_set.append(results)
    return [media_set, directory]
def media_scraper(session, site_name, only_links, link, locations, directory,
                  api_count, username, api_type, app_token):
    """Build the master list of API page links for *api_type*, scrape them
    in parallel, and collect per-location media results.

    The master link set is built once (while processing the first location)
    and reused for subsequent locations. Valid results are de-duplicated by
    filename before being written out.

    Args:
        session: authenticated requests session used for all API calls.
        site_name: name of the site being scraped (used for directories).
        only_links: unused here; kept for interface compatibility.
        link: base API link containing an ``offset=0`` query parameter.
        locations: iterable of (media_type, alt_names) pairs.
        directory: root download directory (created on demand).
        api_count: total item count, used to compute page count.
        api_type: one of "Posts", "Archived", "Messages", "Mass Messages",
            "Stories", "Highlights".
        app_token: site app token appended to constructed links.

    Returns:
        ``[media_set, directory]`` where ``media_set`` is the list of
        per-location result dicts produced by ``format_media_set``.
    """
    separator = " | "
    master_set = []
    media_set = []
    original_link = link
    for location in locations:
        link = original_link
        print("Scraping [" + str(separator.join(location[1])) +
              "]. Should take less than a minute.")
        array = format_directory(j_directory, site_name, username,
                                 location[0], api_type)
        location_directory = array[2][0][1]
        metadata_directory = array[1]
        directories = array[2] + [location[1]]
        # Context manager terminates the pool each iteration; the original
        # leaked one ThreadPool per location.
        with ThreadPool() as pool:
            if not master_set:
                if api_type in ("Posts", "Archived"):
                    # Identical pagination for both types (the original
                    # duplicated this branch verbatim): one link per page
                    # of up to 100 items.
                    for page in range(math.ceil(api_count / 100)):
                        master_set.append(link.replace(
                            "offset=0", "offset=" + str(page * 100)))

                def xmessages(link):
                    # Walk one message thread page by page until the API
                    # reports no more pages.
                    f_offset = 0
                    while True:
                        y = json_request(session, link)
                        if "list" not in y or not y["list"]:
                            break
                        master_set.append(link)
                        if not y["hasMore"]:
                            break
                        next_offset = f_offset + 100
                        link = link.replace("offset=" + str(f_offset),
                                            "offset=" + str(next_offset))
                        f_offset = next_offset

                def process_chats(subscriber):
                    # Queue every message page of one chat partner.
                    fool = subscriber["withUser"]
                    fool_id = str(fool["id"])
                    link_2 = "https://onlyfans.com/api2/v2/chats/" + fool_id + \
                        "/messages?limit=100&offset=0&order=desc&app-token=" + \
                        app_token + ""
                    xmessages(link_2)

                if api_type == "Messages":
                    xmessages(link)
                if api_type == "Mass Messages":
                    messages = []
                    offset_count = 0
                    while True:
                        y = json_request(session, link)
                        if not y:
                            break
                        messages.append(y)
                        next_offset = offset_count + 99
                        link = link.replace("offset=" + str(offset_count),
                                            "offset=" + str(next_offset))
                        offset_count = next_offset
                    messages = list(chain(*messages))

                    def process_mass_messages(message, limit):
                        # Locate the chats a mass message landed in by
                        # searching for its (cropped) text.
                        text = message["textCropped"].replace("&", "")
                        link_2 = "https://onlyfans.com/api2/v2/chats?limit=" + \
                            limit + "&offset=0&filter=&order=activity&query=" + \
                            text + "&app-token=" + app_token
                        return json_request(session, link_2)

                    # Narrower search limit when there are many messages.
                    limit = "10"
                    if len(messages) > 99:
                        limit = "2"
                    subscribers = pool.starmap(process_mass_messages,
                                               product(messages, [limit]))
                    subscribers = [item for sublist in subscribers
                                   for item in sublist["list"]]
                    # De-duplicate chat partners while preserving order.
                    seen = set()
                    subscribers = [
                        x for x in subscribers
                        if x["withUser"]["id"] not in seen
                        and not seen.add(x["withUser"]["id"])]
                    pool.starmap(process_chats, product(subscribers))
                if api_type == "Stories":
                    master_set.append(link)
                if api_type == "Highlights":
                    r = json_request(session, link)
                    if "error" in r:
                        # API error: abandon remaining locations
                        # (original behavior).
                        break
                    for item in r["list"]:
                        master_set.append(
                            "https://stars.avn.com/api2/v2/stories/collections/"
                            + str(item["id"]))
            x = pool.starmap(scrape_array, product(
                master_set, [session], [directories], [username],
                [api_type]))
        results = format_media_set(location[0], x)
        # Drop duplicate files (same filename) while preserving order.
        seen = set()
        results["valid"] = [x for x in results["valid"]
                            if x["filename"] not in seen
                            and not seen.add(x["filename"])]
        if results["valid"]:
            os.makedirs(directory, exist_ok=True)
            os.makedirs(location_directory, exist_ok=True)
            if export_metadata:
                os.makedirs(metadata_directory, exist_ok=True)
                archive_directory = metadata_directory + location[0]
                export_archive(results, archive_directory)
        media_set.append(results)
    return [media_set, directory]