def download(media, session, directory, username):
    count = 0
    while count < 11:
        link = media["link"]
        r = json_request(session, link, "HEAD", True, False)
        if not r:
            return False
        header = r.headers
        content_length = int(header["content-length"])
        date_object = datetime.strptime(media["postedAt"], "%d-%m-%Y %H:%M:%S")
        og_filename = media["filename"]
        media["ext"] = os.path.splitext(og_filename)[1]
        media["ext"] = media["ext"].replace(".", "")
        download_path = media["directory"] + media["filename"]
        timestamp = date_object.timestamp()
        if not overwrite_files:
            # Skip files that already exist locally with the same size.
            if check_for_dupe_file(download_path, content_length):
                return
        r = json_request(session, link, "GET", True, False)
        if not r:
            return False
        try:
            with open(download_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
        except ConnectionResetError:
            # Retry the whole download (up to 10 attempts) if the connection drops.
            count += 1
            continue
        format_image(download_path, timestamp)
        logger.info("Link: {}".format(link))
        logger.info("Path: {}".format(download_path))
        return True
    return False
def download(media, session, directory, username):
    while True:
        link = media["link"]
        r = json_request(session, link, "HEAD", True, False)
        if not r:
            break
        header = r.headers
        content_length = int(header["content-length"])
        date_object = datetime.strptime(media["postedAt"], "%d-%m-%Y %H:%M:%S")
        og_filename = media["filename"]
        media["ext"] = os.path.splitext(og_filename)[1]
        media["ext"] = media["ext"].replace(".", "")
        download_path = media["directory"] + media["filename"]
        timestamp = date_object.timestamp()
        if not overwrite_files:
            # Skip the download when a local file of the same size already exists.
            if os.path.isfile(download_path):
                local_size = os.path.getsize(download_path)
                if local_size == content_length:
                    return
        r = json_request(session, link, "GET", True, False)
        if not r:
            break
        with open(download_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
        format_image(download_path, timestamp)
        logger.info("Link: {}".format(link))
        logger.info("Path: {}".format(download_path))
        return True
    return False
def create_session(user_agent, app_token, auth_array):
    me_api = []
    auth_count = 1
    auth_version = "(V1)"
    count = 1
    try:
        auth_cookies = [
            {'name': 'auth_id', 'value': auth_array["auth_id"]},
            {'name': 'auth_hash', 'value': auth_array["auth_hash"]}
        ]
        while auth_count < 3:
            if auth_count == 2:
                # Second authentication pass.
                auth_version = "(V2)"
                if auth_array["sess"]:
                    del auth_cookies[2]
                count = 1
            while count < 11:
                session = requests.Session()
                print("Auth "+auth_version+" Attempt "+str(count)+"/"+"10")
                max_threads = multiprocessing.cpu_count()
                session.mount(
                    'https://',
                    requests.adapters.HTTPAdapter(pool_connections=max_threads,
                                                  pool_maxsize=max_threads))
                session.headers = {
                    'User-Agent': user_agent,
                    'Referer': 'https://onlyfans.com/'}
                if auth_array["sess"]:
                    auth_cookies.append(
                        {'name': 'sess', 'value': auth_array["sess"]})
                for auth_cookie in auth_cookies:
                    session.cookies.set(**auth_cookie)
                link = "https://onlyfans.com/api2/v2/users/customer?app-token="+app_token
                # r = json_request(session, link, "HEAD", True, False)
                r = json_request(session, link)
                count += 1
                if not r:
                    continue
                me_api = r
                if 'error' in r:
                    error_message = r["error"]["message"]
                    print(error_message)
                    if "token" in error_message:
                        break
                    continue
                else:
                    print("Welcome "+r["name"])
                    option_string = "username or profile link"
                    link = "https://onlyfans.com/api2/v2/subscriptions/count/all?app-token="+app_token
                    r = json_request(session, link)
                    if not r:
                        break
                    subscriber_count = r["subscriptions"]["all"]
                    return [session, option_string, subscriber_count, me_api]
            auth_count += 1
    except Exception as e:
        print(e)
        input()
    return [False, me_api]
def multi(array, session):
    link = array[0]
    performer = array[1]
    if performer:
        session = requests.Session()
        x = json_request(session, link)
        if not x["subscribedByData"]:
            # Fill in placeholder subscription data when the API returns none.
            x["subscribedByData"] = dict()
            x["subscribedByData"]["expiredAt"] = datetime.utcnow().isoformat()
            x["subscribedByData"]["price"] = x["subscribePrice"]
            x["subscribedByData"]["subscribePrice"] = 0
        x = [x]
    else:
        x = json_request(session, link)
    return x
def start_datascraper(session, identifier, site_name, app_token, choice_type=None):
    if choice_type == 0:
        if blacklist_name:
            link = "https://onlyfans.com/api2/v2/lists?offset=0&limit=100&app-token=" + app_token
            r = json_request(session, link)
            if not r:
                return [False, []]
            x = [c for c in r if blacklist_name == c["name"]]
            if x:
                users = x[0]["users"]
                bl_ids = [x["username"] for x in users]
                if identifier in bl_ids:
                    print("Blacklisted: " + identifier)
                    return [False, []]
    print("Scrape Processing")
    info = link_check(session, app_token, identifier)
    if not info["subbed"]:
        print(info["user"])
        print("First time? Did you forget to edit your config.json file?")
        return [False, []]
    user = info["user"]
    is_me = user["is_me"]
    post_counts = info["count"]
    user_id = str(user["id"])
    username = user["username"]
    print("Name: " + username)
    array = scrape_choice(user_id, app_token, post_counts, is_me)
    prep_download = []
    for item in array:
        print("Type: " + item[2])
        only_links = item[1][3]
        post_count = str(item[1][4])
        item[1].append(username)
        item[1].pop(3)
        api_type = item[2]
        results = media_scraper(session, site_name, only_links, *item[1],
                                api_type, app_token)
        for result in results[0]:
            if not only_links:
                media_set = result
                if not media_set["valid"]:
                    continue
                directory = results[1]
                location = result["type"]
                prep_download.append([
                    media_set["valid"], session, directory, username,
                    post_count, location
                ])
    # When profile is done scraping, this function will return True
    print("Scrape Completed" + "\n")
    return [True, prep_download]
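# --- Illustrative sketch only; not part of the original module. ---
# Shows one way the prep_download entries built by start_datascraper() could
# be handed to download(). The unpacking order mirrors the append() above;
# ThreadPool and product are assumed to be the same module-level imports used
# by media_scraper(), and process_downloads() itself is a hypothetical helper.
def process_downloads(prep_download):
    for valid_media, session, directory, username, post_count, location in prep_download:
        pool = ThreadPool()
        # download() returns True on success and a falsy value otherwise.
        results = pool.starmap(
            download, product(valid_media, [session], [directory], [username]))
        pool.close()
        pool.join()
        print("Downloaded {}/{} items from {}".format(
            sum(1 for ok in results if ok), len(results), location))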
def get_subscriptions(session, app_token, subscriber_count, me_api, auth_count=0):
    link = "https://stars.avn.com/api2/v2/subscriptions/following/?limit=10&marker=&offset=0"
    r = json_request(session, link)
    if not r:
        return None
    # Tag each subscription with the auth account it was fetched with.
    for x in r["list"]:
        x["auth_count"] = auth_count
    return r["list"]
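# --- Illustrative sketch only; not part of the original module. ---
# A minimal example of walking the list returned by get_subscriptions() and
# handing each account to start_datascraper(). The "username" field and the
# "stars.avn" site_name value are assumptions, not confirmed by this code.
def scrape_subscriptions(session, app_token, subscriber_count, me_api):
    results = []
    subscriptions = get_subscriptions(session, app_token, subscriber_count, me_api)
    for subscription in subscriptions or []:
        username = subscription.get("username")
        if not username:
            continue
        # start_datascraper() returns [ok, prep_download] for each account.
        results.append(
            start_datascraper(session, username, "stars.avn", app_token))
    return results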
def link_check(session, app_token, identifier):
    link = 'https://stars.avn.com/api2/v2/users/' + str(identifier)
    y = json_request(session, link)
    temp_user_id2 = dict()
    if not y:
        temp_user_id2["subbed"] = False
        temp_user_id2["user"] = "******"
        return temp_user_id2
    # Only annotate the response once we know it is usable.
    y["is_me"] = False
    if "error" in y:
        temp_user_id2["subbed"] = False
        temp_user_id2["user"] = y["error"]["message"]
        return temp_user_id2
    now = datetime.utcnow().date()
    result_date = datetime.utcnow().date()
    if "email" not in y:
        subscribedByData = y
        # if subscribedByData:
        #     expired_at = subscribedByData["expiredAt"]
        #     result_date = datetime.fromisoformat(
        #         expired_at).replace(tzinfo=None).date()
        if y["followedBy"]:
            subbed = True
        elif y["subscribedBy"]:
            subbed = True
        elif y["subscribedOn"]:
            subbed = True
        # elif y["subscribedIsExpiredNow"] == False:
        #     subbed = True
        elif result_date >= now:
            subbed = True
        else:
            subbed = False
    else:
        subbed = True
        y["is_me"] = True
    if not subbed:
        temp_user_id2["subbed"] = False
        temp_user_id2["user"] = "******"
        return temp_user_id2
    else:
        temp_user_id2["subbed"] = True
        temp_user_id2["user"] = y
        temp_user_id2["count"] = [
            y["postsCount"], [y["photosCount"], y["videosCount"]]
        ]
        return temp_user_id2
def link_check(session, app_token, username):
    link = 'https://onlyfans.com/api2/v2/users/' + username + \
        '?app-token=' + app_token
    y = json_request(session, link)
    temp_user_id2 = dict()
    if not y:
        temp_user_id2[0] = False
        temp_user_id2[1] = "No users found"
        return temp_user_id2
    if "error" in y:
        temp_user_id2[0] = False
        temp_user_id2[1] = y["error"]["message"]
        return temp_user_id2
    now = datetime.utcnow().date()
    result_date = datetime.utcnow().date()
    if "email" not in y:
        # Responses without an "email" field are other users' profiles.
        subscribedByData = y["subscribedByData"]
        if subscribedByData:
            expired_at = subscribedByData["expiredAt"]
            result_date = datetime.fromisoformat(expired_at).replace(
                tzinfo=None).date()
        if y["subscribedBy"]:
            subbed = True
        elif y["subscribedOn"]:
            subbed = True
        elif y["subscribedIsExpiredNow"] == False:
            subbed = True
        elif result_date >= now:
            subbed = True
        else:
            subbed = False
    else:
        subbed = True
    if not subbed:
        temp_user_id2[0] = False
        temp_user_id2[1] = "You're not subscribed to the user"
        return temp_user_id2
    else:
        temp_user_id2[0] = True
        temp_user_id2[1] = str(y["id"])
        temp_user_id2[2] = [
            y["postsCount"],
            [y["photosCount"], y["videosCount"], y["audiosCount"]]
        ]
        return temp_user_id2
def xmessages(link):
    f_offset_count = 0
    while True:
        y = json_request(session, link)
        if "list" in y:
            if y["list"]:
                master_set.append(link)
                if y["hasMore"]:
                    # Advance the offset query parameter by 100 and fetch the next page.
                    f_offset_count2 = f_offset_count + 100
                    f_offset_count = f_offset_count2 - 100
                    link = link.replace(
                        "offset=" + str(f_offset_count),
                        "offset=" + str(f_offset_count2))
                    f_offset_count = f_offset_count2
                else:
                    break
            else:
                break
        else:
            break
def media_scraper(session, site_name, only_links, link, locations, directory,
                  post_count, username, api_type, app_token):
    seperator = " | "
    media_set = []
    original_link = link
    for location in locations:
        link = original_link
        print("Scraping ["+str(seperator.join(location[1])) +
              "]. Should take less than a minute.")
        array = format_directory(
            j_directory, site_name, username, location[0], api_type)
        user_directory = array[0]
        location_directory = array[2][0][1]
        metadata_directory = array[1]
        directories = array[2]+[location[1]]
        pool = ThreadPool()
        ceil = math.ceil(post_count / 100)
        a = list(range(ceil))
        offset_array = []
        if api_type == "Posts":
            for b in a:
                b = b * 100
                offset_array.append(link.replace(
                    "offset=0", "offset=" + str(b)))
        if api_type == "Messages":
            offset_count = 0
            while True:
                y = json_request(session, link)
                if "list" in y:
                    if y["list"]:
                        offset_array.append(link)
                        if y["hasMore"]:
                            offset_count2 = offset_count+100
                            offset_count = offset_count2-100
                            link = link.replace(
                                "offset=" + str(offset_count),
                                "offset=" + str(offset_count2))
                            offset_count = offset_count2
                        else:
                            break
                    else:
                        break
                else:
                    break
        if api_type == "Stories":
            offset_array.append(link)
        if api_type == "Highlights":
            r = json_request(session, link)
            if "error" in r:
                break
            for item in r:
                link2 = "https://onlyfans.com/api2/v2/stories/highlights/" + \
                    str(item["id"])+"?app-token="+app_token+""
                offset_array.append(link2)
        x = pool.starmap(scrape_array, product(
            offset_array, [session], [directories], [username], [api_type]))
        results = format_media_set(location[0], x)
        if results["valid"]:
            os.makedirs(directory, exist_ok=True)
            os.makedirs(location_directory, exist_ok=True)
            if export_metadata:
                os.makedirs(metadata_directory, exist_ok=True)
                archive_directory = metadata_directory+location[0]
                export_archive(results, archive_directory)
        media_set.append(results)
    return [media_set, directory]
def scrape_array(link, session, directory, username, api_type):
    media_set = [[], []]
    media_type = directory[1]
    count = 0
    found = False
    y = json_request(session, link)
    if "error" in y:
        return media_set
    x = 0
    if api_type == "Highlights":
        y = y["stories"]
    if api_type == "Messages":
        y = y["list"]
    master_date = "01-01-0001 00:00:00"
    for media_api in y:
        for media in media_api["media"]:
            date = "-001-11-30T00:00:00+00:00"
            size = 0
            if "source" in media:
                source = media["source"]
                link = source["source"]
                size = source["size"]
                date = media_api["postedAt"] if "postedAt" in media_api else media_api["createdAt"]
            if "src" in media:
                link = media["src"]
                size = media["info"]["preview"]["size"] if "info" in media_api else 1
                date = media_api["createdAt"]
            if not link:
                continue
            if "ca2.convert" in link:
                link = media["preview"]
            new_dict = dict()
            new_dict["post_id"] = media_api["id"]
            new_dict["link"] = link
            if date == "-001-11-30T00:00:00+00:00":
                date_string = master_date
                date_object = datetime.strptime(
                    master_date, "%d-%m-%Y %H:%M:%S")
            else:
                date_object = datetime.fromisoformat(date)
                date_string = date_object.replace(tzinfo=None).strftime(
                    "%d-%m-%Y %H:%M:%S")
                master_date = date_string
            if media["type"] not in media_type:
                x += 1
                continue
            if "text" not in media_api:
                media_api["text"] = ""
            new_dict["text"] = media_api["text"] if media_api["text"] else ""
            new_dict["postedAt"] = date_string
            file_name = link.rsplit('/', 1)[-1]
            file_name, ext = os.path.splitext(file_name)
            ext = ext.replace(".", "").split('?')[0]
            file_path = reformat(directory[0][1], file_name, new_dict["text"],
                                 ext, date_object, username, format_path,
                                 date_format, text_length, maximum_length)
            new_dict["directory"] = directory[0][1]
            new_dict["filename"] = file_path.rsplit('/', 1)[-1]
            new_dict["size"] = size
            if size == 0:
                media_set[1].append(new_dict)
                continue
            media_set[0].append(new_dict)
    return media_set
def create_session(user_agent, app_token, auth_array):
    me_api = []
    auth_count = 1
    auth_version = "(V1)"
    count = 1
    try:
        auth_cookies = []
        while auth_count < 3:
            if auth_count == 2:
                auth_version = "(V2)"
                # Drop any stored sess cookie before the second pass.
                if auth_array["sess"]:
                    auth_cookies = [
                        c for c in auth_cookies if c["name"] != "sess"]
                count = 1
            session = requests.Session()
            print("Auth " + auth_version + " Attempt " + str(count) + "/" + "10")
            max_threads = multiprocessing.cpu_count()
            session.mount(
                'https://',
                requests.adapters.HTTPAdapter(pool_connections=max_threads,
                                              pool_maxsize=max_threads))
            session.headers = {
                'User-Agent': user_agent,
                'Referer': 'https://stars.avn.com/'
            }
            if auth_array["sess"]:
                found = False
                for auth_cookie in auth_cookies:
                    if auth_array["sess"] == auth_cookie["value"]:
                        found = True
                        break
                if not found:
                    auth_cookies.append({
                        'name': 'sess',
                        'value': auth_array["sess"],
                        'domain': '.stars.avn.com'
                    })
            for auth_cookie in auth_cookies:
                session.cookies.set(**auth_cookie)
            while count < 11:
                link = "https://stars.avn.com/api2/v2/users/me"
                r = json_request(session, link)
                count += 1
                if not r:
                    auth_cookies = []
                    continue
                me_api = r
                if 'error' in r:
                    error = r["error"]
                    error_message = r["error"]["message"]
                    if error["code"] == 101:
                        error_message = "Blocked by 2FA."
                    print(error_message)
                    if "token" in error_message:
                        break
                    continue
                else:
                    print("Welcome " + r["name"])
                    option_string = "username or profile link"
                    array = dict()
                    array["session"] = session
                    array["option_string"] = option_string
                    array["subscriber_count"] = r["followingCount"]
                    array["me_api"] = me_api
                    return array
            auth_count += 1
    except Exception as e:
        log_error.exception(e)
        # input("Enter to continue")
    array = dict()
    array["session"] = None
    array["me_api"] = me_api
    return array
def process_mass_messages(message, limit):
    text = message["textCropped"].replace("&", "")
    link_2 = "https://onlyfans.com/api2/v2/chats?limit="+limit+"&offset=0&filter=&order=activity&query=" + \
        text+"&app-token="+app_token
    y = json_request(session, link_2)
    return y
def media_scraper(session, site_name, only_links, link, locations, directory,
                  api_count, username, api_type, app_token):
    seperator = " | "
    master_set = []
    media_set = []
    original_link = link
    for location in locations:
        pool = ThreadPool()
        link = original_link
        print("Scraping [" + str(seperator.join(location[1])) +
              "]. Should take less than a minute.")
        array = format_directory(j_directory, site_name, username,
                                 location[0], api_type)
        user_directory = array[0]
        location_directory = array[2][0][1]
        metadata_directory = array[1]
        directories = array[2] + [location[1]]
        if not master_set:
            if api_type == "Posts":
                ceil = math.ceil(api_count / 100)
                a = list(range(ceil))
                for b in a:
                    b = b * 100
                    master_set.append(
                        link.replace("offset=0", "offset=" + str(b)))
            if api_type == "Archived":
                ceil = math.ceil(api_count / 100)
                a = list(range(ceil))
                for b in a:
                    b = b * 100
                    master_set.append(
                        link.replace("offset=0", "offset=" + str(b)))

            def xmessages(link):
                f_offset_count = 0
                while True:
                    y = json_request(session, link)
                    if "list" in y:
                        if y["list"]:
                            master_set.append(link)
                            if y["hasMore"]:
                                f_offset_count2 = f_offset_count + 100
                                f_offset_count = f_offset_count2 - 100
                                link = link.replace(
                                    "offset=" + str(f_offset_count),
                                    "offset=" + str(f_offset_count2))
                                f_offset_count = f_offset_count2
                            else:
                                break
                        else:
                            break
                    else:
                        break

            def process_chats(subscriber):
                fool = subscriber["withUser"]
                fool_id = str(fool["id"])
                link_2 = "https://onlyfans.com/api2/v2/chats/"+fool_id + \
                    "/messages?limit=100&offset=0&order=desc&app-token="+app_token+""
                xmessages(link_2)

            if api_type == "Messages":
                xmessages(link)
            if api_type == "Mass Messages":
                messages = []
                offset_count = 0
                while True:
                    y = json_request(session, link)
                    if y:
                        messages.append(y)
                        offset_count2 = offset_count + 99
                        offset_count = offset_count2 - 99
                        link = link.replace("offset=" + str(offset_count),
                                            "offset=" + str(offset_count2))
                        offset_count = offset_count2
                    else:
                        break
                messages = list(chain(*messages))
                message_count = 0

                def process_mass_messages(message, limit):
                    text = message["textCropped"].replace("&", "")
                    link_2 = "https://onlyfans.com/api2/v2/chats?limit="+limit + \
                        "&offset=0&filter=&order=activity&query=" + \
                        text+"&app-token="+app_token
                    y = json_request(session, link_2)
                    return y

                limit = "10"
                if len(messages) > 99:
                    limit = "2"
                subscribers = pool.starmap(process_mass_messages,
                                           product(messages, [limit]))
                subscribers = [
                    item for sublist in subscribers
                    for item in sublist["list"]
                ]
                seen = set()
                subscribers = [
                    x for x in subscribers if x["withUser"]["id"] not in seen
                    and not seen.add(x["withUser"]["id"])
                ]
                x = pool.starmap(process_chats, product(subscribers))
            if api_type == "Stories":
                master_set.append(link)
            if api_type == "Highlights":
                r = json_request(session, link)
                if "error" in r:
                    break
                for item in r["list"]:
                    link2 = "https://stars.avn.com/api2/v2/stories/collections/" + \
                        str(item["id"])
                    master_set.append(link2)
        x = pool.starmap(
            scrape_array,
            product(master_set, [session], [directories], [username],
                    [api_type]))
        results = format_media_set(location[0], x)
        seen = set()
        results["valid"] = [
            x for x in results["valid"]
            if x["filename"] not in seen and not seen.add(x["filename"])
        ]
        if results["valid"]:
            os.makedirs(directory, exist_ok=True)
            os.makedirs(location_directory, exist_ok=True)
            if export_metadata:
                os.makedirs(metadata_directory, exist_ok=True)
                archive_directory = metadata_directory + location[0]
                export_archive(results, archive_directory)
        media_set.append(results)
    return [media_set, directory]