# Example #1
# 0
def prepare_scraper(sessions, site_name, item):
    """Build API pagination links for one scrape job, then download and sort media.

    Args:
        sessions: list of authenticated session objects; sessions[0] is used
            for the single-threaded API calls, the full list is fanned out to
            the media scraper workers.
        site_name: name of the site being scraped (used to format directories).
        item: dict with "api_type" (e.g. "Profile", "Posts", "Archived",
            "Messages", "Mass Messages", "Stories", "Highlights") and
            "api_array" carrying api_link, media_types, username, directory
            and post_count.

    Returns:
        [media_set, directory] on normal completion. Returns None early for
        the "Profile" api_type, which is fully handled by profile_scraper.
    """
    api_type = item["api_type"]
    api_array = item["api_array"]
    link = api_array["api_link"]
    locations = api_array["media_types"]
    username = api_array["username"]
    directory = api_array["directory"]
    api_count = api_array["post_count"]
    separator = " | "
    user_directory = ""
    metadata_directory = ""
    master_set = []
    media_set = []
    metadata_set = []
    original_link = link
    for location in locations:
        # NOTE(review): a fresh ThreadPool is created per location and never
        # closed/joined — confirm whether worker threads leak across runs.
        pool = ThreadPool()
        link = original_link
        print("Scraping [" + str(separator.join(location[1])) +
              "]. Should take less than a minute.")
        array = format_directory(j_directory, site_name, username, location[0],
                                 api_type)
        user_directory = array[0]
        location_directory = array[2][0][1]
        metadata_directory = array[1]
        directories = array[2] + [location[1]]
        # The link list only needs to be built once; subsequent locations
        # reuse the master_set assembled on the first pass.
        if not master_set:
            if api_type == "Profile":
                profile_scraper(link, sessions[0], directory, username)
                return
            if api_type == "Posts":
                # Page through posts 100 at a time by rewriting limit/offset
                # query parameters in the base link.
                num = 100
                link = link.replace("limit=0", "limit=" + str(num))
                original_link = link
                ceil = math.ceil(api_count / num)
                a = list(range(ceil))
                for b in a:
                    b = b * num
                    master_set.append(
                        link.replace("offset=0", "offset=" + str(b)))
            if api_type == "Archived":
                ceil = math.ceil(api_count / 100)
                a = list(range(ceil))
                for b in a:
                    b = b * 100
                    master_set.append(
                        link.replace("offset=0", "offset=" + str(b)))

            def xmessages(link):
                # Walk a paginated messages endpoint, appending each page's
                # link to master_set until the API reports no more items.
                f_offset_count = 0
                while True:
                    y = json_request(sessions[0], link)
                    if not y:
                        return
                    if "list" in y:
                        if y["list"]:
                            master_set.append(link)
                            if y["hasMore"]:
                                # Advance the offset by 100 by rewriting the
                                # current offset value in the URL.
                                f_offset_count2 = f_offset_count + 100
                                f_offset_count = f_offset_count2 - 100
                                link = link.replace(
                                    "offset=" + str(f_offset_count),
                                    "offset=" + str(f_offset_count2))
                                f_offset_count = f_offset_count2
                            else:
                                break
                        else:
                            break
                    else:
                        break

            def process_chats(subscriber):
                # Collect message page links for a single chat partner.
                fool = subscriber["withUser"]
                fool_id = str(fool["id"])
                link_2 = "https://onlyfans.com/api2/v2/chats/"+fool_id + \
                    "/messages?limit=100&offset=0&order=desc&app-token="+app_token+""
                xmessages(link_2)

            if api_type == "Messages":
                xmessages(link)
            if api_type == "Mass Messages":
                results = []
                max_threads = multiprocessing.cpu_count()
                offset_count = 0
                offset_count2 = max_threads
                # Fetch message pages in parallel, doubling the window each
                # round until a whole batch comes back empty.
                while True:

                    def process_messages(link, session):
                        y = json_request(session, link)
                        if y and "error" not in y:
                            return y
                        else:
                            return []

                    link_list = [
                        link.replace("offset=0", "offset=" + str(i * 30))
                        for i in range(offset_count, offset_count2)
                    ]
                    link_list = pool.starmap(process_messages,
                                             product(link_list, [sessions[0]]))
                    if all(not result for result in link_list):
                        break
                    link_list2 = list(chain(*link_list))

                    results.append(link_list2)
                    offset_count = offset_count2
                    offset_count2 = offset_count * 2
                unsorted_messages = list(chain(*results))
                unsorted_messages.sort(key=lambda x: x["id"])
                messages = unsorted_messages

                def process_mass_messages(message, limit):
                    # Search chats by the (ampersand-stripped) cropped text of
                    # a mass message to find its recipients.
                    text = message["textCropped"].replace("&", "")
                    link_2 = "https://onlyfans.com/api2/v2/chats?limit="+limit+"&offset=0&filter=&order=activity&query=" + \
                        text+"&app-token="+app_token
                    y = json_request(sessions[0], link_2)
                    if y is None or "error" in y:
                        return []
                    return y

                # Use a smaller search limit when there are many messages,
                # presumably to reduce per-request load — TODO confirm intent.
                limit = "10"
                if len(messages) > 99:
                    limit = "2"
                subscribers = pool.starmap(process_mass_messages,
                                           product(messages, [limit]))
                subscribers = filter(None, subscribers)
                subscribers = [
                    item for sublist in subscribers for item in sublist["list"]
                ]
                # Deduplicate subscribers by user id, keeping first occurrence
                # (seen.add returns None, so the `and not` clause is always
                # truthy and only performs the side effect).
                seen = set()
                subscribers = [
                    x for x in subscribers if x["withUser"]["id"] not in seen
                    and not seen.add(x["withUser"]["id"])
                ]
                x = pool.starmap(process_chats, product(subscribers))
            if api_type == "Stories":
                master_set.append(link)
            if api_type == "Highlights":
                r = json_request(sessions[0], link)
                if "error" in r:
                    # NOTE(review): this abandons ALL remaining locations on a
                    # single error response — confirm that is intended.
                    break
                for item in r:
                    link2 = "https://onlyfans.com/api2/v2/stories/highlights/" + \
                        str(item["id"])+"?app-token="+app_token+""
                    master_set.append(link2)
        master_set2 = assign_session(master_set, len(sessions))
        x = pool.starmap(
            media_scraper,
            product(master_set2, [sessions], [directories], [username],
                    [api_type]))
        results = format_media_set(location[0], x)
        # Deduplicate valid results by filename (same seen.add idiom as above).
        seen = set()
        results["valid"] = [
            x for x in results["valid"]
            if x["filename"] not in seen and not seen.add(x["filename"])
        ]
        seen = set()
        location_directories = [
            x["directory"] for x in results["valid"]
            if x["directory"] not in seen and not seen.add(x["directory"])
        ]
        if results["valid"]:
            # Group downloads by post id; groupby assumes results are already
            # ordered by post_id — presumably guaranteed by format_media_set.
            results["valid"] = [
                list(g) for k, g in groupby(results["valid"],
                                            key=lambda x: x["post_id"])
            ]
            os.makedirs(directory, exist_ok=True)
            for location_directory in location_directories:
                os.makedirs(location_directory, exist_ok=True)
        if results["invalid"]:
            results["invalid"] = [
                list(g) for k, g in groupby(results["invalid"],
                                            key=lambda x: x["post_id"])
            ]
        if sort_free_paid_posts:
            ofsorter.sorter(user_directory, api_type, location[0], results)
        metadata_set.append(results)
        media_set.append(results)

    if export_metadata:
        metadata_set = [x for x in metadata_set if x["valid"] or x["invalid"]]
        for item in metadata_set:
            if item["valid"] or item["invalid"]:
                legacy_metadata = os.path.join(user_directory, api_type,
                                               "Metadata")
                if delete_legacy_metadata:
                    if os.path.isdir(legacy_metadata):
                        shutil.rmtree(legacy_metadata)
        if metadata_set:
            os.makedirs(metadata_directory, exist_ok=True)
            archive_directory = os.path.join(metadata_directory, api_type)
            # Archive a deep copy so filter_metadata cannot mutate the live
            # result sets returned to the caller.
            metadata_set_copy = copy.deepcopy(metadata_set)
            metadata_set = filter_metadata(metadata_set_copy)
            export_archive(metadata_set, archive_directory, json_settings)
    return [media_set, directory]
# Example #2
# 0
def prepare_scraper(sessions, site_name, item):
    """Build API pagination links for one scrape job, then download the media.

    Args:
        sessions: list of authenticated session objects; sessions[0] is used
            for single API calls, the full list is distributed to workers.
        site_name: name of the site being scraped (used to format directories).
        item: dict with "api_type" ("Posts", "Archived", "Stories",
            "Highlights") and "api_array" carrying api_link, media_types,
            username, directory and post_count.

    Returns:
        [media_set, directory] on completion; returns None early if the
        Highlights listing request comes back with an error.
    """
    # BUGFIX: the original `pool = multiprocessing()` called the module object
    # itself (TypeError). Sibling versions of this function use a thread pool,
    # so instantiate multiprocessing.pool.ThreadPool here.
    from multiprocessing.pool import ThreadPool

    api_type = item["api_type"]
    api_array = item["api_array"]
    link = api_array["api_link"]
    locations = api_array["media_types"]
    username = api_array["username"]
    directory = api_array["directory"]
    api_count = api_array["post_count"]
    master_set = []
    media_set = []
    metadata_set = []
    pool = ThreadPool()
    formatted_directories = main_helper.format_directories(
        j_directory, site_name, username, locations, api_type)
    model_directory = formatted_directories["model_directory"]
    metadata_directory = formatted_directories["metadata_directory"]
    api_directory = formatted_directories["api_directory"]
    if api_type == "Posts":
        # Page through posts 100 at a time by rewriting the offset parameter.
        ceil = math.ceil(api_count / 100)
        a = list(range(ceil))
        for b in a:
            b = b * 100
            master_set.append(link.replace("offset=0", "offset=" + str(b)))
    if api_type == "Archived":
        ceil = math.ceil(api_count / 100)
        a = list(range(ceil))
        for b in a:
            b = b * 100
            master_set.append(link.replace("offset=0", "offset=" + str(b)))
    if api_type == "Stories":
        master_set.append(link)
    if api_type == "Highlights":
        r = main_helper.json_request(sessions[0], link)
        if "error" in r:
            return
        for item in r["list"]:
            link2 = "https://stars.avn.com/api2/v2/stories/collections/" + \
                str(item["id"])
            master_set.append(link2)
    master_set2 = main_helper.assign_session(master_set, sessions)
    media_set = {}
    media_set["set"] = []
    media_set["found"] = False
    count = len(master_set2)
    max_attempts = 100
    # Retry the scrape until every page returns data, restoring only the
    # missing pages between attempts.
    for attempt in list(range(max_attempts)):
        print("Scrape Attempt: " + str(attempt + 1) + "/" + str(max_attempts))
        media_set2 = pool.starmap(
            media_scraper,
            product(master_set2, [sessions], [formatted_directories],
                    [username], [api_type]))
        media_set["set"].extend(media_set2)
        faulty = [x for x in media_set2 if not x]
        if not faulty:
            print("Found: " + api_type)
            media_set["found"] = True
            break
        else:
            if count < 2:
                # A single-page job that failed once is not retried.
                break
            num = len(faulty) * 100
            print("Missing " + str(num) + " Posts... Retrying...")
            master_set2 = main_helper.restore_missing_data(
                master_set2, media_set2)
    if not media_set["found"]:
        print("No " + api_type + " Found.")
    media_set = media_set["set"]
    main_helper.delete_empty_directories(api_directory)
    media_set = [x for x in media_set]
    media_set = main_helper.format_media_set(media_set)

    metadata_set = media_set
    if export_metadata:
        metadata_set = [x for x in metadata_set if x["valid"] or x["invalid"]]
        for item in metadata_set:
            if item["valid"] or item["invalid"]:
                legacy_metadata = formatted_directories["legacy_metadata"]
                if delete_legacy_metadata:
                    if os.path.isdir(legacy_metadata):
                        shutil.rmtree(legacy_metadata)
        if metadata_set:
            os.makedirs(metadata_directory, exist_ok=True)
            archive_directory = os.path.join(metadata_directory, api_type)
            # Archive a deep copy so filter_metadata cannot mutate the live
            # media_set returned to the caller.
            metadata_set_copy = copy.deepcopy(metadata_set)
            metadata_set = main_helper.filter_metadata(metadata_set_copy)
            main_helper.export_archive(metadata_set, archive_directory,
                                       json_settings)
    return [media_set, directory]
# Example #3
# 0
def prepare_scraper(sessions, site_name, item):
    """Build API pagination links for one scrape job, then download the media.

    Args:
        sessions: list of authenticated session objects; sessions[0] is used
            for single-threaded API calls, the full list is distributed to
            media scraper workers.
        site_name: name of the site being scraped (used to format directories).
        item: dict with "api_type" ("Profile", "Posts", "Archived",
            "Messages", "Mass Messages", "Stories", "Highlights") and
            "api_array" carrying api_link, media_types, username, directory
            and post_count.

    Returns:
        [media_set, directory] on normal completion. Returns None early for
        the "Profile" api_type (handled by profile_scraper) or when the
        Highlights listing request returns an error.
    """
    api_type = item["api_type"]
    api_array = item["api_array"]
    link = api_array["api_link"]
    locations = api_array["media_types"]
    username = api_array["username"]
    directory = api_array["directory"]
    api_count = api_array["post_count"]
    master_set = []
    media_set = []
    metadata_set = []
    # NOTE(review): the pool is never closed/joined — confirm whether worker
    # threads leak across runs.
    pool = ThreadPool()
    formatted_directories = main_helper.format_directories(
        j_directory, site_name, username, locations, api_type)
    model_directory = formatted_directories["model_directory"]
    api_directory = formatted_directories["api_directory"]
    metadata_directory = formatted_directories["metadata_directory"]
    legacy_metadata_directory = os.path.join(api_directory, "Metadata")
    # legacy_metadata = main_helper.legacy_metadata(legacy_metadata_directory)
    if api_type == "Profile":
        profile_scraper(link, sessions[0], directory, username)
        return
    if api_type == "Posts":
        # Page through posts 100 at a time by rewriting limit/offset
        # query parameters in the base link.
        num = 100
        link = link.replace("limit=0", "limit="+str(num))
        original_link = link
        ceil = math.ceil(api_count / num)
        a = list(range(ceil))
        for b in a:
            b = b * num
            master_set.append(link.replace(
                "offset=0", "offset=" + str(b)))
    if api_type == "Archived":
        ceil = math.ceil(api_count / 100)
        a = list(range(ceil))
        for b in a:
            b = b * 100
            master_set.append(link.replace(
                "offset=0", "offset=" + str(b)))

    def xmessages(link):
        # Walk a paginated messages endpoint, appending each page's link to
        # master_set until the API reports no more items.
        f_offset_count = 0
        while True:
            y = main_helper.json_request(sessions[0], link)
            if not y:
                return
            if "list" in y:
                if y["list"]:
                    master_set.append(link)
                    if y["hasMore"]:
                        # Advance the offset by 100 by rewriting the current
                        # offset value in the URL.
                        f_offset_count2 = f_offset_count+100
                        f_offset_count = f_offset_count2-100
                        link = link.replace(
                            "offset=" + str(f_offset_count), "offset=" + str(f_offset_count2))
                        f_offset_count = f_offset_count2
                    else:
                        break
                else:
                    break
            else:
                break

    def process_chats(subscriber):
        # Collect message page links for a single chat partner.
        fool = subscriber["withUser"]
        fool_id = str(fool["id"])
        link_2 = f"https://onlyfans.com/api2/v2/chats/{fool_id}/messages?limit=100&offset=0&order=desc&app-token={app_token}"
        xmessages(link_2)
    if api_type == "Messages":
        xmessages(link)
    if api_type == "Mass Messages":
        results = []
        max_threads = multiprocessing.cpu_count()
        offset_count = 0
        offset_count2 = max_threads
        # Fetch message pages in parallel, doubling the window each round
        # until a whole batch comes back empty.
        while True:
            def process_messages(link, session):
                y = main_helper.json_request(session, link)
                if y and "error" not in y:
                    return y
                else:
                    return []
            link_list = [link.replace(
                "offset=0", "offset="+str(i*30)) for i in range(offset_count, offset_count2)]
            link_list = pool.starmap(process_messages, product(
                link_list, [sessions[0]]))
            if all(not result for result in link_list):
                break
            link_list2 = list(chain(*link_list))

            results.append(link_list2)
            offset_count = offset_count2
            offset_count2 = offset_count*2
        unsorted_messages = list(chain(*results))
        unsorted_messages.sort(key=lambda x: x["id"])
        messages = unsorted_messages

        def process_mass_messages(message, limit):
            # Search chats by the (ampersand-stripped) cropped text of a mass
            # message to find its recipients.
            text = message["textCropped"].replace("&", "")
            link_2 = "https://onlyfans.com/api2/v2/chats?limit="+limit+"&offset=0&filter=&order=activity&query=" + \
                text+"&app-token="+app_token
            y = main_helper.json_request(sessions[0], link_2)
            if y is None or "error" in y:
                return []
            return y
        # Use a smaller search limit when there are many messages,
        # presumably to reduce per-request load — TODO confirm intent.
        limit = "10"
        if len(messages) > 99:
            limit = "2"
        subscribers = pool.starmap(process_mass_messages, product(
            messages, [limit]))
        subscribers = filter(None, subscribers)
        subscribers = [
            item for sublist in subscribers for item in sublist["list"]]
        # Deduplicate subscribers by user id, keeping first occurrence
        # (seen.add returns None, so the `and not` clause is always truthy
        # and only performs the side effect).
        seen = set()
        subscribers = [x for x in subscribers if x["withUser"]
                       ["id"] not in seen and not seen.add(x["withUser"]["id"])]
        x = pool.starmap(process_chats, product(
            subscribers))
    if api_type == "Stories":
        master_set.append(link)
    if api_type == "Highlights":
        r = main_helper.json_request(sessions[0], link)
        if "error" in r:
            return
        for item in r:
            link2 = f"https://onlyfans.com/api2/v2/stories/highlights/{item['id']}?app-token={app_token}"
            master_set.append(link2)
    master_set2 = main_helper.assign_session(master_set, sessions)
    media_set = []
    count = len(master_set2)
    max_attempts = 100
    # Retry the scrape until every page returns data, restoring only the
    # missing pages between attempts.
    for attempt in list(range(max_attempts)):
        print("Scrape Attempt: "+str(attempt+1)+"/"+str(max_attempts))
        media_set2 = pool.starmap(media_scraper, product(
            master_set2, [sessions], [formatted_directories], [username], [api_type]))
        media_set.extend(media_set2)
        if count > 1:
            faulty = [x for x in media_set2 if not x]
            if not faulty:
                print("Found: "+api_type)
                break
            else:
                num = len(faulty)*100
                print("Missing "+str(num)+" Posts... Retrying...")
                master_set2 = main_helper.restore_missing_data(
                    master_set2, media_set2)
        else:
            # NOTE(review): with a single page this prints "No ... Found."
            # even when the page succeeded — confirm whether that is intended.
            print("No "+api_type+" Found.")
            break
    main_helper.delete_empty_directories(api_directory)
    media_set = [x for x in media_set]
    media_set = main_helper.format_media_set(media_set)

    metadata_set = media_set
    if export_metadata:
        metadata_set = [x for x in metadata_set if x["valid"] or x["invalid"]]
        for item in metadata_set:
            if item["valid"] or item["invalid"]:
                legacy_metadata = formatted_directories["legacy_metadata"]
        if metadata_set:
            os.makedirs(metadata_directory, exist_ok=True)
            archive_directory = os.path.join(metadata_directory, api_type)
            # Archive a deep copy so filter_metadata cannot mutate the live
            # media_set returned to the caller.
            metadata_set_copy = copy.deepcopy(metadata_set)
            metadata_set = main_helper.filter_metadata(metadata_set_copy)
            main_helper.export_archive(
                metadata_set, archive_directory, json_settings)
    return [media_set, directory]