Example #1
def __init__(self, api=None, subscription=None) -> None:
    # Download all scraped media for the subscription and export its metadata.
    if api:
        username = subscription.username
        download_info = subscription.download_info
        if download_info:
            self.downloaded = True
            metadata_locations = download_info["metadata_locations"]
            directory = download_info["directory"]
            for api_type, value in subscription.scraped:
                if not value or not isinstance(value, create_metadata):
                    continue
                for location, v in value.content:
                    if location == "Texts":
                        continue
                    media_set = v.valid
                    string = "Download Processing\n"
                    string += f"Name: {username} | Type: {api_type} | Count: {len(media_set)} {location} | Directory: {directory}\n"
                    print(string)
                    pool = multiprocessing()
                    pool.starmap(self.download, product(
                        media_set, [api]))
                metadata_path = metadata_locations.get(api_type)
                if metadata_path:
                    value = value.export()
                    if export_metadata:
                        export_archive(value, metadata_path,
                                       json_settings)
        else:
            self.downloaded = False
Example #2
def paid_content_scraper(apis: list[start]):
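    # Fetch each account's paid content, attach every item to its author's
    # subscription, then scrape media/metadata per API type and export it if enabled.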
    for api in apis:
        paid_contents = api.get_paid_content()
        authed = api.auth
        for paid_content in paid_contents:
            author = paid_content.get("author")
            author = paid_content.get("fromUser", author)
            subscription = api.get_subscription(
                check=True, identifier=author["id"])
            if not subscription:
                subscription = create_subscription(author)
                authed.subscriptions.append(subscription)
            if paid_content["responseType"] == "post":
                if paid_content["isArchived"]:
                    print(f"Model: {author['username']}")
                    # print(
                    #     "ERROR, PLEASE REPORT THIS AS AN ISSUE AND TELL ME WHICH MODEL YOU'RE SCRAPIMG, THANKS")
                    # input()
                    # exit()
            api_type = paid_content["responseType"].capitalize()+"s"
            api_media = getattr(subscription.scraped, api_type)
            api_media.append(paid_content)
        count = 0
        max_count = len(authed.subscriptions)
        for subscription in authed.subscriptions:
            string = f"Scraping - {subscription.username} | {count} / {max_count}"
            print(string)
            subscription.sessions = api.sessions
            username = subscription.username
            site_name = "OnlyFans"
            media_type = format_media_types()
            count += 1
            for api_type, paid_content in subscription.scraped:
                if api_type == "Archived":
                    continue
                formatted_directories = format_directories(
                    j_directory, site_name, username, metadata_directory_format, media_type, api_type)
                metadata_directory = formatted_directories["metadata_directory"]
                metadata_path = os.path.join(
                    metadata_directory, api_type+".json")
                new_metadata = media_scraper(paid_content, api,
                                             formatted_directories, username, api_type)
                if new_metadata:
                    api_path = os.path.join(api_type, "")
                    new_metadata_object = process_metadata(
                        api, new_metadata, formatted_directories, subscription, api_type, api_path, metadata_path, site_name)
                    new_metadata_set = new_metadata_object.convert()
                    if export_metadata:
                        export_archive(new_metadata_set,
                                       metadata_path, json_settings)
Example #3
def start_datascraper(api, identifier, site_name, choice_type=None):
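    # Resolve the subscription, archive mass messages for the user's own account,
    # prompt for (or auto-select) the APIs to scrape, then run each scraper.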
    print("Scrape Processing")
    subscription = api.get_subscription(identifier)
    if not subscription:
        return [False, subscription]
    post_count = subscription.postsCount
    user_id = str(subscription.id)
    avatar = subscription.avatar
    username = subscription.username
    link = subscription.link
    formatted_directories = main_helper.format_directories(
        j_directory, site_name, username)
    metadata_directory = formatted_directories["metadata_directory"]
    archive_path = os.path.join(metadata_directory, "Mass Messages.json")
    if subscription.is_me:
        imported = import_archive(archive_path)
        mass_messages = api.get_mass_messages(resume=imported)
        export_archive(mass_messages, archive_path,
                       json_settings, rename=False)
    info = {}
    info["download"] = prepare_download.start(
        username=username, link=link, image_url=avatar, post_count=post_count, webhook=webhook)
    print("Name: "+username)
    api_array = scrape_choice(api, subscription)
    api_array = format_options(api_array, "apis")
    apis = api_array[0]
    api_string = api_array[1]
    if not json_settings["auto_scrape_apis"]:
        print("Apis: "+api_string)
        value = int(input().strip())
    else:
        value = 0
    if value:
        apis = [apis[value]]
    else:
        apis.pop(0)
    for item in apis:
        print("Type: "+item["api_type"])
        only_links = item["api_array"]["only_links"]
        post_count = str(item["api_array"]["post_count"])
        item["api_array"]["username"] = username
        item["api_array"]["subscription"] = subscription
        api_type = item["api_type"]
        results = prepare_scraper(
            api, site_name, item)
    if any(x for x in subscription.scraped):
        subscription.download_info["directory"] = j_directory
        subscription.download_info["model_directory"] = os.path.join(
            j_directory, username)
        subscription.download_info["webhook"] = webhook
    print("Scrape Completed"+"\n")
    return [True, info]
Example #4
def paid_content_scraper(apis: list[start]):
    for api in apis:
        paid_contents = api.get_paid_content()
        authed = api.auth
        for paid_content in paid_contents:
            author = paid_content.get("author")
            author = paid_content.get("fromUser", author)
            subscription = api.get_subscription(check=True,
                                                identifier=author["id"])
            if not subscription:
                subscription = create_subscription(author)
                authed.subscriptions.append(subscription)
            api_type = paid_content["responseType"].capitalize() + "s"
            api_media = getattr(subscription.scraped, api_type)
            api_media.append(paid_content)
        count = 0
        max_count = len(authed.subscriptions)
        for subscription in authed.subscriptions:
            string = f"Scraping - {subscription.username} | {count} / {max_count}"
            print(string)
            subscription.sessions = api.sessions
            username = subscription.username
            site_name = "OnlyFans"
            media_type = format_media_types()
            count += 1
            for api_type, paid_content in subscription.scraped:
                formatted_directories = format_directories(
                    j_directory, site_name, username,
                    metadata_directory_format, media_type, api_type)
                metadata_directory = formatted_directories[
                    "metadata_directory"]
                metadata_path = os.path.join(metadata_directory,
                                             api_type + ".json")
                new_metadata = media_scraper(paid_content, api,
                                             formatted_directories, username,
                                             api_type)
                if new_metadata:
                    api_path = os.path.join(api_type, "")
                    new_metadata_object = process_metadata(
                        api, new_metadata, formatted_directories, subscription,
                        api_type, api_path, metadata_path, site_name)
                    new_metadata_set = new_metadata_object.convert()
                    if export_metadata:
                        export_archive(new_metadata_set, metadata_path,
                                       json_settings)
Example #5
def account_setup(api):
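    # Log in; for performer accounts, refresh the Mass Messages archive, then
    # load the account's subscriptions.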
    status = False
    auth = api.login()
    if auth:
        profile_directory = json_global_settings["profile_directories"][0]
        profile_directory = os.path.abspath(profile_directory)
        profile_directory = os.path.join(profile_directory, auth["username"])
        profile_metadata_directory = os.path.join(profile_directory,
                                                  "Metadata")
        metadata_filepath = os.path.join(profile_metadata_directory,
                                         "Mass Messages.json")
        if auth["isPerformer"]:
            imported = import_archive(metadata_filepath)
            mass_messages = api.get_mass_messages(resume=imported)
            export_archive(mass_messages, metadata_filepath, json_settings)
        # chats = api.get_chats()
        subscriptions = api.get_subscriptions()
        status = True
    return status
Example #6
def account_setup(api: start, identifier=""):
    status = False
    authed = api.login()
    if isinstance(authed, create_auth):
        jobs = json_settings["jobs"]
        profile_directory = json_global_settings["profile_directories"][0]
        profile_directory = os.path.abspath(profile_directory)
        profile_directory = os.path.join(profile_directory, authed.username)
        profile_metadata_directory = os.path.join(
            profile_directory, "Metadata")
        metadata_filepath = os.path.join(
            profile_metadata_directory, "Mass Messages.json")
        if authed.isPerformer:
            imported = import_archive(metadata_filepath)
            mass_messages = api.get_mass_messages(resume=imported)
            export_archive(mass_messages, metadata_filepath,
                           json_settings)
        # chats = api.get_chats()
        if not identifier and jobs["scrape_names"]:
            # metadata_filepath = os.path.join(
            #     profile_metadata_directory, "Subscriptions.json")
            # imported = import_archive(metadata_filepath)
            subscriptions = api.get_subscriptions()
        # collection = []
        # for subscription in subscriptions:
        #     delattr(subscription,"download_info")
        #     delattr(subscription,"sessions")
        #     delattr(subscription,"scraped")
        #     delattr(subscription,"is_me")
        #     delattr(subscription,"links")
        #     collection.append(subscription)
        # collection = jsonpickle.encode(
        #     collection, unpicklable=False)
        # collection = jsonpickle.decode(collection)
        # export_archive(collection, metadata_filepath,
        #                 json_settings)
        status = True
    return status
Example #7
def paid_content_scraper(api):
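    # Build a temporary subscription for each paid item's author, scrape the item
    # into per-type metadata, export it if enabled, then download the media.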
    paid_contents = api.get_paid_content(refresh=False)
    results = []
    for paid_content in paid_contents:
        metadata_locations = {}
        author = paid_content.get("author")
        author = paid_content.get("fromUser", author)
        subscription = create_subscription(author)
        subscription.sessions = api.sessions
        subscription.download_info["directory"] = j_directory
        username = subscription.username
        model_directory = os.path.join(j_directory, username)
        api_type = paid_content["responseType"].capitalize() + "s"
        subscription.download_info["metadata_locations"] = j_directory
        subscription.download_info["metadata_locations"] = metadata_locations
        site_name = "OnlyFans"
        media_type = format_media_types()
        formatted_directories = format_directories(j_directory, site_name,
                                                   username,
                                                   metadata_directory_format,
                                                   media_type, api_type)
        metadata_directory = formatted_directories["metadata_directory"]
        metadata_path = os.path.join(metadata_directory, api_type + ".json")
        metadata_locations[api_type] = metadata_path
        new_metadata = media_scraper([paid_content], api,
                                     formatted_directories, username, api_type)
        for directory in new_metadata["directories"]:
            os.makedirs(directory, exist_ok=True)
        api_path = os.path.join(api_type, "")
        new_metadata_object = process_metadata(api, new_metadata,
                                               formatted_directories,
                                               subscription, api_type,
                                               api_path, metadata_path,
                                               site_name)
        new_metadata_set = new_metadata_object.convert()
        if export_metadata:
            export_archive(new_metadata_set, metadata_path, json_settings)
        download_media(api, subscription)
    return results
Example #8
def paid_content_scraper(api):
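    # Variant that merges each paid item's new metadata with any archive already
    # on disk before exporting it, then downloads the media.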
    paid_contents = api.get_paid_content(refresh=False)
    results = []
    for paid_content in paid_contents:
        author = paid_content.get("author")
        author = paid_content.get("fromUser", author)
        subscription = create_subscription(author)
        subscription.sessions = api.sessions
        subscription.download_info["directory"] = j_directory
        username = subscription.username
        model_directory = os.path.join(j_directory, username)
        metadata_folder = os.path.join(model_directory, "Metadata")
        api_type = paid_content["responseType"].capitalize()+"s"
        metadata_path = os.path.join(
            metadata_folder, api_type+".json")
        site_name = "OnlyFans"
        media_type = format_media_types()
        formatted_directories = main_helper.format_directories(
            j_directory, site_name, username, media_type, api_type)
        new_item = media_scraper([paid_content], api,
                                 formatted_directories, username, api_type)
        for directory in new_item["directories"]:
            os.makedirs(directory, exist_ok=True)
        download_metadata = prepare_metadata(new_item).metadata
        subscription.set_scraped(api_type, download_metadata)
        metadata = prepare_metadata(new_item, export=True).metadata
        metadata = jsonpickle.encode(
            metadata, unpicklable=False)
        new_metadata = jsonpickle.decode(metadata)
        old_metadata = import_archive(metadata_path)
        if old_metadata:
            old_metadata = metadata_fixer(directory=metadata_path.replace(
                ".json", ""), metadata_types=old_metadata)
            unrefined = compare_metadata(
                new_metadata, old_metadata, new_chain=True)
            unrefined = prepare_metadata(unrefined, export=True).metadata
            new_metadata = jsonpickle.encode(
                unrefined, unpicklable=False)
            new_metadata = jsonpickle.decode(new_metadata)
            results.append(new_metadata)
        os.makedirs(model_directory, exist_ok=True)
        export_archive(new_metadata, metadata_path, json_settings)
        download_media(api, subscription)
    return results
Example #9
def prepare_scraper(session, site_name, only_links, link, locations, directory,
                    api_count, username, api_type, app_token):
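    # For each media-type location, build the paginated API links (master_set),
    # scrape them in a thread pool, deduplicate the results, and export metadata.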
    separator = " | "
    user_directory = ""
    metadata_directory = ""
    master_set = []
    media_set = []
    metadata_set = []
    original_link = link
    for location in locations:
        pool = ThreadPool()
        link = original_link
        print("Scraping [" + str(seperator.join(location[1])) +
              "]. Should take less than a minute.")
        array = format_directory(j_directory, site_name, username, location[0],
                                 api_type)
        user_directory = array[0]
        location_directory = array[2][0][1]
        metadata_directory = array[1]
        directories = array[2] + [location[1]]
        if not master_set:
            if api_type == "Posts":
                num = 100
                link = link.replace("limit=0", "limit=" + str(num))
                original_link = link
                ceil = math.ceil(api_count / num)
                a = list(range(ceil))
                for b in a:
                    b = b * num
                    master_set.append(
                        link.replace("offset=0", "offset=" + str(b)))
            if api_type == "Archived":
                ceil = math.ceil(api_count / 100)
                a = list(range(ceil))
                for b in a:
                    b = b * 100
                    master_set.append(
                        link.replace("offset=0", "offset=" + str(b)))

            def xmessages(link):
                f_offset_count = 0
                while True:
                    y = json_request(session, link)
                    if "list" in y:
                        if y["list"]:
                            master_set.append(link)
                            if y["hasMore"]:
                                f_offset_count2 = f_offset_count + 100
                                f_offset_count = f_offset_count2 - 100
                                link = link.replace(
                                    "offset=" + str(f_offset_count),
                                    "offset=" + str(f_offset_count2))
                                f_offset_count = f_offset_count2
                            else:
                                break
                        else:
                            break
                    else:
                        break

            def process_chats(subscriber):
                fool = subscriber["withUser"]
                fool_id = str(fool["id"])
                link_2 = "https://onlyfans.com/api2/v2/chats/"+fool_id + \
                    "/messages?limit=100&offset=0&order=desc&app-token="+app_token+""
                xmessages(link_2)

            if api_type == "Messages":
                xmessages(link)
            if api_type == "Mass Messages":
                results = []
                max_threads = multiprocessing.cpu_count()
                offset_count = 0
                offset_count2 = max_threads
                while True:

                    def process_messages(link, session):
                        y = json_request(session, link)
                        if y and "error" not in y:
                            return y
                        else:
                            return []

                    link_list = [
                        link.replace("offset=0", "offset=" + str(i * 30))
                        for i in range(offset_count, offset_count2)
                    ]
                    link_list = pool.starmap(process_messages,
                                             product(link_list, [session]))
                    if all(not result for result in link_list):
                        break
                    link_list2 = list(chain(*link_list))

                    results.append(link_list2)
                    offset_count = offset_count2
                    offset_count2 = offset_count * 2
                unsorted_messages = list(chain(*results))
                unsorted_messages.sort(key=lambda x: x["id"])
                messages = unsorted_messages

                def process_mass_messages(message, limit):
                    text = message["textCropped"].replace("&", "")
                    link_2 = "https://onlyfans.com/api2/v2/chats?limit="+limit+"&offset=0&filter=&order=activity&query=" + \
                        text+"&app-token="+app_token
                    y = json_request(session, link_2)
                    if y is None or "error" in y:
                        return []
                    return y

                limit = "10"
                if len(messages) > 99:
                    limit = "2"
                subscribers = pool.starmap(process_mass_messages,
                                           product(messages, [limit]))
                subscribers = filter(None, subscribers)
                subscribers = [
                    item for sublist in subscribers for item in sublist["list"]
                ]
                seen = set()
                subscribers = [
                    x for x in subscribers if x["withUser"]["id"] not in seen
                    and not seen.add(x["withUser"]["id"])
                ]
                x = pool.starmap(process_chats, product(subscribers))
            if api_type == "Stories":
                master_set.append(link)
            if api_type == "Highlights":
                r = json_request(session, link)
                if "error" in r:
                    break
                for item in r:
                    link2 = "https://onlyfans.com/api2/v2/stories/highlights/" + \
                        str(item["id"])+"?app-token="+app_token+""
                    master_set.append(link2)
        x = pool.starmap(
            media_scraper,
            product(master_set, [session], [directories], [username],
                    [api_type]))
        results = format_media_set(location[0], x)
        seen = set()
        results["valid"] = [
            x for x in results["valid"]
            if x["filename"] not in seen and not seen.add(x["filename"])
        ]
        seen = set()
        location_directories = [
            x["directory"] for x in results["valid"]
            if x["directory"] not in seen and not seen.add(x["directory"])
        ]
        if results["valid"]:
            results["valid"] = [
                list(g) for k, g in groupby(results["valid"],
                                            key=lambda x: x["post_id"])
            ]
            os.makedirs(directory, exist_ok=True)
            for location_directory in location_directories:
                os.makedirs(location_directory, exist_ok=True)
        if results["invalid"]:
            results["invalid"] = [
                list(g) for k, g in groupby(results["invalid"],
                                            key=lambda x: x["post_id"])
            ]
        if sort_free_paid_posts:
            ofsorter.sorter(user_directory, api_type, location[0], results)
        metadata_set.append(results)
        media_set.append(results)

    if export_metadata:
        metadata_set = [x for x in metadata_set if x["valid"] or x["invalid"]]
        for item in metadata_set:
            if item["valid"] or item["invalid"]:
                legacy_metadata = os.path.join(user_directory, api_type,
                                               "Metadata")
                if delete_legacy_metadata:
                    if os.path.isdir(legacy_metadata):
                        shutil.rmtree(legacy_metadata)
        if metadata_set:
            os.makedirs(metadata_directory, exist_ok=True)
            archive_directory = os.path.join(metadata_directory, api_type)
            export_archive(metadata_set, archive_directory, json_settings)
    return [media_set, directory]
Example #10
def prepare_scraper(sessions, site_name, item):
    api_type = item["api_type"]
    api_array = item["api_array"]
    link = api_array["api_link"]
    locations = api_array["media_types"]
    username = api_array["username"]
    directory = api_array["directory"]
    api_count = api_array["post_count"]
    master_set = []
    media_set = []
    metadata_set = []
    pool = multiprocessing()
    formatted_directories = main_helper.format_directories(
        j_directory, site_name, username, locations, api_type)
    model_directory = formatted_directories["model_directory"]
    metadata_directory = formatted_directories["metadata_directory"]
    api_directory = formatted_directories["api_directory"]
    if api_type == "Posts":
        ceil = math.ceil(api_count / 100)
        a = list(range(ceil))
        for b in a:
            b = b * 100
            master_set.append(link.replace("offset=0", "offset=" + str(b)))
    if api_type == "Archived":
        ceil = math.ceil(api_count / 100)
        a = list(range(ceil))
        for b in a:
            b = b * 100
            master_set.append(link.replace("offset=0", "offset=" + str(b)))
    if api_type == "Stories":
        master_set.append(link)
    if api_type == "Highlights":
        r = main_helper.json_request(sessions[0], link)
        if "error" in r:
            return
        for item in r["list"]:
            link2 = "https://stars.avn.com/api2/v2/stories/collections/" + \
                str(item["id"])
            master_set.append(link2)
    master_set2 = main_helper.assign_session(master_set, sessions)
    media_set = {}
    media_set["set"] = []
    media_set["found"] = False
    count = len(master_set2)
    max_attempts = 100
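    # Retry until no request in master_set2 comes back empty, the set is too
    # small to retry, or max_attempts is exhausted.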
    for attempt in list(range(max_attempts)):
        print("Scrape Attempt: " + str(attempt + 1) + "/" + str(max_attempts))
        media_set2 = pool.starmap(
            media_scraper,
            product(master_set2, [sessions], [formatted_directories],
                    [username], [api_type]))
        media_set["set"].extend(media_set2)
        faulty = [x for x in media_set2 if not x]
        if not faulty:
            print("Found: " + api_type)
            media_set["found"] = True
            break
        else:
            if count < 2:
                break
            num = len(faulty) * 100
            print("Missing " + str(num) + " Posts... Retrying...")
            master_set2 = main_helper.restore_missing_data(
                master_set2, media_set2)
    if not media_set["found"]:
        print("No " + api_type + " Found.")
    media_set = media_set["set"]
    main_helper.delete_empty_directories(api_directory)
    media_set = [x for x in media_set]
    media_set = main_helper.format_media_set(media_set)

    metadata_set = media_set
    if export_metadata:
        metadata_set = [x for x in metadata_set if x["valid"] or x["invalid"]]
        for item in metadata_set:
            if item["valid"] or item["invalid"]:
                legacy_metadata = formatted_directories["legacy_metadata"]
                if delete_legacy_metadata:
                    if os.path.isdir(legacy_metadata):
                        shutil.rmtree(legacy_metadata)
        if metadata_set:
            os.makedirs(metadata_directory, exist_ok=True)
            archive_directory = os.path.join(metadata_directory, api_type)
            metadata_set_copy = copy.deepcopy(metadata_set)
            metadata_set = main_helper.filter_metadata(metadata_set_copy)
            main_helper.export_archive(metadata_set, archive_directory,
                                       json_settings)
    return [media_set, directory]
Example #11
def prepare_scraper(api: start, site_name, item):
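    # Collect the raw items for the requested api_type, scrape them into
    # metadata, and export the processed result if metadata export is enabled.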
    authed = api.auth
    api_type = item["api_type"]
    api_array = item["api_array"]
    subscription = api_array["subscription"]
    media_type = api_array["media_types"]
    username = api_array["username"]
    master_set = []
    pool = multiprocessing()
    formatted_directories = format_directories(
        j_directory, site_name, username, metadata_directory_format, media_type, api_type)
    legacy_model_directory = formatted_directories["legacy_model_directory"]
    metadata_directory = formatted_directories["metadata_directory"]
    download_directory = formatted_directories["download_directory"]
    if api_type == "Profile":
        profile_scraper(api, site_name, api_type, username,
                        maximum_length, download_directory)
        return True
    if api_type == "Stories":
        master_set = subscription.get_stories()
        highlights = subscription.get_highlights()
        valid_highlights = []
        for highlight in highlights:
            if "error" == highlight:
                continue
            highlight = subscription.get_highlights(
                hightlight_id=highlight["id"])
            valid_highlights.append(highlight)
        master_set.extend(valid_highlights)
    if api_type == "Posts":
        master_set = subscription.get_posts()
    if api_type == "Archived":
        master_set = subscription.get_archived(api)
    if api_type == "Messages":
        unrefined_set = subscription.get_messages()
        unrefined_set = process_messages(api, subscription, unrefined_set)
        mass_messages = getattr(authed, "mass_messages")
        if subscription.is_me and mass_messages:
            unrefined_set2 = process_mass_messages(api,
                                                   subscription, metadata_directory, mass_messages)
            unrefined_set += unrefined_set2
        master_set = [unrefined_set]
    master_set2 = master_set
    parent_type = ""
    if "Archived" == api_type:
        unrefined_set = []
        for master_set3 in master_set2:
            if not isinstance(master_set3, dict):
                continue
            parent_type = master_set3["type"]
            results = master_set3["results"]
            unrefined_result = pool.starmap(media_scraper, product(
                results, [api], [formatted_directories], [username], [api_type], [parent_type]))
            unrefined_set.append(unrefined_result)
        unrefined_set = list(chain(*unrefined_set))
    else:
        unrefined_set = pool.starmap(media_scraper, product(
            master_set2, [api], [formatted_directories], [username], [api_type], [parent_type]))
        unrefined_set = [x for x in unrefined_set]
    new_metadata = main_helper.format_media_set(unrefined_set)
    if not new_metadata:
        print("No "+api_type+" Found.")
        delattr(subscription.scraped, api_type)
    if new_metadata:
        metadata_path = os.path.join(
            metadata_directory, api_type+".json")
        api_path = os.path.join(api_type, parent_type)
        new_metadata_object = process_metadata(
            api, new_metadata, formatted_directories, subscription, api_type, api_path, metadata_path, site_name)
        new_metadata_set = new_metadata_object.export()
        if export_metadata:
            export_archive(new_metadata_set, metadata_path, json_settings)
    return True
Example #12
def process_mass_messages(api: start, subscription, metadata_directory, mass_messages) -> list:
    def compare_message(queue_id, remote_messages):
        for message in remote_messages:
            if "isFromQueue" in message and message["isFromQueue"]:
                if queue_id == message["queueId"]:
                    return message
    global_found = []
    chats = []
    session = api.sessions[0]
    salt = json_global_settings["random_string"]
    encoded = f"{session.ip}{salt}"
    encoded = encoded.encode('utf-8')
    hash = hashlib.md5(encoded).hexdigest()
    profile_directory = json_global_settings["profile_directories"][0]
    profile_directory = os.path.abspath(profile_directory)
    profile_directory = os.path.join(profile_directory, subscription.username)
    profile_metadata_directory = os.path.join(profile_directory, "Metadata")
    mass_message_path = os.path.join(
        profile_metadata_directory, "Mass Messages.json")
    chats_path = os.path.join(profile_metadata_directory, "Chats.json")
    if os.path.exists(chats_path):
        chats = import_archive(chats_path)
    date_object = datetime.today()
    date_string = date_object.strftime("%d-%m-%Y %H:%M:%S")
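    # Try to match every queued mass message to a real chat message: first
    # against the cached Chats.json, then via a live message search.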
    for mass_message in mass_messages:
        if "status" not in mass_message:
            mass_message["status"] = ""
        if "found" not in mass_message:
            mass_message["found"] = {}
        if "hashed_ip" not in mass_message:
            mass_message["hashed_ip"] = ""
        mass_message["hashed_ip"] = mass_message.get("hashed_ip", hash)
        mass_message["date_hashed"] = mass_message.get(
            "date_hashed", date_string)
        if mass_message["isCanceled"]:
            continue
        queue_id = mass_message["id"]
        text = mass_message["textCropped"]
        text = html.unescape(text)
        mass_found = mass_message["found"]
        if mass_message["found"] or not mass_message["mediaType"]:
            continue
        identifier = None
        if chats:
            list_chats = chats
            for chat in list_chats:
                identifier = chat["identifier"]
                messages = chat["messages"]["list"]
                mass_found = compare_message(queue_id, messages)
                if mass_found:
                    mass_message["found"] = mass_found
                    mass_message["status"] = True
                    break
        if not mass_found:
            list_chats = subscription.search_messages(text=text, limit=2)
            if not list_chats:
                continue
            for item in list_chats["list"]:
                user = item["withUser"]
                identifier = user["id"]
                messages = []
                print("Getting Messages")
                keep = ["id", "username"]
                list_chats2 = [
                    x for x in chats if x["identifier"] == identifier]
                if list_chats2:
                    chat2 = list_chats2[0]
                    messages = chat2["messages"]["list"]
                    messages = subscription.get_messages(
                        identifier=identifier, resume=messages)
                    for message in messages:
                        message["withUser"] = {
                            k: item["withUser"][k] for k in keep}
                        message["fromUser"] = {
                            k: message["fromUser"][k] for k in keep}
                    mass_found = compare_message(queue_id, messages)
                    if mass_found:
                        mass_message["found"] = mass_found
                        mass_message["status"] = True
                        break
                else:
                    item2 = {}
                    item2["identifier"] = identifier
                    item2["messages"] = subscription.get_messages(
                        identifier=identifier)
                    chats.append(item2)
                    messages = item2["messages"]["list"]
                    for message in messages:
                        message["withUser"] = {
                            k: item["withUser"][k] for k in keep}
                        message["fromUser"] = {
                            k: message["fromUser"][k] for k in keep}
                    mass_found = compare_message(queue_id, messages)
                    if mass_found:
                        mass_message["found"] = mass_found
                        mass_message["status"] = True
                        break
        if not mass_found:
            mass_message["status"] = False
    export_archive(chats, chats_path, json_settings)
    for mass_message in mass_messages:
        found = mass_message["found"]
        if found and found["media"]:
            user = found["withUser"]
            identifier = user["id"]
            date_hashed_object = datetime.strptime(
                mass_message["date_hashed"], "%d-%m-%Y %H:%M:%S")
            next_date_object = date_hashed_object+timedelta(days=1)
            if mass_message["hashed_ip"] != hash or date_object > next_date_object:
                print("Getting Message By ID")
                x = subscription.get_message_by_id(
                    identifier=identifier, identifier2=found["id"], limit=1)
                new_found = x["result"]["list"][0]
                new_found["withUser"] = found["withUser"]
                mass_message["found"] = new_found
                mass_message["hashed_ip"] = hash
                mass_message["date_hashed"] = date_string
            global_found.append(found)
    main_helper.export_archive(
        mass_messages, mass_message_path, json_settings)
    return global_found
Example #13
def prepare_scraper(session, site_name, only_links, link, locations, directory,
                    api_count, username, api_type):
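    # Variant that builds paginated links per media-type location, scrapes them
    # with a thread pool, and exports a per-location metadata archive.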
    separator = " | "
    master_set = []
    media_set = []
    original_link = link
    for location in locations:
        pool = ThreadPool()
        link = original_link
        print("Scraping [" + str(seperator.join(location[1])) +
              "]. Should take less than a minute.")
        array = format_directory(j_directory, site_name, username, location[0],
                                 api_type)
        user_directory = array[0]
        location_directory = array[2][0][1]
        metadata_directory = array[1]
        directories = array[2] + [location[1]]
        if not master_set:

            if api_type == "Posts":
                ceil = math.ceil(api_count / 100)
                a = list(range(ceil))
                for b in a:
                    b = b * 100
                    master_set.append(
                        link.replace("offset=0", "offset=" + str(b)))
            if api_type == "Archived":
                ceil = math.ceil(api_count / 100)
                a = list(range(ceil))
                for b in a:
                    b = b * 100
                    master_set.append(
                        link.replace("offset=0", "offset=" + str(b)))

            # def xmessages(link):
            #     f_offset_count = 0
            #     while True:
            #         y = json_request(session, link)
            #         if "list" in y:
            #             if y["list"]:
            #                 master_set.append(link)
            #                 if y["hasMore"]:
            #                     f_offset_count2 = f_offset_count+100
            #                     f_offset_count = f_offset_count2-100
            #                     link = link.replace(
            #                         "offset=" + str(f_offset_count), "offset=" + str(f_offset_count2))
            #                     f_offset_count = f_offset_count2
            #                 else:
            #                     break
            #             else:
            #                 break
            #         else:
            #             break

            # def process_chats(subscriber):
            #     fool = subscriber["withUser"]
            #     fool_id = str(fool["id"])
            #     link_2 = "https://onlyfans.com/api2/v2/chats/"+fool_id + \
            #         "/messages?limit=100&offset=0&order=desc&app-token="+app_token+""
            #     xmessages(link_2)
            # if api_type == "Messages":
            #     xmessages(link)
            # if api_type == "Mass Messages":
            #     messages = []
            #     offset_count = 0
            #     while True:
            #         y = json_request(session, link)
            #         if y:
            #             messages.append(y)
            #             offset_count2 = offset_count+99
            #             offset_count = offset_count2-99
            #             link = link.replace(
            #                 "offset=" + str(offset_count), "offset=" + str(offset_count2))
            #             offset_count = offset_count2
            #         else:
            #             break
            #     messages = list(chain(*messages))
            #     message_count = 0

            #     def process_mass_messages(message, limit):
            #         text = message["textCropped"].replace("&", "")
            #         link_2 = "https://onlyfans.com/api2/v2/chats?limit="+limit+"&offset=0&filter=&order=activity&query=" + \
            #             text+"&app-token="+app_token
            #         y = json_request(session, link_2)
            #         return y
            #     limit = "10"
            #     if len(messages) > 99:
            #         limit = "2"
            #     subscribers = pool.starmap(process_mass_messages, product(
            #         messages, [limit]))
            #     subscribers = [
            #         item for sublist in subscribers for item in sublist["list"]]
            #     seen = set()
            #     subscribers = [x for x in subscribers if x["withUser"]
            #                    ["id"] not in seen and not seen.add(x["withUser"]["id"])]
            #     x = pool.starmap(process_chats, product(
            #         subscribers))
            if api_type == "Stories":
                master_set.append(link)
            if api_type == "Highlights":
                r = json_request(session, link)
                if "error" in r:
                    break
                for item in r["list"]:
                    link2 = "https://stars.avn.com/api2/v2/stories/collections/" + \
                        str(item["id"])
                    master_set.append(link2)
        x = pool.starmap(
            media_scraper,
            product(master_set, [session], [directories], [username],
                    [api_type]))
        results = format_media_set(location[0], x)
        seen = set()
        results["valid"] = [
            x for x in results["valid"]
            if x["filename"] not in seen and not seen.add(x["filename"])
        ]
        if results["valid"]:
            os.makedirs(directory, exist_ok=True)
            os.makedirs(location_directory, exist_ok=True)
            if export_metadata:
                os.makedirs(metadata_directory, exist_ok=True)
                archive_directory = os.path.join(metadata_directory, api_type)
                export_archive([results], archive_directory, json_settings)
        media_set.append(results)

    return [media_set, directory]
Example #14
def prepare_scraper(sessions, site_name, item):
    api_type = item["api_type"]
    api_array = item["api_array"]
    link = api_array["api_link"]
    locations = api_array["media_types"]
    username = api_array["username"]
    directory = api_array["directory"]
    api_count = api_array["post_count"]
    master_set = []
    media_set = []
    metadata_set = []
    pool = ThreadPool()
    formatted_directories = main_helper.format_directories(
        j_directory, site_name, username, locations, api_type)
    model_directory = formatted_directories["model_directory"]
    api_directory = formatted_directories["api_directory"]
    metadata_directory = formatted_directories["metadata_directory"]
    legacy_metadata_directory = os.path.join(api_directory, "Metadata")
    # legacy_metadata = main_helper.legacy_metadata(legacy_metadata_directory)
    if api_type == "Profile":
        profile_scraper(link, sessions[0], directory, username)
        return
    if api_type == "Posts":
        num = 100
        link = link.replace("limit=0", "limit="+str(num))
        original_link = link
        ceil = math.ceil(api_count / num)
        a = list(range(ceil))
        for b in a:
            b = b * num
            master_set.append(link.replace(
                "offset=0", "offset=" + str(b)))
    if api_type == "Archived":
        ceil = math.ceil(api_count / 100)
        a = list(range(ceil))
        for b in a:
            b = b * 100
            master_set.append(link.replace(
                "offset=0", "offset=" + str(b)))

    def xmessages(link):
        f_offset_count = 0
        while True:
            y = main_helper.json_request(sessions[0], link)
            if not y:
                return
            if "list" in y:
                if y["list"]:
                    master_set.append(link)
                    if y["hasMore"]:
                        f_offset_count2 = f_offset_count+100
                        f_offset_count = f_offset_count2-100
                        link = link.replace(
                            "offset=" + str(f_offset_count), "offset=" + str(f_offset_count2))
                        f_offset_count = f_offset_count2
                    else:
                        break
                else:
                    break
            else:
                break

    def process_chats(subscriber):
        fool = subscriber["withUser"]
        fool_id = str(fool["id"])
        link_2 = f"https://onlyfans.com/api2/v2/chats/{fool_id}/messages?limit=100&offset=0&order=desc&app-token={app_token}"
        xmessages(link_2)
    if api_type == "Messages":
        xmessages(link)
    if api_type == "Mass Messages":
        results = []
        max_threads = multiprocessing.cpu_count()
        offset_count = 0
        offset_count2 = max_threads
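        # Fetch message pages in doubling batches until an entire batch comes back empty.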
        while True:
            def process_messages(link, session):
                y = main_helper.json_request(session, link)
                if y and "error" not in y:
                    return y
                else:
                    return []
            link_list = [link.replace(
                "offset=0", "offset="+str(i*30)) for i in range(offset_count, offset_count2)]
            link_list = pool.starmap(process_messages, product(
                link_list, [sessions[0]]))
            if all(not result for result in link_list):
                break
            link_list2 = list(chain(*link_list))

            results.append(link_list2)
            offset_count = offset_count2
            offset_count2 = offset_count*2
        unsorted_messages = list(chain(*results))
        unsorted_messages.sort(key=lambda x: x["id"])
        messages = unsorted_messages

        def process_mass_messages(message, limit):
            text = message["textCropped"].replace("&", "")
            link_2 = "https://onlyfans.com/api2/v2/chats?limit="+limit+"&offset=0&filter=&order=activity&query=" + \
                text+"&app-token="+app_token
            y = main_helper.json_request(sessions[0], link_2)
            if y is None or "error" in y:
                return []
            return y
        limit = "10"
        if len(messages) > 99:
            limit = "2"
        subscribers = pool.starmap(process_mass_messages, product(
            messages, [limit]))
        subscribers = filter(None, subscribers)
        subscribers = [
            item for sublist in subscribers for item in sublist["list"]]
        seen = set()
        subscribers = [x for x in subscribers if x["withUser"]
                       ["id"] not in seen and not seen.add(x["withUser"]["id"])]
        x = pool.starmap(process_chats, product(
            subscribers))
    if api_type == "Stories":
        master_set.append(link)
    if api_type == "Highlights":
        r = main_helper.json_request(sessions[0], link)
        if "error" in r:
            return
        for item in r:
            link2 = f"https://onlyfans.com/api2/v2/stories/highlights/{item['id']}?app-token={app_token}"
            master_set.append(link2)
    master_set2 = main_helper.assign_session(master_set, sessions)
    media_set = []
    count = len(master_set2)
    max_attempts = 100
    for attempt in list(range(max_attempts)):
        print("Scrape Attempt: "+str(attempt+1)+"/"+str(max_attempts))
        media_set2 = pool.starmap(media_scraper, product(
            master_set2, [sessions], [formatted_directories], [username], [api_type]))
        media_set.extend(media_set2)
        if count > 1:
            faulty = [x for x in media_set2 if not x]
            if not faulty:
                print("Found: "+api_type)
                break
            else:
                num = len(faulty)*100
                print("Missing "+str(num)+" Posts... Retrying...")
                master_set2 = main_helper.restore_missing_data(
                    master_set2, media_set2)
        else:
            print("No "+api_type+" Found.")
            break
    main_helper.delete_empty_directories(api_directory)
    media_set = [x for x in media_set]
    media_set = main_helper.format_media_set(media_set)

    metadata_set = media_set
    if export_metadata:
        metadata_set = [x for x in metadata_set if x["valid"] or x["invalid"]]
        for item in metadata_set:
            if item["valid"] or item["invalid"]:
                legacy_metadata = formatted_directories["legacy_metadata"]
        if metadata_set:
            os.makedirs(metadata_directory, exist_ok=True)
            archive_directory = os.path.join(metadata_directory, api_type)
            metadata_set_copy = copy.deepcopy(metadata_set)
            metadata_set = main_helper.filter_metadata(metadata_set_copy)
            main_helper.export_archive(
                metadata_set, archive_directory, json_settings)
    return [media_set, directory]
Example #15
def prepare_scraper(api, site_name, item):
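    # Gather items for the requested api_type, scrape them into metadata, merge
    # with the previously exported archive, and attach the result to the subscription.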
    authed = api.auth
    sessions = api.sessions
    api_type = item["api_type"]
    api_array = item["api_array"]
    link = api_array["api_link"]
    subscription = api_array["subscription"]
    locations = api_array["media_types"]
    username = api_array["username"]
    directory = api_array["directory"]
    api_count = api_array["post_count"]
    master_set = []
    media_set = []
    metadata_set = []
    pool = multiprocessing()
    formatted_directories = main_helper.format_directories(
        j_directory, site_name, username, locations, api_type)
    model_directory = formatted_directories["model_directory"]
    api_directory = formatted_directories["api_directory"]
    metadata_directory = formatted_directories["metadata_directory"]
    archive_directory = os.path.join(metadata_directory, api_type)
    archive_path = archive_directory+".json"
    imported = import_archive(archive_path)
    legacy_metadata_directory = os.path.join(api_directory, "Metadata")
    if api_type == "Profile":
        profile_scraper(api, directory, username)
        return
    if api_type == "Stories":
        master_set = subscription.get_stories()
        highlights = subscription.get_highlights()
        valid_highlights = []
        for highlight in highlights:
            highlight = subscription.get_highlights(
                hightlight_id=highlight["id"])
            valid_highlights.append(highlight)
        master_set.extend(valid_highlights)
    if api_type == "Posts":
        master_set = subscription.get_posts()
    if api_type == "Archived":
        master_set = subscription.get_archived(api)
    if api_type == "Messages":
        unrefined_set = subscription.get_messages()
        if "list" in unrefined_set:
            unrefined_set = unrefined_set["list"]
        if subscription.is_me:
            mass_messages = authed["mass_messages"]
            unrefined_set2 = process_mass_message(api,
                                                  subscription, metadata_directory, mass_messages)
            unrefined_set += unrefined_set2
        master_set = [unrefined_set]
    master_set2 = master_set
    parent_type = ""
    if "Archived" == api_type:
        unrefined_set = []
        for master_set3 in master_set2:
            parent_type = master_set3["type"]
            results = master_set3["results"]
            unrefined_result = pool.starmap(media_scraper, product(
                results, [api], [formatted_directories], [username], [api_type], [parent_type]))
            unrefined_set.append(unrefined_result)
        unrefined_set = list(chain(*unrefined_set))
    else:
        unrefined_set = pool.starmap(media_scraper, product(
            master_set2, [api], [formatted_directories], [username], [api_type], [parent_type]))
        unrefined_set = [x for x in unrefined_set]
    metadata_set = main_helper.format_media_set(unrefined_set)
    if not metadata_set:
        print("No "+api_type+" Found.")
        delattr(subscription.scraped, api_type)
    if metadata_set:
        if export_metadata:
            os.makedirs(metadata_directory, exist_ok=True)
            old_metadata = metadata_fixer(archive_directory)
            old_metadata_set = prepare_metadata(old_metadata).metadata
            old_metadata_set2 = jsonpickle.encode(
                old_metadata_set, unpicklable=False)
            old_metadata_set2 = jsonpickle.decode(old_metadata_set2)
            metadata_set = compare_metadata(metadata_set, old_metadata_set2)
            metadata_set = prepare_metadata(metadata_set).metadata
            metadata_set2 = jsonpickle.encode(metadata_set, unpicklable=False)
            metadata_set2 = jsonpickle.decode(metadata_set2)
            metadata_set2 = main_helper.filter_metadata(metadata_set2)
            metadata_set2 = legacy_metadata_fixer(
                legacy_metadata_directory, metadata_set2)
            main_helper.export_archive(
                metadata_set2, archive_directory, json_settings, legacy_directory=legacy_metadata_directory)
        else:
            metadata_set = prepare_metadata(metadata_set).metadata
        subscription = api.get_subscription(username)
        subscription.set_scraped(api_type, metadata_set)
    return [subscription.scraped]