예제 #1
0
def media_scraper(session, site_name, only_links, link, locations, directory, post_count, username, api_type, app_token):
    seperator = " | "
    media_set = []
    original_link = link
    for location in locations:
        link = original_link
        print("Scraping ["+str(seperator.join(location[1])) +
              "]. Should take less than a minute.")
        array = format_directory(
            j_directory, site_name, username, location[0], api_type)
        user_directory = array[0]
        location_directory = array[2][0][1]
        metadata_directory = array[1]
        directories = array[2]+[location[1]]

        pool = ThreadPool()
        ceil = math.ceil(post_count / 100)
        a = list(range(ceil))
        offset_array = []
        if api_type == "Posts":
            for b in a:
                b = b * 100
                offset_array.append(link.replace(
                    "offset=0", "offset=" + str(b)))
        if api_type == "Messages":
            offset_count = 0
            while True:
                y = json_request(session, link)
                if "list" in y:
                    if y["list"]:
                        offset_array.append(link)
                        if y["hasMore"]:
                            offset_count2 = offset_count+100
                            offset_count = offset_count2-100
                            link = link.replace(
                                "offset=" + str(offset_count), "offset=" + str(offset_count2))
                            offset_count = offset_count2
                        else:
                            break
                    else:
                        break
                else:
                    break
        if api_type == "Stories":
            offset_array.append(link)
        if api_type == "Highlights":
            r = json_request(session, link)
            if "error" in r:
                break
            for item in r:
                link2 = "https://onlyfans.com/api2/v2/stories/highlights/" + \
                    str(item["id"])+"?app-token="+app_token+""
                offset_array.append(link2)
        x = pool.starmap(scrape_array, product(
            offset_array, [session], [directories], [username], [api_type]))
        results = format_media_set(location[0], x)
        if results["valid"]:
            os.makedirs(directory, exist_ok=True)
            os.makedirs(location_directory, exist_ok=True)
            if export_metadata:
                os.makedirs(metadata_directory, exist_ok=True)
                archive_directory = metadata_directory+location[0]
                export_archive(results, archive_directory)
        media_set.append(results)

    return [media_set, directory]
예제 #2
0
def media_scraper(session, site_name, only_links, link, locations, directory,
                  api_count, username, api_type, app_token):
    seperator = " | "
    master_set = []
    media_set = []
    original_link = link
    for location in locations:
        pool = ThreadPool()
        link = original_link
        print("Scraping [" + str(seperator.join(location[1])) +
              "]. Should take less than a minute.")
        array = format_directory(j_directory, site_name, username, location[0],
                                 api_type)
        user_directory = array[0]
        location_directory = array[2][0][1]
        metadata_directory = array[1]
        directories = array[2] + [location[1]]
        if not master_set:

            if api_type == "Posts":
                ceil = math.ceil(api_count / 100)
                a = list(range(ceil))
                for b in a:
                    b = b * 100
                    master_set.append(
                        link.replace("offset=0", "offset=" + str(b)))
            if api_type == "Archived":
                ceil = math.ceil(api_count / 100)
                a = list(range(ceil))
                for b in a:
                    b = b * 100
                    master_set.append(
                        link.replace("offset=0", "offset=" + str(b)))

            def xmessages(link):
                f_offset_count = 0
                while True:
                    y = json_request(session, link)
                    if "list" in y:
                        if y["list"]:
                            master_set.append(link)
                            if y["hasMore"]:
                                f_offset_count2 = f_offset_count + 100
                                f_offset_count = f_offset_count2 - 100
                                link = link.replace(
                                    "offset=" + str(f_offset_count),
                                    "offset=" + str(f_offset_count2))
                                f_offset_count = f_offset_count2
                            else:
                                break
                        else:
                            break
                    else:
                        break

            def process_chats(subscriber):
                fool = subscriber["withUser"]
                fool_id = str(fool["id"])
                link_2 = "https://onlyfans.com/api2/v2/chats/"+fool_id + \
                    "/messages?limit=100&offset=0&order=desc&app-token="+app_token+""
                xmessages(link_2)

            if api_type == "Messages":
                xmessages(link)
            if api_type == "Mass Messages":
                messages = []
                offset_count = 0
                while True:
                    y = json_request(session, link)
                    if y:
                        messages.append(y)
                        offset_count2 = offset_count + 99
                        offset_count = offset_count2 - 99
                        link = link.replace("offset=" + str(offset_count),
                                            "offset=" + str(offset_count2))
                        offset_count = offset_count2
                    else:
                        break
                messages = list(chain(*messages))
                message_count = 0

                def process_mass_messages(message, limit):
                    text = message["textCropped"].replace("&", "")
                    link_2 = "https://onlyfans.com/api2/v2/chats?limit="+limit+"&offset=0&filter=&order=activity&query=" + \
                        text+"&app-token="+app_token
                    y = json_request(session, link_2)
                    return y

                limit = "10"
                if len(messages) > 99:
                    limit = "2"
                subscribers = pool.starmap(process_mass_messages,
                                           product(messages, [limit]))
                subscribers = [
                    item for sublist in subscribers for item in sublist["list"]
                ]
                seen = set()
                subscribers = [
                    x for x in subscribers if x["withUser"]["id"] not in seen
                    and not seen.add(x["withUser"]["id"])
                ]
                x = pool.starmap(process_chats, product(subscribers))
            if api_type == "Stories":
                master_set.append(link)
            if api_type == "Highlights":
                r = json_request(session, link)
                if "error" in r:
                    break
                for item in r["list"]:
                    link2 = "https://stars.avn.com/api2/v2/stories/collections/" + \
                        str(item["id"])
                    master_set.append(link2)
        x = pool.starmap(
            scrape_array,
            product(master_set, [session], [directories], [username],
                    [api_type]))
        results = format_media_set(location[0], x)
        seen = set()
        results["valid"] = [
            x for x in results["valid"]
            if x["filename"] not in seen and not seen.add(x["filename"])
        ]
        if results["valid"]:
            os.makedirs(directory, exist_ok=True)
            os.makedirs(location_directory, exist_ok=True)
            if export_metadata:
                os.makedirs(metadata_directory, exist_ok=True)
                archive_directory = metadata_directory + location[0]
                export_archive(results, archive_directory)
        media_set.append(results)

    return [media_set, directory]