def download(media, session, directory, username):
    # Retry up to 10 times; connection resets restart the loop.
    count = 0
    while count < 11:
        link = media["link"]
        # HEAD request first, to learn the remote file size.
        r = json_request(session, link, "HEAD", True, False)
        if not r:
            return False

        header = r.headers
        content_length = int(header["content-length"])
        date_object = datetime.strptime(media["postedAt"],
                                        "%d-%m-%Y %H:%M:%S")
        og_filename = media["filename"]
        media["ext"] = os.path.splitext(og_filename)[1]
        media["ext"] = media["ext"].replace(".", "")
        download_path = media["directory"] + media["filename"]
        timestamp = date_object.timestamp()
        if not overwrite_files:
            if check_for_dupe_file(download_path, content_length):
                # Same-size file already on disk; nothing to do.
                return
        r = json_request(session, link, "GET", True, False)
        if not r:
            return False
        try:
            with open(download_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
        except ConnectionResetError:
            count += 1
            continue
        format_image(download_path, timestamp)
        logger.info("Link: {}".format(link))
        logger.info("Path: {}".format(download_path))
        return True
    # All retries were consumed by connection resets.
    return False
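
None of these examples include json_request itself. Based on how it is called here (a raw Response for HEAD requests and streamed GETs, parsed JSON otherwise), a minimal sketch might look like the following; the signature, timeout, and error handling are assumptions, not the original helper.

import requests

def json_request(session, link, method="GET", stream=False, json_format=True):
    # Return parsed JSON by default, or the raw Response when
    # json_format is False (HEAD requests and streamed downloads).
    try:
        r = session.request(method, link, stream=stream, timeout=30)
        if not r.ok:
            return None
        return r.json() if json_format else r
    except (requests.RequestException, ValueError):
        return None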
Example #2
def download(media, session, directory, username):
    # Same flow as Example #1, but with the duplicate check inlined
    # and no retry on connection resets.
    while True:
        link = media["link"]
        r = json_request(session, link, "HEAD", True, False)
        if not r:
            break

        header = r.headers
        content_length = int(header["content-length"])
        date_object = datetime.strptime(media["postedAt"],
                                        "%d-%m-%Y %H:%M:%S")
        og_filename = media["filename"]
        media["ext"] = os.path.splitext(og_filename)[1]
        media["ext"] = media["ext"].replace(".", "")
        download_path = media["directory"] + media["filename"]
        timestamp = date_object.timestamp()
        if not overwrite_files:
            if os.path.isfile(download_path):
                local_size = os.path.getsize(download_path)
                if local_size == content_length:
                    # Same-size file already on disk; nothing to do.
                    return
        r = json_request(session, link, "GET", True, False)
        if not r:
            break
        with open(download_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
        format_image(download_path, timestamp)
        logger.info("Link: {}".format(link))
        logger.info("Path: {}".format(download_path))
        return True
    # Both break paths indicate a failed request.
    return False
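
Example #2 inlines the duplicate check that Example #1 delegates to check_for_dupe_file. A helper matching that inline logic could be as small as this (a sketch; the real helper is not shown in these examples):

import os

def check_for_dupe_file(download_path, content_length):
    # Treat a local file with the same byte size as already downloaded.
    return (os.path.isfile(download_path)
            and os.path.getsize(download_path) == content_length)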
Example #3
def create_session(user_agent, app_token, auth_array):
    me_api = []
    auth_count = 1
    auth_version = "(V1)"
    count = 1
    try:
        auth_cookies = [
            {'name': 'auth_id', 'value': auth_array["auth_id"]},
            {'name': 'auth_hash', 'value': auth_array["auth_hash"]}
        ]
        while auth_count < 3:
            if auth_count == 2:
                auth_version = "(V2)"
                if auth_array["sess"]:
                    # Drop the sess cookie appended during the V1 attempts.
                    del auth_cookies[2]
                count = 1
            while count < 11:
                session = requests.Session()
                print("Auth "+auth_version+" Attempt "+str(count)+"/"+"10")
                max_threads = multiprocessing.cpu_count()
                session.mount(
                    'https://',
                    requests.adapters.HTTPAdapter(pool_connections=max_threads,
                                                  pool_maxsize=max_threads))
                session.headers = {
                    'User-Agent': user_agent, 'Referer': 'https://onlyfans.com/'}
                if auth_array["sess"]:
                    auth_cookies.append(
                        {'name': 'sess', 'value': auth_array["sess"]})
                for auth_cookie in auth_cookies:
                    session.cookies.set(**auth_cookie)

                link = "https://onlyfans.com/api2/v2/users/customer?app-token="+app_token

                r = json_request(session, link)
                count += 1
                if not r:
                    continue
                me_api = r
                if 'error' in r:
                    error_message = r["error"]["message"]
                    print(error_message)
                    if "token" in error_message:
                        break
                    continue
                else:
                    print("Welcome "+r["name"])
                option_string = "username or profile link"
                link = "https://onlyfans.com/api2/v2/subscriptions/count/all?app-token="+app_token
                r = json_request(session, link)
                if not r:
                    break
                subscriber_count = r["subscriptions"]["all"]
                return [session, option_string, subscriber_count, me_api]
            auth_count += 1
    except Exception as e:
        print(e)
        input()
    return [False, me_api]
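
The auth_array shape is implied by the keys this function reads. A hypothetical call (all cookie values made up) would look like:

auth_array = {
    "auth_id": "123456",
    "auth_hash": "abcdef0123456789",
    "sess": "sessioncookievalue",
}
result = create_session(user_agent, app_token, auth_array)
if result[0]:
    # Success returns four items; failure returns [False, me_api].
    session, option_string, subscriber_count, me_api = result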
Example #4
def multi(array, session):
    # array packs [link, performer_flag].
    link = array[0]
    performer = array[1]
    if performer:
        session = requests.Session()
        x = json_request(session, link)
        if not x["subscribedByData"]:
            # Backfill expired subscription data so downstream code can
            # treat unsubscribed performers uniformly.
            x["subscribedByData"] = dict()
            x["subscribedByData"]["expiredAt"] = datetime.utcnow().isoformat()
            x["subscribedByData"]["price"] = x["subscribePrice"]
            x["subscribedByData"]["subscribePrice"] = 0
        x = [x]
    else:
        x = json_request(session, link)
    return x
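
A hypothetical call (the URL is made up for illustration) might be:

# performer=True fetches one profile and backfills expired subscription
# data; performer=False returns the endpoint's JSON as-is.
x = multi(["https://onlyfans.com/api2/v2/users/someuser", True], session)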
Example #5
def start_datascraper(session,
                      identifier,
                      site_name,
                      app_token,
                      choice_type=None):
    if choice_type == 0:
        if blacklist_name:
            link = "https://onlyfans.com/api2/v2/lists?offset=0&limit=100&app-token=" + app_token
            r = json_request(session, link)
            if not r:
                return [False, []]
            matches = [c for c in r if blacklist_name == c["name"]]
            if matches:
                users = matches[0]["users"]
                bl_ids = [u["username"] for u in users]
                if identifier in bl_ids:
                    print("Blacklisted: " + identifier)
                    return [False, []]
    print("Scrape Processing")
    info = link_check(session, app_token, identifier)
    if not info["subbed"]:
        print(info["user"])
        print("First time? Did you forget to edit your config.json file?")
        return [False, []]
    user = info["user"]
    is_me = user["is_me"]
    post_counts = info["count"]
    user_id = str(user["id"])
    username = user["username"]
    print("Name: " + username)
    array = scrape_choice(user_id, app_token, post_counts, is_me)
    prep_download = []
    for item in array:
        print("Type: " + item[2])
        # item[1] is a positional argument list for media_scraper;
        # swap the only_links flag (index 3) for the username.
        only_links = item[1][3]
        post_count = str(item[1][4])
        item[1].append(username)
        item[1].pop(3)
        api_type = item[2]
        results = media_scraper(session, site_name, only_links, *item[1],
                                api_type, app_token)
        for result in results[0]:
            if not only_links:
                media_set = result
                if not media_set["valid"]:
                    continue
                directory = results[1]
                location = result["type"]
                prep_download.append([
                    media_set["valid"], session, directory, username,
                    post_count, location
                ])
    # When the profile is done scraping, return the queued downloads.
    print("Scrape Completed" + "\n")
    return [True, prep_download]
Example #6
def get_subscriptions(session,
                      app_token,
                      subscriber_count,
                      me_api,
                      auth_count=0):
    link = "https://stars.avn.com/api2/v2/subscriptions/following/?limit=10&marker=&offset=0"
    r = json_request(session, link)
    if not r:
        return None
    for x in r["list"]:
        # Tag each subscription with the auth session index it came from.
        x["auth_count"] = auth_count
    return r["list"]
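
Every entry in the returned list carries the auth_count tag set above, so a caller can tell which authenticated session produced it:

subscriptions = get_subscriptions(session, app_token, subscriber_count, me_api)
if subscriptions:
    for subscription in subscriptions:
        print(subscription["auth_count"])  # 0 unless the caller passed one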
Example #7
def link_check(session, app_token, identifier):
    link = 'https://stars.avn.com/api2/v2/users/' + str(identifier)
    y = json_request(session, link)
    temp_user_id2 = dict()
    if not y:
        temp_user_id2["subbed"] = False
        temp_user_id2["user"] = "******"
        return temp_user_id2
    # Set the flag only after confirming the request succeeded.
    y["is_me"] = False
    if "error" in y:
        temp_user_id2["subbed"] = False
        temp_user_id2["user"] = y["error"]["message"]
        return temp_user_id2
    now = datetime.utcnow().date()
    result_date = datetime.utcnow().date()
    if "email" not in y:
        subscribedByData = y
        # if subscribedByData:
        # expired_at = subscribedByData["expiredAt"]
        # result_date = datetime.fromisoformat(
        #     expired_at).replace(tzinfo=None).date()
        if y["followedBy"]:
            subbed = True
        elif y["subscribedBy"]:
            subbed = True
        elif y["subscribedOn"]:
            subbed = True
        # elif y["subscribedIsExpiredNow"] == False:
        #     subbed = True
        elif result_date >= now:
            subbed = True
        else:
            subbed = False
    else:
        subbed = True
        y["is_me"] = True
    if not subbed:
        temp_user_id2["subbed"] = False
        temp_user_id2["user"] = "******"
        return temp_user_id2
    else:
        temp_user_id2["subbed"] = True
        temp_user_id2["user"] = y
        temp_user_id2["count"] = [
            y["postsCount"], [y["photosCount"], y["videosCount"]]
        ]
        return temp_user_id2
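
This keyed variant is the one start_datascraper (Example #5) consumes:

info = link_check(session, app_token, identifier)
if info["subbed"]:
    user = info["user"]          # full profile payload, with "is_me" set
    post_counts = info["count"]  # [postsCount, [photosCount, videosCount]]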
Example #8
def link_check(session, app_token, username):
    # "?" starts the query string for the app-token parameter.
    link = 'https://onlyfans.com/api2/v2/users/' + username + \
           '?app-token=' + app_token
    y = json_request(session, link)
    temp_user_id2 = dict()
    if not y:
        temp_user_id2[0] = False
        temp_user_id2[1] = "No users found"
        return temp_user_id2
    if "error" in y:
        temp_user_id2[0] = False
        temp_user_id2[1] = y["error"]["message"]
        return temp_user_id2
    now = datetime.utcnow().date()
    result_date = datetime.utcnow().date()
    if "email" not in y:
        subscribedByData = y["subscribedByData"]
        if subscribedByData:
            expired_at = subscribedByData["expiredAt"]
            result_date = datetime.fromisoformat(expired_at).replace(
                tzinfo=None).date()
        if y["subscribedBy"]:
            subbed = True
        elif y["subscribedOn"]:
            subbed = True
        elif y["subscribedIsExpiredNow"] == False:
            subbed = True
        elif result_date >= now:
            subbed = True
        else:
            subbed = False
    else:
        subbed = True
    if not subbed:
        temp_user_id2[0] = False
        temp_user_id2[1] = "You're not subscribed to the user"
        return temp_user_id2
    else:
        temp_user_id2[0] = True
        temp_user_id2[1] = str(y["id"])
        temp_user_id2[2] = [
            y["postsCount"],
            [y["photosCount"], y["videosCount"], y["audiosCount"]]
        ]
        return temp_user_id2
Example #9
def xmessages(link):
    # Walk an offset-paginated endpoint, collecting each page link.
    # Relies on session and master_set from the enclosing scope.
    f_offset_count = 0
    while True:
        y = json_request(session, link)
        if y and "list" in y:
            if y["list"]:
                master_set.append(link)
                if y["hasMore"]:
                    f_offset_count2 = f_offset_count + 100
                    f_offset_count = f_offset_count2 - 100
                    link = link.replace(
                        "offset=" + str(f_offset_count),
                        "offset=" + str(f_offset_count2))
                    f_offset_count = f_offset_count2
                else:
                    break
            else:
                break
        else:
            break
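
This offset-walking loop recurs throughout these examples. Rewritten as a generator (a sketch built on the same assumed json_request helper; the name paginate_links is hypothetical), the pattern reads:

def paginate_links(session, link, step=100):
    # Yield each page link while the API reports more results.
    offset = 0
    while True:
        y = json_request(session, link)
        if not y or not y.get("list"):
            break
        yield link
        if not y.get("hasMore"):
            break
        link = link.replace("offset=" + str(offset),
                            "offset=" + str(offset + step))
        offset += step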
Example #10
def media_scraper(session, site_name, only_links, link, locations, directory, post_count, username, api_type, app_token):
    separator = " | "
    media_set = []
    original_link = link
    for location in locations:
        link = original_link
        print("Scraping ["+str(seperator.join(location[1])) +
              "]. Should take less than a minute.")
        array = format_directory(
            j_directory, site_name, username, location[0], api_type)
        user_directory = array[0]
        location_directory = array[2][0][1]
        metadata_directory = array[1]
        directories = array[2]+[location[1]]

        pool = ThreadPool()
        ceil = math.ceil(post_count / 100)
        a = list(range(ceil))
        offset_array = []
        if api_type == "Posts":
            for b in a:
                b = b * 100
                offset_array.append(link.replace(
                    "offset=0", "offset=" + str(b)))
        if api_type == "Messages":
            offset_count = 0
            while True:
                y = json_request(session, link)
                # Guard against a failed request returning None.
                if y and "list" in y:
                    if y["list"]:
                        offset_array.append(link)
                        if y["hasMore"]:
                            offset_count2 = offset_count+100
                            offset_count = offset_count2-100
                            link = link.replace(
                                "offset=" + str(offset_count), "offset=" + str(offset_count2))
                            offset_count = offset_count2
                        else:
                            break
                    else:
                        break
                else:
                    break
        if api_type == "Stories":
            offset_array.append(link)
        if api_type == "Highlights":
            r = json_request(session, link)
            if not r or "error" in r:
                break
            for item in r:
                link2 = "https://onlyfans.com/api2/v2/stories/highlights/" + \
                    str(item["id"])+"?app-token="+app_token+""
                offset_array.append(link2)
        x = pool.starmap(scrape_array, product(
            offset_array, [session], [directories], [username], [api_type]))
        results = format_media_set(location[0], x)
        if results["valid"]:
            os.makedirs(directory, exist_ok=True)
            os.makedirs(location_directory, exist_ok=True)
            if export_metadata:
                os.makedirs(metadata_directory, exist_ok=True)
                archive_directory = metadata_directory+location[0]
                export_archive(results, archive_directory)
        media_set.append(results)

    return [media_set, directory]
Example #11
def scrape_array(link, session, directory, username, api_type):
    media_set = [[], []]
    media_type = directory[1]
    y = json_request(session, link)
    if not y or "error" in y:
        return media_set
    x = 0
    if api_type == "Highlights":
        y = y["stories"]
    if api_type == "Messages":
        y = y["list"]
    master_date = "01-01-0001 00:00:00"
    for media_api in y:
        for media in media_api["media"]:
            date = "-001-11-30T00:00:00+00:00"
            size = 0
            if "source" in media:
                source = media["source"]
                link = source["source"]
                size = source["size"]
                date = media_api["postedAt"] if "postedAt" in media_api else media_api["createdAt"]
            if "src" in media:
                link = media["src"]
                size = media["info"]["preview"]["size"] if "info" in media_api else 1
                date = media_api["createdAt"]
            if not link:
                continue
            if "ca2.convert" in link:
                link = media["preview"]
            new_dict = dict()
            new_dict["post_id"] = media_api["id"]
            new_dict["link"] = link
            if date == "-001-11-30T00:00:00+00:00":
                date_string = master_date
                date_object = datetime.strptime(
                    master_date, "%d-%m-%Y %H:%M:%S")
            else:
                date_object = datetime.fromisoformat(date)
                date_string = date_object.replace(tzinfo=None).strftime(
                    "%d-%m-%Y %H:%M:%S")
                master_date = date_string

            if media["type"] not in media_type:
                x += 1
                continue
            if "text" not in media_api:
                media_api["text"] = ""
            new_dict["text"] = media_api["text"] if media_api["text"] else ""
            new_dict["postedAt"] = date_string
            file_name = link.rsplit('/', 1)[-1]
            file_name, ext = os.path.splitext(file_name)
            ext = ext.replace(".", "").split('?')[0]
            file_path = reformat(directory[0][1], file_name,
                                 new_dict["text"], ext, date_object, username, format_path, date_format, text_length, maximum_length)
            new_dict["directory"] = directory[0][1]
            new_dict["filename"] = file_path.rsplit('/', 1)[-1]
            new_dict["size"] = size
            if size == 0:
                media_set[1].append(new_dict)
                continue
            media_set[0].append(new_dict)
    return media_set
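
The two buckets split media by whether the API reported a size. A caller might unpack them like this (variable names hypothetical):

valid, no_size = scrape_array(link, session, directories, username, "Posts")
# Entries with a reported size go to the downloader; zero-size entries
# are kept in their own bucket, e.g. for link-only exports.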
Example #12
def create_session(user_agent, app_token, auth_array):
    me_api = []
    auth_count = 1
    auth_version = "(V1)"
    count = 1
    try:
        auth_cookies = []
        while auth_count < 3:
            if auth_count == 2:
                auth_version = "(V2)"
                # Drop any stale sess cookie by name; a fixed index can
                # raise IndexError once the cookie list has been reset.
                auth_cookies = [c for c in auth_cookies
                                if c["name"] != "sess"]
                count = 1
            session = requests.Session()
            print("Auth " + auth_version + " Attempt " + str(count) + "/" +
                  "10")
            max_threads = multiprocessing.cpu_count()
            session.mount(
                'https://',
                requests.adapters.HTTPAdapter(pool_connections=max_threads,
                                              pool_maxsize=max_threads))
            session.headers = {
                'User-Agent': user_agent,
                'Referer': 'https://stars.avn.com/'
            }
            if auth_array["sess"]:
                found = False
                for auth_cookie in auth_cookies:
                    if auth_array["sess"] == auth_cookie["value"]:
                        found = True
                        break
                if not found:
                    auth_cookies.append({
                        'name': 'sess',
                        'value': auth_array["sess"],
                        'domain': '.stars.avn.com'
                    })
            for auth_cookie in auth_cookies:
                session.cookies.set(**auth_cookie)
            while count < 11:

                link = "https://stars.avn.com/api2/v2/users/me"
                r = json_request(session, link)
                count += 1
                if not r:
                    auth_cookies = []
                    continue
                me_api = r
                if 'error' in r:
                    error = r["error"]
                    error_message = r["error"]["message"]
                    if error["code"] == 101:
                        error_message = "Blocked by 2FA."
                    print(error_message)
                    if "token" in error_message:
                        break
                    continue
                else:
                    print("Welcome " + r["name"])
                option_string = "username or profile link"
                array = dict()
                array["session"] = session
                array["option_string"] = option_string
                array["subscriber_count"] = r["followingCount"]
                array["me_api"] = me_api
                return array
            auth_count += 1
    except Exception as e:
        log_error.exception(e)
        # input("Enter to continue")
    array = dict()
    array["session"] = None
    array["me_api"] = me_api
    return array
Example #13
def process_mass_messages(message, limit):
    # Search chats for the cropped message text; relies on session and
    # app_token from the enclosing scope.
    text = message["textCropped"].replace("&", "")
    link_2 = "https://onlyfans.com/api2/v2/chats?limit="+limit+"&offset=0&filter=&order=activity&query=" + \
        text+"&app-token="+app_token
    y = json_request(session, link_2)
    return y
Example #14
def media_scraper(session, site_name, only_links, link, locations, directory,
                  api_count, username, api_type, app_token):
    separator = " | "
    master_set = []
    media_set = []
    original_link = link
    for location in locations:
        pool = ThreadPool()
        link = original_link
        print("Scraping [" + str(seperator.join(location[1])) +
              "]. Should take less than a minute.")
        array = format_directory(j_directory, site_name, username, location[0],
                                 api_type)
        user_directory = array[0]
        location_directory = array[2][0][1]
        metadata_directory = array[1]
        directories = array[2] + [location[1]]
        if not master_set:

            if api_type == "Posts":
                ceil = math.ceil(api_count / 100)
                a = list(range(ceil))
                for b in a:
                    b = b * 100
                    master_set.append(
                        link.replace("offset=0", "offset=" + str(b)))
            if api_type == "Archived":
                ceil = math.ceil(api_count / 100)
                a = list(range(ceil))
                for b in a:
                    b = b * 100
                    master_set.append(
                        link.replace("offset=0", "offset=" + str(b)))

            def xmessages(link):
                f_offset_count = 0
                while True:
                    y = json_request(session, link)
                    # Guard against a failed request returning None.
                    if y and "list" in y:
                        if y["list"]:
                            master_set.append(link)
                            if y["hasMore"]:
                                f_offset_count2 = f_offset_count + 100
                                f_offset_count = f_offset_count2 - 100
                                link = link.replace(
                                    "offset=" + str(f_offset_count),
                                    "offset=" + str(f_offset_count2))
                                f_offset_count = f_offset_count2
                            else:
                                break
                        else:
                            break
                    else:
                        break

            def process_chats(subscriber):
                fool = subscriber["withUser"]
                fool_id = str(fool["id"])
                link_2 = "https://onlyfans.com/api2/v2/chats/"+fool_id + \
                    "/messages?limit=100&offset=0&order=desc&app-token="+app_token+""
                xmessages(link_2)

            if api_type == "Messages":
                xmessages(link)
            if api_type == "Mass Messages":
                messages = []
                offset_count = 0
                while True:
                    y = json_request(session, link)
                    if y:
                        messages.append(y)
                        offset_count2 = offset_count + 99
                        offset_count = offset_count2 - 99
                        link = link.replace("offset=" + str(offset_count),
                                            "offset=" + str(offset_count2))
                        offset_count = offset_count2
                    else:
                        break
                messages = list(chain(*messages))
                message_count = 0

                def process_mass_messages(message, limit):
                    text = message["textCropped"].replace("&", "")
                    link_2 = "https://onlyfans.com/api2/v2/chats?limit="+limit+"&offset=0&filter=&order=activity&query=" + \
                        text+"&app-token="+app_token
                    y = json_request(session, link_2)
                    return y

                limit = "10"
                if len(messages) > 99:
                    limit = "2"
                subscribers = pool.starmap(process_mass_messages,
                                           product(messages, [limit]))
                subscribers = [
                    item for sublist in subscribers for item in sublist["list"]
                ]
                # Deduplicate chats by user id while preserving order;
                # seen.add returns None, so "not seen.add(...)" is True.
                seen = set()
                subscribers = [
                    x for x in subscribers if x["withUser"]["id"] not in seen
                    and not seen.add(x["withUser"]["id"])
                ]
                x = pool.starmap(process_chats, product(subscribers))
            if api_type == "Stories":
                master_set.append(link)
            if api_type == "Highlights":
                r = json_request(session, link)
                if not r or "error" in r:
                    break
                for item in r["list"]:
                    link2 = "https://stars.avn.com/api2/v2/stories/collections/" + \
                        str(item["id"])
                    master_set.append(link2)
        x = pool.starmap(
            scrape_array,
            product(master_set, [session], [directories], [username],
                    [api_type]))
        results = format_media_set(location[0], x)
        # Deduplicate results by filename, preserving order.
        seen = set()
        results["valid"] = [
            x for x in results["valid"]
            if x["filename"] not in seen and not seen.add(x["filename"])
        ]
        if results["valid"]:
            os.makedirs(directory, exist_ok=True)
            os.makedirs(location_directory, exist_ok=True)
            if export_metadata:
                os.makedirs(metadata_directory, exist_ok=True)
                archive_directory = metadata_directory + location[0]
                export_archive(results, archive_directory)
        media_set.append(results)

    return [media_set, directory]