Example #1
def start_datascraper(api: start, identifier, site_name, choice_type=None):
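    # NOTE: this excerpt relies on module-level imports/globals from the original
    # scraper module (json_settings, scrape_choice, format_options, prepare_scraper).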
    print("Scrape Processing")
    subscription = api.get_subscription(identifier=identifier)
    if not subscription:
        return [False, subscription]
    post_count = subscription.postsCount
    user_id = str(subscription.id)
    avatar = subscription.avatar
    username = subscription.username
    link = subscription.link
    print("Name: " + username)
    api_array = scrape_choice(api, subscription)
    api_array = format_options(api_array, "apis")
    apis = api_array[0]
    api_string = api_array[1]
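    # Either prompt for which section to scrape or default to all of them when
    # auto_scrape_apis is enabled (entering 0 also selects every section).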
    if not json_settings["auto_scrape_apis"]:
        print("Apis: " + api_string)
        value = int(input().strip())
    else:
        value = 0
    if value:
        apis = [apis[value]]
    else:
        apis.pop(0)
    metadata_locations = {}
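    # Run prepare_scraper once for each selected API section.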
    for item in apis:
        print("Type: " + item["api_type"])
        only_links = item["api_array"]["only_links"]
        post_count = str(item["api_array"]["post_count"])
        item["api_array"]["username"] = username
        item["api_array"]["subscription"] = subscription
        api_type = item["api_type"]
        results = prepare_scraper(api, site_name, item)
    print("Scrape Completed" + "\n")
    return [True, subscription]
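
# A minimal, hypothetical usage sketch: assumes `api` is an already-authenticated
# `start` instance and the scraper's module-level settings are loaded; the
# identifier and site name below are placeholder values.
scraped, subscription = start_datascraper(api, "example_username", "onlyfans")
if scraped:
    print("Finished scraping " + subscription.username)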
Example #2
def manage_subscriptions(api: start,
                         auth_count=0,
                         identifier="",
                         refresh: bool = False):
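    # NOTE: this excerpt relies on module-level globals from the original scraper
    # module (blacklist_name, ignore_type) and on `datetime` being imported there.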
    if identifier:
        results = api.get_subscription(identifier=identifier)
        results = [results]
    else:
        results = api.get_subscriptions(refresh=refresh)
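    # Remove any subscription whose username appears on the configured blacklist.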
    if blacklist_name:
        r = api.get_lists()
        if not r:
            return [False, []]
        new_results = [c for c in r if blacklist_name == c["name"]]
        if new_results:
            item = new_results[0]
            list_users = item["users"]
            if int(item["usersCount"]) > 2:
                list_id = str(item["id"])
                list_users = api.get_lists_users(list_id)
            users = list_users
            bl_ids = [x["username"] for x in users]
            results2 = results.copy()
            for result in results2:
                identifier = result.username
                if identifier in bl_ids:
                    print("Blacklisted: " + identifier)
                    results.remove(result)
    # Two stable sorts: order by expiry date, then move the authed user's own
    # profile to the front. Fall back to an empty string when subscribedByData
    # is missing so the sort cannot raise.
    results.sort(key=lambda x: x.subscribedByData.expiredAt
                 if x.subscribedByData else "")
    results.sort(key=lambda x: x.is_me, reverse=True)
    results2 = []
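    # Filter out paid or free subscriptions according to the ignore_type setting.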
    for result in results:
        result.auth_count = auth_count
        username = result.username
        now = datetime.utcnow().date()
        # subscribedBy = result["subscribedBy"]
        subscribedByData = result.subscribedByData
        result_date = (subscribedByData.expiredAt
                       if subscribedByData else datetime.utcnow().isoformat())
        # Guard the price lookups as well; a missing subscribedByData is assumed
        # here to mean a free, unexpired entry.
        price = subscribedByData.price if subscribedByData else 0
        subscribePrice = subscribedByData.subscribePrice if subscribedByData else 0
        result_date = datetime.fromisoformat(result_date).replace(
            tzinfo=None).date()
        if ignore_type in ["paid"]:
            if price > 0:
                continue
        if ignore_type in ["free"]:
            if subscribePrice == 0:
                continue
        results2.append(result)
    api.auth.subscriptions = results2
    return results2
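
# A minimal, hypothetical usage sketch: assumes `api` is an authenticated `start`
# instance and that blacklist_name / ignore_type have been loaded from the config.
subscriptions = manage_subscriptions(api, auth_count=0, refresh=True)
for subscription in subscriptions:
    print(subscription.username)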
Example #3
def profile_scraper(api: start, site_name, api_type, username, text_length,
                    base_directory):
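    # NOTE: this excerpt relies on module-level imports/globals from the original
    # scraper module (json_settings, date_format, overwrite_files, os, requests,
    # main_helper, prepare_reformat).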
    reformats = {}
    reformats["metadata_directory_format"] = json_settings[
        "metadata_directory_format"]
    reformats["file_directory_format"] = json_settings["file_directory_format"]
    reformats["file_directory_format"] = reformats[
        "file_directory_format"].replace("{value}", "")
    reformats["filename_format"] = json_settings["filename_format"]
    option = {}
    option["site_name"] = site_name
    option["api_type"] = api_type
    option["username"] = username
    option["date_format"] = date_format
    option["maximum_length"] = text_length
    option["directory"] = base_directory
    a, b, c = prepare_reformat(option, keep_vars=True).reformat(reformats)
    y = api.get_subscription(identifier=username)
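    # Collect the profile's avatar and header images, when present.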
    override_media_types = []
    avatar = y.avatar
    header = y.header
    if avatar:
        override_media_types.append(["Avatars", avatar])
    if header:
        override_media_types.append(["Headers", header])
    for override_media_type in override_media_types:
        new_dict = dict()
        media_type = override_media_type[0]
        media_link = override_media_type[1]
        new_dict["links"] = [media_link]
        directory2 = os.path.join(b, media_type)
        os.makedirs(directory2, exist_ok=True)
        download_path = os.path.join(directory2,
                                     media_link.split("/")[-2] + ".jpg")
        if not overwrite_files:
            if os.path.isfile(download_path):
                continue
        session = api.sessions[0]
        r = api.json_request(media_link,
                             session,
                             stream=True,
                             json_format=False,
                             sleep=False)
        if not isinstance(r, requests.Response):
            continue
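        # Retry until main_helper.downloader reports a successful write to disk.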
        while True:
            downloader = main_helper.downloader(r, download_path)
            if not downloader:
                continue
            break
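
# A minimal, hypothetical usage sketch: the site name, api_type label, text length,
# and base directory below are placeholder values, not values taken from the project.
profile_scraper(api, "onlyfans", "Profiles", "example_username", 255, "downloads")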