def start_datascraper(api: start, identifier, site_name, choice_type=None):
    """Scrape a single subscription on *site_name*.

    Resolves *identifier* to a subscription, lets the user (or the
    ``auto_scrape_apis`` setting) choose which API types to run, then
    runs the scraper for each chosen API type.

    Args:
        api: Authenticated API session object.
        identifier: Username or id used to look up the subscription.
        site_name: Name of the site being scraped.
        choice_type: Unused; kept for interface compatibility.

    Returns:
        ``[False, subscription]`` when the subscription cannot be
        resolved, otherwise ``[True, subscription]``.
    """
    print("Scrape Processing")
    subscription = api.get_subscription(identifier=identifier)
    if not subscription:
        return [False, subscription]
    username = subscription.username
    print("Name: " + username)
    # Build the menu of scrapeable API types for this subscription.
    api_array = scrape_choice(api, subscription)
    api_array = format_options(api_array, "apis")
    apis = api_array[0]
    api_string = api_array[1]
    if not json_settings["auto_scrape_apis"]:
        print("Apis: " + api_string)
        value = int(input().strip())
    else:
        # Auto mode behaves like choosing 0 ("All").
        value = 0
    if value:
        # A single specific API was chosen; scrape only that one.
        apis = [apis[value]]
    else:
        # 0 means "All": drop the aggregate first entry and scrape each API.
        apis.pop(0)
    for item in apis:
        print("Type: " + item["api_type"])
        item["api_array"]["username"] = username
        item["api_array"]["subscription"] = subscription
        # Called for its side effects (downloads/metadata); the return
        # value was never used by the original code either.
        prepare_scraper(api, site_name, item)
    print("Scrape Completed" + "\n")
    return [True, subscription]
def manage_subscriptions(api: start, auth_count=0, identifier="", refresh: bool = False):
    """Fetch, blacklist-filter, sort and price-filter subscriptions.

    Args:
        api: Authenticated API session object.
        auth_count: Index of the auth this result set belongs to; stamped
            onto every returned subscription.
        identifier: When non-empty, fetch only that single subscription.
        refresh: Passed through to ``api.get_subscriptions``.

    Returns:
        The filtered list of subscriptions (also stored on
        ``api.auth.subscriptions``), or ``[False, []]`` when the
        blacklist could not be fetched.
    """
    if identifier:
        results = [api.get_subscription(identifier=identifier)]
    else:
        results = api.get_subscriptions(refresh=refresh)
    if blacklist_name:
        r = api.get_lists()
        if not r:
            return [False, []]
        new_results = [c for c in r if blacklist_name == c["name"]]
        if new_results:
            item = new_results[0]
            list_users = item["users"]
            if int(item["usersCount"]) > 2:
                # The inline "users" preview is truncated; fetch the full
                # member list by id.
                list_id = str(item["id"])
                list_users = api.get_lists_users(list_id)
            bl_ids = [x["username"] for x in list_users]
            # Iterate over a copy so removal is safe mid-loop.
            for result in results.copy():
                if result.username in bl_ids:
                    print("Blacklisted: " + result.username)
                    results.remove(result)
    # Two stable sorts: by expiry date, then "own profile" entries first.
    # BUG FIX: subscribedByData may be None (the price code below already
    # implied that); fall back to "now" instead of raising AttributeError.
    results.sort(key=lambda x: x.subscribedByData.expiredAt
                 if x.subscribedByData else datetime.utcnow().isoformat())
    results.sort(key=lambda x: x.is_me, reverse=True)
    results2 = []
    for result in results:
        result.auth_count = auth_count
        subscribedByData = result.subscribedByData
        # BUG FIX: guard the price lookups too — the original dereferenced
        # subscribedByData unconditionally and crashed when it was None.
        # A missing record is treated as a free, unpaid subscription.
        if subscribedByData:
            price = subscribedByData.price
            subscribePrice = subscribedByData.subscribePrice
        else:
            price = 0
            subscribePrice = 0
        if ignore_type in ["paid"] and price > 0:
            continue
        if ignore_type in ["free"] and subscribePrice == 0:
            continue
        results2.append(result)
    api.auth.subscriptions = results2
    return results2
def profile_scraper(api: start, site_name, api_type, username, text_length, base_directory):
    """Download a subscription's profile media (avatar and header banner).

    Builds the reformatted output directory from the global
    ``json_settings`` templates, then fetches the avatar and header
    images into ``<directory>/Avatars`` and ``<directory>/Headers``.

    Args:
        api: Authenticated API session object.
        site_name: Name of the site being scraped.
        api_type: API type label used by the path templates.
        username: Subscription username to scrape.
        text_length: Maximum filename/text length for the reformatter.
        base_directory: Root directory for downloads.
    """
    reformats = {}
    reformats["metadata_directory_format"] = json_settings[
        "metadata_directory_format"]
    # Profile media has no {value} (price) component; strip the token.
    reformats["file_directory_format"] = json_settings[
        "file_directory_format"].replace("{value}", "")
    reformats["filename_format"] = json_settings["filename_format"]
    option = {}
    option["site_name"] = site_name
    option["api_type"] = api_type
    option["username"] = username
    option["date_format"] = date_format
    option["maximum_length"] = text_length
    option["directory"] = base_directory
    # b is the formatted file directory for this profile; a and c are the
    # other reformat results, unused here.
    a, b, c = prepare_reformat(option, keep_vars=True).reformat(reformats)
    # BUG FIX: removed a stray bare `print` expression (a no-op debugging
    # leftover / Python 2 statement) that did nothing in Python 3.
    y = api.get_subscription(identifier=username)
    override_media_types = []
    if y.avatar:
        override_media_types.append(["Avatars", y.avatar])
    if y.header:
        override_media_types.append(["Headers", y.header])
    for media_type, media_link in override_media_types:
        directory2 = os.path.join(b, media_type)
        os.makedirs(directory2, exist_ok=True)
        # NOTE(review): assumes the second-to-last URL segment is a stable
        # media id — confirm against the CDN URL layout.
        download_path = os.path.join(directory2,
                                     media_link.split("/")[-2] + ".jpg")
        if not overwrite_files and os.path.isfile(download_path):
            continue
        session = api.sessions[0]
        r = api.json_request(media_link, session, stream=True,
                             json_format=False, sleep=False)
        if not isinstance(r, requests.Response):
            continue
        # BUG FIX: the original `while True: ... continue` retried forever
        # when the download failed; cap the attempts so a dead link cannot
        # hang the scraper.
        for _ in range(3):
            if main_helper.downloader(r, download_path):
                break