def manage_subscriptions(api: start, auth_count=0, identifier="", refresh: bool = False):
    """Collect the subscriptions to process, filtered and ordered.

    Steps, in order:

    1. Fetch either the single subscription named by ``identifier`` or all
       subscriptions.
    2. Drop every subscription whose username is on the user list named by
       the module-level ``blacklist_name`` (if one is configured).
    3. Order by expiry date, then move the account's own profile
       (``is_me``) to the front.
    4. Apply the module-level ``ignore_type`` filter (``"paid"`` or
       ``"free"``) and stamp ``auth_count`` on every subscription visited.

    Args:
        api: API wrapper providing ``get_subscription``, ``get_subscriptions``,
            ``get_lists`` and ``get_lists_users``.
        auth_count: Value stored as ``auth_count`` on each subscription.
        identifier: When non-empty, only this subscription is looked up.
        refresh: Forwarded to ``api.get_subscriptions``.

    Returns:
        The list of kept subscriptions; the same list is stored on
        ``api.auth.subscriptions``.

        NOTE(review): when a blacklist is configured but ``api.get_lists()``
        returns nothing, the legacy value ``[False, []]`` is returned instead
        and ``api.auth.subscriptions`` is left untouched. This is kept for
        backward compatibility; confirm callers handle it.
    """
    if identifier:
        subscription = api.get_subscription(identifier=identifier)
        # A failed lookup yields a falsy value; do not carry it into the list.
        results = [subscription] if subscription else []
    else:
        results = api.get_subscriptions(refresh=refresh)

    if blacklist_name:
        user_lists = api.get_lists()
        if not user_lists:
            return [False, []]
        matching = [c for c in user_lists if blacklist_name == c["name"]]
        if matching:
            item = matching[0]
            users = item["users"]
            # Presumably the list payload only embeds a short preview of its
            # users, so larger lists are fetched in full — TODO confirm.
            if int(item["usersCount"]) > 2:
                users = api.get_lists_users(str(item["id"]))
            blacklisted = {x["username"] for x in users}
            kept = []
            for result in results:
                if result.username in blacklisted:
                    print("Blacklisted: " + result.username)
                else:
                    kept.append(result)
            # Slice-assign so the list object handed out by the API is
            # updated in place, as the original remove() calls did.
            results[:] = kept

    # Subscriptions without subscription data sort as if they expired "now".
    fallback_expiry = datetime.utcnow().isoformat()

    def expiry_key(subscription):
        data = subscription.subscribedByData
        return data.expiredAt if data else fallback_expiry

    # Two stable passes: expiry order first, then own profile to the front.
    results.sort(key=expiry_key)
    results.sort(key=lambda x: x.is_me, reverse=True)

    filtered = []
    for result in results:
        result.auth_count = auth_count
        data = result.subscribedByData
        # Price filters only apply when subscription data is available.
        if data:
            if ignore_type == "paid" and data.price > 0:
                continue
            if ignore_type == "free" and data.subscribePrice == 0:
                continue
        filtered.append(result)

    api.auth.subscriptions = filtered
    return filtered
def profile_scraper(api: start, site_name, api_type, username, text_length, base_directory):
    """Download the avatar and header images of ``username``'s profile.

    The target directory is produced by ``prepare_reformat`` from the
    module-level ``json_settings`` formats; each image is stored in an
    ``Avatars`` or ``Headers`` sub-directory of it. Existing files are
    skipped unless the module-level ``overwrite_files`` is truthy.

    Args:
        api: API wrapper providing ``get_subscription``, ``sessions`` and
            ``json_request``.
        site_name: Stored as ``site_name`` in the reformat options.
        api_type: Stored as ``api_type`` in the reformat options.
        username: Profile to look up; also stored in the reformat options.
        text_length: Stored as ``maximum_length`` in the reformat options.
        base_directory: Stored as ``directory`` in the reformat options.

    Returns:
        None.
    """
    reformats = {
        "metadata_directory_format": json_settings["metadata_directory_format"],
        "file_directory_format": json_settings["file_directory_format"].replace(
            "{value}", ""),
        "filename_format": json_settings["filename_format"],
    }
    option = {
        "site_name": site_name,
        "api_type": api_type,
        "username": username,
        "date_format": date_format,
        "maximum_length": text_length,
        "directory": base_directory,
    }
    # Only the second reformatted value (the file directory) is used here.
    _, file_directory, _ = prepare_reformat(
        option, keep_vars=True).reformat(reformats)

    subscription = api.get_subscription(identifier=username)
    if not subscription:
        # Lookup failed; nothing to download.
        return

    profile_media = []
    avatar = subscription.avatar
    header = subscription.header
    if avatar:
        profile_media.append(("Avatars", avatar))
    if header:
        profile_media.append(("Headers", header))

    # Bounded retries: the original looped forever on a failing download.
    max_attempts = 3
    for media_type, media_link in profile_media:
        directory = os.path.join(file_directory, media_type)
        os.makedirs(directory, exist_ok=True)
        # The file is named after the second-to-last segment of the link.
        download_path = os.path.join(
            directory, media_link.split("/")[-2] + ".jpg")
        if not overwrite_files and os.path.isfile(download_path):
            continue
        session = api.sessions[0]
        for _ in range(max_attempts):
            # Request again on every attempt; a streamed response that was
            # already (partially) read is not a reliable source for a retry.
            response = api.json_request(media_link, session, stream=True,
                                        json_format=False, sleep=False)
            if not isinstance(response, requests.Response):
                break
            if main_helper.downloader(response, download_path):
                break
        else:
            print("Failed to download " + media_type + ": " + media_link)
def account_setup(api: start, identifier=""):
    """Log in and run the one-off preparation steps for the account.

    After a successful login (``api.login()`` returning a ``create_auth``
    instance):

    * for performer accounts, the mass-message archive stored under
      ``<profile directory>/<username>/Metadata/Mass Messages.json`` is
      imported, used to resume ``api.get_mass_messages`` and exported again;
    * when no ``identifier`` is given and the ``scrape_names`` job is enabled
      in ``json_settings``, ``api.get_subscriptions()`` is called (its result
      is not used here).

    Args:
        api: API wrapper providing ``login``, ``get_mass_messages`` and
            ``get_subscriptions``.
        identifier: Optional single profile identifier.

    Returns:
        True when the login succeeded, otherwise False.
    """
    authed = api.login()
    if not isinstance(authed, create_auth):
        return False

    jobs = json_settings["jobs"]
    profile_root = os.path.abspath(
        json_global_settings["profile_directories"][0])
    metadata_filepath = os.path.join(
        profile_root, authed.username, "Metadata", "Mass Messages.json")

    if authed.isPerformer:
        archive = import_archive(metadata_filepath)
        export_archive(api.get_mass_messages(resume=archive),
                       metadata_filepath, json_settings)

    if not identifier and jobs["scrape_names"]:
        api.get_subscriptions()

    return True
def start_datascraper(api: start, identifier, site_name, choice_type=None):
    """Scrape one profile for every selected API section.

    Looks up the subscription, builds the selectable sections with
    ``scrape_choice`` / ``format_options`` and runs ``prepare_scraper`` for
    each selected one. Unless ``json_settings["auto_scrape_apis"]`` is set,
    the user is asked for the index of the section to scrape; invalid input
    is rejected and asked for again instead of raising.

    Args:
        api: API wrapper providing ``get_subscription``.
        identifier: Profile to scrape.
        site_name: Forwarded to ``prepare_scraper``.
        choice_type: Unused; kept for interface compatibility.

    Returns:
        ``[False, subscription]`` when the lookup fails, otherwise
        ``[True, subscription]``.
    """
    print("Scrape Processing")
    subscription = api.get_subscription(identifier=identifier)
    if not subscription:
        return [False, subscription]

    username = subscription.username
    print("Name: "+username)

    api_array = format_options(scrape_choice(api, subscription), "apis")
    apis = api_array[0]
    api_string = api_array[1]

    if json_settings["auto_scrape_apis"]:
        value = 0
    else:
        print("Apis: "+api_string)
        while True:
            try:
                value = int(input().strip())
            except ValueError:
                # Empty or non-numeric input.
                value = -1
            if 0 <= value < len(apis):
                break
            print("Invalid Choice")

    if value:
        apis = [apis[value]]
    else:
        # Index 0 means "everything": drop that entry and scrape the rest.
        # NOTE(review): presumably format_options puts an "all" placeholder
        # at index 0 — confirm.
        apis.pop(0)

    for item in apis:
        print("Type: "+item["api_type"])
        item["api_array"]["username"] = username
        item["api_array"]["subscription"] = subscription
        prepare_scraper(api, site_name, item)

    print("Scrape Completed"+"\n")
    return [True, subscription]
def scrape_choice(api: start, subscription):
    """Build the list of API sections to scrape for ``subscription``.

    The media selection comes from the module-level ``auto_choice`` or, when
    that is falsy, from a prompt: ``a`` = everything, ``b`` = images,
    ``c`` = videos, ``d`` = audios, ``e`` = texts. A ``-l`` flag anywhere in
    the choice sets ``only_links`` for every section.

    Args:
        api: API wrapper providing ``links``.
        subscription: Profile whose ``id``, ``postsCount`` and
            ``archivedPostsCount`` are used.

    Returns:
        One dict per section (Profile, Stories, Posts, Archived, Messages)
        with the keys ``api_message``, ``api_array`` and ``api_type``.
        An empty list, after printing "Invalid Choice", when the choice is
        not recognised.
    """
    user_id = subscription.id
    post_count = subscription.postsCount
    archived_count = subscription.archivedPostsCount

    if auto_choice:
        input_choice = auto_choice
    else:
        print('Scrape: a = Everything | b = Images | c = Videos | d = Audios | e = Texts')
        input_choice = input().strip()

    # ARGUMENTS
    only_links = "-l" in input_choice
    if only_links:
        # Strip the flag wherever it appears ("a -l", "a-l", "-l a"); the
        # original only removed " -l" and then rejected the other spellings.
        input_choice = input_choice.replace("-l", "").strip()

    # Position of each single media type in format_media_types()'s result.
    media_indexes = {"b": 0, "c": 1, "d": 2, "e": 3}
    if input_choice != "a" and input_choice not in media_indexes:
        print("Invalid Choice")
        return []

    links = api.links(user_id)
    # Highlights are not part of the active section list.
    sections = [
        (links.users, post_count, "Profile"),
        (links.stories_api, post_count, "Stories"),
        (links.post_api, post_count, "Posts"),
        (links.archived_posts, archived_count, "Archived"),
        (links.message_api, post_count, "Messages"),
    ]

    choices = []
    for api_link, count, api_type in sections:
        # Called per section so no two sections share a media-type object.
        formatted = format_media_types()
        if input_choice == "a":
            media_types = formatted
        else:
            media_types = [formatted[media_indexes[input_choice]]]
        choices.append({
            "api_message": "You have chosen to scrape {}",
            "api_array": {
                "api_link": api_link,
                "media_types": media_types,
                "directory": j_directory,
                "only_links": only_links,
                "post_count": count,
            },
            "api_type": api_type,
        })
    return choices
def link_check(api: start, identifier):
    """Return whatever ``api.get_user`` yields for ``identifier``."""
    return api.get_user(identifier)