def __callGraphQL(self, query, variables=None):
    """Run a GraphQL request against the configured Stash endpoint.

    Fragments referenced by the query are resolved first.  Any errors
    reported in the response body are logged; when a "data" payload is
    present it is passed to parse_response() and returned.
    """
    query = self.__resolveFragments(query)
    payload = {'query': query}
    if variables is not None:
        payload['variables'] = variables
    resp = requests.post(self.url,
                         json=payload,
                         headers=self.headers,
                         cookies=self.cookies)
    if resp.status_code == 401:
        # Cookie-based authentication is the usual culprit for a 401 here.
        sys.exit(
            "HTTP Error 401, Unauthorized. Cookie authentication most likely failed"
        )
    if resp.status_code != 200:
        raise ConnectionError(
            "GraphQL query failed:{} - {}. Query: {}. Variables: {}".format(
                resp.status_code, resp.content, query, variables))
    body = resp.json()
    for err in body.get("errors") or []:
        log.error(f"GraphQL error: {err}")
    if body.get("error"):
        for err in body["error"]["errors"]:
            log.error(f"GraphQL error: {err}")
    data = body.get("data")
    if data:
        parse_response(data)
        return data
def callGraphQL(self, query: dict, referer: str):
    """POST *query* to self.api with a browser-like header set.

    Returns the decoded JSON response, or None when the query is empty
    or any request/GraphQL error occurs (errors are logged, not raised).
    """
    if not query:
        return None
    req_headers = {
        "Accept-Encoding": "gzip, deflate",
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Referer": referer,
        "DNT": "1",
    }
    try:
        resp = requests.post(self.api, json=query, headers=req_headers)
        if resp.status_code != 200:
            raise ConnectionError(
                f"GraphQL query failed:{resp.status_code} - {resp.content}"
            )
        body = resp.json()
        if body.get("error"):
            for e in body["error"]["errors"]:
                raise Exception(f"GraphQL error: {e}")
        return body
    except Exception as err:
        log.error(f"GraphqQL query failed {err}")
        return None
def validId(scene_data):
    """Validate the fragment's URL and delegate to searchUrl(); exit when the URL is missing."""
    url = scene_data["url"]
    if not url:
        log.error('Bad URL (e.g. not themoviedb.org/movie/*****)')
        sys.exit(1)
    searchUrl(url, "scene")
def callGraphQL(self, query: dict):
    """POST *query* to the site's GraphQL endpoint with the API key attached.

    Returns the "data" payload on success, otherwise None (failures are
    logged).  A 4xx/5xx response terminates the scraper via sys.exit.
    """
    req_headers = {
        "Content-type": "application/json",
        "argonath-api-key": self.api_key,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0",
        "Origin": self.homepage,
        "Referer": self.homepage
    }
    try:
        resp = requests.post(self.api, json=query, headers=req_headers, timeout=10)
        if resp.status_code >= 400:
            # SystemExit is not an Exception, so this escapes the handler below.
            sys.exit(f"HTTP Error {resp.status_code}, {resp.text}")
        if resp.status_code != 200:
            raise ConnectionError(
                f"GraphQL query failed:{resp.status_code} - {resp.text}"
            )
        body = resp.json()
        if body.get("error"):
            for e in body["error"]["errors"]:
                raise Exception(f"GraphQL error: {e}")
        if body.get("data"):
            return body.get("data")
    except Exception as err:
        log.error(f"GraphqQL query failed {err}")
        return None
def searchUrl(scene_url, type):
    """Resolve a TMDB movie URL: pull the trailing id and forward it to getId().

    Exits the scraper when the URL is not a themoviedb.org /movie/ link
    or when nothing follows the last slash.
    """
    if "themoviedb.org/movie/" not in scene_url:
        log.error('The URL is not from a TMDB URL (e.g. /movie/*****)')
        sys.exit(1)
    # Strip everything up to (and including) the last '/' to keep the id.
    scene_id = re.sub('.+/', '', scene_url)
    if scene_id:
        getId(scene_id, type)
    else:
        log.error(
            f"Error with the ID ({scene_url})\nAre you sure that the end of your URL is correct ?")
        sys.exit(1)
def scrapeUrlToString(url):
    """Fetch *url* through cloudscraper (Cloudflare-aware) and return the raw body bytes.

    Exits the scraper on any request failure or on an HTTP status >= 400.
    """
    scraper = cloudscraper.create_scraper()
    try:
        scraped = scraper.get(url)
    except Exception as e:
        # Was a bare `except:` that also swallowed SystemExit /
        # KeyboardInterrupt and hid the reason for the failure.
        log.error(f"scrape error: {e}")
        sys.exit(1)
    if scraped.status_code >= 400:
        log.error('HTTP Error: %s' % scraped.status_code)
        sys.exit(1)
    return scraped.content
def query_xml(gallery_path, title):
    """Parse a ComicInfo-style XML file at *gallery_path* into a scraper result dict.

    *title* is the fallback title used when the XML has no <Title>.
    On a parse failure an empty result is printed and the scraper exits.
    Genres and Characters are comma-split into tags/performers; a
    <Series> element is appended to the tags as "Series/Parody: ...".
    """
    res = {"title": title}
    try:
        tree = ET.parse(gallery_path)
    except Exception as e:
        log.error(f'xml parsing failed:{e}')
        print(json.dumps(res))
        sys.exit(1)

    if tree.find("Title") is not None:
        res["title"] = (tree.find("Title").text).title()
    if tree.find("Web") is not None:
        res["url"] = tree.find("Web").text
    if tree.find("Summary") is not None:
        res["details"] = tree.find("Summary").text
    if tree.find("Released") is not None:
        res["date"] = tree.find("Released").text

    genre = tree.find("Genre")
    if genre is not None and genre.text:
        split_tags = [t for x in tree.findall("Genre") for t in x.text.split(", ")]
        # Bug fix: tree.find("Series").text was previously read without a
        # None check, crashing with AttributeError when no <Series> exists.
        series = tree.find("Series")
        if series is not None and series.text:
            split_tags.append("Series/Parody: " + series.text)
        res["tags"] = res.get("tags", []) + [{"name": x.title()} for x in split_tags]

    chars = tree.find("Characters")
    if chars is not None and chars.text:
        split_performers = [t for x in tree.findall("Characters") for t in x.text.split(", ")]
        res["performers"] = res.get("performers", []) + [{"name": x.title()} for x in split_performers]

    writer = tree.find("Writer")
    if writer is not None and writer.text:
        res["studio"] = {"name": writer.text}
    return res
def main():
    """Scraper entry point: read a URL fragment from stdin and dispatch.

    sys.argv[1] selects the mode ("scene" or "performer"); scene scraping
    is the default when no mode argument is given.
    """
    stdin = sys.stdin.read()
    log.debug(stdin)
    fragment = json.loads(stdin)
    if not fragment['url']:
        log.error('No URL entered.')
        sys.exit(1)
    url = fragment['url'].strip()
    site, studio, el_id, slug = get_from_url(url)
    if site is None:
        log.error('The URL could not be parsed')
        sys.exit(1)
    response, err = make_request(url, f"https://{site}")
    if err is not None:
        log.error(f"Could not fetch page HTML: {err}")
        sys.exit(1)
    j = fetch_page_json(response)
    if j is None:
        log.error('Could not find JSON on page')
        sys.exit(1)
    # Bug fix: sys.argv always holds at least the script name, so the old
    # `len(sys.argv) == 0` check could never be true and sys.argv[1]
    # raised IndexError when no mode argument was passed.
    if len(sys.argv) < 2 or sys.argv[1] == "scene":
        scrape_scene(page_json=j, studio=studio)
    elif sys.argv[1] == "performer":
        scrape_performer(j)
def apikey_get(site_url, time):
    """Scrape fresh Algolia credentials from *site_url*.

    On success the (applicationID, apiKey) pair is persisted via
    write_config() and returned; on any failure (None, None) is returned.
    """
    page = sendRequest(site_url, HEADERS)
    if page is None:
        return None, None
    data = fetch_page_json(page.text)
    if data is None:
        log.error("Can't retrieve API keys from page ({})".format(site_url))
        return None, None
    algolia = data['api']['algolia']
    app_id = algolia['applicationID']
    key = algolia['apiKey']
    # Persist the keys so the next run can reuse them instead of re-scraping.
    write_config(time, app_id, key)
    log.info("New API keys: {}".format(key))
    return app_id, key
def scrape_scene(page_json, studio):
    """Build and print (as JSON) a Stash scene scrape from the page's "video" JSON.

    *studio* becomes the scene's studio name; exits when no video data exists.
    """
    if page_json.get("video") is None:
        log.error('Could not find scene in JSON data')
        sys.exit(1)
    scene = page_json["video"]
    scrape = {}
    scrape['studio'] = {'name': studio}
    if scene.get('title'):
        scrape['title'] = scene['title']
    if scene.get('release_date'):
        # Keep only the YYYY-MM-DD part of the timestamp.
        scrape['date'] = scene['release_date'][:10]
    if scene.get('description'):
        # The description is HTML; strip the markup.
        details = BeautifulSoup(scene['description'], "html.parser").get_text()
        scrape['details'] = details
    if scene.get('models'):
        models = [m['name'] for m in scene['models'] if m.get('name')]
        scrape['performers'] = [{'name': x} for x in models]
    if scene.get('tags'):
        tags = [t['name'] for t in scene['tags'] if t.get('name')]
        scrape['tags'] = [{'name': x} for x in tags]
    if scene.get('extra_thumbs'):
        # Thumb filename endings seen in the wild:
        #   _player.jpg, _playermobile.jpg, _portrait1.jpg,
        #   _portrait2.jpg, _scene.jpg, _scenemobile.jpg
        img = None
        for i in scene['extra_thumbs']:
            if i.endswith("_player.jpg"):
                # Bug fix: this previously assigned `image = i`, so the
                # preferred _player.jpg thumb was never actually selected.
                img = i
                break
        if img is None:
            img = scene['extra_thumbs'][0]
        scrape['image'] = img
    print(json.dumps(scrape))
def scrape_performer(page_json):
    """Build and print (as JSON) a Stash performer scrape from the page's "model" JSON.

    Imperial height (ft'in) and weight (lbs) are converted to cm and kg.
    Exits when no model data exists.
    """
    if page_json.get("model") is None:
        log.error('Could not find performer in JSON data')
        sys.exit(1)
    model = page_json["model"]
    scrape = {
        'name': get_dict_value(model, 'name'),
        'gender': get_dict_value(model, 'gender'),
        'image': get_dict_value(model, 'thumb'),
    }
    attrs = model.get('attributes')
    if attrs:
        if attrs.get('bio'):
            scrape['details'] = get_dict_value(attrs['bio'], 'value')
        if attrs.get('birthdate'):
            scrape['birthdate'] = get_dict_value(attrs['birthdate'], 'value')
        if attrs.get('measurements'):
            scrape['measurements'] = get_dict_value(attrs['measurements'], 'value')
        if attrs.get('eyes'):
            scrape['eye_color'] = get_dict_value(attrs['eyes'], 'value')
        if attrs.get('height'):
            raw_height = get_dict_value(attrs['height'], 'value')
            if raw_height:
                m = re.match(r'(\d+)\D(\d+).+', raw_height)
                if m:
                    feet, inches = float(m.group(1)), float(m.group(2))
                    # ft'in -> centimetres
                    scrape['height'] = str(int(round((feet * 12 + inches) * 2.54)))
        if attrs.get('weight'):
            raw_weight = get_dict_value(attrs['weight'], 'value')
            if raw_weight:
                m = re.match(r'(\d+)\slbs', raw_weight)
                if m:
                    # lbs -> kilograms
                    scrape['weight'] = str(int(round(float(m.group(1)) / 2.2046)))
        if attrs.get('hair'):
            scrape['hair_color'] = get_dict_value(attrs['hair'], 'value')
    print(json.dumps(scrape))
def stashbox_call_graphql(query, variables=None):
    """Run *query* against the configured stash-box endpoint.

    Mirrors call_graphql() but targets the stash-box instance; the
    module-level globals `boxapi_key` and `endpoint` come from our own
    Stash configuration.  Returns the "data" payload or None.
    """
    headers = {
        "Accept-Encoding": "gzip, deflate, br",
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Connection": "keep-alive",
        "DNT": "1",
        "ApiKey": boxapi_key
    }
    # NOTE: the original bound this payload to a local named `json`,
    # shadowing the json module inside this function.
    payload = {'query': query}
    if variables is not None:
        payload['variables'] = variables
    try:
        response = requests.post(endpoint, json=payload, headers=headers)
        if response.status_code == 401:
            log.error(
                "[ERROR][GraphQL] HTTP Error 401, Unauthorised. You need to add a Stash box instance and API Key in your Stash config"
            )
            return None
        if response.status_code != 200:
            raise ConnectionError("GraphQL query failed:{} - {}".format(
                response.status_code, response.content))
        result = response.json()
        if result.get("error"):
            for error in result["error"]["errors"]:
                raise Exception("GraphQL error: {}".format(error))
        if result.get("data"):
            return result.get("data")
    except Exception as err:
        log.error(err)
        return None
def callGraphQL(query, variables=None):
    """Run *query* against the local Stash instance configured in config.py.

    Returns the "data" payload on success, otherwise None (failures are
    logged).  An API key is attached when config.STASH provides one.
    """
    if config.STASH.get("url") is None:
        log.error("You need to set the URL in 'config.py'")
        return None
    api_key = config.STASH.get("api_key") or ""
    stash_url = config.STASH["url"] + "/graphql"
    headers = {
        "Accept-Encoding": "gzip, deflate, br",
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Connection": "keep-alive",
        "DNT": "1",
        "ApiKey": api_key
    }
    payload = {'query': query}
    if variables is not None:
        payload['variables'] = variables
    try:
        response = requests.post(stash_url, json=payload, headers=headers)
        if response.status_code == 401:
            log.error(
                "[ERROR][GraphQL] HTTP Error 401, Unauthorised. You can add a API Key in 'config.py' in the 'py_common' folder"
            )
            return None
        if response.status_code != 200:
            raise ConnectionError("GraphQL query failed:{} - {}".format(
                response.status_code, response.content))
        result = response.json()
        if result.get("error"):
            for error in result["error"]["errors"]:
                raise Exception("GraphQL error: {}".format(error))
        if result.get("data"):
            return result.get("data")
    except Exception as err:
        log.error(err)
        return None
#either way, pass it back and done print(json.dumps(scrape)) exit fragment = json.loads(sys.stdin.read()) #how are we being called, for search, for url, for movie or scene, or by id? if sys.argv[1]: if sys.argv[1] == "searchName": if fragment["name"]: scene_name = fragment["name"] searchName(scene_name) exit else: log.error('You need to search for something, like a Movie title') sys.exit(1) elif sys.argv[1] == "validId": validId(fragment) exit elif sys.argv[1] == "searchURL": if fragment["url"]: scene_url = fragment["url"] searchUrl(scene_url, "scene") exit else: log.error( 'You need to set the URL (e.g. themoviedb.org/movie/*****)') sys.exit(1) elif sys.argv[1] == "movieURL": if fragment["url"]:
if SEARCH_TITLE: SEARCH_TITLE = SEARCH_TITLE.replace(".", " ") log.debug("[API] Searching for: {}".format(SEARCH_TITLE)) api_search = api_search_req("query", SEARCH_TITLE, api_url) final_json = None if api_search: result_search = [] for scene in api_search: scraped_json = scraping_json(scene) if scraped_json.get("tags"): scraped_json.pop("tags") result_search.append(scraped_json) if result_search: final_json = result_search if final_json is None: log.error("API Search finished. No results!") print(json.dumps(final_json)) sys.exit() if url_id: log.debug("[API] Searching using URL_ID") api_search = api_search_req("id", url_id, api_url) if api_search: log.info("[API] Search give {} result(s)".format(len(api_search))) api_json = json_parser(api_search, 120, True) else: log.warning("[API] No result") if url_title and api_json is None: log.debug("[API] Searching using URL_TITLE") api_search = api_search_req("query", url_title, api_url) if api_search:
def main():
    """Scraper entry point: read a URL fragment from stdin and print the scene scrape.

    The scene id parsed from the URL must match the id inside the page's
    JSON payload, guarding against redirects to the wrong scene.
    """
    stdin = sys.stdin.read()
    log.debug(stdin)
    fragment = json.loads(stdin)
    if not fragment['url']:
        log.error('No URL entered.')
        sys.exit(1)
    url = fragment['url'].strip()
    site, studio, sid, slug = get_from_url(url)
    if site is None:
        log.error('The URL could not be parsed')
        sys.exit(1)
    response, err = make_request(url, f"https://{site}")
    if err is not None:
        # Fix: the error detail was passed as a second positional argument
        # to log.error(); fold it into the message (matches the style used
        # by the sibling scraper's main()).
        log.error(f'Could not fetch page HTML: {err}')
        sys.exit(1)
    j = fetch_page_json(response)
    if j is None:
        log.error('Could not find JSON on page')
        sys.exit(1)
    if 'video' not in j['data']:
        log.error('Could not locate scene within JSON')
        sys.exit(1)
    scene = j["data"]["video"]
    if scene.get('id'):
        if str(scene['id']) != sid:
            log.error('Wrong scene within JSON')
            sys.exit(1)
    log.info(f"Scene {sid} found")
    scrape = {}
    if scene.get('title'):
        scrape['title'] = scene['title']
    if scene.get('release_date'):
        # Keep only the YYYY-MM-DD part of the timestamp.
        scrape['date'] = scene['release_date'][:10]
    if scene.get('description'):
        # The description is HTML; strip the markup.
        details = BeautifulSoup(scene['description'], "html.parser").get_text()
        scrape['details'] = details
    if scene.get('sites'):
        scrape['studio'] = {'name': scene['sites'][0]['name']}
    if scene.get('models'):
        performers = []
        for m in scene['models']:
            # Some model entries pack several names into one "A & B" string.
            performers.extend([x.strip() for x in m['name'].split("&")])
        scrape['performers'] = [{'name': x} for x in performers]
    if scene.get('tags'):
        scrape['tags'] = [{'name': x['name']} for x in scene['tags']]
    if j['data'].get('file_poster'):
        scrape['image'] = j['data']['file_poster']
    print(json.dumps(scrape))
sys.exit(1) log.info(f"Scene {sid} found") scrape = {} if scene.get('title'): scrape['title'] = scene['title'] if scene.get('release_date'): scrape['date'] = scene['release_date'][:10] if scene.get('description'): details = BeautifulSoup(scene['description'], "html.parser").get_text() scrape['details'] = details if scene.get('sites'): scene_studio = scene['sites'][0]['name'] scrape['studio'] = {'name': scene_studio} if scene.get('models'): models = [] for m in scene['models']: models.extend([x.strip() for x in m['name'].split("&")]) scrape['performers'] = [{'name': x} for x in models] if scene.get('tags'): scrape['tags'] = [{'name': x['name']} for x in scene['tags']] if j['data'].get('file_poster'): scrape['image'] = j['data']['file_poster'] print(json.dumps(scrape)) if __name__ == '__main__': try: main() except Exception as e: log.error(e)
pass USERFOLDER_PATH = str(pathlib.Path(__file__).parent.parent.absolute()) DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","Teamskeet") # Not necessary but why not ? USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0' fragment = json.loads(sys.stdin.read()) if fragment["url"]: scene_url = fragment["url"] else: log.error('You need to set the URL (e.g. teamskeet.com/movies/*****)') sys.exit(1) if "teamskeet.com/movies/" not in scene_url: log.error('The URL is not from a Teamskeet URL (e.g. teamskeet.com/movies/*****)') sys.exit(1) scene_id = re.sub('.+/', '', scene_url) if not scene_id: log.error("Error with the ID ({})\nAre you sure that the end of your URL is correct ?".format(scene_id)) sys.exit(1) use_local = 0 json_file = os.path.join(DIR_JSON, scene_id+".json") if os.path.isfile(json_file): log.debug("Using local JSON...") use_local = 1
result { path mime size serve { type uri } } } } } """ studios = {Site('Fit18'), Site('Thicc18')} fragment = json.loads(sys.stdin.read()) url = fragment.get("url") if url: for x in studios: if x.isValidURL(url): s = x.getScene(url) #log.debug(f"{json.dumps(s)}") print(json.dumps(s)) sys.exit(0) log.error(f"URL: {url} is not supported") print("{}") sys.exit(1)
if "performers" in res: res["performers"] += [{"name":x.title()} for x in split_performers] else: res["performers"] = [{"name":x.title()} for x in split_performers] if tree.find("Writer") != None: if tree.find("Writer").text: res["studio"] = {"name":tree.find("Writer").text} return res if sys.argv[1] == "query": fragment = json.loads(sys.stdin.read()) g_id = fragment.get("id") if not g_id: log.error(f"No ID found") sys.exit(1) gallery = graphql.getGalleryPath(g_id) if gallery: gallery_path = gallery.get("path") if gallery_path: p = pathlib.Path(gallery_path) res = {"title": fragment["title"]} # Determine if loose file format or archive such as .cbz or .zip if "cbz" in gallery_path or "zip" in gallery_path: # Look for filename.xml where filename.(cbz|zip) is the gallery f = p.with_suffix('.xml') log.debug(f"Single File Format, using: {f}") else: