Example #1
    def __callGraphQL(self, query, variables=None):
        query = self.__resolveFragments(query)

        json_request = {'query': query}
        if variables is not None:
            json_request['variables'] = variables

        response = requests.post(self.url,
                                 json=json_request,
                                 headers=self.headers,
                                 cookies=self.cookies)

        if response.status_code == 200:
            result = response.json()
            if result.get("errors"):
                for error in result["errors"]:
                    log.error(f"GraphQL error: {error}")
            if result.get("error"):
                for error in result["error"]["errors"]:
                    log.error(f"GraphQL error: {error}")
            if result.get("data"):
                data = result['data']
                parse_response(data)
                return data
        elif response.status_code == 401:
            sys.exit(
                "HTTP Error 401, Unauthorized. Cookie authentication most likely failed"
            )
        else:
            raise ConnectionError(
                "GraphQL query failed:{} - {}. Query: {}. Variables: {}".
                format(response.status_code, response.content, query,
                       variables))

    def callGraphQL(self, query: dict, referer: str):
        headers = {
            "Accept-Encoding": "gzip, deflate",
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Referer": referer,
            "DNT": "1",
        }
        if not query:
            return None

        try:
            response = requests.post(self.api, json=query, headers=headers)
            if response.status_code == 200:
                result = response.json()
                if result.get("error"):
                    for error in result["error"]["errors"]:
                        raise Exception(f"GraphQL error: {error}")
                return result
            else:
                raise ConnectionError(
                    f"GraphQL query failed:{response.status_code} - {response.content}"
                )
        except Exception as err:
            log.error(f"GraphqQL query failed {err}")
            return None
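
For context, a minimal self-contained sketch of the pattern both helpers above wrap: POST a GraphQL document with requests, check the transport status, then check the response body for GraphQL-level errors (which arrive with HTTP 200). The endpoint and query are illustrative placeholders, not a real API:

import requests

GRAPHQL_ENDPOINT = "http://localhost:9999/graphql"  # placeholder endpoint

def call_graphql(query, variables=None):
    payload = {"query": query}
    if variables is not None:
        payload["variables"] = variables
    response = requests.post(GRAPHQL_ENDPOINT, json=payload, timeout=10)
    response.raise_for_status()  # transport-level errors (4xx/5xx)
    result = response.json()
    if result.get("errors"):  # GraphQL-level errors still return HTTP 200
        raise RuntimeError(f"GraphQL errors: {result['errors']}")
    return result.get("data")

# hypothetical query for illustration only
data = call_graphql("query { version }")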
Example #3
def validId(scene_data):
    if scene_data.get("url"):
        scene_url = scene_data["url"]
        searchUrl(scene_url, "scene")
    else:
        log.error('Bad URL (expected something like themoviedb.org/movie/*****)')
        sys.exit(1)
Example #4
    def callGraphQL(self, query: dict):
        headers = {
            "Content-type": "application/json",
            "argonath-api-key": self.api_key,
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0",
            "Origin": self.homepage,
            "Referer": self.homepage
        }

        try:
            response = requests.post(self.api,
                                     json=query,
                                     headers=headers,
                                     timeout=10)
            if response.status_code == 200:
                result = response.json()
                if result.get("error"):
                    for error in result["error"]["errors"]:
                        raise Exception(f"GraphQL error: {error}")
                if result.get("data"):
                    return result.get("data")
            elif response.status_code >= 400:
                sys.exit(f"HTTP Error {response.status_code}, {response.text}")
            else:
                raise ConnectionError(
                    f"GraphQL query failed:{response.status_code} - {response.text}"
                )
        except Exception as err:
            log.error(f"GraphqQL query failed {err}")
            return None
Example #5
def searchUrl(scene_url, scrape_type):
    if "themoviedb.org/movie/" not in scene_url:
        log.error('The URL is not a TMDB movie URL (e.g. themoviedb.org/movie/*****)')
        sys.exit(1)
    scene_id = re.sub('.+/', '', scene_url)
    if not scene_id:
        log.error(
            "Error with the ID ({})\nAre you sure the end of your URL is correct?"
            .format(scene_url))
        sys.exit(1)
    getId(scene_id, scrape_type)
def scrapeUrlToString(url):
    scraper = cloudscraper.create_scraper()
    try:
        scraped = scraper.get(url)
    except Exception as e:
        log.error(f"Scrape error: {e}")
        sys.exit(1)

    if scraped.status_code >= 400:
        log.error('HTTP Error: %s' % scraped.status_code)
        sys.exit(1)

    return scraped.content
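
The ID extraction in searchUrl keeps everything after the last slash; a quick check with a made-up URL:

import re

url = "https://www.themoviedb.org/movie/12345"
print(re.sub('.+/', '', url))  # -> "12345"

Note that a trailing slash would leave an empty ID, which is exactly the case the "if not scene_id" guard above catches.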
Example #7
def query_xml(gallery_path, title):
    res = {"title": title}
    try:
        tree = ET.parse(gallery_path)
    except Exception as e:
        log.error(f'xml parsing failed: {e}')
        print(json.dumps(res))
        sys.exit(1)

    if tree.find("Title") is not None:
        res["title"] = tree.find("Title").text.title()

    if tree.find("Web") is not None:
        res["url"] = tree.find("Web").text

    # if tree.find("Series") is not None:
    #     Collection = tree.find("Series").text

    if tree.find("Summary") is not None:
        res["details"] = tree.find("Summary").text

    if tree.find("Released") is not None:
        res["date"] = tree.find("Released").text

    if tree.find("Genre") is not None:
        if tree.find("Genre").text:
            split_tags = [t for x in tree.findall("Genre") for t in x.text.split(", ")]

            # Need a more suitable spot for this but one doesn't really exist yet;
            # guard against a missing Series element before reading its text
            series = tree.find("Series")
            if series is not None and series.text:
                split_tags.append("Series/Parody: " + series.text)

            if "tags" in res:
                res["tags"] += [{"name": x.title()} for x in split_tags]
            else:
                res["tags"] = [{"name": x.title()} for x in split_tags]

    if tree.find("Characters") is not None:
        if tree.find("Characters").text:
            split_performers = [t for x in tree.findall("Characters") for t in x.text.split(", ")]
            if "performers" in res:
                res["performers"] += [{"name": x.title()} for x in split_performers]
            else:
                res["performers"] = [{"name": x.title()} for x in split_performers]

    if tree.find("Writer") is not None:
        if tree.find("Writer").text:
            res["studio"] = {"name": tree.find("Writer").text}

    return res
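
A made-up metadata file showing the element names query_xml reads (Title, Web, Summary, Released, Genre, Series, Characters, Writer); the values and the temp-file plumbing are illustrative only:

import tempfile

sample_xml = """<Info>
    <Title>example gallery</Title>
    <Web>https://example.com/gallery/1</Web>
    <Summary>Example summary text.</Summary>
    <Released>2021-01-01</Released>
    <Series>Example Series</Series>
    <Genre>tag one, tag two</Genre>
    <Characters>performer one, performer two</Characters>
    <Writer>Example Studio</Writer>
</Info>"""

tmp = tempfile.NamedTemporaryFile("w", suffix=".xml", delete=False)
tmp.write(sample_xml)
tmp.close()
print(query_xml(tmp.name, "fallback title"))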
Example #8
def main():
    stdin = sys.stdin.read()
    log.debug(stdin)
    fragment = json.loads(stdin)

    if not fragment['url']:
        log.error('No URL entered.')
        sys.exit(1)
    url = fragment['url'].strip()

    site, studio, el_id, slug = get_from_url(url)
    if site is None:
        log.error('The URL could not be parsed')
        sys.exit(1)

    response, err = make_request(url, f"https://{site}")
    if err is not None:
        log.error(f"Could not fetch page HTML: {err}")
        sys.exit(1)

    j = fetch_page_json(response)
    if j is None:
        log.error('Could not find JSON on page')
        sys.exit(1)

    if len(sys.argv) < 2 or sys.argv[1] == "scene":
        scrape_scene(page_json=j, studio=studio)
    elif sys.argv[1] == "performer":
        scrape_performer(j)
Example #9
def apikey_get(site_url, timestamp):
    r = sendRequest(site_url, HEADERS)
    if r is None:
        return None, None
    script_html = fetch_page_json(r.text)
    if script_html is not None:
        application_id = script_html['api']['algolia']['applicationID']
        api_key = script_html['api']['algolia']['apiKey']
        # Write the keys into the config file so they can be reused
        write_config(timestamp, application_id, api_key)
        log.info("New API keys: {}".format(api_key))
        return application_id, api_key
    else:
        log.error("Can't retrieve API keys from page ({})".format(site_url))
        return None, None
Example #10
def scrape_scene(page_json, studio):
    if page_json.get("video") is None:
        log.error('Could not find scene in JSON data')
        sys.exit(1)

    scene = page_json["video"]

    scrape = {}
    scrape['studio'] = {'name': studio}
    if scene.get('title'):
        scrape['title'] = scene['title']
    if scene.get('release_date'):
        scrape['date'] = scene['release_date'][:10]
    if scene.get('description'):
        details = BeautifulSoup(scene['description'], "html.parser").get_text()
        scrape['details'] = details
    if scene.get('models'):
        models = []
        for m in scene['models']:
            if m.get('name'):
                models.append(m['name'])
        scrape['performers'] = [{'name': x} for x in models]
    if scene.get('tags'):
        tags = []
        for t in scene['tags']:
            if t.get('name'):
                tags.append(t['name'])
        scrape['tags'] = [{'name': x} for x in tags]
    if scene.get('extra_thumbs'):
        # available image endings
        # ================
        #_player.jpg
        #_playermobile.jpg
        #_portrait1.jpg
        #_portrait2.jpg
        #_scene.jpg
        #_scenemobile.jpg
        img = None
        for i in scene['extra_thumbs']:
            if i.endswith("_player.jpg"):
                img = i
                break
        if img is None:
            img = scene['extra_thumbs'][0]
        scrape['image'] = img
    print(json.dumps(scrape))
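
A made-up page_json fragment covering the fields scrape_scene reads, handy for exercising the function without a live page:

sample_page = {
    "video": {
        "title": "Example Title",
        "release_date": "2021-01-01T00:00:00",
        "description": "<p>Example description.</p>",
        "models": [{"name": "Performer One"}],
        "tags": [{"name": "tag one"}],
        "extra_thumbs": ["https://example.com/123_player.jpg"],
    }
}
scrape_scene(page_json=sample_page, studio="Example Studio")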
Example #11
def scrape_performer(page_json):
    if page_json.get("model") is None:
        log.error('Could not find performer in JSON data')
        sys.exit(1)

    performer = page_json["model"]
    scrape = {}

    scrape['name'] = get_dict_value(performer, 'name')
    scrape['gender'] = get_dict_value(performer, 'gender')
    scrape['image'] = get_dict_value(performer, 'thumb')

    if performer.get('attributes'):
        pa = performer['attributes']
        if pa.get('bio'):
            scrape['details'] = get_dict_value(pa['bio'], 'value')
        if pa.get('birthdate'):
            scrape['birthdate'] = get_dict_value(pa['birthdate'], 'value')
        if pa.get('measurements'):
            scrape['measurements'] = get_dict_value(pa['measurements'],
                                                    'value')
        if pa.get('eyes'):
            scrape['eye_color'] = get_dict_value(pa['eyes'], 'value')
        if pa.get('height'):
            height_ft = get_dict_value(pa['height'], 'value')
            if height_ft:
                h = re.match(r'(\d+)\D(\d+).+', height_ft)
                if h:
                    h_int = int(
                        round((float(h.group(1)) * 12 + float(h.group(2))) *
                              2.54))  # ft'inches to cm
                    scrape['height'] = f"{h_int}"
        if pa.get('weight'):
            weight_lb = get_dict_value(pa['weight'], 'value')
            if weight_lb:
                w = re.match(r'(\d+)\slbs', weight_lb)
                if w:
                    w_int = int(round(float(w.group(1)) / 2.2046))  # lbs to kg
                    scrape['weight'] = f"{w_int}"
        if pa.get('hair'):
            scrape['hair_color'] = get_dict_value(pa['hair'], 'value')
    print(json.dumps(scrape))
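
The two regex conversions above (feet-and-inches to centimetres, pounds to kilograms) are easy to sanity-check in isolation; a small self-contained sketch with made-up inputs:

import re

def feet_inches_to_cm(text):
    # mirrors the height conversion in scrape_performer above
    m = re.match(r"(\d+)\D(\d+).+", text)
    if m:
        return int(round((int(m.group(1)) * 12 + int(m.group(2))) * 2.54))
    return None

def lbs_to_kg(text):
    # mirrors the weight conversion above
    m = re.match(r"(\d+)\slbs", text)
    if m:
        return int(round(int(m.group(1)) / 2.2046))
    return None

print(feet_inches_to_cm("5'7\" tall"))  # 170
print(lbs_to_kg("120 lbs"))  # 54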
Example #12
def stashbox_call_graphql(query, variables=None):

    # this is basically the same code as call_graphql except it calls out to the stashbox.
    # the api_key and endpoint url are in the config of OUR stashbox, so they are globals here, set below in code

    headers = {
        "Accept-Encoding": "gzip, deflate, br",
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Connection": "keep-alive",
        "DNT": "1",
        "ApiKey": boxapi_key
    }
    payload = {'query': query}
    if variables is not None:
        payload['variables'] = variables
    try:
        response = requests.post(endpoint, json=payload, headers=headers)
        if response.status_code == 200:
            result = response.json()
            if result.get("error"):
                for error in result["error"]["errors"]:
                    raise Exception("GraphQL error: {}".format(error))
            if result.get("data"):
                return result.get("data")
        elif response.status_code == 401:
            log.error(
                "[ERROR][GraphQL] HTTP Error 401, Unauthorised. You need to add a Stash box instance and API Key in your Stash config"
            )
            return None
        else:
            raise ConnectionError("GraphQL query failed:{} - {}".format(
                response.status_code, response.content))
    except Exception as err:
        log.error(err)
        return None
Example #13
def callGraphQL(query, variables=None):
    api_key = ""
    if config.STASH.get("api_key"):
        api_key = config.STASH["api_key"]

    if config.STASH.get("url") is None:
        log.error("You need to set the URL in 'config.py'")
        return None

    stash_url = config.STASH["url"] + "/graphql"
    headers = {
        "Accept-Encoding": "gzip, deflate, br",
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Connection": "keep-alive",
        "DNT": "1",
        "ApiKey": api_key
    }
    payload = {'query': query}
    if variables is not None:
        payload['variables'] = variables
    try:
        response = requests.post(stash_url, json=payload, headers=headers)
        if response.status_code == 200:
            result = response.json()
            if result.get("error"):
                for error in result["error"]["errors"]:
                    raise Exception("GraphQL error: {}".format(error))
            if result.get("data"):
                return result.get("data")
        elif response.status_code == 401:
            log.error(
                "[ERROR][GraphQL] HTTP Error 401, Unauthorised. You can add a API Key in 'config.py' in the 'py_common' folder"
            )
            return None
        else:
            raise ConnectionError("GraphQL query failed:{} - {}".format(
                response.status_code, response.content))
    except Exception as err:
        log.error(err)
        return None
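
A hypothetical call through the helper above; the query shape follows Stash's GraphQL conventions but is shown for illustration, not as a guaranteed schema:

scene_query = """
query FindScene($id: ID!) {
    findScene(id: $id) {
        id
        title
    }
}
"""
data = callGraphQL(scene_query, {"id": "42"})
if data:
    print(data["findScene"])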
Example #14
    # either way, pass it back and done
    print(json.dumps(scrape))
    sys.exit(0)


fragment = json.loads(sys.stdin.read())
# how are we being called: for search, for url, for movie or scene, or by id?
if len(sys.argv) > 1:
    if sys.argv[1] == "searchName":
        if fragment["name"]:
            scene_name = fragment["name"]
            searchName(scene_name)
            sys.exit(0)
        else:
            log.error('You need to search for something, like a Movie title')
            sys.exit(1)
    elif sys.argv[1] == "validId":
        validId(fragment)
        sys.exit(0)
    elif sys.argv[1] == "searchURL":
        if fragment["url"]:
            scene_url = fragment["url"]
            searchUrl(scene_url, "scene")
            sys.exit(0)
        else:
            log.error(
                'You need to set the URL (e.g. themoviedb.org/movie/*****)')
            sys.exit(1)
    elif sys.argv[1] == "movieURL":
        if fragment["url"]:
Example #15
if SEARCH_TITLE:
    SEARCH_TITLE = SEARCH_TITLE.replace(".", " ")
    log.debug("[API] Searching for: {}".format(SEARCH_TITLE))
    api_search = api_search_req("query", SEARCH_TITLE, api_url)
    final_json = None
    if api_search:
        result_search = []
        for scene in api_search:
            scraped_json = scraping_json(scene)
            if scraped_json.get("tags"):
                scraped_json.pop("tags")
            result_search.append(scraped_json)
        if result_search:
            final_json = result_search
    if final_json is None:
        log.error("API Search finished. No results!")
    print(json.dumps(final_json))
    sys.exit()

if url_id:
    log.debug("[API] Searching using URL_ID")
    api_search = api_search_req("id", url_id, api_url)
    if api_search:
        log.info("[API] Search give {} result(s)".format(len(api_search)))
        api_json = json_parser(api_search, 120, True)
    else:
        log.warning("[API] No result")
if url_title and api_json is None:
    log.debug("[API] Searching using URL_TITLE")
    api_search = api_search_req("query", url_title, api_url)
    if api_search:
Example #16
def main():
    stdin = sys.stdin.read()
    log.debug(stdin)
    fragment = json.loads(stdin)

    if not fragment['url']:
        log.error('No URL entered.')
        sys.exit(1)
    url = fragment['url'].strip()
    site, studio, sid, slug = get_from_url(url)
    if site is None:
        log.error('The URL could not be parsed')
        sys.exit(1)
    response, err = make_request(url, f"https://{site}")
    if err is not None:
        log.error(f'Could not fetch page HTML: {err}')
        sys.exit(1)
    j = fetch_page_json(response)
    if j is None:
        log.error('Could not find JSON on page')
        sys.exit(1)
    if 'video' not in j['data']:
        log.error('Could not locate scene within JSON')
        sys.exit(1)

    scene = j["data"]["video"]

    if scene.get('id'):
        if str(scene['id']) != sid:
            log.error('Wrong scene within JSON')
            sys.exit(1)
        log.info(f"Scene {sid} found")
    scrape = {}
    if scene.get('title'):
        scrape['title'] = scene['title']
    if scene.get('release_date'):
        scrape['date'] = scene['release_date'][:10]
    if scene.get('description'):
        details = BeautifulSoup(scene['description'], "html.parser").get_text()
        scrape['details'] = details
    if scene.get('sites'):
        scene_studio = scene['sites'][0]['name']
        scrape['studio'] = {'name': scene_studio}
    if scene.get('models'):
        models = []
        for m in scene['models']:
            models.extend([x.strip() for x in m['name'].split("&")])
        scrape['performers'] = [{'name': x} for x in models]
    if scene.get('tags'):
        scrape['tags'] = [{'name': x['name']} for x in scene['tags']]
    if j['data'].get('file_poster'):
        scrape['image'] = j['data']['file_poster']
    print(json.dumps(scrape))
Example #17
            sys.exit(1)
        log.info(f"Scene {sid} found")
    scrape = {}
    if scene.get('title'):
        scrape['title'] = scene['title']
    if scene.get('release_date'):
        scrape['date'] = scene['release_date'][:10]
    if scene.get('description'):
        details = BeautifulSoup(scene['description'], "html.parser").get_text()
        scrape['details'] = details
    if scene.get('sites'):
        scene_studio = scene['sites'][0]['name']
        scrape['studio'] = {'name': scene_studio}
    if scene.get('models'):
        models = []
        for m in scene['models']:
            models.extend([x.strip() for x in m['name'].split("&")])
        scrape['performers'] = [{'name': x} for x in models]
    if scene.get('tags'):
        scrape['tags'] = [{'name': x['name']} for x in scene['tags']]
    if j['data'].get('file_poster'):
        scrape['image'] = j['data']['file_poster']
    print(json.dumps(scrape))


if __name__ == '__main__':
    try:
        main()
    except Exception as e:
        log.error(e)
Example #18
        pass


USERFOLDER_PATH = str(pathlib.Path(__file__).parent.parent.absolute())
DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON", "Teamskeet")


# Not necessary, but why not?
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0'


fragment = json.loads(sys.stdin.read())
if fragment["url"]:
    scene_url = fragment["url"]
else:
    log.error('You need to set the URL (e.g. teamskeet.com/movies/*****)')
    sys.exit(1)

if "teamskeet.com/movies/" not in scene_url:
    log.error('The URL is not from a Teamskeet URL (e.g. teamskeet.com/movies/*****)')
    sys.exit(1)

scene_id = re.sub('.+/', '', scene_url)
if not scene_id:
    log.error("Error with the ID ({})\nAre you sure that the end of your URL is correct ?".format(scene_id))
    sys.exit(1)
use_local = 0
json_file = os.path.join(DIR_JSON, scene_id+".json")
if os.path.isfile(json_file):
    log.debug("Using local JSON...")
    use_local = 1
Example #19
                result {
                    path
                    mime
                    size
                    serve {
                        type
                        uri
                    }
                }
            }
        }
    }
    """


studios = {Site('Fit18'), Site('Thicc18')}
fragment = json.loads(sys.stdin.read())
url = fragment.get("url")

if url:
    for x in studios:
        if x.isValidURL(url):
            s = x.getScene(url)
            #log.debug(f"{json.dumps(s)}")
            print(json.dumps(s))
            sys.exit(0)

log.error(f"URL: {url} is not supported")
print("{}")
sys.exit(1)
Example #20
            if "performers" in res:
                res["performers"] += [{"name":x.title()} for x in split_performers]
            else:
                res["performers"] = [{"name":x.title()} for x in split_performers]

    if tree.find("Writer") != None:
        if tree.find("Writer").text:
            res["studio"] = {"name":tree.find("Writer").text}

    return res

if sys.argv[1] == "query":
    fragment = json.loads(sys.stdin.read())
    g_id = fragment.get("id")
    if not g_id:
        log.error(f"No ID found")
        sys.exit(1)

    gallery = graphql.getGalleryPath(g_id)
    if gallery:
        gallery_path = gallery.get("path")
        if gallery_path:
            p = pathlib.Path(gallery_path)
            
            res = {"title": fragment["title"]}
            # Determine if loose file format or archive such as .cbz or .zip
            if p.suffix.lower() in (".cbz", ".zip"):
                # Look for filename.xml where filename.(cbz|zip) is the gallery
                f = p.with_suffix('.xml')
                log.debug(f"Single File Format, using: {f}")
            else: