def parse_team(self, response: Response) -> Generator[Request, None, None]:
    """
    Follow every fixture listed on a team page to its match page.

    The fixtures are embedded in an inline script of the page; the part
    matched by ``P_TEAM_FIXTURES`` is evaluated with MiniRacer and each
    resulting row is unpacked into a ``Match``.

    :param response: response of a team fixtures page
    :type response: Response
    :return: one request per fixture, handled by ``parse_match``
    :rtype: Generator[Request, None, None]

    @url https://www.whoscored.com/Teams/167/Fixtures/England-Manchester-City
    @returns requests 0
    """
    script_node: str = response.xpath(
        '//*[@id="layout-content-wrapper"]/div[2]/script[3]'
    ).extract_first()

    engine = MiniRacer()
    fixtures_js = P_TEAM_FIXTURES.search(script_node).group("fixtures")
    match_url = "https://www.whoscored.com/Matches/{id}/"

    for row in engine.eval(fixtures_js):
        match: Match = Match(*row)
        yield response.follow(
            url=match_url.format(id=match.id),
            callback=self.parse_match,
        )
def parse(self, response: Response) -> Generator[Request, None, None]:
    """
    Parse whoscored statistic page and follow the selected tournaments

    The tournament list is embedded in an inline script; the part matched
    by ``P_TOUR`` is evaluated with MiniRacer. Tournaments without a name
    are skipped, and only ``(region id, tournament id)`` pairs contained
    in the ``REGIONS`` setting are followed.

    :param response: response of the statistics page
    :type response: Response
    :return: one request per selected tournament, handled by
        ``parse_tournaments``
    :rtype: Generator[Request, None, None]

    # Scrapy check - because of settings missing, use Premier League
    # (England) only for test purpose
    @url https://www.whoscored.com/Statistics
    @returns requests 1
    """
    script_text: str = response.css(
        "#layout-wrapper > script::text").extract_first()
    tournaments_js: str = P_TOUR.search(script_text).group("tournaments")

    engine = MiniRacer()
    for region in engine.eval(tournaments_js):
        for tournament in region["tournaments"]:
            if not tournament["name"]:
                continue

            key = (region["id"], tournament["id"])
            # England, Premier League (as default)
            if key not in self.settings.get("REGIONS", {(252, 2)}):
                continue

            yield response.follow(
                tournament["url"],
                callback=self.parse_tournaments,
                meta={"waitForSelector": "#layout-content-wrapper"},
            )
def init_js_ctx(self):
    """
    Create the MiniRacer context and load the opening-hours scripts.

    Reads ``OPENING_HOURS_JS`` and ``OPENING_HOURS_JS_WRAPPER`` and
    evaluates them, in that order, in a fresh context. The context is
    stored on ``self.js_ctx`` only after both scripts were evaluated, so
    a failure while loading never leaves a half-initialised context on
    the instance.

    :raises OSError: if one of the script files cannot be read
    """
    # JavaScript sources are read as UTF-8 explicitly instead of relying
    # on the locale-dependent default encoding of ``open``.
    with open(OPENING_HOURS_JS, "r", encoding="utf-8") as f:
        js_sources = f.read()
    with open(OPENING_HOURS_JS_WRAPPER, "r", encoding="utf-8") as f:
        js_wrapper = f.read()

    ctx = MiniRacer()
    ctx.eval(js_sources)
    ctx.eval(js_wrapper)
    # Publish the context only once it is fully loaded.
    self.js_ctx = ctx
class Mock:
    """Run the bundled ``Mock.mock`` JavaScript function through MiniRacer."""

    def __init__(self):
        """
        Load ``js/mock.mini_racer.min.js`` (relative to this module) and
        evaluate it in a new MiniRacer context.
        """
        script_path = os.path.join(os.path.dirname(__file__),
                                   'js/mock.mini_racer.min.js')
        # Use a context manager so the file handle is closed right away
        # instead of being left to the garbage collector.
        with codecs.open(script_path, encoding='utf-8') as js_file:
            self.__code = js_file.read()
        self.__ctx = MiniRacer()
        self.__ctx.eval(self.__code)

    def mock(self, template: typing.Union[dict, list, str], encoder=JSONEncoder, timeout=0, max_memory=0) -> typing.Union[dict, list, str]:
        """
        Mock from python object

        :param template: Mock template
        :param encoder: You can pass a custom JSON encoder by passing it in the encoder
        :param timeout: Limit run timeout, default no limit: timeout = 0(millisecond)
        :param max_memory: Limit max memory, default no limit: max_memory = 0
        :return: dict, list, str
        """
        return self.__ctx.call('Mock.mock', template, encoder=encoder, timeout=timeout, max_memory=max_memory)

    def mock_js(self, js_str: str, timeout=0, max_memory=0) -> typing.Union[dict, list, str]:
        """
        Mock from JSON string or JavaScript Object like-string

        ``js_str`` is inserted verbatim into the evaluated JavaScript
        expression, so it must be a valid JavaScript value expression.

        :param js_str: Mock template
        :param timeout: Limit run timeout, default no limit: timeout = 0(millisecond)
        :param max_memory: Limit max memory, default no limit: max_memory = 0
        :return: dict, list, str
        """
        js = "Mock.mock({template})".format(template=js_str)
        return self.__ctx.eval(js, timeout, max_memory)
class OpeningHoursEngine:
    """Lazily initialised MiniRacer context for the opening-hours scripts."""

    def __init__(self):
        # The context is created on first use, see ``call``.
        self.js_ctx = None

    def init_js_ctx(self):
        """
        Create the MiniRacer context and load the opening-hours scripts.

        Reads ``OPENING_HOURS_JS`` and ``OPENING_HOURS_JS_WRAPPER`` and
        evaluates them, in that order, in a fresh context. ``self.js_ctx``
        is assigned only after both scripts were evaluated: ``call``
        initialises whenever ``js_ctx`` is ``None``, so a failed load is
        retried on the next call instead of leaving a half-initialised
        context in place.

        :raises OSError: if one of the script files cannot be read
        """
        # JavaScript sources are read as UTF-8 explicitly instead of
        # relying on the locale-dependent default encoding of ``open``.
        with open(OPENING_HOURS_JS, "r", encoding="utf-8") as f:
            js_sources = f.read()
        with open(OPENING_HOURS_JS_WRAPPER, "r", encoding="utf-8") as f:
            js_wrapper = f.read()

        ctx = MiniRacer()
        ctx.eval(js_sources)
        ctx.eval(js_wrapper)
        self.js_ctx = ctx

    def call(self, *args, **kwargs):
        """
        Forward a call to the JavaScript context, creating it if needed.

        :param args: positional arguments passed to ``js_ctx.call``
        :param kwargs: keyword arguments passed to ``js_ctx.call``
        :return: whatever ``js_ctx.call`` returns
        """
        # V8 does not support forking processes.
        # As Idunn is called with "gunicorn --preload" in the Docker image,
        # the MiniRacer context should not be initialized with the app.
        if self.js_ctx is None:
            self.init_js_ctx()
        return self.js_ctx.call(*args, **kwargs)
def parse_tournaments(
        self, response: Response) -> Generator[Request, None, None]:
    """
    Follow a team of a tournament page to its fixtures page.

    TODO: here are two methods:
        1. go to seasons, and fetch all fixtures in one season
        2. go to team statistics, and fetch all games for each of teams

    Method 2 is implemented: the team list is embedded in an inline
    script; the part matched by ``P_TEAM`` is evaluated with MiniRacer
    and each resulting row is unpacked into a ``Team``.

    :param response: response of a tournament page
    :type response: Response
    :return: requests for team fixtures pages, handled by ``parse_team``
    :rtype: Generator[Request, None, None]

    @url https://www.whoscored.com/Regions/252/Tournaments/2/England-Premier-League
    @returns requests 0
    """
    # This is to go for method 1
    # for season in response.css("#seasons option"):
    #     url = season.css("option::attr(value)").extract_first()
    #     yield response.follow(url, callback=self.parse_season)

    # This is to go for method 2
    js_script: str = response.xpath(
        '//*[@id="layout-content-wrapper"]/div[2]/script[4]'
    ).extract_first()
    ctx = MiniRacer()
    team: List
    for team in ctx.eval(P_TEAM.search(js_script).group("history")):
        team_: Team = Team(*team)
        yield response.follow(
            url="https://www.whoscored.com/Teams/{id}/Fixtures/".format(
                id=team_.id),
            callback=self.parse_team,
            # "layout-content-wrapper" is an element id (see the xpath
            # above), so the CSS selector needs the leading "#".
            meta={"waitForSelector": "#layout-content-wrapper"},
        )
        # NOTE(review): only the first team is followed because of this
        # break - looks like a leftover limit for testing; confirm before
        # removing.
        break
# NOTE(review): the next three statements are the tail of a definition whose
# header lies outside this view; they are kept as they are.
mol._hydrogens = {n: 0 for n in mol._hydrogens}
smiles, order = mol._smiles(lambda x: random(), _return_order=True)
return ''.join(smiles).replace('~', '-'), order


class Calculate2DCGR(Calculate2D):
    """``Calculate2D`` variant providing its own ``_clean2d_prepare``."""
    __slots__ = ()

    def _clean2d_prepare(self):
        """
        Build a plain molecule from this object and return its SMILES.

        :return: tuple of the SMILES string (with ``~`` replaced by ``-``)
            and the order returned by ``mol._smiles``
        """
        mol = molecule.MoleculeContainer()
        for n, atom in self._atoms.items():
            # falls back to atomic number 6 (carbon) when the atom has a
            # falsy atomic number
            atom = Element.from_atomic_number(atom.atomic_number or 6)()
            mol.add_atom(atom, n)
        for n, m, bond in self.bonds():
            # falls back to order 1 when the bond order is falsy
            mol.add_bond(n, m, bond.order or 1)
        # reset the hydrogens count of every atom to zero
        mol._hydrogens = {n: 0 for n in mol._hydrogens}
        # a random value is used as the ordering key for every atom
        smiles, order = mol._smiles(lambda x: random(), _return_order=True)
        return ''.join(smiles).replace('~', '-'), order


# The JavaScript context is only created when py_mini_racer can be found;
# it evaluates the packaged clean2d.js resource once at import time.
if find_spec('py_mini_racer'):
    from py_mini_racer.py_mini_racer import MiniRacer, JSEvalException

    ctx = MiniRacer()
    # define ``self`` as an alias of the global object before loading the
    # script
    ctx.eval('const self = this')
    ctx.eval(resource_string(__name__, 'clean2d.js'))
else:  # disable clean2d support
    ctx = None


__all__ = ['Calculate2DMolecule', 'Calculate2DCGR', 'Calculate2DQuery']
# NOTE(review): the next two statements continue code that starts outside
# this view; they are kept as they are.
sitepackages.append(pr)
sitepackages.append(pr / 'lib')

# Look for clean2d.js in every candidate directory; the first hit wins.
for pr in sitepackages:
    pr = pr / 'clean2d.js'
    if pr.exists():
        lib_js = pr.read_text()
        break
else:
    # no candidate directory contains the script
    warn('broken package installation. clean2d.js not found', ImportWarning)
    lib_js = None

if find_spec('py_mini_racer') and lib_js:
    from py_mini_racer.py_mini_racer import MiniRacer, JSEvalException

    ctx = MiniRacer()
    # define ``self`` as an alias of the global object before loading the
    # script
    ctx.eval('const self = this')
    ctx.eval(lib_js)
else:  # disable clean2d support
    class Calculate2DMolecule:
        """Fallback used when clean2d support is disabled."""
        __slots__ = ()

        def clean2d(self):
            """
            Always fail, clean2d is not available.

            :raises NotImplementedError: always
            """
            # ``NotImplemented`` is a constant, not an exception class:
            # calling it fails with a TypeError instead of this message.
            raise NotImplementedError('py-mini-racer required for clean2d')

    class Calculate2DCGR:
        """Fallback used when clean2d support is disabled."""
        __slots__ = ()

        def clean2d(self):
            """
            Always fail, clean2d is not available.

            :raises NotImplementedError: always
            """
            raise NotImplementedError('py-mini-racer required for clean2d')
def _build_v8_context(query) -> MiniRacer:
    """
    Create a MiniRacer context in which ``queryfunc`` is bound to ``query``.

    ``query`` is formatted verbatim into a JavaScript ``let`` declaration,
    which is then evaluated in a new context.

    :param query: JavaScript source of the value to bind to ``queryfunc``
    :return: the context holding the ``queryfunc`` binding
    """
    declaration = f"let queryfunc = {query}"
    v8 = MiniRacer()
    v8.eval(declaration)
    return v8
def __init__(self):
    """
    Load ``js/mock.mini_racer.min.js`` (relative to this module) and
    evaluate it in a new MiniRacer context.
    """
    script_path = os.path.join(os.path.dirname(__file__),
                               'js/mock.mini_racer.min.js')
    # Use a context manager so the file handle is closed right away
    # instead of being left to the garbage collector.
    with codecs.open(script_path, encoding='utf-8') as js_file:
        self.__code = js_file.read()
    self.__ctx = MiniRacer()
    self.__ctx.eval(self.__code)