def categorize_json(self, folder_name):
    """Split the ``.json`` files of a folder into FSE and domain-data lists.

    Files are selected purely by file name: names ending in ``.json`` that
    start with ``fse_data`` go into the first list, names ending in ``.json``
    that start with ``domain_data`` go into the second. Other entries are
    ignored. The scan is not recursive.

    Args:
        folder_name: Directory to scan. A trailing path separator is
            optional.

    Returns:
        A ``(json_fse, json_dd)`` tuple of lists of paths, each path being
        ``folder_name`` joined with the file name, in ``os.listdir`` order.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. missing directory).
    """
    log.info("Dividing data between raw_fse and raw_dd.")

    # List the directory once so both result lists are built from the same
    # snapshot of its content.
    json_names = [
        name for name in os.listdir(folder_name) if name.endswith('.json')
    ]

    # os.path.join only inserts a separator when folder_name lacks a trailing
    # one, so callers that pass "dir/" get the same paths as before while
    # callers that pass "dir" no longer get "dirfse_data...".
    json_fse = [
        os.path.join(folder_name, name)
        for name in json_names
        if name.startswith('fse_data')
    ]
    json_dd = [
        os.path.join(folder_name, name)
        for name in json_names
        if name.startswith('domain_data')
    ]
    return json_fse, json_dd
def cleanup_json(json: str) -> str:
    """Remove a surrounding code fence or inline-code backticks, if present.

    Args:
        json: Text that may be wrapped in a triple-backtick code block
            (optionally tagged ``json`` or ``py``) or in single backticks.

    Returns:
        The text without the wrapping backticks and without the language
        tag. Whitespace around the payload is left untouched. Text that is
        not wrapped in backticks is returned unchanged.
    """
    if json.startswith("```") and json.endswith("```"):
        # Stripping the single character "`" is safe. Stripping "```json"
        # is not: str.strip() treats its argument as a *set* of characters,
        # so it also removed leading/trailing j, s, o, n (and p, y) from the
        # payload itself, e.g. "```null```" came back as "ull".
        inner = json.strip("`")
        # Drop the language tag as a prefix instead. "py" is not documented
        # but is accepted as well.
        for tag in ("json", "py"):
            if inner.startswith(tag):
                return inner[len(tag):]
        return inner
    if json.startswith("`") and json.endswith("`"):
        # Inline code span.
        return json.strip("`")
    return json
def jukebox(self, query, country='gb', jam=False):
    """Fetch jukebox assets and scrape artists or videos out of the response.

    Args:
        query: Search text, sent as ``mySearch`` when ``jam`` is falsy.
        country: Country code, sent as ``country`` when ``jam`` is falsy.
        jam: When truthy, its value is sent as ``ai`` to
            ``jukebox/generateMixTape`` and ``query``/``country`` are unused.

    Returns:
        A dict with the keys ``'artists'``, ``'artist_ids'`` and
        ``'videos'``. If the response starts with ``[{"`` it is scanned for
        artist names and identities; otherwise it is scanned for video
        entries, each a dict with ``asset_id``, ``title``, ``artist`` and
        ``thumb``. All scraped values are strings as captured by the regex.
    """
    assets = {'artists': [], 'artist_ids': [], 'videos': []}
    if jam:
        response = self.__get_html('jukebox/generateMixTape', {'ai': jam})
    else:
        response = self.__get_html('jukebox/findArtistAssets', {
            'mySearch': query,
            'country': country
        })

    # Raw strings: "\{", "\d" and "\}" are invalid escape sequences in a
    # normal string literal. The pattern text itself is unchanged.
    if response.startswith('[{"'):
        artist_pattern = (
            r'\{.+?ArtistName":"(.+?)".+?ArtistIdentity":(\d+).+?\}'
        )
        for found in re.finditer(artist_pattern, response):
            name, artist_id = found.groups()
            assets['artists'].append(name)
            assets['artist_ids'].append(artist_id)
    else:
        video_pattern = (
            r'src="(.+?)".+?contentTitle-(\d+)" value="(.+?)"'
            r'.+?value="(.+?)".+?value="(.+?)"'
        )
        # DOTALL because one video entry spans several lines of markup.
        for found in re.finditer(video_pattern, response, re.DOTALL):
            thumb, asset_id, title, artist, artist_id = found.groups()
            assets['videos'].append({
                'asset_id': asset_id,
                'title': title,
                'artist': artist,
                'thumb': thumb
            })
            assets['artist_ids'].append(artist_id)
    return assets
def _process_request(self, args): path, json = self._split(args) # @ prefixed means a file is used instead of some json if json and json.startswith('@'): json_fname = json[1:] json = open(json_fname, 'rb').read() full_url = urlparse.urljoin(self._cfg.base_url, path) return full_url, json
def strip_json(json):
    """Cut ``json`` down to the span from its first ``{`` to its last ``}``.

    Surrounding whitespace is removed first, then anything before the first
    opening brace and anything after the last closing brace is dropped.

    Raises:
        ValueError: If there is no ``{``, or no ``}`` from the first ``{``
            onwards.
    """
    trimmed = json.strip()
    # index() is 0 when the text already starts with "{", so no separate
    # startswith() check is needed; the same holds for rindex() at the end.
    opening = trimmed.index("{")
    from_brace = trimmed[opening:]
    closing = from_brace.rindex("}")
    return from_brace[:closing + 1]
def strip_json(json):
    """Return the outermost ``{...}`` span of ``json``.

    Whitespace is stripped, then leading text before the first ``{`` and
    trailing text after the last ``}`` are discarded.

    Raises:
        ValueError: If the text has no ``{``, or has no ``}`` at or after
            the first ``{``.
    """
    text = json.strip()

    # Drop any preamble in front of the opening brace.
    start = 0 if text.startswith("{") else text.index("{")
    text = text[start:]

    # Nothing to trim when the text already ends on a closing brace.
    if text.endswith("}"):
        return text
    end = text.rindex("}") + 1
    return text[:end]
def jukebox(self, query, country='gb', jam=False):
    """Request jukebox assets and extract artists or videos from the reply.

    Args:
        query: Search text; sent as ``mySearch`` unless ``jam`` is truthy.
        country: Country code; sent as ``country`` unless ``jam`` is truthy.
        jam: If truthy, ``jukebox/generateMixTape`` is requested with this
            value as ``ai`` instead of running an artist search.

    Returns:
        A dict with three lists: ``'artists'`` (names), ``'artist_ids'``
        and ``'videos'`` (dicts with ``asset_id``, ``title``, ``artist``,
        ``thumb``). A reply starting with ``[{"`` fills the artist lists;
        any other reply is scanned for videos. Values are the raw strings
        captured by the regular expressions.
    """
    assets = {'artists': [], 'artist_ids': [], 'videos': []}

    if jam:
        reply = self.__get_html('jukebox/generateMixTape', {'ai': jam})
    else:
        reply = self.__get_html('jukebox/findArtistAssets', {'mySearch': query, 'country': country})

    if reply.startswith('[{"'):
        # Raw string: "\{", "\d" and "\}" are not valid escapes in a plain
        # string literal. The pattern itself is unchanged.
        for hit in re.finditer(r'\{.+?ArtistName":"(.+?)".+?ArtistIdentity":(\d+).+?\}', reply):
            name, artist_id = hit.groups()
            assets['artists'].append(name)
            assets['artist_ids'].append(artist_id)
        return assets

    # Otherwise scan for video entries. DOTALL lets ".+?" run across the
    # line breaks inside one entry.
    for hit in re.finditer(r'src="(.+?)".+?contentTitle-(\d+)" value="(.+?)".+?value="(.+?)".+?value="(.+?)"', reply, re.DOTALL):
        thumb, asset_id, title, artist, artist_id = hit.groups()
        assets['videos'].append({'asset_id': asset_id, 'title': title, 'artist': artist, 'thumb': thumb})
        assets['artist_ids'].append(artist_id)
    return assets