def get_profile_id_name_map(
        self,
        website_name: str,
) -> dict:
    """ Fetch id and name properties of the profile class

    Parameters
    ----------
    website_name: str, obligatory
        Your targeted website_name in Eulerian Technologies platform

    Returns
    -------
    dict
        A dict as { "profile_id" : "profile_name" }
    """
    if not isinstance(website_name, str):
        raise TypeError("website_name should be a string")

    profile_url = f"{self._api_v2}/ea/{website_name}/db/profile/search.json"

    _json = _request._to_json(
        request_type="get",
        url=profile_url,
        params={"limit": 100, "output-as-kv": 1},
        headers=self._http_headers,
        print_log=self._print_log)

    return {
        row["profile_id"]: row["profile_name"]
        for row in _json["data"]["rows"]
    }

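# Hypothetical usage sketch for get_profile_id_name_map; the client instance
# name `conn` and the website name "demo-site" are illustrative only:
#
#   profile_map = conn.get_profile_id_name_map(website_name="demo-site")
#   for profile_id, profile_name in profile_map.items():
#       print(profile_id, profile_name)
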
def get_view_id_name_map(self, website_name: str) -> dict:
    """ Fetch attribution rules

    Parameters
    ----------
    website_name: str, obligatory
        Your targeted website_name in Eulerian Technologies platform

    Returns
    -------
    dict
        A dict as { "view_id" : "view_name", ...}
    """
    if not isinstance(website_name, str):
        raise TypeError("website_name should be a string")

    view_url = f"{self._api_v2}/ea/{website_name}/db/view/get_all_name.json"

    view_json = _request._to_json(
        request_type="get",
        url=view_url,
        headers=self._http_headers,
        print_log=self._print_log)

    view_id_idx = view_json["data"]["fields"].index({"name": "view_id"})
    view_name_idx = view_json["data"]["fields"].index({"name": "view_name"})

    views = {
        view[view_id_idx]: view[view_name_idx]
        for view in view_json["data"]["rows"]
    }

    if "0" not in views:
        views["0"] = "last channel"

    return views

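# Hypothetical usage sketch for get_view_id_name_map; `conn` and "demo-site"
# are illustrative names only:
#
#   views = conn.get_view_id_name_map(website_name="demo-site")
#   # views maps view ids to attribution rule names; when the API does not
#   # return a view "0", it defaults to "last channel"
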
def get_website_by_name(self, website_name: str) -> dict:
    """ Fetch website properties

    Parameters
    ----------
    website_name: str, obligatory
        Your targeted website_name in Eulerian Technologies platform

    Returns
    -------
    dict
        A dict as { "website_prop" : "website_prop_value" }
    """
    if not isinstance(website_name, str):
        raise TypeError("website_name should be a string")

    website_url = f"{self._api_v2}/ea/{website_name}/db/website/get_me.json"

    website_json = _request._to_json(
        request_type="get",
        url=website_url,
        params={"output-as-kv": 1},
        headers=self._http_headers,
        print_log=self._print_log)

    d_website = website_json["data"]["rows"][0]

    if not isinstance(d_website, dict):
        raise TypeError(f"d_website={d_website} should be a dict dtype")

    return d_website

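# Hypothetical usage sketch for get_website_by_name; `conn` and "demo-site"
# are illustrative only:
#
#   d_website = conn.get_website_by_name(website_name="demo-site")
#   website_id = d_website["website_id"]  # used below to build realtime paths
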
def _get_all_paths(
        self,
        i: int,
        l_path: list,
        l_prev_path: list,
        url: str,
        payload: dict,
):
    """ Recursively expand path elements ending in "[%d]" into concrete paths,
    one per id returned by the API, and return the full list of paths """
    l_next_path = []

    for prev_path in l_prev_path:
        if not l_path[i].endswith("[%d]"):
            l_next_path.append(".".join([prev_path, l_path[i]]))

        else:
            # query the API without the placeholder to list the available ids
            payload["path"] = ".".join([prev_path, l_path[i].replace("[%d]", "")])

            _json = _request._to_json(
                url=url,
                request_type="get",
                headers=self._http_headers,
                params=payload,
                print_log=self._print_log)

            for _id in _get_ids(_json):
                l_next_path.append(".".join([prev_path, l_path[i] % int(_id)]))

    if i == len(l_path) - 1:
        return l_next_path

    i += 1
    return self._get_all_paths(
        i=i,
        l_path=l_path,
        l_prev_path=l_next_path,
        url=url,
        payload=payload)

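# Illustrative sketch of the expansion performed by _get_all_paths (all values
# here are hypothetical, not taken from the API): with
# l_prev_path=["website[1]"] and l_path=["channel[%d]"], the "[%d]" element
# triggers an API call listing the available ids, so the result could look
# like ["website[1].channel[3]", "website[1].channel[7]"].
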
def job_create(url, headers, query, log):
    request = {"kind": "edw#request", "query": query}
    return _request._to_json(
        request_type='post',
        url=url,
        json_data=request,
        headers=headers,
        print_log=log)

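# Hypothetical usage sketch for job_create; the url, headers and query values
# below are placeholders, not real endpoints, credentials or query syntax:
#
#   job_json = job_create(
#       url="<edw-jobrun-endpoint>",
#       headers=http_headers,  # illustrative: the authentication headers dict
#       query="<edw query string>",
#       log=True)
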
def session(domain, headers, ip, log):
    url = f"{domain}/er/account/get_dw_session_token.json"
    payload = {'ip': ip}
    _json = _request._to_json(
        request_type="get",
        url=url,
        headers=headers,
        params=payload,
        print_log=log)
    return _json['data']['rows'][0][0]

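# Hypothetical usage sketch for session; domain, headers and ip are
# placeholders, not real values:
#
#   token = session(
#       domain="<api-domain>",
#       headers=http_headers,  # illustrative: the authentication headers dict
#       ip="<public-ip>",
#       log=True)
#   # token is the datawarehouse session token returned by the API
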
def _check_credentials(self) -> None:
    """ Check credentials validity and set the _allowed_website_names attribute """
    # raise an error if API error or fail to load as json
    allowed_website_names = []
    overview_url = f"{self._api_v2}/er/account/authtree.json"

    authtree_json = _request._to_json(
        request_type="get",
        url=overview_url,
        headers=self._http_headers,
        print_log=self._print_log)

    for v in authtree_json["data"].values():
        allowed_website_names.append(v["website_name"])

    self._allowed_website_names = allowed_website_names

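# Hypothetical sketch of how the attribute set above can be used afterwards
# (`conn` and "demo-site" are illustrative names; this method is normally
# called internally):
#
#   conn._check_credentials()
#   if "demo-site" in conn._allowed_website_names:
#       ...
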
def _all_paths_to_df(
        self,
        url: str,
        date_scale: str,
        l_path: list,
        l_dim,
        l_kpi,
        payload: dict,
):
    payload["ea-columns"] = "name," + ",".join([*l_dim, *l_kpi])

    if date_scale:
        del payload["ea-columns"]
        payload["date-scale"] = date_scale
        payload["dd-dt"] = ",".join([*l_dim, *l_kpi])

    l_slice_path = []
    l_df = []

    # query the API by slices of paths to keep requests small,
    # then concatenate the partial dataframes
    for i in range(len(l_path)):
        l_slice_path.append(l_path[i])

        if len(l_path) == 1 or (i and (i % 10 == 0 or i == len(l_path) - 1)):
            payload['path'] = ",".join(l_slice_path)

            _json = _request._to_json(
                url=url,
                request_type="get",
                params=payload,
                headers=self._http_headers,
                print_log=self._print_log)

            sub_df = pd.DataFrame(
                data=_json["data"]["rows"],
                columns=[d_field["name"] for d_field in _json["data"]["fields"]])

            l_df.append(sub_df)
            l_slice_path = []

    df_concat = pd.concat(
        objs=l_df,
        axis=0,
        ignore_index=True)

    return df_concat

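# Illustrative note on _all_paths_to_df: paths are requested in batches
# (flushed roughly every 10 paths and at the last path), each batch yields a
# sub-dataframe, and all sub-dataframes are concatenated into the single
# dataframe that is returned.
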
def download_datamining(
        self,
        website_name: str,
        datamining_type: str,
        payload=None,
        status_waiting_seconds=5,
        output_directory='',
        override_file=False,
        n_days_slice=31,
):
    """ Fetch datamining data from the API into a gzip compressed CSV file

    Parameters
    ----------
    website_name : str, obligatory
        Your targeted website_name in Eulerian Technologies platform

    datamining_type : str, obligatory
        The targeted datamining (isenginerequest, actionlogorder, scart, estimate, order)

    payload : dict, optional
        The datamining payload that contains the requested data

    status_waiting_seconds: int, optional
        Waiting time in seconds between each status query

    output_directory : str, optional
        The local targeted directory

    override_file : bool, optional
        If set to True, will override output_path2file (if exists)
        with the new datamining content
        Default: False

    n_days_slice: int, optional
        Split datamining query into days slice to reduce server load
        Default: 31

    Returns
    -------
    list
        A list of path2file
    """
    if not isinstance(website_name, str):
        raise TypeError("website_name should be a str type")
    self._is_allowed_website_name(website_name)

    if not isinstance(datamining_type, str):
        raise TypeError("datamining_type should be a str type")

    if not isinstance(payload, dict) or not payload:
        raise TypeError("payload should be a non-empty dict")

    # deep copy so the caller's payload is not modified:
    # date-from and date-to are overwritten for each slice in the loop below
    dc_payload = copy.deepcopy(payload)

    if not isinstance(n_days_slice, int) or n_days_slice < 0:
        raise TypeError("n_days_slice should be a positive integer")

    l_allowed_datamining_types = [
        "order",
        "estimate",
        "isenginerequest",
        "actionlog",
        "scart",
    ]

    if datamining_type not in l_allowed_datamining_types:
        raise ValueError(
            f"datamining_type={datamining_type} not allowed.\n"
            f"Use one of the following: {', '.join(l_allowed_datamining_types)}")

    date_from = dc_payload["date-from"] if "date-from" in dc_payload else None
    if not date_from:
        raise ValueError("missing parameter=date-from in payload object")

    date_to = dc_payload['date-to'] if 'date-to' in dc_payload else None
    if not date_to:
        raise ValueError("missing parameter=date-to in payload object")

    date_format = "%m/%d/%Y"
    dt_date_from = datetime.strptime(date_from, date_format)
    dt_date_to = datetime.strptime(date_to, date_format)

    if dt_date_from > dt_date_to:
        raise ValueError("'date-from' cannot occur later than 'date-to'")

    # marketing attribution rule id, default to 0
    if "view-id" in dc_payload:
        dc_payload["view-id"] = str(dc_payload["view-id"])
        match = re.match(pattern=r'^[0-9]$', string=dc_payload["view-id"])
        if not match:
            raise ValueError("view-id should match ^[0-9]$")
    else:
        dc_payload["view-id"] = "0"

    dt_tmp_date_to = dt_date_to
    n_days_slice = timedelta(days=n_days_slice)
    one_day_slice = timedelta(days=1)
    _os._create_directory(output_directory=output_directory)
    l_path2file = []  # store each file for n_days_slice

    # To avoid overloading the API with huge requests,
    # the query is split into smaller time ranges of "n_days_slice" days;
    # loop until the temporary upper bound reaches the requested date-to
    while dt_tmp_date_to <= dt_date_to:
        dt_tmp_date_to = dt_date_from + n_days_slice
        # Cannot request further than dt_date_to
        dt_tmp_date_to = dt_date_to if dt_tmp_date_to >= dt_date_to else dt_tmp_date_to
        date_from = dt_date_from.strftime(date_format)
        date_to = dt_tmp_date_to.strftime(date_format)

        date_from_file = date_from.replace("/", "_")
        date_to_file = date_to.replace("/", "_")

        output_filename = "_".join([
            website_name,
            datamining_type,
            "view",
            dc_payload["view-id"],
            "from",
            date_from_file,
            "to",
            date_to_file,
        ]) + ".csv.gz"

        output_path2file = os.path.join(output_directory, output_filename)

        if not _request._is_skippable(output_path2file=output_path2file,
                                      override_file=override_file,
                                      print_log=self._print_log):

            dc_payload['date-from'] = date_from
            dc_payload['date-to'] = date_to

            search_url = f"{self._api_v2}/ea/{website_name}/report/{datamining_type}/search.json"

            search_json = _request._to_json(
                request_type="get",
                url=search_url,
                params=dc_payload,
                headers=self._http_headers,
                print_log=self._print_log)

            jobrun_id = search_json["jobrun_id"]
            status_url = f"{self._api_v2}/ea/{website_name}/report/{datamining_type}/status.json"
            status_payload = {"jobrun-id": jobrun_id}
            ready = False

            if not isinstance(status_waiting_seconds, int) or status_waiting_seconds < 5:
                status_waiting_seconds = 5

            while not ready:
                self._log(f'Waiting for jobrun_id={jobrun_id} to complete')
                time.sleep(status_waiting_seconds)

                status_json = _request._to_json(
                    request_type="get",
                    url=status_url,
                    params=status_payload,
                    headers=self._http_headers,
                    print_log=self._print_log)

                if status_json["jobrun_status"] == "COMPLETED":
                    ready = True

            download_url = f"{self._api_v2}/ea/{website_name}/report/{datamining_type}/download.json"
            download_payload = {'output-as-csv': 0, 'jobrun-id': jobrun_id}

            req = urllib.request.Request(
                url=f"{download_url}?{urllib.parse.urlencode(download_payload)}",
                headers=self._http_headers,
            )

            _stream_req(req=req, output_path2file=output_path2file)

        l_path2file.append(output_path2file)

        # last iteration, we queried up to the requested date
        if dt_tmp_date_to == dt_date_to:
            break

        # add one_day_slice to avoid querying the same day twice
        dt_date_from += n_days_slice + one_day_slice

    return l_path2file

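# Hypothetical usage sketch for download_datamining; `conn`, the website name
# and the payload values are illustrative only:
#
#   l_files = conn.download_datamining(
#       website_name="demo-site",
#       datamining_type="order",
#       payload={"date-from": "01/01/2024", "date-to": "03/31/2024"},
#       output_directory="/tmp/ea_exports",
#       n_days_slice=31)
#   # one gzip compressed CSV file per time slice; existing files are reused
#   # unless override_file=True
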
def download_flat_overview_realtime_report(
        self,
        date_from: str,
        date_to: str,
        website_name: str,
        report_name: str,
        kpi: list,
        channel: list = None,
        view_id: int = 0,
        filters: dict = None) -> pd.DataFrame:
    """ Fetch realtime report data into a pandas dataframe

    Parameters
    ----------
    date_from: str, mandatory
        mm/dd/yyyy

    date_to: str, mandatory
        mm/dd/yyyy

    website_name: str, mandatory
        Your targeted website_name in Eulerian Technologies platform

    report_name: str, mandatory

    kpi: list, mandatory
        List of kpis to request

    channel: list, optional
        List of channels (ADVERTISING...)

    view_id: int, optional
        Between 0 and 9

    filters: dict, optional
        To filter request results

    Returns
    -------
    pd.DataFrame()
        A pandas dataframe
    """
    if not isinstance(date_from, str):
        raise TypeError("date_from should be a string dtype")

    if not isinstance(date_to, str):
        raise TypeError("date_to should be a string dtype")

    if not isinstance(website_name, str):
        raise TypeError("website_name should be a string dtype")

    if not isinstance(report_name, str):
        raise TypeError("report_name should be a string dtype")

    if not isinstance(kpi, list):
        raise TypeError("kpi should be a list dtype")

    payload = {
        'date-from': date_from,
        'date-to': date_to,
    }

    if filters:
        if not isinstance(filters, dict):
            raise TypeError(f"filters={filters} should be a dict dtype")
        filters = self.check_convert_realtime_filter(website_name, filters)
    else:
        filters = {}

    for k in filters.keys():
        if len(filters[k]):
            payload[k] = filters[k]

    view_id = str(view_id)
    view_map = self.get_view_id_name_map(website_name)

    if view_id not in view_map:
        raise ValueError(
            f"view_id={view_id} not found. Allowed: {', '.join(view_map.keys())}")

    payload["view-id"] = view_id
    d_website = self.get_website_by_name(website_name)
    url = f"{self._api_v2}/ea/{website_name}/report/realtime/{report_name}.json"

    path_module = __import__(
        name="eanalytics_api_py.internal.realtime_overview.path._" + report_name,
        fromlist=report_name)

    l_df = []
    # because we override values we want a clean copy
    d_path = copy.deepcopy(path_module.d_path)

    if not channel:
        channel = list(d_path.keys())

    for _channel in channel:
        l_path = d_path[_channel]["path"]
        l_path[0] = l_path[0] % int(d_website["website_id"])
        l_dim = d_path[_channel]["dim"]

        if not isinstance(l_dim, list):
            raise TypeError(f"l_dim={l_dim} should be a list dtype")

        payload['path'] = ".".join(l_path)
        payload['ea-columns'] = ",".join([*l_dim, *kpi])

        _json = _request._to_json(
            url=url,
            request_type="get",
            params=payload,
            headers=self._http_headers,
            print_log=True)

        sub_df = pd.DataFrame(
            data=_json["data"]["rows"],
            columns=[d_field["name"] for d_field in _json["data"]["fields"]])

        if "add_dim_value_map" in d_path[_channel]:
            for _dim, _value in d_path[_channel]["add_dim_value_map"].items():
                sub_df[_dim] = _value

        if "rename_dim_map" in d_path[_channel]:
            sub_df.rename(columns=d_path[_channel]["rename_dim_map"], inplace=True)

        # override name with alias if alias is set
        for name, alias in path_module.override_dim_map.items():
            if all(_ in sub_df.columns for _ in [name, alias]):
                mask = (sub_df[alias].isin([0, '0']))
                sub_df.loc[mask, alias] = sub_df[name]
                sub_df.drop(labels=alias, axis=1, inplace=True)

        sub_df.rename(columns=path_module.dim_px_map, inplace=True)
        l_df.append(sub_df)

    df = pd.concat(l_df, axis=0, ignore_index=True)

    for col_name in df.columns:
        if col_name in path_module.dim_px_map.values():
            df[col_name] = df[col_name].astype("category")

        elif any(df[col_name].astype("str").str.contains(".", regex=False)):
            df[col_name] = df[col_name].astype("float64")

        else:
            df[col_name] = df[col_name].astype("int64")

    return df

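# Hypothetical usage sketch for download_flat_overview_realtime_report;
# `conn`, the website name, report name and kpi names are illustrative only:
#
#   df = conn.download_flat_overview_realtime_report(
#       date_from="01/01/2024",
#       date_to="01/31/2024",
#       website_name="demo-site",
#       report_name="<realtime-report-name>",
#       kpi=["<kpi-name>"],
#       view_id=0)
#   print(df.dtypes)  # dimensions as category, metrics as float64 or int64
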
def job_status(url, headers, log):
    return _request._to_json(
        request_type='get',
        url=url,
        headers=headers,
        print_log=log)

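# Hypothetical usage sketch for job_status, typically polled after job_create;
# the url and headers below are placeholders:
#
#   status_json = job_status(
#       url="<jobrun-status-url>",
#       headers=http_headers,  # illustrative: the authentication headers dict
#       log=True)
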