def GenerateReadme(game_schema:GameSchema, table_schema:TableSchema, path:Path = Path("./")):
    """Assemble a readme.md for a game's exported dataset.

    The readme is built from three parts, in order:
    1. A game-specific source file (doc/readme_src/<game>_readme_src.md), if present.
    2. Generated feature/column metadata from the game and table schemas.
    3. The global data changelog (doc/readme_src/changelog_src.md), if present.

    Missing source files are logged at WARNING and replaced with placeholder text;
    a failure to create/open readme.md itself is logged at ERROR.

    :param game_schema: schema describing the game; provides _game_name.
    :param table_schema: schema describing the event table columns.
    :param path: directory in which to write readme.md (created if needed).
    """
    try:
        os.makedirs(name=path, exist_ok=True)
        with open(path / "readme.md", "w") as readme:
            # 1. Open files with game-specific readme data, and global db changelog.
            source_dir = Path("./doc/readme_src/")
            try:
                with open(source_dir / f"{game_schema._game_name}_readme_src.md", "r") as readme_src:
                    readme.write(readme_src.read())
            except FileNotFoundError:
                readme.write("No game readme prepared")
                Logger.Log(f"Could not find {game_schema._game_name}_readme_src", logging.WARNING)
            finally:
                # Always leave a blank line between sections, even on failure.
                readme.write("\n\n")
            # 2. Use schema to write feature & column descriptions to the readme.
            meta = FileManager.GenCSVMetadata(game_schema=game_schema, table_schema=table_schema)
            readme.write(meta)
            # 3. Append any important data from the data changelog.
            try:
                with open(source_dir / "changelog_src.md", "r") as changelog_src:
                    readme.write(changelog_src.read())
            except FileNotFoundError:
                readme.write("No changelog prepared")
                Logger.Log("Could not find changelog_src", logging.WARNING)
    except OSError as err:
        # Broadened from FileNotFoundError: makedirs/open-for-write can also fail
        # with PermissionError, IsADirectoryError, etc., all subclasses of OSError.
        Logger.Log("Could not open readme.md for writing.", logging.ERROR)
        traceback.print_tb(err.__traceback__)
def _getFeatureValues(self) -> List[Any]: if len(self._begin_times) < len(self._complete_times): Logger.Log( f"Player began level {self._count_index} {len(self._begin_times)} times but completed it {len(self._complete_times)}.", logging.WARNING) _diffs = [] for level in self._levels_encountered: if level in self._begin_times.keys( ) and level in self._complete_times.keys(): _num_plays = min(len(self._begin_times[level]), len(self._complete_times[level])) _diffs += [(self._complete_times[level][i] - self._begin_times[level][i]).total_seconds() for i in range(_num_plays)] else: if level not in self._begin_times.keys( ) and level in self._complete_times.keys(): Logger.Log( f"Player completed level {level}, but did not begin it!", logging.WARN) elif level in self._begin_times.keys( ) and level not in self._complete_times.keys(): Logger.Log( f"Player began level {level}, but did not complete it.", logging.DEBUG) elif level not in self._begin_times.keys( ) and level not in self._complete_times.keys(): Logger.Log( f"Player had level {level} listed as encountered, but did not begin *or* complete it.", logging.WARN) _total_time = sum(_diffs) if len(self._levels_encountered) > 0: return [_total_time / len(self._levels_encountered)] else: return [None]
def DatesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Union[List[int],None]=None) -> Union[Dict[str,datetime], Dict[str,None]]:
    """Look up the min/max datetimes covered by the given IDs.

    Returns a {'min': ..., 'max': ...} dict; both values are None when the
    source interface is not open.
    """
    if not self.IsOpen():
        Logger.Log(f"Could not retrieve date range {len(id_list)} session IDs, the source interface is not open!", logging.WARNING, depth=3)
        return {'min':None, 'max':None}
    Logger.Log(f"Retrieving date range from IDs with {id_mode} ID mode.", logging.DEBUG, depth=3)
    return self._datesFromIDs(id_list=id_list, id_mode=id_mode, versions=versions)
def RowsFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Union[List[int],None]=None) -> Union[List[Tuple], None]:
    """Fetch raw event rows for the given IDs.

    Returns None (with a WARNING log) when the source interface is not open.
    """
    if not self.IsOpen():
        Logger.Log(f"Could not retrieve rows for {len(id_list)} session IDs, the source interface is not open!", logging.WARNING, depth=3)
        return None
    Logger.Log(f"Retrieving rows from IDs with {id_mode} ID mode.", logging.DEBUG, depth=3)
    return self._rowsFromIDs(id_list=id_list, id_mode=id_mode, versions=versions)
def incAggregateVal(self, feature_name: str, increment: Union[int, float] = 1) -> None:
    """Add `increment` to the named aggregate feature, if it exists and is numeric."""
    if not self._has_feature(feature_name):
        Logger.Log(
            "Attempted to increment a feature that doesn't exist!",
            logging.WARN)
        return
    current = self.features[feature_name]
    # Exact type check (not isinstance) kept deliberately, mirroring the
    # original behavior (e.g. bools are rejected).
    if type(current) in (int, float):
        self.features[feature_name] = current + increment
    else:
        Logger.Log("Attempted to increment a non-numeric value!", logging.WARN)
def _backupFileExportList(self) -> bool:
    """Copy file_list.json to file_list.json.bak in the data directory.

    :return: True only when the backup file was actually written; False when
             the source file is missing or the copy fails.
    """
    src : Path = self._data_dir / "file_list.json"
    dest : Path = self._data_dir / "file_list.json.bak"
    if not src.exists():
        # Bug fix: previously this case fell through to the `else` clause,
        # logging a success message and returning True even though nothing
        # was backed up.
        Logger.Log(f"Could not back up file_list.json, because it does not exist!", logging.WARN)
        return False
    try:
        shutil.copyfile(src=src, dst=dest)
    except Exception as err:
        msg = f"{type(err)} {str(err)}"
        Logger.Log(f"Could not back up file_list.json. Got the following error: {msg}", logging.ERROR)
        return False
    else:
        Logger.Log(f"Backed up file_list.json to {dest}", logging.INFO)
        return True
def _receiveEventTrigger(self, event: Event) -> None: # TODO: consider how to put a limit on times this runs, based on how big export is. if self._debug_count < 20: Logger.Log("ExportManager received an event trigger.", logging.DEBUG) self._debug_count += 1 self._processEvent(next_event=event)
def FullDateRange(self) -> Union[Dict[str,datetime], Dict[str,None]]:
    """Return the full {'min','max'} date range available from the source.

    Both values are None when the source interface is not open.
    """
    if self.IsOpen():
        return self._fullDateRange()
    Logger.Log(f"Could not get full date range, the source interface is not open!", logging.WARNING, depth=3)
    return {'min':None, 'max':None}
def GetSessionFeatures(self, slice_num:int, slice_count:int, as_str:bool = False) -> List[List[Any]]:
    """Refresh cached results if needed, then return the session-feature rows."""
    query_start : datetime = datetime.now()
    self._try_update(as_str=as_str)
    lines = self._latest_results.get('sessions', [])
    time_delta = datetime.now() - query_start
    Logger.Log(f"Time to retrieve Session lines for slice [{slice_num}/{slice_count}]: {time_delta} to get {len(lines)} lines", logging.INFO, depth=2)
    return lines
def GetEventsFile(self) -> IO:
    """Return the open events file handle, falling back to stdout when absent."""
    events_file = self._files['events']
    if events_file is None:
        Logger.Log("No events file available, returning standard output instead.", logging.WARN)
        return sys.stdout
    return events_file
def GetPopulationFeatures(self, as_str:bool = False) -> List[Any]:
    """Refresh cached results if needed, then return the population-feature rows."""
    query_start : datetime = datetime.now()
    self._try_update(as_str=as_str)
    lines = self._latest_results.get('population', [])
    time_delta = datetime.now() - query_start
    Logger.Log(f"Time to retrieve Population lines: {time_delta} to get {len(lines)} lines", logging.INFO, depth=2)
    return lines
def AllIDs(self) -> Union[List[str],None]:
    """Return every session ID known to the source, or None when it is closed."""
    if not self.IsOpen():
        Logger.Log("Can't retrieve list of all session IDs, the source interface is not open!", logging.WARNING, depth=3)
        return None
    return self._allIDs()
def GetPopulationFile(self) -> IO:
    """Return the open population file handle, falling back to stdout when absent."""
    population_file = self._files['population']
    if population_file is None:
        Logger.Log("No population file available, returning standard output instead.", logging.WARN)
        return sys.stdout
    return population_file
def _extractFromComplete(self, level, event_client_time: datetime, event_data: Dict[str, Any]): self._features.incValByIndex(feature_name="completesCount", index=level, increment=1) if self._active_begin == None: sess_id = self._features.getValByName(feature_name="sessionID") Logger.Log( f"Got a 'Complete' event when there was no active 'Begin' event! Level {level}, Sess ID: {sess_id}", logging.ERROR) else: self._end_times[level] = event_client_time time_taken = self._calcLevelTime(level) self._features.incValByIndex(feature_name="durationInSecs", index=level, increment=time_taken) self._features.incAggregateVal( feature_name="sessionDurationInSecs", increment=time_taken) self._active_begin = None score = event_data["stability"]["pack"] + event_data["stability"][ "charge"] max_score = max( score, self._features.getValByIndex(feature_name="finalScore", index=level)) self._features.setValByIndex(feature_name="finalScore", index=level, new_value=max_score)
def _validate_job(self, job_data): ret_val : bool = False if job_data['string_value'] and job_data['string_value'] in self._job_map: ret_val = True else: Logger.Log(f"Got invalid job_name data in JobsAttempted", logging.WARNING) return ret_val
def _open(self) -> bool: try: self._data = pd.read_csv(filepath=self._filepath, delimiter=self._delimiter, parse_dates=['timestamp']) self._is_open = True return True except FileNotFoundError as err: Logger.Log(f"Could not find file {self._filepath}.", logging.ERROR) return False
def _processEvent(self, next_event: Event): try: if self._event_mgr is not None: self._event_mgr.ProcessEvent(event=next_event) if self._feat_mgr is not None: self._feat_mgr.ProcessEvent(event=next_event) except Exception as err: if default_settings.get("FAIL_FAST", None): Logger.Log(f"Error while processing event {next_event}.", logging.ERROR, depth=2) raise err else: Logger.Log( f"Error while processing event {next_event}. This event will be skipped. \nFull error: {traceback.format_exc()}", logging.WARNING, depth=2)
def _has_feature(self, feature_name) -> bool: try: _ = self.features[feature_name] except KeyError: Logger.Log(f'Feature {feature_name} does not exist.', logging.ERROR) return False return True
def IDsFromDates(self, min:datetime, max:datetime, versions: Union[List[int],None]=None) -> Union[List[str], None]:
    """Return the session IDs whose dates fall between `min` and `max`.

    Returns None (with a WARNING log) when the source interface is not open.
    """
    if self.IsOpen():
        return self._IDsFromDates(min=min, max=max, versions=versions)
    str_min, str_max = min.strftime("%Y%m%d"), max.strftime("%Y%m%d")
    Logger.Log(f"Could not retrieve IDs for {str_min}-{str_max}, the source interface is not open!", logging.WARNING, depth=3)
    return None
def _clearLines(self) -> None:
    """Drop all accumulated feature data for this player.

    Replaces the registry outright, which is helpful when processing a lot of
    data and we want to avoid eating too much memory.
    """
    Logger.Log(f"Clearing features from PlayerProcessor for {self._player_id}.", logging.DEBUG, depth=2)
    self._registry = FeatureRegistry()
def _extractFromEvent(self, event: Event) -> None: if event.event_name == "BEGIN.0": self._begin_times.append(event.timestamp) elif event.event_name == "COMPLETE.0": self._complete_times.append(event.timestamp) else: Logger.Log( f"AverageLevelTime received an event which was not a BEGIN or a COMPLETE!", logging.WARN)
def initLevel(self, level) -> None:
    """Zero out each per-level feature's value for `level`, if not yet set.

    Values already initialized (non-None) are left untouched. Logs an ERROR
    when `level` is not a tracked index of a per-level feature.
    """
    for f_name in self.perlevels:
        feature = self.features[f_name]
        if type(feature) is dict and level in feature.keys():
            # Only overwrite values that were never initialized.
            if feature[level]["val"] is None:
                feature[level]["val"] = 0
        else:
            # Fix: corrected "intialize" -> "initialize" typo in the log message.
            Logger.Log(f"Tried to initialize invalid level: {level}", logging.ERROR)
def LoadToDetectorRegistry(self, registry: DetectorRegistry, trigger_callback: Callable[[Event], None]) -> None:
    """Instantiate and register this game's enabled detectors into `registry`.

    Aggregate detectors are loaded first, then per-count detectors (one
    instance per count index, named "<prefix><i>_<name>"). A detector type
    that raises NotImplementedError is logged at ERROR and skipped; the
    try/except/else shape guarantees only successfully-built detectors are
    registered.

    :param registry: the DetectorRegistry to register detectors into.
    :param trigger_callback: callback passed to each detector for firing events.
    """
    # first, load aggregate features
    for name, aggregate in self._game_schema.aggregate_detectors().items():
        # Skip detectors that are disabled in the schema (unless overridden).
        if ExtractorLoader._validateFeature(name=name, base_setting=aggregate.get('enabled', False), overrides=self._overrides):
            try:
                detector = self.LoadDetector(detector_type=name, name=name, detector_args=aggregate, trigger_callback=trigger_callback)
            except NotImplementedError as err:
                Logger.Log(f"In ExtractorLoader, '{name}' is not a valid detector for {self._game_schema._game_name}", logging.ERROR)
            else:
                # Register only when construction succeeded.
                registry.Register(detector, ExtractorRegistry.Listener.Kinds.AGGREGATE)
    # then, load per-count detectors: one instance per index in the count range.
    for name, percount in self._game_schema.percount_detectors().items():
        if ExtractorLoader._validateFeature(name=name, base_setting=percount.get('enabled', False), overrides=self._overrides):
            for i in ExtractorLoader._genCountRange(count=percount["count"], schema=self._game_schema):
                try:
                    detector = self.LoadDetector(detector_type=name, name=f"{percount['prefix']}{i}_{name}", detector_args=percount, trigger_callback=trigger_callback, count_index=i)
                except NotImplementedError as err:
                    Logger.Log(f"In ExtractorLoader, '{name}' is not a valid detector for {self._game_schema._game_name}", logging.ERROR)
                else:
                    registry.Register(extractor=detector, kind=ExtractorRegistry.Listener.Kinds.PERCOUNT)
def setValByIndex(self, feature_name: str, index: int, new_value) -> None:
    """Set the stored value at `index` of the named per-count feature.

    Logs an ERROR when the index is not valid for that feature; a missing
    feature is reported by _has_feature itself.
    """
    if not self._has_feature(feature_name):
        return
    feature = self.features[feature_name]
    if type(feature) is dict and index in feature.keys():
        feature[index]["val"] = new_value
    else:
        Logger.Log(f"Tried to set value on invalid index of {feature_name}: {index}", logging.ERROR)
def _validate_job(self, job_data): ret_val: bool = False if job_data['string_value'] is not None: if job_data['string_value'] in self._job_map and self._job_map[ job_data['string_value']] == self._count_index: ret_val = True else: Logger.Log(f"Got invalid job_name data in JobCompletionTime", logging.WARNING) return ret_val
def level_range(self) -> range:
    """Return the inclusive range of configured levels, or an empty range.

    Logs an ERROR and yields range(0) when either bound is unset.
    """
    if self._min_level is None or self._max_level is None:
        Logger.Log(f"Could not generate per-level features, min_level={self._min_level} and max_level={self._max_level}", logging.ERROR)
        return range(0)
    return range(self._min_level, self._max_level + 1)
def __init__(self, request:Request, data_dir:str, extension:str="tsv"):
    """Set up output paths, dataset ID, and git short-hash for an export request.

    :param request: the export Request; provides GameID, Range.DateRange, and
                    the Export* flags selecting which outputs to produce.
    :param data_dir: root output directory, taken relative to the cwd.
    :param extension: file extension for the data files (default "tsv").
    """
    # One slot per output kind; only the requested kinds get filled in below.
    self._file_names : Dict[str,Union[Path,None]] = {"population":None, "players":None, "sessions":None, "events":None}
    self._zip_names : Dict[str,Union[Path,None]] = {"population":None, "players":None, "sessions":None, "events":None}
    self._files : Dict[str,Union[IO,None]] = {"population":None, "players":None, "sessions":None, "events":None}
    self._game_id : str = request.GameID
    self._data_dir : Path = Path("./" + data_dir)
    self._game_data_dir: Path = self._data_dir / self._game_id
    self._readme_path : Path = self._game_data_dir/ "readme.md"
    self._extension : str = extension
    # Expected to be a {'min': datetime|None, 'max': datetime|None} mapping.
    self._date_range : Dict[str,Union[datetime,None]] = request.Range.DateRange
    self._dataset_id : str = ""
    self._short_hash : str = ""
    # figure out dataset ID: <game>_<YYYYMMDD>_to_<YYYYMMDD>, with "UNKNOWN"
    # standing in for any missing bound.
    start = self._date_range['min'].strftime("%Y%m%d") if self._date_range['min'] is not None else "UNKNOWN"
    end = self._date_range['max'].strftime("%Y%m%d") if self._date_range['max'] is not None else "UNKNOWN"
    self._dataset_id = f"{self._game_id}_{start}_to_{end}"
    # get hash of the current git commit (7 chars); left empty ("") if the
    # code is not running inside a valid, accessible git repository.
    try:
        repo = git.Repo(search_parent_directories=True)
        if repo.git is not None:
            self._short_hash = str(repo.git.rev_parse(repo.head.object.hexsha, short=7))
    except InvalidGitRepositoryError as err:
        msg = f"Code is not in a valid Git repository:\n{str(err)}"
        Logger.Log(msg, logging.ERROR)
    except NoSuchPathError as err:
        msg = f"Unable to access proper file paths for Git repository:\n{str(err)}"
        Logger.Log(msg, logging.ERROR)
    # then set up our paths, and ensure each exists.
    base_file_name : str = f"{self._dataset_id}_{self._short_hash}"
    # finally, generate file names for each requested export kind; each data
    # file gets a sibling .zip path with the same base name.
    if request.ExportEvents:
        self._file_names['events'] = self._game_data_dir / f"{base_file_name}_events.{self._extension}"
        self._zip_names['events'] = self._game_data_dir / f"{base_file_name}_events.zip"
    if request.ExportSessions:
        self._file_names['sessions'] = self._game_data_dir / f"{base_file_name}_session-features.{self._extension}"
        self._zip_names['sessions'] = self._game_data_dir / f"{base_file_name}_session-features.zip"
    if request.ExportPlayers:
        self._file_names['players'] = self._game_data_dir / f"{base_file_name}_player-features.{self._extension}"
        self._zip_names['players'] = self._game_data_dir / f"{base_file_name}_player-features.zip"
    if request.ExportPopulation:
        self._file_names['population'] = self._game_data_dir / f"{base_file_name}_population-features.{self._extension}"
        self._zip_names['population'] = self._game_data_dir / f"{base_file_name}_population-features.zip"
def getFeatureByIndex(self, feature_name: str, index: int) -> Any:
    """Return the entry at `index` of the named per-count feature, or None.

    Returns None when the feature does not exist (reported by _has_feature)
    or when the index is not valid for it (logged here at ERROR).
    """
    if not self._has_feature(feature_name):
        return None
    feature = self.features[feature_name]
    if type(feature) is dict and index in feature.keys():
        return feature[index]
    # Fix: this failure path previously fell off the end of the function,
    # returning None only implicitly while the sibling path returned it
    # explicitly; make every path's result explicit.
    Logger.Log(f"Tried to get feature on invalid index of {feature_name}: {index}", logging.ERROR)
    return None
def _loadSlice(self, request: Request, next_slice_ids: List[str], slice_num: int, slice_count: int) -> Union[List[Tuple], None]:
    """Fetch the event rows for one slice of session IDs, logging the fetch time."""
    fetch_start: datetime = datetime.now()
    rows = request.Interface.RowsFromIDs(id_list=next_slice_ids, id_mode=request.Range.IDMode)
    time_delta = datetime.now() - fetch_start
    if rows is None:
        Logger.Log(f"Could not retrieve data set for slice [{slice_num}/{slice_count}].", logging.WARN, depth=2)
    else:
        # extra space below so output aligns nicely with "Processing time for slice..."
        Logger.Log(f"Retrieval time for slice [{slice_num}/{slice_count}]: {time_delta} to get {len(rows)} events", logging.INFO, depth=2)
    return rows
def _getFeatureValues(self) -> List[Any]: if len(self._begin_times) < len(self._complete_times): Logger.Log( f"Player began level {self._count_index} {len(self._begin_times)} times but completed it {len(self._complete_times)}.", logging.DEBUG) _num_plays = min(len(self._begin_times), len(self._complete_times)) _diffs = [ (self._complete_times[i] - self._begin_times[i]).total_seconds() for i in range(_num_plays) ] return [sum(_diffs)]