def parse_header(header):
    """Build the game state and base feature set described by a game header.

    Every line of ``header`` that starts with ``info,`` is split with
    ``csv_split`` into three fields; the second and third are used as the
    key and value. ``visteam`` / ``hometeam`` are stored on the returned
    game state, and any key whose ``game_<key>`` name appears in
    ``FeatureSet.__slots__`` is stored on the returned feature set.
    All other lines are ignored.

    Args:
        header: the header text, one record per line.

    Returns:
        A ``(game_state, base_featureset)`` tuple.

    Raises:
        Exception: whatever ``csv_split`` (or unpacking its result into
            three fields) raises; the offending line is logged first.
    """
    game_state = GameState()
    base_featureset = FeatureSet()

    for raw_line in header.split("\n"):
        line = raw_line.strip()
        if not line.startswith("info,"):
            continue

        try:
            _, key, value = csv_split(line)
        except Exception:
            logging.error("Choked on line: %s", line)
            raise

        # Store on the instances being returned, not on the classes:
        # class-level attributes are shared by every GameState/FeatureSet
        # and would leak one game's header values into the next.
        if key in ("visteam", "hometeam"):
            setattr(game_state, key, value)

        fs_key = "game_%s" % key
        if fs_key in FeatureSet.__slots__:
            setattr(base_featureset, fs_key, value)

    return game_state, base_featureset
def from_featureset_json(cls, json_str):
    """Build a ``cls`` instance from one JSON-encoded feature record.

    The record is decoded, each value that has an entry in
    ``FeatureSet.get_parse_map()`` is converted with that parser (falling
    back to ``UNK`` when the parser raises ``ValueError``), and the result
    is copied onto a new ``cls()`` object. Numeric and date-derived fields
    are passed through ``cls.bucketized``; a derived field is set to ``UNK``
    whenever any of the values it is computed from is ``UNK``.

    Args:
        json_str: JSON text of a single feature record.

    Returns:
        The populated ``cls`` instance.

    Raises:
        KeyError: if the record lacks one of the fields read below.
    """
    d = json.loads(json_str)

    # All keys are strings in the .features format; convert the values we
    # have a parser for and mark unparseable ones as unknown.
    parse_map = FeatureSet.get_parse_map()
    for field, raw in list(d.items()):
        if raw != UNK and field in parse_map:
            try:
                d[field] = parse_map[field](raw)
            except ValueError:
                d[field] = UNK

    obj = cls()

    def unk_check(obj_key, keys=None):
        """
        If all specified keys are known, return True.
        Otherwise, set UNK on the specified object key and return False.
        ``keys`` defaults to ``[obj_key]``.
        """
        if keys is None:
            keys = [obj_key]
        if all(d[k] != UNK for k in keys):
            # proceed with calculation
            return True
        setattr(obj, obj_key, UNK)
        return False

    def year_from_td(td):
        # Approximate whole years as days / 365 (leap days ignored).
        # NOTE(review): `/` truncates on Python 2 but yields a float on
        # Python 3 -- confirm which one `bucketized` expects.
        return td.days / 365

    obj.game_daynight = d["game_daynight"]
    # The UNK marker must land on the derived feature (month / year), not on
    # "game_date", otherwise the derived attribute is left unset when the
    # date is unknown.
    if unk_check("game_month", keys=["game_date"]):
        obj.game_month = cls.bucketized(d["game_date"].month, granularity=2)
    if unk_check("game_year", keys=["game_date"]):
        obj.game_year = cls.bucketized(d["game_date"].year, granularity=5)
    if unk_check("game_number"):
        obj.game_number = cls.bucketized(d["game_number"], buckets=[0, 1])
    if unk_check("game_temp"):
        obj.game_temp = cls.bucketized(d["game_temp"], granularity=5)
    obj.game_site = d["game_site"]
    obj.game_winddir = cls.get_winddir(d)

    if unk_check("ab_inning"):
        obj.ab_inning = cls.bucketized(d["ab_inning"], buckets=[1, 4, 7, 10])
    obj.ab_numballs = d["ab_numballs"]
    obj.ab_numstrikes = d["ab_numstrikes"]
    obj.ab_lrmatchup = cls.get_lrmatchup(d)

    obj.batter_bats = d["batter_bats"]
    obj.batter_fieldpos = d["batter_fieldpos"]
    obj.batter_visorhome = d["batter_visorhome"]
    if unk_check("batter_batpos"):
        obj.batter_batpos = cls.bucketized(
            d["batter_batpos"], buckets=[1, 3, 5, 8, 10, 11, 12])

    obj.label = d["label"]

    # The same player-level features are produced for both roles.
    for prefix in ("batter_", "pitcher_"):
        age_key = prefix + "age"
        birth_keys = [prefix + "birthYear",
                      prefix + "birthMonth",
                      prefix + "birthDay"]
        if unk_check(age_key, keys=birth_keys + ["game_date"]):
            birthday = datetime.datetime(d[birth_keys[0]],
                                         d[birth_keys[1]],
                                         d[birth_keys[2]])
            setattr(obj, age_key,
                    cls.bucketized(year_from_td(d["game_date"] - birthday),
                                   granularity=3))

        for name, step in (("weight", 10), ("height", 3)):
            if unk_check(prefix + name):
                setattr(obj, prefix + name,
                        cls.bucketized(d[prefix + name], granularity=step))

        # Categorical fields are copied through unchanged.
        for name in ("team", "throws", "birthCountry"):
            setattr(obj, prefix + name, d[prefix + name])

        experience_key = prefix + "experience"
        debut_key = prefix + "debut"
        if unk_check(experience_key, keys=[debut_key, "game_date"]):
            setattr(obj, experience_key,
                    cls.bucketized(
                        year_from_td(d["game_date"] - d[debut_key]),
                        granularity=3))

    return obj
def copy(self):
    """Return a new FeatureSet holding the same field values as this one.

    Each name in ``FeatureSet.__slots__`` that is set on ``self`` is
    assigned to the new object (a shallow copy: the values themselves are
    shared). The copy is made explicitly rather than relying on a fresh
    ``FeatureSet()`` happening to match ``self``, which is only true when
    every field value lives on the class instead of on the instance.

    Returns:
        The new FeatureSet.
    """
    missing = object()
    new_obj = FeatureSet()
    for name in FeatureSet.__slots__:
        value = getattr(self, name, missing)
        if value is missing:
            # Never assigned on this object; leave it unset on the copy too.
            continue
        if getattr(new_obj, name, missing) is value:
            # The fresh object already exposes this exact value (e.g. it is
            # stored on the class itself), so there is nothing to copy.
            continue
        setattr(new_obj, name, value)

    # Sanity check: the copy must serialise identically to the original.
    assert new_obj.to_json() == self.to_json()
    return new_obj