def create(cls) -> None:
    """Build the pivot and ``to_pandas`` fixtures and save them in ``cls._store``.

    Stores four entries: ``'pivot_sf'`` / ``'pivot_df'`` (the same three-column
    frame as static-frame and pandas objects) and ``'to_pandas_a'`` /
    ``'to_pandas_b'`` (wide 75-row frames).
    """
    # Maps a group's row count to the number of groups having that row count.
    size_map = {
        5: 2_000,
        6: 3_500,
        7: 1_000,
        8: 2_000,
        9: 5_000,
        10: 7_625,
        15: 5_000,
        20: 5_000,
        25: 3_250,
        50: 2_625
    }
    groups = np.arange(sum(size_map.values()))

    # One entry per group id, giving that group's row count.
    group_sizes = [
        size for size, count in size_map.items() for _ in range(count)
    ]

    # 'group' repeats each group id; 'group_unique' counts up within each group.
    group_column = np.concatenate(
        [np.full(group_sizes[group], group) for group in groups])
    unique_column = np.concatenate(
        [np.arange(group_sizes[group]) for group in groups])

    group_values = sf.Series(group_column, name='group')
    group_unique_values = sf.Series(unique_column, name='group_unique')
    value_values = sf.Series(np.random.random(len(group_values)), name='data')

    columns = (group_values, group_unique_values, value_values)
    frame = sf.Frame.from_concat(columns, axis=1)
    cls._store['pivot_sf'] = frame
    cls._store['pivot_df'] = frame.to_pandas()

    # Single-typed wide frame.
    cls._store['to_pandas_a'] = sf.Frame.from_element(
        np.nan, index=range(75), columns=range(75000))

    # Wide frame whose two halves are filled with NaN and with 0.
    nan_half = sf.FrameGO.from_element(
        np.nan, index=range(75), columns=range(50000))
    zero_half = sf.FrameGO.from_element(
        0, index=range(75), columns=range(50000, 100000))
    nan_half.extend(zero_half) #type: ignore
    cls._store['to_pandas_b'] = nan_half
def init_new_session(self, event, prob):
    """Push a fresh summoning session, or record the outcome if none should start."""
    if self.summoner.should_start_new_session(event.targets_pulled):
        # New session: no stones summoned yet, every color marked present.
        fresh_session = SessionState(
            prob_tier=event.dry_streak // SUMMONS_PER_SESSION,
            stone_summons=sf.Series(0, index=tuple(Colors)),
            stone_presences=sf.Series(True, index=tuple(Colors)),
        )
        self.push_state(event, fresh_session, prob)
        return

    # Summoning stops here: notify and record the final outcome.
    self.callback('quit summoning event')
    self.push_outcome(event, prob)
def test_series_dropna_b(self):
    # An all-NaN Series with a hierarchical index drops to an empty Series
    # of the same class.
    index = sf.IndexHierarchy.from_product(['A', 'B'], [1, 2])
    all_nan = sf.Series(np.nan, index=index)

    dropped = all_nan.dropna()

    self.assertEqual(len(dropped), 0)
    self.assertEqual(all_nan.__class__, dropped.__class__)
def get_sample_frame_mixed_string_index(
        size: int = 10000,
        columns: int = 100) -> tp.Tuple[pd.DataFrame, sf.FrameGO, np.ndarray]:
    '''Return equivalent pandas, static-frame and NumPy data with mixed column types.

    Rows and columns are both labelled with four-letter strings; column types
    cycle through float, int, object and bool.
    '''
    # All 4-letter combinations of lowercase letters: 14950 strings.
    source_ids = [
        ''.join(chars) for chars in it.combinations(string.ascii_lowercase, 4)
    ]
    assert size <= len(source_ids)

    index = source_ids[:size]
    cols = source_ids[:columns]
    dtypes = (float, int, object, bool)

    def column_values(position: int) -> np.ndarray:
        # Column type is chosen by position, cycling through ``dtypes``.
        return _typed_array(dtypes[position % 4], size=size, shift=position)

    sff = sf.FrameGO(index=index)
    for position, label in enumerate(cols):
        sff[label] = sf.Series(column_values(position), index=index)

    npf = sff.values

    pdf = pd.DataFrame(index=index)
    for position, label in enumerate(cols):
        pdf[label] = pd.Series(column_values(position), index=index)

    return pdf, sff, npf
def test_display_float_scientific_b(self) -> None:
    series = sf.Series([3.1j, 5.2j])**40

    # Default complex scientific formatting.
    default_config = sf.DisplayConfig(type_color=False)
    default_rows = [
        '<Series>',
        '<Index>',
        '0 4.51e+19+0.00e+00j',
        '1 4.37e+28+0.00e+00j',
        '<int64> <complex128>'
    ]
    self.assertEqual(series.display(default_config).to_rows(), default_rows)

    # A non-default scientific format can produce values that get truncated.
    fixed_config = sf.DisplayConfig(
        type_color=False,
        value_format_complex_scientific='{:f}')
    fixed_rows = [
        '<Series>',
        '<Index>',
        '0 45130251461102338...',
        '1 43665028242109283...',
        '<int64> <complex128>'
    ]
    self.assertEqual(series.display(fixed_config).to_rows(), fixed_rows)

    # Scientific format with a single decimal digit.
    short_config = sf.DisplayConfig(
        type_color=False,
        value_format_complex_scientific='{:.1e}')
    short_rows = [
        '<Series>',
        '<Index>',
        '0 4.5e+19+0.0e+00j',
        '1 4.4e+28+0.0e+00j',
        '<int64> <complex128>'
    ]
    self.assertEqual(series.display(short_config).to_rows(), short_rows)
def test_display_float_scientific_a(self) -> None:
    series = sf.Series([3.1, 5.2])**40

    # Default float scientific formatting.
    default_config = sf.DisplayConfig(type_color=False)
    default_rows = [
        '<Series>',
        '<Index>',
        '0 4.51302515e+19',
        '1 4.36650282e+28',
        '<int64> <float64>'
    ]
    self.assertEqual(series.display(default_config).to_rows(), default_rows)

    # A non-default scientific format can produce values that get truncated.
    fixed_config = sf.DisplayConfig(
        type_color=False,
        value_format_float_scientific='{:f}')
    fixed_rows = [
        '<Series>',
        '<Index>',
        '0 45130251461102272...',
        '1 43665028242109266...',
        '<int64> <float64>'
    ]
    self.assertEqual(series.display(fixed_config).to_rows(), fixed_rows)

    # Scientific format with two decimal digits.
    short_config = sf.DisplayConfig(
        type_color=False,
        value_format_float_scientific='{:.2e}')
    short_rows = [
        '<Series>',
        '<Index>',
        '0 4.51e+19',
        '1 4.37e+28',
        '<int64> <float64>'
    ]
    self.assertEqual(series.display(short_config).to_rows(), short_rows)
def get_sample_series_float(
        size: int = 10000) -> tp.Tuple[pd.Series, sf.Series, np.ndarray]:
    '''Return pandas, static-frame and NumPy float data whose second half is NaN.

    All three results are built from the same ``size``-length array.
    '''
    values = np.arange(size) * .001
    midpoint = size // 2
    values[midpoint:] = np.nan
    return pd.Series(values), sf.Series(values), values
def get_sample_series_objstr(size=10000):
    '''Return pandas, static-frame and NumPy data built from a repeated mixed sample.

    The sample holds ``None``, an int, a float, NaN and a string; it is repeated
    a whole number of times, so the result length is ``size`` rounded down to a
    multiple of the sample length.
    '''
    sample = [None, 3, 0.123, np.nan, 'str']
    repeats = int(size / len(sample))
    values = np.array(sample * repeats)
    return pd.Series(values), sf.Series(values), values
def sf(cls):
    # Two-level index over every ordered pair of lowercase letters.
    index = sf.IndexHierarchy.from_product(
        list(string.ascii_lowercase),
        list(string.ascii_lowercase))

    frame = sf.FrameGO(index=index)
    for col in range(100):
        # Each column covers a six-label window starting at its own position.
        window = index[col:col + 6]
        frame[col] = sf.Series(col * .1, index=window)

    assert frame.sum().sum() == 2970.0
def get_sample_series_obj(
        size: int = 10000) -> tp.Tuple[pd.Series, sf.Series, np.ndarray]:
    '''Return pandas, static-frame and NumPy data built from a repeated mixed sample.

    The sample holds ``None``, an int, a float and NaN; it is repeated a whole
    number of times, so the result length is ``size`` rounded down to a multiple
    of the sample length.
    '''
    sample = [None, 3, 0.123, np.nan]
    repeats = int(size / len(sample))
    values = np.array(sample * repeats)
    return pd.Series(values), sf.Series(values), values
def get_sample_series_string_index_float_values(size=10000):
    '''Return pandas, static-frame and NumPy float data labelled by hash strings.

    The second half of the values is NaN; both Series share the same index.
    '''
    values = np.arange(size) * .001
    values[size // 2:] = np.nan

    # Index labels are SHA-224 hex digests of the integer positions.
    def digest(position):
        return hashlib.sha224(str(position).encode('utf-8')).hexdigest()

    index = [digest(position) for position in range(size)]

    return pd.Series(values, index=index), sf.Series(values, index=index), values
def test_series_loc_extract_d(self):
    labels = (
        ('a', 'a'),
        ('a', 'b'),
        ('b', 'a'),
        ('b', 'b'),
        ('b', 'c'),
    )
    series = sf.Series(range(5), index=sf.IndexHierarchy.from_labels(labels))

    # A plain (non-HLoc) leaf selection must be terminal: passing a slice or
    # a list raises.
    with self.assertRaises(RuntimeError):
        series.loc['a', :]

    with self.assertRaises(RuntimeError):
        series.loc[['a', 'b'], 'b']
def test_series_min_max_b(self):
    letters = list('abc')

    # Strings stored with object dtype support min and max.
    as_objects = Series(letters, dtype=object)
    self.assertEqual(as_objects.min(), 'a')
    self.assertEqual(as_objects.max(), 'c')

    # The same results are expected when no dtype is given.
    as_default = sf.Series(list('abc'))
    self.assertEqual(as_default.min(), 'a')
    self.assertEqual(as_default.max(), 'c')
def test_series_min_max_a(self):
    # Plain integers.
    ints = Series([1, 3, 4, 0])
    self.assertEqual(ints.min(), 0)
    self.assertEqual(ints.max(), 4)

    # With None and NaN present: skipped by default, NaN when skipna=False.
    with_missing = sf.Series([-1, 4, None, np.nan])
    self.assertEqual(with_missing.min(), -1)
    self.assertTrue(np.isnan(with_missing.min(skipna=False)))
    self.assertEqual(with_missing.max(), 4)
    self.assertTrue(np.isnan(with_missing.max(skipna=False)))
def test_series_loc_extract_c(self):
    labels = (
        ('a', 'a'),
        ('a', 'b'),
        ('b', 'a'),
        ('b', 'b'),
        ('b', 'c'),
    )
    series = sf.Series(range(5), index=sf.IndexHierarchy.from_labels(labels))

    # Only one label matches, yet the result is still a Series.
    single = series.loc[sf.HLoc[:, 'c']]
    self.assertEqual(single.__class__, series.__class__)
    self.assertEqual(single.to_pairs(), ((('b', 'c'), 4), ))

    # Two labels match the inner 'a'.
    multiple = series.loc[sf.HLoc[:, 'a']]
    self.assertEqual(
        multiple.to_pairs(),
        ((('a', 'a'), 0), (('b', 'a'), 2)))
def color_count_probs(self, prob_tier):
    """Return the probability of each stone-count combination for ``prob_tier``.

    The result is the ``'probability'`` column of a frame indexed
    hierarchically by the color columns of ``stone_combinations``.
    """
    color_probs = self.colorpool_probs(prob_tier)

    # One multinomial probability per row of the combinations table.
    row_probs = [
        multinomial_prob(stone_counts, color_probs)
        for stone_counts in stone_combinations.iter_series(axis=1)
    ]
    probs = sf.Series(row_probs, name='probability')
    assert probs.sum() == 1

    combined = sf.Frame.from_concat([stone_combinations, probs], axis=1)
    indexed = combined.set_index_hierarchy(tuple(Colors), drop=True)
    return indexed['probability']
def get_series_float_h2d_str_index(size=1000):
    '''Return pandas and static-frame float Series sharing a two-level string index.

    The index is the product of the first ``size / 10`` labels (outer level)
    and the first ``size`` labels (inner level), where labels are four-letter
    combinations of lowercase letters.
    '''
    labels = [
        ''.join(chars) for chars in it.combinations(string.ascii_lowercase, 4)
    ]
    outer = labels[:int(size / 10)]
    inner = labels[:size]

    values = np.arange(len(outer) * len(inner)) * .001

    sfs = sf.Series(values, index=sf.IndexHierarchy.from_product(outer, inner))
    pds = pd.Series(values, index=pd.MultiIndex.from_product((outer, inner)))
    return pds, sfs
def test_series_iter_group_index_b(self):
    colors = ('red', 'green')
    shapes = ('square', 'circle', 'triangle')

    s1 = sf.Series(range(6),
            index=sf.IndexHierarchy.from_product(shapes, colors))

    # Grouping on depth 0 gives one group per shape.
    post = tuple(s1.iter_group_index(0))
    # assertEqual is required here: assertTrue(len(post), 3) would treat 3 as
    # the failure message and pass for any non-empty result.
    self.assertEqual(len(post), 3)

    self.assertEqual(
            s1.iter_group_index(0).apply(np.sum).to_pairs(),
            (('circle', 5), ('square', 1), ('triangle', 9)))

    self.assertEqual(
            s1.iter_group_index(1).apply(np.sum).to_pairs(),
            (('green', 9), ('red', 6)))
def test_series_iter_group_index_c(self):
    colors = ('red', 'green')
    shapes = ('square', 'circle', 'triangle')
    textures = ('smooth', 'rough')

    s1 = sf.Series(range(12),
            index=sf.IndexHierarchy.from_product(shapes, colors, textures))

    # Grouping on depths 0 and 2 gives one group per (shape, texture) pair.
    post = tuple(s1.iter_group_index([0, 2]))
    # assertEqual is required here: assertTrue(len(post), 6) would treat 6 as
    # the failure message and pass for any non-empty result.
    self.assertEqual(len(post), 6)

    self.assertEqual(
            s1.iter_group_index([0, 2]).apply(np.sum).to_pairs(),
            ((('circle', 'rough'), 12),
            (('circle', 'smooth'), 10),
            (('square', 'rough'), 4),
            (('square', 'smooth'), 2),
            (('triangle', 'rough'), 20),
            (('triangle', 'smooth'), 18)))
def to_sf(year: int = 2018) -> sf.Series:
    '''Return one Series of wave height and wave period values for every buoy.

    Args:
        year: passed to ``BuoyLoader.buoy_to_sf`` for each buoy in ``BUOYS``.

    Returns:
        A Series with a three-level hierarchical index of
        (station id, datetime, attribute name); the datetime level is built
        with ``sf.IndexMinute``.
    '''
    labels = []
    values = []

    for buoy in BUOYS:
        f = BuoyLoader.buoy_to_sf(buoy, year)
        # Each row contributes two observations: one per attribute.
        for row in f.iter_series(1):
            for attr in (BuoyLoader.FIELD_WAVE_HEIGHT,
                    BuoyLoader.FIELD_WAVE_PERIOD):
                label = (row[BuoyLoader.FIELD_STATION_ID],
                        row[BuoyLoader.FIELD_DATETIME],
                        attr)
                labels.append(label)
                values.append(row[attr])

    index = sf.IndexHierarchy.from_labels(
            labels,
            index_constructors=(sf.Index, sf.IndexMinute, sf.Index))

    return sf.Series(values, index=index)
def branch_event(self, event, session, prob, stone_choice):
    """Push a follow-up state for each result of summoning ``stone_choice``.

    For every pool reachable with the chosen stone, one state is pushed when
    the pool is not among the summoner's targets, and two states (target
    pulled / target not pulled) when it is.
    """
    orbs_spent = event.orbs_spent + stone_cost(
        session.stone_summons.sum() - 1)

    # Pool probabilities restricted to the chosen stone, rescaled to sum to 1.
    tier_probs = self.event_details.pool_probs(session.prob_tier)
    pool_probs = tier_probs[sf.HLoc[:, stone_choice]].reindex_drop_level(-1)
    pool_probs = pool_probs / pool_probs.sum()

    for starpool, subprob in pool_probs.iter_element_items():
        total_prob = prob * subprob

        # The dry streak resets on the x5 rating and grows otherwise.
        dry_streak = (0 if starpool.star_rating == StarRatings.x5_STAR
                      else event.dry_streak + 1)

        target_key = (starpool, stone_choice)
        if target_key in self.summoner.targets.index:
            # Add one pull for this target, aligned to the existing tally.
            increment = sf.Series(
                [1],
                index=sf.IndexHierarchy.from_labels([target_key]),
            ).reindex(event.targets_pulled.index, fill_value=0)
            targets_pulled_success = event.targets_pulled + increment

            # Chance that the pull is a target: targets over pool size.
            prob_success = Fraction(
                int(self.summoner.targets[target_key]),
                int(self.event_details.pool_counts[target_key]))

            pulls = ((targets_pulled_success, prob_success),
                     (event.targets_pulled, 1 - prob_success))
        else:
            # Not a target pool: the tally is unchanged with certainty.
            pulls = ((event.targets_pulled, 1), )

        for targets_pulled, subsubprob in pulls:
            self.push_state(
                EventState(orbs_spent, dry_streak, targets_pulled),
                session,
                total_prob * subsubprob)
def sf() -> None:
    # Build a Series from the 10k object array, labelled with as many string
    # labels as there are values. The result is not used further.
    values = SampleData.get('npa_obj_10k')
    labels = SampleData.get('label_str')[:len(values)]
    post = sf.Series(values, index=labels)
def sf(cls):
    frame = sf.FrameGO(index=cls._index)
    for col in range(100):
        # Each column covers a twenty-label window starting at its own position.
        window = cls._index[col:col + 20]
        frame[col] = sf.Series(col * .1, index=window)

    assert frame.sum().sum() == 9900.0
def test_series_dropna_c(self):
    # NaN entries are removed; the remaining hierarchical labels are kept.
    index = sf.IndexHierarchy.from_product(['A', 'B'], [1, 2])
    series = sf.Series([1, np.nan, 2, np.nan], index=index)

    dropped = series.dropna()

    expected = ((('A', 1), 1.0), (('B', 1), 2.0))
    self.assertEqual(dropped.to_pairs(), expected)
def test_series_sum_b(self):
    # Summing strings stored with object dtype concatenates them.
    as_objects = Series(list('abc'), dtype=object)
    self.assertEqual(as_objects.sum(), 'abc')

    # The same result is expected when no dtype is given.
    as_default = sf.Series(list('abc'))
    self.assertEqual(as_default.sum(), 'abc')
def get_sample_series_float(size=10000):
    '''Return pandas, static-frame and NumPy float data whose second half is NaN.

    All three results are built from the same ``size``-length array.
    '''
    values = np.arange(size) * .001
    midpoint = size // 2
    values[midpoint:] = np.nan

    pandas_series = pd.Series(values)
    static_series = sf.Series(values)
    return pandas_series, static_series, values