def get_dragon_kings_abs(cutoff_count=1112550, min_size=200, index=None):
    """Return the ids of index rows above an absolute ``count_span`` cutoff.

    Args:
        cutoff_count: A row qualifies only if its ``count_span`` is strictly
            greater than this value.
        min_size: A row qualifies only if its ``n_samples`` is strictly
            greater than this value.
        index: Table exposing ``count_span`` and ``n_samples`` columns
            (presumably a pandas DataFrame). When ``None`` it is loaded
            with ``fileio.get_index_table()``.

    Returns:
        The index labels (``.index.values``) of the rows passing both
        filters, in table order.
    """
    if index is None:
        # Imported lazily so callers that pass their own table never need
        # the fileio module.
        import fileio
        index = fileio.get_index_table()

    exceeds_cutoff = index.count_span > cutoff_count
    large_enough = index.n_samples > min_size
    return index[exceeds_cutoff & large_enough].index.values
def get_whatever(N=100, min_size=200, index=None):
    """Draw ``N`` random ids, without replacement, from large-enough rows.

    Args:
        N: Number of ids to draw.
        min_size: Only rows whose ``n_samples`` is strictly greater than
            this value are candidates.
        index: Table exposing an ``n_samples`` column (presumably a pandas
            DataFrame). When ``None`` it is loaded with
            ``fileio.get_index_table()``.

    Returns:
        Array of ``N`` ids sampled with ``np.random.choice`` from the
        candidate index labels.

    Raises:
        ValueError: Raised by ``np.random.choice`` when ``N`` exceeds the
            number of candidates, since sampling is without replacement.
    """
    import numpy as np

    if index is None:
        import fileio
        index = fileio.get_index_table()

    eligible_rows = index[index.n_samples > min_size]
    candidate_ids = eligible_rows.index.values
    return np.random.choice(candidate_ids, N, replace=False)
def get_dragon_kings_rel(cutoff_frac=0.75, index=None):
    """Select ids using a ``count_span`` cutoff derived from a cumulative share.

    The ``count_span`` values are sorted ascending and cumulatively summed.
    The cutoff is the ``count_span`` value at the first position where that
    running total reaches ``cutoff_frac`` of the grand total. Selection is
    then delegated to ``get_dragon_kings_abs`` with that cutoff and its
    default ``min_size``.

    Args:
        cutoff_frac: Fraction of the total ``count_span`` sum used to locate
            the cutoff.
        index: Table exposing a ``count_span`` column (presumably a pandas
            DataFrame). When ``None`` it is loaded with
            ``fileio.get_index_table()``.

    Returns:
        Whatever ``get_dragon_kings_abs(cutoff_count, index=index)`` returns.
    """
    import numpy as np

    if index is None:
        import fileio
        index = fileio.get_index_table()

    sorted_counts = np.sort(index.count_span.values)
    running_total = np.cumsum(sorted_counts)

    # NOTE(review): searchsorted needs running_total to be non-decreasing,
    # which holds if count_span is never negative -- confirm.
    target_total = running_total[-1] * cutoff_frac
    cutoff_position = np.searchsorted(running_total, target_total)
    cutoff_count = sorted_counts[cutoff_position]

    return get_dragon_kings_abs(cutoff_count, index=index)
def get_tricky_cases(min_size=200, index=None, shuffle=False):
    """Return the de-duplicated union of three id selections.

    The ids come from ``get_flagships``, ``get_lead_balloons`` (called with
    ``N=300``) and ``get_dragon_kings_abs``.

    Args:
        min_size: Forwarded to ``get_lead_balloons`` and
            ``get_dragon_kings_abs``; it is not passed to ``get_flagships``.
        index: Table handed to every selector. When ``None`` it is loaded
            once with ``fileio.get_index_table()`` and shared by all three.
        shuffle: If true, the combined ids are shuffled in place with
            ``np.random.shuffle`` before being returned.

    Returns:
        A list of the unique ids; without shuffling the order is the set's
        iteration order.
    """
    import numpy as np

    if index is None:
        import fileio
        index = fileio.get_index_table()

    # The selectors run in this fixed order because get_lead_balloons draws
    # from NumPy's global random state when N is given.
    flagship_ids = get_flagships(index=index)
    balloon_ids = get_lead_balloons(index=index, min_size=min_size, N=300)
    dragon_king_ids = get_dragon_kings_abs(index=index, min_size=min_size)

    combined = set(flagship_ids)
    combined = combined.union(balloon_ids)
    combined = combined.union(dragon_king_ids)

    id_array = np.array(list(combined))
    if shuffle:
        np.random.shuffle(id_array)
    return list(id_array)
def get_lead_balloons(cutoff_time=0.03, quantile="c50", min_size=200, N=None, index=None):
    """Return ids whose ``<quantile>_sample`` value lies below a time cutoff.

    Args:
        cutoff_time: A row qualifies only if its ``quantile + "_sample"``
            column is strictly less than this value.
        quantile: Prefix of the column to test; ``"_sample"`` is appended.
        min_size: A row qualifies only if its ``n_samples`` is strictly
            greater than this value.
        N: If ``None``, every qualifying id is returned. Otherwise ``N`` ids
            are drawn without replacement with ``np.random.choice``.
        index: Table exposing the tested column and ``n_samples``
            (presumably a pandas DataFrame). When ``None`` it is loaded
            with ``fileio.get_index_table()``.

    Returns:
        Array of qualifying index labels, or a random subset of size ``N``.

    Raises:
        ValueError: Raised by ``np.random.choice`` when ``N`` exceeds the
            number of qualifying ids.
    """
    import numpy as np

    if index is None:
        import fileio
        index = fileio.get_index_table()

    column_name = quantile + "_sample"
    below_cutoff = index[column_name] < cutoff_time
    large_enough = index.n_samples > min_size
    candidate_ids = index[below_cutoff & large_enough].index.values

    if N is not None:
        return np.random.choice(candidate_ids, N, replace=False)
    return candidate_ids