def _interpret_series(row: pd.Series, record_type: ert.data.RecordType) -> Any:
    if record_type not in {item.value for item in ert.data.RecordType}:
        raise ValueError(
            f"Unexpected record type when loading numerical record: {record_type}"
        )
    if record_type == ert.data.RecordType.MAPPING_INT_FLOAT:
        return {int(k): v for k, v in row.to_dict().items()}
    return (
        row.to_list()
        if record_type == ert.data.RecordType.LIST_FLOAT
        else row.to_dict()
    )
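# Minimal sketch of the MAPPING_INT_FLOAT branch using plain pandas (no ert needed):
# to_dict() yields the string labels from the index, which are cast back to ints.
import pandas as pd

row = pd.Series({"0": 1.5, "1": 2.5})
print({int(k): v for k, v in row.to_dict().items()})  # {0: 1.5, 1: 2.5}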
def correlate_series(s1: pd.Series, s2: pd.Series, window_len=41):
    ts1 = s1.to_dict()
    ts2 = s2.to_dict()
    results = np.zeros(len(s1))
    for i in range(len(s1) - window_len + 1):
        time_period = (i, i + window_len)
        correlator = Correlator(ts1, ts2, time_period=time_period)
        results[i] = correlator.correlation_result.coefficient
    return pd.Series(results)
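# Hedged usage sketch for correlate_series: it relies on luminol's Correlator,
# so this assumes `pip install luminol` plus the imports below (not shown above).
# The positional index labels double as the timestamps luminol expects.
import numpy as np
import pandas as pd
from luminol.correlator import Correlator

s1 = pd.Series(np.sin(np.linspace(0, 6, 100)))
s2 = pd.Series(np.sin(np.linspace(0, 6, 100)) + 0.1)
rolling_corr = correlate_series(s1, s2, window_len=41)
print(rolling_corr.head())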
def action_set_cost(
    factual_instance: pd.Series,
    action_set: dict,
    ranges: pd.Series,
    norm_type=2,
) -> float:
    """
    Compute cost of action

    Parameters
    ----------
    factual_instance: pd.Series
        Contains a single factual instance, where each element corresponds
        to a feature.
    action_set: dict
        Contains perturbation of features.
    ranges: pd.Series
        Contains the feature ranges of the original dataset.
    norm_type: int
        Norm to be used, choose either 1 or 2.

    Returns
    -------
    float
    """
    factual_instance = factual_instance.to_dict()
    ranges = ranges.to_dict()

    factual_float_int = np.all(
        [isinstance(elem, (int, float)) for elem in factual_instance.values()])
    action_float_int = np.all(
        [isinstance(elem, (int, float)) for elem in action_set.values()])
    factual_torch = np.all(
        [isinstance(elem, torch.Tensor) for elem in factual_instance.values()])
    action_torch = np.all(
        [isinstance(elem, torch.Tensor) for elem in action_set.values()])

    deltas = [(action_set[key] - factual_instance[key]) / ranges[key]
              for key in action_set.keys()]

    if factual_float_int and action_float_int:
        return np.linalg.norm(deltas, norm_type)
    elif factual_torch and action_torch:
        return torch.norm(torch.stack(deltas), p=norm_type)
    else:
        raise Exception("Mismatching or unsupported datatypes.")
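# Hypothetical usage sketch for action_set_cost; the feature names and values
# are invented. torch is imported because the function's type checks need it
# even on the pure float path.
import numpy as np
import torch
import pandas as pd

factual = pd.Series({"age": 30.0, "income": 40000.0})
ranges = pd.Series({"age": 80.0, "income": 100000.0})
action = {"income": 55000.0}
print(action_set_cost(factual, action, ranges, norm_type=2))  # 0.15: range-normalized L2 cost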
def get_combined_health_index(series):
    """Combine the per-sensor health indices into one overall health index."""
    df = pd.DataFrame()
    for key in series.keys():
        df = pd.concat([df, pd.DataFrame({key: series[key].health_index})], axis=1)

    overall_health_index = []
    for i, row in df.iterrows():
        data = row.to_dict()
        s = 0
        weights2 = {}
        for key in data.keys():
            # Re-weight each sensor by how far its index is from 0.5.
            # `weights` is a module-level mapping defined elsewhere.
            weights2[key] = weights[key] * abs(0.5 - data[key])
            s = s + data[key] * weights2[key]
        s = s / sum(weights2.values())
        overall_health_index.append(s)

    overall_health_index = pd.DataFrame(
        {'overall_health_index': overall_health_index},
        index=series['PT-204'].time)
    return overall_health_index
def create_item_mod(possible_mod_ids: list, item_mod: pd.Series,
                    rare_mods: pd.DataFrame, mod_value: int) -> dict:
    logger.info('Creating item mods dict')
    for mod_id in possible_mod_ids:
        # Comparing mod with rare mods multiple times is expensive
        possible_mods = rare_mods[rare_mods.mod_id == mod_id]
        if len(possible_mods) > 0:
            for idx, row in possible_mods.iterrows():
                min_value = row['min']
                max_value = row['max']
                if min_value < 0:
                    mod_value = -mod_value
                if min_value <= mod_value <= max_value + 1:
                    item_mod = possible_mods.loc[idx, :]
                    break
        else:
            continue
        # Fall back to the last candidate row if no value range matched.
        if isinstance(item_mod, dict):
            if not item_mod:
                item_mod = possible_mods.loc[idx, :]
        else:
            if item_mod.empty:
                item_mod = possible_mods.loc[idx, :]
        item_mod['value'] = mod_value
        item_mod = item_mod.drop(['min', 'max', 'required_level'])
        item_mod = item_mod.to_dict()
        break
    return item_mod
def pandas_series():
    obj = Series([3, 6, 9, 12])
    print(obj)
    print(obj.values)
    print(obj.index)

    ww2_cas = Series([8700000, 4300000, 3000000, 2100000, 400000],
                     index=["USSR", "Germany", "China", "Japan", "USA"])
    print(ww2_cas)
    print(ww2_cas["USA"])

    # countries with casualties > 4 million
    print(ww2_cas[ww2_cas > 4000000])
    print("USSR" in ww2_cas)

    ww2_dict = ww2_cas.to_dict()
    print(ww2_dict)
    ww2_series = Series(ww2_dict)
    print(ww2_series)

    countries = ["China", "Germany", "Japan", "USA", "USSR", "Argentina"]
    obj2 = Series(ww2_dict, index=countries)
    print(obj2)
    print(pd.isnull(obj2))
    print(pd.notnull(obj2))
    print(ww2_series + obj2)

    obj2.name = "World War 2 Casualties"
    print(obj2)
    obj2.index.name = "Countries"
    print(obj2)
    return
def __create_bldg_operation_factory(
        self, sia_bldg_type_mapping: pd.Series, sia_params_generation_lock=None
) -> BuildingOperationFactoryProtocol:
    """
    :param sia_bldg_type_mapping: series, index is bldg fid, value bldg type as string
    :param sia_params_generation_lock: only relevant in case SIA2024 profiles are used.
        Pass an instance of a lock if SIA parameters might be generated from several
        threads at the same time in the same location.
    :return: object providing the BuildingOperationFactoryProtocol; the concrete
        implementation depends on the configuration
    """
    passive_cooling_op_fact: PassiveCoolingOperationFactoryProtocol = \
        PassiveCoolingOperationFactory(self._unit_reg, self._custom_config)
    op_fact_class_name: str = self._mgr_config["BUILDING_OPERATION_FACTORY_CLASS"]
    if op_fact_class_name == "cesarp.SIA2024.SIA2024Facade.SIA2024Facade":
        # Save as a member because SIA2024 is probably used as the infiltration
        # rate source as well.
        self.sia2024 = SIA2024Facade(sia_bldg_type_mapping.to_dict(),
                                     passive_cooling_op_fact,
                                     self._unit_reg,
                                     self._custom_config)
        if sia_params_generation_lock:
            sia_params_generation_lock.acquire()
        # Loading has to be synchronized as well, because the second process has
        # to wait until the first has finished creating the profiles.
        self.sia2024.load_or_create_parameters(sia_bldg_type_mapping.unique())
        if sia_params_generation_lock:
            sia_params_generation_lock.release()
        return self.sia2024
    else:
        op_fact_class = cesarp.common.get_class_from_str(op_fact_class_name)
        return op_fact_class(passive_cooling_op_fact, self._unit_reg,
                             self._custom_config)
def _update_seq_feature_ids(map_dict: dict, seqs_df: pd.Series) -> pd.Series:
    seqs_dict = seqs_df.to_dict()
    mapped_seqs_dict = dict()
    for sp, seq in seqs_dict.items():
        mapped_sp = map_dict[sp]
        mapped_seqs_dict[mapped_sp] = seq
    return pd.Series(mapped_seqs_dict)
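# Hedged usage sketch for _update_seq_feature_ids; the species IDs and
# sequences are invented.
import pandas as pd

seqs = pd.Series({"sp1": "ATGCC", "sp2": "GGCTA"})
id_map = {"sp1": "Homo_sapiens", "sp2": "Mus_musculus"}
print(_update_seq_feature_ids(id_map, seqs))
# Homo_sapiens    ATGCC
# Mus_musculus    GGCTA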
def _get_error_statuses(status: pd.Series) -> List[str]:
    errors = [
        status_key
        for status_key, status_value in status.to_dict().items()
        if status_value and _ALARM_KEYWORD in status_key
    ]
    return errors
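# Hedged usage sketch for _get_error_statuses. _ALARM_KEYWORD is defined
# elsewhere in the original module; here we assume it is the substring "alarm".
import pandas as pd

_ALARM_KEYWORD = "alarm"
status = pd.Series({"temp_alarm": True, "fan_alarm": False, "uptime_ok": True})
print(_get_error_statuses(status))  # ['temp_alarm']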
def setup(self, mapper, dtype):
    map_size = 1000
    map_data = Series(map_size - np.arange(map_size), dtype=dtype)

    # construct mapper
    if mapper == "Series":
        self.map_data = map_data
    elif mapper == "dict":
        self.map_data = map_data.to_dict()
    elif mapper == "lambda":
        map_dict = map_data.to_dict()
        self.map_data = lambda x: map_dict[x]
    else:
        raise NotImplementedError

    self.s = Series(np.random.randint(0, map_size, 10000), dtype=dtype)
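# This setup() comes from an asv-style benchmark class; the timed method is not
# shown above. A plausible companion (hypothetical, not from the source):
def time_map(self, mapper, dtype):
    self.s.map(self.map_data)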
def __pretty__(classroom: pd.Series):
    address = Address(**classroom.to_dict())
    location = str(address).replace("\n", ", ")
    name = classroom[rsrc.INDEX.NAME]
    code = classroom[rsrc.INDEX.CODE]
    return pd.Series([name, code, location], index=["name", "code", "address"])
def _create_lens(row: pd.Series) -> Lens:
    data = row.to_dict()
    for column in LensController.JSON_COLUMNS:
        data[column] = json.loads(data[column])
    return Lens(**data)
def standardize_address(address_series: pd.Series, address_column_map: dict) -> dict:
    """
    Create a standardized address dictionary from *address_series* with the
    expected address keys for the SmartyStreets API provided in
    *address_column_map*.

    All address values are converted to uppercase and stripped of leading and
    trailing whitespace.
    """
    address_columns = list(filter(None, address_column_map.values()))
    if not address_columns:
        raise NoAddressColumnsFoundError(address_column_map)

    address = address_series.to_dict()
    std_address: Dict[str, str] = {}

    for standardized_column, provided_column in address_column_map.items():
        std_address[standardized_column] = None
        if provided_column:
            std_address[standardized_column] = address[provided_column].upper().strip()

    return std_address
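# Hedged usage sketch for standardize_address; the column names and values
# are invented. Unmapped keys (provided column is None) stay None.
import pandas as pd

column_map = {"street": "addr1", "city": "city_name", "zipcode": "zip", "state": None}
row = pd.Series({"addr1": " 123 main st ", "city_name": "seattle", "zip": "98109"})
print(standardize_address(row, column_map))
# {'street': '123 MAIN ST', 'city': 'SEATTLE', 'zipcode': '98109', 'state': None}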
def extract_track_data(prefix: str, row: pd.Series,
                       index: int = 0) -> typing.Union[dict, None]:
    """
    Extracts the track data for a single track of the specified limb prefix
    from the given row data.

    :param prefix: A limb identifier, which is one of lp, rp, lm or rm
    :param row: The row from the tracksim CSV file from which the track will
        be extracted
    :param index: Index of the row within the tracksim CSV source file
    :return: A dictionary containing the data for the specified track if such
        a track exists, or None if no track exists for the given limb within
        the row
    """
    if not has_track_data(prefix, row):
        return None

    key_prefix = '{}_'.format(prefix)
    data = dict([
        (key[len(key_prefix):], value)
        for key, value in row.to_dict().items()
        if key.startswith(key_prefix)
    ])

    data['limb'] = prefix
    data['is_left'] = bool(prefix[0] == 'l')
    data['is_pes'] = bool(prefix[1] == 'p')
    data['limb_index'] = index
    return data
def calc_best_pathway_features_m(p):
    ys = m_hfac[hfactor]
    # NOTE: .ix is long deprecated; on modern pandas this would be .loc
    xs = metab_i.ix[ys.index, keggp_comp[p]].dropna(how='all', axis=1)
    yss = [1 if metab_cinfo.ix[i, 'TISSUE TYPE'] == 'T' else 0 for i in ys.index]
    if xs.shape[1] > 1:
        feat = Series(dict(zip(*(
            xs.columns,
            RidgeCV(cv=StratifiedShuffleSplit(yss, test_size=.3)).fit(xs, ys).coef_
        ))))
        return feat.to_dict()
def calc_best_pathway_features_t(p):
    ys = t_hfac[hfactor]
    # NOTE: .ix is long deprecated; on modern pandas this would be .loc
    xs = trans_n.ix[ys.index, keggp_gene[p]].dropna(how='all', axis=1)
    yss = [1 if i.split('-')[3].startswith('01') else 0 for i in ys.index]
    if xs.shape[1] > 1:
        feat = Series(dict(zip(*(
            xs.columns,
            RidgeCV(cv=StratifiedShuffleSplit(yss, test_size=.3)).fit(xs, ys).coef_
        ))))
        return feat.to_dict()
def __init__(self, df: pd.Series):
    df = df.to_dict()
    # Note the leading spaces in the keys: they likely come from an
    # unstripped CSV header.
    self.xc = float(df[' xcenter'])
    self.yc = float(df[' ycenter'])
    self.rh = float(df[' rhorizontal'])
    self.rv = float(df[' rvertical'])
    self.shape = str(df[' shape']).strip()
    assert self.shape in [Figure.CIRCLE, Figure.RECT]
def object2proto(obj: pd.Series) -> PandasSeries_PB:
    series_dict = PrimitiveFactory.generate_primitive(value=obj.to_dict())
    dict_proto = series_dict._object2proto()
    return PandasSeries_PB(
        id=dict_proto.id,
        series=dict_proto,
    )
def update_series(self, name: str, series: pd.Series) -> None:
    drawing = self.__sd.drawing
    lines = dict()
    if self.__split_dict(series.to_dict(), lines):
        for _name, _data in lines.items():
            drawing.add_scatter(_name, _data)
    else:
        drawing.add_scatter(name, lines)
def store_dataset_row(self, dataset_row: pandas.Series):
    dataset_dict = dataset_row.to_dict()

    company_properties = {
        k: v for k, v in dataset_dict.items()
        if k not in ['CEO', 'LEGAL_ADDRESS', 'SITE', 'EMAIL', 'PHONES']
    }
    self.merge_node('SPARK_ID', ['Company'], company_properties)

    if dataset_dict['CEO']:
        ceo_properties = {'FULL_NAME': dataset_dict['CEO']}
        self.merge_node('FULL_NAME', ['Person'], ceo_properties)
        self.merge_directed_edge(left_node_by_key='SPARK_ID',
                                 left_node_by=company_properties,
                                 edge_label='CEO_OF',
                                 right_node_by_key='FULL_NAME',
                                 right_node_by=ceo_properties)

    if dataset_dict['LEGAL_ADDRESS']:
        legal_address_properties = {'LEGAL_ADDRESS': dataset_dict['LEGAL_ADDRESS']}
        self.merge_node('LEGAL_ADDRESS', ['Address'], legal_address_properties)
        self.merge_directed_edge(left_node_by_key='SPARK_ID',
                                 left_node_by=company_properties,
                                 edge_label='LEGAL_ADDRESS_OF',
                                 right_node_by_key='LEGAL_ADDRESS',
                                 right_node_by=legal_address_properties)

    if dataset_dict['SITE']:
        site_properties = {'SITE': dataset_dict['SITE']}
        self.merge_node('SITE', ['Site'], site_properties)
        self.merge_directed_edge(left_node_by_key='SPARK_ID',
                                 left_node_by=company_properties,
                                 edge_label='SITE_OF',
                                 right_node_by_key='SITE',
                                 right_node_by=site_properties)

    if dataset_dict['EMAIL']:
        email_properties = {'EMAIL': dataset_dict['EMAIL']}
        self.merge_node('EMAIL', ['Email'], email_properties)
        self.merge_directed_edge(left_node_by_key='SPARK_ID',
                                 left_node_by=company_properties,
                                 edge_label='EMAIL_OF',
                                 right_node_by_key='EMAIL',
                                 right_node_by=email_properties)

    if dataset_dict['PHONES']:
        for phone in dataset_dict['PHONES']:
            phone_property = {'PHONE': phone}
            self.merge_node('PHONE', ['Phone'], phone_property)
            self.merge_directed_edge(left_node_by_key='SPARK_ID',
                                     left_node_by=company_properties,
                                     edge_label='PHONE_OF',
                                     right_node_by_key='PHONE',
                                     right_node_by=phone_property)
def create_from_row(cls, row: pd.Series) -> "Action":
    """Create an Action instance from a dataframe row."""
    fields = [
        key for key, value in cls.__dataclass_fields__.items()
        if value.type != ClassVar
    ]
    d = {key: value for key, value in row.to_dict().items() if key in fields}
    return cls(**d)
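# Hedged sketch of the same row-filtering idea with a stand-in dataclass
# (Trade is invented; the original filters out ClassVar pseudo-fields via
# __dataclass_fields__, here dataclasses.fields() does the equivalent job).
from dataclasses import dataclass, fields
import pandas as pd

@dataclass
class Trade:
    symbol: str
    qty: int

row = pd.Series({"symbol": "XYZ", "qty": 3, "noise": "dropped"})
allowed = {f.name for f in fields(Trade)}
trade = Trade(**{k: v for k, v in row.to_dict().items() if k in allowed})
print(trade)  # Trade(symbol='XYZ', qty=3)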
def _print_process(process_row: pd.Series, fmt: str = 'html') -> str:
    """
    Format individual process item as text or html.

    Parameters
    ----------
    process_row : pd.Series
        Process series
    fmt : str, optional
        Format ('txt' or 'html') (the default is 'html')

    Returns
    -------
    str
        Formatted process summary.
    """
    if process_row.NodeRole == 'parent':
        if process_row.Level > 1:
            level = 0
        else:
            level = 1
    elif process_row.NodeRole == 'source':
        level = 2
    elif process_row.NodeRole == 'child':
        level = 3 + process_row.Level
    else:
        level = 2

    px_spaces = 20 * level * 2
    txt_spaces = ' ' * (4 * level)
    font_col = 'red' if process_row.NodeRole == 'source' else 'black'

    if fmt.lower() == 'html':
        l1_span = f'<span style="color:{font_col};font-size:90%">'
        line1_tmplt = (l1_span +
                       '[{NodeRole}:lev{Level}] {TimeGenerated} ' +
                       '<b>{NewProcessName}</b> ' +
                       '[PID: {NewProcessId}, ' +
                       'SubjSess:{SubjectLogonId}, ' +
                       'TargSess:{TargetLogonId}]')
        line2_tmplt = '(Cmdline: "{CommandLine}") [Account: {Account}]</span>'
        output_tmplt = (f'<div style="margin-left:{px_spaces}px">' +
                        f'{{line1}}<br>{{line2}}</div>')
    else:
        line1_tmplt = ('[{NodeRole}:lev{Level}] {TimeGenerated} ' +
                       '{NewProcessName} ' +
                       '[PID: {NewProcessId}, ' +
                       'SubjSess:{SubjectLogonId}, ' +
                       'TargSess:{TargetLogonId}]')
        line2_tmplt = '(Cmdline: "{CommandLine}") [Account: {Account}]'
        output_tmplt = f'\n{txt_spaces}{{line1}}\n{txt_spaces}{{line2}}'

    line1 = line1_tmplt.format(**(process_row.to_dict()))
    line2 = line2_tmplt.format(**(process_row.to_dict()))
    return output_tmplt.format(line1=line1, line2=line2)
def _print_process(process_row: pd.Series, fmt: str = "html") -> str: """ Format individual process item as text or html. Parameters ---------- process_row : pd.Series Process series fmt : str, optional Format ('txt' or 'html') (the default is ' html') Returns ------- str Formatted process summary. """ if process_row.NodeRole == "parent": if process_row.Level > 1: level = 0 else: level = 1 elif process_row.NodeRole == "source": level = 2 elif process_row.NodeRole == "child": level = 3 + process_row.Level else: level = 2 px_spaces = 20 * level * 2 txt_spaces = " " * (4 * level) font_col = "red" if process_row.NodeRole == "source" else "black" if fmt.lower() == "html": l1_span = f'<span style="color:{font_col};font-size:90%">' line1_tmplt = (l1_span + "[{NodeRole}:lev{Level}] {TimeGenerated} " + "<b>{NewProcessName}</b> " + "[PID: {NewProcessId}, " + "SubjSess:{SubjectLogonId}, " + "TargSess:{TargetLogonId}]") line2_tmplt = '(Cmdline: "{CommandLine}") [Account: {Account}]</span>' "" output_tmplt = (f'<div style="margin-left:{px_spaces}px">' + f"{{line1}}<br>{{line2}}</div>") else: line1_tmplt = ("[{NodeRole}:lev{Level}] {TimeGenerated} " + "{NewProcessName} " + "[PID: {NewProcessId}, " + "SubjSess:{SubjectLogonId}, " + "TargSess:{TargetLogonId}]") line2_tmplt = '(Cmdline: "{CommandLine}") [Account: {Account}]' output_tmplt = f"\n{txt_spaces}{{line1}}\n{txt_spaces}{{line2}}" line1 = line1_tmplt.format(**(process_row.to_dict())) line2 = line2_tmplt.format(**(process_row.to_dict())) return output_tmplt.format(line1=line1, line2=line2)
def append_to_global_metrics(metrics: pd.Series, band: int, n_subj: int,
                             n_feat: int, repetition: int, filepath: Path) -> None:
    new_row = pd.DataFrame([{"Band": band,
                             "NSubjects": n_subj,
                             "NFeatures": n_feat,
                             "Repetition": repetition,
                             **metrics.to_dict()}])
    if filepath.stat().st_size > 0:
        out_df: pd.DataFrame = pd.read_csv(filepath)
        # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
        out_df = pd.concat([out_df, new_row], ignore_index=True)
    else:
        out_df = new_row
    out_df.to_csv(filepath, index=False)
def test_map(self, datetime_series):
    index, data = tm.getMixedTypeDict()

    source = Series(data["B"], index=data["C"])
    target = Series(data["C"][:4], index=data["D"][:4])

    merged = target.map(source)
    for k, v in merged.items():
        assert v == source[target[k]]

    # input could be a dict
    merged = target.map(source.to_dict())
    for k, v in merged.items():
        assert v == source[target[k]]

    # function
    result = datetime_series.map(lambda x: x * 2)
    tm.assert_series_equal(result, datetime_series * 2)

    # GH 10324
    a = Series([1, 2, 3, 4])
    b = Series(["even", "odd", "even", "odd"], dtype="category")
    c = Series(["even", "odd", "even", "odd"])

    exp = Series(["odd", "even", "odd", np.nan], dtype="category")
    tm.assert_series_equal(a.map(b), exp)
    exp = Series(["odd", "even", "odd", np.nan])
    tm.assert_series_equal(a.map(c), exp)

    a = Series(["a", "b", "c", "d"])
    b = Series([1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"]))
    c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"]))

    exp = Series([np.nan, 1, 2, 3])
    tm.assert_series_equal(a.map(b), exp)
    exp = Series([np.nan, 1, 2, 3])
    tm.assert_series_equal(a.map(c), exp)

    a = Series(["a", "b", "c", "d"])
    b = Series(
        ["B", "C", "D", "E"],
        dtype="category",
        index=pd.CategoricalIndex(["b", "c", "d", "e"]),
    )
    c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"]))

    exp = Series(
        pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"]))
    tm.assert_series_equal(a.map(b), exp)
    exp = Series([np.nan, "B", "C", "D"])
    tm.assert_series_equal(a.map(c), exp)
def model_predict(self):
    if self.model is None:
        try:
            childWindow().exec_()
            self.model_path = self.open_model()
            self.model = load(self.model_path)
        except Exception:
            self.model = None
            return

    s = Series(dtype=object)
    s['f0'] = self.isCardProverty.isChecked()
    s['f1'] = self.isLowest.isChecked()
    s['f2'] = self.isFiveGuarantee.isChecked()
    s['f3'] = self.isOrphan.isChecked()
    s['f4'] = self.isMartyrsFamily.isChecked()
    s['f5'] = self.isBussiness.isChecked()
    s['f6'] = self.isFarm.isChecked()
    s['f7'] = self.isRetire.isChecked()
    s['f8'] = self.noIncome.isChecked()
    s['f9'] = self.isWork.isChecked()
    s['f10'] = self.bothUnemployed.isChecked()
    s['f11'] = self.eitherUnemployed.isChecked()
    s['f12'] = self.income.value() / self.household.value()
    s['f13'] = self.numUniv.value()
    s['f14'] = self.numHigh.value()
    s['f15'] = self.numPrim.value()
    s['f16'] = self.grandParentDisease.isChecked()
    s['f17'] = self.parentDivorce.isChecked()
    s['f18'] = self.oneParentNormalDisease.isChecked()
    s['f19'] = self.bothParentNormalDisease.isChecked()
    s['f20'] = self.siblingDisease.isChecked()
    s['f21'] = self.oneParentSeriousDisease.isChecked()
    s['f22'] = self.bothParentSeriousDisease.isChecked()
    s['f23'] = self.parentPassAway.isChecked()
    s['f24'] = self.naturalAccident.isChecked()
    s['f29'] = self.household.value()
    s['f30'] = self.yesLoan.isChecked()
    s['f31'] = self.isRuralResident.isChecked()

    try:
        ss = Series(dtype=object)
        ss['0'] = self.ethnic.text()
        s['f28'] = DataSet.do_ethnic_group(ss)['0']
        ss['0'] = self.scholarshipText.toPlainText().replace('\n', '')
        d = DataSet.do_scholarship(ss)
        s['f25'] = d['助学金个数']['0']  # number of grants
        s['f26'] = d['助学金金额']['0']  # grant amount
        s['f27'] = d['国助类型']['0']    # type of state aid
        d = DataFrame(s.to_dict(), index=[0])
        ans = self.model.predict(d)[0]
        # Labels: "severely in need", "somewhat in need", "probably not in need"
        ans_type = ['非常困难', '一般困难', '可能为非困难生']
        self.resultArea.setText(ans_type[ans])
    except Exception as e:
        # "Bad input or model import error; check the input or re-import the model"
        self.resultArea.setText('输入有误或模型导入错误\n请检查输入或重新导入模型')
        print(e)
class TestMultinomial(TestCase):

    def setUp(self) -> None:
        self.p = Series({'a': 0.4, 'b': 0.3, 'c': 0.2, 'd': 0.1})
        self.m_array = Multinomial(n=10, p=self.p.values)
        self.m_series = Multinomial(n=10, p=self.p)
        self.m_dict = Multinomial(n=10, p=self.p.to_dict())

    def test_init_with_array(self):
        expected = Series({'p1': 0.4, 'p2': 0.3, 'p3': 0.2, 'p4': 0.1})
        actual = self.m_array.p
        self.assertTrue(expected.equals(actual))

    def test_init_with_series(self):
        expected = self.p
        actual = self.m_series.p
        self.assertTrue(expected.equals(actual))

    def test_init_with_dict(self):
        expected = self.p
        actual = self.m_dict.p
        self.assertTrue(expected.equals(actual))

    def test_set_p_with_array(self):
        m = Multinomial(n=10, p=[0.1, 0.2, 0.3, 0.4])
        expected = Series({'p1': 0.4, 'p2': 0.3, 'p3': 0.2, 'p4': 0.1})
        m.p = [0.4, 0.3, 0.2, 0.1]
        actual = m.p
        self.assertTrue(expected.equals(actual))

    def test_set_p_with_series(self):
        m = Multinomial(n=10, p=[0.1, 0.2, 0.3, 0.4])
        expected = Series({'x1': 0.4, 'x2': 0.3, 'x3': 0.2, 'x4': 0.1})
        m.p = expected
        actual = m.p
        self.assertTrue(expected.equals(actual))

    def test_str(self):
        self.assertEqual('Multinomial(p1=0.4, p2=0.3, p3=0.2, p4=0.1)',
                         str(self.m_array))
        self.assertEqual('Multinomial(a=0.4, b=0.3, c=0.2, d=0.1)',
                         str(self.m_series))

    def test_get_item(self):
        for k, v in self.m_series.p.items():
            expected = Binomial(n=self.m_series.n, p=self.m_series.p[k])
            actual = self.m_series[k]
            self.assertTrue(expected == actual)
def create_inverse(terms: pd.Series, listseries: pd.Series):
    """Uses the two series to create an inverted-index DataFrame."""
    df = terms.to_frame("term")
    diction = listseries.to_dict()
    # For each term, collect the indexes of the rows in which it appears.
    df["indexes"] = df["term"].apply(
        lambda x: [key for key, value in diction.items() if x in value])
    df["frequency"] = df["indexes"].apply(lambda x: len(x))
    return df
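# Hedged usage sketch for create_inverse; the documents and terms are invented.
import pandas as pd

terms = pd.Series(["cat", "dog"])
docs = pd.Series({0: ["cat", "fish"], 1: ["dog"], 2: ["cat", "dog"]})
print(create_inverse(terms, docs))
#   term indexes  frequency
# 0  cat  [0, 2]          2
# 1  dog  [1, 2]          2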
def get_anomaly_series(series: pd.Series,
                       algorithm: str = "bitmap_detector") -> pd.Series:
    assert algorithm in [
        "bitmap_detector", "derivative_detector", "exp_avg_detector"
    ]
    ts = series.to_dict()
    detector = AnomalyDetector(ts, algorithm_name=algorithm)
    scores = detector.get_all_scores()
    scores = [s for _, s in scores.iteritems()]
    return pd.Series(scores)
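# Hedged usage sketch: AnomalyDetector here is luminol's, so this assumes
# `pip install luminol` and `from luminol.anomaly_detector import AnomalyDetector`.
# The positional index labels serve as the timestamps luminol expects.
import pandas as pd

ts = pd.Series([1.0] * 50 + [10.0] + [1.0] * 50)
scores = get_anomaly_series(ts, algorithm="exp_avg_detector")
print(scores.idxmax())  # expected to flag the spike around position 50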
def inverse_transform(df: pd.DataFrame, col_means: pd.Series) -> pd.DataFrame:
    df = df.copy()
    df['measure'] = df['measure'].str.replace('_log10_scaled', '')
    std = (df['upper'] - df['lower']) / 1.96
    for col in ['mean', 'lower', 'upper', 'actual']:
        if col == 'mean':
            # bias correction
            df[col] = df[col] + .5 * std**2
        df[col] = 10**df[col]                              # inverse log10
        df[col] *= df['measure'].map(col_means.to_dict())  # inverse scaling
    return df
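# Hedged usage sketch for inverse_transform; the column names follow the
# function's expectations, the numbers are invented.
import pandas as pd

col_means = pd.Series({'sales': 200.0})
df = pd.DataFrame({'measure': ['sales_log10_scaled'],
                   'mean': [0.10], 'lower': [0.00],
                   'upper': [0.20], 'actual': [0.12]})
print(inverse_transform(df, col_means))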
def daily_profit(self):
    """Daily profit and loss."""
    profit = Series(np.where(self.account_df['profit'] >= 0,
                             self.account_df['profit'], float('nan')))  # gains
    loss = Series(np.where(self.account_df['profit'] < 0,
                           self.account_df['profit'], float('nan')))    # losses
    return [{
        self.report_id: {
            "charts": {
                "daily_profit": {
                    "title": {"left": 'center', "text": "每日盈亏"},  # "daily P&L"
                    "xAxis": {"type": 'category',
                              "data": self.account_df['date'].to_dict()},
                    "yAxis": {"type": 'value'},
                    "series": {
                        "0": {"data": profit.to_dict(),
                              "type": 'bar',
                              "itemStyle": {"color": "#ee6666"},
                              "stack": 'one'},
                        "1": {"data": loss.to_dict(),
                              "type": 'bar',
                              "itemStyle": {"color": "#91cc75"},
                              "stack": 'one'},
                    }
                }
            }
        }
    }]
def test_map(self):
    index, data = tm.getMixedTypeDict()

    source = Series(data['B'], index=data['C'])
    target = Series(data['C'][:4], index=data['D'][:4])

    merged = target.map(source)
    for k, v in compat.iteritems(merged):
        assert v == source[target[k]]

    # input could be a dict
    merged = target.map(source.to_dict())
    for k, v in compat.iteritems(merged):
        assert v == source[target[k]]

    # function
    result = self.ts.map(lambda x: x * 2)
    tm.assert_series_equal(result, self.ts * 2)

    # GH 10324
    a = Series([1, 2, 3, 4])
    b = Series(["even", "odd", "even", "odd"], dtype="category")
    c = Series(["even", "odd", "even", "odd"])

    exp = Series(["odd", "even", "odd", np.nan], dtype="category")
    tm.assert_series_equal(a.map(b), exp)
    exp = Series(["odd", "even", "odd", np.nan])
    tm.assert_series_equal(a.map(c), exp)

    a = Series(['a', 'b', 'c', 'd'])
    b = Series([1, 2, 3, 4],
               index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
    c = Series([1, 2, 3, 4], index=Index(['b', 'c', 'd', 'e']))

    exp = Series([np.nan, 1, 2, 3])
    tm.assert_series_equal(a.map(b), exp)
    exp = Series([np.nan, 1, 2, 3])
    tm.assert_series_equal(a.map(c), exp)

    a = Series(['a', 'b', 'c', 'd'])
    b = Series(['B', 'C', 'D', 'E'], dtype='category',
               index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
    c = Series(['B', 'C', 'D', 'E'], index=Index(['b', 'c', 'd', 'e']))

    exp = Series(pd.Categorical([np.nan, 'B', 'C', 'D'],
                                categories=['B', 'C', 'D', 'E']))
    tm.assert_series_equal(a.map(b), exp)
    exp = Series([np.nan, 'B', 'C', 'D'])
    tm.assert_series_equal(a.map(c), exp)
def test_map(self):
    index, data = tm.getMixedTypeDict()

    source = Series(data['B'], index=data['C'])
    target = Series(data['C'][:4], index=data['D'][:4])

    merged = target.map(source)
    for k, v in merged.iteritems():
        self.assertEqual(v, source[target[k]])

    # input could be a dict
    merged = target.map(source.to_dict())
    for k, v in merged.iteritems():
        self.assertEqual(v, source[target[k]])

    # function
    result = self.ts.map(lambda x: x * 2)
    self.assert_(np.array_equal(result, self.ts * 2))
countries = ['USSR', 'Germany', 'China', 'Japan', 'USA']
obj2 = Series(ww2_dict, index=countries)  # another way to create a Series

ww2_cas.values                 # returns the Series values
ww2_cas.index                  # returns the Series index values
ww2_cas['USA']                 # extract the value at a given index label
ww2_cas[['USA', 'China']]      # returns the values for the labels in the list
ww2_cas[4]                     # returns the 5th value in the Series
ww2_cas[0:3]                   # returns the first three values
ww2_cas[-1]                    # returns the last value in the Series
ww2_cas[ww2_cas > 4000000]     # extract values matching a condition
ww2_cas['USSR'] = 1            # replaces the value at the index label 'USSR'
'USSR' in ww2_cas              # returns boolean True or False

ww2_dict = ww2_cas.to_dict()   # to_dict() converts a Series to a dictionary
ww2_series = Series(ww2_dict)  # convert back to a Series with the constructor
pd.isnull(obj2)                # returns True for indexes that hold NaN
pd.notnull(obj2)               # opposite
ww2_series + obj2              # new Series built by adding values index-by-index
obj2.name = "world war 2 casualties"  # name your Series
obj2.index.name = 'Countries'         # name your index

ser1 = Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])
# reindex does exactly that: adds new indexes to a Series.
# Use fill_value, else the new values will be NA.
ser2 = ser1.reindex(['A', 'B', 'C', 'D', 'E', 'F'], fill_value=0)

# forward fill and backward fill of indexes in the examples below
ser4 = Series(['USA', 'Mexico', 'Canada'], index=[0, 5, 10])
# check which countries had casualties greater than 4M
print(ww2_cas[ww2_cas > 4000000])
'''
USSR       8700000
Germany    4300000
dtype: int64
'''

# behaves like a dictionary
print('USSR' in ww2_cas)  # True

# convert to dictionary
ww2_dict = ww2_cas.to_dict()
print(ww2_dict)
# {'China': 3000000, 'USSR': 8700000, 'Germany': 4300000, 'USA': 400000, 'Japan': 2100000}

# convert back to series
ww2_series = Series(ww2_dict)
print(ww2_series)
'''
China      3000000
Germany    4300000
Japan      2100000
USA         400000
USSR       8700000
dtype: int64
'''
#############################################################################################################
# 1. Series Basics
#############################################################################################################
# A Series is a one-dimensional labeled array capable of holding any data type
# (integers, strings, floating point numbers, Python objects, etc.).
# The axis labels are collectively referred to as the index.

s = Series([3, 6, 9, 12])    # use the Series constructor to create a Series
s.values                     # returns the values in the Series
s.index                      # returns the index of the Series

gdp = Series([8700000, 4300000, 3000000],  # you can specify names/strings for indexes
             index=['USSR', 'Germany', 'China'])

gdp['USSR'] == gdp[0]        # indexing works with either the index labels or the index number
gdp[gdp > 5000000]           # you can use inequality operators to check/filter
'USA' in gdp                 # check whether a specific index exists
gdp_dict = gdp.to_dict()     # .to_dict() converts a Series to a dictionary
gdp2 = Series(gdp_dict)      # and dictionaries can be converted to Series
gdp2 = Series(gdp, index=['USSR', 'Germany', 'China', 'USA'])
pd.isnull(gdp2)              # .isnull() is used to find NaNs and nulls
pd.notnull(gdp2)             # opposite of .isnull()
gdp3 = gdp + gdp2            # Series are vectorized: you can +, -, *, / etc.
gdp3.index.name = 'Country'  # you can give the index a header
gdp3 = gdp3.drop('USA')      # delete a row with .drop()

# forward fill (ffill) propagates the last valid value to new indexes, top-down
ser3 = Series(['USA', 'Mexico', 'Canada'], index=[0, 5, 10])
ser4 = ser3.reindex(range(15), method='ffill')
print(ser3, ser4)
print('\nYou can get the values:\t', obj.values)
print('...and the indexes:\t', obj.index)

ww2_cas = Series([8700000, 4300000, 3000000, 2100000, 400000],
                 index=['USSR', 'Germany', 'China', 'Japan', 'USA'])
print('\nIndexes can be customized:\n', ww2_cas)

# Call a value
print('\nCall a value..', ww2_cas['Germany'])

# Can also check with array operations
print('or select through an expression..', ww2_cas[ww2_cas > 4000000])

# Check whether a value is present..
print('or check for the presence of a value..', 'USSR' in ww2_cas)

# Convert a Series to a Python dictionary
print('\nConvert to dictionary:\n', ww2_cas.to_dict())
print('or do the opposite:\n', Series(ww2_cas.to_dict()))

# We can use isnull and notnull to find missing data
print(pd.isnull(ww2_cas))
print(ww2_cas)

# We can mix two Series and sort...
obj2 = Series([1, 2], index=['Italy', 'France'])
# Note: sort_index(inplace=True) sorts in place and returns None, so assigning
# its result would leave s3 as None; sort without inplace to keep a reference.
s3 = pd.concat([ww2_cas, obj2]).sort_index()
print(s3)
__author__ = 'Executor'

import numpy as na
import pandas as pa
from pandas import Series, DataFrame

obj = Series([3, 6, 9, 2])
print(obj)
print(obj.index)

ww2 = Series([800, 400, 300, 200, 430],
             index=['USSR', 'Germany', 'China', 'Japan', 'USA'])
ww3 = Series([800, 400, 300, 200, 430, 500],
             index=['USSR', 'Germany', 'China', 'Japan', 'USA', 'Jamaica'])
print(ww2)

# check casualties
print(ww2[ww2 > 400])
print('USSR' in ww2)

ww2d = ww2.to_dict()
print(ww2)
print(ww2d)

# The original line `ww2.update(([300], index='Jamaica'))` is a syntax error;
# Series.update expects a Series (and only touches labels already present):
ww2.update(Series([300], index=['Jamaica']))
print(ww2)
def setup(self, mapper):
    map_size = 1000
    map_data = Series(map_size - np.arange(map_size))
    self.map_data = map_data if mapper == 'Series' else map_data.to_dict()
    self.s = Series(np.random.randint(0, map_size, 10000))