Example #1
def _interpret_series(row: pd.Series, record_type: ert.data.RecordType) -> Any:
    # membership must be checked against the enum members, not their .value payloads
    if record_type not in set(ert.data.RecordType):
        raise ValueError(
            f"Unexpected record type when loading numerical record: {record_type}"
        )

    if record_type == ert.data.RecordType.MAPPING_INT_FLOAT:
        return {int(k): v for k, v in row.to_dict().items()}
    if record_type == ert.data.RecordType.LIST_FLOAT:
        return row.to_list()
    return row.to_dict()
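A toy illustration of the two mapping branches above (a sketch assuming ert is installed and the row carries an integer-like index; the MAPPING_INT_FLOAT branch coerces the keys to int):

row = pd.Series([1.0, 2.0], index=[0, 1])
_interpret_series(row, ert.data.RecordType.LIST_FLOAT)         # [1.0, 2.0]
_interpret_series(row, ert.data.RecordType.MAPPING_INT_FLOAT)  # {0: 1.0, 1: 2.0}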
Example #2
def correlate_series(s1: pd.Series, s2: pd.Series, window_len=41):
    ts1 = s1.to_dict()
    ts2 = s2.to_dict()

    results = np.zeros(len(s1))  # positions past the last full window stay 0.0

    for i in range(len(s1) - window_len + 1):
        time_period = (i, i + window_len)
        correlator = Correlator(ts1, ts2, time_period=time_period)
        results[i] = correlator.correlation_result.coefficient

    return pd.Series(results)
Example #3
def action_set_cost(
    factual_instance: pd.Series,
    action_set: dict,
    ranges: pd.Series,
    norm_type=2,
) -> float:
    """
    Compute cost of action

    Parameters
    ----------
    factual_instance: pd.Series
        Contains a single factual instance, where each element corresponds to a feature.
    action_set: dict
        Contains perturbation of features.
    ranges: pd.Series
        Contains the feature ranges of the original dataset.
    norm_type: int
        Norm to be used, choose either 1 or 2.

    Returns
    -------
    float

    """

    factual_instance = factual_instance.to_dict()
    ranges = ranges.to_dict()

    factual_float_int = np.all(
        [isinstance(elem, (int, float)) for elem in factual_instance.values()])
    action_float_int = np.all(
        [isinstance(elem, (int, float)) for elem in action_set.values()])

    factual_torch = np.all(
        [isinstance(elem, torch.Tensor) for elem in factual_instance.values()])
    action_torch = np.all(
        [isinstance(elem, torch.Tensor) for elem in action_set.values()])

    deltas = [(action_set[key] - factual_instance[key]) / ranges[key]
              for key in action_set.keys()]

    if factual_float_int and action_float_int:
        return np.linalg.norm(deltas, norm_type)
    elif factual_torch and action_torch:
        return torch.norm(torch.stack(deltas), p=norm_type)
    else:
        raise TypeError("Mismatching or unsupported datatypes.")
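A toy call with plain floats (the feature names are illustrative only):

factual = pd.Series({"age": 30.0, "income": 40000.0})
ranges = pd.Series({"age": 80.0, "income": 100000.0})
action_set_cost(factual, {"income": 45000.0}, ranges, norm_type=2)
# -> 0.05  (a single delta of 5000/100000)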
Example #4
def get_combined_health_index(series):
    """
    Combine the per-sensor health indices into a single weighted overall index.
    """
    df = pd.DataFrame()
    for key in series.keys():
        df = pd.concat([df, pd.DataFrame({key: series[key].health_index})], axis=1)

    overall_health_index = []
    for _, row in df.iterrows():
        data = row.to_dict()
        s = 0
        weights2 = {}
        for key in data.keys():
            # `weights` is a module-level dict of per-sensor base weights
            weights2[key] = weights[key] * abs(0.5 - data[key])
            s = s + data[key] * weights2[key]
        s = s / sum(weights2.values())
        overall_health_index.append(s)
    overall_health_index = pd.DataFrame(
        {'overall_health_index': overall_health_index},
        index=series['PT-204'].time)

    return overall_health_index
Example #5
def create_item_mod(possible_mod_ids: list, item_mod: pd.Series, rare_mods: pd.DataFrame, mod_value: int) -> dict:
    logger.info('Creating item mods dict')
    for mod_id in possible_mod_ids:
        # Comparing mod with rare mods multiple times is expensive
        possible_mods = rare_mods[rare_mods.mod_id == mod_id]
        if len(possible_mods) > 0:
            for idx, row in possible_mods.iterrows():
                min_value = row['min']
                max_value = row['max']
                if min_value < 0:
                    mod_value = -mod_value
                if min_value <= mod_value <= max_value + 1:
                    item_mod = possible_mods.loc[idx, :]
                    break
            if isinstance(item_mod, dict):
                if not item_mod:
                    item_mod = possible_mods.loc[idx, :]
            else:
                if item_mod.empty:
                    item_mod = possible_mods.loc[idx, :]
            item_mod['value'] = mod_value
            item_mod = item_mod.drop(['min', 'max', 'required_level'])
            item_mod = item_mod.to_dict()
            break
    return item_mod
Example #6
def pandas_series():
    obj = Series([3, 6, 9, 12])
    print(obj)
    print(obj.values)
    print(obj.index)

    ww2_cas = Series([8700000, 4300000, 3000000, 2100000, 400000],
                     index=["USSR", "Germany", "China", "Japan", "USA"])
    print(ww2_cas)
    print(ww2_cas["USA"])
    # countries with casualties > 4 million
    print(ww2_cas[ww2_cas > 4000000])
    print("USSR" in ww2_cas)

    ww2_dict = ww2_cas.to_dict()
    print(ww2_dict)
    ww2_series = Series(ww2_dict)
    print(ww2_series)

    countries = ["China", "Germany", "Japan", "USA", "USSR", "Argentina"]
    obj2 = Series(ww2_dict, index=countries)
    print(obj2)
    print(pd.isnull(obj2))
    print(pd.notnull(obj2))

    print(ww2_series + obj2)

    obj2.name = "World War 2 Casualties"
    print(obj2)
    obj2.index.name = "Countries"
    print(obj2)

    return
Example #7
    def __create_bldg_operation_factory(
            self,
            sia_bldg_type_mapping: pd.Series,
            sia_params_generation_lock=None
    ) -> BuildingOperationFactoryProtocol:
        """
        :param sia_bldg_type_mapping: series, index is bldg fid, value bldg type as string
        :param sia_params_generation_lock: only relevant if SIA2024 profiles are used; pass an instance of a lock if SIA
        parameters might be generated from several threads at the same time in the same location
        :return: object providing the BuildingOperationFactoryProtocol, concrete implementation depends on configuration
        """
        passive_cooling_op_fact: PassiveCoolingOperationFactoryProtocol = PassiveCoolingOperationFactory(
            self._unit_reg, self._custom_config)

        op_fact_class_name: str = self._mgr_config[
            "BUILDING_OPERATION_FACTORY_CLASS"]
        if op_fact_class_name == "cesarp.SIA2024.SIA2024Facade.SIA2024Facade":
            # save as member because sia2024 is probably used as the infiltration rate source as well
            self.sia2024 = SIA2024Facade(sia_bldg_type_mapping.to_dict(),
                                         passive_cooling_op_fact,
                                         self._unit_reg, self._custom_config)

            if sia_params_generation_lock:
                sia_params_generation_lock.acquire()
            # loading has to be synchronized as well, because the 2nd process has to wait until the first has finished creating the profiles
            self.sia2024.load_or_create_parameters(
                sia_bldg_type_mapping.unique())
            if sia_params_generation_lock:
                sia_params_generation_lock.release()
            return self.sia2024
        else:
            op_fact_class = cesarp.common.get_class_from_str(
                op_fact_class_name)
            return op_fact_class(passive_cooling_op_fact, self._unit_reg,
                                 self._custom_config)
Example #8
def _update_seq_feature_ids(map_dict: dict, seqs_df: pd.Series) -> pd.Series:
    seqs_dict = seqs_df.to_dict()
    mapped_seqs_dict = dict()
    for sp, seq in seqs_dict.items():
        mapped_sp = map_dict[sp]
        mapped_seqs_dict[mapped_sp] = seq
    return pd.Series(mapped_seqs_dict)
Example #9
def _get_error_statuses(status: pd.Series) -> List[str]:
    errors = [
        status_key
        for status_key, status_value in status.to_dict().items()
        if status_value and _ALARM_KEYWORD in status_key
    ]
    return errors
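A toy illustration (assuming the module-level `_ALARM_KEYWORD` is the substring "alarm"; the real constant lives elsewhere in the source module):

_ALARM_KEYWORD = "alarm"  # assumed value for illustration

status = pd.Series({"temp_alarm": True, "fan_alarm": False, "power_ok": True})
_get_error_statuses(status)  # -> ['temp_alarm']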
Example #10
    def setup(self, mapper, dtype):
        map_size = 1000
        map_data = Series(map_size - np.arange(map_size), dtype=dtype)

        # construct mapper
        if mapper == "Series":
            self.map_data = map_data
        elif mapper == "dict":
            self.map_data = map_data.to_dict()
        elif mapper == "lambda":
            map_dict = map_data.to_dict()
            self.map_data = lambda x: map_dict[x]
        else:
            raise NotImplementedError

        self.s = Series(np.random.randint(0, map_size, 10000), dtype=dtype)
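In an asv benchmark, this setup is typically paired with a timing method; a plausible companion (a sketch, not the original file):

    def time_map(self, mapper, dtype):
        self.s.map(self.map_data)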
Example #11
    def __pretty__(classroom: pd.Series):
        address = Address(**classroom.to_dict())
        location = str(address).replace("\n", ", ")
        name = classroom[rsrc.INDEX.NAME]
        code = classroom[rsrc.INDEX.CODE]

        return pd.Series([name, code, location], index=["name", "code", "address"])
Example #12
    def _create_lens(row: pd.Series) -> Lens:
        data = row.to_dict()

        for column in LensController.JSON_COLUMNS:
            data[column] = json.loads(data[column])

        return Lens(**data)
Example #13
def standardize_address(address_series: pd.Series,
                        address_column_map: dict) -> dict:
    """
    Create standardized address dictionary from *address_series* with the
    expected address keys for the SmartyStreets API provided in
    *address_column_map*.

    All address values are converted to uppercase and stripped of leading and
    trailing whitespaces.
    """
    address_columns = list(filter(None, address_column_map.values()))

    if not address_columns:
        raise NoAddressColumnsFoundError(address_column_map)

    address = address_series.to_dict()
    std_address: Dict[str, str] = {}

    for standardized_column, provided_column in address_column_map.items():
        std_address[standardized_column] = None
        if provided_column:
            std_address[standardized_column] = address[provided_column].upper().strip()

    return std_address
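A usage sketch with a hypothetical column map (keys and values are illustrative only):

address_column_map = {"street": "addr1", "city": "city", "zipcode": None}
row = pd.Series({"addr1": " 123 main st ", "city": "seattle"})
standardize_address(row, address_column_map)
# -> {'street': '123 MAIN ST', 'city': 'SEATTLE', 'zipcode': None}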
Example #14
def extract_track_data(prefix: str,
                       row: pd.Series,
                       index: int = 0) -> typing.Union[dict, None]:
    """
    Extracts the track data for a single track of the specified limb prefix
    from the given row data

    :param prefix:
        A limb identifier, which is one of lp, rp, lm or rm
    :param row:
        The row from the tracksim CSV file in which the track will be extracted
    :param index:
        Index of the row within the tracksim CSV source file
    :return:
        A dictionary containing the data for the specified track if such a
        track exists or None if no track exists for the given limb within
        the row
    """

    if not has_track_data(prefix, row):
        return None

    key_prefix = '{}_'.format(prefix)

    data = {key[len(key_prefix):]: value
            for key, value in row.to_dict().items()
            if key.startswith(key_prefix)}
    data['limb'] = prefix
    data['is_left'] = prefix[0] == 'l'
    data['is_pes'] = prefix[1] == 'p'
    data['limb_index'] = index

    return data
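A toy call (assuming `has_track_data` reports True whenever a column with the given prefix exists):

row = pd.Series({"lp_x": 1.0, "lp_y": 2.0, "rm_x": 3.0})
extract_track_data("lp", row, index=7)
# -> {'x': 1.0, 'y': 2.0, 'limb': 'lp', 'is_left': True, 'is_pes': True, 'limb_index': 7}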
Example #15
def calc_best_pathway_features_m(p):
    ys = m_hfac[hfactor]
    # .ix was removed from pandas; use label-based .loc instead
    xs = metab_i.loc[ys.index, keggp_comp[p]].dropna(how='all', axis=1)

    yss = [1 if metab_cinfo.loc[i, 'TISSUE TYPE'] == 'T' else 0 for i in ys.index]
    if xs.shape[1] > 1:
        # note: StratifiedShuffleSplit(yss, ...) is the legacy (pre-0.18) sklearn API
        feat = Series(dict(zip(xs.columns, RidgeCV(cv=StratifiedShuffleSplit(yss, test_size=.3)).fit(xs, ys).coef_)))
        return feat.to_dict()
Example #16
def calc_best_pathway_features_t(p):
    ys = t_hfac[hfactor]
    # .ix was removed from pandas; use label-based .loc instead
    xs = trans_n.loc[ys.index, keggp_gene[p]].dropna(how='all', axis=1)

    yss = [1 if i.split('-')[3].startswith('01') else 0 for i in ys.index]
    if xs.shape[1] > 1:
        # note: StratifiedShuffleSplit(yss, ...) is the legacy (pre-0.18) sklearn API
        feat = Series(dict(zip(xs.columns, RidgeCV(cv=StratifiedShuffleSplit(yss, test_size=.3)).fit(xs, ys).coef_)))
        return feat.to_dict()
Example #17
 def __init__(self, df: pd.Series):
     df = df.to_dict()  # the source columns carry a leading space, e.g. ' xcenter'
     self.xc = float(df[' xcenter'])
     self.yc = float(df[' ycenter'])
     self.rh = float(df[' rhorizontal'])
     self.rv = float(df[' rvertical'])
     self.shape = str(df[' shape']).strip()
     assert self.shape in [Figure.CIRCLE, Figure.RECT]
Example #18
def object2proto(obj: pd.Series) -> PandasSeries_PB:
    series_dict = PrimitiveFactory.generate_primitive(value=obj.to_dict())
    dict_proto = series_dict._object2proto()

    return PandasSeries_PB(
        id=dict_proto.id,
        series=dict_proto,
    )
Example #19
 def update_series(self, name: str, series: pd.Series) -> None:
     drawing = self.__sd.drawing
     lines = dict()
     if self.__split_dict(series.to_dict(), lines):
         for _name, _data in lines.items():
             drawing.add_scatter(_name, _data)
     else:
         drawing.add_scatter(name, lines)
Example #20
    def __pretty__(classroom: pd.Series):
        address = Address(**classroom.to_dict())
        location = str(address).replace('\n', ', ')
        name = classroom[rsrc.INDEX.NAME]
        code = classroom[rsrc.INDEX.CODE]

        return pd.Series([name, code, location],
                         index=['name', 'code', 'address'])
Example #21
    def store_dataset_row(self, dataset_row: pandas.Series):
        dataset_dict = dataset_row.to_dict()
        company_properties = {
            k: v
            for k, v in dataset_dict.items()
            if k not in ['CEO', 'LEGAL_ADDRESS', 'SITE', 'EMAIL', 'PHONES']
        }
        self.merge_node('SPARK_ID', ['Company'], company_properties)

        if dataset_dict['CEO']:
            ceo_properties = {'FULL_NAME': dataset_dict['CEO']}
            self.merge_node('FULL_NAME', ['Person'], ceo_properties)
            self.merge_directed_edge(left_node_by_key='SPARK_ID',
                                     left_node_by=company_properties,
                                     edge_label='CEO_OF',
                                     right_node_by_key='FULL_NAME',
                                     right_node_by=ceo_properties)

        if dataset_dict['LEGAL_ADDRESS']:
            legal_address_properties = {
                'LEGAL_ADDRESS': dataset_dict['LEGAL_ADDRESS']
            }
            self.merge_node('LEGAL_ADDRESS', ['Address'],
                            legal_address_properties)
            self.merge_directed_edge(left_node_by_key='SPARK_ID',
                                     left_node_by=company_properties,
                                     edge_label='LEGAL_ADDRESS_OF',
                                     right_node_by_key='LEGAL_ADDRESS',
                                     right_node_by=legal_address_properties)

        if dataset_dict['SITE']:
            site_properties = {'SITE': dataset_dict['SITE']}
            self.merge_node('SITE', ['Site'], site_properties)
            self.merge_directed_edge(left_node_by_key='SPARK_ID',
                                     left_node_by=company_properties,
                                     edge_label='SITE_OF',
                                     right_node_by_key='SITE',
                                     right_node_by=site_properties)

        if dataset_dict['EMAIL']:
            email_properties = {'EMAIL': dataset_dict['EMAIL']}
            self.merge_node('EMAIL', ['Email'], email_properties)
            self.merge_directed_edge(left_node_by_key='SPARK_ID',
                                     left_node_by=company_properties,
                                     edge_label='EMAIL_OF',
                                     right_node_by_key='EMAIL',
                                     right_node_by=email_properties)

        if dataset_dict['PHONES']:
            for phone in dataset_dict['PHONES']:
                phone_property = {'PHONE': phone}
                self.merge_node('PHONE', ['Phone'], phone_property)
                self.merge_directed_edge(left_node_by_key='SPARK_ID',
                                         left_node_by=company_properties,
                                         edge_label='PHONE_OF',
                                         right_node_by_key='PHONE',
                                         right_node_by=phone_property)
Example #22
 def create_from_row(cls, row: pd.Series) -> "Action":
     """ Create an Action instance from a dataframe row. """
     fields = [
         key
         for key, value in cls.__dataclass_fields__.items()
         if value.type != ClassVar
     ]
     d = {key: value for key, value in row.to_dict().items() if key in fields}
     return cls(**d)
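A minimal self-contained sketch of how this pairs with a dataclass (toy `Action` with illustrative fields; `create_from_row` attached exactly as defined above):

from dataclasses import dataclass
from typing import ClassVar
import pandas as pd

@dataclass
class Action:
    name: str
    cost: float

    @classmethod
    def create_from_row(cls, row: pd.Series) -> "Action":
        fields = [
            key
            for key, value in cls.__dataclass_fields__.items()
            if value.type != ClassVar
        ]
        d = {key: value for key, value in row.to_dict().items() if key in fields}
        return cls(**d)

row = pd.Series({"name": "buy", "cost": 1.5, "extra": "ignored"})
Action.create_from_row(row)  # -> Action(name='buy', cost=1.5); "extra" is filtered out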
Example #23
def _print_process(process_row: pd.Series, fmt: str = 'html') -> str:
    """
    Format individual process item as text or html.

    Parameters
    ----------
    process_row : pd.Series
        Process series
    fmt : str, optional
        Format ('txt' or 'html')
        (the default is 'html')

    Returns
    -------
    str
        Formatted process summary.

    """
    if process_row.NodeRole == 'parent':
        if process_row.Level > 1:
            level = 0
        else:
            level = 1
    elif process_row.NodeRole == 'source':
        level = 2
    elif process_row.NodeRole == 'child':
        level = 3 + process_row.Level
    else:
        level = 2

    px_spaces = 20 * level * 2
    txt_spaces = ' ' * (4 * level)

    font_col = ('red' if process_row.NodeRole == 'source' else 'black')

    if fmt.lower() == 'html':
        l1_span = f'<span style="color:{font_col};font-size:90%">'
        line1_tmplt = (l1_span + '[{NodeRole}:lev{Level}] {TimeGenerated} ' +
                       '<b>{NewProcessName}</b> ' + '[PID: {NewProcessId}, ' +
                       'SubjSess:{SubjectLogonId}, ' +
                       'TargSess:{TargetLogonId}]')
        line2_tmplt = '(Cmdline: "{CommandLine}") [Account: {Account}]</span>'
        output_tmplt = (f'<div style="margin-left:{px_spaces}px">' +
                        f'{{line1}}<br>{{line2}}</div>')

    else:
        line1_tmplt = ('[{NodeRole}:lev{Level}] {TimeGenerated} ' +
                       '{NewProcessName} ' + '[PID: {NewProcessId}, ' +
                       'SubjSess:{SubjectLogonId}, ' +
                       'TargSess:{TargetLogonId}]')
        line2_tmplt = '(Cmdline: "{CommandLine}") [Account: {Account}]'
        output_tmplt = f'\n{txt_spaces}{{line1}}\n{txt_spaces}{{line2}}'
    line1 = line1_tmplt.format(**(process_row.to_dict()))
    line2 = line2_tmplt.format(**(process_row.to_dict()))

    return output_tmplt.format(line1=line1, line2=line2)
Example #24
def _print_process(process_row: pd.Series, fmt: str = "html") -> str:
    """
    Format individual process item as text or html.

    Parameters
    ----------
    process_row : pd.Series
        Process series
    fmt : str, optional
        Format ('txt' or 'html')
        (the default is 'html')

    Returns
    -------
    str
        Formatted process summary.

    """
    if process_row.NodeRole == "parent":
        if process_row.Level > 1:
            level = 0
        else:
            level = 1
    elif process_row.NodeRole == "source":
        level = 2
    elif process_row.NodeRole == "child":
        level = 3 + process_row.Level
    else:
        level = 2

    px_spaces = 20 * level * 2
    txt_spaces = " " * (4 * level)

    font_col = "red" if process_row.NodeRole == "source" else "black"

    if fmt.lower() == "html":
        l1_span = f'<span style="color:{font_col};font-size:90%">'
        line1_tmplt = (l1_span + "[{NodeRole}:lev{Level}] {TimeGenerated} " +
                       "<b>{NewProcessName}</b> " + "[PID: {NewProcessId}, " +
                       "SubjSess:{SubjectLogonId}, " +
                       "TargSess:{TargetLogonId}]")
        line2_tmplt = '(Cmdline: "{CommandLine}") [Account: {Account}]</span>'
        output_tmplt = (f'<div style="margin-left:{px_spaces}px">' +
                        f"{{line1}}<br>{{line2}}</div>")

    else:
        line1_tmplt = ("[{NodeRole}:lev{Level}] {TimeGenerated} " +
                       "{NewProcessName} " + "[PID: {NewProcessId}, " +
                       "SubjSess:{SubjectLogonId}, " +
                       "TargSess:{TargetLogonId}]")
        line2_tmplt = '(Cmdline: "{CommandLine}") [Account: {Account}]'
        output_tmplt = f"\n{txt_spaces}{{line1}}\n{txt_spaces}{{line2}}"
    line1 = line1_tmplt.format(**(process_row.to_dict()))
    line2 = line2_tmplt.format(**(process_row.to_dict()))

    return output_tmplt.format(line1=line1, line2=line2)
Example #25
def append_to_global_metrics(metrics: pd.Series, band: int, n_subj: int, n_feat: int, repetition: int,
                             filepath: Path) -> None:
    new_row = pd.DataFrame([{"Band": band, "NSubjects": n_subj, "NFeatures": n_feat, "Repetition": repetition,
                             **metrics.to_dict()}])
    if filepath.exists() and filepath.stat().st_size > 0:
        out_df: pd.DataFrame = pd.read_csv(filepath)
        # DataFrame.append was removed in pandas 2.0; concat is the replacement
        out_df = pd.concat([out_df, new_row], ignore_index=True)
    else:
        out_df = new_row
    out_df.to_csv(filepath, index=False)
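A usage sketch (metric names are illustrative; the CSV is touched first because the function stats the file before reading it):

from pathlib import Path

metrics = pd.Series({"accuracy": 0.91, "f1": 0.88})
Path("metrics.csv").touch(exist_ok=True)
append_to_global_metrics(metrics, band=1, n_subj=20, n_feat=64,
                         repetition=0, filepath=Path("metrics.csv"))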
Example #26
    def test_map(self, datetime_series):
        index, data = tm.getMixedTypeDict()

        source = Series(data["B"], index=data["C"])
        target = Series(data["C"][:4], index=data["D"][:4])

        merged = target.map(source)

        for k, v in merged.items():
            assert v == source[target[k]]

        # input could be a dict
        merged = target.map(source.to_dict())

        for k, v in merged.items():
            assert v == source[target[k]]

        # function
        result = datetime_series.map(lambda x: x * 2)
        tm.assert_series_equal(result, datetime_series * 2)

        # GH 10324
        a = Series([1, 2, 3, 4])
        b = Series(["even", "odd", "even", "odd"], dtype="category")
        c = Series(["even", "odd", "even", "odd"])

        exp = Series(["odd", "even", "odd", np.nan], dtype="category")
        tm.assert_series_equal(a.map(b), exp)
        exp = Series(["odd", "even", "odd", np.nan])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(["a", "b", "c", "d"])
        b = Series([1, 2, 3, 4],
                   index=pd.CategoricalIndex(["b", "c", "d", "e"]))
        c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"]))

        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(["a", "b", "c", "d"])
        b = Series(
            ["B", "C", "D", "E"],
            dtype="category",
            index=pd.CategoricalIndex(["b", "c", "d", "e"]),
        )
        c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"]))

        exp = Series(
            pd.Categorical([np.nan, "B", "C", "D"],
                           categories=["B", "C", "D", "E"]))
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, "B", "C", "D"])
        tm.assert_series_equal(a.map(c), exp)
Example #27
 def model_predict(self):
     if self.model is None:
         # self.resultArea.setText('where is the model?')
         try:
             childWindow().exec_()
             self.model_path = self.open_model()
             self.model = load(self.model_path)
         except Exception as e:
             self.model = None
             return
     s = Series(dtype=object)
     s['f0'] = self.isCardProverty.isChecked()
     s['f1'] = self.isLowest.isChecked()
     s['f2'] = self.isFiveGuarantee.isChecked()
     s['f3'] = self.isOrphan.isChecked()
     s['f4'] = self.isMartyrsFamily.isChecked()
     s['f5'] = self.isBussiness.isChecked()
     s['f6'] = self.isFarm.isChecked()
     s['f7'] = self.isRetire.isChecked()
     s['f8'] = self.noIncome.isChecked()
     s['f9'] = self.isWork.isChecked()
     s['f10'] = self.bothUnemployed.isChecked()
     s['f11'] = self.eitherUnemployed.isChecked()
     s['f12'] = self.income.value() / self.household.value()
     s['f13'] = self.numUniv.value()
     s['f14'] = self.numHigh.value()
     s['f15'] = self.numPrim.value()
     s['f16'] = self.grandParentDisease.isChecked()
     s['f17'] = self.parentDivorce.isChecked()
     s['f18'] = self.oneParentNormalDisease.isChecked()
     s['f19'] = self.bothParentNormalDisease.isChecked()
     s['f20'] = self.siblingDisease.isChecked()
     s['f21'] = self.oneParentSeriousDisease.isChecked()
     s['f22'] = self.bothParentSeriousDisease.isChecked()
     s['f23'] = self.parentPassAway.isChecked()
     s['f24'] = self.naturalAccident.isChecked()
     s['f29'] = self.household.value()
     s['f30'] = self.yesLoan.isChecked()
     s['f31'] = self.isRuralResident.isChecked()
     try:
         ss = Series(dtype=object)
         ss['0'] = self.ethnic.text()
         s['f28'] = DataSet.do_ethnic_group(ss)['0']
         ss['0'] = self.scholarshipText.toPlainText().replace('\n', '')
         d = DataSet.do_scholarship(ss)
         s['f25'] = d['助学金个数']['0']  # number of grants
         s['f26'] = d['助学金金额']['0']  # total grant amount
         s['f27'] = d['国助类型']['0']  # type of state aid
         d = DataFrame(s.to_dict(), index=[0])
         ans = self.model.predict(d)[0]
         # labels: "severe hardship", "moderate hardship", "probably not a hardship case"
         ans_type = ['非常困难', '一般困难', '可能为非困难生']
         self.resultArea.setText(ans_type[ans])
     except Exception as e:
         # "Invalid input or model import error\nPlease check the input or re-import the model"
         self.resultArea.setText('输入有误或模型导入错误\n请检查输入或重新导入模型')
         print(e.__str__())
Example #28
class TestMultinomial(TestCase):
    def setUp(self) -> None:

        self.p = Series({'a': 0.4, 'b': 0.3, 'c': 0.2, 'd': 0.1})
        self.m_array = Multinomial(n=10, p=self.p.values)
        self.m_series = Multinomial(n=10, p=self.p)
        self.m_dict = Multinomial(n=10, p=self.p.to_dict())

    def test_init_with_array(self):

        expected = Series({'p1': 0.4, 'p2': 0.3, 'p3': 0.2, 'p4': 0.1})
        actual = self.m_array.p
        self.assertTrue(expected.equals(actual))

    def test_init_with_series(self):

        expected = self.p
        actual = self.m_series.p
        self.assertTrue(expected.equals(actual))

    def test_init_with_dict(self):

        expected = self.p
        actual = self.m_dict.p
        self.assertTrue(expected.equals(actual))

    def test_set_alpha_with_array(self):

        m = Multinomial(n=10, p=[0.1, 0.2, 0.3, 0.4])
        expected = Series({'p1': 0.4, 'p2': 0.3, 'p3': 0.2, 'p4': 0.1})
        m.p = [0.4, 0.3, 0.2, 0.1]
        actual = m.p
        self.assertTrue(expected.equals(actual))

    def test_set_alpha_with_series(self):

        m = Multinomial(n=10, p=[0.1, 0.2, 0.3, 0.4])
        expected = Series({'x1': 0.4, 'x2': 0.3, 'x3': 0.2, 'x4': 0.1})
        m.p = expected
        actual = m.p
        self.assertTrue(expected.equals(actual))

    def test_str(self):

        self.assertEqual('Multinomial(p1=0.4, p2=0.3, p3=0.2, p4=0.1)',
                         str(self.m_array))
        self.assertEqual('Multinomial(a=0.4, b=0.3, c=0.2, d=0.1)',
                         str(self.m_series))

    def test_get_item(self):

        for k, v in self.m_series.p.items():
            expected = Binomial(n=self.m_series.n, p=self.m_series.p[k])
            actual = self.m_series[k]
            self.assertTrue(expected == actual)
Example #29
    def create_inverse(terms: pd.Series, listseries: pd.Series):
        """uses series to create inverted DataFrame"""
        df = terms.to_frame("term")
        diction = listseries.to_dict()
        # creates list of indexes in which term has appeared for each row
        df["indexes"] = df["term"].apply(
            lambda x: [key for key, value in diction.items() if x in value])

        df["frequency"] = df["indexes"].apply(lambda x: len(x))

        return df
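A toy run building a tiny inverted index (calling the function directly; in the source it likely lives on a class as a static method):

terms = pd.Series(["cat", "dog"])
docs = pd.Series({0: ["cat", "fish"], 1: ["dog", "cat"]})
create_inverse(terms, docs)
#   term indexes  frequency
# 0  cat  [0, 1]          2
# 1  dog     [1]          1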
Example #30
def get_anomaly_series(series: pd.Series,
                       algorithm: str = "bitmap_detector") -> pd.Series:
    assert algorithm in [
        "bitmap_detector", "derivative_detector", "exp_avg_detector"
    ]

    ts = series.to_dict()
    detector = AnomalyDetector(ts, algorithm_name=algorithm)
    scores = detector.get_all_scores()
    scores = [s for _, s in scores.iteritems()]
    return pd.Series(scores)
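A quick sketch (requires the luminol package; the injected spike should receive the highest anomaly score):

s = pd.Series([0.0] * 50 + [10.0] + [0.0] * 49)
anomaly = get_anomaly_series(s)
anomaly.idxmax()  # expected to be at or near position 50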
Example #31
def inverse_transform(df: pd.DataFrame, col_means: pd.Series) -> pd.DataFrame:
    df = df.copy()
    df['measure'] = df['measure'].str.replace('_log10_scaled', '')
    std = (df['upper'] - df['lower']) / 1.96
    for col in ['mean', 'lower', 'upper', 'actual']:
        if col == 'mean':
            # bias correction:
            df[col] = df[col] + .5 * std**2
        df[col] = 10**df[col]  # inverse log10
        df[col] *= df['measure'].map(col_means.to_dict())  # inverse scaling
    return df
Example #32
 def daily_profit(self):
     """每日盈亏"""
     profit = Series(np.where(self.account_df['profit'] >= 0, self.account_df['profit'], float('nan')))  # 收益
     loss = Series(np.where(self.account_df['profit'] < 0, self.account_df['profit'], float('nan')))  # 亏损
     return [{
         self.report_id: {
             "charts": {
                 "daily_profit": {
                     "title": {
                         "left": 'center',
                         "text": "每日盈亏"
                     },
                     "xAxis": {
                         "type": 'category',
                         "data": self.account_df['date'].to_dict()
                     },
                     "yAxis": {
                         "type": 'value'
                     },
                     "series": {
                         "0": {
                             "data": profit.to_dict(),
                             "type": 'bar',
                             "itemStyle": {
                                 "color": "#ee6666"
                             },
                             "stack": 'one',
                         },
                         "1": {
                             "data": loss.to_dict(),
                             "type": 'bar',
                             "itemStyle": {
                                 "color": "#91cc75"
                             },
                             "stack": 'one',
                         }
                     }
                 }
             }
         }
     }]
Example #33
    def test_map(self):
        index, data = tm.getMixedTypeDict()

        source = Series(data['B'], index=data['C'])
        target = Series(data['C'][:4], index=data['D'][:4])

        merged = target.map(source)

        for k, v in compat.iteritems(merged):
            assert v == source[target[k]]

        # input could be a dict
        merged = target.map(source.to_dict())

        for k, v in compat.iteritems(merged):
            assert v == source[target[k]]

        # function
        result = self.ts.map(lambda x: x * 2)
        tm.assert_series_equal(result, self.ts * 2)

        # GH 10324
        a = Series([1, 2, 3, 4])
        b = Series(["even", "odd", "even", "odd"], dtype="category")
        c = Series(["even", "odd", "even", "odd"])

        exp = Series(["odd", "even", "odd", np.nan], dtype="category")
        tm.assert_series_equal(a.map(b), exp)
        exp = Series(["odd", "even", "odd", np.nan])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(['a', 'b', 'c', 'd'])
        b = Series([1, 2, 3, 4],
                   index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
        c = Series([1, 2, 3, 4], index=Index(['b', 'c', 'd', 'e']))

        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(['a', 'b', 'c', 'd'])
        b = Series(['B', 'C', 'D', 'E'], dtype='category',
                   index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
        c = Series(['B', 'C', 'D', 'E'], index=Index(['b', 'c', 'd', 'e']))

        exp = Series(pd.Categorical([np.nan, 'B', 'C', 'D'],
                                    categories=['B', 'C', 'D', 'E']))
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, 'B', 'C', 'D'])
        tm.assert_series_equal(a.map(c), exp)
Example #34
    def test_map(self):
        index, data = tm.getMixedTypeDict()

        source = Series(data['B'], index=data['C'])
        target = Series(data['C'][:4], index=data['D'][:4])

        merged = target.map(source)

        for k, v in merged.iteritems():
            self.assertEqual(v, source[target[k]])

        # input could be a dict
        merged = target.map(source.to_dict())

        for k, v in merged.iteritems():
            self.assertEqual(v, source[target[k]])

        # function
        result = self.ts.map(lambda x: x * 2)
        self.assert_(np.array_equal(result, self.ts * 2))
Example #35
countries = ['USSR','Germany','China','Japan','USA']
obj2 = Series(ww2_dict,index=countries) # another way to create a series

ww2_cas.values # returns series values
ww2_cas.index # returns series index values
ww2_cas['USA'] #extract series value with given index
ww2_cas[['USA','China']] # returns values corresponding to index in the list
ww2_cas[4] # returns 5th value in the series
ww2_cas[0:3] # returns the first 3 (slice end is exclusive)
ww2_cas[-1]# returns last value in the series

ww2_cas[ww2_cas>4000000] #extract series value with given condition
ww2_cas['USSR'] = 1 # replaces the value corresponding to the index

'USSR' in ww2_cas # returns boolean true or false
ww2_dict = ww2_cas.to_dict() # .to_dict() converts the series to a dictionary
ww2_series = Series(ww2_dict) # convert back to a Series via the Series constructor

pd.isnull(obj2) # returns index that has NaN as value
pd.notnull(obj2) # opposite

ww2_series + obj2 # new series returned by adding values on the basis of index

obj2.name = "world war 2 casualties"  #name your series
obj2.index.name = 'Countries' # name your index

ser1 = Series([1,2,3,4],index=['A','B','C','D'])
ser2 = ser1.reindex(['A','B','C','D','E','F'], fill_value = 0) # reindex adds new indexes to a series; use fill_value, else the new values will be NaN

## forward fill and backward fill of indexes in the examples below
ser4 = Series(['USA','Mexico','Canada'],index = [0,5,10]) 
Example #36
# check which countries had cas greater than 4M
print(ww2_cas[ww2_cas > 4000000])
'''
USSR       8700000
Germany    4300000
dtype: int64
'''

# behave as dictionary
print('USSR' in ww2_cas)
# True

# convert to dictionary

ww2_dict = ww2_cas.to_dict()
print(ww2_dict)
# {'China': 3000000, 'USSR': 8700000, 'Germany': 4300000, 'USA': 400000, 'Japan': 2100000}

# convert back to series
ww2_series = Series(ww2_dict)
print(ww2_series)
'''
China      3000000
Germany    4300000
Japan      2100000
USA         400000
USSR       8700000
dtype: int64
'''
Example #37
#############################################################################################################
# 1. Series Basics
#############################################################################################################
# Series is a one-dimensional labeled array capable of holding any data type (integers, strings, floating point numbers, Python objects, etc.). The axis labels are collectively referred to as the index.

s = Series([3,6,9,12])                             # use the Series function to create series
s.values                                           # returns the values in the series
s.index                                            # returns the index of the series

gdp = Series([8700000,4300000,3000000],            # you can specify names/strings for indexes
              index= ['USSR','Germany','China'])
       
gdp['USSR'] == gdp[0]                              # indexing works with either the index labels or the index number
gdp[gdp > 5000000]                                 # you can use inequality operators to check/filter 
'USA' in gdp                                       # to check if a specific index exists
gdp_dict = gdp.to_dict()                           # .to_dict() converts a series to a dictionary
gdp2 = Series(gdp_dict)                            # and dictionaries can be converted to series

gdp2 = Series(gdp, 
       index=['USSR','Germany','China','USA'])               
pd.isnull(gdp2)                                    # .isnull() is used to find NaNs and Nulls
pd.notnull(gdp2)                                   # opposite of .isnull()
gdp3 = gdp + gdp2                                  # series are vectorized: you can +,-,*,/ etc.
gdp3.index.name = 'Country'                        # you can give the index a header
gdp3 = gdp3.drop('USA')                            # delete a row with .drop()

# reindex with method='ffill' forward-fills values from the previous valid index
ser3 = Series(['USA','Mexico','Canada'],index=[0,5,10])
ser4 = ser3.reindex(range(15),method='ffill')
print(ser3, ser4)
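For completeness, a minimal sketch of the backward-fill counterpart (hypothetical ser5; values propagate back from the next valid index):

ser5 = ser3.reindex(range(15), method='bfill')
# index 0 -> 'USA', 1-5 -> 'Mexico', 6-10 -> 'Canada', 11-14 -> NaN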
Example #38
print('\nYou can get the values:\t', obj.values)
print('...and the indices:\t', obj.index)


ww2_cas = Series([8700000,4300000,3000000,2100000,400000],index=['USSR','Germany','China','Japan','USA'])
print('\nIndices can be customized:\n', ww2_cas)

# Call a value
print('\nCall a value..', ww2_cas['Germany'])
# Can also check with array operations
print('or select via a boolean expression..', ww2_cas[ww2_cas>4000000])
# Check if there's a value..
print('or check for the presence of a value..', 'USSR' in ww2_cas)

# Convert a Series to a Python dictionary
print('\nConvert to a dictionary:\n', ww2_cas.to_dict())
print('or do the opposite:\n', Series(ww2_cas.to_dict()))

# We can use isnull and notnull to find missing data
print(pd.isnull(ww2_cas))
print(ww2_cas)

# We can merge two Series and sort...
obj2 = Series([1,2],index=['Italy','France'])
# note: sort_index(inplace=True) modifies the series in place and returns None,
# so assigning its result would leave s3 as None; sort without inplace instead
s3 = pd.concat([ww2_cas, obj2]).sort_index()
print(s3)
Example #39
__author__ = 'Executor'

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

obj = Series([3, 6, 9, 2])
print(obj)
print(obj.index)

ww2 = Series([800, 400, 300, 200, 430], index=['USSR', 'Germany', 'China', 'Japan', 'USA'])
ww3 = Series([800, 400, 300, 200, 430, 500], index=['USSR', 'Germany', 'China', 'Japan', 'USA', 'Jamaica'])
print(ww2)

# check casualties
print(ww2[ww2>400])

print('USSR' in ww2)
ww2d  = ww2.to_dict()
print(ww2)
print(ww2d)

# Series.update() only modifies existing labels, so add the new entry by label assignment:
ww2['Jamaica'] = 300
print(ww2)

Example #40
 def setup(self, mapper):
     map_size = 1000
     map_data = Series(map_size - np.arange(map_size))
     self.map_data = map_data if mapper == 'Series' else map_data.to_dict()
     self.s = Series(np.random.randint(0, map_size, 10000))