Esempi in Python per DataFrame.join

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: d3m.container

Classe/tipologia: DataFrame

Metodo/funzione: join

Esempi su hotexamples.com: 3

DataFrame.join in Python: 3 esempi trovati. Questi sono i migliori esempi reali in Python per d3m.container.DataFrame.join, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

copy(10)

metadata(5)

drop(4)

DataFrame(3)

set_index(3)

select_columns(3)

index(3)

join(3)

iterrows(2)

select_dtypes(2)

from_dict(2)

iteritems(2)

rename(1)

notnull(1)

insert(1)

item(1)

append_columns(1)

head(1)

groupby(1)

duplicated(1)

columns(1)

apply(1)

user(1)

Esempio n. 1

Mostra file

    def _join_string_col(cls, left_df: container.DataFrame, left_col: str,
                         right_df: container.DataFrame, right_col: str,
                         accuracy: float) -> pd.DataFrame:
        # use d3mIndex from left col if present
        right_df = right_df.drop(columns='d3mIndex')

        # pre-compute fuzzy matches
        left_keys = left_df[left_col].unique()
        right_keys = right_df[right_col].unique()
        matches: typing.Dict[str, typing.Optional[str]] = {}
        for left_key in left_keys:
            matches[left_key] = cls._string_fuzzy_match(
                left_key, right_keys, accuracy * 100)

        # look up pre-computed fuzzy match for each element in the left column
        left_df.index = left_df[left_col].map(lambda key: matches[key])

        # make the right col the right dataframe index
        right_df = right_df.set_index(right_col)

        # inner join on the left / right indices
        joined = container.DataFrame(
            left_df.join(right_df, lsuffix='_1', rsuffix='_2', how='inner'))

        # sort on the d3m index if there, otherwise use the joined column
        if 'd3mIndex' in joined:
            joined = joined.sort_values(by=['d3mIndex'])
        else:
            joined = joined.sort_values(by=[left_col])
        joined = joined.reset_index(drop=True)

        return joined

Esempio n. 2

Mostra file

    def _join_numeric_col(cls, left_df: container.DataFrame, left_col: str,
                          right_df: container.DataFrame, right_col: str,
                          accuracy: float) -> pd.DataFrame:
        # use d3mIndex from left col if present
        right_df = right_df.drop(columns='d3mIndex')

        # fuzzy match each of the left join col against the right join col value and save the results as the left
        # dataframe index
        right_df[right_col] = pd.to_numeric(right_df[right_col])
        choices = right_df[right_col].unique()
        left_df[left_col] = pd.to_numeric(left_df[left_col])
        left_df.index = left_df[left_col]. \
            map(lambda x: cls._numeric_fuzzy_match(x, choices, accuracy))

        # make the right col the right dataframe index
        right_df = right_df.set_index(right_col)

        # inner join on the left / right indices
        joined = container.DataFrame(
            left_df.join(right_df, lsuffix='_1', rsuffix='_2', how='inner'))

        # sort on the d3m index if there, otherwise use the joined column
        if 'd3mIndex' in joined:
            joined = joined.sort_values(by=['d3mIndex'])
        else:
            joined = joined.sort_values(by=[left_col])
        joined = joined.reset_index(drop=True)

        return joined

Esempio n. 3

Mostra file

    def _join_datetime_col(cls,
                           left_df: container.DataFrame,
                           left_col: str,
                           right_df: container.DataFrame,
                           right_col: str,
                           accuracy: float) -> pd.DataFrame:
        # use d3mIndex from left col if present
        right_df = right_df.drop(columns='d3mIndex')

        # compute a tolerance delta for time matching based on a percentage of the minimum left/right time
        # range
        choices = np.array([np.datetime64(parser.parse(dt)) for dt in right_df[right_col].unique()])
        left_keys = np.array([np.datetime64(parser.parse(dt)) for dt in left_df[left_col].values])
        time_tolerance = (1.0 - accuracy) * cls._compute_time_range(left_keys, choices)
        
        left_df.index = np.array([cls._datetime_fuzzy_match(dt, choices, time_tolerance) for dt in left_keys])

        # make the right col the right dataframe index
        right_df = right_df.set_index(right_col)

        # inner join on the left / right indices
        joined = container.DataFrame(left_df.join(right_df, lsuffix='_1', rsuffix='_2', how='inner'))

        # sort on the d3m index if there, otherwise use the joined column
        if 'd3mIndex' in joined:
            joined = joined.sort_values(by=['d3mIndex'])
        else:
            joined = joined.sort_values(by=[left_col])
        joined = joined.reset_index(drop=True)

        return joined