Beispiel #1
0
    def __call__(self, df):
        """Reshape ``df`` from wide to long form via ``df.melt``.

        ``self.args[0]`` and ``self.args[1]`` supply the names given to the
        key and value columns of the result.  Any further arguments select
        columns of ``df``:

        * by default the selected columns are melted and every other column
          is kept as an identifier;
        * when ``self.kwargs['exclude'] == True`` the selected columns are
          kept as identifiers and every other column is melted.

        With only two arguments every column of ``df`` is melted.

        Args:
            df: The frame to melt; must provide ``columns`` and ``melt``.

        Returns:
            A ``DplyFrame`` built from the melt output, with its last two
            columns renamed to the key and value names.

        Raises:
            ValueError: If fewer than two arguments were supplied, if any
                argument is not a ``dplython.later.Later``, or (raised by
                ``list.remove``) if a selected column is not among the
                not-yet-selected columns of ``df``.
        """
        bad_form = "Arguments must be of the form \"X.column1, X.column2, ...\""

        if len(self.args) < 2:
            raise ValueError(
                "You must provide at least two arguments, the key and the value."
            )

        later_type = dplython.later.Later

        key_expr, value_expr = self.args[0], self.args[1]
        if not (isinstance(key_expr, later_type)
                and isinstance(value_expr, later_type)):
            raise ValueError(bad_form)

        key_name = key_expr._name
        value_name = value_expr._name

        # Columns are moved from `remaining` to `selected` as the extra
        # arguments are processed.
        remaining = list(df.columns)
        selected = []

        has_extras = len(self.args) > 2
        # With no extra arguments nothing gets selected, which yields the same
        # layout as excluding nothing: no id columns, every column melted.
        # The kwargs are only consulted when extra arguments are present.
        selected_are_ids = (not has_extras) or (
            'exclude' in self.kwargs and self.kwargs['exclude'] == True)

        for extra in self.args[2:]:
            if not isinstance(extra, later_type):
                raise ValueError(bad_form)
            selected.append(extra._name)
            remaining.remove(extra._name)

        if selected_are_ids:
            id_cols, value_cols = selected, remaining
        else:
            id_cols, value_cols = remaining, selected

        melted = DplyFrame(df.melt(id_vars=id_cols, value_vars=value_cols))

        # Rename the trailing two columns of the melt output to the
        # caller-supplied key and value names.
        renamed = list(melted.columns)
        renamed[-2:] = key_name, value_name
        melted.columns = renamed

        return melted
Beispiel #2
0
def read_delim(f, delim, col_names=True):
    """Read a delimited text file into a ``DplyFrame``.

    Args:
        f: Path of the file to read; passed to ``pd.read_csv`` as
            ``filepath_or_buffer``.  Must be a ``str``.
        delim: Field separator; passed to ``pd.read_csv`` as ``sep``.
            Must be a ``str``.
        col_names: When true, the first row of the file is used as the
            header.  Otherwise the file is read without a header and the
            columns are named ``X1``, ``X2``, ... in order.  Must be a
            ``bool``.

    Returns:
        The parsed data wrapped in a ``DplyFrame``.

    Raises:
        AssertionError: If an argument does not have the required type.
    """
    assert isinstance(f, str)
    assert isinstance(delim, str)
    assert isinstance(col_names, bool)

    # `== True` is kept on purpose: if assertions are disabled, non-bool
    # values are still treated exactly as before.
    header_row = 0 if col_names == True else None

    frame = DplyFrame(
        pd.read_csv(filepath_or_buffer=f, header=header_row, sep=delim))

    if header_row is None:
        n_columns = frame.shape[1]
        frame.columns = ['X' + str(i) for i in range(1, n_columns + 1)]

    return frame