Ejemplo n.º 1
0
    def file_to_dicts(self, file: str) -> [dict]:
        """Read ``file`` into dicts and record regression label statistics.

        The file is parsed by ``read_tsv``, with every task's
        ``label_column_name`` column renamed to that task's ``label_name``.
        The regression label of each row is cast to ``float`` and a
        ``StandardScaler`` is fitted on those values; the fitted mean and
        scale are stored in ``self.tasks["regression"]["label_list"]``.

        :param file: passed to ``read_tsv`` as ``filename``.
        :return: the dicts produced by ``read_tsv``.
        """
        renames = dict(
            (task["label_column_name"], task["label_name"])
            for task in self.tasks.values()
        )
        rows = read_tsv(
            rename_columns=renames,
            filename=file,
            delimiter=self.delimiter,
            skiprows=self.skiprows,
            quotechar=self.quote_char,
        )

        # Gather the regression label of every row as a float.
        label_values = [
            float(row[self.tasks["regression"]["label_name"]]) for row in rows
        ]

        # Reshape to a single column (n, 1) before fitting the scaler.
        label_column = np.reshape(label_values, (-1, 1))
        label_scaler = StandardScaler()
        label_scaler.fit(label_column)

        # Publish [mean, scale] as the regression task's label list.
        scaling_stats = [
            label_scaler.mean_.item(),
            label_scaler.scale_.item(),
        ]
        self.tasks["regression"]["label_list"] = scaling_stats

        return rows
Ejemplo n.º 2
0
 def _file_to_dicts(self, file: str) -> [dict]:
     """Parse ``file`` with ``read_tsv`` and return its result.

     The reader is configured from this instance's ``delimiter``,
     ``skiprows`` and ``quote_char`` attributes.

     :param file: passed to ``read_tsv`` as ``filename``.
     :return: whatever ``read_tsv`` returns for that file.
     """
     return read_tsv(
         filename=file,
         delimiter=self.delimiter,
         skiprows=self.skiprows,
         quotechar=self.quote_char,
     )
Ejemplo n.º 3
0
 def _file_to_dicts(self, file: str) -> dict:
     """Parse ``file`` with ``read_tsv`` and return its result.

     The reader is configured from this instance's ``delimiter``,
     ``skiprows`` and ``columns`` attributes.

     :param file: passed to ``read_tsv`` as ``filename``.
     :return: whatever ``read_tsv`` returns for that file.
     """
     # NOTE(review): the return annotation is ``dict``; confirm that it
     # matches what ``read_tsv`` actually returns.
     return read_tsv(
         filename=file,
         delimiter=self.delimiter,
         skiprows=self.skiprows,
         columns=self.columns,
     )
Ejemplo n.º 4
0
    def _file_to_dicts(self, file: str) -> [dict]:
        """Parse ``file`` with ``read_tsv``, renaming task label columns.

        Every task in ``self.tasks`` contributes one rename: its
        ``label_column_name`` column becomes its ``label_name``. The reader
        is otherwise configured from this instance's ``delimiter``,
        ``skiprows``, ``quote_char`` and ``header`` attributes.

        :param file: passed to ``read_tsv`` as ``filename``.
        :return: whatever ``read_tsv`` returns for that file.
        """
        label_renames = {}
        for task in self.tasks.values():
            source_column = task["label_column_name"]
            label_renames[source_column] = task["label_name"]

        return read_tsv(
            filename=file,
            delimiter=self.delimiter,
            skiprows=self.skiprows,
            quotechar=self.quote_char,
            rename_columns=label_renames,
            header=self.header,
        )