def file_to_dicts(self, file: str) -> [dict]:
    """Load ``file`` through ``read_tsv`` and record regression label statistics.

    The label column of every task in ``self.tasks`` is renamed from its
    ``label_column_name`` to its ``label_name`` while reading. Afterwards a
    ``StandardScaler`` is fitted on the regression labels, and its mean and
    scale are stored as ``self.tasks["regression"]["label_list"]``.

    :param file: Path handed to ``read_tsv`` as ``filename``.
    :return: The records produced by ``read_tsv``.
    """
    label_renames = {
        task["label_column_name"]: task["label_name"]
        for task in self.tasks.values()
    }
    records = read_tsv(
        rename_columns=label_renames,
        filename=file,
        delimiter=self.delimiter,
        skiprows=self.skiprows,
        quotechar=self.quote_char,
    )

    # Gather every regression label as a float so scaling stats can be computed.
    label_values = [
        float(row[self.tasks["regression"]["label_name"]]) for row in records
    ]

    scaler = StandardScaler()
    # The scaler is fitted on a single-feature column of shape (n_samples, 1).
    label_column = np.reshape(label_values, (-1, 1))
    scaler.fit(label_column)

    # Keep [mean, scale] on the regression task as its label list.
    mean, scale = scaler.mean_.item(), scaler.scale_.item()
    self.tasks["regression"]["label_list"] = [mean, scale]
    return records
def _file_to_dicts(self, file: str) -> [dict]:
    """Read ``file`` with ``read_tsv`` using this instance's parsing settings.

    :param file: Path handed to ``read_tsv`` as ``filename``.
    :return: Whatever ``read_tsv`` produces for the file.
    """
    # Delimiter, skipped rows and quote character all come from the instance.
    return read_tsv(
        filename=file,
        delimiter=self.delimiter,
        skiprows=self.skiprows,
        quotechar=self.quote_char,
    )
def _file_to_dicts(self, file: str) -> dict:
    """Read ``file`` with ``read_tsv``, forwarding the configured columns.

    :param file: Path handed to ``read_tsv`` as ``filename``.
    :return: Whatever ``read_tsv`` produces for the file.
    """
    # Collect the reader options first, then forward them in a single call.
    reader_options = {
        "filename": file,
        "delimiter": self.delimiter,
        "skiprows": self.skiprows,
        "columns": self.columns,
    }
    return read_tsv(**reader_options)
def _file_to_dicts(self, file: str) -> [dict]:
    """Read ``file`` with ``read_tsv``, renaming each task's label column.

    For every task in ``self.tasks`` the column named by
    ``label_column_name`` is mapped to the task's ``label_name``; that mapping
    is passed to ``read_tsv`` as ``rename_columns`` together with the
    instance's delimiter, skiprows, quote character and header settings.

    :param file: Path handed to ``read_tsv`` as ``filename``.
    :return: Whatever ``read_tsv`` produces for the file.
    """
    renames = {
        spec["label_column_name"]: spec["label_name"]
        for spec in self.tasks.values()
    }
    return read_tsv(
        filename=file,
        delimiter=self.delimiter,
        skiprows=self.skiprows,
        quotechar=self.quote_char,
        rename_columns=renames,
        header=self.header,
    )