def from_file(self, path):
    """
    Load a word list from a text file, one entry per line.

    The path is remembered in ``self.file_path``. For a falsy path,
    ``self.word_list`` becomes an empty list; otherwise it becomes the set of
    lines from the file with surrounding white space removed. The file is
    decoded with the encoding reported by ``detect_encoding``.

    Args:
        path: file name, or a falsy value to reset the word list
    """
    self.file_path = path
    if path:
        detected = detect_encoding(path)
        with open(path, encoding=detected) as handle:
            self.word_list = {entry.strip() for entry in handle}
        return
    # Nothing to read: keep the (list-typed) empty placeholder
    self.word_list = []
def from_file(path):
    """
    Read a set of stripped lines from a text file.

    Encodings are tried in order: UTF-8, the default that ``open`` uses for
    ``encoding=None``, and finally whatever ``detect_encoding`` reports. The
    first encoding that decodes the whole file wins.

    Args:
        path: file name; a falsy value yields an empty set

    Returns:
        set of the file's lines with surrounding white space removed

    Raises:
        UnicodeError: if none of the candidate encodings can decode the file
    """
    if not path:
        return set()

    def candidate_encodings():
        yield 'utf-8'
        yield None  # let open() choose the platform/locale default
        # Produced lazily so that detection only runs when the cheaper
        # guesses above have already failed
        yield detect_encoding(path)

    for encoding in candidate_encodings():
        try:
            with open(path, encoding=encoding) as f:
                return {line.strip() for line in f}
        except UnicodeDecodeError:
            continue
    # No encoding worked, raise
    raise UnicodeError("Couldn't determine file encoding")
def from_file(self, path):
    """
    Load a word list from a text file, one entry per line.

    The path is stored in ``self.file_path`` and ``self.word_list`` is reset
    to an empty list. If the path is truthy, the file is read and
    ``self.word_list`` becomes the set of its lines with surrounding white
    space removed.

    Encodings are tried in order: UTF-8, the default that ``open`` uses for
    ``encoding=None``, and finally whatever ``detect_encoding`` reports.

    Args:
        path: file name, or a falsy value to only reset the word list

    Raises:
        UnicodeError: if none of the candidate encodings can decode the file
    """
    self.file_path = path
    self.word_list = []
    if not path:
        return

    def candidate_encodings():
        yield 'utf-8'
        yield None  # let open() choose the platform/locale default
        # Produced lazily so that detection only runs when the cheaper
        # guesses above have already failed
        yield detect_encoding(path)

    for encoding in candidate_encodings():
        try:
            with open(path, encoding=encoding) as f:
                self.word_list = {line.strip() for line in f}
        except UnicodeDecodeError:
            continue
        return
    # No encoding worked, raise
    raise UnicodeError("Couldn't determine file encoding")
def from_file(self, path):
    """
    Load a word list from a text file, one entry per line.

    The path is stored in ``self.file_path`` and ``self.word_list`` is reset
    to an empty list. If the path is truthy, the file is read and
    ``self.word_list`` becomes the set of its lines with surrounding white
    space removed.

    Encodings are tried in order: UTF-8, the default that ``open`` uses for
    ``encoding=None``, and finally whatever ``detect_encoding`` reports.

    Args:
        path: file name, or a falsy value to only reset the word list

    Raises:
        UnicodeError: if none of the candidate encodings can decode the file
    """
    self.file_path = path
    self.word_list = []
    if not path:
        return

    def candidate_encodings():
        yield 'utf-8'
        yield None  # let open() choose the platform/locale default
        # Produced lazily so that detection only runs when the cheaper
        # guesses above have already failed
        yield detect_encoding(path)

    for encoding in candidate_encodings():
        try:
            with open(path, encoding=encoding) as f:
                self.word_list = {line.strip() for line in f}
        except UnicodeDecodeError:
            continue
        return
    # No encoding worked, raise
    raise UnicodeError("Couldn't determine file encoding")
def from_file(cls, filename):
    """
    Load distance matrix from a file

    The file should preferably be encoded in ascii/utf-8. White space at
    the beginning and end of lines is ignored.

    The first line of the file starts with the matrix dimension. It
    can be followed by a list of flags

    - *axis=<number>*: the axis number
    - *symmetric*: the matrix is symmetric; when reading the element (i, j)
      its value is also assigned to (j, i)
    - *asymmetric*: the matrix is asymmetric
    - *row_labels*: the file contains row labels
    - *col_labels*: the file contains column labels

    By default, matrices are symmetric, have axis 1 and no labels are given.
    Flags *labeled* and *labelled* are obsolete aliases for *row_labels*.

    If the file has column labels, they follow in the second line.
    Row labels appear at the beginning of each row.
    Labels are arbitrary strings that cannot contain newlines and
    tabulators. Labels are stored as instances of `Table` with a single
    meta attribute named "label".

    The remaining lines contain tab-separated numbers, preceded with labels,
    if present. Lines are padded with zeros if necessary. If the matrix is
    symmetric, the file contains the lower triangle; any data above the
    diagonal is ignored.

    Args:
        filename: file name

    Returns:
        the result of ``cls(matrix, row_labels, col_labels, axis)``

    Raises:
        ValueError: if the file is empty, does not begin with the dimension,
            contains an unknown flag, has a mismatching number of column
            labels, has too many rows or columns, or contains an element
            that cannot be converted to a float
    """
    with open(filename, encoding=detect_encoding(filename)) as fle:
        line = fle.readline()
        if not line:
            raise ValueError("empty file")
        data = line.strip().split()
        # A whitespace-only first line produces no tokens at all; report it
        # as a missing dimension rather than failing on data[0]
        if not data or not data[0].isdigit():
            raise ValueError("distance file must begin with dimension")
        n = int(data.pop(0))
        symmetric = True
        axis = 1
        # None means "no labels in the file"; a list means "collect them"
        col_labels = row_labels = None
        for flag in data:
            if flag in ("labelled", "labeled", "row_labels"):
                row_labels = []
            elif flag == "col_labels":
                col_labels = []
            elif flag == "symmetric":
                symmetric = True
            elif flag == "asymmetric":
                symmetric = False
            else:
                flag_data = flag.split("=")
                if len(flag_data) == 2:
                    name, value = map(str.strip, flag_data)
                else:
                    name, value = "", None
                if name == "axis" and value.isdigit():
                    axis = int(value)
                else:
                    raise ValueError("invalid flag '{}'".format(flag))
        if col_labels is not None:
            col_labels = [x.strip()
                          for x in fle.readline().strip().split("\t")]
            if len(col_labels) != n:
                raise ValueError("mismatching number of column labels")

        # Rows shorter than n keep the zeros they were initialized with
        matrix = np.zeros((n, n))
        for i, line in enumerate(fle):
            if i >= n:
                raise ValueError("too many rows")
            line = line.strip().split("\t")
            if row_labels is not None:
                row_labels.append(line.pop(0).strip())
            if len(line) > n:
                raise ValueError("too many columns in matrix row {}".
                                 format("'{}'".format(row_labels[i])
                                        if row_labels else i + 1))
            # For symmetric matrices only the lower triangle (including the
            # diagonal) is read; anything past column i is ignored
            for j, e in enumerate(line[:i + 1 if symmetric else n]):
                try:
                    matrix[i, j] = float(e)
                except ValueError as exc:
                    raise ValueError(
                        "invalid element at row {}, column {}".format(
                            "'{}'".format(row_labels[i])
                            if row_labels else i + 1,
                            "'{}'".format(col_labels[j])
                            if col_labels else j + 1)) from exc
                if symmetric:
                    matrix[j, i] = matrix[i, j]
    if col_labels:
        col_labels = Table.from_list(
            Domain([], metas=[StringVariable("label")]),
            [[item] for item in col_labels])
    if row_labels:
        row_labels = Table.from_list(
            Domain([], metas=[StringVariable("label")]),
            [[item] for item in row_labels])
    return cls(matrix, row_labels, col_labels, axis)
def read_file(self):
    """
    Read the file at ``self.path`` as text and store it in ``self.content``.

    The file is decoded with the encoding reported by ``detect_encoding``.
    """
    detected = detect_encoding(self.path)
    with open(self.path, mode='r', encoding=detected) as source:
        text = source.read()
    self.content = text
def read_file(self):
    """
    Read the markup file at ``self.path`` and store its text in
    ``self.content``.

    The file is decoded with the encoding reported by ``detect_encoding``;
    decoding errors are ignored. The markup is parsed with BeautifulSoup's
    "lxml" parser and only the result of ``get_text()`` is kept.
    """
    detected = detect_encoding(self.path)
    with open(self.path, errors='ignore', encoding=detected) as source:
        raw_markup = source.read()
    parsed = BeautifulSoup(raw_markup, "lxml")
    self.content = parsed.get_text()
def read_file(self):
    """
    Read the file at ``self.path`` as text and store it in ``self.content``.

    The file is decoded with the encoding reported by ``detect_encoding``.
    """
    detected = detect_encoding(self.path)
    with open(self.path, mode='r', encoding=detected) as source:
        text = source.read()
    self.content = text
def read_file(self):
    """
    Read the markup file at ``self.path`` and store its text in
    ``self.content``.

    The file is decoded with the encoding reported by ``detect_encoding``;
    decoding errors are ignored. The markup is parsed with BeautifulSoup's
    "lxml" parser and only the result of ``get_text()`` is kept.
    """
    detected = detect_encoding(self.path)
    with open(self.path, errors='ignore', encoding=detected) as source:
        raw_markup = source.read()
    parsed = BeautifulSoup(raw_markup, "lxml")
    self.content = parsed.get_text()