def load_tree(filename, format=None, underscore_unmunge=False):
    """Constructor for tree.

    Parameters
    ----------
    filename : str
        a file path containing a newick or xml formatted tree.
    format : str
        either newick, xml or cogent3 json, default is newick
    underscore_unmunge : bool
        replace underscores with spaces in all names read, i.e. "sp_name"
        becomes "sp name".

    Notes
    -----
    Underscore unmunging is turned off by default, although it is part of
    the Newick format.

    Returns
    -------
    PhyloNode
    """
    suffix, _ = get_format_suffixes(filename)
    # cogent3 json serialisations carry their own type info and are
    # deserialised directly, bypassing the newick/xml parsers
    if suffix == "json":
        return load_from_json(filename, (TreeNode, PhyloNode))

    with open_(filename) as infile:
        data = infile.read()

    # no explicit format given: infer xml from the path suffix, otherwise
    # make_tree defaults to newick
    if format is None and filename.endswith(".xml"):
        format = "xml"

    return make_tree(data, format=format, underscore_unmunge=underscore_unmunge)
def load_aligned_seqs(
    filename,
    format=None,
    array_align=True,
    moltype=None,
    label_to_name=None,
    parser_kw=None,
    info=None,
    **kw,
):
    """
    loads aligned sequences from file

    Parameters
    ----------
    filename : str
        path to sequence file
    format : str
        sequence file format, if not specified tries to guess from the path
        suffix
    moltype
        the moltype, eg DNA, PROTEIN, 'dna', 'protein'
    array_align : bool
        if True, returns ArrayAlignment, otherwise an annotatable Alignment
    label_to_name
        function for converting original name into another name.
    parser_kw : dict
        optional arguments for the parser

    Returns
    -------
    ``ArrayAlignment`` or ``Alignment`` instance
    """
    file_format, _ = get_format_suffixes(filename)
    # cogent3 json serialisations are deserialised directly
    if file_format == "json":
        return load_from_json(filename, (Alignment, ArrayAlignment))

    # explicit format argument wins over the path suffix
    format = format or file_format
    if not format:
        # fixed grammar of the original message ("could not determined")
        msg = "could not determine file format, set using the format argument"
        raise ValueError(msg)

    parser_kw = parser_kw or {}
    # legacy keyword containers: merge their contents into kw so older
    # calling styles (constructor_kw=..., kw=...) keep working
    for legacy_key in ("constructor_kw", "kw"):
        kw.update(kw.pop(legacy_key, None) or {})

    data = list(FromFilenameParser(filename, format, **parser_kw))
    return make_aligned_seqs(
        data,
        array_align=array_align,
        label_to_name=label_to_name,
        moltype=moltype,
        source=filename,
        info=info,
        **kw,
    )
def load_table(
    filename,
    sep=None,
    reader=None,
    digits=4,
    space=4,
    title="",
    missing_data="",
    max_width=1e100,
    index_name=None,
    legend="",
    column_templates=None,
    static_column_types=False,
    limit=None,
    format="simple",
    skip_inconsistent=False,
    **kwargs,
):
    """
    Parameters
    ----------
    filename
        path to file containing a tabular data
    sep
        the delimiting character between columns
    reader
        a parser for reading filename. This approach assumes the first
        row returned by the reader will be the header row.
    static_column_types
        if True, and reader is None, identifies columns with a numeric/bool
        data types from the first non-header row. This assumes all
        subsequent entries in that column are of the same type. Default
        is False.
    digits
        floating point resolution
    space
        number of spaces between columns or a string
    title
        as implied
    missing_data
        character assigned if a row has no entry for a column
    max_width
        maximum column width for printing
    index_name
        column name with values to be used as row identifiers and keys
        for slicing. All column values must be unique.
    legend
        table legend
    column_templates
        dict of column headings
        or a function that will handle the formatting.
    limit
        exits after this many lines. Only applied for non pickled data
        file types.
    format
        output format when using str(Table)
    skip_inconsistent
        skips rows that have different length to header row
    """
    import pathlib

    if not isinstance(filename, (str, pathlib.PurePath)):
        raise TypeError(
            "filename must be string or Path, perhaps you want make_table()"
        )

    # "index" was renamed "index_name"; honour the old argument with a
    # deprecation warning
    if "index" in kwargs:
        deprecated("argument", "index", "index_name", "2021.11")
        index_name = kwargs.pop("index", index_name)

    sep = sep or kwargs.pop("delimiter", None)
    file_format, compress_format = get_format_suffixes(filename)

    if file_format == "json":
        return load_from_json(filename, (_Table,))
    elif file_format in ("pickle", "pkl"):
        # NOTE(security): pickle.load executes arbitrary code — only load
        # pickled tables from trusted sources
        # context manager ensures the file is closed even if unpickling fails
        with open_(filename, mode="rb") as f:
            loaded_table = pickle.load(f)
        r = _Table()
        r.__setstate__(loaded_table)
        return r

    if reader:
        with open_(filename, newline=None) as f:
            data = list(reader(f))
            header = data[0]
            # transpose rows into {column name: column values}
            data = {column[0]: column[1:] for column in zip(*data)}
    else:
        # default separators by suffix, unless explicitly provided
        if file_format == "csv":
            sep = sep or ","
        elif file_format == "tsv":
            sep = sep or "\t"

        header, rows, loaded_title, legend = load_delimited(
            filename, sep=sep, limit=limit, **kwargs
        )
        if skip_inconsistent:
            num_fields = len(header)
            rows = [r for r in rows if len(r) == num_fields]
        else:
            lengths = set(map(len, [header] + rows))
            if len(lengths) != 1:
                msg = f"inconsistent number of fields {lengths}"
                raise ValueError(msg)

        title = title or loaded_title
        data = {column[0]: column[1:] for column in zip(header, *rows)}
        for key, value in data.items():
            data[key] = cast_str_to_array(value, static_type=static_column_types)

    return make_table(
        header=header,
        data=data,
        digits=digits,
        title=title,
        column_templates=column_templates,
        space=space,
        missing_data=missing_data,
        max_width=max_width,
        index_name=index_name,
        legend=legend,
        format=format,
    )