Example #1
def load_delimited(
    filename,
    header=True,
    delimiter=",",
    with_title=False,
    with_legend=False,
    limit=None,
):
    if limit is not None and header:
        limit += 1  # don't count the header line against the limit

    with open_(filename) as f:
        reader = csv.reader(f, dialect="excel", delimiter=delimiter)
        title = "".join(next(reader)) if with_title else ""
        rows = []
        num_lines = 0
        for row in reader:
            rows.append(row)
            num_lines += 1
            if limit is not None and num_lines >= limit:
                break

    header = rows.pop(0) if header else None
    legend = "".join(rows.pop(-1)) if with_legend else ""
    return header, rows, title, legend
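
A minimal usage sketch for this version, assuming load_delimited and the csv import are in scope; the demo file is created inline:

# write a small CSV, then load it with the function above
import csv

with open("demo.csv", "w", newline="") as out:
    csv.writer(out).writerows([["name", "count"], ["a", "1"], ["b", "2"]])

header, rows, title, legend = load_delimited("demo.csv", header=True)
print(header)  # ['name', 'count']
print(rows)    # [['a', '1'], ['b', '2']]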
Example #2
    def open(self, identifier):
        identifier = self.get_absolute_identifier(identifier,
                                                  from_relative=False)
        if not os.path.exists(identifier):
            raise ValueError(f"path '{identifier}' does not exist")

        return open_(identifier)
Example #3
def load_from_json(filename, classes):
    """Loads objects from json files.

    Parameters
    ----------
    filename: name of the json file
    classes: A series of the Cogent3 types, for example: (Alignment, ArrayAlignment)

    """
    assert all(
        isinstance(klass, type) for klass in classes
    ), "classes should be a series of Cogent3 types, for example: (Alignment, ArrayAlignment)"

    with open_(filename) as f:
        content = json.loads(f.read())
    try:
        _, data, completed = load_record_from_json(content)
        if not completed:
            raise TypeError("json file is a record for type NotCompleted.")
    except (KeyError, TypeError):
        data = content

    type_ = data.get("type", None)
    if type_ is None:
        raise TypeError("json does not contain 'type' key")

    valid_types = {get_object_provenance(klass) for klass in classes}
    if type_ not in valid_types:
        raise TypeError(
            f"Invalid data type: {type_} is not one of {valid_types}")

    return deserialise_object(data)
Example #4
def MinimalFastaParser(
    infile, strict=True, label_to_name=str, finder=FastaFinder, label_characters=">"
):
    """Yields successive sequences from infile as (label, seq) tuples.

    If strict is True (default), raises RecordError when label or seq missing.
    """
    try:
        infile = open_(infile)
        close_at_end = True
    except (TypeError, AttributeError):
        close_at_end = False

    for rec in finder(infile):
        # first line must be a label line
        if rec[0][0] not in label_characters:
            if strict:
                raise RecordError("Found Fasta record without label line: %s" % rec)
            continue
        # record must have at least one sequence
        if len(rec) < 2:
            if strict:
                raise RecordError("Found label line without sequences: %s" % rec)
            else:
                continue

        label = rec[0][1:].strip()
        label = label_to_name(label)
        seq = "".join(rec[1:])

        yield label, seq

    if close_at_end:
        infile.close()
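
A usage sketch with an in-memory handle: open_ rejects the StringIO, so the parser falls back to iterating it directly (assumes MinimalFastaParser and FastaFinder are in scope):

from io import StringIO

# each record is yielded as a (label, seq) tuple
fasta = StringIO(">seq1\nACGT\n>seq2\nGGTA\n")
for label, seq in MinimalFastaParser(fasta):
    print(label, seq)
# seq1 ACGT
# seq2 GGTA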
Example #5
def load_tree(filename, format=None, underscore_unmunge=False):
    """Constructor for tree.

    Parameters
    ----------
    filename : str
        a file path containing a newick or xml formatted tree.
    format : str
        either newick, xml or cogent3 json, default is newick
    underscore_unmunge : bool
        replace underscores with spaces in all names read, i.e. "sp_name"
        becomes "sp name".

    Notes
    -----
    Underscore unmunging is turned off by default, although it is part
    of the Newick format.

    Returns
    -------
    PhyloNode
    """
    file_format, _ = get_format_suffixes(filename)
    if file_format == "json":
        return load_from_json(filename, (TreeNode, PhyloNode))

    with open_(filename) as tfile:
        treestring = tfile.read()
        if format is None and filename.endswith(".xml"):
            format = "xml"

    return make_tree(treestring,
                     format=format,
                     underscore_unmunge=underscore_unmunge)
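
A usage sketch, assuming load_tree above is in scope; the newick file is written inline:

with open("demo.tree", "w") as out:
    out.write("((a,b),c);")

tree = load_tree("demo.tree")  # no .xml suffix, so parsed as newick
print(tree.ascii_art())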
Example #6
def FromFilenameParser(filename, format=None, **kw):
    """Arguments:
            - filename: name of the sequence alignment file
            - format: the multiple sequence file format
    """
    format = format_from_filename(filename, format)
    f = open_(filename, newline=None, mode="rt")
    return FromFileParser(f, format, **kw)
Example #7
def load_delimited(
    filename,
    header=True,
    sep=",",
    delimiter=None,
    with_title=False,
    with_legend=False,
    limit=None,
):
    """
    basic processing of tabular data

    Parameters
    ----------
    filename: Path
        path to delimited file (can begin with ~)
    header: bool
        whether the first line of the file (after the title, if present) is a header
    sep: str
        the character separating columns
    with_title: bool
        whether the first line of the file is a title
    with_legend: bool
        whether the last line of the file is a legend
    limit: int
        maximum number of lines to read from the file

    Returns
    -------
    header, rows, title, legend

    Notes
    -----
    All row values remain as strings.
    """
    if delimiter:
        sep = delimiter
        deprecated("argument", "delimiter", "sep", "2022.1")

    if limit is not None and header:
        limit += 1  # don't count header line

    with open_(filename) as f:
        reader = csv.reader(f, dialect="excel", delimiter=sep)
        title = "".join(next(reader)) if with_title else ""
        rows = []
        num_lines = 0
        for row in reader:
            rows.append(row)
            num_lines += 1
            if limit is not None and num_lines >= limit:
                break

    header = rows.pop(0) if header else None
    legend = "".join(rows.pop(-1)) if with_legend else ""
    return header, rows, title, legend
Example #8
def gff_parser(f):
    """delegates to the correct gff_parser based on the version"""
    f = f if not isinstance(f, Path) else str(f)
    if isinstance(f, str):
        with open_(f) as infile:
            yield from gff2_parser(infile)
    elif isinstance(f, StringIO):
        yield from gff2_parser(f)
    else:
        raise TypeError(f"expected a path or file-like object, got {type(f)}")
Example #9
def deserialise_object(data):
    """
    deserialises from json
    Parameters
    ----------
    data
        path to json file, json string or a dict

    Returns
    -------
    If the dict from json.loads does not contain a "type" key, the object will
    be returned as is. Otherwise, it will be deserialised to a cogent3 object.
    """
    if path_exists(data):
        with open_(data) as infile:
            data = json.load(infile)

    if isinstance(data, str):
        data = json.loads(data)

    type_ = data.get("type", None)
    if type_ is None:
        return data

    if "core.sequence" in type_:
        func = deserialise_seq
    elif "core.alignment" in type_:
        func = deserialise_seq_collections
    elif "core.tree" in type_:
        func = deserialise_tree
    elif (
        "evolve.substitution_model" in type_ or "evolve.ns_substitution_model" in type_
    ):
        func = deserialise_substitution_model
    elif "evolve.parameter_controller" in type_:
        func = deserialise_likelihood_function
    elif "core.moltype" in type_:
        func = deserialise_moltype
    elif "core.alphabet" in type_:
        func = deserialise_alphabet
    elif "app.result" in type_:
        func = deserialise_result
    elif "notcompleted" in type_.lower():
        func = deserialise_not_completed
    elif type_.lower().endswith("table"):
        func = deserialise_tabular
    elif "dictarray" in type_.lower():
        func = deserialise_tabular
    elif "distancematrix" in type_.lower():
        func = deserialise_tabular
    else:
        msg = "deserialising '%s' from json" % type_
        raise NotImplementedError(msg)
    return func(data)
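
A round-trip sketch; make_seq and the to_json method are part of the public cogent3 API, though exact signatures may vary between releases:

from cogent3 import make_seq

seq = make_seq("ACGT", name="demo", moltype="dna")
restored = deserialise_object(seq.to_json())  # dispatches on "core.sequence"
print(restored.name)  # demo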
Example #10
    def __call__(self, lines):
        """a generator that yields individual lines processed according to the
        provided conditions

        Parameters
        ----------
        lines: path or iterable
            If file path, handles file open and close. Will expand user
            component (i.e. '~/') of path.

        Notes
        -----
        Elements within a row are strings
        """
        input_from_path = False
        if isinstance(lines, (str, pathlib.Path)):
            path = pathlib.Path(lines).expanduser()
            input_from_path = path.exists()

            if input_from_path:
                lines = open_(path)

        num_lines = 0
        header = None
        match = not self.negate
        for line in lines:
            if is_empty(line):
                continue

            line = line.split(self.sep)
            line = [e.strip() for e in line]
            if header is None and self.with_header:
                header = True
                if self.columns:
                    self._column_names_to_indices(line)
                    line = [line[i] for i in self.columns]
                yield line
                continue

            if self.columns:
                line = [line[i] for i in self.columns]

            if self.condition and self.condition(line) != match:
                continue

            yield line

            num_lines += 1
            if self.limit is not None and num_lines >= self.limit:
                break

        if input_from_path:
            lines.close()
Example #11
def load_classifier(path):
    '''returns dict of pickled classifier and features info'''
    with open_(path, 'rb') as clf:
        classifier = pickle.load(clf)
    try:
        feature_params = classifier['feature_params']
        scaler = classifier.get('scaler', None)
        classifier = classifier['classifier']
    except KeyError:
        raise ValueError('pickle formatted file does not '
                         'contain classifier')
    return classifier, feature_params, scaler
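
A sketch of the pickle layout this loader expects, using a stand-in object for the classifier:

import pickle

# the three keys mirror what load_classifier unpacks above
with open("clf.pkl", "wb") as out:
    pickle.dump(
        {"classifier": object(), "feature_params": {}, "scaler": None}, out
    )

classifier, feature_params, scaler = load_classifier("clf.pkl")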
Example #12
def MinimalNexusAlignParser(align_path):
    """returns {label: seq, ...}"""
    if isinstance(align_path, str):
        infile = open_(align_path)
    else:
        infile = align_path

    isblock = re.compile(r"begin\s+(data|characters)").search
    inblock = False
    try:
        line = infile.readline()
    except AttributeError:
        # guessing it's a list of strings from a nexus file
        line = infile.pop(0)

    if not line.lower().startswith("#nexus"):
        raise ValueError("not a nexus file")

    block = []
    index = None
    for line in infile:
        if isblock(line.lower()):
            inblock = True
        elif inblock and line.lower().startswith("end;"):
            break
        elif inblock:
            line = line.strip()
            if line.lower().startswith("matrix"):
                index = len(block)
            elif not line.startswith(";"):
                block.append(line)

    if hasattr(infile, "close"):
        infile.close()

    if not block:
        raise ValueError("not found DATA or CHARACTER block")
    elif index is None:
        raise RecordError("malformed block, no 'matrix' line")

    block = block[index:]
    seqs = defaultdict(list)
    for line in block:
        if not line or (line.startswith("[") and line.endswith("]")):
            # blank or comment line
            continue

        line = line.split()
        seqs[line[0]].append("".join(line[1:]))

    for n, s in seqs.items():
        yield n, "".join(s)
Example #13
    def write(self, path, format="tsv", sep="\t"):
        """
        writes a flattened version to path

        Parameters
        ----------
        path : str
        format
            possible formats are 'rest'/'rst', 'markdown'/'md',
            'latex', 'html', 'phylip', 'bedgraph', 'csv', 'tsv', or 'simple'
            (default).
        sep : str
            used to split fields, will be inferred from path suffix if not
            provided
        """
        data = self.to_string(format=format, sep=sep)
        with open_(path, "w") as outfile:
            outfile.write(data)
Example #14
def save_to_filename(alignment, filename, format, **kw):
    """Arguments:
            - alignment: to be written
            - filename: name of the sequence alignment file
            - format: the multiple sequence file format
    """
    if format is None:
        raise FileFormatError("format not known")

    f = open_(filename, "wt")
    try:
        write_alignment_to_file(f, alignment, format, **kw)
    except Exception:
        try:
            os.unlink(filename)
        except Exception:
            pass
        raise
    finally:
        f.close()
Example #15
def gff_parser(f):
    """parses a gff file
    Parameters
    ----------
    f
        accepts string path or pathlib.Path or file-like object (e.g. StringIO)

    Returns
    -------
    dict
        contains each of the 9 parameters specified by gff3, and comments.
    """

    # calling a separate function to ensure file closes correctly
    f = f if not isinstance(f, Path) else str(f)
    if isinstance(f, str):
        with open_(f) as infile:
            yield from _gff_parser(infile)
    else:
        yield from _gff_parser(f)
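
A usage sketch with an in-memory handle; per the docstring each yielded record carries the nine gff3 fields, though the exact record structure depends on _gff_parser:

from io import StringIO

gff = StringIO("chr1\tsrc\tgene\t1\t100\t.\t+\t.\tID=gene1\n")
for record in gff_parser(gff):
    print(record)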
Example #16
def load_tree(filename, format=None, underscore_unmunge=False):
    """Constructor for tree.

    Parameters
    ----------
    filename
        a file containing a newick or xml formatted tree.

    Notes
    -----
    Underscore unmunging is turned off by default, although it is part
    of the Newick format. Set ``underscore_unmunge=True`` to replace underscores
    with spaces in all names read.
    """

    with open_(filename) as tfile:
        treestring = tfile.read()
        if format is None and filename.endswith(".xml"):
            format = "xml"
    tree = make_tree(treestring, format=format, underscore_unmunge=underscore_unmunge)
    return tree
Example #17
    def __call__(self, lines):
        input_from_path = False
        if isinstance(lines, (str, pathlib.Path)):
            path = pathlib.Path(lines)
            input_from_path = path.exists()

            if input_from_path:
                lines = open_(path)

        num_lines = 0
        header = None
        match = not self.negate
        for line in lines:
            if is_empty(line):
                continue

            line = line.split(self.sep)
            line = [e.strip() for e in line]
            if header is None and self.with_header:
                header = True
                if self.columns:
                    self._column_names_to_indices(line)
                    line = [line[i] for i in self.columns]
                yield line
                continue

            if self.columns:
                line = [line[i] for i in self.columns]

            if self.condition and self.condition(line) != match:
                continue

            yield line

            num_lines += 1
            if self.limit is not None and num_lines >= self.limit:
                break

        if input_from_path:
            lines.close()
Example #18
    def load(self, data):
        """returns sequences

        Parameters
        ----------
        data
            file path or cogent3 sequence collection / alignment
        """
        if isinstance(data, str):
            path = data
            with open_(path) as infile:
                data = dict(record for record in self._parser(infile))
            seqs = self.klass(data=data, moltype=self.moltype)
            seqs.info.path = path  # keep the source path, not the parsed dict
        elif not isinstance(data, SequenceCollection):
            if self.aligned:
                seqs = make_aligned_seqs(data, moltype=self.moltype)
            else:
                seqs = make_unaligned_seqs(data, moltype=self.moltype)
        else:
            seqs = data  # already a collection / alignment

        if not (self._output_types & {"aligned"}):
            seqs = seqs.degap()

        return seqs
Example #19
    def write(
        self,
        filename,
        mode=None,
        writer=None,
        format=None,
        sep=None,
        compress=None,
        **kwargs,
    ):
        """Write table to filename in the specified format. If a format is not
        specified, it attempts to use a filename suffix. Note if a sep argument
        is provided, unformatted values are written to file in order to
        preserve numerical accuracy.

        Parameters
        ----------
        mode
            file opening mode
        format
            Valid formats are those of the to_string method plus
            pickle. Will try and guess from filename if not specified.
        writer
            a function for formatting the data for output.
        sep
            a character delimiter for fields.
        compress
            if True, gzips the file and appends .gz to the
            filename (if not already added).

        """
        file_suffix, compress_suffix = get_format_suffixes(filename)
        format = format or file_suffix
        compress = compress or compress_suffix is not None

        mode = mode or {"pickle": "wb"}.get(format, "w")

        if compress:
            if not filename.endswith(".gz"):
                filename = "%s.gz" % filename
            mode = "wt"

        outfile = open_(filename, mode)

        if format is None:
            # try guessing from filename suffix
            if compress:
                index = -2
            else:
                index = -1
            suffix = filename.split(".")
            if len(suffix) > 1:
                format = suffix[index]

        if format == "csv":
            sep = sep or ","
        elif format == "tsv":
            sep = sep or "\t"

        if writer:
            rows = self.tolist()
            rows.insert(0, self.header[:])
            rows = writer(rows, has_header=True)
            outfile.writelines("\n".join(rows))
        elif format == "pickle":
            data = self.__getstate__()
            pickle.dump(data, outfile, protocol=1)
        elif sep is not None and format != "bedgraph":
            writer = csv.writer(outfile, delimiter=sep, lineterminator="\n")
            if self.title:
                writer.writerow([self.title])
            writer.writerow(self.header)
            writer.writerows(self.array)
            if self.legend:
                writer.writerow([self.legend])
        else:
            table = self.to_string(format=format, sep=sep, **kwargs)
            outfile.writelines(table + "\n")
        outfile.close()
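
A usage sketch; make_table is part of the public cogent3 API, and the .gz suffix routes through the compress branch above:

from cogent3 import make_table

table = make_table(header=["a", "b"], data={"a": [1, 2], "b": [3, 4]})
table.write("demo.tsv.gz")  # gzipped, sep inferred as tab from the tsv suffix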
Example #20
def load_table(
    filename,
    sep=None,
    reader=None,
    digits=4,
    space=4,
    title="",
    missing_data="",
    max_width=1e100,
    index=None,
    legend="",
    column_templates=None,
    dtype=None,
    static_column_types=False,
    limit=None,
    format="simple",
    skip_inconsistent=False,
    **kwargs,
):
    """

    Parameters
    ----------
    filename
        path to file containing tabular data
    sep
        the delimiting character between columns
    reader
        a parser for reading filename. This approach assumes the first
        row returned by the reader will be the header row.
    static_column_types
        if True, and reader is None, identifies columns
        with a numeric/bool data types from the first non-header row.
        This assumes all subsequent entries in that column are of the same type.
        Default is False.
    digits
        floating point resolution
    space
        number of spaces between columns or a string
    title
        as implied
    missing_data
        character assigned if a row has no entry for a column
    max_width
        maximum column width for printing
    index
        if True, the 0'th column is used as row identifiers and keys
        for slicing.
    legend
        table legend
    column_templates
        dict of column headings
        or a function that will handle the formatting.
    dtype
        optional numpy array typecode.
    limit
        exits after this many lines. Only applied for non pickled data
        file types.
    format
        output format when using str(Table)
    skip_inconsistent
        skips rows that have different length to header row
    """
    sep = sep or kwargs.pop("delimiter", None)
    file_format, compress_format = get_format_suffixes(filename)

    if file_format in ("pickle", "pkl"):
        f = open_(filename, mode="rb")
        loaded_table = pickle.load(f)
        f.close()
        r = _Table()
        r.__setstate__(loaded_table)
        return r

    if not reader:
        if file_format == "csv":
            sep = sep or ","
        elif file_format == "tsv":
            sep = sep or "\t"

        header, rows, loaded_title, legend = load_delimited(
            filename, delimiter=sep, limit=limit, **kwargs
        )
        if skip_inconsistent:
            num_fields = len(header)
            rows = [r for r in rows if len(r) == num_fields]
        else:
            lengths = set(map(len, [header] + rows))
            if len(lengths) != 1:
                msg = f"inconsistent number of fields {lengths}"
                raise ValueError(msg)

        title = title or loaded_title
        data = {column[0]: column[1:] for column in zip(header, *rows)}
    else:
        f = open_(filename, newline=None)
        data = [row for row in reader(f)]
        header = data[0]
        data = {column[0]: column[1:] for column in zip(*data)}
        f.close()

    for key, value in data.items():
        data[key] = cast_str_to_array(value, static_type=static_column_types)

    return make_table(
        header=header,
        data=data,
        digits=digits,
        title=title,
        dtype=dtype,
        column_templates=column_templates,
        space=space,
        missing_data=missing_data,
        max_width=max_width,
        index=index,
        legend=legend,
        format=format,
    )
Example #21
def load_table(
    filename,
    sep=None,
    reader=None,
    digits=4,
    space=4,
    title="",
    missing_data="",
    max_width=1e100,
    index_name=None,
    legend="",
    column_templates=None,
    static_column_types=False,
    limit=None,
    format="simple",
    skip_inconsistent=False,
    **kwargs,
):
    """

    Parameters
    ----------
    filename
        path to file containing tabular data
    sep
        the delimiting character between columns
    reader
        a parser for reading filename. This approach assumes the first
        row returned by the reader will be the header row.
    static_column_types
        if True, and reader is None, identifies columns
        with a numeric/bool data types from the first non-header row.
        This assumes all subsequent entries in that column are of the same type.
        Default is False.
    digits
        floating point resolution
    space
        number of spaces between columns or a string
    title
        as implied
    missing_data
        character assigned if a row has no entry for a column
    max_width
        maximum column width for printing
    index_name
        column name with values to be used as row identifiers and keys
        for slicing. All column values must be unique.
    legend
        table legend
    column_templates
        dict of column headings
        or a function that will handle the formatting.
    limit
        exits after this many lines. Only applied for non pickled data
        file types.
    format
        output format when using str(Table)
    skip_inconsistent
        skips rows that have different length to header row
    """
    import pathlib

    if not isinstance(filename, (str, pathlib.PurePath)):
        raise TypeError(
            "filename must be string or Path, perhaps you want make_table()")

    if "index" in kwargs:
        deprecated("argument", "index", "index_name", "2021.11")
        index_name = kwargs.pop("index", index_name)

    sep = sep or kwargs.pop("delimiter", None)
    file_format, compress_format = get_format_suffixes(filename)

    if file_format == "json":
        return load_from_json(filename, (_Table, ))
    elif file_format in ("pickle", "pkl"):
        f = open_(filename, mode="rb")
        loaded_table = pickle.load(f)
        f.close()
        r = _Table()
        r.__setstate__(loaded_table)
        return r

    if reader:
        with open_(filename, newline=None) as f:
            data = [row for row in reader(f)]
            header = data[0]
            data = {column[0]: column[1:] for column in zip(*data)}
    else:
        if file_format == "csv":
            sep = sep or ","
        elif file_format == "tsv":
            sep = sep or "\t"

        header, rows, loaded_title, legend = load_delimited(filename,
                                                            sep=sep,
                                                            limit=limit,
                                                            **kwargs)
        if skip_inconsistent:
            num_fields = len(header)
            rows = [r for r in rows if len(r) == num_fields]
        else:
            lengths = set(map(len, [header] + rows))
            if len(lengths) != 1:
                msg = f"inconsistent number of fields {lengths}"
                raise ValueError(msg)

        title = title or loaded_title
        data = {column[0]: column[1:] for column in zip(header, *rows)}

    for key, value in data.items():
        data[key] = cast_str_to_array(value, static_type=static_column_types)

    return make_table(
        header=header,
        data=data,
        digits=digits,
        title=title,
        column_templates=column_templates,
        space=space,
        missing_data=missing_data,
        max_width=max_width,
        index_name=index_name,
        legend=legend,
        format=format,
    )
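
A usage sketch writing a small file inline; sep is inferred from the .tsv suffix:

with open("demo.tsv", "w") as out:
    out.write("a\tb\n1\t3\n2\t4\n")

table = load_table("demo.tsv")
print(table)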
Example #22
def LoadTable(
    filename=None,
    sep=None,
    reader=None,
    header=None,
    rows=None,
    row_order=None,
    digits=4,
    space=4,
    title="",
    missing_data="",
    max_width=1e100,
    row_ids=None,
    legend="",
    column_templates=None,
    dtype=None,
    static_column_types=False,
    limit=None,
    data_frame=None,
    format="simple",
    **kwargs,
):
    """
    .. deprecated:: 2019.8.30a

        ``LoadTable`` will be removed in ``cogent3`` 2020.1.1. It's replaced by
        ``load_table`` and ``make_table``.
    """
    sep = sep or kwargs.pop("delimiter", None)
    if filename is not None:
        file_format, compress_format = get_format_suffixes(filename)

    if filename is not None and not (reader or static_column_types):
        if file_format == "pickle":
            f = open_(filename, mode="rb")
            loaded_table = pickle.load(f)
            f.close()
            return _Table(**loaded_table)
        elif file_format == "csv":
            sep = sep or ","
        elif file_format == "tsv":
            sep = sep or "\t"

        header, rows, loaded_title, legend = load_delimited(
            filename, delimiter=sep, limit=limit, **kwargs
        )
        title = title or loaded_title
    elif filename and (reader or static_column_types):
        f = open_(filename, newline=None)
        if not reader:
            if file_format == "csv":
                sep = sep or ","
            elif file_format == "tsv":
                sep = sep or "\t"
            elif not sep:
                raise ValueError(
                    "static_column_types option requires a value for sep"
                )

            reader = autogen_reader(
                f, sep, limit=limit, with_title=kwargs.get("with_title", False)
            )

        rows = [row for row in reader(f)]
        f.close()
        header = rows.pop(0)

    table = _Table(
        header=header,
        rows=rows,
        digits=digits,
        row_order=row_order,
        title=title,
        dtype=dtype,
        column_templates=column_templates,
        space=space,
        missing_data=missing_data,
        max_width=max_width,
        row_ids=row_ids,
        legend=legend,
        data_frame=data_frame,
        format=format,
    )

    return table
Example #23
def load_table(
    filename,
    sep=None,
    reader=None,
    digits=4,
    space=4,
    title="",
    missing_data="",
    max_width=1e100,
    row_ids=None,
    legend="",
    column_templates=None,
    dtype=None,
    static_column_types=False,
    limit=None,
    format="simple",
    **kwargs,
):
    """

    Parameters
    ----------
    filename
        path to file containing tabular data
    sep
        the delimiting character between columns
    reader
        a parser for reading filename. This approach assumes the first
        row returned by the reader will be the header row.
    static_column_types
        if True, and reader is None, identifies columns
        with a numeric/bool data types from the first non-header row.
        This assumes all subsequent entries in that column are of the same type.
        Default is False.
    digits
        floating point resolution
    space
        number of spaces between columns or a string
    title
        as implied
    missing_data
        character assigned if a row has no entry for a column
    max_width
        maximum column width for printing
    row_ids
        if True, the 0'th column is used as row identifiers and keys
        for slicing.
    legend
        table legend
    column_templates
        dict of column headings
        or a function that will handle the formatting.
    dtype
        optional numpy array typecode.
    limit
        exits after this many lines. Only applied for non pickled data
        file types.
    format
        output format when using str(Table)

    """
    sep = sep or kwargs.pop("delimiter", None)
    file_format, compress_format = get_format_suffixes(filename)

    if not (reader or static_column_types):
        if file_format == "pickle":
            f = open_(filename, mode="rb")
            loaded_table = pickle.load(f)
            f.close()
            return _Table(**loaded_table)
        elif file_format == "csv":
            sep = sep or ","
        elif file_format == "tsv":
            sep = sep or "\t"

        header, rows, loaded_title, legend = load_delimited(filename,
                                                            delimiter=sep,
                                                            limit=limit,
                                                            **kwargs)
        title = title or loaded_title
    else:
        f = open_(filename, newline=None)
        if not reader:
            if file_format == "csv":
                sep = sep or ","
            elif file_format == "tsv":
                sep = sep or "\t"
            elif not sep:
                raise ValueError("static_column_types option requires a value "
                                 "for sep")

            reader = autogen_reader(f,
                                    sep,
                                    limit=limit,
                                    with_title=kwargs.get("with_title", False))

        rows = [row for row in reader(f)]
        f.close()
        header = rows.pop(0)
    return make_table(
        header=header,
        rows=rows,
        digits=digits,
        title=title,
        dtype=dtype,
        column_templates=column_templates,
        space=space,
        missing_data=missing_data,
        max_width=max_width,
        row_ids=row_ids,
        legend=legend,
        format=format,
    )

Example #24
def dump_json(path, data):
    '''dumps data in json format'''
    with open_(path, mode='wt') as outfile:
        json.dump(data, outfile)
Example #25
def load_json(path):
    '''loads raw data object from json file'''
    with open_(path) as infile:
        data = json.load(infile)
    return data
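
The two helpers above round-trip cleanly:

dump_json("demo.json", {"answer": 42})
print(load_json("demo.json"))  # {'answer': 42}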