예제 #1
0
def _check_empty_line(fh):
    """Consume one line from `fh` and require it to be blank.

    Parameters
    ----------
    fh : iterator of str
        Line iterator. Exactly one line is pulled from it with ``next``.

    Raises
    ------
    OrdinationFormatError
        If `fh` is already exhausted, or if the consumed line still has
        content after surrounding whitespace is stripped.
    """
    separator = next(fh, None)

    # Running out of input is reported separately from a non-blank line.
    if separator is None:
        raise OrdinationFormatError(
            "Reached end of file while looking for blank line separating "
            "sections.")

    if not separator.strip():
        return
    raise OrdinationFormatError("Expected an empty line.")
예제 #2
0
def _parse_header(fh, header_id, num_dimensions):
    """Read the next line of `fh` and validate it as a section header.

    The line is stripped of surrounding whitespace and split on tabs. It is
    accepted only if it has exactly ``num_dimensions + 1`` fields and the
    first field equals `header_id`.

    Parameters
    ----------
    fh : iterator of str
        Line iterator; one line is consumed.
    header_id : str
        Expected value of the first field.
    num_dimensions : int
        Number of fields expected after the header ID.

    Returns
    -------
    list of str
        The tab-separated fields of the header line, `header_id` first.

    Raises
    ------
    OrdinationFormatError
        If `fh` is exhausted, or the line does not match the expected
        field count or header ID.
    """
    raw = next(fh, None)
    if raw is None:
        raise OrdinationFormatError(
            "Reached end of file while looking for %s header." % header_id)

    fields = raw.strip().split('\t')
    # One field for the header ID itself plus one per dimension.
    expected_fields = num_dimensions + 1
    if len(fields) == expected_fields and fields[0] == header_id:
        return fields
    raise OrdinationFormatError("%s header not found." % header_id)
예제 #3
0
def _parse_array_section(fh, header_id, has_ids=True):
    """Parse a two-dimensional section of `fh` introduced by `header_id`.

    The header line carries the row and column counts. Each following row
    is a tab-separated line; when `has_ids` is true, the first field of
    every row is taken as that row's ID and the rest as values.

    Parameters
    ----------
    fh : iterator of str
        Line iterator positioned at the section header.
    header_id : str
        Identifier expected at the start of the header line.
    has_ids : bool, optional
        Whether each data row starts with an ID field.

    Returns
    -------
    pd.DataFrame or None
        ``None`` when the header declares a 0 x 0 array. Otherwise a
        float64 frame of the declared shape, indexed by the row IDs, or
        built with ``index=None`` when `has_ids` is false.

    Raises
    ------
    OrdinationFormatError
        If exactly one declared dimension is zero, if `fh` ends before all
        rows are read, or if a row has the wrong number of values.
    """
    header = _parse_header(fh, header_id, 2)
    n_rows, n_cols = int(header[1]), int(header[2])

    if n_rows == 0 and n_cols == 0:
        # A 0 x 0 declaration means there is no array data for this section.
        return None
    if n_rows == 0 or n_cols == 0:
        # Either both dimensions are zero or neither is.
        raise OrdinationFormatError("One dimension of %s is 0: %d x %d" %
                                    (header_id, n_rows, n_cols))

    matrix = np.empty((n_rows, n_cols), dtype=np.float64)
    row_ids = [] if has_ids else None

    for row_idx in range(n_rows):
        row_line = next(fh, None)
        if row_line is None:
            raise OrdinationFormatError(
                "Reached end of file while looking for row %d in %s "
                "section." % (row_idx + 1, header_id))

        fields = row_line.strip().split('\t')
        if has_ids:
            # The leading field is the row ID; what remains are the values.
            row_ids.append(fields.pop(0))

        if len(fields) != n_cols:
            raise OrdinationFormatError(
                "Expected %d values, but found %d in row %d." %
                (n_cols, len(fields), row_idx + 1))
        matrix[row_idx, :] = np.asarray(fields, dtype=np.float64)

    return pd.DataFrame(matrix, index=row_ids)
예제 #4
0
def _check_length_against_eigvals(data, eigvals, label):
    """Require the last axis of `data` to be as long as that of `eigvals`.

    Parameters
    ----------
    data : object with a ``shape`` attribute, or None
        Values to check. ``None`` is accepted without any check.
    eigvals : object with a ``shape`` attribute
        Reference whose last-axis length `data` must match.
    label : str
        Description of `data` used in the error message.

    Raises
    ------
    OrdinationFormatError
        If the last-axis lengths of `data` and `eigvals` differ.
    """
    if data is None:
        # Nothing to compare against.
        return

    data_len = data.shape[-1]
    eigvals_len = eigvals.shape[-1]
    if data_len != eigvals_len:
        raise OrdinationFormatError(
            "There should be as many %s as eigvals: %d != %d" %
            (label, data_len, eigvals_len))
예제 #5
0
def _parse_vector_section(fh, header_id):
    """Parse a one-dimensional section of `fh` introduced by `header_id`.

    The header line declares how many values follow; the values themselves
    sit on a single tab-separated line.

    Parameters
    ----------
    fh : iterator of str
        Line iterator positioned at the section header.
    header_id : str
        Identifier expected at the start of the header line.

    Returns
    -------
    np.ndarray or None
        ``None`` when the header declares zero values (no further line is
        consumed in that case), otherwise a float64 array of the values.

    Raises
    ------
    OrdinationFormatError
        If `fh` ends before the values line, or the number of values found
        differs from the declared count.
    """
    header = _parse_header(fh, header_id, 1)

    expected = int(header[1])
    if expected == 0:
        # A zero count means the vector is absent from the file.
        return None

    values_line = next(fh, None)
    if values_line is None:
        raise OrdinationFormatError(
            "Reached end of file while looking for line containing values "
            "for %s section." % header_id)

    fields = values_line.strip().split('\t')
    vector = np.asarray(fields, dtype=np.float64)
    if len(vector) != expected:
        raise OrdinationFormatError(
            "Expected %d values in %s section, but found %d." %
            (expected, header_id, len(vector)))
    return vector
예제 #6
0
def _ordination_to_ordination_results(fh):
    """Read an ordination file from `fh` into an ``OrdinationResults``.

    Sections are consumed in a fixed order: Eigvals, Proportion explained,
    Species, Site, Biplot, Site constraints. A blank line is required after
    every section except the last.

    NOTE(review): this reader unpacks a ``(data, ids)`` pair from every
    `_parse_array_section` call -- confirm that the helper in scope returns
    a pair rather than a single object.

    Parameters
    ----------
    fh : iterator of str
        Line iterator over the ordination file.

    Returns
    -------
    OrdinationResults
        Built from the parsed sections and the species/site IDs.

    Raises
    ------
    OrdinationFormatError
        If the Eigvals section is empty, if the site-constraint IDs differ
        from the site IDs (when both are present), or if any of the parsing
        helpers rejects the input.
    """
    eigvals = _parse_vector_section(fh, 'Eigvals')
    if eigvals is None:
        raise OrdinationFormatError("At least one eigval must be present.")
    _check_empty_line(fh)

    proportion_explained = _parse_vector_section(fh, 'Proportion explained')
    _check_length_against_eigvals(proportion_explained, eigvals,
                                  'proportion explained values')
    _check_empty_line(fh)

    species, species_ids = _parse_array_section(fh, 'Species')
    _check_length_against_eigvals(species, eigvals, 'coordinates per species')
    _check_empty_line(fh)

    site, site_ids = _parse_array_section(fh, 'Site')
    _check_length_against_eigvals(site, eigvals, 'coordinates per site')
    _check_empty_line(fh)

    # Unlike the other array sections, biplot rows carry no leading ID.
    biplot, _ = _parse_array_section(fh, 'Biplot', has_ids=False)
    _check_empty_line(fh)

    site_constraints, constraint_ids = _parse_array_section(
        fh, 'Site constraints')

    # The two ID lists are compared only when both sections supplied IDs.
    both_have_ids = constraint_ids is not None and site_ids is not None
    if both_have_ids and constraint_ids != site_ids:
        raise OrdinationFormatError(
            "Site constraints ids and site ids must be equal: %s != %s" %
            (constraint_ids, site_ids))

    result_kwargs = {
        'eigvals': eigvals,
        'species': species,
        'site': site,
        'biplot': biplot,
        'site_constraints': site_constraints,
        'proportion_explained': proportion_explained,
        'species_ids': species_ids,
        'site_ids': site_ids,
    }
    return OrdinationResults(**result_kwargs)
예제 #7
0
def _ordination_to_ordination_results(fh):
    """Read an ordination file from `fh` into an ``OrdinationResults``.

    Sections are consumed in a fixed order: Eigvals, Proportion explained,
    Species, Site, Biplot, Site constraints. A blank line is required after
    every section except the last.

    Parameters
    ----------
    fh : iterator of str
        Line iterator over the ordination file.

    Returns
    -------
    OrdinationResults
        Built with empty short and long method names; the Species section
        is passed as ``features``, Site as ``samples``, Biplot as
        ``biplot_scores`` and Site constraints as ``sample_constraints``.

    Raises
    ------
    OrdinationFormatError
        If the Eigvals section is empty, if the Site constraints index
        differs from the Site index (when both sections are present), or if
        any of the parsing helpers rejects the input.
    """
    eigvals = _parse_vector_section(fh, 'Eigvals')
    if eigvals is None:
        raise OrdinationFormatError("At least one eigval must be present.")
    _check_empty_line(fh)

    proportion_explained = _parse_vector_section(fh, 'Proportion explained')
    _check_length_against_eigvals(
        proportion_explained, eigvals, 'proportion explained values')
    _check_empty_line(fh)

    features = _parse_array_section(fh, 'Species')
    _check_length_against_eigvals(
        features, eigvals, 'coordinates per species')
    _check_empty_line(fh)

    samples = _parse_array_section(fh, 'Site')
    _check_length_against_eigvals(
        samples, eigvals, 'coordinates per site')
    _check_empty_line(fh)

    # Unlike the other array sections, biplot rows carry no leading ID.
    biplot_scores = _parse_array_section(fh, 'Biplot', has_ids=False)
    _check_empty_line(fh)

    sample_constraints = _parse_array_section(fh, 'Site constraints')

    # The indexes are compared only when both sections actually hold data.
    both_present = sample_constraints is not None and samples is not None
    if both_present and not np.array_equal(sample_constraints.index,
                                           samples.index):
        raise OrdinationFormatError(
            "Site constraints ids and site ids must be equal: %s != %s" %
            (sample_constraints.index, samples.index))

    return OrdinationResults(
        short_method_name='',
        long_method_name='',
        eigvals=eigvals,
        features=features,
        samples=samples,
        biplot_scores=biplot_scores,
        sample_constraints=sample_constraints,
        proportion_explained=proportion_explained)