Exemple #1
0
def read_microsatellite_lines(raw_lines):
    """
    Read interleaved haploid rows and combine them into diploid data.
    The stripped input is expected to alternate between a row whose label
    ends with 'a' and a row whose label ends with 'b'.
    A binary matrix is built separately for the 'a' rows and the 'b' rows,
    and the two matrices are added entrywise.
    @param raw_lines: raw input lines
    @return: headers (the 'a' labels without their last character),
    diploid data (list of lists)
    @raise ValueError: if the line count is odd or less than two,
    if there are fewer than two columns, if the rows are ragged,
    or if a row label has the wrong suffix
    """
    stripped = Util.get_stripped_lines(raw_lines)
    nlines = len(stripped)
    if nlines % 2:
        raise ValueError('expected an even number of lines')
    if nlines < 2:
        raise ValueError('expected at least two lines')
    # tokenize each line; the first token is the label
    table = [line.split() for line in stripped]
    width = len(table[0])
    if width < 2:
        raise ValueError('expected at least two columns')
    if any(len(record) != width for record in table):
        raise ValueError('each row should have the same number of elements')
    # rows at even offsets are the 'a' rows, rows at odd offsets the 'b' rows
    a_table = table[0::2]
    b_table = table[1::2]
    a_labels = [record[0] for record in a_table]
    b_labels = [record[0] for record in b_table]
    if not all(label.endswith('a') for label in a_labels):
        raise ValueError('each odd row label should end with the letter a')
    if not all(label.endswith('b') for label in b_labels):
        raise ValueError('each even row label should end with the letter b')
    headers = [label[:-1] for label in a_labels]
    # the unique elements of each data column are taken over all rows,
    # so both haploid matrices are built against the same uniques
    data = [record[1:] for record in table]
    uniques = [
        list(iterutils.unique_everseen(column)) for column in zip(*data)
    ]
    # build one binary matrix per haploid set, 'a' first and then 'b'
    haploid_binaries = []
    for haploid_table in (a_table, b_table):
        haploid_columns = zip(*[record[1:] for record in haploid_table])
        haploid_binaries.append(
            Carbone.get_binary_rows_helper(haploid_columns, uniques))
    # entrywise sum of the two matrices, returned as a list of lists
    binary_rows = np.array(haploid_binaries).sum(axis=0).tolist()
    return headers, binary_rows
Exemple #2
0
def read_microsatellite_lines(raw_lines):
    """
    Convert alternating 'a'/'b' haploid rows into diploid rows.
    Each individual contributes two consecutive input rows: the first
    labelled with a trailing 'a' and the second with a trailing 'b'.
    The binary rows computed for the two haploid sets are summed entrywise.
    @param raw_lines: raw input lines
    @return: headers, diploid data
    @raise ValueError: on an odd or too small number of lines, fewer than
    two columns, rows of unequal length, or a badly suffixed row label
    """
    lines = Util.get_stripped_lines(raw_lines)
    line_count = len(lines)
    if line_count % 2 != 0:
        raise ValueError('expected an even number of lines')
    if line_count < 2:
        raise ValueError('expected at least two lines')
    tokens_by_line = [text.split() for text in lines]
    expected_len = len(tokens_by_line[0])
    if expected_len < 2:
        raise ValueError('expected at least two columns')
    for tokens in tokens_by_line:
        if len(tokens) == expected_len:
            continue
        raise ValueError('each row should have the same number of elements')
    # separate the leading label from the data values on every row
    labels = [tokens[0] for tokens in tokens_by_line]
    values = [tokens[1:] for tokens in tokens_by_line]
    # even offsets hold the 'a' rows and odd offsets hold the 'b' rows
    a_labels, b_labels = labels[0::2], labels[1::2]
    for label in a_labels:
        if label.endswith('a'):
            continue
        raise ValueError('each odd row label should end with the letter a')
    for label in b_labels:
        if label.endswith('b'):
            continue
        raise ValueError('each even row label should end with the letter b')
    # drop the trailing 'a' to get one header per individual
    headers = [label[:-1] for label in a_labels]
    # unique elements per column, collected across both haploid sets
    uniques = []
    for column in zip(*values):
        uniques.append(list(iterutils.unique_everseen(column)))
    # binary rows for each haploid set, using the shared uniques
    a_values, b_values = values[0::2], values[1::2]
    a_binary = Carbone.get_binary_rows_helper(zip(*a_values), uniques)
    b_binary = Carbone.get_binary_rows_helper(zip(*b_values), uniques)
    # add the two matrices entrywise and convert back to nested lists
    total = np.array([a_binary, b_binary]).sum(axis=0)
    return headers, total.tolist()
Exemple #3
0
def read_microsatellite_lines(raw_lines):
    """
    Read interleaved microsatellite rows and combine them by ploidy.
    The ploidy is obtained from the row labels via get_ploidy, and the
    headers via gen_headers. Rows are grouped by their offset within each
    run of ploidy consecutive rows; a binary matrix is built for every
    group and the matrices are added entrywise.
    @param raw_lines: raw input lines
    @return: headers, entrywise sum of the per-offset binary rows
    @raise ValueError: if there are no lines, if there are fewer than two
    columns, or if the rows do not all have the same number of elements
    """
    lines = Util.get_stripped_lines(raw_lines)
    full_rows = [line.split() for line in lines]
    # fail with a clear message instead of an IndexError on empty input
    if not full_rows:
        raise ValueError('expected at least one line')
    nfullcols = len(full_rows[0])
    if nfullcols < 2:
        raise ValueError('expected at least two columns')
    if not all(len(row) == nfullcols for row in full_rows):
        raise ValueError('expected the same number of elements in each row')
    # take the label column directly; subscripting zip(...) only works
    # where zip returns a list (python 2)
    full_names = tuple(row[0] for row in full_rows)
    ploidy = get_ploidy(full_names)
    headers = list(gen_headers(full_names, ploidy))
    # get the unique elements of each column
    rows = [row[1:] for row in full_rows]
    uniques = [list(iterutils.unique_everseen(col)) for col in zip(*rows)]
    # get the rows for each offset
    # floor division keeps n an integer regardless of the division mode
    # NOTE(review): rows beyond n * ploidy are not placed in any group;
    # presumably get_ploidy guarantees an exact multiple -- confirm
    n = len(rows) // ploidy
    groups = [[rows[j * ploidy + i] for j in range(n)] for i in range(ploidy)]
    # get the column groups, materialized so that the helper receives
    # real sequences rather than one-shot iterators
    col_groups = [list(zip(*m)) for m in groups]
    # get the binary row groups
    bin_row_groups = [
        Carbone.get_binary_rows_helper(group_cols, uniques)
        for group_cols in col_groups
    ]
    # get the entrywise sum
    binary_rows = np.array(bin_row_groups).sum(axis=0).tolist()
    return headers, binary_rows