Exemple #1
0
def process(line_sources):
    """
    Merge several hud tables, keeping only the headers common to all.
    For each kept header, the data found under that header in each source
    is concatenated in source order.
    @param line_sources: sources of line iterables
    @return: the encoded merged table followed by a newline
    """
    # decode every source, then split into parallel header and data tuples
    decoded = [hud.decode(lines) for lines in line_sources]
    header_list, data_list = zip(*decoded)
    # one header -> index lookup per source
    # (presumably the position of the header within its source; see Util)
    index_maps = [Util.inverse_map(headers) for headers in header_list]
    # headers that occur in every single source
    shared = set.intersection(*[set(headers) for headers in header_list])
    # walk all headers in order of first appearance, keeping the shared ones
    every_header = itertools.chain.from_iterable(header_list)
    out_headers = [
        h for h in iterutils.unique_everseen(every_header) if h in shared]
    # build one merged row per kept header
    out_data = []
    for h in out_headers:
        merged = []
        for data, index_of in zip(data_list, index_maps):
            if h in index_of:
                merged.extend(data[index_of[h]])
        out_data.append(merged)
    return hud.encode(out_headers, out_data) + '\n'
Exemple #2
0
def get_response_content(fs):
    """
    Convert a whitespace-separated table of diploid codes to a hud table.
    The first line returned by Util.get_stripped_lines supplies the headers.
    Every later line is a row of integer codes drawn from -1, 0, 1, 2,
    where -1 is the code that gets imputed when fs.use_mode is set.
    The imputed value is the most frequent non-missing code of the row,
    with ties going to the smallest code.
    @param fs: object providing .data (text) and .use_mode (flag)
    @return: hud.encode applied to the headers and the transposed rows
    @raise ValueError: on too few lines, a non-integer token, a code outside
    the allowed set, or a row made up only of -1 when imputing
    """
    lines = Util.get_stripped_lines(fs.data.splitlines())
    if len(lines) < 2:
        raise ValueError('expected at least two lines')
    rows = [line.split() for line in lines]
    headers, body = rows[0], rows[1:]
    # convert the whole table to integers before checking any value,
    # so that a non-integer token anywhere is reported first
    data_rows = [[int(token) for token in row] for row in body]
    allowed = (-1, 0, 1, 2)
    for row in data_rows:
        for x in row:
            if x in allowed:
                continue
            msg = 'invalid diploid data value: %d' % x
            raise ValueError(msg)
    # impute the missing data
    if fs.use_mode:
        filled_rows = []
        for row in data_rows:
            observed = [x for x in row if x != -1]
            if not observed:
                msg = 'a variable has missing data for each individual'
                raise ValueError(msg)
            # tally the codes 0, 1, 2; index() picks the smallest on ties
            tallies = [observed.count(code) for code in range(3)]
            mode = tallies.index(max(tallies))
            filled_rows.append([mode if x == -1 else x for x in row])
        data_rows = filled_rows
    # return the hud table
    return hud.encode(headers, zip(*data_rows))
Exemple #3
0
def process(line_sources):
    """
    Combine hud tables by restricting them to their common headers.
    The output keeps the headers in order of first appearance and, for each
    one, joins the corresponding data from every source end to end.
    @param line_sources: sources of line iterables
    @return: the encoded combined table followed by a newline
    """
    # decode each source and separate the headers from the data
    pairs = [hud.decode(lines) for lines in line_sources]
    header_list, data_list = zip(*pairs)
    # per-source lookup from header to index, as built by Util.inverse_map
    lookups = [Util.inverse_map(headers) for headers in header_list]
    # the headers present in all of the sources
    as_sets = [set(headers) for headers in header_list]
    common = set.intersection(*as_sets)
    # all distinct headers, ordered by first appearance
    chained = itertools.chain.from_iterable(header_list)
    ordered = list(iterutils.unique_everseen(chained))
    out_headers = [h for h in ordered if h in common]
    # for each output header, chain together its data from every source
    out_data = []
    for h in out_headers:
        pieces = [
            data[lookup[h]]
            for data, lookup in zip(data_list, lookups) if h in lookup]
        out_data.append(list(itertools.chain.from_iterable(pieces)))
    return hud.encode(out_headers, out_data) + '\n'
Exemple #4
0
def process(fs, raw_lines):
    """
    Decode phylip input and re-encode its binary rows.
    @param fs: object whose .hud and .phy flags select the output format,
    with .hud taking precedence
    @param raw_lines: lines passed to Phylip.decode
    @return: the encoded text followed by a newline,
    or None when neither flag is set
    """
    headers, sequences = Phylip.decode(raw_lines)
    binary_rows = Carbone.get_binary_rows(sequences)
    if fs.hud:
        encoded = hud.encode(headers, binary_rows)
    elif fs.phy:
        # phylip output wants each row as a single string of digits
        binary_seqs = [''.join(map(str, row)) for row in binary_rows]
        encoded = Phylip.encode(headers, binary_seqs)
    else:
        # no output format requested
        return None
    return encoded + '\n'
Exemple #5
0
def get_response_content(fs):
    """
    Combine groups of hud rows by elementwise summation.
    The groups come from process_headers, which yields pairs consisting of
    an output header and the input headers whose rows should be summed.
    @param fs: object providing .hud (text) plus the .combine_exist
    and .remove_invariant flags
    @return: the encoded hud table followed by a newline
    """
    # get the headers and data from the input
    headers, sequences = hud.decode(fs.hud.splitlines())
    h_to_s = dict(zip(headers, sequences))
    headers_out = []
    sequences_out = []
    for p, hs in process_headers(headers):
        headers_out.append(p)
        # np.vstack requires a real sequence; passing a bare generator was
        # deprecated in numpy 1.16 and is rejected by current releases
        data = np.vstack([h_to_s[h] for h in hs]).sum(axis=0)
        if fs.combine_exist:
            # cap every summed entry at 1
            data = np.minimum(1, data)
        sequences_out.append(data)
    if fs.remove_invariant:
        sequences_out = remove_invariant_columns(sequences_out)
    return hud.encode(headers_out, sequences_out) + '\n'
Exemple #6
0
def get_response_content(fs):
    """
    Sum the hud rows of each header group into a single output row.
    process_headers supplies the grouping as pairs of an output header
    and the input headers belonging to it.
    @param fs: object providing .hud (text) plus the .combine_exist
    and .remove_invariant flags
    @return: the encoded hud table followed by a newline
    """
    # get the headers and data from the input
    headers, sequences = hud.decode(fs.hud.splitlines())
    h_to_s = dict(zip(headers, sequences))
    headers_out = []
    sequences_out = []
    for p, hs in process_headers(headers):
        headers_out.append(p)
        # build a list first: np.vstack no longer accepts a generator
        # (deprecated in numpy 1.16, a TypeError in current releases)
        group_rows = [h_to_s[h] for h in hs]
        data = np.vstack(group_rows).sum(axis=0)
        if fs.combine_exist:
            # limit each summed entry to at most 1
            data = np.minimum(1, data)
        sequences_out.append(data)
    if fs.remove_invariant:
        sequences_out = remove_invariant_columns(sequences_out)
    return hud.encode(headers_out, sequences_out) + '\n'
Exemple #7
0
def process(raw_lines):
    """
    Convert microsatellite input lines to an encoded hud table.
    @param raw_lines: lines passed to read_microsatellite_lines
    @return: the encoded hud table followed by a newline
    """
    names, rows = read_microsatellite_lines(raw_lines)
    table = hud.encode(names, rows)
    return table + '\n'
Exemple #8
0
def process(raw_lines):
    """
    Read microsatellite lines and write them back out as a hud table.
    @param raw_lines: lines passed to read_microsatellite_lines
    @return: the encoded hud table followed by a newline
    """
    parsed = read_microsatellite_lines(raw_lines)
    headers, binary_rows = parsed
    encoded = hud.encode(headers, binary_rows)
    return encoded + '\n'
Exemple #9
0
def get_response_content(fs):
    """
    Convert the microsatellite text in fs.data to an encoded hud table.
    @param fs: object providing .data (text)
    @return: the encoded hud table followed by a newline
    """
    raw_lines = fs.data.splitlines()
    names, rows = read_microsatellite_lines(raw_lines)
    return hud.encode(names, rows) + '\n'