Beispiel #1
0
 def test_format_mapping_file(self):
     """format_mapping_file output should equal the stored example.

     Formats a four-column header, two sample rows and two comment lines,
     then compares the result with ``example_mapping_file``.
     """
     column_names = ['SampleID', 'col1', 'col0', 'Description']
     sample_rows = [
         ['bsample', 'v1_3', 'v0_3', 'd1'],
         ['asample', 'aval', 'another', 'd2'],
     ]
     comment_lines = ['this goes after headers', 'this too']

     observed = format_mapping_file(column_names, sample_rows,
                                    comment_lines)
     self.assertEqual(observed, example_mapping_file)
def filter_mapping_file_from_mapping_f(mapping_f, sample_ids_to_keep,
                                       negate=False):
    """Filter rows from a metadata mapping file.

    mapping_f: mapping file input, handed to parse_mapping_file
    sample_ids_to_keep: sample ids that are compared against the first
     field of every mapping file row
    negate: if false (default) only rows whose sample id is listed are
     kept; if true only rows whose sample id is NOT listed are kept

    Returns whatever format_mapping_file produces for the original header
    and the retained rows. The comments obtained while parsing are not
    passed on to format_mapping_file.
    """
    rows, header, _ = parse_mapping_file(mapping_f)
    listed_ids = set(sample_ids_to_keep)

    # a row is retained when "its id is listed" agrees with "not negated"
    keep_when_listed = not negate
    retained_rows = [row for row in rows
                     if (row[0] in listed_ids) == keep_when_listed]

    return format_mapping_file(header, retained_rows)
Beispiel #3
0
def filter_mapping_file_from_mapping_f(mapping_f,
                                       sample_ids_to_keep,
                                       negate=False):
    """Return a formatted mapping file restricted to a set of samples.

    The mapping file is parsed with parse_mapping_file, each row is
    selected according to its first field (the sample id) and the
    selection is re-serialized with format_mapping_file together with the
    original header. Comments found by the parser are discarded.

    mapping_f: input accepted by parse_mapping_file
    sample_ids_to_keep: the sample ids to select
    negate: when true, select every row whose sample id is *not* in
     sample_ids_to_keep instead
    """
    parsed_rows, header, _ = parse_mapping_file(mapping_f)
    selected_ids = frozenset(sample_ids_to_keep)

    if negate:
        # inverted selection: drop the listed samples, keep the rest
        surviving = [entry for entry in parsed_rows
                     if entry[0] not in selected_ids]
    else:
        surviving = [entry for entry in parsed_rows
                     if entry[0] in selected_ids]

    return format_mapping_file(header, surviving)
Beispiel #4
0
def format_vectors_to_js(mapping_file_data, mapping_file_headers, coords_data,
                        coords_headers, connected_by_header,
                        sorted_by_header=None):
    """Write a string representing the vectors in a PCoA plot as javascript

    Inputs:
    mapping_file_data: contents of the mapping file
    mapping_file_headers: headers of the mapping file
    coords_data: coordinates of the PCoA plot in a numpy 2-D array or a list of
    numpy 2-D arrays for jackknifed input
    coords_headers: headers of the coords in the PCoA plot or a list of lists
    with the headers for jackknifed input
    connected_by_header: header of the mapping file that represents how the
    lines will be connected; if None only the declaration of the (empty)
    g_vectorPositions array is written
    sorted_by_header: numeric-only header name to sort the samples in the
    vectors; the values of this column are converted with float() to sort

    Output:
    js_vectors_string: string that represents the vectors in the shape of a
    javascript object

    Notes:
    If using jackknifed input, the coordinates and headers that will be used are
    the ones belonging to the master coords i. e. the first element.
    """

    js_vectors_string = ['\nvar g_vectorPositions = new Array();\n']

    # without a category to connect the samples by there are no vectors
    if connected_by_header is None:
        return ''.join(js_vectors_string)

    # check if we are processing jackknifed input, if so just get the master
    if isinstance(coords_data, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    columns_to_keep = ['SampleID', connected_by_header]

    # do not add None if sorted_by_header is None or empty
    if sorted_by_header:
        columns_to_keep.append(sorted_by_header)

    # reduce the amount of data by keeping the required fields only
    mapping_file_data, mapping_file_headers = keep_columns_from_mapping_file(
        mapping_file_data, mapping_file_headers, columns_to_keep)

    # format the mapping file to use this with the filtering function
    mf_string = format_mapping_file(mapping_file_headers, mapping_file_data)

    category_index = mapping_file_headers.index(connected_by_header)
    connected_by = list(set([line[category_index]
                             for line in mapping_file_data]))

    for category in connected_by:
        # create a new StringIO for each iteration; else the object won't be
        # usable after the first iteration & you'll get an error
        sample_ids = sample_ids_from_metadata_description(
            StringIO(mf_string), '%s:%s' % (connected_by_header, category))

        # if there is a sorting header, sort the coords using these values
        if sorted_by_header:
            sorting_index = mapping_file_headers.index(sorted_by_header)

            # set membership keeps the row selection linear in the row count
            ids_in_category = set(sample_ids)
            to_sort = [line for line in mapping_file_data
                       if line[0] in ids_in_category]

            # get the sorted sample ids from the sorted-reduced mapping file;
            # a comprehension is used instead of zip(*rows)[0] because zip()
            # is not subscriptable in Python 3 and the old form raised an
            # IndexError when no row matched
            sample_ids = [line[0] for line in
                          sorted(to_sort,
                                 key=lambda x: float(x[sorting_index]))]

        # each category value is a new vector
        js_vectors_string.append("g_vectorPositions['%s'] = new Array();\n"
                                 % (category))

        for s in sample_ids:
            coords_index = coords_headers.index(s)

            # print the first three elements of each coord for each sample
            js_vectors_string.append(
                "g_vectorPositions['%s']['%s'] = %s;\n"
                % (category, s, coords_data[coords_index, :3].tolist()))

    return ''.join(js_vectors_string)
Beispiel #5
0
def format_vectors_to_js(mapping_file_data,
                         mapping_file_headers,
                         coords_data,
                         coords_headers,
                         connected_by_header,
                         sorted_by_header=None):
    """Write a string representing the vectors in a PCoA plot as javascript

    Inputs:
    mapping_file_data: contents of the mapping file
    mapping_file_headers: headers of the mapping file
    coords_data: coordinates of the PCoA plot in a numpy 2-D array or a list of
    numpy 2-D arrays for jackknifed input
    coords_headers: headers of the coords in the PCoA plot or a list of lists
    with the headers for jackknifed input
    connected_by_header: header of the mapping file that represents how the
    lines will be connected; if None only the declaration of the (empty)
    g_vectorPositions array is written
    sorted_by_header: numeric-only header name to sort the samples in the
    vectors; the values of this column are converted with float() to sort

    Output:
    js_vectors_string: string that represents the vectors in the shape of a
    javascript object

    Notes:
    If using jackknifed input, the coordinates and headers that will be used are
    the ones belonging to the master coords i. e. the first element.
    """

    js_vectors_string = ['\nvar g_vectorPositions = new Array();\n']

    # without a category to connect the samples by there are no vectors
    if connected_by_header is None:
        return ''.join(js_vectors_string)

    # check if we are processing jackknifed input, if so just get the master
    if isinstance(coords_data, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    columns_to_keep = ['SampleID', connected_by_header]

    # do not add None if sorted_by_header is None or empty
    if sorted_by_header:
        columns_to_keep.append(sorted_by_header)

    # reduce the amount of data by keeping the required fields only
    mapping_file_data, mapping_file_headers = keep_columns_from_mapping_file(
        mapping_file_data, mapping_file_headers, columns_to_keep)

    # format the mapping file to use this with the filtering function
    mf_string = format_mapping_file(mapping_file_headers,
                                    mapping_file_data)

    category_index = mapping_file_headers.index(connected_by_header)
    connected_by = list(set([line[category_index]
                             for line in mapping_file_data]))

    for category in connected_by:
        # create a new StringIO for each iteration; else the object won't be
        # usable after the first iteration & you'll get an error
        sample_ids = sample_ids_from_metadata_description(
            StringIO(mf_string), '%s:%s' % (connected_by_header, category))

        # if there is a sorting header, sort the coords using these values
        if sorted_by_header:
            sorting_index = mapping_file_headers.index(sorted_by_header)

            # set membership keeps the row selection linear in the row count
            ids_in_category = set(sample_ids)
            to_sort = [line for line in mapping_file_data
                       if line[0] in ids_in_category]

            # get the sorted sample ids from the sorted-reduced mapping file;
            # a comprehension is used instead of zip(*rows)[0] because zip()
            # is not subscriptable in Python 3 and the old form raised an
            # IndexError when no row matched
            sample_ids = [line[0] for line in
                          sorted(to_sort,
                                 key=lambda x: float(x[sorting_index]))]

        # each category value is a new vector
        js_vectors_string.append(
            "g_vectorPositions['%s'] = new Array();\n" % (category))

        for s in sample_ids:
            coords_index = coords_headers.index(s)

            # print the first three elements of each coord for each sample
            js_vectors_string.append(
                "g_vectorPositions['%s']['%s'] = %s;\n" %
                (category, s, coords_data[coords_index, :3].tolist()))

    return ''.join(js_vectors_string)
def write_mf(f, _df):
    """Serialize ``_df`` as a mapping file and write it to the path ``f``.

    The header is 'SampleID' followed by the column names of ``_df``; the
    rows are the tuples yielded by ``_df.itertuples()`` (presumably a pandas
    DataFrame, in which case the index is the first field of each tuple --
    confirm against callers). The text produced by format_mapping_file is
    written followed by a newline. ``f`` is opened in 'w' mode.
    """
    from emperor.qiime_backports.format import format_mapping_file

    with open(f, 'w') as handle:
        header = ['SampleID'] + _df.columns.tolist()
        rows = list(_df.itertuples())
        handle.write(format_mapping_file(header, rows) + '\n')
def write_mf(f, _df):
    """Write the contents of ``_df`` to ``f`` in mapping file format.

    f: path of the output file; it is opened for writing ('w')
    _df: object providing ``columns.tolist()`` and ``itertuples()``
     (presumably a pandas DataFrame -- confirm against callers)

    The header passed to format_mapping_file is 'SampleID' plus the column
    names, and the data are the tuples from ``itertuples()``. A trailing
    newline is appended to the formatted text before writing.
    """
    from emperor.qiime_backports.format import format_mapping_file

    with open(f, 'w') as out:
        column_names = ['SampleID'] + _df.columns.tolist()
        records = list(_df.itertuples())
        formatted = format_mapping_file(column_names, records)
        out.write(formatted + '\n')