Example #1
 def test_group_by_field(self):
     """group_by_field should group table by fields"""
     t = [
             ['#sample', 'loc', 'age'],
             ['a','US','5'],
             ['b','US','10'],
             ['c','Mal','5'],
             ['d','Mal','10'],
             ['e','Ven','5'],
         ]
     self.assertEqual(group_by_field(t, 'loc'), \
         {'US':['a','b'], 'Mal':['c','d'], 'Ven':['e']})
     self.assertEqual(group_by_field(t, 'age'), \
         {'5':['a','c','e'], '10':['b','d']})
Example #2
 def test_group_by_field(self):
     """group_by_field should group table by fields"""
     t = [
         ['#sample', 'loc', 'age'],
         ['a', 'US', '5'],
         ['b', 'US', '10'],
         ['c', 'Mal', '5'],
         ['d', 'Mal', '10'],
         ['e', 'Ven', '5'],
     ]
     self.assertEqual(group_by_field(t, 'loc'), \
         {'US':['a','b'], 'Mal':['c','d'], 'Ven':['e']})
     self.assertEqual(group_by_field(t, 'age'), \
         {'5':['a','c','e'], '10':['b','d']})
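The two tests above pin down the expected behaviour of group_by_field: the first row of the table is the header, and the sample IDs in the first column are grouped by their value in the named column. A minimal sketch consistent with those assertions (the actual QIIME implementation may differ in validation and error handling):

def group_by_field(table, name):
    """Group first-column sample IDs by the value of the named column.

    Sketch only: `table` is a list of rows whose first row is the header,
    e.g. ['#sample', 'loc', 'age']; returns {field_value: [sample_id, ...]}.
    """
    header, body = table[0], table[1:]
    try:
        index = header.index(name)
    except ValueError:
        raise ValueError("Field %r is not in the header %r" % (name, header))
    groups = {}
    for row in body:
        groups.setdefault(row[index], []).append(row[0])
    return groups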
Example #3
def _collate_cluster_pcoa_plot_data(coords_f, map_f, category):
    pc_data = parse_coords(coords_f)
    coords_d = dict(zip(pc_data[0], pc_data[1]))

    map_data = parse_mapping_file(map_f)
    full_map_data = [map_data[1]]
    full_map_data.extend(map_data[0])

    sid_map = group_by_field(full_map_data, category)
    sorted_states = sorted(sid_map.keys())

    color_pool = get_color_pool()
    if len(sorted_states) > len(color_pool):
        raise ValueError("Not enough colors to uniquely color sample "
                         "groups.")

    results = []
    for state, color in zip(sorted_states,
                            color_pool[:len(sorted_states)]):
        sids = sid_map[state]
        xs = [coords_d[sid][0] for sid in sids]
        ys = [coords_d[sid][1] for sid in sids]
        results.append((xs, ys, color, state))

    return results
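The (xs, ys, color, state) tuples returned above are shaped for per-group scatter plotting. A hypothetical consumer might look like the following; the matplotlib plotting helper is an assumption, not part of the original module:

import matplotlib.pyplot as plt

def plot_cluster_pcoa(results, out_path):
    # `results` is the list of (xs, ys, color, state) tuples produced by
    # _collate_cluster_pcoa_plot_data; this plotting helper is hypothetical.
    fig, ax = plt.subplots()
    for xs, ys, color, state in results:
        ax.scatter(xs, ys, color=color, label=state)
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    ax.legend()
    fig.savefig(out_path)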
Example #4
def group_distances(mapping_file,
                    dmatrix_file,
                    fields,
                    dir_prefix='',
                    subdir_prefix='group_distances'):
    """Calculate all lists of distance groups.

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.
    """
    distance_groups = {}
    mapping, header, comments = parse_mapping_file(open(mapping_file, 'U'))
    header = [header]
    header.extend(mapping)
    mapping = header

    distance_header, distance_matrix = \
        parse_distmat(open(dmatrix_file, 'U'))

    if fields == []:
        raise ValueError(
            'Since no fields were defined and the values within your fields '
            'are either all the same or all unique, a field was not chosen '
            'for analysis. Please define a field to analyse.'
        )

    single_field = defaultdict(dict)
    for i in range(len(fields)):
        field = fields[i]
        groups = group_by_field(mapping, field)
        data = distances_by_groups(distance_header, distance_matrix, groups)
        # Need to remove pound signs from field name.
        field_name = field.replace('#', '')
        single_field[field_name] = data

    write_distance_files(group_distance_dict=single_field,
                         dir_prefix=dir_prefix,
                         subdir_prefix=subdir_prefix + '_single')

    paired_field = defaultdict(dict)
    paired_field_for_writing = defaultdict(dict)
    for i in range(len(fields)):
        for j in range(i, len(fields)):
            fieldi = fields[i]
            fieldj = fields[j]
            groups = group_by_fields(mapping, [fieldi, fieldj])
            data = distances_by_groups(distance_header, distance_matrix,
                                       groups)
            paired_field[fieldi + '_to_' + fieldj] = data
            paired_field_for_writing[fieldi + '_to_' + fieldj] = data

    write_distance_files(group_distance_dict=paired_field_for_writing,
                         dir_prefix=dir_prefix,
                         subdir_prefix=subdir_prefix + '_pairs')

    return single_field, paired_field, distance_matrix
def group_distances(mapping_file, dmatrix_file, fields, dir_prefix='',
                    subdir_prefix='group_distances'):
    """Calculate all lists of distance groups.

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.
    """
    distance_groups = {}
    mapping, header, comments = parse_mapping_file(open(mapping_file, 'U'))
    header = [header]
    header.extend(mapping)
    mapping = header

    distance_header, distance_matrix = \
        parse_distmat(open(dmatrix_file, 'U'))

    if fields == []:
        raise ValueError(
            'Since no fields were defined and the values within your fields '
            'are either all the same or all unique, a field was not chosen '
            'for analysis. Please define a field to analyse.')

    single_field = defaultdict(dict)
    for i in range(len(fields)):
        field = fields[i]
        groups = group_by_field(mapping, field)
        data = distances_by_groups(distance_header, distance_matrix, groups)
        # Need to remove pound signs from field name.
        field_name = field.replace('#', '')
        single_field[field_name] = data

    write_distance_files(group_distance_dict=single_field,
                         dir_prefix=dir_prefix, subdir_prefix=subdir_prefix + '_single')

    paired_field = defaultdict(dict)
    paired_field_for_writing = defaultdict(dict)
    for i in range(len(fields)):
        for j in range(i, len(fields)):
            fieldi = fields[i]
            fieldj = fields[j]
            groups = group_by_fields(mapping, [fieldi, fieldj])
            data = distances_by_groups(
                distance_header,
                distance_matrix,
                groups)
            paired_field[fieldi + '_to_' + fieldj] = data
            paired_field_for_writing[fieldi + '_to_' + fieldj] = data

    write_distance_files(group_distance_dict=paired_field_for_writing,
                         dir_prefix=dir_prefix, subdir_prefix=subdir_prefix + '_pairs')

    return single_field, paired_field, distance_matrix
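Either variant returns the same triple. A hypothetical caller (paths and the Treatment field name are placeholders drawn from the mapping fixtures further down) might use it like this:

# Hypothetical usage; paths are placeholders.
single, paired, dm = group_distances('map.txt', 'unweighted_unifrac_dm.txt',
                                     fields=['Treatment'],
                                     dir_prefix='/tmp/out')
# `single` maps a field name (pound sign stripped) to the output of
# distances_by_groups; `paired` is keyed by 'FieldA_to_FieldB'.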
Example #6
def get_grouped_distances(
    dist_matrix_header,
    dist_matrix,
    mapping_header,
    mapping,
    field,
    within=True,
    suppress_symmetry_and_hollowness_check=False,
):
    """Returns a list of distance groupings for the specified field.

    The return value is a list that contains tuples of three elements: the
    first two elements are the field values being compared, and the third
    element is a list of the distances.

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    Arguments:
        - dist_matrix_header: The distance matrix header, obtained from
                              parse.parse_distmat()
        - dist_matrix: The distance matrix, obtained from
                       parse.parse_distmat().
        - mapping_header: The mapping file header, obtained from
                          parse.parse_mapping_file()
        - mapping: The mapping file's contents, obtained from
                   parse.parse_mapping_file()
        - field: A field in the mapping file to do the grouping on.
        - within: If True, distances are grouped within a field value. If
          False, distances are grouped between field values.
        - suppress_symmetry_and_hollowness_check: By default, the input
          distance matrix will be checked for symmetry and hollowness. It is
          recommended to leave this check in place for safety, as the check
          is fairly fast. However, if you *know* you have a symmetric and
          hollow distance matrix, you can disable this check for small
          performance gains on extremely large distance matrices
    """
    _validate_input(dist_matrix_header, dist_matrix, mapping_header, mapping, field)
    mapping_data = [mapping_header]
    mapping_data.extend(mapping)
    groups = group_by_field(mapping_data, field)
    return _get_groupings(dist_matrix_header, dist_matrix, groups, within, suppress_symmetry_and_hollowness_check)
Example #7
def get_grouped_distances(dist_matrix_header,
                          dist_matrix,
                          mapping_header,
                          mapping,
                          field,
                          within=True,
                          suppress_symmetry_and_hollowness_check=False):
    """Returns a list of distance groupings for the specified field.

    The return value is a list that contains tuples of three elements: the
    first two elements are the field values being compared, and the third
    element is a list of the distances.

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    Arguments:
        - dist_matrix_header: The distance matrix header, obtained from
                              parse.parse_distmat()
        - dist_matrix: The distance matrix, obtained from
                       parse.parse_distmat().
        - mapping_header: The mapping file header, obtained from
                          parse.parse_mapping_file()
        - mapping: The mapping file's contents, obtained from
                   parse.parse_mapping_file()
        - field: A field in the mapping file to do the grouping on.
        - within: If True, distances are grouped within a field value. If
          False, distances are grouped between field values.
        - suppress_symmetry_and_hollowness_check: By default, the input
          distance matrix will be checked for symmetry and hollowness. It is
          recommended to leave this check in place for safety, as the check
          is fairly fast. However, if you *know* you have a symmetric and
          hollow distance matrix, you can disable this check for small
          performance gains on extremely large distance matrices
    """
    _validate_input(dist_matrix_header, dist_matrix, mapping_header, mapping,
                    field)
    mapping_data = [mapping_header]
    mapping_data.extend(mapping)
    groups = group_by_field(mapping_data, field)
    return _get_groupings(dist_matrix_header, dist_matrix, groups, within,
                          suppress_symmetry_and_hollowness_check)
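A hedged sketch of how get_grouped_distances might be driven with the QIIME parsers used throughout these examples; file names are placeholders, and the Treatment field with its Control/Fast states comes from the setUp fixtures further down:

# Hypothetical driver; file names are placeholders.
dist_header, dist_matrix = parse_distmat(open('unweighted_unifrac_dm.txt', 'U'))
mapping, map_header, _ = parse_mapping_file(open('map.txt', 'U'))

# Within-group distances for each Treatment state, as tuples like
# ('Control', 'Control', [list of distances]).
within = get_grouped_distances(dist_header, dist_matrix, map_header, mapping,
                               'Treatment', within=True)

# Between-group distances, as tuples like ('Control', 'Fast', [distances]).
between = get_grouped_distances(dist_header, dist_matrix, map_header, mapping,
                                'Treatment', within=False)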
def group_distances(mapping_file,dmatrix_file,fields,dir_prefix='',\
    subdir_prefix='group_distances'):
    """Calculate all lists of distance groups."""
    distance_groups = {}
    mapping, header, comments = parse_mapping_file(open(mapping_file,'U'))
    header = [header]
    header.extend(mapping)
    mapping=header

    distance_header, distance_matrix = \
        parse_distmat(open(dmatrix_file,'U'))

    if fields == []:
        raise ValueError(
            'Since no fields were defined and the values within your fields '
            'are either all the same or all unique, a field was not chosen '
            'for analysis. Please define a field to analyse.')
        
    single_field = defaultdict(dict)
    for i in range(len(fields)):
        field = fields[i]
        groups = group_by_field(mapping, field)
        data = distances_by_groups(distance_header, distance_matrix, groups)
        #Need to remove pound signs from field name.
        field_name = field.replace('#','')
        single_field[field_name]=data

    write_distance_files(group_distance_dict=single_field,\
        dir_prefix=dir_prefix,subdir_prefix=subdir_prefix+'_single')
        
    paired_field = defaultdict(dict)
    paired_field_for_writing = defaultdict(dict)
    for i in range(len(fields)):
        for j in range(i,len(fields)):
            fieldi = fields[i]
            fieldj = fields[j]
            groups = group_by_fields(mapping, [fieldi,fieldj])
            data = distances_by_groups(distance_header, distance_matrix, groups)
            paired_field[fieldi+'_to_'+fieldj]=data
            paired_field_for_writing[fieldi+'_to_'+fieldj]=data
    
    write_distance_files(group_distance_dict=paired_field_for_writing,\
        dir_prefix=dir_prefix,subdir_prefix=subdir_prefix+'_pairs')
    
    return single_field, paired_field, distance_matrix
Example #9
def group_distances(mapping_file,dmatrix_file,fields,dir_prefix='',\
    subdir_prefix='group_distances'):
    """Calculate all lists of distance groups."""
    distance_groups = {}
    mapping, header, comments = parse_mapping_file(open(mapping_file, 'U'))
    header = [header]
    header.extend(mapping)
    mapping = header

    distance_header, distance_matrix = \
        parse_distmat(open(dmatrix_file,'U'))

    if fields is None:
        fields = [mapping[0][0]]
    single_field = defaultdict(dict)
    for i in range(len(fields)):
        field = fields[i]
        groups = group_by_field(mapping, field)
        data = distances_by_groups(distance_header, distance_matrix, groups)
        #Need to remove pound signs from field name.
        field_name = field.replace('#', '')

        single_field[field_name] = data

    write_distance_files(group_distance_dict=single_field,\
        dir_prefix=dir_prefix,subdir_prefix=subdir_prefix+'_single')

    paired_field = defaultdict(dict)
    for i in range(len(fields)):
        for j in range(i, len(fields)):
            fieldi = fields[i]
            fieldj = fields[j]
            groups = group_by_fields(mapping, [fieldi, fieldj])
            data = distances_by_groups(distance_header, distance_matrix,
                                       groups)
            paired_field[fieldi + '_to_' + fieldj] = data

    write_distance_files(group_distance_dict=paired_field,\
        dir_prefix=dir_prefix,subdir_prefix=subdir_prefix+'_pairs')

    return single_field, paired_field, distance_matrix
Example #10
def get_field_state_comparisons(dist_matrix_header,
                                dist_matrix,
                                mapping_header,
                                mapping,
                                field,
                                comparison_field_states,
                                suppress_symmetry_and_hollowness_check=False):
    """Returns a 2D dictionary relating distances between field states.

    The 2D dictionary is constructed such that each top-level key is a field
    state other than the field states in comparison_field_states. The
    second-level key is a field state from comparison_field_states, and the
    value at the (key, key) index is a list of distances between those two
    field states. Thus, given a field, this function will create comparisons
    between the specified comparison_field_states and all other field states.

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    Arguments:
        - dist_matrix_header: The distance matrix header, obtained from
                              parse.parse_distmat()
        - dist_matrix: The distance matrix, obtained from
                       parse.parse_distmat().
        - mapping_header: The mapping file header, obtained from
                          parse.parse_mapping_file()
        - mapping: The mapping file's contents, obtained from
                   parse.parse_mapping_file()
        - field: A field in the mapping file to do the comparisons on.
        - comparison_field_states: A list of strings specifying the field
          states to compare to all other field states. Cannot be an empty list.
        - suppress_symmetry_and_hollowness_check: By default, the input
          distance matrix will be checked for symmetry and hollowness. It is
          recommended to leave this check in place for safety, as the check
          is fairly fast. However, if you *know* you have a symmetric and
          hollow distance matrix, you can disable this check for small
          performance gains on extremely large distance matrices
    """
    _validate_input(dist_matrix_header, dist_matrix, mapping_header, mapping,
                    field)

    # Make sure each comparison group field state is in the specified field.
    if not comparison_field_states:
        raise ValueError("You must provide at least one field state to "
                         "compare to all of the other field states.")
    mapping_data = [mapping_header]
    mapping_data.extend(mapping)
    groups = group_by_field(mapping_data, field)
    for field_state in comparison_field_states:
        if field_state not in groups:
            raise ValueError("The comparison group field state '%s' is not in "
                             "the provided mapping file's field '%s'." %
                             (field_state, field))

    # Grab a list of all other field states (besides the ones in
    # comparison_field_states). These will be the field states that the states
    # in comparison_field_states will be compared against.
    field_states = [
        group for group in groups.keys()
        if group not in comparison_field_states
    ]

    # Get between distance groupings for the field of interest.
    between_groupings = get_grouped_distances(dist_matrix_header, dist_matrix,
            mapping_header, mapping, field, within=False,
            suppress_symmetry_and_hollowness_check=\
                    suppress_symmetry_and_hollowness_check)

    # Build up our 2D dictionary giving the distances between a field state and
    # a comparison group field state by filtering out the between_groupings
    # list to include only the comparisons that we want.
    result = {}
    for field_state in field_states:
        result[field_state] = {}
        for comp_field_state in comparison_field_states:
            result[field_state][comp_field_state] = []
            for group in between_groupings:
                if ((group[0] == field_state or group[1] == field_state)
                        and (group[0] == comp_field_state
                             or group[1] == comp_field_state)):
                    # We've found a group of distances between our comparison
                    # field state and the current field state, so keep the
                    # data.
                    result[field_state][comp_field_state] = group[2]
    return result
Example #11
def get_field_state_comparisons(dist_matrix_header, dist_matrix,
                                mapping_header, mapping, field,
                                comparison_field_states,
                                suppress_symmetry_and_hollowness_check=False):
    """Returns a 2D dictionary relating distances between field states.

    The 2D dictionary is constructed such that each top-level key is a field
    state other than the field states in comparison_field_states. The
    second-level key is a field state from comparison_field_states, and the
    value at the (key, key) index is a list of distances between those two
    field states. Thus, given a field, this function will create comparisons
    between the specified comparison_field_states and all other field states.

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    Arguments:
        - dist_matrix_header: The distance matrix header, obtained from
                              parse.parse_distmat()
        - dist_matrix: The distance matrix, obtained from
                       parse.parse_distmat().
        - mapping_header: The mapping file header, obtained from
                          parse.parse_mapping_file()
        - mapping: The mapping file's contents, obtained from
                   parse.parse_mapping_file()
        - field: A field in the mapping file to do the comparisons on.
        - comparison_field_states: A list of strings specifying the field
          states to compare to all other field states. Cannot be an empty list.
        - suppress_symmetry_and_hollowness_check: By default, the input
          distance matrix will be checked for symmetry and hollowness. It is
          recommended to leave this check in place for safety, as the check
          is fairly fast. However, if you *know* you have a symmetric and
          hollow distance matrix, you can disable this check for small
          performance gains on extremely large distance matrices
    """
    _validate_input(dist_matrix_header, dist_matrix, mapping_header, mapping,
                    field)

    # Make sure each comparison group field state is in the specified field.
    if not comparison_field_states:
        raise ValueError("You must provide at least one field state to "
                         "compare to all of the other field states.")
    mapping_data = [mapping_header]
    mapping_data.extend(mapping)
    groups = group_by_field(mapping_data, field)
    for field_state in comparison_field_states:
        if field_state not in groups:
            raise ValueError("The comparison group field state '%s' is not in "
                             "the provided mapping file's field '%s'."
                             % (field_state, field))

    # Grab a list of all other field states (besides the ones in
    # comparison_field_states). These will be the field states that the states
    # in comparison_field_states will be compared against.
    field_states = [group for group in groups.keys()
                    if group not in comparison_field_states]

    # Get between distance groupings for the field of interest.
    between_groupings = get_grouped_distances(dist_matrix_header, dist_matrix,
            mapping_header, mapping, field, within=False,
            suppress_symmetry_and_hollowness_check=\
                    suppress_symmetry_and_hollowness_check)

    # Build up our 2D dictionary giving the distances between a field state and
    # a comparison group field state by filtering out the between_groupings
    # list to include only the comparisons that we want.
    result = {}
    for field_state in field_states:
        result[field_state] = {}
        for comp_field_state in comparison_field_states:
            result[field_state][comp_field_state] = []
            for group in between_groupings:
                if ((group[0] == field_state or group[1] == field_state)
                    and (group[0] == comp_field_state or
                         group[1] == comp_field_state)):
                    # We've found a group of distances between our comparison
                    # field state and the current field state, so keep the
                    # data.
                    result[field_state][comp_field_state] = group[2]
    return result
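To illustrate the 2D dictionary described in the docstring, a hypothetical call using the Treatment field from the later setUp fixtures might look like this (no real output values are shown):

# Hypothetical usage; inputs mirror the parsed "files" in the setUp examples.
comparisons = get_field_state_comparisons(dist_header, dist_matrix,
                                           map_header, mapping,
                                           'Treatment', ['Control'])
# `comparisons` is keyed by every other field state, then by the comparison
# state: comparisons['Fast']['Control'] is the list of Control-vs-Fast
# distances filtered out of the between-group groupings.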
Example #12
def make_pie_chart(data,
                   dir_path,
                   level,
                   color_data,
                   prefs,
                   background_color,
                   label_color,
                   file_prefix=None,
                   props={},
                   y_len=6.5,
                   dpi=80,
                   generate_eps=False,
                   generate_pdf=True,
                   others_key="All Other Categories",
                   others_color="#eeeeee",
                   should_capitalize=True):
    """
    Write interactive piechart 

    data: [fraction:label,...] 

    trunc_len: truncates labels after this many chars

    """
    if not data:
        raise ValueError, "No data available for pie chart."

    all_fracs = []
    all_labels = []
    colors = []
    for key in prefs.keys():
        if prefs[key]['column'] != str(level):
            continue
        col_name = 'Taxon'
        mapping = [['Taxon']]
        mapping.extend([[m] for m in color_data[1]])
        if 'colors' in prefs[key]:
            if isinstance(prefs[key]['colors'], dict):
                pref_colors = prefs[key]['colors'].copy()  # copy so we can mutate
            else:
                pref_colors = prefs[key]['colors'][:]
        else:
            pref_colors = {}
        labelname = prefs[key]['column']

        #Define groups and associate appropriate colors to each group
        groups = group_by_field(mapping, col_name)
        pref_colors, data_colors, data_color_order = \
            get_group_colors(groups, pref_colors)

    # set up labels and colors for pie chart
    for color_ix, (c_label, c_frac) in enumerate(data):
        #commented out the following line, since the key becomes invalid when
        #replacing part of the string.
        #c_label = c_label.replace("_", " ")
        # we also want to color others category same every time
        if c_label == others_key:
            colors.append(others_color)
        else:
            colors.append(data_colors[pref_colors[c_label]].toHex())
        all_fracs.append(c_frac)
        if should_capitalize:
            capital = "%s (%.2f%%)" % (c_label.capitalize(), (c_frac * 100.0))
            all_labels.append(capital)
        else:
            all_labels.append("%s (%.2f%%)" % (c_label, (c_frac * 100.0)))
    rc('font', size='10')
    rc('text', color=label_color)
    rc('patch', linewidth=.1)
    rc('axes', linewidth=.5, edgecolor=label_color)
    rc('text', usetex=False)
    fig = figure(randrange(10000), figsize=(2 * y_len, y_len))

    fp = FontProperties()
    fp.set_size('8')
    if len(data) > 30:
        loc = 4
    else:
        loc = 5
    mtitle = "Pie Chart"
    if "title" in props:
        mtitle = props["title"]
    axis('off')
    title(mtitle, fontsize='10', color=label_color)
    ax = axes([0.0, 0.0, .5, 1])
    p1 = pie(all_fracs, shadow=False, colors=colors)
    flg = figlegend(p1[0],labels = all_labels, loc = loc, borderpad=0.3, \
                 labelspacing=0.3, prop = fp)
    flg.legendPatch.set_alpha(0.0)
    #write out
    if file_prefix is None:
        img_name = make_img_name()
    else:
        img_name = file_prefix
    img_abs = os.path.join(dir_path, 'pie_charts', img_name)
    savefig(img_abs, dpi=dpi, facecolor=background_color)
    eps_link = ""
    eps_abs = ""

    if generate_pdf:
        if file_prefix is None:
            eps_img_name = make_img_name(file_ext=".pdf")
        else:
            eps_img_name = file_prefix + ".pdf"
        savefig(os.path.join(dir_path, 'pie_charts', eps_img_name),
                facecolor=background_color)
        eps_abs = os.path.join('pie_charts', eps_img_name)
        eps_link = DOWNLOAD_LINK % ((os.path.join('pie_charts',\
                eps_img_name)),\
        IMG_SRC % (os.path.join('pie_charts',img_name)))
    if generate_eps:
        if file_prefix is None:
            eps_img_name = make_img_name(file_ext=".eps")
        else:
            eps_img_name = file_prefix + ".eps"
        savefig(os.path.join(dir_path, 'pie_charts', eps_img_name),
                facecolor=background_color)
        strip_eps_font(os.path.join(dir_path, 'pie_charts', eps_img_name))
        out = getoutput("gzip " +
                        os.path.join(dir_path, 'pie_charts', eps_img_name))
        eps_abs = os.path.join(dir_path, 'pie_charts', eps_img_name) + ".gz"
        eps_link=DOWNLOAD_LINK % ((os.path.join('pie_charts', eps_img_name)+".gz"),\
        IMG_SRC % (os.path.join('pie_charts',img_name)))
    close(fig)
    clf()
    return eps_link, IMG_SRC_2 % (os.path.join('pie_charts', img_name))
Example #13
def monte_carlo_group_distances(mapping_file, dmatrix_file, prefs, \
    dir_prefix = '', subdir_prefix='monte_carlo_group_distances',\
    default_iters=10, fields=None):
    """Calculate Monte Carlo stats for specified group distances.
    
    Specifically:
    - find the groups for each specified col (or combination of cols)
    - do t test between each pair of groups
    - randomize matrix n times and find empirical value of t for each pair
    - compare the actual value of t to the randomized values

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.
    """
    mapping, header, comments = parse_mapping_file(open(mapping_file, 'U'))
    header = [header]
    header.extend(mapping)
    mapping = header

    distance_header, distance_matrix = \
        parse_distmat(open(dmatrix_file,'U'))

    orig_distance_matrix = distance_matrix.copy()

    path_prefix = path.join(dir_prefix, subdir_prefix)

    #if dir doesn't exist
    if not path.isdir(path_prefix):
        # make directory
        mkdir(path_prefix)

    if fields is None:
        fields = [mapping[0][0]]

    if prefs is None:
        prefs = {}

    if 'MONTE_CARLO_GROUP_DISTANCES' not in prefs:
        prefs = build_monte_carlo_prefs(fields, default_iters)

    for field, num_iters in prefs['MONTE_CARLO_GROUP_DISTANCES'].items():
        if '&&' in field:
            groups = group_by_fields(mapping, field.split('&&'))
        else:
            groups = group_by_field(mapping, field)
        outfile = open(
            path.join(path_prefix, 'group_distances_' + field + '.txt'), 'w')
        outfile.write('\t'.join(['Category_1a','Category_1b','Avg',\
            'Category_2a','Category_2b','Avg','t','p',\
            'p_greater','p_less','Iterations\n']))
        real_dists = distances_by_groups(distance_header, distance_matrix,\
            groups)

        #iterate over the groups
        for i, (first_g1, second_g1, distances_g1) in \
            enumerate(real_dists[:-1]):

            real_dist_1 = average(distances_g1)

            #then for each other pair (not including same group)
            for j in range(i + 1, len(real_dists)):
                first_g2, second_g2, distances_g2 = real_dists[j]

                real_dist_2 = average(distances_g2)

                # permute distances just within these groups!
                rand_dists_1, rand_dists_2 = \
                        permute_between_groups(distances_g1,
                                               distances_g2,
                                               num_iters)

                ttests = [t_two_sample(rand_dists_1[n].flatten(),rand_dists_2[n].flatten())[0] \
                    for n in range(num_iters)]
                real_ttest = t_two_sample(distances_g1.flatten(),
                                          distances_g2.flatten())
                curr_line = [first_g1, second_g1, real_dist_1, \
                    first_g2, second_g2, real_dist_2]
                curr_line.extend([real_ttest[0], real_ttest[1],\
                    (array(ttests)>real_ttest[0]).sum()/float(num_iters), \
                    (array(ttests)<real_ttest[0]).sum()/float(num_iters), \
                    num_iters])
                outfile.write('\t'.join(map(str, curr_line)))
                outfile.write('\n')
def get_category_value_to_sample_ids(mapping_lines, category):
    mapping_data, headers, _ = parse_mapping_file(mapping_lines)
    return group_by_field([headers] + mapping_data, category)
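A hypothetical call (the file name is a placeholder) using the Treatment column from the mapping data shown in the setUp fixtures below:

# Hypothetical usage: mapping_lines may be an open mapping file or a list of lines.
category_map = get_category_value_to_sample_ids(open('map.txt', 'U'), 'Treatment')
# e.g. {'Control': ['PC.354', ...], 'Fast': ['PC.607', ...]}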
    def setUp(self):
        """setup data function for DistanceHistogramsTests."""
        self.working_dir = '/tmp/distance_histogram_tests/'
        try:
            mkdir(self.working_dir)
        except OSError: # directory already exists
            pass
        
        self.histogram_dir = path.join(self.working_dir,'histograms')
        try:
            mkdir(self.histogram_dir)
        except OSError: # directory already exists
            pass
            
        #Create distance matrix file
        self.dmat_file = self.working_dir+'dmat.txt'
        dmat_out = open(self.dmat_file,'w')
        dmat_out.write(DISTANCE_MATRIX_STRING)
        dmat_out.close()

        self.distance_header, self.dmat = \
            parse_distmat(open(self.dmat_file,'U'))
        
        #Create mapping file
        self.map_file = self.working_dir+'map.txt'
        map_out = open(self.map_file,'w')
        map_out.write(MAPPING_STRING)
        map_out.close()
        
        mapping, header, comments = parse_mapping_file(open(self.map_file,'U'))
        header[0] = '#'+header[0]
        header = [header]
        header.extend(mapping)
        self.mapping=header
        
        #Create prefs file
        self.prefs_file = self.working_dir+'prefs.txt'
        prefs_out = open(self.prefs_file,'w')
        prefs_out.write(str(PREFS))
        prefs_out.close()
        
        #Build single field dict for 'Treatment' field.
        self.single_field_treatment = defaultdict(dict)
        self.treatment_groups = group_by_field(self.mapping, 'Treatment')
        self.single_field_treatment['Treatment'] = \
            distances_by_groups(self.distance_header,self.dmat,\
                self.treatment_groups)
        self.paired_field_treatment = {'Treatment_to_Treatment':[\
            [('Control','Control'),('Fast','Fast'),\
                             array([[0.729,  0.8  ,  0.721, 0.765],
                                    [0.776,  0.744,  0.749, 0.677],
                                    [0.734,  0.777,  0.733, 0.724],
                                    [0.696,  0.675,  0.654, 0.696],
                                    [0.731,  0.758,  0.738, 0.737]])],\
            [('Control','Control'),('Control','Control'),\
                                 array([0.625,  0.623,  0.61 ,  0.577, 0.615,
                                        0.642,  0.673, 0.682,  0.737, 0.704])],\
            [('Fast','Fast'),('Fast','Fast'),\
                             array([0.718,  0.666, 0.727, 0.6, 0.578, 0.623])]
        ]}
        
        self.distances_file = self.working_dir+'distances_out.txt'
        dist_out = open(self.distances_file,'w')
        dist_out.write(DISTANCES_OUT)
        dist_out.close()
Example #16
def monte_carlo_group_distances(mapping_file, dmatrix_file, prefs, \
    dir_prefix = '', subdir_prefix='monte_carlo_group_distances',\
    default_iters=10, fields=None):
    """Calculate Monte Carlo stats for specified group distances.
    
    Specifically:
    - find the groups for each specified col (or combination of cols)
    - do t test between each pair of groups
    - randomize matrix n times and find empirical value of t for each pair
    - compare the actual value of t to the randomized values
    """
    mapping, header, comments = parse_mapping_file(open(mapping_file, 'U'))
    header = [header]
    header.extend(mapping)
    mapping = header

    distance_header, distance_matrix = \
        parse_distmat(open(dmatrix_file,'U'))

    orig_distance_matrix = distance_matrix.copy()

    path_prefix = _make_path([dir_prefix, subdir_prefix])

    #if dir doesn't exist
    if not path.isdir(path_prefix):
        # make directory
        mkdir(path_prefix)

    if fields is None:
        fields = [mapping[0][0]]

    if prefs is None:
        prefs = {}

    if 'MONTE_CARLO_GROUP_DISTANCES' not in prefs:
        prefs = build_monte_carlo_prefs(fields, default_iters)

    for field, num_iters in prefs['MONTE_CARLO_GROUP_DISTANCES'].items():
        if '&&' in field:
            groups = group_by_fields(mapping, field.split('&&'))
        else:
            groups = group_by_field(mapping, field)
        outfile = open(path_prefix + 'group_distances_' + field + '.xls', 'w')
        outfile.write('\t'.join(['Category_1a','Category_1b','Avg',\
            'Category_2a','Category_2b','Avg','t','p',\
            'p_greater','p_less','Iterations\n']))
        real_dists = distances_by_groups(distance_header, distance_matrix,\
            groups)
        rand_distances = [distances_by_groups(distance_header, \
            permute_for_monte_carlo(distance_matrix), groups) \
            for i in range(num_iters)]
        #iterate over the groups

        for i, (first_g1, second_g1, distances_g1) in \
            enumerate(real_dists[:-1]):

            real_dist_1 = average(distances_g1)
            rand_dists_1 = [rand_distances[n][i][-1] for n in range(num_iters)]
            #then for each other pair (not including same group)
            for j in range(i + 1, len(real_dists)):
                first_g2, second_g2, distances_g2 = real_dists[j]

                real_dist_2 = average(distances_g2)
                rand_dists_2 = [rand_distances[n][j][-1] \
                    for n in range(num_iters)]
                ttests = [t_two_sample(rand_dists_1[n],rand_dists_2[n])[0] \
                    for n in range(num_iters)]
                real_ttest = t_two_sample(distances_g1, distances_g2)
                curr_line = [first_g1, second_g1, real_dist_1, \
                    first_g2, second_g2, real_dist_2]
                curr_line.extend([real_ttest[0], real_ttest[1],\
                    (array(ttests)>real_ttest[0]).sum()/float(num_iters), \
                    (array(ttests)<real_ttest[0]).sum()/float(num_iters), \
                    num_iters])
                outfile.write('\t'.join(map(str, curr_line)))
                outfile.write('\n')
Example #17
    def setUp(self):
        """Create some data to be used in the tests."""
        # Create the mapping file/distance matrix combo from the overview
        # tutorial.
        self.dist_matrix_string = [
            "\tPC.354\tPC.355\tPC.356\tPC.481\tPC.593\
                                    \tPC.607\tPC.634\tPC.635\tPC.636",
            "PC.354\t0.0\t0.625\t0.623\t0.61\t0.577\
                                    \t0.729\t0.8\t0.721\t0.765",
            "PC.355\t0.625\t0.0\t0.615\t0.642\t0.673\
                                    \t0.776\t0.744\t0.749\t0.677",
            "PC.356\t0.623\t0.615\t0.0\t0.682\t0.737\
                                    \t0.734\t0.777\t0.733\t0.724",
            "PC.481\t0.61\t0.642\t0.682\t0.0\t0.704\
                                    \t0.696\t0.675\t0.654\t0.696",
            "PC.593\t0.577\t0.673\t0.737\t0.704\t0.0\
                                    \t0.731\t0.758\t0.738\t0.737",
            "PC.607\t0.729\t0.776\t0.734\t0.696\t0.731\
                                    \t0.0\t0.718\t0.666\t0.727",
            "PC.634\t0.8\t0.744\t0.777\t0.675\t0.758\
                                    \t0.718\t0.0\t0.6\t0.578",
            "PC.635\t0.721\t0.749\t0.733\t0.654\t0.738\
                                    \t0.666\t0.6\t0.0\t0.623",
            "PC.636\t0.765\t0.677\t0.724\t0.696\t0.737\
                                    \t0.727\t0.578\t0.623\t0.0",
        ]

        self.mapping_string = [
            "#SampleID\tBarcodeSequence\tTreatment\tDOB",
            "PC.354\tAGCACGAGCCTA\tControl\t20061218",
            "PC.355\tAACTCGTCGATG\tControl\t20061218",
            "PC.356\tACAGACCACTCA\tControl\t20061126",
            "PC.481\tACCAGCGACTAG\tControl\t20070314",
            "PC.593\tAGCAGCACTTGT\tControl\t20071210",
            "PC.607\tAACTGTGCGTAC\tFast\t20071112",
            "PC.634\tACAGAGTCGGCT\tFast\t20080116",
            "PC.635\tACCGCAGAGTCA\tFast\t20080116",
            "PC.636\tACGGTGAGTGTC\tFast\t20080116",
        ]

        # Field to test on. Field values are either "Control" or "Fast".
        self.field = "Treatment"

        # Create a tiny distance matrix/mapping file with a single sample for
        # additional testing.
        self.tiny_dist_matrix_string = ["\tSamp.1", "Samp.1\t0"]
        self.tiny_mapping_string = [
            "#SampleID\tBarcodeSequence\tSampleField",
            "Samp.1\tAGCACGAGCCTA\tSampleFieldState1",
        ]
        self.tiny_field = "SampleField"

        self.small_dist_matrix_string = ["\tSamp.1\tSamp.2", "Samp.1\t0\t0.5", "Samp.2\t0.5\t0"]
        self.small_mapping_string = [
            "#SampleID\tBarcodeSequence\tSampleField",
            "Samp.1\tAGCACGAGCCTA\tSampleFieldState1",
            "Samp.2\tAGCACGAGCCTG\tSampleFieldState2",
        ]
        self.small_field = "SampleField"

        # Parse mapping "files" (faked here).
        self.mapping, self.mapping_header, self.comments = parse_mapping_file(self.mapping_string)
        mapping_data = [self.mapping_header]
        mapping_data.extend(self.mapping)
        self.groups = group_by_field(mapping_data, self.field)

        self.tiny_mapping, self.tiny_mapping_header, self.tiny_comments = parse_mapping_file(self.tiny_mapping_string)
        tiny_mapping_data = [self.tiny_mapping_header]
        tiny_mapping_data.extend(self.tiny_mapping)
        self.tiny_groups = group_by_field(tiny_mapping_data, self.tiny_field)

        self.small_mapping, self.small_mapping_header, self.small_comments = parse_mapping_file(
            self.small_mapping_string
        )
        small_mapping_data = [self.small_mapping_header]
        small_mapping_data.extend(self.small_mapping)
        self.small_groups = group_by_field(small_mapping_data, self.small_field)

        # Parse distance matrix "files" (faked here).
        self.dist_matrix_header, self.dist_matrix = parse_distmat(self.dist_matrix_string)

        self.tiny_dist_matrix_header, self.tiny_dist_matrix = parse_distmat(self.tiny_dist_matrix_string)

        self.small_dist_matrix_header, self.small_dist_matrix = parse_distmat(self.small_dist_matrix_string)

        # extract_per_individual* input data
        self.individual_states_and_responses_map_f1 = parse_mapping_file_to_dict(
            individual_states_and_responses_map_f1.split("\n")
        )[0]
        self.individual_states_and_responses_map_f2 = parse_mapping_file_to_dict(
            individual_states_and_responses_map_f2.split("\n")
        )[0]
        self.paired_difference_biom1 = parse_biom_table(paired_difference_biom_f1.split("\n"))
Example #18
    def setUp(self):
        """Create some data to be used in the tests."""
        # Create the mapping file/distance matrix combo from the overview
        # tutorial.
        self.dist_matrix_string = [
            "\tPC.354\tPC.355\tPC.356\tPC.481\tPC.593\
                                    \tPC.607\tPC.634\tPC.635\tPC.636",
            "PC.354\t0.0\t0.625\t0.623\t0.61\t0.577\
                                    \t0.729\t0.8\t0.721\t0.765",
            "PC.355\t0.625\t0.0\t0.615\t0.642\t0.673\
                                    \t0.776\t0.744\t0.749\t0.677",
            "PC.356\t0.623\t0.615\t0.0\t0.682\t0.737\
                                    \t0.734\t0.777\t0.733\t0.724",
            "PC.481\t0.61\t0.642\t0.682\t0.0\t0.704\
                                    \t0.696\t0.675\t0.654\t0.696",
            "PC.593\t0.577\t0.673\t0.737\t0.704\t0.0\
                                    \t0.731\t0.758\t0.738\t0.737",
            "PC.607\t0.729\t0.776\t0.734\t0.696\t0.731\
                                    \t0.0\t0.718\t0.666\t0.727",
            "PC.634\t0.8\t0.744\t0.777\t0.675\t0.758\
                                    \t0.718\t0.0\t0.6\t0.578",
            "PC.635\t0.721\t0.749\t0.733\t0.654\t0.738\
                                    \t0.666\t0.6\t0.0\t0.623",
            "PC.636\t0.765\t0.677\t0.724\t0.696\t0.737\
                                    \t0.727\t0.578\t0.623\t0.0"
        ]

        self.mapping_string = [
            "#SampleID\tBarcodeSequence\tTreatment\tDOB",
            "PC.354\tAGCACGAGCCTA\tControl\t20061218",
            "PC.355\tAACTCGTCGATG\tControl\t20061218",
            "PC.356\tACAGACCACTCA\tControl\t20061126",
            "PC.481\tACCAGCGACTAG\tControl\t20070314",
            "PC.593\tAGCAGCACTTGT\tControl\t20071210",
            "PC.607\tAACTGTGCGTAC\tFast\t20071112",
            "PC.634\tACAGAGTCGGCT\tFast\t20080116",
            "PC.635\tACCGCAGAGTCA\tFast\t20080116",
            "PC.636\tACGGTGAGTGTC\tFast\t20080116"
        ]

        # Field to test on. Field values are either "Control" or "Fast".
        self.field = 'Treatment'

        # Create a tiny distance matrix/mapping file with a single sample for
        # additional testing.
        self.tiny_dist_matrix_string = ["\tSamp.1", "Samp.1\t0"]
        self.tiny_mapping_string = [
            "#SampleID\tBarcodeSequence\tSampleField",
            "Samp.1\tAGCACGAGCCTA\tSampleFieldState1"
        ]
        self.tiny_field = 'SampleField'

        self.small_dist_matrix_string = [
            "\tSamp.1\tSamp.2", "Samp.1\t0\t0.5", "Samp.2\t0.5\t0"
        ]
        self.small_mapping_string = [
            "#SampleID\tBarcodeSequence\tSampleField",
            "Samp.1\tAGCACGAGCCTA\tSampleFieldState1",
            "Samp.2\tAGCACGAGCCTG\tSampleFieldState2"
        ]
        self.small_field = 'SampleField'

        # Parse mapping "files" (faked here).
        self.mapping, self.mapping_header, self.comments = parse_mapping_file(
            self.mapping_string)
        mapping_data = [self.mapping_header]
        mapping_data.extend(self.mapping)
        self.groups = group_by_field(mapping_data, self.field)

        self.tiny_mapping, self.tiny_mapping_header, self.tiny_comments = \
                parse_mapping_file(self.tiny_mapping_string)
        tiny_mapping_data = [self.tiny_mapping_header]
        tiny_mapping_data.extend(self.tiny_mapping)
        self.tiny_groups = group_by_field(tiny_mapping_data, self.tiny_field)

        self.small_mapping, self.small_mapping_header, self.small_comments = \
                parse_mapping_file(self.small_mapping_string)
        small_mapping_data = [self.small_mapping_header]
        small_mapping_data.extend(self.small_mapping)
        self.small_groups = group_by_field(small_mapping_data,
                                           self.small_field)

        # Parse distance matrix "files" (faked here).
        self.dist_matrix_header, self.dist_matrix = parse_distmat(
            self.dist_matrix_string)

        self.tiny_dist_matrix_header, self.tiny_dist_matrix = parse_distmat(
            self.tiny_dist_matrix_string)

        self.small_dist_matrix_header, self.small_dist_matrix = parse_distmat(
            self.small_dist_matrix_string)
def monte_carlo_group_distances(mapping_file, dmatrix_file, prefs, \
    dir_prefix = '', subdir_prefix='monte_carlo_group_distances',\
    default_iters=10, fields=None):
    """Calculate Monte Carlo stats for specified group distances.
    
    Specifically:
    - find the groups for each specified col (or combination of cols)
    - do t test between each pair of groups
    - randomize matrix n times and find empirical value of t for each pair
    - compare the actual value of t to the randomized values

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.
    """
    mapping, header, comments = parse_mapping_file(open(mapping_file,'U'))
    header = [header]
    header.extend(mapping)
    mapping=header

    distance_header, distance_matrix = \
        parse_distmat(open(dmatrix_file,'U'))

    orig_distance_matrix = distance_matrix.copy()

    path_prefix = path.join(dir_prefix,subdir_prefix)
    
    #if dir doesn't exist
    if not path.isdir(path_prefix):
        # make directory
        mkdir(path_prefix)
    
    if fields is None:
        fields = [mapping[0][0]]
        
    if prefs is None:
        prefs = {}
 
    if 'MONTE_CARLO_GROUP_DISTANCES' not in prefs:
        prefs = build_monte_carlo_prefs(fields,default_iters)
            
    for field, num_iters in prefs['MONTE_CARLO_GROUP_DISTANCES'].items():
        if '&&' in field:
            groups = group_by_fields(mapping, field.split('&&'))
        else:
            groups = group_by_field(mapping, field)
        outfile = open(path.join(path_prefix,
                                 'group_distances_'+field+'.txt'), 'w')
        outfile.write('\t'.join(['Category_1a','Category_1b','Avg',\
            'Category_2a','Category_2b','Avg','t','p',\
            'p_greater','p_less','Iterations\n']))
        real_dists = distances_by_groups(distance_header, distance_matrix,\
            groups)
 
        #iterate over the groups
        for i, (first_g1, second_g1, distances_g1) in \
            enumerate(real_dists[:-1]):

            real_dist_1 = average(distances_g1)

            #then for each other pair (not including same group)
            for j in range(i+1,len(real_dists)):
                first_g2, second_g2, distances_g2 = real_dists[j]

                real_dist_2 = average(distances_g2)

                # permute distances just within these groups!
                rand_dists_1, rand_dists_2 = \
                        permute_between_groups(distances_g1, 
                                               distances_g2,
                                               num_iters)

                ttests = [t_two_sample(rand_dists_1[n].flatten(),rand_dists_2[n].flatten())[0] \
                    for n in range(num_iters)]
                real_ttest = t_two_sample(distances_g1.flatten(), distances_g2.flatten())
                curr_line = [first_g1, second_g1, real_dist_1, \
                    first_g2, second_g2, real_dist_2]
                curr_line.extend([real_ttest[0], real_ttest[1],\
                    (array(ttests)>real_ttest[0]).sum()/float(num_iters), \
                    (array(ttests)<real_ttest[0]).sum()/float(num_iters), \
                    num_iters])
                outfile.write('\t'.join(map(str, curr_line)))
                outfile.write('\n')
def get_category_value_to_sample_ids(mapping_lines,category):
    mapping_data, headers, _ = parse_mapping_file(mapping_lines)
    return group_by_field([headers] + mapping_data,category)
Example #21
def make_all_charts(data,dir_path,filename,num_categories,colorby,args,\
                        color_data, prefs,background_color,label_color,
                        chart_type,generate_image_type,plot_width,plot_height,\
                        bar_width,dpi,resize_nth_label,label_type,\
                        include_html_legend,include_html_counts):
    """Generate interactive charts in one HTML file"""

    #iterate over the preferences and assign colors according to taxonomy
    img_data = []
    for label,f_name in data:
        raw_fpath=os.path.join(dir_path,'raw_data',os.path.split(f_name)[-1])
        # move raw file to output directory
        shutil.copyfile(f_name,raw_fpath)
        
        f = color_data['counts'][f_name]
        level = max([len(t.split(';')) - 1 for t in f[1]])
        
        for key in prefs.keys():
            if prefs[key]['column'] != str(level):
                continue
            col_name = 'Taxon'
            mapping = [['Taxon']]
            mapping.extend([[m] for m in f[1]])
            if 'colors' in prefs[key]:
                if isinstance(prefs[key]['colors'], dict):
                    pref_colors = prefs[key]['colors'].copy() 
                    #copy so we can mutate
                else:
                    pref_colors = prefs[key]['colors'][:]
            else:
                pref_colors={}
            labelname=prefs[key]['column']

            #Define groups and associate appropriate colors to each group
            groups = group_by_field(mapping, col_name)
            pref_colors, data_colors, data_color_order = \
                get_group_colors(groups, pref_colors)
        
        updated_pref_colors={}
        
        if chart_type=='area' and len(f[0])==1:
            raise ValueError(
                'When generating area charts, the number of samples (or '
                'category values) must be greater than 1. However, you can '
                'still produce a pie chart or bar chart with only 1 sample '
                '(or category value), but you must remove the area chart '
                'value from the input arguments.')
        
        for key in pref_colors:
            updated_pref_colors[key.replace('"','')]=pref_colors[key]
        
        for i,val in enumerate(f[1]):
            f[1][i]=val.replace('"','')
            
        #parse the counts and continue processing
        img_data.extend(get_counts(label.strip(),colorby,num_categories,\
                        dir_path,level,f,prefs,updated_pref_colors,\
                        background_color,\
                        label_color,chart_type,generate_image_type,\
                        plot_width,plot_height,bar_width,dpi,raw_fpath,\
                        resize_nth_label,label_type,include_html_legend,\
                        include_html_counts))

    #generate html filepath
    outpath = os.path.join(dir_path,'%s_charts.html' % chart_type)
    out_table = ''.join(img_data)
    #write out html file
    write_html_file(out_table,outpath)
    def setUp(self):
        """setup data function for DistanceHistogramsTests."""
        self.working_dir = '/tmp/distance_histogram_tests/'
        try:
            mkdir(self.working_dir)
        except OSError:  # directory already exists
            pass

        self.histogram_dir = self.working_dir + 'histograms/'
        try:
            mkdir(self.histogram_dir)
        except OSError:  # directory already exists
            pass

        #Create distance matrix file
        self.dmat_file = self.working_dir + 'dmat.txt'
        dmat_out = open(self.dmat_file, 'w')
        dmat_out.write(DISTANCE_MATRIX_STRING)
        dmat_out.close()

        self.distance_header, self.dmat = \
            parse_distmat(open(self.dmat_file,'U'))

        #Create mapping file
        self.map_file = self.working_dir + 'map.txt'
        map_out = open(self.map_file, 'w')
        map_out.write(MAPPING_STRING)
        map_out.close()

        mapping, header, comments = parse_mapping_file(open(
            self.map_file, 'U'))
        header[0] = '#' + header[0]
        header = [header]
        header.extend(mapping)
        self.mapping = header

        #Create prefs file
        self.prefs_file = self.working_dir + 'prefs.txt'
        prefs_out = open(self.prefs_file, 'w')
        prefs_out.write(str(PREFS))
        prefs_out.close()

        #Build single field dict for 'Treatment' field.
        self.single_field_treatment = defaultdict(dict)
        self.treatment_groups = group_by_field(self.mapping, 'Treatment')
        self.single_field_treatment['Treatment'] = \
            distances_by_groups(self.distance_header,self.dmat,\
                self.treatment_groups)
        self.paired_field_treatment = {'Treatment_to_Treatment':[\
            [('Control','Control'),('Fast','Fast'),\
                             array([[0.729,  0.8  ,  0.721, 0.765],
                                    [0.776,  0.744,  0.749, 0.677],
                                    [0.734,  0.777,  0.733, 0.724],
                                    [0.696,  0.675,  0.654, 0.696],
                                    [0.731,  0.758,  0.738, 0.737]])],\
            [('Control','Control'),('Control','Control'),\
                                 array([0.625,  0.623,  0.61 ,  0.577, 0.615,
                                        0.642,  0.673, 0.682,  0.737, 0.704])],\
            [('Fast','Fast'),('Fast','Fast'),\
                             array([0.718,  0.666, 0.727, 0.6, 0.578, 0.623])]
        ]}

        self.distances_file = self.working_dir + 'distances_out.txt'
        dist_out = open(self.distances_file, 'w')
        dist_out.write(DISTANCES_OUT)
        dist_out.close()