Example #1
0
    def test_summarize_pcoas(self):
        """summarize_pcoas works

        Exercises the 'ideal_fourths' and 'IQR' methods without procrustes
        and the 'sdev' method with procrustes, using one master pcoa and
        four support pcoas that share the same sample identifiers.
        """
        def make_pcoa(coords, eigvals):
            # every pcoa is [sample ids, coordinates, eigenvalues]; a fresh
            # id list is built on each call so no two pcoas share one
            return [['1', '2', '3'], array(coords), array(eigvals)]

        master_pcoa = make_pcoa([[-1.0, 0.0, 1.0], [2.0, 4.0, -4.0]],
                                [.76, .24])
        support_pcoas = [
            make_pcoa([[1.2, 0.1, -1.2], [-2.5, -4.0, 4.5]], [0.80, .20]),
            make_pcoa([[-1.4, 0.05, 1.3], [2.6, 4.1, -4.7]], [0.76, .24]),
            make_pcoa([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]], [0.84, .16]),
            make_pcoa([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]], [0.84, .16]),
        ]

        # ideal_fourths, method passed positionally
        avg, low, high, eigvals, names = summarize_pcoas(
            master_pcoa, support_pcoas, 'ideal_fourths',
            apply_procrustes=False)
        self.assertEqual(names, ['1', '2', '3'])
        expectations = [
            (avg[0, 0], -1.4),
            (avg[0, 1], 0.0125),
            (low[0, 0], -1.5),
            (high[0, 0], -1.28333333),
            (low[0, 1], -0.0375),
            (high[0, 1], 0.05),
            (eigvals[0], 0.81),
            (eigvals[1], 0.19),
        ]
        for observed, expected in expectations:
            self.assertFloatEqual(observed, expected)

        # IQR, method passed by keyword
        avg, low, high, eigvals, names = summarize_pcoas(
            master_pcoa, support_pcoas, method='IQR',
            apply_procrustes=False)
        self.assertFloatEqual(low[0, 0], -1.5)
        self.assertFloatEqual(high[0, 0], -1.3)

        # procrustes followed by sdev: fit each support matrix to the
        # master ourselves first, keeping only the second returned matrix
        fitted = [procrustes(master_pcoa[1], support[1])[1]
                  for support in support_pcoas]
        avg, low, high, eigvals, names = summarize_pcoas(
            master_pcoa, support_pcoas, method='sdev',
            apply_procrustes=True)

        x = array([matrix[0, 0] for matrix in fitted])
        half_sdev = x.std(ddof=1) / 2
        self.assertEqual(x.mean(), avg[0, 0])
        self.assertEqual(-half_sdev, low[0, 0])
        self.assertEqual(half_sdev, high[0, 0])
Example #2
0
    def test_summarize_pcoas(self):
        """summarize_pcoas works

        Checks the summaries produced for the 'ideal_fourths', 'IQR' and
        'sdev' methods; only the 'sdev' run applies procrustes.
        """
        sample_ids = ['1', '2', '3']
        master_pcoa = [list(sample_ids),
                       array([[-1.0, 0.0, 1.0], [2.0, 4.0, -4.0]]),
                       array([.76, .24])]

        # (coordinates, eigenvalues) of the four support (jackknifed) pcoas
        support_data = (
            ([[1.2, 0.1, -1.2], [-2.5, -4.0, 4.5]], [0.80, .20]),
            ([[-1.4, 0.05, 1.3], [2.6, 4.1, -4.7]], [0.76, .24]),
            ([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]], [0.84, .16]),
            ([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]], [0.84, .16]),
        )
        support_pcoas = [[list(sample_ids), array(coords), array(eigs)]
                         for coords, eigs in support_data]

        # test with the ideal_fourths option
        summary = summarize_pcoas(master_pcoa, support_pcoas,
                                  'ideal_fourths', apply_procrustes=False)
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summary
        self.assertEqual(m_names, ['1', '2', '3'])
        self.assertFloatEqual(matrix_average[0, 0], -1.4)
        self.assertFloatEqual(matrix_average[0, 1], 0.0125)
        self.assertFloatEqual(matrix_low[0, 0], -1.5)
        self.assertFloatEqual(matrix_high[0, 0], -1.28333333)
        self.assertFloatEqual(matrix_low[0, 1], -0.0375)
        self.assertFloatEqual(matrix_high[0, 1], 0.05)
        self.assertFloatEqual(eigval_average[0], 0.81)
        self.assertFloatEqual(eigval_average[1], 0.19)

        # test with the IQR option
        summary = summarize_pcoas(master_pcoa, support_pcoas, method='IQR',
                                  apply_procrustes=False)
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summary
        self.assertFloatEqual(matrix_low[0, 0], -1.5)
        self.assertFloatEqual(matrix_high[0, 0], -1.3)

        # test with procrustes option followed by sdev; the expected values
        # are derived from fitting every support matrix to the master here
        first_entries = []
        for support in support_pcoas:
            fit = procrustes(master_pcoa[1], support[1])
            first_entries.append(fit[1][0, 0])

        summary = summarize_pcoas(master_pcoa, support_pcoas, method='sdev',
                                  apply_procrustes=True)
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summary

        x = array(first_entries)
        self.assertEqual(x.mean(), matrix_average[0, 0])
        self.assertEqual(-x.std(ddof=1) / 2, matrix_low[0, 0])
        self.assertEqual(x.std(ddof=1) / 2, matrix_high[0, 0])
Example #3
0
def get_coord(coord_fname, method="IQR"):
    """Opens and returns coords location matrix and metadata.

    coord_fname: path to a single coords file, or to a directory of coords
        files (loaded through load_pcoa_files).
    method: forwarded to summarize_pcoas; only used when coord_fname is a
        directory.

    Returns the list [coord_header, coords, eigvals, pct_var, coords_low,
    coords_high]. If only a single coord file is passed, the two spread
    matrices (coords_low, coords_high) are None. If a directory is passed,
    coords, the spread matrices and the eigenvalues come from
    summarize_pcoas, while the header and pct_var are taken from the master
    pcoa.

    Raises MissingFileError if the single coords file cannot be opened.
    """
    if not os.path.isdir(coord_fname):
        try:
            coord_f = open(coord_fname, 'U')
        except (TypeError, IOError):
            raise MissingFileError('Coord file required for this analysis')
        # Close the handle once parsing is done instead of leaking it.
        # NOTE(review): assumes parse_coords reads the file eagerly and the
        # returned values do not depend on the handle staying open.
        with coord_f:
            coord_header, coords, eigvals, pct_var = parse_coords(coord_f)
        return [coord_header, coords, eigvals, pct_var, None, None]

    master_pcoa, support_pcoas = load_pcoa_files(coord_fname)

    # get Summary statistics; the identifiers returned by summarize_pcoas
    # are not used, the header is taken from the master pcoa below
    coords, coords_low, coords_high, eigval_average, _identifiers = \
        summarize_pcoas(master_pcoa, support_pcoas, method=method)
    pct_var = master_pcoa[3]  # should be getting this from an average

    # make_3d_plots expects coord_header to be a python list
    coord_header = list(master_pcoa[0])
    return [
        coord_header, coords, eigval_average, pct_var, coords_low,
        coords_high
    ]
Example #4
0
def get_coord(coord_fname, method="IQR"):
    """Opens and returns coords location matrix and metadata.

    coord_fname: path to a single coords file, or to a directory of coords
        files (loaded through load_pcoa_files).
    method: forwarded to summarize_pcoas; only used when coord_fname is a
        directory.

    Returns the list [coord_header, coords, eigvals, pct_var, coords_low,
    coords_high]. If only a single coord file is passed, the two spread
    matrices (coords_low, coords_high) are None. If a directory is passed,
    coords, the spread matrices and the eigenvalues come from
    summarize_pcoas, while the header and pct_var are taken from the master
    pcoa.

    Raises MissingFileError if the single coords file cannot be opened.
    """
    if not os.path.isdir(coord_fname):
        try:
            coord_f = open(coord_fname, 'U')
        except (TypeError, IOError):
            raise MissingFileError('Coord file required for this analysis')
        # Make sure the handle is released even if parsing fails.
        # NOTE(review): assumes parse_coords reads the file eagerly and the
        # returned values do not depend on the handle staying open.
        try:
            coord_header, coords, eigvals, pct_var = parse_coords(coord_f)
        finally:
            coord_f.close()
        return [coord_header, coords, eigvals, pct_var, None, None]

    master_pcoa, support_pcoas = load_pcoa_files(coord_fname)

    # get Summary statistics; the identifiers returned by summarize_pcoas
    # are not used, the header is taken from the master pcoa below
    coords, coords_low, coords_high, eigval_average, _identifiers = \
        summarize_pcoas(master_pcoa, support_pcoas, method=method)
    pct_var = master_pcoa[3]  # should be getting this from an average

    # make_3d_plots expects coord_header to be a python list
    coord_header = list(master_pcoa[0])
    return (
        [coord_header,
         coords,
         eigval_average,
         pct_var,
         coords_low,
         coords_high]
    )
Example #5
0
def generate_2d_plots(prefs,data,html_dir_path,data_dir_path,filename,
                        background_color,label_color,generate_scree):
    """Generate interactive 2D scatterplots

    For every color group yielded by iter_color_groups, three plots are
    drawn with draw_pcoa_graph (axis pairs 1-2, 3-2 and 1-3) and a radio
    button entry is recorded. Only the first group gets a table in the
    output. When generate_scree is true a scree plot is appended. The
    resulting HTML is written into html_dir_path.

    prefs: passed through to iter_color_groups and draw_pcoa_graph
    data: dict with at least 'map' and 'coord'; if it has 'support_pcoas'
        (and then also 'ellipsoid_method') the coordinates are summarized
        with summarize_pcoas and data['coord'] is REPLACED in place by the
        summarized version
    html_dir_path: directory where the html file is written
    data_dir_path: directory under which per-plot directories are created
    filename: base name handed to create_html_filename
    background_color, label_color: passed through to the drawing functions
    generate_scree: whether to add a scree plot to the output
    """
    coord_tups = [("1", "2"), ("3", "2"), ("1", "3")]
    mapping = data['map']
    out_table = ''
    # Iterate through prefs and generate html files for each colorby option
    # Sort by the column name first
    sample_location = {}

    groups_and_colors = list(iter_color_groups(mapping, prefs))
    radiobuttons = []
    # group_index is kept separate from every inner loop variable: the
    # "first table only" check below depends on it
    for group_index, group_info in enumerate(groups_and_colors):
        labelname = group_info[0]         # e.g. 'EnvoID'
        groups = group_info[1]            # group name -> list of sample ids
        colors = group_info[2]            # group name -> color name
        data_colors = group_info[3]       # color name -> color object
        data_color_order = group_info[4]  # list of color names

        data_file_dir_path = get_random_directory_name(output_dir=data_dir_path)

        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_data = {}
        plot_label = labelname

        if 'support_pcoas' in data:
            matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
                summarize_pcoas(data['coord'], data['support_pcoas'],
                                method=data['ellipsoid_method'])
            data['coord'] = \
                (m_names, matrix_average, data['coord'][2], data['coord'][3])
            # remember the row of every sample in the summarized matrices
            for sample_index, sample_name in enumerate(m_names):
                sample_location[sample_name] = sample_index
        else:
            matrix_average = None
            matrix_low = None
            matrix_high = None

        for coord_tup in coord_tups:
            if isarray(matrix_low) and isarray(matrix_high) and \
                    isarray(matrix_average):
                coord_1r = asarray(matrix_low)
                coord_2r = asarray(matrix_high)
                mat_ave = asarray(matrix_average)
            else:
                coord_1r = None
                coord_2r = None
                mat_ave = None
                sample_location = None

            coord_1, coord_2 = coord_tup

            img_data[coord_tup] = draw_pcoa_graph(plot_label, data_file_dir_path,
                                                  data_file_link, coord_1, coord_2,
                                                  coord_1r, coord_2r, mat_ave,
                                                  sample_location,
                                                  data, prefs, groups, colors,
                                                  background_color, label_color,
                                                  data_colors, data_color_order,
                                                  generate_eps=True)
        radiobuttons.append(RADIO % (data_file_link, labelname))

        if group_index == 0:  # only create first table!
            out_table += TABLE_HTML % (labelname,
                                       "<br>".join(img_data[("1", "2")]),
                                       "<br>".join(img_data[("3", "2")]),
                                       "<br>".join(img_data[("1", "3")]))

    if generate_scree:
        data_file_dir_path = get_random_directory_name(output_dir=data_dir_path)
        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_src, download_link = draw_scree_graph(data_file_dir_path,
                                                  data_file_link,
                                                  background_color,
                                                  label_color,
                                                  generate_eps=True,
                                                  data=data)

        out_table += SCREE_TABLE_HTML % ("<br>".join((img_src, download_link)))

    # the radio buttons go before the table(s) in the final document
    out_table = "\n".join(radiobuttons) + out_table
    outfile = create_html_filename(filename, '.html')
    outfile = os.path.join(html_dir_path, outfile)

    write_html_file(out_table, outfile)
Example #6
0
def generate_2d_plots(prefs, data, html_dir_path, data_dir_path, filename,
                      background_color, label_color, generate_scree):
    """Generate interactive 2D scatterplots

    For every color group yielded by iter_color_groups, three plots are
    drawn with draw_pcoa_graph (axis pairs 1-2, 3-2 and 1-3) and a table
    holding them is appended to the output. When generate_scree is true a
    scree plot is appended as well. The resulting HTML is written into
    html_dir_path.

    prefs: passed through to iter_color_groups and draw_pcoa_graph
    data: dict with at least 'map' and 'coord'; if it has 'support_pcoas'
        (and then also 'ellipsoid_method') the coordinates are summarized
        with summarize_pcoas and data['coord'] is REPLACED in place by the
        summarized version
    html_dir_path: directory where the html file is written
    data_dir_path: directory under which per-plot directories are created
    filename: base name handed to create_html_filename
    background_color, label_color: passed through to the drawing functions
    generate_scree: whether to add a scree plot to the output
    """
    coord_tups = [("1", "2"), ("3", "2"), ("1", "3")]
    mapping = data['map']
    out_table = ''
    # Iterate through prefs and generate html files for each colorby option
    # Sort by the column name first
    sample_location = {}

    groups_and_colors = list(iter_color_groups(mapping, prefs))

    for group_info in groups_and_colors:
        labelname = group_info[0]
        groups = group_info[1]
        colors = group_info[2]
        data_colors = group_info[3]
        data_color_order = group_info[4]

        data_file_dir_path = get_random_directory_name(
            output_dir=data_dir_path)

        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_data = {}
        plot_label = labelname

        # `in` instead of dict.has_key, which no longer exists in Python 3
        if 'support_pcoas' in data:
            matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
                summarize_pcoas(data['coord'], data['support_pcoas'],
                                method=data['ellipsoid_method'])
            data['coord'] = \
                (m_names, matrix_average, data['coord'][2], data['coord'][3])
            # remember the row of every sample in the summarized matrices;
            # dedicated names so the group loop variable is not shadowed
            for sample_index, sample_name in enumerate(m_names):
                sample_location[sample_name] = sample_index
        else:
            matrix_average = None
            matrix_low = None
            matrix_high = None

        for coord_tup in coord_tups:
            if isarray(matrix_low) and isarray(matrix_high) and \
                    isarray(matrix_average):
                coord_1r = asarray(matrix_low)
                coord_2r = asarray(matrix_high)
                mat_ave = asarray(matrix_average)
            else:
                coord_1r = None
                coord_2r = None
                mat_ave = None
                sample_location = None

            coord_1, coord_2 = coord_tup
            img_data[coord_tup] = draw_pcoa_graph(
                plot_label, data_file_dir_path,
                data_file_link, coord_1, coord_2,
                coord_1r, coord_2r, mat_ave,
                sample_location,
                data, prefs, groups, colors,
                background_color, label_color,
                data_colors, data_color_order,
                generate_eps=True)

        out_table += TABLE_HTML % (labelname,
                                   "<br>".join(img_data[("1", "2")]),
                                   "<br>".join(img_data[("3", "2")]),
                                   "<br>".join(img_data[("1", "3")]))

    if generate_scree:
        data_file_dir_path = get_random_directory_name(
            output_dir=data_dir_path)
        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.',
                                      os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_src, download_link = draw_scree_graph(data_file_dir_path,
                                                  data_file_link,
                                                  background_color,
                                                  label_color,
                                                  generate_eps=True,
                                                  data=data)

        out_table += SCREE_TABLE_HTML % ("<br>".join((img_src, download_link)))

    outfile = create_html_filename(filename, '.html')
    outfile = os.path.join(html_dir_path, outfile)

    write_html_file(out_table, outfile)
Example #7
0
def generate_2d_plots(prefs, data, html_dir_path, data_dir_path, filename,
                      background_color, label_color, generate_scree):
    """Write an HTML page of interactive 2D scatterplots.

    One table with three plots (axis pairs 1-2, 3-2 and 1-3) is produced
    per color group yielded by iter_color_groups; a scree plot is appended
    when generate_scree is true. Each set of plots gets its own directory
    created with mkdtemp under data_dir_path, and the page is written into
    html_dir_path.

    If data contains 'support_pcoas', the coordinates are summarized with
    summarize_pcoas (method taken from data['ellipsoid_method']) and
    data['coord'] is replaced by the summarized coordinates.
    """
    axis_pairs = [("1", "2"), ("3", "2"), ("1", "3")]
    html_body = ''
    # sample id -> row index in the summarized matrices
    sample_location = {}

    color_groups = list(iter_color_groups(data['map'], prefs))

    for entry in color_groups:
        labelname, groups, colors, data_colors, data_color_order = \
            entry[0], entry[1], entry[2], entry[3], entry[4]

        plot_dir = mkdtemp(dir=data_dir_path)

        # relative link of the form ./<parent dir name>/<plot dir name>
        parent_dir, plot_dir_name = os.path.split(plot_dir)
        plot_link = os.path.join('.', os.path.split(parent_dir)[-1],
                                 plot_dir_name)

        if 'support_pcoas' not in data:
            matrix_average = matrix_low = matrix_high = None
        else:
            summary = summarize_pcoas(data['coord'], data['support_pcoas'],
                                      method=data['ellipsoid_method'])
            matrix_average, matrix_low, matrix_high = summary[:3]
            m_names = summary[4]
            previous_coord = data['coord']
            data['coord'] = (m_names, matrix_average, previous_coord[2],
                             previous_coord[3])
            for position, sample_id in enumerate(m_names):
                sample_location[sample_id] = position

        img_data = {}
        for pair in axis_pairs:
            have_spread = (isarray(matrix_low) and isarray(matrix_high)
                           and isarray(matrix_average))
            if not have_spread:
                coord_1r = coord_2r = mat_ave = None
                # without spread data no sample lookup is handed over
                sample_location = None
            else:
                coord_1r = asarray(matrix_low)
                coord_2r = asarray(matrix_high)
                mat_ave = asarray(matrix_average)

            img_data[pair] = draw_pcoa_graph(
                labelname, plot_dir, plot_link, pair[0], pair[1],
                coord_1r, coord_2r, mat_ave, sample_location,
                data, prefs, groups, colors,
                background_color, label_color,
                data_colors, data_color_order,
                generate_eps=True)

        cells = tuple("<br>".join(img_data[pair]) for pair in axis_pairs)
        html_body += TABLE_HTML % ((labelname,) + cells)

    if generate_scree:
        scree_dir = mkdtemp(dir=data_dir_path)
        parent_dir, scree_dir_name = os.path.split(scree_dir)
        scree_link = os.path.join('.', os.path.split(parent_dir)[-1],
                                  scree_dir_name)

        img_src, download_link = draw_scree_graph(
            scree_dir, scree_link, background_color, label_color,
            generate_eps=True, data=data)

        html_body += SCREE_TABLE_HTML % ("<br>".join((img_src, download_link)))

    outfile = os.path.join(html_dir_path,
                           create_html_filename(filename, '.html'))
    write_html_file(html_body, outfile)
Example #8
0
def generate_2d_plots(prefs,data,html_dir_path,data_dir_path,filename,
                        background_color,label_color,generate_scree):
    """Generate interactive 2D scatterplots

    For every color group yielded by iter_color_groups, three plots are
    drawn with draw_pcoa_graph (axis pairs 1-2, 3-2 and 1-3) and a radio
    button entry is recorded. Only the first group gets a table in the
    output. When generate_scree is true a scree plot is appended. The
    resulting HTML is written into html_dir_path.

    prefs: passed through to iter_color_groups and draw_pcoa_graph
    data: dict with at least 'map' and 'coord'; if it has 'support_pcoas'
        (and then also 'ellipsoid_method') the coordinates are summarized
        with summarize_pcoas and data['coord'] is REPLACED in place by the
        summarized version
    html_dir_path: directory where the html file is written
    data_dir_path: directory under which per-plot directories are created
    filename: base name handed to create_html_filename
    background_color, label_color: passed through to the drawing functions
    generate_scree: whether to add a scree plot to the output
    """
    coord_tups = [("1", "2"), ("3", "2"), ("1", "3")]
    mapping = data['map']
    out_table = ''
    # Iterate through prefs and generate html files for each colorby option
    # Sort by the column name first
    sample_location = {}

    groups_and_colors = list(iter_color_groups(mapping, prefs))
    radiobuttons = []
    # The group index must not be reused by any inner loop: the "first
    # table only" check at the end of the loop body reads it.
    for group_index, group_info in enumerate(groups_and_colors):
        labelname = group_info[0]         # e.g. 'EnvoID'
        groups = group_info[1]            # group name -> list of sample ids
        colors = group_info[2]            # group name -> color name
        data_colors = group_info[3]       # color name -> color object
        data_color_order = group_info[4]  # list of color names

        data_file_dir_path = get_random_directory_name(output_dir=data_dir_path)

        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_data = {}
        plot_label = labelname

        if 'support_pcoas' in data:
            matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
                summarize_pcoas(data['coord'], data['support_pcoas'],
                                method=data['ellipsoid_method'])
            data['coord'] = \
                (m_names, matrix_average, data['coord'][2], data['coord'][3])
            # remember the row of every sample in the summarized matrices
            for sample_index, sample_name in enumerate(m_names):
                sample_location[sample_name] = sample_index
        else:
            matrix_average = None
            matrix_low = None
            matrix_high = None

        for coord_tup in coord_tups:
            if isarray(matrix_low) and isarray(matrix_high) and \
                    isarray(matrix_average):
                coord_1r = asarray(matrix_low)
                coord_2r = asarray(matrix_high)
                mat_ave = asarray(matrix_average)
            else:
                coord_1r = None
                coord_2r = None
                mat_ave = None
                sample_location = None

            coord_1, coord_2 = coord_tup

            img_data[coord_tup] = draw_pcoa_graph(plot_label, data_file_dir_path,
                                                  data_file_link, coord_1, coord_2,
                                                  coord_1r, coord_2r, mat_ave,
                                                  sample_location,
                                                  data, prefs, groups, colors,
                                                  background_color, label_color,
                                                  data_colors, data_color_order,
                                                  generate_eps=True)
        radiobuttons.append(RADIO % (data_file_link, labelname))

        if group_index == 0:  # only create first table!
            out_table += TABLE_HTML % (labelname,
                                       "<br>".join(img_data[("1", "2")]),
                                       "<br>".join(img_data[("3", "2")]),
                                       "<br>".join(img_data[("1", "3")]))

    if generate_scree:
        data_file_dir_path = get_random_directory_name(output_dir=data_dir_path)
        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_src, download_link = draw_scree_graph(data_file_dir_path,
                                                  data_file_link,
                                                  background_color,
                                                  label_color,
                                                  generate_eps=True,
                                                  data=data)

        out_table += SCREE_TABLE_HTML % ("<br>".join((img_src, download_link)))

    # the radio buttons go before the table(s) in the final document
    out_table = "\n".join(radiobuttons) + out_table
    outfile = create_html_filename(filename, '.html')
    outfile = os.path.join(html_dir_path, outfile)

    write_html_file(out_table, outfile)
Example #9
0
def preprocess_coords_file(coords_header,
                           coords_data,
                           coords_eigenvals,
                           coords_pct,
                           mapping_header,
                           mapping_data,
                           custom_axes=None,
                           jackknifing_method=None,
                           is_comparison=False):
    """Process a PCoA data and handle customizations in the contents

    Inputs:
    coords_header: list of sample identifiers in the PCoA file _or_ list of
    lists with sample identifiers for each coordinate file (if jackknifing or
    comparing plots)
    coords_data: matrix of coordinates in the PCoA file _or_ list of numpy
    arrays with coordinates for each file (if jackknifing or comparing plots)
    coords_eigenvals: numpy array with eigenvalues for the coordinates file _or_
    list of numpy arrays with the eigenvalues (if jackknifing or comparing
    plots)
    coords_pct: numpy array with the percent explained by each principal
    coordinates axis _or_ a list of lists with numpy arrays (if jackknifing or
    comparing plots)
    mapping_header: mapping file headers names
    mapping_data: mapping file data
    custom_axes: name of the mapping data fields to add to coords_data
    jackknifing_method: one of 'sdev' or 'IQR', defaults to None, for more info
    see qiime.util.summarize_pcoas
    is_comparison: whether or not the inputs should be considered as the ones
    for a comparison plot

    Outputs (returned as a 7 element tuple, in this order):
    coords_header: list of sample identifiers in the PCoA file
    coords_data: matrix of coordinates in the PCoA file with custom_axes if
    provided
    coords_eigenvalues: either the eigenvalues of the input coordinates or the
    average eigenvalues of the multiple coords that were passed in
    coords_pct: list of percents explained by each axis as given by the master
    coordinates i. e. the center around where the values revolve
    coords_low: coordinates representing the lower edges of an ellipse; None if
    no jackknifing is applied
    coords_high: coordinates representing the higher edges of an ellipse; None
    if no jackknifing is applied
    clones: total number of input files; zero unless a comparison plot was
    requested

    This controller function handles any customization that has to be done to
    the PCoA data prior to the formatting. Note that the first element in each
    list (coords, headers, eigenvalues & percents) will be considered the master
    set of coordinates.

    Raises: AssertionError if a comparison plot is requested but a list of data
    is not passed as input
    """
    data_is_list = isinstance(coords_data, list)

    # prevent obscure and obfuscated errors; raised explicitly instead of
    # through an assert statement so the check is not stripped by python -O
    if is_comparison and not data_is_list:
        raise AssertionError("Cannot process a comparison with "
                             "the data from a single coordinates file")

    mapping_file = [mapping_header] + mapping_data
    coords_file = [coords_header, coords_data]

    # number PCoA files; zero for any case except for comparison plots
    clones = 0

    if custom_axes and isinstance(coords_data, ndarray):
        # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
        get_custom_coords(custom_axes, mapping_file, coords_file)
        remove_nans(coords_file)
        scale_custom_coords(custom_axes, coords_file)
    elif data_is_list and not is_comparison:
        # take the first pcoa file as the master set of coordinates
        master_pcoa = [
            coords_header[0], coords_data[0], coords_eigenvals[0],
            coords_pct[0]
        ]

        # support pcoas must be a list of lists where each list contain
        # all the elements that compose a coordinates file
        support_pcoas = [[h, d, e, p] for h, d, e, p in zip(
            coords_header, coords_data, coords_eigenvals, coords_pct)]

        # do not apply procrustes, at least not for now
        coords_data, coords_low, coords_high, eigenvalues_average,\
            identifiers = summarize_pcoas(master_pcoa, support_pcoas,
                                          method=jackknifing_method,
                                          apply_procrustes=False)

        # custom axes and jackknifing is a tricky thing to do, you only have to
        # add the custom values to the master file which is represented as the
        # coords_data return value. Since there is really no variation in that
        # axis then you have to change the values of coords_high and of
        # coords_low to something really small so that WebGL work properly
        if custom_axes:
            coords_file = [master_pcoa[0], coords_data]
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

            # this opens support for as many custom axes as needed
            axes = len(custom_axes)
            coords_low[:, 0:axes] = zeros([coords_low.shape[0], axes])
            coords_high[:, 0:axes] = \
                ones([coords_high.shape[0], axes]) * 0.00001
            coords_data = coords_file[1]

        # return a value containing coords_low and coords_high
        return identifiers, coords_data, eigenvalues_average, master_pcoa[3],\
            coords_low, coords_high, clones
    # comparison plots are processed almost individually
    elif data_is_list:

        # indicates the number of files that were totally processed so other
        # functions/APIs are aware of how many times to replicate the metadata
        clones = len(coords_data)
        out_headers, out_coords = [], []

        for index in range(clones):
            headers_i = coords_header[index]
            coords_i = coords_data[index]

            # tag each header with the number in which those coords came in
            out_headers.extend(
                [element + '_%d' % index for element in headers_i])

            if index == 0:
                # numpy can only stack things if they have the same shape
                out_coords = coords_i

                # the eigenvalues and percents explained are really the ones
                # belonging to the first set of coordinates that was passed
                coords_eigenvals = coords_eigenvals[index]
                coords_pct = coords_pct[index]
            else:
                out_coords = vstack((out_coords, coords_i))

        coords_file = [out_headers, out_coords]

        if custom_axes:
            # this condition deals with the fact that in order for the custom
            # axes to be added into the original coordinates, we have to add the
            # suffix for the sample identifiers that the coordinates have
            if clones:
                out_data = []
                for index in range(clones):
                    out_data.extend([[element[0] + '_%d' % index] +
                                     element[1::] for element in mapping_data])
                mapping_file = [mapping_header] + out_data

            # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

    # if no coords summary is applied, return None in the corresponding values
    # note that the value of clones will be != 0 for a comparison plot
    return coords_file[0], coords_file[1], coords_eigenvals, coords_pct, None,\
        None, clones
Example #10
0
def preprocess_coords_file(
    coords_header,
    coords_data,
    coords_eigenvals,
    coords_pct,
    mapping_header,
    mapping_data,
    custom_axes=None,
    jackknifing_method=None,
    is_comparison=False,
):
    """Process PCoA data and handle customizations in the contents

    Inputs:
    coords_header: list of sample identifiers in the PCoA file _or_ list of
    lists with sample identifiers for each coordinate file (if jackknifing or
    comparing plots)
    coords_data: matrix of coordinates in the PCoA file _or_ list of numpy
    arrays with coordinates for each file (if jackknifing or comparing plots)
    coords_eigenvals: numpy array with eigenvalues for the coordinates file _or_
    list of numpy arrays with the eigenvalues (if jackknifing or comparing
    plots)
    coords_pct: numpy array with the percent explained by each principal
    coordinates axis _or_ a list of lists with numpy arrays (if jackknifing or
    comparing plots)
    mapping_header: mapping file headers names
    mapping_data: mapping file data
    custom_axes: name of the mapping data fields to add to coords_data
    jackknifing_method: one of 'sdev' or 'IQR', defaults to None, for more info
    see qiime.util.summarize_pcoas
    is_comparison: whether or not the inputs should be considered as the ones
    for a comparison plot

    Outputs:
    coords_header: list of sample identifiers in the PCoA file
    coords_data: matrix of coordinates in the PCoA file with custom_axes if
    provided
    coords_eigenvalues: either the eigenvalues of the input coordinates or the
    average eigenvalues of the multiple coords that were passed in
    coords_pct: list of percents explained by each axis as given by the master
    coordinates i. e. the center around where the values revolve
    coords_low: coordinates representing the lower edges of an ellipse; None if
    no jackknifing is applied
    coords_high: coordinates representing the higher edges of an ellipse; None
    if no jackknifing is applied
    clones: total number of input files; zero unless a comparison plot was
    requested

    This controller function handles any customization that has to be done to
    the PCoA data prior to the formatting. Note that the first element in each
    list (coords, headers, eigenvalues & percents) will be considered the master
    set of coordinates.

    Raises: AssertionError if a comparison plot is requested but a list of data
    is not passed as input
    """

    multiple_inputs = isinstance(coords_data, list)

    # prevent obscure and obfuscated errors; this is raised explicitly (rather
    # than through an assert statement) so the check is not stripped when the
    # interpreter runs with -O
    if is_comparison and not multiple_inputs:
        raise AssertionError(
            "Cannot process a comparison with the data from a single "
            "coordinates file"
        )

    mapping_file = [mapping_header] + mapping_data
    coords_file = [coords_header, coords_data]

    # number PCoA files; zero for any case except for comparison plots
    clones = 0

    if custom_axes and isinstance(coords_data, ndarray):
        # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
        get_custom_coords(custom_axes, mapping_file, coords_file)
        remove_nans(coords_file)
        scale_custom_coords(custom_axes, coords_file)
    elif multiple_inputs and not is_comparison:
        # take the first pcoa file as the master set of coordinates
        master_pcoa = [
            coords_header[0],
            coords_data[0],
            coords_eigenvals[0],
            coords_pct[0],
        ]

        # support pcoas must be a list of lists where each list contain
        # all the elements that compose a coordinates file
        support_pcoas = [
            list(pcoa)
            for pcoa in zip(
                coords_header, coords_data, coords_eigenvals, coords_pct
            )
        ]

        # do not apply procrustes, at least not for now
        (
            coords_data,
            coords_low,
            coords_high,
            eigenvalues_average,
            identifiers,
        ) = summarize_pcoas(
            master_pcoa,
            support_pcoas,
            method=jackknifing_method,
            apply_procrustes=False,
        )

        # custom axes and jackknifing is a tricky thing to do, you only have to
        # add the custom values to the master file which is represented as the
        # coords_data return value. Since there is really no variation in that
        # axis then you have to change the values of coords_high and of
        # coords_low to something really small so that WebGL work properly
        if custom_axes:
            coords_file = [master_pcoa[0], coords_data]
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

            # this opens support for as many custom axes as needed
            axes = len(custom_axes)
            coords_low[:, 0:axes] = zeros([coords_low.shape[0], axes])
            coords_high[:, 0:axes] = (
                ones([coords_high.shape[0], axes]) * 0.00001
            )
            coords_data = coords_file[1]

        # return a value containing coords_low and coords_high
        return (
            identifiers,
            coords_data,
            eigenvalues_average,
            master_pcoa[3],
            coords_low,
            coords_high,
            clones,
        )
    # comparison plots are processed almost individually
    elif multiple_inputs and is_comparison:

        # indicates the number of files that were totally processed so other
        # functions/APIs are aware of how many times to replicate the metadata
        clones = len(coords_data)
        out_headers, out_coords = [], []

        for index in range(clones):
            # tag each header with the number in which those coords came in
            out_headers.extend(
                [element + "_%d" % index for element in coords_header[index]]
            )

            if index == 0:
                # numpy can only stack things if they have the same shape, so
                # the first set of coordinates seeds the stack
                out_coords = coords_data[index]

                # the eigenvalues and percents explained are really the ones
                # belonging to the first set of coordinates that was passed
                coords_eigenvals = coords_eigenvals[index]
                coords_pct = coords_pct[index]
            else:
                out_coords = vstack((out_coords, coords_data[index]))

        coords_file = [out_headers, out_coords]

        if custom_axes:
            # this condition deals with the fact that in order for the custom
            # axes to be added into the original coordinates, we have to add the
            # suffix for the sample identifiers that the coordinates have
            if clones:
                out_data = []
                for index in range(clones):
                    out_data.extend(
                        [
                            [element[0] + "_%d" % index] + element[1::]
                            for element in mapping_data
                        ]
                    )
                mapping_file = [mapping_header] + out_data

            # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

    # if no coords summary is applied, return None in the corresponding values
    # note that the value of clones will be != 0 for a comparison plot
    return (
        coords_file[0],
        coords_file[1],
        coords_eigenvals,
        coords_pct,
        None,
        None,
        clones,
    )