def musicc_normalization(profile, in_file, output_dir, musicc_inter=True, input_format='tab', output_format='tab',
                         musicc_intra='use_generic', compute_scores=False, verbose=False):
    """Perform a MUSiCC normalization on this profile's abundance data. Write normalized
    data to file and update profile's abundance data to be normalized.

    Requires:
        MUSiCC module can be imported and abundance features are KOs.

    Args:
        profile (metagenomic_profile): profile containing the abundance data
        in_file (str): path to original abundance data file
        output_dir (str): output directory where normalized data will be saved
        musicc_inter, input_format, output_format, musicc_intra, compute_scores,
        verbose: forwarded unchanged to ``musicc.core.correct_and_normalize``
            under keys of the same name.

    Effects:
        Writes ``musicc_normalized_abundance.tab`` inside ``output_dir`` and
        replaces profile's abundance data with the contents of that file.

    Note:
        For more details see "MUSiCC: A marker genes based framework for
        metagenomic normalization and accurate profiling of gene abundances in the
        microbiome." Ohad Manor and Elhanan Borenstein. Genome Biology.
    """
    import os

    from musicc.core import correct_and_normalize

    musicc_args = {'musicc_inter': musicc_inter, 'input_format': input_format, 'output_format': output_format,
                   'musicc_intra': musicc_intra, 'compute_scores': compute_scores, 'verbose': verbose}

    musicc_args['input_file'] = in_file
    # os.path.join instead of string concatenation with "//": an empty
    # output_dir no longer resolves to the filesystem root.
    musicc_args['output_file'] = os.path.join(output_dir, "musicc_normalized_abundance.tab")
    correct_and_normalize(musicc_args)

    # DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0
    # is its replacement. from_csv's implicit date parsing of the index is
    # dropped on purpose, since the index holds feature (KO) identifiers.
    # NOTE(review): the file is always parsed as tab-separated, regardless of
    # output_format - confirm callers only use output_format='tab'.
    profile.set_abundance_data(pd.read_csv(musicc_args['output_file'], sep='\t', index_col=0))
Esempio n. 2
0
    def test_is_output_correct_for_normalization_correction_learn_model(self):
        """Does MUSiCC produce the correct output for normalization and correction of the example case?

        Runs ``correct_and_normalize`` on the simulated KO relative-abundance
        example with ``musicc_inter=True`` and ``musicc_intra='learn_model'``,
        then compares the written table cell by cell with the stored reference
        output. Cells must differ by less than 1 in absolute value (small
        differences are expected due to de novo learning). The temporary
        output file is removed even when a check fails.
        """
        examples_dir = MUSiCCTestCase.path_to_data + '/examples'
        output_path = examples_dir + '/test3.tab'
        # define the arguments needed by MUSiCC
        musicc_args = {
            'input_file': examples_dir + '/simulated_ko_relative_abundance.tab',
            'output_file': output_path,
            'input_format': 'tab',
            'output_format': 'tab',
            'musicc_inter': True,
            'musicc_intra': 'learn_model',
            'compute_scores': True,
            'verbose': False
        }
        try:
            # run the MUSiCC correction
            correct_and_normalize(musicc_args)
            # assert that the result is equal to the example (up to small difference due to de novo learning)
            example_vals = pd.read_table(
                examples_dir +
                '/simulated_ko_MUSiCC_Normalized_Corrected_learn_model.tab',
                index_col=0).values
            output_vals = pd.read_table(output_path, index_col=0).values
            self.assertEqual(example_vals.shape[0], output_vals.shape[0],
                             "row count differs from the reference table")
            self.assertEqual(example_vals.shape[1], output_vals.shape[1],
                             "column count differs from the reference table")
            for i in range(example_vals.shape[0]):
                for j in range(example_vals.shape[1]):
                    self.assertTrue(
                        abs(example_vals[i, j] - output_vals[i, j]) < 1,
                        "cell (%d, %d) differs from the reference by 1 or more"
                        % (i, j))
        finally:
            # Clean up in all cases so a failed run does not leave test3.tab
            # behind; guard on existence so a crash before the file is written
            # is not masked by a FileNotFoundError here.
            if os.path.exists(output_path):
                os.remove(output_path)
Esempio n. 3
0
def musicc_normalization(profile,
                         in_file,
                         output_dir,
                         musicc_inter=True,
                         input_format='tab',
                         output_format='tab',
                         musicc_intra='use_generic',
                         compute_scores=False,
                         verbose=False):
    """Perform a MUSiCC normalization on this profile's abundance data. Write normalized
    data to file and update profile's abundance data to be normalized.

    Requires:
        MUSiCC module can be imported and abundance features are KOs.

    Args:
        profile (metagenomic_profile): profile containing the abundance data
        in_file (str): path to original abundance data file
        output_dir (str): output directory where normalized data will be saved
        musicc_inter, input_format, output_format, musicc_intra, compute_scores,
        verbose: forwarded unchanged to ``musicc.core.correct_and_normalize``
            under keys of the same name.

    Effects:
        Writes ``musicc_normalized_abundance.tab`` inside ``output_dir`` and
        replaces profile's abundance data with the contents of that file.

    Note:
        For more details see "MUSiCC: A marker genes based framework for
        metagenomic normalization and accurate profiling of gene abundances in the
        microbiome." Ohad Manor and Elhanan Borenstein. Genome Biology.
    """
    import os

    from musicc.core import correct_and_normalize

    musicc_args = {
        'musicc_inter': musicc_inter,
        'input_format': input_format,
        'output_format': output_format,
        'musicc_intra': musicc_intra,
        'compute_scores': compute_scores,
        'verbose': verbose
    }

    musicc_args['input_file'] = in_file
    # Build the path with os.path.join rather than gluing strings with "//",
    # so an empty output_dir does not point at the filesystem root.
    musicc_args['output_file'] = os.path.join(
        output_dir, "musicc_normalized_abundance.tab")
    correct_and_normalize(musicc_args)

    # pandas removed DataFrame.from_csv in 1.0; read_csv with index_col=0 is
    # the supported replacement. The implicit date parsing from_csv applied to
    # the index is intentionally dropped: the index holds KO identifiers.
    # NOTE(review): the file is always parsed as tab-separated, regardless of
    # output_format - confirm callers only use output_format='tab'.
    profile.set_abundance_data(
        pd.read_csv(musicc_args['output_file'], sep='\t', index_col=0))
Esempio n. 4
0
    def test_is_output_correct_for_normalization_correction_learn_model(self):
        """Does MUSiCC produce the correct output for normalization and correction of the example case?

        Calls ``correct_and_normalize`` with inter-sample normalization enabled
        and ``musicc_intra='learn_model'`` on the simulated KO example, reads
        the file it wrote, and requires every cell to be within 1 (absolute)
        of the stored reference table; small differences are expected due to
        de novo learning. The output file is deleted whether or not the checks
        pass.
        """
        examples_dir = MUSiCCTestCase.path_to_data + '/examples'
        output_path = examples_dir + '/test3.tab'
        # define the arguments needed by MUSiCC
        musicc_args = {'input_file': examples_dir + '/simulated_ko_relative_abundance.tab',
                       'output_file': output_path,
                       'input_format': 'tab', 'output_format': 'tab', 'musicc_inter': True,
                       'musicc_intra': 'learn_model', 'compute_scores': True, 'verbose': False}
        try:
            # run the MUSiCC correction
            correct_and_normalize(musicc_args)
            # assert that the result is equal to the example (up to small difference due to de novo learning)
            example = pd.read_table(examples_dir + '/simulated_ko_MUSiCC_Normalized_Corrected_learn_model.tab', index_col=0)
            output = pd.read_table(output_path, index_col=0)
            example_vals = example.values
            output_vals = output.values
            self.assertEqual(example_vals.shape[0], output_vals.shape[0], "row count differs from the reference table")
            self.assertEqual(example_vals.shape[1], output_vals.shape[1], "column count differs from the reference table")
            for i in range(example_vals.shape[0]):
                for j in range(example_vals.shape[1]):
                    self.assertTrue(abs(example_vals[i, j] - output_vals[i, j]) < 1,
                                    "cell (%d, %d) differs from the reference by 1 or more" % (i, j))
        finally:
            # Remove the generated file even if an assertion above failed, so a
            # red run does not pollute the examples directory. The existence
            # check keeps an earlier failure from being masked.
            if os.path.exists(output_path):
                os.remove(output_path)
Esempio n. 5
0
        default='tab')
    # -n/--normalize: boolean switch stored under 'musicc_inter'; False unless
    # the flag is given on the command line.
    parser.add_argument('-n',
                        '--normalize',
                        dest='musicc_inter',
                        help='Apply MUSiCC normalization (default: false)',
                        action='store_true')
    # -c/--correct: stored under 'musicc_intra'; accepts 'use_generic' or
    # 'learn_model'. When omitted the value is the *string* 'None', not the
    # None object.
    # NOTE(review): presumably correct_and_normalize compares against that
    # string - confirm before changing the default.
    parser.add_argument(
        '-c',
        '--correct',
        dest='musicc_intra',
        choices=['use_generic', 'learn_model'],
        help='Correct abundance per-sample using MUSiCC (default: false)',
        default='None')
    # -perf/--performance: boolean switch stored under 'compute_scores'.
    parser.add_argument(
        '-perf',
        '--performance',
        dest='compute_scores',
        help=
        'Calculate model performance on various gene sets (may add to running time) (default: false)',
        action='store_true')
    # -v/--verbose: boolean switch stored under 'verbose'.
    parser.add_argument('-v',
                        '--verbose',
                        dest='verbose',
                        help='Increase verbosity of module (default: false)',
                        action='store_true')

    given_args = parser.parse_args()

    # run normalization and correction
    # vars() turns the parsed namespace into a plain dict keyed by each
    # option's dest, which is passed to correct_and_normalize as-is.
    correct_and_normalize(vars(given_args))
Esempio n. 6
0
#!/usr/bin/env python

import argparse

# for testing the new module, remove later!!!!!!
import sys
sys.path.append('/net/gs/vol1/home/ohadm/MUSiCC/PyCode/MUSiCC')

from musicc.core import correct_and_normalize

if __name__ == "__main__":
    # Command-line front end: one positional input file plus optional
    # output, format, normalization, correction and reporting switches.
    cli = argparse.ArgumentParser(
        description='MUSiCC: Metagenomic Universal Single-Copy Correction')

    # Input and output locations.
    cli.add_argument('input_file', help='Input abundance file to correct')
    cli.add_argument(
        '-o', '--out',
        dest='output_file',
        default='MUSiCC.tab',
        help='Output destination for corrected abundance (default: MUSiCC.tab)')

    # File formats for reading and writing.
    cli.add_argument(
        '-if', '--input_format',
        dest='input_format',
        default='tab',
        choices=['tab', 'csv', 'biom'],
        help='Option indicating the format of the input file (default: tab)')
    cli.add_argument(
        '-of', '--output_format',
        dest='output_format',
        default='tab',
        choices=['tab', 'csv', 'biom'],
        help='Option indicating the format of the output file (default: tab)')

    # What MUSiCC should do: inter-sample normalization and/or intra-sample
    # correction. The correction default is the string 'None'.
    cli.add_argument(
        '-n', '--normalize',
        dest='musicc_inter',
        action='store_true',
        help='Apply MUSiCC normalization (default: false)')
    cli.add_argument(
        '-c', '--correct',
        dest='musicc_intra',
        default='None',
        choices=['use_generic', 'learn_model'],
        help='Correct abundance per-sample using MUSiCC (default: false)')

    # Reporting switches.
    cli.add_argument(
        '-perf', '--performance',
        dest='compute_scores',
        action='store_true',
        help='Calculate model performance on various gene sets (may add to running time) (default: false)')
    cli.add_argument(
        '-v', '--verbose',
        dest='verbose',
        action='store_true',
        help='Increase verbosity of module (default: false)')

    # Parse the command line and hand the options to MUSiCC as a plain dict
    # keyed by each option's dest.
    options = vars(cli.parse_args())
    correct_and_normalize(options)