def musicc_normalization(profile, in_file, output_dir, musicc_inter=True,
                         input_format='tab', output_format='tab',
                         musicc_intra='use_generic', compute_scores=False,
                         verbose=False):
    """Perform a MUSiCC normalization on this profile's abundance data.

    Runs MUSiCC's ``correct_and_normalize`` on ``in_file``, writes the result
    to ``<output_dir>/musicc_normalized_abundance.tab`` and then replaces the
    profile's abundance data with the table read back from that file.

    Requires: MUSiCC module can be imported and abundance features are KOs.

    Args:
        profile (metagenomic_profile): profile containing the abundance data;
            its ``set_abundance_data`` method is called with the result
        in_file (str): path to original abundance data file
        output_dir (str): output directory where normalized data will be saved
        musicc_inter, input_format, output_format, musicc_intra,
        compute_scores, verbose: forwarded unchanged to MUSiCC's
            ``correct_and_normalize`` under the same key names.

    Effects:
        profile's abundance data is now normalized.

    Note:
        The output file is always read back as a tab-separated table, so
        ``output_format`` values other than 'tab' are presumably not usable
        here -- TODO confirm against MUSiCC's writer.

        For more details see "MUSiCC: A marker genes based framework for
        metagenomic normalization and accurate profiling of gene abundances
        in the microbiome." Ohad Manor and Elhanan Borenstein. Genome Biology.
    """
    # Local imports: MUSiCC is an optional dependency, and os is only needed
    # to build the output path.
    import os

    from musicc.core import correct_and_normalize

    # os.path.join avoids the doubled separator of the old "dir//file"
    # concatenation and copes with a trailing slash on output_dir.
    output_file = os.path.join(output_dir, "musicc_normalized_abundance.tab")

    musicc_args = {
        'input_file': in_file,
        'output_file': output_file,
        'musicc_inter': musicc_inter,
        'input_format': input_format,
        'output_format': output_format,
        'musicc_intra': musicc_intra,
        'compute_scores': compute_scores,
        'verbose': verbose,
    }
    correct_and_normalize(musicc_args)

    # DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0
    # is its replacement. The index holds KO identifiers (see "Requires"), so
    # from_csv's implicit date parsing of the index is not carried over.
    normalized = pd.read_csv(output_file, sep='\t', index_col=0)
    profile.set_abundance_data(normalized)
def test_is_output_correct_for_normalization_correction_learn_model(self):
    """Does MUSiCC produce the correct output for normalization and correction of the example case?

    Runs MUSiCC with inter-sample normalization and a de novo learned
    intra-sample correction model on the simulated KO abundance example, then
    compares the written table with the stored reference. The scratch output
    file is removed afterwards whether or not the comparison succeeds.
    """
    examples_dir = MUSiCCTestCase.path_to_data + '/examples'
    output_path = examples_dir + '/test3.tab'
    # define the arguments needed by MUSiCC
    musicc_args = {
        'input_file': examples_dir + '/simulated_ko_relative_abundance.tab',
        'output_file': output_path,
        'input_format': 'tab',
        'output_format': 'tab',
        'musicc_inter': True,
        'musicc_intra': 'learn_model',
        'compute_scores': True,
        'verbose': False,
    }
    try:
        # run the MUSiCC correction
        correct_and_normalize(musicc_args)
        # assert that the result is equal to the example (up to small
        # difference due to de novo learning)
        example = pd.read_table(
            examples_dir + '/simulated_ko_MUSiCC_Normalized_Corrected_learn_model.tab',
            index_col=0)
        output = pd.read_table(output_path, index_col=0)
        example_vals = example.values
        output_vals = output.values
        # both tables are 2-D, so comparing the shape tuples checks rows and
        # columns at once
        self.assertEqual(example_vals.shape, output_vals.shape)
        for i in range(example_vals.shape[0]):
            for j in range(example_vals.shape[1]):
                self.assertLess(
                    abs(example_vals[i, j] - output_vals[i, j]), 1,
                    'value mismatch at row %d, column %d' % (i, j))
    finally:
        # always clean up the scratch file, even when an assertion fails or
        # MUSiCC raises after partially writing it
        if os.path.exists(output_path):
            os.remove(output_path)
def musicc_normalization(profile, in_file, output_dir, musicc_inter=True,
                         input_format='tab', output_format='tab',
                         musicc_intra='use_generic', compute_scores=False,
                         verbose=False):
    """Perform a MUSiCC normalization on this profile's abundance data.

    Runs MUSiCC's ``correct_and_normalize`` on ``in_file``, writes the result
    to ``<output_dir>/musicc_normalized_abundance.tab`` and then replaces the
    profile's abundance data with the table read back from that file.

    Requires: MUSiCC module can be imported and abundance features are KOs.

    Args:
        profile (metagenomic_profile): profile containing the abundance data;
            its ``set_abundance_data`` method is called with the result
        in_file (str): path to original abundance data file
        output_dir (str): output directory where normalized data will be saved
        musicc_inter, input_format, output_format, musicc_intra,
        compute_scores, verbose: forwarded unchanged to MUSiCC's
            ``correct_and_normalize`` under the same key names.

    Effects:
        profile's abundance data is now normalized.

    Note:
        The output file is always read back as a tab-separated table, so
        ``output_format`` values other than 'tab' are presumably not usable
        here -- TODO confirm against MUSiCC's writer.

        For more details see "MUSiCC: A marker genes based framework for
        metagenomic normalization and accurate profiling of gene abundances
        in the microbiome." Ohad Manor and Elhanan Borenstein. Genome Biology.
    """
    # Local imports: MUSiCC is an optional dependency, and os is only needed
    # to build the output path.
    import os

    from musicc.core import correct_and_normalize

    # os.path.join avoids the doubled separator of the old "dir//file"
    # concatenation and copes with a trailing slash on output_dir.
    output_file = os.path.join(output_dir, "musicc_normalized_abundance.tab")

    musicc_args = {
        'input_file': in_file,
        'output_file': output_file,
        'musicc_inter': musicc_inter,
        'input_format': input_format,
        'output_format': output_format,
        'musicc_intra': musicc_intra,
        'compute_scores': compute_scores,
        'verbose': verbose,
    }
    correct_and_normalize(musicc_args)

    # DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0
    # is its replacement. The index holds KO identifiers (see "Requires"), so
    # from_csv's implicit date parsing of the index is not carried over.
    normalized = pd.read_csv(output_file, sep='\t', index_col=0)
    profile.set_abundance_data(normalized)
def test_is_output_correct_for_normalization_correction_learn_model(self):
    """Does MUSiCC produce the correct output for normalization and correction of the example case?

    Runs MUSiCC with inter-sample normalization and a de novo learned
    intra-sample correction model on the simulated KO abundance example, then
    compares the written table with the stored reference. The scratch output
    file is removed afterwards whether or not the comparison succeeds.
    """
    examples_dir = MUSiCCTestCase.path_to_data + '/examples'
    output_path = examples_dir + '/test3.tab'
    # define the arguments needed by MUSiCC
    musicc_args = {
        'input_file': examples_dir + '/simulated_ko_relative_abundance.tab',
        'output_file': output_path,
        'input_format': 'tab',
        'output_format': 'tab',
        'musicc_inter': True,
        'musicc_intra': 'learn_model',
        'compute_scores': True,
        'verbose': False,
    }
    try:
        # run the MUSiCC correction
        correct_and_normalize(musicc_args)
        # assert that the result is equal to the example (up to small
        # difference due to de novo learning)
        example = pd.read_table(
            examples_dir + '/simulated_ko_MUSiCC_Normalized_Corrected_learn_model.tab',
            index_col=0)
        output = pd.read_table(output_path, index_col=0)
        example_vals = example.values
        output_vals = output.values
        # both tables are 2-D, so comparing the shape tuples checks rows and
        # columns at once
        self.assertEqual(example_vals.shape, output_vals.shape)
        for i in range(example_vals.shape[0]):
            for j in range(example_vals.shape[1]):
                self.assertLess(
                    abs(example_vals[i, j] - output_vals[i, j]), 1,
                    'value mismatch at row %d, column %d' % (i, j))
    finally:
        # always clean up the scratch file, even when an assertion fails or
        # MUSiCC raises after partially writing it
        if os.path.exists(output_path):
            os.remove(output_path)
default='tab') parser.add_argument('-n', '--normalize', dest='musicc_inter', help='Apply MUSiCC normalization (default: false)', action='store_true') parser.add_argument( '-c', '--correct', dest='musicc_intra', choices=['use_generic', 'learn_model'], help='Correct abundance per-sample using MUSiCC (default: false)', default='None') parser.add_argument( '-perf', '--performance', dest='compute_scores', help= 'Calculate model performance on various gene sets (may add to running time) (default: false)', action='store_true') parser.add_argument('-v', '--verbose', dest='verbose', help='Increase verbosity of module (default: false)', action='store_true') given_args = parser.parse_args() # run normalization and correction correct_and_normalize(vars(given_args))
#!/usr/bin/env python
# Command-line front end for MUSiCC: parses the user's options and passes
# them, as a plain dict, to musicc.core.correct_and_normalize.
import argparse
# NOTE(review): temporary development hook -- puts a hard-coded MUSiCC
# checkout on the import path for testing the new module; the original author
# marked it for removal.
import sys
sys.path.append('/net/gs/vol1/home/ohadm/MUSiCC/PyCode/MUSiCC')
from musicc.core import correct_and_normalize

if __name__ == "__main__":
    # Ordered table of (flags, keyword settings); the order is kept because
    # argparse lists the options in --help in registration order.
    option_table = [
        (('input_file',),
         {'help': 'Input abundance file to correct'}),
        (('-o', '--out'),
         {'dest': 'output_file',
          'help': 'Output destination for corrected abundance (default: MUSiCC.tab)',
          'default': 'MUSiCC.tab'}),
        (('-if', '--input_format'),
         {'dest': 'input_format',
          'choices': ['tab', 'csv', 'biom'],
          'help': 'Option indicating the format of the input file (default: tab)',
          'default': 'tab'}),
        (('-of', '--output_format'),
         {'dest': 'output_format',
          'choices': ['tab', 'csv', 'biom'],
          'help': 'Option indicating the format of the output file (default: tab)',
          'default': 'tab'}),
        (('-n', '--normalize'),
         {'dest': 'musicc_inter',
          'help': 'Apply MUSiCC normalization (default: false)',
          'action': 'store_true'}),
        (('-c', '--correct'),
         {'dest': 'musicc_intra',
          'choices': ['use_generic', 'learn_model'],
          'help': 'Correct abundance per-sample using MUSiCC (default: false)',
          # the default is the string 'None', not the None object
          'default': 'None'}),
        (('-perf', '--performance'),
         {'dest': 'compute_scores',
          'help': 'Calculate model performance on various gene sets (may add to running time) (default: false)',
          'action': 'store_true'}),
        (('-v', '--verbose'),
         {'dest': 'verbose',
          'help': 'Increase verbosity of module (default: false)',
          'action': 'store_true'}),
    ]

    # get options from user
    parser = argparse.ArgumentParser(
        description='MUSiCC: Metagenomic Universal Single-Copy Correction')
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    given_args = parser.parse_args()

    # run normalization and correction
    correct_and_normalize(vars(given_args))