Beispiel #1
0
 def run_enrichment(self, args):
     """Run ``_check_enrichment`` on *args*, then the enrichment pipeline.

     Every value is forwarded positionally, so the order of the groups
     below has to stay in step with the parameter order of
     ``Enrichment.enrichment_pipeline``.
     """
     self._check_enrichment(args)
     pipeline = Enrichment().enrichment_pipeline

     input_options = (
         args.annotate_output,
         args.annotation_matrix,
         args.gff_files,
         args.metadata,
         args.abundance,
         args.abundance_metadata,
         args.transcriptome,
         args.transcriptome_metadata,
     )
     runtime_options = (
         args.pval_cutoff,
         args.proportions_cutoff,
         args.threshold,
         args.multi_test_correction,
         args.batchfile,
         args.processes,
         args.allow_negative_values,
         args.ko,
         args.pfam,
         args.tigrfam,
         args.cluster,
         args.ortholog,
         args.cazy,
         args.ec,
         args.ko_hmm,
         args.range,
         args.subblock_size,
         args.operon_mismatch_cutoff,
         args.operon_match_score_cutoff,
     )
     output_options = (args.output,)

     pipeline(*(input_options + runtime_options + output_options))
Beispiel #2
0
 def test_check_annotation_type(self):
     # One representative identifier per annotation type; each lookup
     # goes through a freshly constructed Enrichment instance.
     detected = Enrichment().check_annotation_type(['PF10117'])
     self.assertEqual(detected, Enrichment.PFAM)

     detected = Enrichment().check_annotation_type(['GH42'])
     self.assertEqual(detected, Enrichment.CAZY)

     detected = Enrichment().check_annotation_type(['TIGR00008'])
     self.assertEqual(detected, Enrichment.TIGRFAM)

     detected = Enrichment().check_annotation_type(['K00399'])
     self.assertEqual(detected, Enrichment.KEGG)

     detected = Enrichment().check_annotation_type(['1.2.3.4'])
     self.assertEqual(detected, Enrichment.EC)
Beispiel #3
0
    def run_enrichm(self, args, command):
        '''
        Run the pipeline selected by args.subparser_name.

        Parameters
        ----------
        args    - object holding the parsed command-line options as
                  attributes. args.subparser_name picks the pipeline; the
                  other attributes are forwarded to it positionally.
        command - iterable of strings making up the command line. It is
                  only joined with spaces and written to the log.

        Output
        ------
        None. Results are whatever the selected pipeline produces.
        '''
        self._check_general(args)
        self._logging_setup(args)

        logging.info("Command: %s", ' '.join(command))
        logging.info("Running the %s pipeline", args.subparser_name)

        pipeline = args.subparser_name

        # The sub-commands are mutually exclusive, so a single chain is
        # enough: at most one branch runs.
        if pipeline == self.DATA:
            data = Data()
            data.do(args.uninstall, args.dry)

        elif pipeline == self.ANNOTATE:
            self._check_annotate(args)
            annotate = Annotate(
                # Define inputs and outputs
                args.output,
                # Define type of annotation to be carried out
                args.ko, args.ko_hmm, args.pfam, args.tigrfam,
                args.clusters, args.orthologs, args.cazy, args.ec,
                # Cutoffs
                args.evalue, args.bit, args.id, args.aln_query,
                args.aln_reference, args.c, args.cut_ga, args.cut_nc,
                args.cut_tc, args.cut_ko, args.inflation,
                args.chunk_number, args.chunk_max, args.count_domains,
                # Parameters
                args.threads, args.parallel, args.suffix, args.light)
            annotate.annotate_pipeline(args.genome_directory,
                                       args.protein_directory,
                                       args.genome_files,
                                       args.protein_files)

        elif pipeline == self.CLASSIFY:
            self._check_classify(args)
            classify = Classify()
            classify.classify_pipeline(args.custom_modules,
                                       args.cutoff,
                                       args.aggregate,
                                       args.genome_and_annotation_matrix,
                                       args.output)

        elif pipeline == self.ENRICHMENT:
            self._check_enrichment(args)
            enrichment = Enrichment()
            enrichment.enrichment_pipeline(
                # Input options
                args.annotate_output, args.annotation_matrix,
                args.metadata, args.abundance, args.abundance_metadata,
                args.transcriptome, args.transcriptome_metadata,
                # Runtime options
                args.pval_cutoff, args.proportions_cutoff,
                args.threshold, args.multi_test_correction,
                args.batchfile, args.processes,
                args.allow_negative_values, args.ko, args.pfam,
                args.tigrfam, args.cluster, args.ortholog, args.cazy,
                args.ec, args.ko_hmm,
                # Outputs
                args.output)

        elif pipeline in (NetworkAnalyser.PATHWAY, NetworkAnalyser.EXPLORE):
            self._check_network(args)
            network_analyser = NetworkAnalyser()
            network_analyser.network_pipeline(
                args.subparser_name, args.matrix,
                args.genome_metadata, args.tpm_values,
                args.tpm_metadata, args.abundance,
                args.abundance_metadata, args.metabolome,
                args.enrichment_output, args.depth, args.filter,
                args.limit, args.queries, args.output)

        elif pipeline == self.PREDICT:
            self._check_predict(args)
            predict = Predict()
            predict.predict_pipeline(args.forester_model_directory,
                                     args.input_matrix,
                                     args.output)

        elif pipeline == self.GENERATE:
            self._check_generate(args)
            generate_model = GenerateModel()
            generate_model.generate_pipeline(args.input_matrix,
                                             args.groups,
                                             args.model_type,
                                             args.testing_portion,
                                             args.grid_search,
                                             args.threads,
                                             args.output)

        elif pipeline == self.USES:
            self._check_uses(args)
            uses = Uses()
            uses.uses_pipeline(args.compounds_list,
                               args.annotation_matrix,
                               args.metadata,
                               args.output,
                               args.count)

        logging.info('Finished running EnrichM')
Beispiel #4
0
    def main(self, args, command):
        '''
        Run the sub-command selected by args.subparser_name.

        Parameters
        ----------
        args    - object holding the parsed command-line options as
                  attributes. args.subparser_name picks the sub-command;
                  the other attributes are forwarded to it positionally.
        command - iterable of strings making up the command line. It is
                  only joined with spaces and written to the log.

        Output
        ------
        None. Results are whatever the selected sub-command produces.
        '''
        self._check_general(args)
        self._logging_setup(args)

        logging.info("Running command: %s", ' '.join(command))

        subcommand = args.subparser_name

        # The sub-commands are mutually exclusive, so a single chain is
        # enough: at most one branch runs.
        if subcommand == self.DATA:
            data = Data()
            data.do(args.uninstall)

        elif subcommand == self.ANNOTATE:
            self._check_annotate(args)
            annotate = Annotate(
                # Define inputs and outputs
                args.output,
                # Define type of annotation to be carried out
                args.ko,
                args.pfam,
                args.tigrfam,
                args.hypothetical,
                args.cazy,
                # Cutoffs
                args.evalue,
                args.bit,
                args.id,
                args.aln_query,
                args.aln_reference,
                args.c,
                args.cut_ga,
                args.cut_nc,
                args.cut_tc,
                args.inflation,
                args.chunk_number,
                args.chunk_max,
                # Parameters
                args.threads,
                args.parallel,
                args.suffix,
                args.light)
            annotate.do(args.genome_directory,
                        args.protein_directory,
                        args.genome_files,
                        args.protein_files)

        elif subcommand == self.CLASSIFY:
            self._check_classify(args)
            classify = Classify()
            classify.do(args.custom_modules,
                        args.cutoff,
                        args.genome_and_annotation_file,
                        args.genome_and_annotation_matrix,
                        args.output)

        elif subcommand == self.ENRICHMENT:
            self._check_enrichment(args)
            enrichment = Enrichment()
            enrichment.do(
                # Input options
                args.annotate_output,
                args.metadata,
                args.modules,
                args.abundances,
                # Runtime options
                args.genomes_to_compare_with_group,
                args.pval_cutoff,
                args.proportions_cutoff,
                args.threshold,
                args.multi_test_correction,
                args.batchfile,
                args.processes,
                args.ko,
                args.pfam,
                args.tigrfam,
                args.hypothetical,
                args.cazy,
                # Outputs
                args.output)

        elif subcommand == self.CONNECT:
            self._check_connect(args)
            connect = Connect()
            connect.do(args.annotate_output,
                       args.metadata,
                       args.custom_modules,
                       args.cutoff,
                       args.output)

        elif subcommand in (NetworkAnalyser.PATHWAY,
                            NetworkAnalyser.EXPLORE,
                            NetworkAnalyser.TRAVERSE):
            self._check_network(args)
            network_analyser = NetworkAnalyser(args.metadata)
            network_analyser.do(args.matrix,
                                args.transcriptome,
                                args.metabolome,
                                args.depth,
                                args.filter,
                                args.limit,
                                args.queries,
                                args.subparser_name,
                                args.starting_compounds,
                                args.steps,
                                args.number_of_queries,
                                args.output)

        elif subcommand == self.PREDICT:
            self._check_predict(args)
            predict = Predict()
            predict.do(args.forester_model_directory,
                       args.input_matrix,
                       args.output)

        elif subcommand == self.GENERATE:
            self._check_generate(args)
            generate_model = GenerateModel()
            generate_model.do(args.input_matrix,
                              args.groups,
                              args.model_type,
                              args.testing_portion,
                              args.grid_search,
                              args.threads,
                              args.output)

        logging.info('Done!')
Beispiel #5
0
class Tests(unittest.TestCase):
    """Tests for Enrichment: direct method calls plus command-line runs."""

    # genome -> annotation ID -> number (presumably a copy count; the
    # weighting test below multiplies it by the genome abundance).
    genome_annotation_simple_example = {
        "genome_1": {
            "K00001": 1,
            "K00002": 2
        },
        "genome_2": {
            "K00003": 1
        },
        "genome_3": {
            "K00001": 5,
            "K00002": 4,
            "K00003": 5
        }
    }

    # group name -> genomes belonging to that group.
    genome_groups_simple_example = {
        "group_1": ["genome_1"],
        "group_2": ["genome_2", "genome_3"]
    }

    # sample -> genome -> abundance of that genome in the sample.
    sample_abundance = {
        "sample_1": {
            "genome_1": 1.0,
            "genome_2": 0.5,
            "genome_3": 3.0
        },
        "sample_2": {
            "genome_1": 0.5,
            "genome_2": 1.2,
            "genome_3": 5.0
        },
        "sample_3": {
            "genome_1": 0.1,
            "genome_2": 1.1,
            "genome_3": 6.0
        },
        "sample_4": {
            "genome_1": 5.0,
            "genome_2": 5.2,
            "genome_3": 0.2
        },
        "sample_5": {
            "genome_1": 6.0,
            "genome_2": 4.9,
            "genome_3": 0.1
        },
        "sample_6": {
            "genome_1": 7.0,
            "genome_2": 5.0,
            "genome_3": 0.0
        }
    }

    # sample group name -> samples belonging to that group.
    sample_groups = {
        "sample_group_1": ["sample_1", "sample_2", "sample_3"],
        "sample_group_2": ["sample_4", "sample_5", "sample_6"]
    }

    genomes = ["genome_1", "genome_2", "genome_3"]
    annotations = ["K00001", "K00002", "K00003"]
    # Shared instance used by the tests that call Enrichment methods directly.
    enrichment_test_object = Enrichment()

    def _assert_enrichment_cli_output(self, annotation_flag, expected_dirname):
        # Run `enrichment` through the command line with the given
        # annotation flag and check the run against the reference
        # directory `expected_dirname` inside path_to_data.
        import shutil  # local import: the module's import block is not visible here

        tmp = tempfile.mkdtemp()
        # Remove the scratch directory whether the assertions pass or fail.
        self.addCleanup(shutil.rmtree, tmp, ignore_errors=True)

        expected_output = os.path.join(path_to_data, expected_dirname)
        metadata = os.path.join(path_to_data, 'metadata.tsv')
        cmd = '%s enrichment --annotate_output %s --metadata %s --output %s --force %s --verbosity 1' \
            % (path_to_script, path_to_annotate, metadata, tmp, annotation_flag)

        exit_code = subprocess.call(cmd, shell=True)
        self.assertEqual(exit_code, 0,
                         'command exited with status %s: %s' % (exit_code, cmd))

        # A dircmp object is always truthy, so it has to be inspected rather
        # than asserted on. Only file names are checked: the p-values are
        # never exact, so file contents cannot be compared directly. Files
        # that exist only in the run (e.g. its log) are tolerated.
        comparison = filecmp.dircmp(tmp, expected_output)
        self.assertEqual(comparison.right_only, [],
                         'expected output missing from the run: %s'
                         % comparison.right_only)

    def test_check_annotation_type(self):
        # One representative identifier per annotation type.
        cases = (
            (['PF10117'], Enrichment.PFAM),
            (['GH42'], Enrichment.CAZY),
            (['TIGR00008'], Enrichment.TIGRFAM),
            (['K00399'], Enrichment.KEGG),
            (['1.2.3.4'], Enrichment.EC),
        )
        for identifiers, expected_type in cases:
            self.assertEqual(
                Enrichment().check_annotation_type(identifiers),
                expected_type)

    def test_calculate_portions(self):
        expected = [['Annotation', 'group_1', 'group_2'],
                    ['K00001', '1.0', '0.5'], ['K00002', '1.0', '0.5'],
                    ['K00003', '0.0', '1.0']]
        result = self.enrichment_test_object.calculate_portions(
            self.annotations, self.genome_groups_simple_example,
            self.genome_annotation_simple_example, self.genomes, 1)
        self.assertEqual(result, expected)

    def test_enrichment_from_ko_matrix(self):
        self._assert_enrichment_cli_output('--ko', 'enrichm_enrichment_ko')

    def test_enrichment_from_pfam_matrix(self):
        self._assert_enrichment_cli_output('--pfam', 'enrichm_enrichment_pfam')

    def test_weight_annotation_matrix(self):
        # Each expected value is the sum over genomes of
        # abundance * annotation number, listed per sample of the group.
        expected = {
            'sample_group_1': {
                'K00001': [16.0, 25.5, 30.1],
                'K00002': [14.0, 21.0, 24.2],
                'K00003': [15.5, 26.2, 31.1]
            },
            'sample_group_2': {
                'K00001': [6.0, 6.5, 7.0],
                'K00002': [10.8, 12.4, 14.0],
                'K00003': [6.2, 5.4, 5.0]
            }
        }

        result = self.enrichment_test_object.weight_annotation_matrix(
            self.sample_abundance, self.genome_annotation_simple_example,
            self.sample_groups, self.annotations)
        self.assertEqual(result, expected)