def test_basic(my_go_term, my_other_term):
    """Check the basic behavior of a `GeneOntology` built from two terms.

    Covers type/string conversions, the container protocol (length,
    membership, item get/set/delete), the explicit lookup methods,
    equality comparison, and iteration.
    """
    terms = [my_go_term, my_other_term]
    ontology = GeneOntology(terms)

    # type and string-conversion checks
    assert isinstance(ontology, GeneOntology)
    for convert, expected_type in ((repr, str), (str, str), (text, text)):
        assert isinstance(convert(ontology), expected_type)
    assert isinstance(ontology.hash, text)

    # test access methods
    assert len(ontology) == 2
    assert my_go_term.id in ontology
    assert ontology[my_go_term.id] == my_go_term

    # deleting and re-adding a term must be reflected in membership
    del ontology[my_go_term.id]
    assert my_go_term.id not in ontology
    ontology[my_go_term.id] = my_go_term
    assert my_go_term.id in ontology

    # test additional access methods
    assert ontology.get_term_by_id(my_go_term.id) == my_go_term
    assert ontology.get_term_by_acc(my_go_term.acc) == my_go_term

    # test comparisons: a deep copy is equal until one of its terms is removed
    clone = copy.deepcopy(ontology)
    assert clone == ontology
    del clone[my_other_term.id]
    assert clone != ontology

    # test iteration
    iterated_terms = set(iter(ontology))
    expected_terms = {my_go_term, my_other_term}
    assert iterated_terms == expected_terms
def test_list(my_go_annotation, my_go_term):
    """Check that a GO annotation survives a round trip through a list."""
    ontology = GeneOntology([my_go_term])

    # serialize the annotation to a plain list ...
    serialized = my_go_annotation.to_list()
    assert isinstance(serialized, list)

    # ... and rebuild an equal annotation from it
    restored = GOAnnotation.from_list(ontology, serialized)
    assert isinstance(restored, GOAnnotation)
    assert restored == my_go_annotation
def main(args=None):
    """Run GO-PCA and store the result in a `pickle` file.

    Parameters
    ----------
    args: argparse.Namespace object, optional
        The argument values. If not specified, the values will be obtained by
        parsing the command line arguments using the `argparse` module.

    Returns
    -------
    int
        Exit code (0 if no error occurred, 1 if a required parameter is
        missing or the GO-PCA run failed).

    Raises
    ------
    SystemError
        If the version of the Python interpreter is not >= 2.7.
    """
    vinfo = sys.version_info
    if not (vinfo >= (2, 7)):
        raise SystemError('Python interpreter version >= 2.7 required, '
                          'found %d.%d instead.' % (vinfo.major, vinfo.minor))

    if args is None:
        # read arguments from the command line
        parser = get_argument_parser()

        # parse first with default options, in case "--help" is specified
        # ("--help" causes the program to exit at this point)
        args = parser.parse_args()

        # now remove the defaults and parse again
        # (removing the defaults is important so that we know which values
        # were specified by the user)
        no_defaults = dict.fromkeys(GOPCA.get_param_defaults())
        no_defaults.update(dict.fromkeys(GOPCAParams.get_param_defaults()))
        parser.set_defaults(**no_defaults)
        args = parser.parse_args()

    # reporting options
    log_file = args.log_file
    quiet = args.quiet
    verbose = args.verbose

    # TODO: test if we can write to log_file?

    # configure root logger
    logger = util.get_logger(log_file=log_file, quiet=quiet)

    # check if required parameters were specified
    # (report every missing one before giving up)
    passed = True
    if args.expression_file is None:
        logger.error('No expression file specified!')
        passed = False
    if args.gene_set_file is None:
        logger.error('No gene set file specified!')
        passed = False
    if args.output_file is None:
        logger.error('No output file specified!')
        passed = False
    if not passed:
        logger.error('Not all required parameters were specified.')
        return 1

    # generate configuration
    if args.config_file is not None:
        # read parameter values from config file
        params = GOPCAParams.read_ini(args.config_file)
    else:
        # start with default configuration
        params = GOPCAParams()

    # overwrite parameters specified on the command line
    for p in GOPCAParams.get_param_defaults():
        v = getattr(args, p)
        if v is not None:
            logger.debug('Parameter "%s" specified on command line!', p)
            params.set_param(p, v)

    global_params = GOPCA.get_param_defaults()
    for k in list(global_params.keys()):
        v = getattr(args, k)
        if v is not None:
            # report the global parameter actually being overridden
            # (previously the stale loop variable `p` was logged here)
            logger.debug('Parameter "%s" specified on command line!', k)
            global_params[k] = v

    # read expression file
    matrix = ExpMatrix.read_tsv(args.expression_file)
    logger.info('Expression matrix size: '
                '(p = %d genes) x (n = %d samples).', matrix.p, matrix.n)

    # the value may be None (e.g. after the defaults were cleared above);
    # comparing None with an int raises TypeError on Python 3
    sel_var_genes = args.sel_var_genes
    if sel_var_genes is not None and sel_var_genes > 0:
        # filter genes by variance
        matrix = matrix.filter_variance(sel_var_genes)

    # read gene set file
    gene_sets = GeneSetCollection.read_tsv(args.gene_set_file)
    # diagnostic output goes through the logger instead of a bare print
    logger.debug('Gene set file "%s": %s', args.gene_set_file, gene_sets)

    # read ontology file (if supplied)
    gene_ontology = None
    if args.gene_ontology_file is not None:
        # silence the genometools logger while parsing, and always restore
        # its previous level afterwards, even if parsing fails
        p_logger = logging.getLogger(genometools.__name__)
        previous_level = p_logger.level
        p_logger.setLevel(logging.ERROR)
        try:
            gene_ontology = GeneOntology.read_obo(
                args.gene_ontology_file,
                part_of_cc_only=params.go_part_of_cc_only)
        finally:
            p_logger.setLevel(previous_level)

    M = GOPCA.simple_setup(matrix, params, gene_sets, gene_ontology,
                           verbose=verbose, **global_params)
    run = M.run()

    if run is None:
        logger.error('GO-PCA run failed!')
        return 1

    # write run to pickle file
    logger.info('Storing GO-PCA run in file "%s"...', args.output_file)
    run.write_pickle(args.output_file)

    return 0
def my_config(my_params, my_gene_ontology_file, my_fly_gene_set_file):
    """Build a `GOPCAConfig` from parameters, an OBO file and a gene set file.

    The ontology is read with `GeneOntology.read_obo` and the gene sets with
    `GeneSetCollection.read_tsv`; both are passed to `GOPCAConfig` together
    with `my_params`.
    """
    ontology = GeneOntology.read_obo(my_gene_ontology_file)
    fly_gene_sets = GeneSetCollection.read_tsv(my_fly_gene_set_file)
    return GOPCAConfig(my_params, fly_gene_sets, ontology)
def main(args=None):
    """Run GO-PCA and store the result in a `pickle` file.

    Parameters
    ----------
    args: argparse.Namespace object, optional
        The argument values. If not specified, the values will be obtained by
        parsing the command line arguments using the `argparse` module.

    Returns
    -------
    int
        Exit code (0 if no error occurred, 1 if a required parameter is
        missing or the GO-PCA run failed).

    Raises
    ------
    SystemError
        If the version of the Python interpreter is not >= 2.7.
    """
    vinfo = sys.version_info
    if not (vinfo >= (2, 7)):
        raise SystemError('Python interpreter version >= 2.7 required, '
                          'found %d.%d instead.' % (vinfo.major, vinfo.minor))

    if args is None:
        # read arguments from the command line
        parser = get_argument_parser()

        # parse first with default options, in case "--help" is specified
        # ("--help" causes the program to exit at this point)
        args = parser.parse_args()

        # now remove the defaults and parse again
        # (removing the defaults is important so that we know which values
        # were specified by the user)
        no_defaults = dict.fromkeys(GOPCA.get_param_defaults())
        no_defaults.update(dict.fromkeys(GOPCAParams.get_param_defaults()))
        parser.set_defaults(**no_defaults)
        args = parser.parse_args()

    # reporting options
    log_file = args.log_file
    quiet = args.quiet
    verbose = args.verbose

    # TODO: test if we can write to log_file?

    # configure root logger
    logger = util.get_logger(log_file=log_file, quiet=quiet)

    # check if required parameters were specified; every missing parameter
    # is reported before the function gives up
    required = (
        (args.expression_file, 'No expression file specified!'),
        (args.gene_set_file, 'No gene set file specified!'),
        (args.output_file, 'No output file specified!'),
    )
    passed = True
    for value, message in required:
        if value is None:
            logger.error(message)
            passed = False
    if not passed:
        logger.error('Not all required parameters were specified.')
        return 1

    # generate configuration
    if args.config_file is not None:
        # read parameter values from config file
        params = GOPCAParams.read_ini(args.config_file)
    else:
        # start with default configuration
        params = GOPCAParams()

    # overwrite parameters specified on the command line
    for p in GOPCAParams.get_param_defaults():
        v = getattr(args, p)
        if v is not None:
            logger.debug('Parameter "%s" specified on command line!', p)
            params.set_param(p, v)

    global_params = GOPCA.get_param_defaults()
    for k in list(global_params.keys()):
        v = getattr(args, k)
        if v is not None:
            # log the key of this loop; the original logged the leftover
            # variable `p` from the loop above, naming the wrong parameter
            logger.debug('Parameter "%s" specified on command line!', k)
            global_params[k] = v

    # read expression file
    matrix = ExpMatrix.read_tsv(args.expression_file)
    logger.info(
        'Expression matrix size: '
        '(p = %d genes) x (n = %d samples).', matrix.p, matrix.n)

    # guard against None (possible once the defaults have been cleared):
    # `None > 0` raises TypeError on Python 3
    sel_var_genes = args.sel_var_genes
    if sel_var_genes is not None and sel_var_genes > 0:
        # filter genes by variance
        matrix = matrix.filter_variance(sel_var_genes)

    # read gene set file
    gene_sets = GeneSetCollection.read_tsv(args.gene_set_file)
    # emit the diagnostic via the logger rather than printing to stdout
    logger.debug('Gene set file "%s": %s', args.gene_set_file, gene_sets)

    # read ontology file (if supplied)
    gene_ontology = None
    if args.gene_ontology_file is not None:
        # temporarily raise the genometools logger threshold to ERROR;
        # the previous level is restored even when parsing raises
        p_logger = logging.getLogger(genometools.__name__)
        previous_level = p_logger.level
        p_logger.setLevel(logging.ERROR)
        try:
            gene_ontology = GeneOntology.read_obo(
                args.gene_ontology_file,
                part_of_cc_only=params.go_part_of_cc_only)
        finally:
            p_logger.setLevel(previous_level)

    M = GOPCA.simple_setup(matrix, params, gene_sets, gene_ontology,
                           verbose=verbose, **global_params)
    run = M.run()

    if run is None:
        logger.error('GO-PCA run failed!')
        return 1

    # write run to pickle file
    logger.info('Storing GO-PCA run in file "%s"...', args.output_file)
    run.write_pickle(args.output_file)

    return 0
def my_gene_ontology(my_gene_ontology_file):
    """Return the `GeneOntology` read from the given OBO file."""
    return GeneOntology.read_obo(my_gene_ontology_file)
def main(args=None):
    """Extract GO annotations and store in tab-delimited text file.

    Parameters
    ----------
    args: argparse.Namespace object, optional
        The argument values. If not specified, the values will be obtained by
        parsing the command line arguments using the `argparse` module.

    Returns
    -------
    int
        Exit code (0 if no error occurred, 1 if an input file is missing).

    Raises
    ------
    SystemError
        If the version of the Python interpreter is not >= 2.7.
    """
    vinfo = sys.version_info
    if not (vinfo >= (2, 7)):
        raise SystemError('Python interpreter version >= 2.7 required, '
                          'found %d.%d instead.' % (vinfo.major, vinfo.minor))

    if args is None:
        parser = get_argument_parser()
        args = parser.parse_args()

    gene_file = args.gene_file
    gene_ontology_file = args.gene_ontology_file
    goa_association_file = args.goa_association_file
    output_file = args.output_file

    evidence_codes = args.evidence_codes
    min_genes = args.min_genes_per_term
    max_genes = args.max_genes_per_term

    part_of_cc_only = args.part_of_cc_only

    # logging parameters
    log_file = args.log_file
    quiet = args.quiet
    verbose = args.verbose

    # configure root logger exactly once, after the stream has been chosen:
    # if we print output to stdout, log messages must go to stderr so that
    # they do not end up in the output
    log_stream = sys.stdout
    if output_file == '-':
        log_stream = sys.stderr

    logger = misc.get_logger(log_stream=log_stream, log_file=log_file,
                             quiet=quiet, verbose=verbose)

    logger.info('Selected evidence codes: %s', ', '.join(evidence_codes))
    logger.info('Min. number of genes per gene set: %d', min_genes)
    logger.info('Max. number of genes per gene set: %d', max_genes)

    # checks (explicit instead of `assert`, which is stripped under -O)
    input_files = (gene_file, gene_ontology_file, goa_association_file)
    missing_files = [path for path in input_files
                     if not os.path.isfile(path)]
    if missing_files:
        for path in missing_files:
            logger.error('Input file "%s" does not exist!', path)
        return 1

    # extract protein-coding genes from Ensembl GTF file
    exp_genome = ExpGenome.read_tsv(gene_file)

    # parse Gene Ontology, honoring the "part_of_cc_only" option
    # (it was previously read from `args` but never used)
    gene_ontology = GeneOntology.read_obo(gene_ontology_file,
                                          part_of_cc_only=part_of_cc_only)

    # parse UniProt-GOA gene association file
    # NOTE: text mode with `encoding` in gzip.open requires Python 3.3+,
    # which is stricter than the interpreter check at the top.
    with gzip.open(goa_association_file, 'rt', encoding='ascii') as fh:
        go_annotations = ontology.parse_gaf(fh, gene_ontology,
                                            ev_codes=evidence_codes,
                                            genome=exp_genome)

    # extract GO-based gene sets
    gene_sets = ontology.get_goa_gene_sets(go_annotations)
    logger.info('Generated %d GO-derived gene sets', len(gene_sets))

    # filter gene sets based on size
    if min_genes > 0:
        old_size = len(gene_sets)
        gene_sets = GeneSetCollection(
            gs for gs in gene_sets if gs.size >= min_genes)
        logger.info('Excluded %d gene sets with too few genes.',
                    old_size - len(gene_sets))

    if max_genes > 0:
        old_size = len(gene_sets)
        gene_sets = GeneSetCollection(
            gs for gs in gene_sets if gs.size <= max_genes)
        logger.info('Excluded %d gene sets with too many genes.',
                    old_size - len(gene_sets))

    # writing output file
    gene_sets.write_tsv(output_file)
    logger.info('Wrote %s GO-derived gene sets to output file "%s".',
                len(gene_sets), output_file)

    return 0