def test_basic(my_go_term, my_other_term):
    """Check construction, item access, lookup, comparison and iteration
    of a `GeneOntology` built from two terms."""

    go = GeneOntology([my_go_term, my_other_term])
    term_id = my_go_term.id

    # type and string conversions
    assert isinstance(go, GeneOntology)
    for convert in (repr, str):
        assert isinstance(convert(go), str)
    assert isinstance(text(go), text)
    assert isinstance(go.hash, text)

    # container protocol: len / in / getitem / delitem / setitem
    assert len(go) == 2
    assert term_id in go
    assert go[term_id] == my_go_term
    del go[term_id]
    assert term_id not in go
    go[term_id] = my_go_term
    assert term_id in go

    # explicit lookup helpers
    assert go.get_term_by_id(term_id) == my_go_term
    assert go.get_term_by_acc(my_go_term.acc) == my_go_term

    # equality: a deep copy is equal until one of its terms is removed
    clone = copy.deepcopy(go)
    assert clone == go
    del clone[my_other_term.id]
    assert clone != go

    # iteration yields exactly the two terms (order is not checked)
    assert set(iter(go)) == {my_go_term, my_other_term}
Example #2
0
def test_list(my_go_annotation, my_go_term):
    """Round-trip a `GOAnnotation` through its list representation."""

    ontology_obj = GeneOntology([my_go_term])

    # serialize to a plain list
    as_list = my_go_annotation.to_list()
    assert isinstance(as_list, list)

    # rebuild from the list and compare with the original annotation
    restored = GOAnnotation.from_list(ontology_obj, as_list)
    assert isinstance(restored, GOAnnotation)
    assert restored == my_go_annotation
Example #3
0
def main(args=None):
    """Run GO-PCA and store the result in a `pickle` file.

    Parameters
    ----------
    args: argparse.Namespace object, optional
        The argument values. If not specified, the values will be obtained by
        parsing the command line arguments using the `argparse` module.

    Returns
    -------
    int
        Exit code. 0 if no error occurred; 1 if a required parameter
        (expression file, gene set file, output file) is missing or if the
        GO-PCA run returned ``None``.

    Raises
    ------
    SystemError
        If the version of the Python interpreter is not >= 2.7.
    """
    vinfo = sys.version_info
    if not (vinfo >= (2, 7)):
        raise SystemError('Python interpreter version >= 2.7 required, '
                          'found %d.%d instead.' % (vinfo.major, vinfo.minor))

    if args is None:
        # read arguments from the command line
        parser = get_argument_parser()

        # parse first with default options, in case "--help" is specified
        # ("--help" causes the program to exit at this point)
        args = parser.parse_args()

        # now remove the defaults and parse again
        # (removing the defaults is important so that we know which values
        # were specified by the user)
        no_defaults = dict.fromkeys(GOPCA.get_param_defaults())
        no_defaults.update(dict.fromkeys(GOPCAParams.get_param_defaults()))
        parser.set_defaults(**no_defaults)
        args = parser.parse_args()

    # reporting options
    log_file = args.log_file
    quiet = args.quiet
    verbose = args.verbose

    # test if we can write to log_file?

    # configure root logger
    logger = util.get_logger(log_file=log_file, quiet=quiet)

    # check if required parameters were specified
    # (report every missing one before giving up)
    passed = True
    if args.expression_file is None:
        logger.error('No expression file specified!')
        passed = False
    if args.gene_set_file is None:
        logger.error('No gene set file specified!')
        passed = False
    if args.output_file is None:
        logger.error('No output file specified!')
        passed = False
    if not passed:
        logger.error('Not all required parameters were specified.')
        return 1

    # generate configuration
    if args.config_file is not None:
        # read parameter values from config file
        params = GOPCAParams.read_ini(args.config_file)
    else:
        # start with default configuration
        params = GOPCAParams()

    # overwrite parameters specified on the command line
    for p in GOPCAParams.get_param_defaults():
        v = getattr(args, p)
        if v is not None:
            logger.debug('Parameter "%s" specified on command line!', p)
            params.set_param(p, v)

    global_params = GOPCA.get_param_defaults()
    for k in list(global_params.keys()):
        v = getattr(args, k)
        if v is not None:
            # report the global parameter itself (`k`), not the leftover
            # loop variable of the previous loop
            logger.debug('Parameter "%s" specified on command line!', k)
            global_params[k] = v

    # read expression file
    matrix = ExpMatrix.read_tsv(args.expression_file)
    logger.info('Expression matrix size: ' +
                '(p = %d genes) x (n = %d samples).', matrix.p, matrix.n)

    # The value may be None (e.g., if its default was cleared above or the
    # caller's namespace left it unset); comparing None with an int raises
    # a TypeError on Python 3, so treat None as "no filtering".
    sel_var_genes = args.sel_var_genes
    if sel_var_genes is not None and sel_var_genes > 0:
        # filter genes by variance
        matrix = matrix.filter_variance(sel_var_genes)

    # read gene set file
    gene_sets = GeneSetCollection.read_tsv(args.gene_set_file)
    logger.debug('Gene sets read from "%s": %s',
                 args.gene_set_file, gene_sets)

    # read ontology file (if supplied)
    gene_ontology = None
    if args.gene_ontology_file is not None:
        # temporarily silence the genometools logger below ERROR level
        # while the ontology is parsed
        p_logger = logging.getLogger(genometools.__name__)
        p_logger.setLevel(logging.ERROR)
        try:
            gene_ontology = GeneOntology.read_obo(
                args.gene_ontology_file,
                part_of_cc_only=params.go_part_of_cc_only)
        finally:
            # reset the level even if parsing fails
            p_logger.setLevel(logging.NOTSET)

    M = GOPCA.simple_setup(matrix, params, gene_sets, gene_ontology,
                           verbose=verbose, **global_params)
    run = M.run()

    if run is None:
        logger.error('GO-PCA run failed!')
        return 1

    # write run to pickle file
    logger.info('Storing GO-PCA run in file "%s"...', args.output_file)
    run.write_pickle(args.output_file)

    return 0
Example #4
0
def my_config(my_params, my_gene_ontology_file, my_fly_gene_set_file):
    """Build a `GOPCAConfig` from the given parameters, an ontology read
    from an OBO file, and gene sets read from a TSV file."""
    # the ontology is read before the gene sets
    ontology_obj = GeneOntology.read_obo(my_gene_ontology_file)
    collection = GeneSetCollection.read_tsv(my_fly_gene_set_file)
    return GOPCAConfig(my_params, collection, ontology_obj)
Example #5
0
def my_config(my_params, my_gene_ontology_file, my_fly_gene_set_file):
    """Return a `GOPCAConfig` assembled from parameters, a parsed OBO
    ontology file and a parsed gene set TSV file."""
    # parse the two input files (ontology first, then gene sets)
    parsed_ontology = GeneOntology.read_obo(my_gene_ontology_file)
    parsed_gene_sets = GeneSetCollection.read_tsv(my_fly_gene_set_file)
    return GOPCAConfig(my_params, parsed_gene_sets, parsed_ontology)
Example #6
0
def main(args=None):
    """Run GO-PCA and store the result in a `pickle` file.

    Parameters
    ----------
    args: argparse.Namespace object, optional
        The argument values. If not specified, the values will be obtained by
        parsing the command line arguments using the `argparse` module.

    Returns
    -------
    int
        Exit code. 0 if no error occurred; 1 if a required parameter
        (expression file, gene set file, output file) is missing or if the
        GO-PCA run returned ``None``.

    Raises
    ------
    SystemError
        If the version of the Python interpreter is not >= 2.7.
    """
    vinfo = sys.version_info
    if not (vinfo >= (2, 7)):
        raise SystemError('Python interpreter version >= 2.7 required, '
                          'found %d.%d instead.' % (vinfo.major, vinfo.minor))

    if args is None:
        # read arguments from the command line
        parser = get_argument_parser()

        # parse first with default options, in case "--help" is specified
        # ("--help" causes the program to exit at this point)
        args = parser.parse_args()

        # now remove the defaults and parse again
        # (removing the defaults is important so that we know which values
        # were specified by the user)
        no_defaults = dict.fromkeys(GOPCA.get_param_defaults())
        no_defaults.update(dict.fromkeys(GOPCAParams.get_param_defaults()))
        parser.set_defaults(**no_defaults)
        args = parser.parse_args()

    # reporting options
    log_file = args.log_file
    quiet = args.quiet
    verbose = args.verbose

    # test if we can write to log_file?

    # configure root logger
    logger = util.get_logger(log_file=log_file, quiet=quiet)

    # check if required parameters were specified
    # (report every missing one before giving up)
    passed = True
    if args.expression_file is None:
        logger.error('No expression file specified!')
        passed = False
    if args.gene_set_file is None:
        logger.error('No gene set file specified!')
        passed = False
    if args.output_file is None:
        logger.error('No output file specified!')
        passed = False
    if not passed:
        logger.error('Not all required parameters were specified.')
        return 1

    # generate configuration
    if args.config_file is not None:
        # read parameter values from config file
        params = GOPCAParams.read_ini(args.config_file)
    else:
        # start with default configuration
        params = GOPCAParams()

    # overwrite parameters specified on the command line
    for p in GOPCAParams.get_param_defaults():
        v = getattr(args, p)
        if v is not None:
            logger.debug('Parameter "%s" specified on command line!', p)
            params.set_param(p, v)

    global_params = GOPCA.get_param_defaults()
    for k in list(global_params.keys()):
        v = getattr(args, k)
        if v is not None:
            # log the name of the global parameter being overridden (`k`);
            # `p` would be the stale variable of the loop above
            logger.debug('Parameter "%s" specified on command line!', k)
            global_params[k] = v

    # read expression file
    matrix = ExpMatrix.read_tsv(args.expression_file)
    logger.info(
        'Expression matrix size: ' + '(p = %d genes) x (n = %d samples).',
        matrix.p, matrix.n)

    # A None value (possible if the default was cleared above or the
    # caller did not set it) cannot be compared with an int on Python 3;
    # treat it as "do not filter".
    sel_var_genes = args.sel_var_genes
    if sel_var_genes is not None and sel_var_genes > 0:
        # filter genes by variance
        matrix = matrix.filter_variance(sel_var_genes)

    # read gene set file
    gene_sets = GeneSetCollection.read_tsv(args.gene_set_file)
    logger.debug('Gene sets read from "%s": %s',
                 args.gene_set_file, gene_sets)

    # read ontology file (if supplied)
    gene_ontology = None
    if args.gene_ontology_file is not None:
        # only let ERROR (and above) messages of the genometools logger
        # through while the ontology file is parsed
        p_logger = logging.getLogger(genometools.__name__)
        p_logger.setLevel(logging.ERROR)
        try:
            gene_ontology = GeneOntology.read_obo(
                args.gene_ontology_file,
                part_of_cc_only=params.go_part_of_cc_only)
        finally:
            # always reset the level, also when parsing raises
            p_logger.setLevel(logging.NOTSET)

    M = GOPCA.simple_setup(matrix,
                           params,
                           gene_sets,
                           gene_ontology,
                           verbose=verbose,
                           **global_params)
    run = M.run()

    if run is None:
        logger.error('GO-PCA run failed!')
        return 1

    # write run to pickle file
    logger.info('Storing GO-PCA run in file "%s"...', args.output_file)
    run.write_pickle(args.output_file)

    return 0
Example #7
0
def my_gene_ontology(my_gene_ontology_file):
    """Return the `GeneOntology` parsed from the given OBO file."""
    return GeneOntology.read_obo(my_gene_ontology_file)
Example #8
0
def my_gene_ontology(my_gene_ontology_file):
    """Parse the given OBO file and return the resulting `GeneOntology`."""
    parsed = GeneOntology.read_obo(my_gene_ontology_file)
    return parsed
def main(args=None):
    """Extract GO annotations and store in tab-delimited text file.

    Parameters
    ----------
    args: argparse.Namespace object, optional
        The argument values. If not specified, the values will be obtained by
        parsing the command line arguments using the `argparse` module.

    Returns
    -------
    int
        Exit code (0 if no error occurred).

    Raises
    ------
    SystemError
        If the version of the Python interpreter is not >= 2.7.
    AssertionError
        If the gene file, the gene ontology file, or the GOA association
        file does not exist (only when assertions are enabled).
    """
    vinfo = sys.version_info
    if not (vinfo >= (2, 7)):
        raise SystemError('Python interpreter version >= 2.7 required, '
                          'found %d.%d instead.' % (vinfo.major, vinfo.minor))

    if args is None:
        parser = get_argument_parser()
        args = parser.parse_args()

    gene_file = args.gene_file
    gene_ontology_file = args.gene_ontology_file
    goa_association_file = args.goa_association_file
    output_file = args.output_file

    evidence_codes = args.evidence_codes
    min_genes = args.min_genes_per_term
    max_genes = args.max_genes_per_term

    part_of_cc_only = args.part_of_cc_only

    # logging parameters
    log_file = args.log_file
    quiet = args.quiet
    verbose = args.verbose

    # configure root logger (once, before the first message is emitted)
    log_stream = sys.stdout
    if output_file == '-':
        # if we print output to stdout, redirect log messages to stderr
        log_stream = sys.stderr

    logger = misc.get_logger(log_stream=log_stream,
                             log_file=log_file,
                             quiet=quiet,
                             verbose=verbose)

    logger.info('Selected evidence codes: %s', ', '.join(evidence_codes))
    logger.info('Min. number of genes per gene set: %d', min_genes)
    logger.info('Max. number of genes per gene set: %d', max_genes)

    # checks
    # NOTE: assertions are skipped when Python runs with -O
    assert os.path.isfile(gene_file), \
        'Gene file "%s" not found.' % gene_file
    assert os.path.isfile(gene_ontology_file), \
        'Gene ontology file "%s" not found.' % gene_ontology_file
    assert os.path.isfile(goa_association_file), \
        'GOA association file "%s" not found.' % goa_association_file

    # read the genes from the tab-delimited gene file
    exp_genome = ExpGenome.read_tsv(gene_file)

    # parse Gene Ontology (honoring the "part_of_cc_only" option)
    gene_ontology = GeneOntology.read_obo(gene_ontology_file,
                                          part_of_cc_only=part_of_cc_only)

    # parse UniProt-GOA gene association file (gzip-compressed)
    with gzip.open(goa_association_file, 'rt', encoding='ascii') as fh:
        go_annotations = ontology.parse_gaf(fh,
                                            gene_ontology,
                                            ev_codes=evidence_codes,
                                            genome=exp_genome)

    # extract GO-based gene sets
    gene_sets = ontology.get_goa_gene_sets(go_annotations)
    logger.info('Generated %d GO-derived gene sets', len(gene_sets))

    # filter gene sets based on size
    # (a bound <= 0 disables the respective filter)
    if min_genes > 0:
        old_size = len(gene_sets)
        gene_sets = GeneSetCollection(gs for gs in gene_sets
                                      if gs.size >= min_genes)
        logger.info('Excluded %d gene sets with too few genes.',
                    old_size - len(gene_sets))

    if max_genes > 0:
        old_size = len(gene_sets)
        gene_sets = GeneSetCollection(gs for gs in gene_sets
                                      if gs.size <= max_genes)
        logger.info('Excluded %d gene sets with too many genes.',
                    old_size - len(gene_sets))

    # writing output file
    gene_sets.write_tsv(output_file)
    logger.info('Wrote %s GO-derived gene sets to output file "%s".',
                len(gene_sets), output_file)

    return 0