Exemple #1
0
def test_factory():
    """
    Build an association set from the POMBASE file and look for two
    specific evidence records.

    Every subject, annotated class and association is printed while the
    set is walked. The counter only advances for associations between
    PomBase:SPBC2D10.10c and GO:0005730 that carry either the expected
    ISO evidence or the expected IDA evidence; exactly two must be seen.
    """
    factory = AssociationSetFactory()
    ontology = OntologyFactory().create(ONT)
    aset = factory.create_from_file(POMBASE, ontology=ontology, skim=False)

    wanted_subject = 'PomBase:SPBC2D10.10c'
    wanted_class = 'GO:0005730'

    matches = 0
    for subj in aset.subjects:
        print('{} {}'.format(subj, aset.label(subj)))
        for cls in aset.annotations(subj):
            print('  {} {}'.format(cls, ontology.label(cls)))
            for assoc in aset.associations(subj, cls):
                evidence = assoc['evidence']
                ev_type = evidence['type']
                support = evidence['with_support_from']
                references = evidence['has_supporting_reference']
                print('    {} {} {}'.format(ev_type, support, references))

                # Only the one subject/class pair is of interest below.
                if subj != wanted_subject or cls != wanted_class:
                    continue

                if ev_type == 'ISO':
                    is_hit = (support == ['SGD:S000002172']
                              and references == ['GO_REF:0000024'])
                elif ev_type == 'IDA':
                    is_hit = references == ['PMID:16823372']
                else:
                    is_hit = False

                if is_hit:
                    matches += 1
                    logging.info('** FOUND: {}'.format(assoc))

    assert len(aset.associations_by_subj) > 0
    assert matches == 2
Exemple #2
0
def test_learn():
    """
    Fit an OntologyLearner using a separate target association set.

    Source associations come from GAF (with the ONT ontology) and target
    associations from TGAF (with the TONT ontology). The report produced
    by fit_all is written to target/pheno_index.md.
    """
    source_ont = OntologyFactory().create(ONT)
    target_ont = OntologyFactory().create(TONT)
    factory = AssociationSetFactory()
    source_assocs = factory.create_from_file(file=GAF, ontology=source_ont)
    target_assocs = factory.create_from_file(file=TGAF, ontology=target_ont)

    learner = ol.OntologyLearner(
        assocs=source_assocs,
        target_assocs=target_assocs,
        score_threshold=0.6,
    )
    print('L={}'.format(learner))
    print('L.assocs={}'.format(learner.assocs))
    print('L.tassocs={}'.format(learner.target_assocs))

    with open('target/pheno_index.md', 'w') as report:
        learner.fit_all(reportfile=report)

    print('L.targets={}'.format(learner.targets))
Exemple #3
0
def test_learn():
    """
    Fit an OntologyLearner on a single association set that is split in two.

    Associations are loaded from GAF with the ONT ontology, then
    split_assocs is called with CC and the subClassOf-only subontology
    before fitting. The report is written to target/index.md.
    """
    factory = AssociationSetFactory()
    ontology = OntologyFactory().create(ONT)

    all_assocs = factory.create_from_file(file=GAF, ontology=ontology)
    learner = ol.OntologyLearner(assocs=all_assocs)
    print('L={}'.format(learner))

    # Restrict the graph used for the split to subClassOf edges.
    is_a_ontology = ontology.subontology(relations=['subClassOf'])
    learner.split_assocs(CC, ontology=is_a_ontology)
    print('L.assocs={}'.format(learner.assocs))
    print('L.tassocs={}'.format(learner.target_assocs))

    with open('target/index.md', 'w') as report:
        learner.fit_all(reportfile=report)

    print('L.targets={}'.format(learner.targets))
Exemple #4
0
def test_learn_from_phenotype():
    """
    Learn GO from phenotypes.

    Note: some phenotypes in FYPO have graph paths to GO classes, so GO
    ends up being used to predict GO. That may look circular, but the
    phenotype annotation is a different piece of information.

    The report is written to target/from_phenotype/index.md, and the same
    directory is handed to fit_all.
    """
    source_ont = OntologyFactory().create(ONT)
    target_ont = OntologyFactory().create(TONT)
    factory = AssociationSetFactory()
    source_assocs = factory.create_from_file(file=GAF, ontology=source_ont)
    target_assocs = factory.create_from_file(file=TGAF, ontology=target_ont)

    learner = ol.OntologyLearner(
        assocs=source_assocs,
        target_assocs=target_assocs,
        score_threshold=0.9,
    )
    print('L={}'.format(learner))
    print('L.assocs={}'.format(learner.assocs))
    print('L.tassocs={}'.format(learner.target_assocs))

    outdir = 'target/from_phenotype'
    report_path = '{}/index.md'.format(outdir)
    with open(report_path, 'w') as report:
        learner.fit_all(dir=outdir, reportfile=report)

    print('L.targets={}'.format(learner.targets))
Exemple #5
0
def _str2bool(value):
    """
    Convert a command-line string into a boolean.

    ``type=bool`` must not be used with argparse: ``bool('False')`` is
    ``True`` because every non-empty string is truthy, so such an option
    could never be switched off explicitly.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised
            spelling of true or false.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if text in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value (true/false), got {!r}'.format(value))


def _build_parser():
    """
    Create the argument parser: global options plus one sub-parser per
    sub-command. Each sub-parser stores its handler under ``function``.
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for obographs assocmodel library'
        """
                                                 By default, ontologies and assocs are cached locally and synced from a remote sparql endpoint
                                                 """,
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-r',
                        '--resource',
                        type=str,
                        required=False,
                        help='Name of ontology')
    parser.add_argument('-f',
                        '--assocfile',
                        type=str,
                        required=False,
                        help='Name of input file for associations')
    parser.add_argument(
        '--assocformat',
        type=str,
        default='gaf',
        required=False,
        help='Format of association file, if passed (default: gaf)')
    parser.add_argument('-o',
                        '--outfile',
                        type=str,
                        required=False,
                        help='Path to output file')
    parser.add_argument('-t',
                        '--to',
                        type=str,
                        required=False,
                        help='Output to (tree, dot, ...)')
    parser.add_argument('-d',
                        '--direction',
                        type=str,
                        default='u',
                        required=False,
                        help='u = up, d = down, ud = up and down')
    parser.add_argument('-e',
                        '--evidence',
                        type=str,
                        required=False,
                        help='ECO class')
    parser.add_argument('-p',
                        '--properties',
                        nargs='*',
                        type=str,
                        required=False,
                        help='Properties')
    # Boolean options still take an explicit value (e.g. "-P true"), as
    # before, but the value is now actually parsed instead of being
    # passed through bool().
    parser.add_argument('-P',
                        '--plot',
                        type=_str2bool,
                        default=False,
                        help='if set, plot output (requires plotly)')
    parser.add_argument('-y',
                        '--yamlconfig',
                        type=str,
                        required=False,
                        help='Path to setup/configuration yaml file')
    parser.add_argument('-S',
                        '--slim',
                        type=str,
                        default='',
                        required=False,
                        help='Slim type. m=minimal')
    parser.add_argument('-c',
                        '--container_properties',
                        nargs='*',
                        type=str,
                        required=False,
                        help='Properties to nest in graph')
    parser.add_argument('-C',
                        '--category',
                        nargs=2,
                        type=str,
                        required=False,
                        help='category tuple (SUBJECT OBJECT)')
    parser.add_argument('-T',
                        '--taxon',
                        type=str,
                        required=False,
                        help='Taxon of associations')
    parser.add_argument('-v',
                        '--verbosity',
                        default=0,
                        action='count',
                        help='Increase output verbosity')

    subparsers = parser.add_subparsers(dest='subcommand',
                                       help='sub-command help')

    # EXTRACT ONTOLOGY
    parser_n = subparsers.add_parser(
        'subontology',
        help=
        'Extract sub-ontology, include only annotated nodes or their descendants'
    )
    parser_n.add_argument('-M',
                          '--minimal',
                          dest='minimal',
                          action='store_true',
                          default=False,
                          help='If set, remove non-MRCA nodes')
    parser_n.set_defaults(function=extract_ontology)

    # ENRICHMENT
    parser_n = subparsers.add_parser(
        'enrichment',
        help=
        'Perform an enrichment test over a sample set of annotated entities')
    parser_n.add_argument(
        '-q',
        '--query',
        type=str,
        help='query all genes for this class an use as subject')
    parser_n.add_argument('-H',
                          '--hypotheses',
                          nargs='*',
                          help='list of classes to test against')
    parser_n.add_argument(
        '-s',
        '--sample_file',
        type=str,
        help='file containing list of gene IDs in sample set')
    parser_n.add_argument(
        '-b',
        '--background_file',
        type=str,
        help='file containing list of gene IDs in background set')
    parser_n.add_argument('-t',
                          '--threshold',
                          type=float,
                          help='p-value threshold')
    parser_n.add_argument('sample_ids',
                          nargs='*',
                          help='list of gene IDs in sample set')
    parser_n.set_defaults(function=run_enrichment_test)

    # PHENOLOG
    parser_n = subparsers.add_parser(
        'phenolog',
        help=
        'Perform multiple enrichment tests, using a second ontology and assoc set to build gene sets'
    )
    # -R mirrors the top-level -r (ontology handle) and -F mirrors the
    # top-level -f (association file); the help texts follow that pairing.
    # NOTE(review): confirm against the phenolog handler, which is not
    # visible here.
    parser_n.add_argument('-R',
                          '--resource2',
                          type=str,
                          required=True,
                          help='handle for second ontology')
    parser_n.add_argument('-F',
                          '--file2',
                          type=str,
                          required=True,
                          help='path to second GAF')
    parser_n.set_defaults(function=run_phenolog)

    # QUERY
    parser_n = subparsers.add_parser(
        'query',
        help=
        'Query for entities (e.g. genes) based on positive and negative terms')
    parser_n.add_argument('-q', '--query', nargs='*', help='positive classes')
    parser_n.add_argument('-N',
                          '--negative',
                          type=str,
                          help='negative classes')
    parser_n.set_defaults(function=run_query)

    # QUERY ASSOCIATIONS
    parser_n = subparsers.add_parser(
        'associations',
        help='Query for associations for a set of entities (e.g. genes)')
    parser_n.add_argument('subjects', nargs='*', help='subject ids')
    parser_n.add_argument('-D', '--dendrogram', type=_str2bool, default=False)
    parser_n.set_defaults(function=run_query_associations)

    # INTERSECTIONS
    parser_n = subparsers.add_parser('intersections',
                                     help='Query intersections')
    parser_n.add_argument('-X', '--xterms', nargs='*', help='x classes')
    parser_n.add_argument('-Y', '--yterms', nargs='*', help='y classes')
    parser_n.add_argument('--useids',
                          type=_str2bool,
                          default=False,
                          help='if true, use IDs not labels on axes')
    parser_n.add_argument('terms', nargs='*', help='all terms (x and y)')
    parser_n.set_defaults(function=plot_intersections)

    # INTERSECTION DENDROGRAM (TODO: merge into previous?)
    parser_n = subparsers.add_parser('intersection-dendrogram',
                                     help='Plot dendrogram from intersections')
    parser_n.add_argument('-X', '--xterms', nargs='*', help='x classes')
    parser_n.add_argument('-Y', '--yterms', nargs='*', help='y classes')
    parser_n.add_argument('--useids',
                          type=_str2bool,
                          default=False,
                          help='if true, use IDs not labels on axes')
    parser_n.add_argument('terms', nargs='*', help='all terms (x and y)')
    parser_n.set_defaults(function=plot_term_intersection_dendrogram)

    # SIMILARITY MATRIX (may move to another module)
    parser_n = subparsers.add_parser(
        'simmatrix', help='Plot dendrogram for similarities between subjects')
    parser_n.add_argument('-X', '--xsubjects', nargs='*', help='x subjects')
    parser_n.add_argument('-Y', '--ysubjects', nargs='*', help='y subjects')
    parser_n.add_argument('--useids',
                          type=_str2bool,
                          default=False,
                          help='if true, use IDs not labels on axes')
    parser_n.add_argument('subjects', nargs='*', help='all terms (x and y)')
    parser_n.set_defaults(function=plot_simmatrix)

    return parser


def main():
    """
    Wrapper for OGR Assocs.

    Parses the command line, configures logging from the verbosity count,
    creates the ontology named by --resource, builds an association set
    (from --assocfile if given, otherwise from --taxon and --category),
    and finally calls the handler of the chosen sub-command as
    ``handler(ont, aset, args)``.

    Raises:
        ValueError: if neither an association file nor both taxon and
            category were supplied.
        SystemExit: on argument errors, including a missing sub-command.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.verbosity >= 2:
        log_level = logging.DEBUG
    elif args.verbosity == 1:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(level=log_level)

    if not args.assocfile:
        if not args.taxon or not args.category:
            raise ValueError(
                "Must specify EITHER assocfile OR both taxon and category")

    # Sub-commands are optional to argparse, so without one the namespace
    # has no 'function' attribute. Fail here, before the ontology and
    # associations are loaded, rather than with an AttributeError at the
    # very end.
    func = getattr(args, 'function', None)
    if func is None:
        parser.error('a sub-command is required')

    logging.info("Welcome!")

    if args.yamlconfig is not None:
        logging.info("Setting config from: {}".format(args.yamlconfig))
        # note this sets a global:
        # we would not do this outside the context of a standalone script
        from ontobio.config import set_config
        set_config(args.yamlconfig)

    handle = args.resource

    # Ontology Factory
    ofactory = OntologyFactory()
    logging.info("Creating ont object from: {} {}".format(handle, ofactory))
    ont = ofactory.create(handle)
    logging.info("ont: {}".format(ont))

    # 'noiea' is shorthand for excluding ECO:0000501.
    # NOTE(review): this local is not used again in this function and
    # args.evidence is left unchanged, so handlers do not see the
    # translated value - confirm whether that is intended.
    evidence = args.evidence
    if evidence is not None and evidence.lower() == 'noiea':
        evidence = "-ECO:0000501"

    # Association Factory
    afactory = AssociationSetFactory()
    if args.assocfile is not None:
        aset = afactory.create_from_file(file=args.assocfile,
                                         fmt=args.assocformat,
                                         ontology=ont)
    else:
        subject_category, object_category = args.category
        # create using GO/Monarch services
        aset = afactory.create(ontology=ont,
                               subject_category=subject_category,
                               object_category=object_category,
                               taxon=args.taxon)

    func(ont, aset, args)