def test_parse(self):
    if self.source is not None:  # don't test the abstract class
        self.source.parse()
        """
        Seems we get a better stack trace by not catching the exception.
        Am I missing something?

        try:
            self.source.parse()
        except Exception as ParseException:  # tec: too broad?
            logger.error(ParseException)
            self.assertFalse(True, "Parsing failed")
        """
        try:
            properties = GraphUtils.get_properties_from_graph(
                self.source.graph)
            GraphUtils.add_property_axioms(self.source.graph, properties)
            self.source.write(format='turtle')
        except Exception as WriteException:
            logger.error(WriteException)
            self.assertFalse(True, "Write failed")
    return
def test_parse(self):
    if self.source is not None:  # don't test the abstract class
        self.source.parse()
        """
        Seems we get a better stack trace by not catching the exception.
        Am I missing something?

        try:
            self.source.parse()
        except Exception as ParseException:  # tec: too broad?
            logger.error(ParseException)
            self.assertFalse(True, "Parsing failed")
        """
        try:
            properties = GraphUtils.get_properties_from_graph(self.source.graph)
            GraphUtils.add_property_axioms(self.source.graph, properties)
            self.source.write()  # defaults to fmt='turtle'
            # self.source.write(fmt='nt')
            # self.source.write(fmt='nquads')
        except Exception as WriteException:
            logger.error(WriteException)
            self.assertFalse(True, "Write failed")
    return
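Both variants above skip themselves when `self.source` is unset, so the abstract test class is never exercised directly; a concrete source test is expected to supply `self.source` in its `setUp`. Below is a minimal sketch of such a subclass, assuming the shared test case is called `SourceTestCase` and lives in `tests/test_source.py` (both names are assumptions, not confirmed by this excerpt); the constructor keywords mirror the `source_args` built in `main()` further down.

import unittest

from dipper.sources.MPD import MPD              # any concrete Source subclass
from tests.test_source import SourceTestCase    # assumed home of test_parse above


class MPDTestCase(SourceTestCase):
    """Hypothetical concrete test case that reuses the shared test_parse."""

    def setUp(self):
        # keyword names mirror source_args in main(); values are examples
        self.source = MPD(graph_type='rdf_graph', are_bnodes_skolemized=True)
        self.source.settestonly(True)

    def tearDown(self):
        self.source = None


if __name__ == '__main__':
    unittest.main()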
def main():
    # TODO this should be generated by looking in the dipper/sources directory
    source_to_class_map = {
        # 'facebase_alpha': 'FaceBase_alpha',
        'hpoa': 'HPOAnnotations',   # ~3 min
        'zfin': 'ZFIN',
        'omim': 'OMIM',
        'biogrid': 'BioGrid',
        'mgi': 'MGI',
        'impc': 'IMPC',
        # Panther takes ~1hr to map 7 species-worth of associations
        'panther': 'Panther',
        'ncbigene': 'NCBIGene',
        'ucscbands': 'UCSCBands',
        'ctd': 'CTD',
        'genereviews': 'GeneReviews',
        'eom': 'EOM',
        'coriell': 'Coriell',
        # 'clinvar': 'ClinVar',   # needs integrating here
        'monochrom': 'Monochrom',
        'kegg': 'KEGG',
        'animalqtldb': 'AnimalQTLdb',
        'ensembl': 'Ensembl',
        'hgnc': 'HGNC',
        'orphanet': 'Orphanet',
        'omia': 'OMIA',
        'flybase': 'FlyBase',
        'mmrrc': 'MMRRC',
        'wormbase': 'WormBase',
        'mpd': 'MPD',
        'gwascatalog': 'GWASCatalog',
        'monarch': 'Monarch',
        'go': 'GeneOntology',
        'reactome': 'Reactome',
        'udp': 'UDP',
        'mgi-slim': 'MGISlim',
        'zfinslim': 'ZFINSlim',
        'bgee': 'Bgee',
        'mydrug': 'MyDrug',
        'stringdb': 'StringDB',
        'rgd': 'RGD',
        'sgd': 'SGD',
        'mychem': 'MyChem',
        'ebi': 'EBIGene2Phen',
    }

    parser = argparse.ArgumentParser(
        description='Dipper: Data Ingestion Pipeline for Monarch',
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '-g', '--graph', type=str, default="rdf_graph",
        help='graph type: rdf_graph, streamed_graph')
    parser.add_argument(
        '-s', '--sources', type=str, default='?',
        help='comma separated list of sources')
    parser.add_argument(
        '-l', '--limit', type=int, help='limit number of rows used')
    parser.add_argument(
        '--parse_only', action='store_true',
        help='parse files without writing RDF')
    parser.add_argument(
        '--fetch_only', action='store_true',
        help='fetch sources without parsing')
    parser.add_argument(
        '-f', '--force', action='store_true',
        help='force re-download of files')
    parser.add_argument(
        '--no_verify', help='ignore the verification step',
        action='store_true')
    # parser.add_argument('--query', help='enter in a sparql query', type=str)
    parser.add_argument(
        '-q', '--quiet', help='turn off info logging', action="store_true")
    parser.add_argument(
        '--debug', help='turn on debug logging', action="store_true")
    parser.add_argument(
        '--skip_tests', help='skip any testing', action="store_true")

    # Blank Nodes can't be visualized in Protege, default to Skolemizing them
    parser.add_argument(
        '-b', '--use_bnodes',
        help="use blank nodes instead of skolemizing",
        action="store_true", default=False)

    parser.add_argument(
        '-t', '--taxon', type=str, default='9606',
        help='Constrain Source to supplied taxon identifier(s).\n'
             'Please enter comma delimited NCBITaxon numbers:\n'
             'Implemented taxa per source\n'
             'NCBIGene: 9606,10090,7955\n'
             'Panther: 9606,10090,10116,7227,7955,6239,8355\n'
             'BioGrid: 9606,10090,10116,7227,7955,6239,8355\n'
             'UCSCBands: 9606\n'
             'GO: 9606,10090,10116,7227,7955,6239,9615,9823,9031,9913')
    parser.add_argument(
        '-o', '--test_only',
        help='only process and output the pre-configured test subset',
        action="store_true")
    parser.add_argument(
        '--dest_fmt',
        help='serialization format: [turtle], nt, nquads, rdfxml, n3, raw',
        type=str)
    parser.add_argument('-v', '--version', help='version of source', type=str)

    args = parser.parse_args()
    tax_ids = None
    if args.taxon is not None:
        tax_ids = [str(int(t)) for t in args.taxon.split(',')]

    taxa_supported = [  # these are not taxa; janky approach
        'Panther', 'NCBIGene', 'BioGrid', 'UCSCBands', 'GeneOntology',
        'Bgee', 'Ensembl', 'StringDB']

    formats_supported = [
        'turtle', 'ttl', 'ntriples', 'nt', 'nquads', 'nq',
        'rdfxml', 'xml', 'notation3', 'n3', 'raw']

    if args.quiet:
        logging.getLogger().setLevel(logging.WARNING)
    else:
        if args.debug:
            logging.getLogger().setLevel(logging.DEBUG)
        else:
            logging.getLogger().setLevel(logging.INFO)

    if not args.use_bnodes:
        LOG.info("Will Skolemize Blank Nodes")

    # None of these query test utils exist in ./dipper/utils/TestUtils.py
    # if args.query is not None:
    #     test_query = TestUtils()
    #     for source in args.sources.split(','):
    #         source = source.lower()
    #         mysource = source_to_class_map[source]()
    #         # import source lib
    #         module = "dipper.sources.{0}".format(mysource)
    #         imported_module = importlib.import_module(module)
    #         source_class = getattr(imported_module, mysource)
    #         test_query.check_query_syntax(args.query, source_class)
    #         test_query.load_graph_from_turtle(source_class)
    #         print(test_query.query_graph(args.query, True))
    #     exit(0)

    # run initial tests
    if (args.no_verify or args.skip_tests) is not True:
        unittest.TextTestRunner(verbosity=2).run(TEST_SUITE)

    # set serializer
    if args.dest_fmt is not None:
        if args.dest_fmt in formats_supported:
            if args.dest_fmt == 'ttl':
                args.dest_fmt = 'turtle'
            elif args.dest_fmt == 'ntriples':
                args.dest_fmt = 'nt'
            elif args.dest_fmt == 'nq':
                args.dest_fmt = 'nquads'
            elif args.dest_fmt == 'xml':
                args.dest_fmt = 'rdfxml'
            elif args.dest_fmt == 'notation3':
                args.dest_fmt = 'n3'
        else:
            LOG.error(
                "You have specified an invalid serializer: %s", args.dest_fmt)
            exit(0)
    else:
        args.dest_fmt = 'turtle'

    # Provide feedback if we can't proceed
    if args.sources is None or \
            args.sources.split(',')[0] not in source_to_class_map:
        LOG.info('Unknown Source %s', args.sources.split(',')[0])
        LOG.info('Sources Known are limited to:')
        for key in sorted(source_to_class_map):
            LOG.info('\t%s\t%s', key, source_to_class_map[key])
        exit(0)

    # iterate through all the sources
    for source in args.sources.split(','):
        LOG.info("\n******* %s *******", source)
        source = source.lower()
        src = source_to_class_map[source]

        # import source lib
        module = "dipper.sources.{0}".format(src)
        imported_module = importlib.import_module(module)
        source_class = getattr(imported_module, src)
        mysource = None

        # arg factory
        source_args = dict(graph_type=args.graph)
        source_args['are_bnodes_skolemized'] = not args.use_bnodes

        # args should be available to source, supported (yet) or not
        if src in taxa_supported:
            source_args['tax_ids'] = tax_ids
        if args.version:
            source_args['version'] = args.version

        mysource = source_class(**source_args)
        if args.parse_only is False:
            start_fetch = time.perf_counter()
            mysource.fetch(args.force)
            end_fetch = time.perf_counter()
            LOG.info("Fetching time: %d sec", end_fetch - start_fetch)

        mysource.settestonly(args.test_only)

        # run tests first
        if (args.no_verify or args.skip_tests) is not True:
            suite = mysource.getTestSuite()
            if suite is None:
                LOG.warning("No tests configured for this source: %s", source)
            else:
                unittest.TextTestRunner(verbosity=2).run(suite)
        else:
            LOG.info("Skipping Tests for source: %s", source)

        if args.test_only is False and args.fetch_only is False:
            start_parse = time.perf_counter()
            mysource.parse(args.limit)
            end_parse = time.perf_counter()
            LOG.info("Parsing time: %d sec", end_parse - start_parse)

            if args.graph == 'rdf_graph':
                LOG.info("Found %d nodes", len(mysource.graph))

                # Add property axioms
                start_axiom_exp = time.perf_counter()
                LOG.info("Adding property axioms")
                properties = GraphUtils.get_properties_from_graph(mysource.graph)
                GraphUtils.add_property_axioms(mysource.graph, properties)
                end_axiom_exp = time.perf_counter()
                LOG.info(
                    "Property axioms added: %d sec",
                    end_axiom_exp - start_axiom_exp)

                start_write = time.perf_counter()
                mysource.write(fmt=args.dest_fmt)
                end_write = time.perf_counter()
                LOG.info("Writing time: %d sec", end_write - start_write)

        # if args.no_verify is not True:
        #     status = mysource.verify()
        #     if status is not True:
        #         LOG.error(
        #             'Source %s did not pass verification tests.', source)
        #         exit(1)
        # else:
        #     LOG.info('skipping verification step')

        LOG.info('***** Finished with %s *****', source)

    # load configuration parameters
    # for example, keys
    LOG.info("All done.")
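For reference, the source-loading step inside the loop above can be read in isolation: the lowercase CLI key is mapped to a class name, the matching `dipper.sources.<ClassName>` module is imported with `importlib`, and the class object is pulled off the module. A small sketch of that pattern follows; the `'mpd'`/`'MPD'` pair is just one entry from `source_to_class_map`, used here as an example.

import importlib


def load_source_class(src_key, source_to_class_map):
    """Resolve a lowercase source key (e.g. 'mpd') to its Source class."""
    src = source_to_class_map[src_key.lower()]
    module = importlib.import_module("dipper.sources.{0}".format(src))
    return getattr(module, src)


# usage, mirroring the loop in main():
# source_class = load_source_class('mpd', {'mpd': 'MPD'})
# mysource = source_class(graph_type='rdf_graph', are_bnodes_skolemized=True)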
def main():
    # TODO this should be generated by looking in the dipper/sources directory
    source_to_class_map = {
        # 'facebase_alpha': 'FaceBase_alpha',
        'hpoa': 'HPOAnnotations',
        'zfin': 'ZFIN',
        'omim': 'OMIM',
        'biogrid': 'BioGrid',
        'mgi': 'MGI',
        'impc': 'IMPC',
        'panther': 'Panther',
        'ncbigene': 'NCBIGene',
        'ucscbands': 'UCSCBands',
        'ctd': 'CTD',
        'genereviews': 'GeneReviews',
        'eom': 'EOM',
        'coriell': 'Coriell',
        # 'clinvar': 'ClinVar',   # needs integrating here
        'monochrom': 'Monochrom',
        'kegg': 'KEGG',
        'animalqtldb': 'AnimalQTLdb',
        'ensembl': 'Ensembl',
        'hgnc': 'HGNC',
        'orphanet': 'Orphanet',
        'omia': 'OMIA',
        'flybase': 'FlyBase',
        'mmrrc': 'MMRRC',
        'wormbase': 'WormBase',
        'mpd': 'MPD',
        'gwascatalog': 'GWASCatalog',
        'monarch': 'Monarch',
        'go': 'GeneOntology',
        'reactome': 'Reactome',
        'udp': 'UDP',
        'mgislim': 'MGISlim',
        'zfinslim': 'ZFINSlim',
        'bgee': 'Bgee',
        'mydrug': 'MyDrug',
        'stringdb': 'StringDB',
        'rgd': 'RGD',
        'sgd': 'SGD',
        'mychem': 'MyChem',
        'ebi': 'EBIGene2Phen',
        'xenbase': 'Xenbase',
    }

    parser = argparse.ArgumentParser(
        description='Dipper: Data Ingestion Pipeline for Monarch',
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '-g', '--graph', type=str, default="rdf_graph",
        help='graph type: rdf_graph, streamed_graph')
    parser.add_argument(
        '-s', '--sources', type=str, default='?',
        help='comma separated list of sources')
    parser.add_argument(
        '-l', '--limit', type=int, help='limit number of rows used')
    parser.add_argument(
        '--parse_only', action='store_true',
        help='parse files without writing RDF')
    parser.add_argument(
        '--fetch_only', action='store_true',
        help='fetch sources without parsing')
    parser.add_argument(
        '-f', '--force', action='store_true',
        help='force re-download of files')
    parser.add_argument(
        '--no_verify', help='ignore the verification step',
        action='store_true')
    # parser.add_argument('--query', help='enter in a sparql query', type=str)
    parser.add_argument(
        '-q', '--quiet', help='turn off info logging', action="store_true")
    parser.add_argument(
        '--debug', help='turn on debug logging', action="store_true")
    parser.add_argument(
        '--skip_tests', help='skip any testing', action="store_true")

    # Blank Nodes can't be visualized in Protege, default to Skolemizing them
    parser.add_argument(
        '-b', '--use_bnodes',
        help="use blank nodes instead of skolemizing",
        action="store_true", default=False)

    # TODO help needs revisiting; push constraints off to the src
    parser.add_argument(
        '-t', '--taxon', type=str,
        help='''Constrain Source to supplied taxon identifier(s).
Please enter comma delimited NCBITaxon numbers:
Implemented taxa per source
NCBIGene: 9606,10090,7955
Panther: 9606,10090,10116,7227,7955,6239,8355
BioGrid: 9606,10090,10116,7227,7955,6239,8355
UCSCBands: 9606
GO: 9606,10090,10116,7227,7955,6239,9615,9823,9031,9913,4896,5782,5052
''')
    parser.add_argument(
        '-o', '--test_only',
        help='only process and output the pre-configured test subset',
        action="store_true")
    parser.add_argument(
        '--dest_fmt',
        help='serialization format: [turtle], nt, nquads, rdfxml, n3, raw',
        type=str)
    parser.add_argument(
        '-v', '--version', help='version of source (deprecated)', type=str)
    parser.add_argument(
        '-d', '--data_release_version', type=str,
        help='''string indicating the version of data release, e.g. '201908' (YYYYMM),
used to construct metadata, including version and distribution IRIs
and downloadURLs
[defaults to date at start of runtime in ISO 8601 format]
''')

    args = parser.parse_args()
    tax_ids = None
    if args.taxon is not None:
        tax_ids = [str(t) for t in args.taxon.split(',') if t.isdigit()]

    species_specific = [
        'Panther', 'NCBIGene', 'BioGrid', 'UCSCBands', 'GeneOntology',
        'Bgee', 'StringDB', 'Ensembl'
    ]

    formats_supported = [
        'turtle', 'ttl', 'ntriples', 'nt', 'nquads', 'nq',
        'rdfxml', 'xml', 'notation3', 'n3', 'raw'
    ]

    if args.quiet:
        logging.getLogger().setLevel(logging.WARNING)
    else:
        if args.debug:
            logging.getLogger().setLevel(logging.DEBUG)
        else:
            logging.getLogger().setLevel(logging.INFO)

    if not args.use_bnodes:
        LOG.info("Will Skolemize Blank Nodes")

    # None of these query test utils exist in ./dipper/utils/TestUtils.py
    # if args.query is not None:
    #     test_query = TestUtils()
    #     for source in args.sources.split(','):
    #         source = source.lower()
    #         mysource = source_to_class_map[source]()
    #         # import source lib
    #         module = "dipper.sources.{0}".format(mysource)
    #         imported_module = importlib.import_module(module)
    #         source_class = getattr(imported_module, mysource)
    #         test_query.check_query_syntax(args.query, source_class)
    #         test_query.load_graph_from_turtle(source_class)
    #         print(test_query.query_graph(args.query, True))
    #     exit(0)

    # run initial tests
    if (args.no_verify or args.skip_tests) is not True:
        unittest.TextTestRunner(verbosity=2).run(TEST_SUITE)

    # set serializer
    if args.dest_fmt is not None:
        if args.dest_fmt in formats_supported:
            if args.dest_fmt == 'ttl':
                args.dest_fmt = 'turtle'
            elif args.dest_fmt == 'ntriples':
                args.dest_fmt = 'nt'
            elif args.dest_fmt == 'nq':
                args.dest_fmt = 'nquads'
            elif args.dest_fmt == 'xml':
                args.dest_fmt = 'rdfxml'
            elif args.dest_fmt == 'notation3':
                args.dest_fmt = 'n3'
        else:
            LOG.error(
                "You have specified an invalid serializer: %s", args.dest_fmt)
            exit(0)
    else:
        args.dest_fmt = 'turtle'

    # Provide feedback if we can't proceed
    if args.sources is None or args.sources.split(
            ',')[0] not in source_to_class_map:
        LOG.info('Unknown Source %s', args.sources.split(',')[0])
        LOG.info('Sources Known are limited to:')
        for key in sorted(source_to_class_map):
            LOG.info('\t%s\t%s', key, source_to_class_map[key])
        exit(0)

    # iterate through all the sources
    for source in args.sources.split(','):
        LOG.info("\n******* %s *******", source)
        source = source.lower()
        src = source_to_class_map[source]

        # import source lib
        module = "dipper.sources.{0}".format(src)
        imported_module = importlib.import_module(module)
        source_class = getattr(imported_module, src)
        mysource = None

        LOG.info(
            'Command line arguments available to dipper-etl:\n%s',
            "\n".join(
                ['\t{}: {}'.format(k, v) for k, v in vars(args).items()]))

        source_args = dict(graph_type=args.graph)
        source_args['are_bnodes_skolemized'] = not args.use_bnodes
        if src in species_specific:
            source_args['tax_ids'] = tax_ids
        if args.version:
            source_args['version'] = args.version
        if args.data_release_version:
            source_args['data_release_version'] = args.data_release_version

        mysource = source_class(**source_args)

        # WIP: cli args should be available to the source
        if hasattr(mysource, 'ARGV'):
            mysource.ARGV = vars(args)
        else:
            LOG.error('nowhere to put args in %s', mysource.__class__)

        if args.parse_only is False:
            start_fetch = time.perf_counter()
            mysource.fetch(args.force)
            end_fetch = time.perf_counter()
            LOG.info("Fetching time: %d sec", end_fetch - start_fetch)

        mysource.settestonly(args.test_only)

        # create source ingest graph first (with pristine arguments)
        if args.test_only is False and args.fetch_only is False:
            start_parse = time.perf_counter()
            mysource.parse(args.limit)
            end_parse = time.perf_counter()
            LOG.info("Parsing time: %d sec", end_parse - start_parse)

            if args.graph == 'rdf_graph':
                LOG.info("Found %d nodes", len(mysource.graph))

                # Add property axioms
                start_axiom_exp = time.perf_counter()
                LOG.info("Adding property axioms")
                properties = GraphUtils.get_properties_from_graph(
                    mysource.graph)
                GraphUtils.add_property_axioms(mysource.graph, properties)
                LOG.info(
                    "Property axioms added: %d sec",
                    time.perf_counter() - start_axiom_exp)

                start_write = time.perf_counter()
                mysource.write(fmt=args.dest_fmt)
                LOG.info(
                    "Writing time: %d sec", time.perf_counter() - start_write)
            # elif args.graph == 'streamed_graph': ...

        # '*_test.ttl' graphs if requested
        if (args.no_verify or args.skip_tests) is False:
            suite = mysource.getTestSuite()
            if suite is None:
                LOG.warning("No tests configured for this source: %s", source)
            else:
                unittest.TextTestRunner(verbosity=2).run(suite)
        else:
            LOG.info("Skipping Tests for source: %s", source)

        LOG.info('***** Finished with %s *****', source)

    LOG.info("All done.")
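The serializer handling in both `main()` variants is an if/elif chain over `formats_supported`. A table-driven sketch of the same normalization is shown below; this is an alternative illustration, not the project's code, and the alias pairs are copied from the chain above.

FORMATS_SUPPORTED = {
    'turtle', 'ttl', 'ntriples', 'nt', 'nquads', 'nq',
    'rdfxml', 'xml', 'notation3', 'n3', 'raw'}

# short alias -> canonical serializer name (pairs copied from main() above)
FMT_ALIASES = {
    'ttl': 'turtle', 'ntriples': 'nt', 'nq': 'nquads',
    'xml': 'rdfxml', 'notation3': 'n3'}


def normalize_dest_fmt(dest_fmt):
    """Return the canonical serializer name; default to turtle, reject unknowns."""
    if dest_fmt is None:
        return 'turtle'
    if dest_fmt not in FORMATS_SUPPORTED:
        raise ValueError("invalid serializer: {}".format(dest_fmt))
    return FMT_ALIASES.get(dest_fmt, dest_fmt)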
def main():
    # TODO this should be generated by looking in the dipper/sources directory
    # or read from a sources/dataset/config yaml or dir of yamls
    source_to_class_map = {
        # 'facebase_alpha': 'FaceBase_alpha',
        'hpoa': 'HPOAnnotations',   # ~3 min
        'zfin': 'ZFIN',
        'omim': 'OMIM',   # full file takes ~15 min, due to required throttling
        'biogrid': 'BioGrid',   # interactions file takes <10 minutes
        'mgi': 'MGI',
        'impc': 'IMPC',
        # Panther takes ~1hr to map 7 species-worth of associations
        'panther': 'Panther',
        'oma': 'OMA',
        'ncbigene': 'NCBIGene',   # takes about 4 minutes to process 2 species
        'ucscbands': 'UCSCBands',
        'ctd': 'CTD',
        'genereviews': 'GeneReviews',
        'eom': 'EOM',   # Takes about 5 seconds.
        'coriell': 'Coriell',
        # 'clinvar': 'ClinVar',                    # takes ~ half hour
        # 'clinvarxml_alpha': 'ClinVarXML_alpha',  # takes ~ five minutes
        'monochrom': 'Monochrom',
        'kegg': 'KEGG',
        'animalqtldb': 'AnimalQTLdb',
        'ensembl': 'Ensembl',
        'hgnc': 'HGNC',
        'orphanet': 'Orphanet',
        'omia': 'OMIA',
        'flybase': 'FlyBase',
        'mmrrc': 'MMRRC',
        'wormbase': 'WormBase',
        'mpd': 'MPD',
        'gwascatalog': 'GWASCatalog',
        'monarch': 'Monarch',
        'go': 'GeneOntology',
        'reactome': 'Reactome',
        'udp': 'UDP',
        'mgi-slim': 'MGISlim',
        'zfin-slim': 'ZFINSlim',
        'bgee': 'Bgee',
        'mydrug': 'MyDrug',
        'stringdb': 'StringDB',
        'rgd': 'RGD',
        'sgd': 'SGD'
    }

    logger = logging.getLogger(__name__)

    parser = argparse.ArgumentParser(
        description='Dipper: Data Ingestion Pipeline for SciGraph',
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '-g', '--graph', type=str, default="rdf_graph",
        help='graph type: rdf_graph, streamed_graph')
    parser.add_argument(
        '-s', '--sources', type=str, required=True,
        help='comma separated list of sources')
    parser.add_argument(
        '-l', '--limit', type=int, help='limit number of rows')
    parser.add_argument(
        '--parse_only', action='store_true',
        help='parse files without writing')
    parser.add_argument(
        '--fetch_only', action='store_true',
        help='fetch sources without parsing')
    parser.add_argument(
        '-f', '--force', action='store_true',
        help='force re-download of files')
    parser.add_argument(
        '--no_verify', help='ignore the verification step',
        action='store_true')
    parser.add_argument('--query', help='enter in a sparql query', type=str)
    parser.add_argument(
        '-q', '--quiet', help='turn off info logging', action="store_true")
    parser.add_argument(
        '--debug', help='turn on debug logging', action="store_true")
    parser.add_argument(
        '--skip_tests', help='skip any testing', action="store_true")

    # Blank Nodes can't be visualized in Protege, default to Skolemizing them
    parser.add_argument(
        '-b', '--use_bnodes',
        help="use blank nodes instead of skolemizing",
        action="store_true", default=False)

    # TODO this should live in a global data file
    # and the same filter be applied to all sources
    parser.add_argument(
        '-t', '--taxon', type=str,
        help='Add a taxon constraint on a source. Enter 1+ NCBITaxon numbers,'
             ' comma delimited\n'
             'Implemented taxa per source\n'
             'NCBIGene: 9606,10090,7955\n'
             'Panther: 9606,10090,10116,7227,7955,6239,8355\n'
             'BioGrid: 9606,10090,10116,7227,7955,6239,8355\n'
             'UCSCBands: 9606\n'
             'GO: 9606,10090,10116,7227,7955,6239,9615,9823,9031,9913')
    parser.add_argument(
        '-o', '--test_only',
        help='only process and output the pre-configured test subset',
        action="store_true")
    parser.add_argument(
        '--dest_fmt',
        help='serialization format: [turtle], nt, nquads, rdfxml, n3, raw',
        type=str)
    parser.add_argument(
        '--version', '-v', help='version of source', type=str)

    args = parser.parse_args()
    tax_ids = None
    if args.taxon is not None:
        tax_ids = [int(t) for t in args.taxon.split(',')]

    taxa_supported = [  # these are not taxa
        'Panther', 'NCBIGene', 'BioGrid', 'UCSCBands', 'GeneOntology',
        'Bgee', 'Ensembl', 'StringDB', 'OMA']

    formats_supported = [
        'turtle', 'ttl', 'ntriples', 'nt', 'nquads', 'nq',
        'rdfxml', 'xml', 'notation3', 'n3', 'raw']

    if args.quiet:
        logging.basicConfig(level=logging.ERROR)
    else:
        if args.debug:
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.INFO)

    if not args.use_bnodes:
        logger.info("Will Skolemize Blank Nodes")

    if args.query is not None:
        test_query = TestUtils()
        for source in args.sources.split(','):
            source = source.lower()
            mysource = source_to_class_map[source]()

            # import source lib
            module = "dipper.sources.{0}".format(mysource)
            imported_module = importlib.import_module(module)
            source_class = getattr(imported_module, mysource)

            test_query.check_query_syntax(args.query, source_class)
            test_query.load_graph_from_turtle(source_class)
            print(test_query.query_graph(args.query, True))
        exit(0)

    # run initial tests
    if (args.no_verify or args.skip_tests) is not True:
        unittest.TextTestRunner(verbosity=2).run(test_suite)

    # set serializer
    if args.dest_fmt is not None:
        if args.dest_fmt in formats_supported:
            if args.dest_fmt == 'ttl':
                args.dest_fmt = 'turtle'
            elif args.dest_fmt == 'ntriples':
                args.dest_fmt = 'nt'
            elif args.dest_fmt == 'nq':
                args.dest_fmt = 'nquads'
            elif args.dest_fmt == 'xml':
                args.dest_fmt = 'rdfxml'
            elif args.dest_fmt == 'notation3':
                args.dest_fmt = 'n3'
        else:
            logger.error(
                "You have specified an invalid serializer: %s", args.dest_fmt)
            exit(0)
    else:
        args.dest_fmt = 'turtle'

    # iterate through all the sources
    for source in args.sources.split(','):
        logger.info("\n******* %s *******", source)
        source = source.lower()
        src = source_to_class_map[source]

        # import source lib
        module = "dipper.sources.{0}".format(src)
        imported_module = importlib.import_module(module)
        source_class = getattr(imported_module, src)
        mysource = None

        # arg factory
        source_args = dict(graph_type=args.graph)
        source_args['are_bnodes_skolemized'] = not args.use_bnodes
        if src in taxa_supported:
            source_args['tax_ids'] = tax_ids
        if args.version:
            source_args['version'] = args.version

        mysource = source_class(**source_args)
        if args.parse_only is False:
            start_fetch = time.clock()
            mysource.fetch(args.force)
            end_fetch = time.clock()
            logger.info("Fetching time: %d sec", end_fetch - start_fetch)

        mysource.settestonly(args.test_only)

        # run tests first
        if (args.no_verify or args.skip_tests) is not True:
            suite = mysource.getTestSuite()
            if suite is None:
                logger.warning(
                    "No tests configured for this source: %s", source)
            else:
                unittest.TextTestRunner(verbosity=2).run(suite)
        else:
            logger.info("Skipping Tests for source: %s", source)

        if args.test_only is False and args.fetch_only is False:
            start_parse = time.clock()
            mysource.parse(args.limit)
            end_parse = time.clock()
            logger.info("Parsing time: %d sec", end_parse - start_parse)

            if args.graph == 'rdf_graph':
                logger.info("Found %d nodes", len(mysource.graph))

                # Add property axioms
                start_axiom_exp = time.clock()
                logger.info("Adding property axioms")
                properties = GraphUtils.get_properties_from_graph(
                    mysource.graph)
                GraphUtils.add_property_axioms(mysource.graph, properties)
                end_axiom_exp = time.clock()
                logger.info(
                    "Property axioms added: %d sec",
                    end_axiom_exp - start_axiom_exp)

                start_write = time.clock()
                mysource.write(fmt=args.dest_fmt)
                end_write = time.clock()
                logger.info("Writing time: %d sec", end_write - start_write)

        # if args.no_verify is not True:
        #     status = mysource.verify()
        #     if status is not True:
        #         logger.error(
        #             'Source %s did not pass verification tests.', source)
        #         exit(1)
        # else:
        #     logger.info('skipping verification step')

        logger.info('***** Finished with %s *****', source)

    # load configuration parameters
    # for example, keys
    logger.info("All done.")
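The `--query` branch in this older revision depends on `TestUtils` helpers (`check_query_syntax`, `load_graph_from_turtle`, `query_graph`) that, per the comment in the newer revisions, no longer exist in ./dipper/utils/TestUtils.py. A rough stand-in using rdflib directly is sketched below; the file path and query are placeholders, and this is not the removed helpers' implementation.

from rdflib import Graph


def query_turtle(turtle_path, sparql):
    """Load a turtle file written by a source and run a SPARQL query over it."""
    graph = Graph()
    graph.parse(turtle_path, format='turtle')
    return list(graph.query(sparql))


# example (placeholder path and query):
# for row in query_turtle('out/mpd.ttl',
#                         'SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10'):
#     print(row)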