def main():
    """CLI entry point: load document DOI numbers from the SciELO website.

    Connects to the articlemeta MongoDB (``MONGODB_HOST`` env var, with a
    local default), parses command-line options, configures logging and
    delegates the actual work to ``run``.
    """
    db_dsn = os.environ.get('MONGODB_HOST', 'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt propagate.
    except Exception:
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    _collections = collections_acronym(articlemeta_db)

    parser = argparse.ArgumentParser(
        description="Load documents DOI from SciELO website"
    )

    parser.add_argument(
        '--collection',
        '-c',
        choices=_collections,
        help='Collection acronym'
    )

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help='Apply processing to all records or just records without the license parameter'
    )

    parser.add_argument(
        '--scrap_scielo',
        '-s',
        action='store_true',
        help='Try to Scrapy SciELO Website, articles page to get the DOI number'
    )

    parser.add_argument(
        '--query_crossref',
        '-d',
        action='store_true',
        help='Try to query to crossref API for the DOI number'
    )

    parser.add_argument(
        '--logging_level',
        '-l',
        default=LOGGING_LEVEL,
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logging level'  # typo fix: was 'Logggin level'
    )

    args = parser.parse_args()

    # Propagate the chosen level to the console handler and every logger.
    LOGGING['handlers']['console']['level'] = args.logging_level
    for lg, content in LOGGING['loggers'].items():
        content['level'] = args.logging_level

    logging.config.dictConfig(LOGGING)

    # No explicit collection means: process every known collection.
    collections = [args.collection] if args.collection else _collections

    run(articlemeta_db, collections, args.all_records, args.scrap_scielo, args.query_crossref)
def main():
    """CLI entry point: load document DOI numbers from the SciELO website.

    Connects to the articlemeta MongoDB (``MONGODB_HOST`` env var, with a
    local default), parses command-line options, configures logging and
    delegates the actual work to ``run``.
    """
    db_dsn = os.environ.get('MONGODB_HOST', 'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt propagate.
    except Exception:
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    _collections = collections_acronym(articlemeta_db)

    parser = argparse.ArgumentParser(
        description="Load documents DOI from SciELO website"
    )

    parser.add_argument(
        '--collection',
        '-c',
        # BUG FIX: was ``collections_acronym()`` — a call without the required
        # db argument; reuse the result already computed above.
        choices=_collections,
        help='Collection acronym'
    )

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help='Apply processing to all records or just records without the license parameter'
    )

    parser.add_argument(
        '--scrap_scielo',
        '-s',
        action='store_true',
        help='Try to Scrapy SciELO Website, articles page to get the DOI number'
    )

    parser.add_argument(
        '--query_crossref',
        '-d',
        action='store_true',
        help='Try to query to crossref API for the DOI number'
    )

    parser.add_argument(
        '--logging_level',
        '-l',
        default=LOGGING_LEVEL,
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logging level'  # typo fix: was 'Logggin level'
    )

    args = parser.parse_args()

    # Propagate the chosen level to the console handler and every logger.
    LOGGING['handlers']['console']['level'] = args.logging_level
    for lg, content in LOGGING['loggers'].items():
        content['level'] = args.logging_level

    logging.config.dictConfig(LOGGING)

    # No explicit collection means: process every known collection.
    collections = [args.collection] if args.collection else _collections

    run(articlemeta_db, collections, args.all_records, args.scrap_scielo, args.query_crossref)
def main():
    """CLI entry point: load languages from SciELO static files.

    Connects to the articlemeta MongoDB (``MONGODB_HOST`` env var, with a
    local default), parses command-line options, configures logging and
    delegates the actual work to ``run``.
    """
    db_dsn = os.environ.get('MONGODB_HOST', 'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt propagate.
    except Exception:
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    parser = argparse.ArgumentParser(
        description="Load Languages from SciELO static files available in the file system"
    )

    _collections_acronyms = collections_acronym(articlemeta_db)

    parser.add_argument(
        '--collection',
        '-c',
        choices=_collections_acronyms,
        help='Collection acronym'
    )

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help='Apply processing to all records or just records without the languages parameter'
    )

    # NOTE(review): parsed but never used below — logging goes through the
    # LOGGING dictConfig only. Kept for CLI backward compatibility.
    parser.add_argument(
        '--logging_file',
        '-o',
        help='Full path to the log file'
    )

    parser.add_argument(
        '--logging_level',
        '-l',
        default=LOGGING_LEVEL,
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logging level'  # typo fix: was 'Logggin level'
    )

    parser.add_argument(
        '--domain',
        '-d',
        help='Collection domain to get Static catalog'
    )

    args = parser.parse_args()

    # Propagate the chosen level to the console handler and every logger.
    LOGGING['handlers']['console']['level'] = args.logging_level
    for lg, content in LOGGING['loggers'].items():
        content['level'] = args.logging_level

    logging.config.dictConfig(LOGGING)

    # No explicit collection means: process every known collection.
    collections = [args.collection] if args.collection else _collections_acronyms

    run(collections, articlemeta_db, args.all_records, args.domain)
def __init__(self):
    """Wire up the admin token and the data broker from env vars and config.

    Precedence for the admin token: ``ADMIN_TOKEN`` env var, then the
    ``admintoken`` setting under ``app:main``, then a random hex token.
    """
    env_settings = dict(utils.Configuration.from_env().items())

    mongo_dsn = os.environ.get(
        'MONGODB_HOST', env_settings.get('mongo_uri', '127.0.0.1:27017'))

    token = os.environ.get('ADMIN_TOKEN', None)
    if not token:
        token = env_settings['app:main'].get('admintoken', uuid.uuid4().hex)
    self._admintoken = token

    self._databroker = DataBroker(get_dbconn(mongo_dsn))
def main(global_config, **settings):
    """Build and return the Pyramid WSGI application.

    NOTE: views answering methods other than GET historically required
    duplicate route paths to accept URLs with or without a trailing slash;
    see http://docs.pylonsproject.org/projects/pyramid/en/latest/narr/urldispatch.html#redirecting-to-slash-appended-routes
    """
    config = Configurator(settings=settings)
    config.add_renderer('jsonp', JSONP(param_name='callback', indent=4))

    db_dsn = os.environ.get('MONGODB_HOST', settings.get('mongo_uri', '127.0.0.1:27017'))
    db_client = controller.get_dbconn(db_dsn)

    def add_databroker(request):
        """Add a databroker to all incoming request"""
        return controller.DataBroker(db_client)

    # (route name, URL pattern) pairs, registered in declaration order.
    routes = [
        ('index', '/'),
        # collections - GET method:
        ('collection', '/api/v1/collection/'),
        ('identifiers_collection', '/api/v1/collection/identifiers/'),
        # journals - GET method:
        ('journal', '/api/v1/journal/'),
        ('identifiers_journal', '/api/v1/journal/identifiers/'),
        ('exists_journal', '/api/v1/journal/exists/'),
        # issues - GET method:
        ('get_issue', '/api/v1/issue/'),
        ('get_issues', '/api/v1/issues/'),
        ('identifiers_issue', '/api/v1/issue/identifiers/'),
        ('exists_issue', '/api/v1/issue/exists/'),
        # articles - GET method:
        ('get_article', '/api/v1/article/'),
        ('get_articles', '/api/v1/articles/'),
        ('identifiers_article', '/api/v1/article/identifiers/'),
        ('exists_article', '/api/v1/article/exists/'),
        # press releases - GET method:
        ('identifiers_press_release', '/api/v1/press_release/identifiers/'),
        # logs historychanges - GET method:
        ('list_historychanges_article', '/api/v1/article/history/'),
        ('list_historychanges_journal', '/api/v1/journal/history/'),
        ('list_historychanges_issue', '/api/v1/issue/history/'),
    ]
    for route_name, pattern in routes:
        config.add_route(route_name, pattern)

    # others
    config.add_request_method(add_databroker, 'databroker', reify=True)
    config.scan()

    return config.make_wsgi_app()
def main():
    """CLI entry point: load section data from SciELO static files.

    Connects to the articlemeta MongoDB (``MONGODB_HOST`` env var, with a
    local default), parses command-line options, configures logging via
    ``_config_logging`` and delegates the actual work to ``run``.
    """
    db_dsn = os.environ.get('MONGODB_HOST', 'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt propagate.
    except Exception:
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    _collections_acronyms = collections_acronym(articlemeta_db)

    parser = argparse.ArgumentParser(
        description="Load Languages from SciELO static files available in the file system"
    )

    parser.add_argument(
        '--collection',
        '-c',
        choices=_collections_acronyms,
        help='Collection acronym'
    )

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help='Apply processing to all records or just records without the section parameter'
    )

    parser.add_argument(
        '--logging_file',
        '-o',
        help='Full path to the log file'
    )

    parser.add_argument(
        '--logging_level',
        '-l',
        default='DEBUG',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logging level'  # typo fix: was 'Logggin level'
    )

    args = parser.parse_args()

    _config_logging(args.logging_level, args.logging_file)

    # No explicit collection means: process every known collection.
    collections = [args.collection] if args.collection else _collections_acronyms

    run(articlemeta_db, collections, args.all_records)
def main():
    """CLI entry point: load section data from SciELO static files.

    Connects to the articlemeta MongoDB (``MONGODB_HOST`` env var, with a
    local default), parses command-line options, configures logging via
    ``_config_logging`` and delegates the actual work to ``run``.
    """
    db_dsn = os.environ.get('MONGODB_HOST', 'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt propagate.
    except Exception:
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    _collections_acronyms = collections_acronym(articlemeta_db)

    parser = argparse.ArgumentParser(
        description="Load Languages from SciELO static files available in the file system"
    )

    parser.add_argument(
        '--collection',
        '-c',
        choices=_collections_acronyms,
        help='Collection acronym'
    )

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help='Apply processing to all records or just records without the section parameter'
    )

    parser.add_argument(
        '--logging_file',
        '-o',
        help='Full path to the log file'
    )

    parser.add_argument(
        '--logging_level',
        '-l',
        default='DEBUG',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logging level'  # typo fix: was 'Logggin level'
    )

    args = parser.parse_args()

    _config_logging(args.logging_level, args.logging_file)

    # No explicit collection means: process every known collection.
    collections = [args.collection] if args.collection else _collections_acronyms

    run(articlemeta_db, collections, args.all_records)
def main():
    """CLI entry point: load document bodies from the SciELO website.

    Connects to the articlemeta MongoDB (``MONGODB_HOST`` env var, with a
    local default), parses command-line options, configures logging and
    delegates the actual work to ``run``. ``--collection`` is mandatory.
    """
    db_dsn = os.environ.get('MONGODB_HOST', 'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt propagate.
    except Exception:
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    _collections = collections_acronym(articlemeta_db)

    parser = argparse.ArgumentParser(
        description="Load documents body from SciELO website"
    )

    parser.add_argument(
        '--collection',
        '-c',
        choices=_collections,
        help='Collection acronym'
    )

    parser.add_argument(
        '--pids',
        '-p',
        nargs='*',
        help="List of pids. Separate by space Ex.: 'python load_body.py -p 'S0102-05362006000100018 S0102-05362006000100015'"
    )

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help='Apply processing to all records or just records without the body parameter'
    )

    # NOTE(review): parsed but never used below — logging goes through the
    # LOGGING dictConfig only. Kept for CLI backward compatibility.
    parser.add_argument(
        '--logging_file',
        '-o',
        help='Full path to the log file'
    )

    parser.add_argument(
        '--logging_level',
        '-l',
        default=LOGGING_LEVEL,
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logging level'  # typo fix: was 'Logggin level'
    )

    args = parser.parse_args()

    # Propagate the chosen level to the console handler and every logger.
    LOGGING['handlers']['console']['level'] = args.logging_level
    for lg, content in LOGGING['loggers'].items():
        content['level'] = args.logging_level

    logging.config.dictConfig(LOGGING)

    if not args.collection:
        logger.info("Parameter collection -c is mandatory")
        sys.exit(1)

    if args.pids:
        logger.info("Process PIDs from collection: %s", args.collection)
        run(articlemeta_db, collections=[args.collection], pids=args.pids)
    else:
        # args.collection is guaranteed truthy by the guard above, so the
        # previous ``if args.collection else _collections`` fallback was dead.
        run(articlemeta_db, collections=[args.collection], all_records=args.all_records)
def main():
    """CLI entry point: load document bodies from the SciELO website.

    Connects to the articlemeta MongoDB (``MONGODB_HOST`` env var, with a
    local default), parses command-line options, configures logging and
    delegates the actual work to ``run``. ``--collection`` is mandatory.
    """
    db_dsn = os.environ.get('MONGODB_HOST', 'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt propagate.
    except Exception:
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    _collections = collections_acronym(articlemeta_db)

    parser = argparse.ArgumentParser(
        description="Load documents body from SciELO website"
    )

    parser.add_argument(
        '--collection',
        '-c',
        choices=_collections,
        help='Collection acronym'
    )

    parser.add_argument(
        '--pids',
        '-p',
        nargs='*',
        help="List of pids. Separate by space Ex.: 'python load_body.py -p 'S0102-05362006000100018 S0102-05362006000100015'"
    )

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help='Apply processing to all records or just records without the body parameter'
    )

    # NOTE(review): parsed but never used below — logging goes through the
    # LOGGING dictConfig only. Kept for CLI backward compatibility.
    parser.add_argument(
        '--logging_file',
        '-o',
        help='Full path to the log file'
    )

    parser.add_argument(
        '--logging_level',
        '-l',
        default=LOGGING_LEVEL,
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logging level'  # typo fix: was 'Logggin level'
    )

    args = parser.parse_args()

    # Propagate the chosen level to the console handler and every logger.
    LOGGING['handlers']['console']['level'] = args.logging_level
    for lg, content in LOGGING['loggers'].items():
        content['level'] = args.logging_level

    logging.config.dictConfig(LOGGING)

    if not args.collection:
        logger.info("Parameter collection -c is mandatory")
        sys.exit(1)

    if args.pids:
        logger.info("Process PIDs from collection: %s", args.collection)
        run(articlemeta_db, collections=[args.collection], pids=args.pids)
    else:
        # args.collection is guaranteed truthy by the guard above, so the
        # previous ``if args.collection else _collections`` fallback was dead.
        run(articlemeta_db, collections=[args.collection], all_records=args.all_records)
'level': LOGGING_LEVEL, 'propagate': True, }, } } if SENTRY_DSN: LOGGING['handlers']['sentry'] = { 'level': 'ERROR', 'class': 'raven.handlers.logging.SentryHandler', 'dsn': SENTRY_DSN, } LOGGING['loggers']['']['handlers'].append('sentry') try: articlemeta_db = controller.get_dbconn(MONGODB_HOST) except: print('Fail to connect to:', MONGODB_HOST) sys.exit(1) def remove_control_characters(data): return "".join(ch for ch in data if unicodedata.category(ch)[0] != "C") def escape_html_http_tags(string): """Escapa trechos de uma string que podem ser interpretadas como tags HTML. >>> escape_html_http_tags("Citação disponível em <http://www.scielo.br>.") >>> "Citação disponível em <http://www.scielo.br>." >>> escape_html_http_tags("Citação disponível em <http://www.scielo.br")