Esempio n. 1
0
def main():
    """Command-line entry point: parse the options and call ``run``.

    Connects to the database given by the ``MONGODB_HOST`` environment
    variable (default: a local ``articlemeta`` database), parses the
    command-line options, applies the requested logging level to ``LOGGING``
    and calls ``run`` for the chosen collection, or for every acronym
    returned by ``collections_acronym`` when ``--collection`` is omitted.

    Exits with status 1 when the database connection cannot be obtained.
    """
    db_dsn = os.environ.get('MONGODB_HOST', 'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that Ctrl-C
        # and SystemExit are not reported as a connection failure.
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    _collections = collections_acronym(articlemeta_db)

    parser = argparse.ArgumentParser(
        description="Load documents DOI from SciELO website"
    )

    parser.add_argument(
        '--collection',
        '-c',
        choices=_collections,
        help='Collection acronym'
    )

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help='Apply processing to all records or just records without the license parameter'
    )

    parser.add_argument(
        '--scrap_scielo',
        '-s',
        action='store_true',
        help='Try to Scrapy SciELO Website, articles page to get the DOI number'
    )

    parser.add_argument(
        '--query_crossref',
        '-d',
        action='store_true',
        help='Try to query to crossref API for the DOI number'
    )

    parser.add_argument(
        '--logging_level',
        '-l',
        default=LOGGING_LEVEL,
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logggin level'
    )

    args = parser.parse_args()

    # Apply the requested level to the console handler and to every
    # configured logger before the configuration is installed.
    LOGGING['handlers']['console']['level'] = args.logging_level
    for logger_config in LOGGING['loggers'].values():
        logger_config['level'] = args.logging_level

    logging.config.dictConfig(LOGGING)

    # Without --collection, process every known collection.
    collections = [args.collection] if args.collection else _collections
    run(articlemeta_db, collections, args.all_records, args.scrap_scielo, args.query_crossref)
Esempio n. 2
0
def main():
    """Command-line entry point: parse the options and call ``run``.

    Connects to the database given by the ``MONGODB_HOST`` environment
    variable (default: a local ``articlemeta`` database), parses the
    command-line options, applies the requested logging level to ``LOGGING``
    and calls ``run`` for the chosen collection, or for every acronym
    returned by ``collections_acronym`` when ``--collection`` is omitted.

    Exits with status 1 when the database connection cannot be obtained.
    """
    db_dsn = os.environ.get('MONGODB_HOST', 'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that Ctrl-C
        # and SystemExit are not reported as a connection failure.
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    _collections = collections_acronym(articlemeta_db)

    parser = argparse.ArgumentParser(
        description="Load documents DOI from SciELO website"
    )

    parser.add_argument(
        '--collection',
        '-c',
        # Reuse the acronyms already fetched with the open connection rather
        # than calling collections_acronym() again without the database.
        choices=_collections,
        help='Collection acronym'
    )

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help='Apply processing to all records or just records without the license parameter'
    )

    parser.add_argument(
        '--scrap_scielo',
        '-s',
        action='store_true',
        help='Try to Scrapy SciELO Website, articles page to get the DOI number'
    )

    parser.add_argument(
        '--query_crossref',
        '-d',
        action='store_true',
        help='Try to query to crossref API for the DOI number'
    )

    parser.add_argument(
        '--logging_level',
        '-l',
        default=LOGGING_LEVEL,
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logggin level'
    )

    args = parser.parse_args()

    # Apply the requested level to the console handler and to every
    # configured logger before the configuration is installed.
    LOGGING['handlers']['console']['level'] = args.logging_level
    for logger_config in LOGGING['loggers'].values():
        logger_config['level'] = args.logging_level

    logging.config.dictConfig(LOGGING)

    # Without --collection, process every known collection.
    collections = [args.collection] if args.collection else _collections
    run(articlemeta_db, collections, args.all_records, args.scrap_scielo, args.query_crossref)
Esempio n. 3
0
def main():
    """Command-line entry point: parse the options and call ``run``.

    Connects to the database given by the ``MONGODB_HOST`` environment
    variable (default: a local ``articlemeta`` database), parses the
    command-line options, applies the requested logging level to ``LOGGING``
    and calls ``run`` for the chosen collection, or for every acronym
    returned by ``collections_acronym`` when ``--collection`` is omitted.

    Exits with status 1 when the database connection cannot be obtained.
    """
    db_dsn = os.environ.get('MONGODB_HOST', 'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that Ctrl-C
        # and SystemExit are not reported as a connection failure.
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    parser = argparse.ArgumentParser(
        description="Load Languages from SciELO static files available in the file system"
    )

    _collections_acronyms = collections_acronym(articlemeta_db)

    parser.add_argument(
        '--collection',
        '-c',
        choices=_collections_acronyms,
        help='Collection acronym'
    )

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help='Apply processing to all records or just records without the languages parameter'
    )

    # NOTE(review): --logging_file is accepted but never read in this
    # function; confirm whether it should be wired into LOGGING.
    parser.add_argument(
        '--logging_file',
        '-o',
        help='Full path to the log file'
    )

    parser.add_argument(
        '--logging_level',
        '-l',
        default=LOGGING_LEVEL,
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logggin level'
    )

    parser.add_argument(
        '--domain',
        '-d',
        help='Collection domain to get Static catalog'
    )

    args = parser.parse_args()

    # Apply the requested level to the console handler and to every
    # configured logger before the configuration is installed.
    LOGGING['handlers']['console']['level'] = args.logging_level
    for logger_config in LOGGING['loggers'].values():
        logger_config['level'] = args.logging_level

    logging.config.dictConfig(LOGGING)

    # Without --collection, process every known collection.
    collections = [args.collection] if args.collection else _collections_acronyms

    run(collections, articlemeta_db, args.all_records, args.domain)
Esempio n. 4
0
def main():
    """Command-line entry point: parse the options and call ``run``.

    Connects to the database given by the ``MONGODB_HOST`` environment
    variable (default: a local ``articlemeta`` database), parses the
    command-line options, applies the requested logging level to ``LOGGING``
    and calls ``run`` for the chosen collection, or for every acronym
    returned by ``collections_acronym`` when ``--collection`` is omitted.

    Exits with status 1 when the database connection cannot be obtained.
    """
    db_dsn = os.environ.get('MONGODB_HOST', 'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that Ctrl-C
        # and SystemExit are not reported as a connection failure.
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    parser = argparse.ArgumentParser(
        description="Load Languages from SciELO static files available in the file system"
    )

    _collections_acronyms = collections_acronym(articlemeta_db)

    parser.add_argument(
        '--collection',
        '-c',
        choices=_collections_acronyms,
        help='Collection acronym'
    )

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help='Apply processing to all records or just records without the languages parameter'
    )

    # NOTE(review): --logging_file is accepted but never read in this
    # function; confirm whether it should be wired into LOGGING.
    parser.add_argument(
        '--logging_file',
        '-o',
        help='Full path to the log file'
    )

    parser.add_argument(
        '--logging_level',
        '-l',
        default=LOGGING_LEVEL,
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logggin level'
    )

    parser.add_argument(
        '--domain',
        '-d',
        help='Collection domain to get Static catalog'
    )

    args = parser.parse_args()

    # Apply the requested level to the console handler and to every
    # configured logger before the configuration is installed.
    LOGGING['handlers']['console']['level'] = args.logging_level
    for logger_config in LOGGING['loggers'].values():
        logger_config['level'] = args.logging_level

    logging.config.dictConfig(LOGGING)

    # Without --collection, process every known collection.
    collections = [args.collection] if args.collection else _collections_acronyms

    run(collections, articlemeta_db, args.all_records, args.domain)
Esempio n. 5
0
    def __init__(self):
        """Set up the admin token and the data broker.

        The database address comes from the ``MONGODB_HOST`` environment
        variable, then the ``mongo_uri`` setting, then ``127.0.0.1:27017``.
        The admin token comes from the ``ADMIN_TOKEN`` environment variable
        when it is non-empty; otherwise from ``admintoken`` in the
        ``app:main`` settings, falling back to a random UUID hex string.
        """
        settings = dict(utils.Configuration.from_env().items())

        fallback_dsn = settings.get('mongo_uri', '127.0.0.1:27017')
        db_dsn = os.environ.get('MONGODB_HOST', fallback_dsn)

        env_token = os.environ.get('ADMIN_TOKEN', None)
        if env_token:
            self._admintoken = env_token
        else:
            # The settings are only consulted when the environment gives no token.
            self._admintoken = settings['app:main'].get('admintoken', uuid.uuid4().hex)

        self._databroker = DataBroker(get_dbconn(db_dsn))
Esempio n. 6
0
    def __init__(self):
        """Initialise the instance's admin token and data broker.

        The database address is taken from ``MONGODB_HOST`` in the
        environment, else the ``mongo_uri`` setting, else
        ``127.0.0.1:27017``. A non-empty ``ADMIN_TOKEN`` environment variable
        becomes the admin token; failing that, ``admintoken`` from the
        ``app:main`` settings is used, or a random UUID hex string.
        """
        configuration = utils.Configuration.from_env()
        settings = dict(configuration.items())

        default_dsn = settings.get('mongo_uri', '127.0.0.1:27017')
        db_dsn = os.environ.get('MONGODB_HOST', default_dsn)

        token = os.environ.get('ADMIN_TOKEN', None)
        if not token:
            # No usable token in the environment: fall back to the settings.
            token = settings['app:main'].get('admintoken', uuid.uuid4().hex)
        self._admintoken = token

        connection = get_dbconn(db_dsn)
        self._databroker = DataBroker(connection)
Esempio n. 7
0
def main(global_config, **settings):
    """Build and return the Pyramid WSGI application.

    Registers the ``jsonp`` renderer, opens the database connection,
    declares the API routes and exposes a ``databroker`` attribute on every
    request. The database address comes from the ``MONGODB_HOST``
    environment variable, then the ``mongo_uri`` setting, then
    ``127.0.0.1:27017``.

    Every route path is declared with a trailing slash. Reference on
    slash-appended routes:
    http://docs.pylonsproject.org/projects/pyramid/en/latest/narr/urldispatch.html#redirecting-to-slash-appended-routes
    """
    config = Configurator(settings=settings)
    config.add_renderer('jsonp', JSONP(param_name='callback', indent=4))

    fallback_dsn = settings.get('mongo_uri', '127.0.0.1:27017')
    db_client = controller.get_dbconn(os.environ.get('MONGODB_HOST', fallback_dsn))

    def add_databroker(request):
        """Return a DataBroker bound to the shared database client."""
        return controller.DataBroker(db_client)

    # (route name, URL pattern) pairs, registered in this order.
    routes = (
        ('index', '/'),
        # collections - GET method:
        ('collection', '/api/v1/collection/'),
        ('identifiers_collection', '/api/v1/collection/identifiers/'),
        # journals - GET method:
        ('journal', '/api/v1/journal/'),
        ('identifiers_journal', '/api/v1/journal/identifiers/'),
        ('exists_journal', '/api/v1/journal/exists/'),
        # issues - GET method:
        ('get_issue', '/api/v1/issue/'),
        ('get_issues', '/api/v1/issues/'),
        ('identifiers_issue', '/api/v1/issue/identifiers/'),
        ('exists_issue', '/api/v1/issue/exists/'),
        # articles - GET method:
        ('get_article', '/api/v1/article/'),
        ('get_articles', '/api/v1/articles/'),
        ('identifiers_article', '/api/v1/article/identifiers/'),
        ('exists_article', '/api/v1/article/exists/'),
        # press releases - GET method:
        ('identifiers_press_release', '/api/v1/press_release/identifiers/'),
        # logs historychanges - GET method:
        ('list_historychanges_article', '/api/v1/article/history/'),
        ('list_historychanges_journal', '/api/v1/journal/history/'),
        ('list_historychanges_issue', '/api/v1/issue/history/'),
    )
    for route_name, pattern in routes:
        config.add_route(route_name, pattern)

    # others
    config.add_request_method(add_databroker, 'databroker', reify=True)
    config.scan()

    return config.make_wsgi_app()
Esempio n. 8
0
def main():
    """Command-line entry point: parse options, set up logging, call ``run``.

    Connects to the database given by the ``MONGODB_HOST`` environment
    variable (default: a local ``articlemeta`` database), parses the
    command-line options, passes the logging level and log file to
    ``_config_logging`` and calls ``run`` for the chosen collection, or for
    every acronym returned by ``collections_acronym`` when ``--collection``
    is omitted.

    Exits with status 1 when the database connection cannot be obtained.
    """
    db_dsn = os.environ.get('MONGODB_HOST', 'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that Ctrl-C
        # and SystemExit are not reported as a connection failure.
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    _collections_acronyms = collections_acronym(articlemeta_db)

    parser = argparse.ArgumentParser(
        description="Load Languages from SciELO static files available in the file system"
    )

    parser.add_argument(
        '--collection',
        '-c',
        choices=_collections_acronyms,
        help='Collection acronym'
    )

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help='Apply processing to all records or just records without the section parameter'
    )

    parser.add_argument(
        '--logging_file',
        '-o',
        help='Full path to the log file'
    )

    parser.add_argument(
        '--logging_level',
        '-l',
        default='DEBUG',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logggin level'
    )

    args = parser.parse_args()

    _config_logging(args.logging_level, args.logging_file)

    # Without --collection, process every known collection.
    collections = [args.collection] if args.collection else _collections_acronyms

    run(articlemeta_db, collections, args.all_records)
Esempio n. 9
0
def main(global_config, **settings):
    """Create the Pyramid WSGI application and return it.

    Sets up the ``jsonp`` renderer, connects to the database, registers the
    API routes and adds a ``databroker`` attribute to every request. The
    database address is read from the ``MONGODB_HOST`` environment variable,
    else the ``mongo_uri`` setting, else ``127.0.0.1:27017``.

    All route paths end with a slash. Reference on slash-appended routes:
    http://docs.pylonsproject.org/projects/pyramid/en/latest/narr/urldispatch.html#redirecting-to-slash-appended-routes
    """
    config = Configurator(settings=settings)
    jsonp_renderer = JSONP(param_name='callback', indent=4)
    config.add_renderer('jsonp', jsonp_renderer)

    default_dsn = settings.get('mongo_uri', '127.0.0.1:27017')
    db_dsn = os.environ.get('MONGODB_HOST', default_dsn)
    db_client = controller.get_dbconn(db_dsn)

    def add_databroker(request):
        """Build a DataBroker on top of the shared database client."""
        return controller.DataBroker(db_client)

    # Route table as (name, pattern) pairs; registration follows this order.
    route_table = [
        ('index', '/'),
        # collections - GET method:
        ('collection', '/api/v1/collection/'),
        ('identifiers_collection', '/api/v1/collection/identifiers/'),
        # journals - GET method:
        ('journal', '/api/v1/journal/'),
        ('identifiers_journal', '/api/v1/journal/identifiers/'),
        ('exists_journal', '/api/v1/journal/exists/'),
        # issues - GET method:
        ('get_issue', '/api/v1/issue/'),
        ('get_issues', '/api/v1/issues/'),
        ('identifiers_issue', '/api/v1/issue/identifiers/'),
        ('exists_issue', '/api/v1/issue/exists/'),
        # articles - GET method:
        ('get_article', '/api/v1/article/'),
        ('get_articles', '/api/v1/articles/'),
        ('identifiers_article', '/api/v1/article/identifiers/'),
        ('exists_article', '/api/v1/article/exists/'),
        # press releases - GET method:
        ('identifiers_press_release', '/api/v1/press_release/identifiers/'),
        # logs historychanges - GET method:
        ('list_historychanges_article', '/api/v1/article/history/'),
        ('list_historychanges_journal', '/api/v1/journal/history/'),
        ('list_historychanges_issue', '/api/v1/issue/history/'),
    ]
    for name, pattern in route_table:
        config.add_route(name, pattern)

    # others
    config.add_request_method(add_databroker, 'databroker', reify=True)
    config.scan()

    wsgi_app = config.make_wsgi_app()
    return wsgi_app
Esempio n. 10
0
def main():
    """Command-line entry point: parse options, set up logging, call ``run``.

    Connects to the database given by the ``MONGODB_HOST`` environment
    variable (default: a local ``articlemeta`` database), parses the
    command-line options, passes the logging level and log file to
    ``_config_logging`` and calls ``run`` for the chosen collection, or for
    every acronym returned by ``collections_acronym`` when ``--collection``
    is omitted.

    Exits with status 1 when the database connection cannot be obtained.
    """
    db_dsn = os.environ.get('MONGODB_HOST',
                            'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that Ctrl-C
        # and SystemExit are not reported as a connection failure.
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    _collections_acronyms = collections_acronym(articlemeta_db)

    parser = argparse.ArgumentParser(
        description=
        "Load Languages from SciELO static files available in the file system")

    parser.add_argument('--collection',
                        '-c',
                        choices=_collections_acronyms,
                        help='Collection acronym')

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help=
        'Apply processing to all records or just records without the section parameter'
    )

    parser.add_argument('--logging_file',
                        '-o',
                        help='Full path to the log file')

    parser.add_argument(
        '--logging_level',
        '-l',
        default='DEBUG',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logggin level')

    args = parser.parse_args()

    _config_logging(args.logging_level, args.logging_file)

    # Without --collection, process every known collection.
    collections = [args.collection
                   ] if args.collection else _collections_acronyms

    run(articlemeta_db, collections, args.all_records)
Esempio n. 11
0
def main():
    """Command-line entry point for loading document bodies.

    Connects to the database given by the ``MONGODB_HOST`` environment
    variable (default: a local ``articlemeta`` database), parses the
    command-line options and applies the requested logging level to
    ``LOGGING``. ``--collection`` is mandatory. When ``--pids`` is given,
    ``run`` is called for those PIDs only; otherwise it is called for the
    whole collection with the ``--all_records`` flag.

    Exits with status 1 when the database connection cannot be obtained or
    when ``--collection`` is missing.
    """
    db_dsn = os.environ.get('MONGODB_HOST',
                            'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that Ctrl-C
        # and SystemExit are not reported as a connection failure.
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    _collections = collections_acronym(articlemeta_db)

    parser = argparse.ArgumentParser(
        description="Load documents body from SciELO website")

    parser.add_argument('--collection',
                        '-c',
                        choices=_collections,
                        help='Collection acronym')

    parser.add_argument(
        '--pids',
        '-p',
        nargs='*',
        help=
        "List of pids. Separate by space Ex.: 'python load_body.py -p 'S0102-05362006000100018 S0102-05362006000100015'"
    )

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help=
        'Apply processing to all records or just records without the body parameter'
    )

    # NOTE(review): --logging_file is accepted but never read in this
    # function; confirm whether it should be wired into LOGGING.
    parser.add_argument('--logging_file',
                        '-o',
                        help='Full path to the log file')

    parser.add_argument(
        '--logging_level',
        '-l',
        default=LOGGING_LEVEL,
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logggin level')

    args = parser.parse_args()

    # Apply the requested level to the console handler and to every
    # configured logger before the configuration is installed.
    LOGGING['handlers']['console']['level'] = args.logging_level
    for logger_config in LOGGING['loggers'].values():
        logger_config['level'] = args.logging_level

    logging.config.dictConfig(LOGGING)

    if not args.collection:
        logger.info("Parameter collection -c is mandatory")
        sys.exit(1)

    # From here on args.collection is always set, so no fallback to the
    # full collection list is needed.
    if args.pids:
        logger.info("Process PIDs from collection: %s", args.collection)
        run(articlemeta_db, collections=[args.collection], pids=args.pids)
    else:
        run(articlemeta_db,
            collections=[args.collection],
            all_records=args.all_records)
Esempio n. 12
0
def main():
    """Command-line entry point for loading document bodies.

    Connects to the database given by the ``MONGODB_HOST`` environment
    variable (default: a local ``articlemeta`` database), parses the
    command-line options and applies the requested logging level to
    ``LOGGING``. ``--collection`` is mandatory. When ``--pids`` is given,
    ``run`` is called for those PIDs only; otherwise it is called for the
    whole collection with the ``--all_records`` flag.

    Exits with status 1 when the database connection cannot be obtained or
    when ``--collection`` is missing.
    """
    db_dsn = os.environ.get('MONGODB_HOST', 'mongodb://localhost:27017/articlemeta')
    try:
        articlemeta_db = controller.get_dbconn(db_dsn)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that Ctrl-C
        # and SystemExit are not reported as a connection failure.
        print('Fail to connect to:', db_dsn)
        sys.exit(1)

    _collections = collections_acronym(articlemeta_db)

    parser = argparse.ArgumentParser(
        description="Load documents body from SciELO website"
    )

    parser.add_argument(
        '--collection',
        '-c',
        choices=_collections,
        help='Collection acronym'
    )

    parser.add_argument(
        '--pids',
        '-p',
        nargs='*',
        help="List of pids. Separate by space Ex.: 'python load_body.py -p 'S0102-05362006000100018 S0102-05362006000100015'"
    )

    parser.add_argument(
        '--all_records',
        '-a',
        action='store_true',
        help='Apply processing to all records or just records without the body parameter'
    )

    # NOTE(review): --logging_file is accepted but never read in this
    # function; confirm whether it should be wired into LOGGING.
    parser.add_argument(
        '--logging_file',
        '-o',
        help='Full path to the log file'
    )

    parser.add_argument(
        '--logging_level',
        '-l',
        default=LOGGING_LEVEL,
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Logggin level'
    )

    args = parser.parse_args()

    # Apply the requested level to the console handler and to every
    # configured logger before the configuration is installed.
    LOGGING['handlers']['console']['level'] = args.logging_level
    for logger_config in LOGGING['loggers'].values():
        logger_config['level'] = args.logging_level

    logging.config.dictConfig(LOGGING)

    if not args.collection:
        logger.info("Parameter collection -c is mandatory")
        sys.exit(1)

    # From here on args.collection is always set, so no fallback to the
    # full collection list is needed.
    if args.pids:
        logger.info("Process PIDs from collection: %s", args.collection)
        run(articlemeta_db, collections=[args.collection], pids=args.pids)
    else:
        run(articlemeta_db, collections=[args.collection], all_records=args.all_records)
Esempio n. 13
0
            'level': LOGGING_LEVEL,
            'propagate': True,
        },
    }
}

if SENTRY_DSN:
    # Only when a Sentry DSN is configured: register a raven SentryHandler
    # at level ERROR and attach it to the logger configured under the
    # empty name.
    LOGGING['handlers'].update(
        sentry={
            'level': 'ERROR',
            'class': 'raven.handlers.logging.SentryHandler',
            'dsn': SENTRY_DSN,
        }
    )
    LOGGING['loggers']['']['handlers'].append('sentry')

# Open the module-wide database connection; exit with status 1 on failure.
try:
    articlemeta_db = controller.get_dbconn(MONGODB_HOST)
except Exception:
    # ``except Exception`` instead of a bare ``except`` so that Ctrl-C and
    # SystemExit are not reported as a connection failure.
    print('Fail to connect to:', MONGODB_HOST)
    sys.exit(1)


def remove_control_characters(data):
    """Return *data* without the characters in a Unicode "C" category.

    A character is dropped when its ``unicodedata.category`` code starts
    with ``C``; all other characters are kept in their original order.
    """
    kept = []
    for char in data:
        if not unicodedata.category(char).startswith("C"):
            kept.append(char)
    return "".join(kept)


def escape_html_http_tags(string):
    """Escapa trechos de uma string que podem ser interpretadas como tags HTML.

    >>> escape_html_http_tags("Citação disponível em <http://www.scielo.br>.")
    >>> "Citação disponível em &lt;http://www.scielo.br&gt;."
    >>> escape_html_http_tags("Citação disponível em <http://www.scielo.br")