def main():
    """Entry point for the ``irco-upgrade`` command.

    Runs all pending Alembic migrations against the given database,
    bringing its schema up to the latest revision.

    Command line: irco-upgrade [-v] DATABASE
    """
    log = get_logger()

    # NOTE(review): the parser prog was 'irco-import', evidently a
    # copy-paste slip -- the sentry tag below identifies this command
    # as 'irco-upgrade', so the usage/help text now matches it.
    argparser = argparse.ArgumentParser('irco-upgrade')
    argparser.add_argument('-v', '--verbose', action='store_true')
    argparser.add_argument('database')
    args = argparser.parse_args()

    # Tag crash reports with the command name and the parsed arguments.
    sentry.context.merge({
        'tags': {
            'command': 'irco-upgrade',
        },
        'extra': {
            'parsed_arguments': args.__dict__,
        }
    })

    log.info('arguments_parsed', args=args)

    # Point Alembic at the migration scripts bundled with the irco
    # package and at the target database, then upgrade straight to the
    # newest revision ('head'). sql=False executes the migrations
    # directly instead of emitting SQL.
    config = Config()
    config.set_main_option('script_location', 'irco:migrations')
    config.set_main_option('sqlalchemy.url', args.database)
    command.upgrade(config, 'head', sql=False, tag=None)
def main():
    """Entry point for the ``irco-scrape`` command.

    Downloads search results page by page (``MAX_RECORDS`` records per
    page) into per-page CSV files inside the output directory.

    Command line: irco-scrape SEARCH_ID OUTPUT [COUNT]
    """
    log = get_logger()
    argparser = argparse.ArgumentParser("irco-scrape")
    argparser.add_argument("search_id")
    argparser.add_argument("output")
    # Accepted but unused; kept for backwards compatibility with older
    # invocations that passed an explicit record count.
    argparser.add_argument("count", type=int, nargs="?", help="Deprecated")
    args = argparser.parse_args()

    # Tag crash reports with the command name and the parsed arguments.
    # NOTE(review): the tag says 'irco-init' but this is the scrape
    # command -- looks like a copy-paste slip; confirm before relying on
    # this tag when triaging crash reports.
    sentry.context.merge({"tags": {"command": "irco-init"}, "extra": {"parsed_arguments": args.__dict__}})
    log.info("arguments_parsed", args=args)

    # Create the destination directory on first run.
    if not os.path.exists(args.output):
        os.makedirs(args.output)

    digits = 5  # column width for the progress line printed below
    for i, start in iterpages(MAX_RECORDS):
        # One CSV file per result page, e.g. savedrecs-00001.csv
        dest = os.path.join(args.output, "savedrecs-{:05d}.csv".format(i))
        end = start + MAX_RECORDS
        # Progress line: "    1 -   500 => <path>" (records are 1-indexed).
        print("{:{}d} - {:{}d} => {}".format(start + 1, digits, end, digits, dest))
        with open(dest, "wb") as fh:
            try:
                download(args.search_id, start + 1, end, fh)
            except AbortDownload:
                # The service signalled there are no more records: stop paging.
                break
    # Clean up the file opened for the page whose download was aborted
    # (it is empty/partial at this point).
    # assumes the loop is only ever left via the break above, so `dest`
    # always names the aborted page's file -- TODO confirm iterpages()
    # never exhausts on its own
    os.remove(dest)
def main():
    """Entry point for the ``irco-import`` command.

    Parses publication records from one or more source files with the
    pipeline selected via --input-format and imports them into the
    target database, then prints an import report.

    Command line: irco-import [-v] -i FORMAT [-e ENC] [-a] SOURCE... DATABASE
    """
    log = get_logger()

    # Supported input formats, keyed by the value of --input-format.
    pipelines = {
        'compendex': compendex.pipeline,
        'scopus': scopus.pipeline,
        'wos': wos.pipeline,
    }

    parser = argparse.ArgumentParser('irco-import')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('-i', '--input-format', choices=pipelines, required=True)
    parser.add_argument('-e', '--encoding', default='utf8')
    parser.add_argument('-a', '--include-ambiguous-affiliations',
                        action='store_true', dest='ambiguous')
    parser.add_argument('source', nargs='+')
    parser.add_argument('database')
    args = parser.parse_args()

    # Tag crash reports with the command, the chosen input format and
    # the parsed arguments.
    sentry.context.merge({
        'tags': {
            'command': 'irco-import',
            'input_format': args.input_format,
        },
        'extra': {
            'parsed_arguments': args.__dict__,
        }
    })
    log.info('arguments_parsed', args=args)

    # Instantiate the parsing pipeline for the selected format.
    pipeline = pipelines[args.input_format](
        encoding=args.encoding, include_ambiguous=args.ambiguous)

    engine = create_engine(args.database, echo=args.verbose)
    Session = sessionmaker(bind=engine)

    def publication_count():
        # Fresh session per call so the second count sees the import.
        return Session().query(models.Publication).count()

    count_before = publication_count()
    records = get_records(args.source, pipeline)
    imported, ignored = import_records(engine, records)
    count_after = publication_count()

    # Attach the import statistics to the pipeline report.
    for name, description, value in (
        ('imported_records', 'Records added to the database', imported),
        ('ignored_records', 'Ignored records (already imported)', ignored),
        ('before_import', 'Records count before import', count_before),
        ('after_import', 'Records count after import', count_after),
    ):
        pipeline.add_metric(name, description, value)

    print()
    print(pipeline.report())
import itertools
import collections

import networkx as nx

from irco import logging

# Module-level structured logger for this module.
log = logging.get_logger()


def get_institutions(publication):
    """Return the set of institution names affiliated with *publication*.

    Duplicates collapse naturally: a publication with several authors
    from the same institution yields that institution once.
    """
    institutions = set()
    for affiliation in publication.affiliations:
        institutions.add(affiliation.institution.name)
    return institutions


def create(session, publications):
    """Build an institution collaboration graph from *publications*.

    NOTE(review): the body appears truncated in this chunk --
    ``collaborations_count`` is initialised but never used here, the
    ``itertools`` import is unreferenced so far, and nothing is
    returned; the remainder presumably adds collaboration edges between
    institutions. Confirm against the full file before relying on this
    description.
    """
    g = nx.Graph()
    # papers_count: papers per institution; collaborations_count is
    # presumably filled in by the (not visible) rest of the function.
    papers_count = collections.Counter()
    collaborations_count = collections.Counter()
    for publication in publications:
        institutions = get_institutions(publication)
        # Every institution appearing on the paper becomes a node.
        g.add_nodes_from(institutions)
        # Count one paper for each institution involved.
        papers_count.update(institutions)