Example #1
0
def pubfetch(
    config_path: str,
    only_update_authorships: bool,
    delay: int,
    max_authorships: int
) -> None:
    """Update authorships and, optionally, fetch publications.

    Loads the configuration at ``config_path``, initialises the PubMed
    client with the configured e-mail address and API token, and then
    runs the update against the ``sup_*`` PostgreSQL database inside a
    single transaction.

    Args:
        config_path: Path handed to ``config.load``.
        only_update_authorships: When true, ``fetch_publications`` is
            skipped and only ``update_authorships`` runs.
        delay: Value stored in the module-level ``pubmed_delay``
            (unit is not visible here -- TODO confirm with its readers).
        max_authorships: Forwarded unchanged to ``update_authorships``.
    """
    global pubmed_delay
    pubmed_delay = delay

    cfg = config.load(config_path)

    pubmed_init(email=cfg.get("pubmed_email"),
                api_key=cfg.get("pubmed_api_token"))

    sup_conn: psql_connection
    sup_conn = psycopg2.connect(host=cfg.get("sup_host"),
                                dbname=cfg.get("sup_database"),
                                user=cfg.get("sup_username"),
                                password=cfg.get("sup_password"),
                                port=cfg.get("sup_port"))

    # psycopg2's ``with connection`` only ends the transaction (commit on
    # success, rollback on error); it never closes the connection.  Close
    # it in ``finally`` so a failing update does not leak the connection.
    try:
        with sup_conn:
            with sup_conn.cursor() as cursor:
                update_authorships(cursor, max_authorships)
                if not only_update_authorships:
                    fetch_publications(cursor)
    finally:
        sup_conn.close()
Example #2
0
def serve(config_path: str):
    """Configure the application from ``config_path`` and run Flask.

    Side effects on module globals: ``conn`` is bound to a new database
    connection, and ``picture_path`` / ``file_storage_alias`` are
    overwritten with the configured values (their current values are the
    defaults when the config has no entry).

    The process exits with status -1 when the configuration is empty or
    the database connection cannot be opened.

    Args:
        config_path: Path handed to ``config.load``.

    Raises:
        AssertionError: If neither the ``secret`` config entry nor the
            ``SECRET_KEY`` environment variable provides a session key.
    """
    global conn
    global picture_path
    global file_storage_alias

    cfg = config.load(config_path)
    if not cfg:
        print('Error: Check config file')
        sys.exit(-1)

    try:
        conn = psycopg2.connect(host=cfg.get("sup_host"),
                                dbname=cfg.get("sup_database"),
                                user=cfg.get("sup_username"),
                                password=cfg.get("sup_password"),
                                port=cfg.get("sup_port"))
    except Exception as err:
        # Report the underlying cause; the bare message alone gives the
        # operator nothing to act on (bad host, bad password, ...).
        print('Cannot connect to the database:', err)
        sys.exit(-1)

    picture_path = cfg.get('picturepath', picture_path)
    file_storage_alias = cfg.get('file_storage_alias', file_storage_alias)
    secret_key = cfg.get('secret', os.getenv('SECRET_KEY', ''))
    # Raised explicitly rather than via ``assert`` so the check survives
    # ``python -O``; the exception type and message are unchanged.
    if not secret_key:
        raise AssertionError(
            "You must set a secret key for sessions in Flask\n"
            "\thttps://flask.palletsprojects.com/en/1.1.x/quickstart/#sessions")
    template_folder = cfg.get('forms', 'templates')
    url_prefix = os.getenv('APPLICATION_ROOT', '')

    server = Flask(__name__, template_folder=template_folder)
    # ``app`` is registered as a blueprint, mounted under APPLICATION_ROOT.
    server.register_blueprint(app, url_prefix=url_prefix)
    server.secret_key = secret_key
    server.run()
Example #3
0
def prefill(config_path: str):
    """Process projects and studies, then add developers, in one transaction.

    Loads the configuration, creates an ``mwb.Client`` and a connection
    to the ``sup_*`` PostgreSQL database, reads the optional embargo
    list, and calls ``process_projects_and_studies`` followed by
    ``add_developers`` on a single cursor.

    Args:
        config_path: Path handed to ``config.load``.  The optional
            ``embargoed`` entry names a text file with one entry per
            line; blank lines in it are ignored.
    """
    cfg = config.load(config_path)

    mwb_client = mwb.Client(cfg.get("mwb_host"), cfg.get("mwb_port"))
    sup_conn: db.Connection = psycopg2.connect(
        host=cfg.get("sup_host"),
        dbname=cfg.get("sup_database"),
        user=cfg.get("sup_username"),
        password=cfg.get("sup_password"),
        port=cfg.get("sup_port"))

    # ``with sup_conn`` ends the transaction but leaves the connection
    # open, so close it in ``finally`` on every path after the connect.
    try:
        embargoed_path = cfg.get("embargoed", "")
        embargoed: List[str] = []
        if embargoed_path:
            with open(embargoed_path) as f:
                # Lines read from a file keep their newline, so testing
                # the raw line never filters anything; test the stripped
                # text to really drop blank lines.
                stripped = (line.strip() for line in f)
                embargoed = [entry for entry in stripped if entry]

        with sup_conn:
            with sup_conn.cursor() as sup_cur:
                process_projects_and_studies(mwb_client, sup_cur, embargoed)
                add_developers(sup_cur)
    finally:
        sup_conn.close()
Example #4
0
def generate(config_path: str, old_path: str):
    """Export all triples into a dated directory under ``data_out``.

    The output directory is ``data_out/<YYYY>/<MM>/<YYYY_MM_DD>`` for the
    current local date and is created if missing.  One ``.nt`` file is
    written per entity kind (orgs, people, photos, tools, pubs, projects,
    datasets, studies).  When ``old_path`` is truthy, ``add.nt`` and
    ``sub.nt`` are also written from ``diff(old_path, path)``.

    Args:
        config_path: Path handed to ``config.load``.
        old_path: Location of a previous export to diff against; pass an
            empty value to skip the diff.
    """
    timestamp = datetime.now()
    path = os.path.join("data_out", timestamp.strftime("%Y"),
                        timestamp.strftime("%m"),
                        timestamp.strftime("%Y_%m_%d"))
    os.makedirs(path, exist_ok=True)

    cfg = config.load(config_path)

    if not cfg.namespace.endswith('/'):
        print(f"WARNING! Namespace doesn't end with '/': {cfg.namespace}")

    mwb_conn: psycopg2.extensions.connection = psycopg2.connect(
        host=cfg.get('mwb_host'),
        dbname=cfg.get('mwb_database'),
        user=cfg.get('mwb_username'),
        password=cfg.get('mwb_password'),
        port=cfg.get('mwb_port'))

    # ``with <connection>`` only ends the transaction; the connections
    # are closed in ``finally`` so that a failure anywhere (including the
    # second connect) does not leak them.
    try:
        sup_conn: psycopg2.extensions.connection = psycopg2.connect(
            host=cfg.get('sup_host'),
            dbname=cfg.get('sup_database'),
            user=cfg.get('sup_username'),
            password=cfg.get('sup_password'),
            port=cfg.get('sup_port'))
        try:
            with mwb_conn, sup_conn:
                with mwb_conn.cursor() as mwb_cur, \
                        sup_conn.cursor() as sup_cur:
                    _write_triple_files(cfg, mwb_cur, sup_cur, path,
                                        old_path)
        finally:
            sup_conn.close()
    finally:
        mwb_conn.close()


def _write_triple_files(cfg, mwb_cur, sup_cur, path: str,
                        old_path: str) -> None:
    """Build every triple set and write the ``.nt`` files under ``path``."""
    org_file = os.path.join(path, 'orgs.nt')
    people_file = os.path.join(path, 'people.nt')
    project_file = os.path.join(path, 'projects.nt')
    study_file = os.path.join(path, 'studies.nt')
    dataset_file = os.path.join(path, 'datasets.nt')
    tools_file = os.path.join(path, 'tools.nt')
    photos_file = os.path.join(path, 'photos.nt')
    pubs_file = os.path.join(path, 'pubs.nt')
    add_file = os.path.join(path, 'add.nt')
    sub_file = os.path.join(path, 'sub.nt')

    # Organizations
    orgs = get_organizations(sup_cur)
    org_triples = make_organizations(cfg.namespace, orgs)
    print_to_file(org_triples, org_file)

    # People: withheld people are kept out of the people file but are
    # still passed to make_tools below.
    all_people = get_people(sup_cur)
    people = {k: v for k, v in all_people.items() if not v.withheld}
    withheld_people = {
        k: v
        for k, v in all_people.items() if v.withheld
    }
    people_triples = make_people(cfg.namespace, people)
    people_triples.extend(
        link_people_to_org(cfg.namespace, sup_cur, people, orgs))
    print_to_file(people_triples, people_file)

    # Photos
    photos = get_photos(cfg.get("picturepath", "."), people)
    photos_triples = make_photos(cfg.namespace, photos)
    print_to_file(photos_triples, photos_file)

    # Tools
    print("Gathering Tools")
    yaml_tools = get_yaml_tools(cfg)
    csv_tools = list(fetch_mtw_tools(sup_cur))
    all_tools = yaml_tools + csv_tools
    tools_triples = make_tools(cfg.namespace, all_tools, people,
                               withheld_people, mwb_cur, sup_cur)
    print_to_file(tools_triples, tools_file)

    # Publications
    pubs = get_publications(sup_cur)
    pubs_triples = make_publications(cfg.namespace, pubs)
    print_to_file(pubs_triples, pubs_file)

    # Projects
    projects = get_projects(mwb_cur, sup_cur, people, orgs)
    project_data = make_projects(cfg.namespace, projects)
    project_triples, project_summaries = project_data
    all_proj_triples = project_triples + project_summaries
    print_to_file(all_proj_triples, project_file)

    # Studies
    # Study file printed after datasets, because make_datasets returns
    # additional study triples.
    embargoed_path = cfg.get('embargoed', '')
    embargoed: List[str] = []
    if embargoed_path:
        with open(embargoed_path) as f:
            # Lines read from a file keep their newline, so the raw line
            # is always truthy; filter on the stripped text instead so
            # blank lines do not become empty entries.
            stripped = (line.strip() for line in f)
            embargoed = [entry for entry in stripped if entry]

    studies = get_studies(mwb_cur, sup_cur, people, orgs, embargoed)
    study_triples, study_summaries = \
        make_studies(cfg.namespace, studies, projects)

    # Datasets
    datasets = get_datasets(mwb_cur)
    dataset_triples, study_sup_triples = \
        make_datasets(cfg.namespace, datasets, studies)
    print_to_file(dataset_triples, dataset_file)

    all_study_triples = study_triples + study_summaries \
        + study_sup_triples
    print_to_file(all_study_triples, study_file)

    # Optional diff against a previous export.
    if old_path:
        add, sub = diff(old_path, path)
        with open(add_file, 'w') as f:
            f.writelines(add)
        with open(sub_file, 'w') as f:
            f.writelines(sub)