예제 #1
0
def main():
    """Configure the es_util connection and run the coffee queries.

    The process exits with status 1 (after logging an error) when
    ``es_util.ping()`` reports that the server cannot be reached.
    """
    es_util.setup_connection(host='ves-hx-5e.ebi.ac.uk', port=9200)

    server_reachable = es_util.ping()
    if server_reachable:
        run_coffee_queries()
        return

    # No connection: report and abort with a non-zero exit status.
    logging.error(
        "ERROR: Can not establish connection with the elastic search server."
    )
    sys.exit(1)
예제 #2
0
    both_fails = 0

    for key, value in SVG_FAILURES.items():
        if len(value) > 1:
            SVG_FAILURES[key] = 'BOTH'
            both_fails += 1
        else:
            if value[0] == 'INDIGO':
                indigo_fails += 1
            else:
                rdkit_fails += 1
            SVG_FAILURES[key] = value[0]

    failures_file_path = os.path.join(BASE_CACHE_PATH, 'svg_failures.json')
    try:
        with open(failures_file_path, 'w', encoding='utf-8') as failures_file:
            json.dump(SVG_FAILURES, failures_file)
    except:
        traceback.print_exc()
        print('UNABLE TO WRITE FILE AT {0}'.format(failures_file_path),
              file=sys.stderr)

    print('INDIGO FAIL COUNT: {0}'.format(indigo_fails), file=sys.stderr)
    print('RDKIT FAIL COUNT: {0}'.format(rdkit_fails), file=sys.stderr)
    print('BOTH FAIL COUNT: {0}'.format(both_fails), file=sys.stderr)


# Script entry point: when executed directly (not imported), configure the
# es_util connection for the host/port below and then run
# pre_cache_svg_files().
if __name__ == '__main__':
    es_util.setup_connection('wp-p2m-50.ebi.ac.uk', 9200)
    pre_cache_svg_files()
def main():
    """Parse the command line and run one ChEMBL denormalization step.

    Connection options (``--host``, ``--port``, ``--user``, ``--password``)
    are forwarded to ``es_util.setup_connection``. Exactly one
    denormalization runs per invocation; the flags are checked in this
    priority order:

    1. ``--compound_hierarchy``
    2. ``--activity``
    3. ``--unichem``
    4. ``--mechanism_and_drug_indication``

    When none of them is given, ``denormalize_all_but_activity()`` is run.
    A summary and the elapsed wall-clock time are written to stderr once the
    bulk submitter has been joined.
    """
    t_ini = time.time()
    parser = argparse.ArgumentParser(
        description="Denormalize ChEMBL data existing in Elastic Search")
    parser.add_argument("--host",
                        dest="es_host",
                        help="Elastic Search Hostname or IP address.",
                        default="localhost")
    parser.add_argument("--user",
                        dest="es_user",
                        help="Elastic Search username.",
                        default=None)
    parser.add_argument("--password",
                        dest="es_password",
                        help="Elastic Search username password.",
                        default=None)
    # type=int keeps a user-supplied port the same type as the default;
    # without it "--port 9201" would be passed on as the string "9201".
    parser.add_argument("--port",
                        dest="es_port",
                        help="Elastic Search port.",
                        type=int,
                        default=9200)
    parser.add_argument(
        "--unichem",
        dest="denormalize_unichem",
        help="If included will denormalize the unichem related data.",
        action="store_true",
    )
    parser.add_argument(
        "--activity",
        dest="denormalize_activity",
        help=
        "If included will denormalize the configured activity related data.",
        action="store_true",
    )
    parser.add_argument(
        "--compound_hierarchy",
        dest="denormalize_compound_hierarchy",
        help="If included will denormalize the Compound Hierarchy data.",
        action="store_true",
    )
    parser.add_argument(
        "--mechanism_and_drug_indication",
        dest="denormalize_mechanism_and_drug_indication",
        help=
        "If included will denormalize the Mechanism and Drug Indication data.",
        action="store_true",
    )
    args = parser.parse_args()

    es_util.setup_connection(args.es_host, args.es_port, args.es_user,
                             args.es_password)
    es_util.bulk_submitter.start()

    signal_handler.add_termination_handler(es_util.stop_scan)

    # Only the first matching flag is honoured (see docstring for the order).
    if args.denormalize_compound_hierarchy:
        denormalize_compound_hierarchy()
        dn_type = 'COMPOUND-HIERARCHY'
    elif args.denormalize_activity:
        denormalize_activity()
        dn_type = 'ACTIVITY'
    elif args.denormalize_unichem:
        denormalize_unichem()
        dn_type = 'UNICHEM'
    elif args.denormalize_mechanism_and_drug_indication:
        denormalize_mechanism_and_drug_indication()
        dn_type = 'MECHANISMS-AND-DRUG-INDICATION'
    else:
        denormalize_all_but_activity()
        dn_type = 'ALL-NO-ACTIVITY'
    end_msg = 'DENORMALIZATION FOR "{}" FINISHED'.format(dn_type)

    es_util.bulk_submitter.join()
    glados.es.ws2es.progress_bar_handler.write_after_progress_bars()

    # Break the elapsed time down from the timedelta itself. Adding it to
    # datetime(1, 1, 1) and reading ".day - 1" wraps at month boundaries and
    # misreports runs of 31 days or longer.
    elapsed = timedelta(seconds=time.time() - t_ini)
    hours, remainder = divmod(elapsed.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)

    print(end_msg, file=sys.stderr)
    print(
        "Finished in: {0} Day(s), {1} Hour(s), {2} Minute(s) and {3} Second(s)"
        .format(elapsed.days, hours, minutes, seconds),
        file=sys.stderr)
예제 #4
0
def main():
    """Parse the command line and migrate ChEMBL WS resources to Elastic Search.

    Flow, in order:

    * select the WS environment (``--production``, otherwise dev) and print
      the URL in use to stderr;
    * with ``--create_alias``: create all aliases and exit with status 0;
    * set up the Elastic Search connection and exit with status 1 when the
      server does not answer a ping;
    * with ``--generate_mappings``: generate mapping skeletons for the
      selected resources and return without migrating;
    * with ``--delete_indexes``: ask for confirmation and return if the
      answer is no;
    * start one ``ResourceIterator`` per resource (all resources, or the
      comma-separated ``--resource`` list; an unknown name exits with
      status 1), wait for them and for the bulk submitter, then log the
      per-resource counts and the elapsed wall-clock time.
    """
    t_ini = time.time()
    parser = argparse.ArgumentParser(
        description="Migrate ChEMBL data from the WebServices to Elastic Search"
    )
    parser.add_argument(
        "--delete_indexes",
        dest="delete_indexes",
        help="Delete indexes if they exist already in the elastic cluster.",
        action="store_true",
    )
    parser.add_argument(
        "-A",
        "--all",
        dest="migrate_all",
        help="Migrate all the data in the WebServices, "
        "if missing defaults to only 1000 records per resource.",
        action="store_true",
    )
    parser.add_argument(
        "--generate_mappings",
        dest="generate_mappings",
        help="Generate elastic search mapping skeleton files without migrating",
        action="store_true",
    )
    parser.add_argument("--host",
                        dest="es_host",
                        help="Elastic Search Hostname or IP address.",
                        default="localhost")
    parser.add_argument("--user",
                        dest="es_user",
                        help="Elastic Search username.",
                        default=None)
    parser.add_argument("--password",
                        dest="es_password",
                        help="Elastic Search username password.",
                        default=None)
    # type=int keeps a user-supplied port the same type as the default;
    # without it "--port 9201" would be passed on as the string "9201".
    parser.add_argument("--port",
                        dest="es_port",
                        help="Elastic Search port.",
                        type=int,
                        default=9200)
    parser.add_argument(
        "--resource",
        dest="ws_resource",
        help=
        "Web Services resource to iterate, if not specified will iterate all the resources.",
        default=None)
    parser.add_argument(
        "--production",
        dest="ws_prod_env",
        help=
        "If included will use the production environment of the WS, if not will default to dev.",
        action="store_true",
    )
    parser.add_argument(
        "--create_alias",
        dest="create_alias",
        help="If included will create alias for the configured resources.",
        action="store_true",
    )
    args = parser.parse_args()

    # store_true already yields a bool, so it can be forwarded directly.
    resources_description.set_ws_env(args.ws_prod_env)
    print('CHEMBL WS URL: {0}'.format(resources_description.WS_URL_TO_USE),
          file=sys.stderr)
    sys.stderr.flush()

    if args.create_alias:
        resources_description.ResourceDescription.create_all_aliases(
            args.es_host, args.es_port, args.es_user, args.es_password)
        sys.exit(0)

    es_util.setup_connection(args.es_host, args.es_port, args.es_user,
                             args.es_password)

    if not es_util.ping():
        print("ERROR: Can't ping the elastic search server.", file=sys.stderr)
        sys.exit(1)

    # --resource accepts a comma-separated list of resource names.
    selected_resources = None
    if args.ws_resource:
        selected_resources = args.ws_resource.split(',')

    if args.generate_mappings:
        migration_common.generate_mappings_for_resources(selected_resources)
        return

    migration_common.DELETE_AND_CREATE_INDEXES = args.delete_indexes
    if migration_common.DELETE_AND_CREATE_INDEXES:
        # Destructive operation: require explicit confirmation, default "no".
        if not query_yes_no(
                "This procedure will delete and create all indexes again in the server.\n"
                "Do you want to proceed?",
                default="no"):
            return

    es_util.bulk_submitter.start()

    on_start = migration_common.create_res_idx
    on_doc = migration_common.write_res_doc2es_first_id
    on_done = None
    iterate_all = args.migrate_all

    iterator_thread_pool = SharedThreadPool(max_workers=10)

    resources_to_run = resources_description.ALL_WS_RESOURCES
    if selected_resources:
        # Resolve every requested name up front; abort on the first unknown.
        resources_to_run = []
        for resource_i_str in selected_resources:
            resource_i = resources_description.RESOURCES_BY_RES_NAME.get(
                resource_i_str, None)
            if resource_i is None:
                print('Unknown resource {0}'.format(resource_i_str),
                      file=sys.stderr)
                sys.exit(1)
            resources_to_run.append(resource_i)

    # Start all iterators first, then wait for all of them to finish.
    iterators = []
    for resource_i in resources_to_run:
        res_it_i = ResourceIterator(resource_i,
                                    iterator_thread_pool,
                                    on_start=on_start,
                                    on_doc=on_doc,
                                    on_done=on_done,
                                    iterate_all=iterate_all,
                                    redo_failed_chunks=True)
        res_it_i.start()
        iterators.append(res_it_i)
    for res_it_i in iterators:
        res_it_i.join()

    es_util.bulk_submitter.join()
    for res_i in resources_description.ALL_WS_RESOURCES_NAMES:
        if migration_common.MIG_TOTAL[res_i] > 0:
            migration_common.MIG_LOG.info(
                "{0} migrated {1} out of {2} tried out of {3} total".format(
                    res_i,
                    es_util.get_idx_count(
                        migration_common.get_index_name(res_i)),
                    migration_common.MIG_TRIED_COUNT[res_i],
                    migration_common.MIG_TOTAL[res_i]))
    glados.es.ws2es.progress_bar_handler.write_after_progress_bars()

    # Break the elapsed time down from the timedelta itself. Adding it to
    # datetime(1, 1, 1) and reading ".day - 1" wraps at month boundaries and
    # misreports runs of 31 days or longer.
    elapsed = timedelta(seconds=time.time() - t_ini)
    hours, remainder = divmod(elapsed.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)

    migration_common.MIG_LOG.info(
        "Finished in: {0} Day(s), {1} Hour(s), {2} Minute(s) and {3} Second(s)"
        .format(elapsed.days, hours, minutes, seconds))