Example #1
0
def get_missing_records():
    """
    Return the IDs from get_all_ids_in_current_system() that record_exists()
    does not find.

    The number of missing IDs and the IDs themselves are printed before
    returning.

    :return: list of IDs for which record_exists() returned a falsy value
    """
    # NOTE(review): prepend_id_with="" presumably yields bare IDs without a
    # prefix - confirm against get_all_ids_in_current_system.
    candidates = get_all_ids_in_current_system(prepend_id_with="")
    missing_ids = [
        candidate
        for candidate in candidates
        if not record_exists(inspire_id=candidate)
    ]

    print("Missing {} records.".format(len(missing_ids)))
    print(missing_ids)
    return missing_ids
Example #2
0
def get_missing_records():
    """
    Finds all records that are missing in the new system (compared to the
    legacy environment) and returns their IDs as a list.

    An ID counts as missing when record_exists() returns a falsy value for
    it. The count and the full list are printed before returning.

    :return: a list of the missing IDs
    """
    absent = []
    for current_id in get_all_ids_in_current_system(prepend_id_with=""):
        if record_exists(inspire_id=current_id):
            # Already present, nothing to report for this ID.
            continue
        absent.append(current_id)

    print("Missing {} records.".format(len(absent)))
    print(absent)
    return absent
Example #3
0
def find_duplicates_and_remove():
    """
    Will go through the application to find any duplicates then remove them.

    For every ID returned by get_all_ids_in_current_system(), the records
    whose 'inspire_id' field matches are looked up. When more than one hit
    comes back, the recid of the first hit is queued for removal. The queued
    recids are passed to do_unload() and the admin index is then rebuilt.
    """
    to_remove = []
    for current_id in get_all_ids_in_current_system(prepend_id_with=""):
        response = get_records_matching_field('inspire_id', current_id,
                                              doc_type=CFG_PUB_TYPE)
        hits = response['hits']['hits']
        if len(hits) > 1:
            # Only the first matching record is scheduled for unloading.
            to_remove.append(hits[0]['_source']['recid'])

    print('There are {} duplicates. Going to remove.'.format(len(to_remove)))
    do_unload(to_remove)

    # reindex submissions for dashboard view
    AdminIndexer().reindex(recreate=True)
Example #4
0
def migrate(missing, start, end, date=None):
    """
    Migrates all content from HEPData.

    :param missing: when truthy, only the IDs returned by
        get_missing_records() are loaded; otherwise the IDs come from
        get_all_ids_in_current_system(date)
    :param start: index of the first ID to load, or None to load every ID.
        Converted with int(), so numeric strings are accepted.
    :param end: index at which to stop (exclusive), or None for no upper
        bound. Only honoured when ``start`` is given. Converted with int()
        like ``start``.
    :param date: forwarded to get_all_ids_in_current_system() when
        ``missing`` is falsy
    :return: None; the selected IDs are handed to load_files()
    """
    print(missing)
    if missing:
        inspire_ids = get_missing_records()
    else:
        inspire_ids = get_all_ids_in_current_system(date)

    print("Found {} inspire ids to load.".format(len(inspire_ids)))
    if start is not None:
        # Coerce both bounds: slicing with a non-integer bound (for example a
        # string coming from the command line) raises TypeError. Previously
        # only ``start`` was converted.
        stop = int(end) if end is not None else None
        inspire_ids = inspire_ids[int(start):stop]
        print("Sliced, going to load {} records.".format(len(inspire_ids)))

    print(inspire_ids)

    load_files(inspire_ids)
Example #5
0
def migrate(start, end, date=None, missing_only=False):
    """
    Migrates all content from HEPData.

    :param start: index of the first ID to load, or None to load every ID.
        Converted with int(), so numeric strings are accepted.
    :param end: index at which to stop (exclusive), or None for no upper
        bound. Only honoured when ``start`` is given. Converted with int()
        like ``start``.
    :param date: forwarded to get_all_ids_in_current_system() when
        ``missing_only`` is falsy
    :param missing_only: when truthy, only the IDs returned by
        get_missing_records() are loaded
    :return: None; the selected IDs are handed to load_files()
    """
    if missing_only:
        inspire_ids = get_missing_records()
    else:
        inspire_ids = get_all_ids_in_current_system(date)

    print("Found {} inspire ids to load.".format(len(inspire_ids)))
    if start is not None:
        # Coerce both bounds: slicing with a non-integer bound (for example a
        # string coming from the command line) raises TypeError. Previously
        # only ``start`` was converted.
        stop = int(end) if end is not None else None
        inspire_ids = inspire_ids[int(start):stop]
        print("Sliced, going to load {} records.".format(len(inspire_ids)))
        print(inspire_ids)

    load_files(inspire_ids)
Example #6
0
def find_duplicates_and_remove():
    """
    Will go through the application to find any duplicates then remove them.

    An ID is treated as duplicated when the lookup on its 'inspire_id' field
    returns more than one hit; the recid of the first hit is then collected.
    All collected recids are passed to do_unload(), after which the admin
    index is recreated.

    :return: None
    """
    all_ids = get_all_ids_in_current_system(prepend_id_with="")

    duplicate_recids = []
    for candidate in all_ids:
        found = get_records_matching_field('inspire_id', candidate,
                                           doc_type=CFG_PUB_TYPE)
        records = found['hits']['hits']
        if len(records) <= 1:
            # Zero or one match: nothing duplicated for this ID.
            continue
        duplicate_recids.append(records[0]['_source']['recid'])

    print('There are {} duplicates. Going to remove.'.format(len(duplicate_recids)))
    do_unload(duplicate_recids)

    # reindex submissions for dashboard view
    indexer = AdminIndexer()
    indexer.reindex(recreate=True)
Example #7
0
def test_get_ids_in_current_system():
    """Check that get_all_ids_in_current_system() does not return None."""
    result = get_all_ids_in_current_system()
    assert result is not None