Example #1
0
    def test_keys(self):
        """keys() and direct iteration should expose every stored name."""
        nad = NameAwareDict()
        for author in diff_names:
            nad[author] = PathNode(author)

        stored_keys = nad.keys()
        self.assertIn(diff_names[0], stored_keys)
        self.assertIn(diff_names[1], stored_keys)

        # Iterating the dict itself must yield only names we inserted
        for stored in nad:
            self.assertIn(stored, diff_names)
Example #2
0
    def test_get_set_item(self):
        """Storing under one form of a name should affect all equivalent
        forms, while distinct names get independent entries."""
        nad = NameAwareDict()

        # Nothing stored yet: lookups must fail
        for missing in (diff_names[0], diff_names[1]):
            with self.assertRaises(KeyError):
                nad[missing]

        first_node = PathNode(equal_names[0])
        nad[equal_names[0]] = first_node

        stored_nodes = []
        for author in diff_names:
            with self.assertRaises(KeyError):
                nad[author]
            fresh = PathNode(author)
            nad[author] = fresh
            stored_nodes.append(fresh)

        # Every equivalent form of the first name retrieves the same node
        for form in equal_names:
            self.assertIs(first_node, nad[form])

        # Each distinct name retrieves exactly the node stored for it
        for author, expected in zip(diff_names, stored_nodes):
            self.assertIsNot(first_node, nad[author])
            self.assertIs(expected, nad[author])

        # Overwriting under another equivalent form replaces the old node
        replacement = PathNode(equal_names[2])
        nad[equal_names[2]] = replacement
        for form in equal_names:
            self.assertIsNot(first_node, nad[form])
            self.assertIs(replacement, nad[form])
Example #3
0
    def test_len(self):
        """len() counts distinct people, not distinct name forms."""
        nad = NameAwareDict()

        for author in diff_names:
            nad[author] = PathNode(author)
        self.assertEqual(len(nad), len(diff_names))

        # All of equal_names refer to one person, so they add one entry
        for author in equal_names:
            nad[author] = PathNode(author)
        self.assertEqual(len(nad), len(diff_names) + 1)
Example #4
0
    def test_contains(self):
        """Membership tests should honor name equivalence."""
        nad = NameAwareDict()
        self.assertNotIn(equal_names[0], nad)

        nad[equal_names[0]] = PathNode(equal_names[0])

        # Every equivalent form of the stored name is "in" the dict...
        for form in equal_names:
            self.assertIn(form, nad)
        # ...while unrelated names are not
        for other in diff_names:
            self.assertNotIn(other, nad)

        # Once stored, the distinct names become members too
        for other in diff_names:
            nad[other] = PathNode(other)
        for other in diff_names:
            self.assertIn(other, nad)
Example #5
0
def graph_translation(chains, source, dest):
    """Build per-column mappings from each lowercased name form to the
    canonical form of that name within the column.

    We have a list of chains---the table in the web view. These chains
    may contain many different forms of a name, and it's important to
    preserve that for the table display. But for the graph display, it's
    better to collapse down to one node per person, not one node per
    name form. So within each column, we canonicalize each name to the
    least-detailed form of that name appearing in the column.
    """
    source = ADSName.parse(get_name_as_in_ADS(source,
                                              [c[0] for c in chains]))
    dest = ADSName.parse(get_name_as_in_ADS(dest,
                                            [c[-1] for c in chains]))

    n_columns = len(chains[0])
    nads = []
    for col in range(n_columns):
        # This dict will map names to canonical forms
        nad = NameAwareDict()
        nads.append(nad)
        # Pin the endpoints so their chosen forms win in their columns
        if col == 0:
            nad[source] = source
        if col == n_columns - 1:
            nad[dest] = dest
        for chain in chains:
            name = ADSName.parse(chain[col])
            # Keep whichever form of this name is least detailed
            if (name not in nad
                    or name.level_of_detail < nad[name].level_of_detail):
                nad[name] = name

    mappings = []
    for col, nad in enumerate(nads):
        mappings.append({
            chain[col].lower(): nad[chain[col].lower()].original_name
            for chain in chains
        })
    return mappings
Example #6
0
    def test_del_item(self):
        """Deleting any form of a name should remove the whole entry."""
        nad = NameAwareDict()
        for author in diff_names:
            nad[author] = PathNode(author)

        for author in diff_names:
            self.assertIn(author, nad)
        # Remove distinct entries one at a time; later ones must survive
        for idx, author in enumerate(diff_names):
            del nad[author]
            self.assertNotIn(author, nad)
            for survivor in diff_names[idx + 1:]:
                self.assertIn(survivor, nad)

        # Deleting via one equivalent form removes every form
        nad[equal_names[0]] = PathNode(equal_names[0])
        del nad[equal_names[1]]
        for form in equal_names:
            self.assertNotIn(form, nad)
            with self.assertRaises(KeyError):
                nad[form]
Example #7
0
    def get_papers_for_author(self, query_author):
        """Query ADS for the documents authored by ``query_author``.

        Any authors queued for prefetching are folded into the same ADS
        query. Returns ``(records, documents)`` where ``records`` is a
        single AuthorRecord when exactly one author was queried, or a
        NameAwareDict of AuthorRecords keyed by author name otherwise.
        """
        query_author = ADSName.parse(query_author)

        query_authors = self._select_authors_to_prefetch()
        if query_author not in query_authors:
            query_authors.append(query_author)

        lb.i("Querying ADS for author " + query_author.qualified_full_name)
        if len(query_authors) > 1:
            lb.i(" Also prefetching. Query: " +
                 "; ".join([a.qualified_full_name for a in query_authors]))

        # Build one OR-of-quoted-names query covering all authors at once
        query_strings = []
        for author in query_authors:
            query_string = '"' + author.full_name + '"'
            if author.require_exact_match:
                query_string = "=" + query_string
            query_strings.append(query_string)
        query = " OR ".join(query_strings)
        query = f"author:({query})"

        documents = self._inner_query_for_author(query, len(query_authors))

        author_records = NameAwareDict()
        for author in query_authors:
            author_records[author] = AuthorRecord(name=author, documents=[])
        # We need to go through all the documents and match them to our
        # author list. This is critically important if we're pre-fetching
        # authors, but it's also important to support the "<" and ">"
        # specificity selectors for author names
        for document in documents:
            matched = False
            names = [ADSName.parse(n) for n in document.authors]
            for name in names:
                try:
                    author_records[name].documents.append(document.bibcode)
                    matched = True
                except KeyError:
                    pass
            if (not matched and all(
                    not a.require_more_specific and not a.require_less_specific
                    for a in query_authors)):
                # See if we can guess which names should have been matched
                guesses = []
                doc_authors = [n.full_name for n in names]
                doc_authors_initialized = \
                    [n.convert_to_initials().full_name for n in names]
                # Use a fresh loop variable: the original shadowed the
                # ``query_author`` parameter here, which the final lookup
                # below still relies on (harmless today only because the
                # single-author case makes the two coincide).
                for candidate in query_authors:
                    guess = difflib.get_close_matches(candidate.full_name,
                                                      doc_authors,
                                                      n=1,
                                                      cutoff=0.8)
                    if len(guess):
                        guesses.append(
                            f"{candidate.full_name} -> {guess[0]}")
                    else:
                        # Try again, changing names to use initials throughout
                        guess = difflib.get_close_matches(
                            candidate.convert_to_initials().full_name,
                            doc_authors_initialized,
                            n=1,
                            cutoff=0.7)
                        if len(guess):
                            # Having found a match with initialized names,
                            # report using the full form of each name
                            chosen_doc_author = doc_authors[
                                doc_authors_initialized.index(guess[0])]
                            guesses.append(f"{candidate.full_name}"
                                           f" -> {chosen_doc_author}")
                msg = "ADS Buddy: No matches for " + document.bibcode
                if len(guesses):
                    msg += " . Guesses: " + "; ".join(guesses)
                lb.w(msg)

        for author_record in author_records.values():
            # Remove any duplicate document listings
            # Becomes important for papers with _many_ authors, e.g. LIGO
            # papers, which use only initials and so can have duplicate names
            author_record.documents = sorted(set(author_record.documents))

        if len(query_authors) == 1:
            return author_records[query_author], documents
        else:
            return author_records, documents
Example #8
0
    def test_with_synonyms(self):
        """Verify NameAwareDict collapses synonymous name forms into a
        single entry, and that the '@' modifier suppresses synonym
        matching.

        NOTE(review): several assertions below compare against ``i``,
        which deliberately leaks out of the preceding enumerate() loops;
        after those loops ``i == len(names) - 1``, the value stored last.
        """
        synonyms = [
            "test_synAA; test_synAB", "test_synB, a; test_synB, b",
            "test_synCA, q; test_synCB, q", "test_synD, a; test_synD, b c",
            "test_synEB, b; test_synEA, a",
            "test_synFA, a b c d; test_synFB, a",
            "test_synGA, a b c d; test_synGB, a; test_synGC, b"
        ]
        # Hack: inject test synonyms
        ads_name._name_cache.clear()
        ads_name._parse_name_synonyms(synonyms)

        for synonym in synonyms:
            names = synonym.split(';')

            # The second copy is for the deletion tests later
            nad = NameAwareDict()
            nad2 = NameAwareDict()
            for i, name in enumerate(names):
                nad[name] = i
                nad2[name] = i

            # Do the insertion in both orders, to ensure we try both
            # "canonical first" and "canonical last"
            nad_rev = NameAwareDict()
            nad_rev2 = NameAwareDict()
            for i, name in enumerate(reversed(names)):
                nad_rev[name] = i
                nad_rev2[name] = i

            # Ensure that, after inserting under one form and updating under
            # the other form, we can get the latest value from either form.
            # (``i`` is the last index stored by the loops above.)
            for name in names:
                self.assertEqual(nad[name], i)
                self.assertEqual(nad_rev[name], i)

            # Check other misc methods
            for name in names:
                self.assertIn(name, nad)
                self.assertIn(name, nad_rev)

            # All forms collapse into exactly one entry
            self.assertEqual(len(nad), 1)
            self.assertEqual(len(nad_rev), 1)

            # The surviving key is the form stored last
            self.assertEqual(nad.keys(), (ADSName.parse(names[-1]), ))
            self.assertEqual(nad_rev.keys(), (ADSName.parse(names[0]), ))

            self.assertEqual(nad.values(), (i, ))
            self.assertEqual(nad_rev.values(), (i, ))

            # Ensure that deleting one form deletes them all.
            del nad[names[0]]
            self.assertEqual(len(nad), 0)
            for name in names:
                self.assertNotIn(name, nad)

            del nad2[names[1]]
            self.assertEqual(len(nad2), 0)
            for name in names:
                self.assertNotIn(name, nad2)

            del nad_rev[names[0]]
            self.assertEqual(len(nad_rev), 0)
            for name in names:
                self.assertNotIn(name, nad_rev)

            del nad_rev2[names[1]]
            self.assertEqual(len(nad_rev2), 0)
            for name in names:
                self.assertNotIn(name, nad_rev2)

        # Verify functionality with '@' modifier
        for synonym in synonyms:
            names_orig = synonym.split(';')

            for names in [names_orig, list(reversed(names_orig))]:
                # We'll insert under one name, then verify we can't access
                # or delete under the other
                nad1 = NameAwareDict()
                nad2 = NameAwareDict()
                nad3 = NameAwareDict()
                nad4 = NameAwareDict()

                nad1[names[0]] = 1
                nad2[names[-1]] = 1
                nad3['@' + names[0]] = 1
                nad4['@' + names[-1]] = 1

                with self.assertRaises(KeyError):
                    nad1['@' + names[-1]]
                with self.assertRaises(KeyError):
                    nad2['@' + names[0]]
                with self.assertRaises(KeyError):
                    nad3[names[-1]]
                with self.assertRaises(KeyError):
                    nad4[names[0]]

                # I don't think it's worth it to test modification because
                # it's hard to define how it should work. If we store under
                # 'name' which has 'name2' as a synonym, we get the same
                # value for 'name' and 'name2'. If we then store under
                # '@name2', what should we get when retrieving as 'name2'?
                # If we then store again under 'name', what should we get
                # for 'name2'? Or for '@name2'?

                # nad1['@' + names[-1]] = 2
                # self.assertEqual(nad1[names[0]], 1)
                # nad1['@' + names[0]] = 2
                # self.assertEqual(nad1[names[-1]], 1)
                # nad1[names[-1]] = 2
                # self.assertEqual('@' + nad1[names[0]], 1)
                # nad1[names[0]] = 2
                # self.assertEqual('@' + nad1[names[-1]], 1)

                with self.assertRaises(KeyError):
                    del nad1['@' + names[-1]]
                with self.assertRaises(KeyError):
                    del nad2['@' + names[0]]
                with self.assertRaises(KeyError):
                    del nad3[names[-1]]
                with self.assertRaises(KeyError):
                    del nad4[names[0]]

        # Remove our test synonyms
        ads_name._name_cache.clear()
        ads_name._name_synonyms.clear()
        ads_name._load_synonyms()
Example #9
0
    def test_with_specificity(self):
        """Exercise the <, <=, >, >= and = specificity modifiers, walking
        equal_names in increasing and then decreasing level of detail."""
        for reverse in (False, True):
            nad = NameAwareDict()
            for author in diff_names:
                nad[author] = PathNode(author)

            sequence = equal_names[::-1] if reverse else equal_names
            for i, name in enumerate(sequence):
                lt = ADSName.parse("<" + str(name))
                lte = ADSName.parse("<=" + str(name))
                gt = ADSName.parse(">" + str(name))
                gte = ADSName.parse(">=" + str(name))
                ex = ADSName.parse("=" + str(name))

                # Names inserted on previous iterations sit on one side of
                # this name's level of detail, depending on walk direction
                maybe_pair, absent_pair = (
                    ((gt, gte), (lt, lte)) if reverse
                    else ((lt, lte), (gt, gte)))
                if i == 0:
                    self.assertNotIn(maybe_pair[0], nad)
                    self.assertNotIn(maybe_pair[1], nad)
                else:
                    self.assertIn(maybe_pair[0], nad)
                    self.assertIn(maybe_pair[1], nad)
                self.assertNotIn(absent_pair[0], nad)
                self.assertNotIn(absent_pair[1], nad)
                self.assertNotIn(ex, nad)

                # Node "Last, First" will match and overwrite an existing
                # entry for "Last, F"
                nad[name] = PathNode(name)

                self.assertNotIn(lt, nad)
                self.assertIn(gte, nad)
                self.assertIn(lte, nad)
                self.assertNotIn(gt, nad)
                self.assertIn(ex, nad)
Example #10
0
    @staticmethod
    def _normalize_endpoint(value, label, lt_gt_error_key):
        """Validate and normalize one search endpoint (src or dest).

        Returns ``(normalized_value, name_to_query)``: the parsed ADSName
        twice when ``value`` is a name, or ``(normalized_id, None)`` when
        it is an ORCID ID (IDs are resolved to records later).

        Raises PathFinderError for unparseable names or for the '<'/'>'
        modifiers, which would exclude the endpoint itself.
        """
        if is_orcid_id(value):
            return normalize_orcid_id(value), None
        try:
            parsed = ADSName.parse(value)
        except InvalidName:
            raise PathFinderError("invalid_char_in_name",
                                  f'The "{label}" name is invalid.')
        if parsed.excludes_self:
            raise PathFinderError(
                lt_gt_error_key,
                "'<' and '>' are invalid modifiers for the source and "
                "destination authors and can only be used in the "
                "exclusions "
                "list. Try '<=' or '>=' instead.")
        return parsed, parsed

    def __init__(self, src, dest, excluded_names=None):
        """Set up a path search between two authors.

        ``src`` and ``dest`` may each be an author name or an ORCID ID.
        ``excluded_names`` is an optional name/bibcode or list of
        names/bibcodes to exclude from the search.

        Raises PathFinderError for invalid input or when src and dest
        are trivially equal.
        """
        self.repository = Repository()
        if not key_is_valid(src) and not is_orcid_id(src):
            raise PathFinderError("invalid_char_in_name",
                                  'The "source" name is invalid.')
        if not key_is_valid(dest) and not is_orcid_id(dest):
            raise PathFinderError("invalid_char_in_name",
                                  'The "destination" name is invalid.')

        # The validation logic is identical for both endpoints, so it
        # lives in _normalize_endpoint (the original duplicated it).
        names_to_be_queried = []
        src, src_query_name = self._normalize_endpoint(
            src, "source", "src_invalid_lt_gt")
        if src_query_name is not None:
            names_to_be_queried.append(src_query_name)
        dest, dest_query_name = self._normalize_endpoint(
            dest, "destination", "dest_invalid_lt_gt")
        if dest_query_name is not None:
            names_to_be_queried.append(dest_query_name)

        # Only like types are comparable: str == str (two ORCID IDs) or
        # ADSName == ADSName (consistent names)
        if type(src) == type(dest) and src == dest:
            raise PathFinderError(
                "src_is_dest",
                'The "source" and "destination" names are equal (or at least'
                ' consistent). The distance is zero. APPA would like something'
                ' more challenging, please.')

        self.excluded_names = NameAwareSet()
        self.excluded_bibcodes = set()
        if excluded_names is not None:
            if type(excluded_names) is str:
                excluded_names = [excluded_names]
            for name in excluded_names:
                name = name.strip()
                if name == '':
                    continue
                elif is_bibcode(name):
                    self.excluded_bibcodes.add(name)
                else:
                    try:
                        self.excluded_names.add(ADSName.parse(name))
                    except InvalidName:
                        raise PathFinderError(
                            "invalid_excl",
                            f"'{name}' is an invalid name to exclude.")

        # Let the repository batch the upcoming author lookups
        self.repository.notify_of_upcoming_author_request(*names_to_be_queried)
        self.authors_to_expand_src = []
        self.authors_to_expand_src_next = []
        self.authors_to_expand_dest = []
        self.authors_to_expand_dest_next = []

        self.nodes = NameAwareDict()
        self.connecting_nodes = set()

        self.orig_src = src
        self.orig_dest = dest
Example #11
0
class PathFinder:
    """Finds chains of coauthorship connecting two authors."""
    # Attribute type annotations. Two bugs fixed here: the original
    # annotated ``repository`` with ``Repository()`` — a *call*, which
    # instantiated a Repository at class-definition time, since class-body
    # annotations are evaluated — and declared the four expansion lists
    # with ``=`` instead of ``:``, assigning the typing objects as class
    # attributes rather than annotating.
    repository: Repository
    nodes: NameAwareDict
    src: PathNode
    dest: PathNode
    excluded_names: NameAwareSet
    excluded_bibcodes: set
    connecting_nodes: Set[PathNode]
    n_iterations: int

    authors_to_expand_src: List[AuthorRecord]
    authors_to_expand_src_next: List[AuthorRecord]
    authors_to_expand_dest: List[AuthorRecord]
    authors_to_expand_dest_next: List[AuthorRecord]

    @staticmethod
    def _normalize_endpoint(value, label, lt_gt_error_key):
        """Validate and normalize one search endpoint (src or dest).

        Returns ``(normalized_value, name_to_query)``: the parsed ADSName
        twice when ``value`` is a name, or ``(normalized_id, None)`` when
        it is an ORCID ID (IDs are resolved to records later).

        Raises PathFinderError for unparseable names or for the '<'/'>'
        modifiers, which would exclude the endpoint itself.
        """
        if is_orcid_id(value):
            return normalize_orcid_id(value), None
        try:
            parsed = ADSName.parse(value)
        except InvalidName:
            raise PathFinderError("invalid_char_in_name",
                                  f'The "{label}" name is invalid.')
        if parsed.excludes_self:
            raise PathFinderError(
                lt_gt_error_key,
                "'<' and '>' are invalid modifiers for the source and "
                "destination authors and can only be used in the "
                "exclusions "
                "list. Try '<=' or '>=' instead.")
        return parsed, parsed

    def __init__(self, src, dest, excluded_names=None):
        """Set up a path search between two authors.

        ``src`` and ``dest`` may each be an author name or an ORCID ID.
        ``excluded_names`` is an optional name/bibcode or list of
        names/bibcodes to exclude from the search.

        Raises PathFinderError for invalid input or when src and dest
        are trivially equal.
        """
        self.repository = Repository()
        if not key_is_valid(src) and not is_orcid_id(src):
            raise PathFinderError("invalid_char_in_name",
                                  'The "source" name is invalid.')
        if not key_is_valid(dest) and not is_orcid_id(dest):
            raise PathFinderError("invalid_char_in_name",
                                  'The "destination" name is invalid.')

        # The validation logic is identical for both endpoints, so it
        # lives in _normalize_endpoint (the original duplicated it).
        names_to_be_queried = []
        src, src_query_name = self._normalize_endpoint(
            src, "source", "src_invalid_lt_gt")
        if src_query_name is not None:
            names_to_be_queried.append(src_query_name)
        dest, dest_query_name = self._normalize_endpoint(
            dest, "destination", "dest_invalid_lt_gt")
        if dest_query_name is not None:
            names_to_be_queried.append(dest_query_name)

        # Only like types are comparable: str == str (two ORCID IDs) or
        # ADSName == ADSName (consistent names)
        if type(src) == type(dest) and src == dest:
            raise PathFinderError(
                "src_is_dest",
                'The "source" and "destination" names are equal (or at least'
                ' consistent). The distance is zero. APPA would like something'
                ' more challenging, please.')

        self.excluded_names = NameAwareSet()
        self.excluded_bibcodes = set()
        if excluded_names is not None:
            if type(excluded_names) is str:
                excluded_names = [excluded_names]
            for name in excluded_names:
                name = name.strip()
                if name == '':
                    continue
                elif is_bibcode(name):
                    self.excluded_bibcodes.add(name)
                else:
                    try:
                        self.excluded_names.add(ADSName.parse(name))
                    except InvalidName:
                        raise PathFinderError(
                            "invalid_excl",
                            f"'{name}' is an invalid name to exclude.")

        # Let the repository batch the upcoming author lookups
        self.repository.notify_of_upcoming_author_request(*names_to_be_queried)
        self.authors_to_expand_src = []
        self.authors_to_expand_src_next = []
        self.authors_to_expand_dest = []
        self.authors_to_expand_dest_next = []

        self.nodes = NameAwareDict()
        self.connecting_nodes = set()

        self.orig_src = src
        self.orig_dest = dest

    def find_path(self):
        """Run a bidirectional breadth-first search from src to dest.

        Alternately expands whichever frontier (src-side or dest-side)
        is smaller, one coauthorship level per iteration, until at least
        one connecting node is found, then builds the final graph.

        Raises PathFinderError if an endpoint has no usable documents,
        if a frontier empties out, or if the distance exceeds 8.
        """
        lb.on_start_path_finding()
        self.n_iterations = 0

        # Resolve the endpoints into PathNodes. ORCID-ID endpoints also
        # pin the set of bibcodes that may legally connect to them.
        if is_orcid_id(self.orig_src):
            src_rec = self.repository.get_author_record_by_orcid_id(
                self.orig_src)
            self.src = PathNode(name=src_rec.name,
                                dist_from_src=0,
                                legal_bibcodes=set(src_rec.documents))
        else:
            src_rec = self.repository.get_author_record(self.orig_src)
            self.src = PathNode(name=self.orig_src, dist_from_src=0)

        if is_orcid_id(self.orig_dest):
            dest_rec = self.repository.get_author_record_by_orcid_id(
                self.orig_dest)
            self.dest = PathNode(name=dest_rec.name,
                                 dist_from_dest=0,
                                 legal_bibcodes=set(dest_rec.documents))
        else:
            dest_rec = self.repository.get_author_record(self.orig_dest)
            self.dest = PathNode(name=self.orig_dest, dist_from_dest=0)

        # If we were given a name and an ORCID ID and they turn out to refer
        # to the same person, error out.
        mixed_name_formats = (
            (type(self.orig_src) == ADSName and type(self.orig_dest) == str) or
            (type(self.orig_src) == str and type(self.orig_dest) == ADSName))
        if mixed_name_formats and src_rec.name == dest_rec.name:
            raise PathFinderError(
                "src_is_dest_after_orcid",
                'After looking up the ORCID ID, the "source" and "destination"'
                ' identities are equal (or at least overlap).')

        # Seed the graph and both frontiers with the two endpoints
        self.nodes[src_rec.name] = self.src
        self.nodes[dest_rec.name] = self.dest
        self.authors_to_expand_src_next.append(self.src.name)
        self.authors_to_expand_dest_next.append(self.dest.name)

        if (len(src_rec.documents) == 0 or all(
            [d in self.excluded_bibcodes for d in src_rec.documents])):
            raise PathFinderError(
                "src_empty",
                "No documents found for " + self.src.name.original_name)
        if (len(dest_rec.documents) == 0 or all(
            [d in self.excluded_bibcodes for d in dest_rec.documents])):
            raise PathFinderError(
                "dest_empty",
                "No documents found for " + self.dest.name.original_name)

        while True:
            lb.d("Beginning new iteration")
            lb.d(f"{len(self.authors_to_expand_src_next)} "
                 "authors on src side")
            lb.d(f"{len(self.authors_to_expand_dest_next)} "
                 "authors on dest side")
            if (len(self.authors_to_expand_src_next) == 0
                    or len(self.authors_to_expand_dest_next) == 0):
                raise PathFinderError(
                    "no_authors_to_expand", "No connections possible after "
                    f"{self.n_iterations} iterations")
            # Of the two lists of authors we could expand, let's always
            # choose the shortest. This tends to get us to a solution
            # faster.
            expanding_from_src = (len(self.authors_to_expand_src_next) < len(
                self.authors_to_expand_dest_next))
            lb.d("Expanding from "
                 f"{'src' if expanding_from_src else 'dest'} side")

            # Promote the "next" frontier of the chosen side to current
            authors = (self.authors_to_expand_src
                       if expanding_from_src else self.authors_to_expand_dest)
            authors_next = (self.authors_to_expand_src_next
                            if expanding_from_src else
                            self.authors_to_expand_dest_next)
            authors.clear()
            authors.extend(authors_next)
            authors_next.clear()

            # There's no point pre-fetching for only one author, and this
            # ensures we don't re-fetch the src and dest authors if they
            # were provided by ORCID ID
            if len(authors) > 1:
                self.repository.notify_of_upcoming_author_request(*authors)
            for expand_author in authors:
                lb.d(f"Expanding author {expand_author}")
                expand_node = self.nodes[expand_author]
                expand_node_dist = expand_node.dist(expanding_from_src)

                # We already have src and dest records handy, and this special
                # handling is required if either was provided by ORCID ID
                if expand_node is self.src:
                    record = src_rec
                elif expand_node is self.dest:
                    record = dest_rec
                else:
                    record = self.repository.get_author_record(expand_author)

                # Here's a tricky one. If "<=Last, F" is in the exclude
                # list, and if we previously came across "Last, First" and
                # we're now expanding that node, we're ok using papers
                # written under "Last, First" but we're _not_ ok using
                # papers written under "Last, F.". So we need to ensure
                # we're allowed to use each paper by ensuring Last, First's
                # name appears on it in a way that's not excluded.
                ok_aliases = [
                    name for name in record.appears_as
                    if name not in self.excluded_names
                ]
                if (len(self.excluded_bibcodes)
                        or len(ok_aliases) != len(record.appears_as)):
                    ok_bibcodes = {
                        bibcode
                        for alias in ok_aliases
                        for bibcode in record.appears_as[alias]
                        if bibcode not in self.excluded_bibcodes
                    }
                else:
                    # No exclusions apply: None means "all bibcodes ok"
                    ok_bibcodes = None

                for coauthor, bibcodes in record.coauthors.items():
                    # lb.d(f"  Checking coauthor {coauthor}")
                    if ok_bibcodes is not None:
                        bibcodes = [
                            bibcode for bibcode in bibcodes
                            if bibcode in ok_bibcodes
                        ]
                    if len(bibcodes) == 0:
                        continue

                    coauthor = ADSName.parse(coauthor)
                    if coauthor in self.excluded_names:
                        # lb.d("   Author is excluded")
                        continue

                    try:
                        node = self.nodes[coauthor]
                        # lb.d(f"   Author exists in graph")
                    except KeyError:
                        # lb.d(f"   New author added to graph")
                        lb.on_coauthor_seen()
                        node = PathNode(name=coauthor)
                        self.nodes[coauthor] = node
                        node.set_dist(expand_node_dist + 1, expanding_from_src)
                        node.neighbors(expanding_from_src).add(expand_node)
                        links = node.links(expanding_from_src)[expand_node]
                        links.update(bibcodes)
                        authors_next.append(coauthor)
                        continue

                    # if (node.dist(expanding_from_src)
                    #         <= expand_node_dist):
                    # This node is closer to the src/dest than we are
                    # and must have been encountered in a
                    # previous expansion cycle. Ignore it.
                    # pass
                    if (node.dist(expanding_from_src) > expand_node_dist):
                        # We provide an equal-or-better route from the
                        # src/dest than the route (if any) that this node
                        # is aware of, meaning this node is a viable next
                        # step along the chain from the src/dest through
                        # us. That it already exists suggests it has
                        # multiple chains of equal length connecting it to
                        # the src or dest.
                        # If the src or dest was given via ORCID ID, we need
                        # to make sure we have a valid connection. (E.g. if
                        # the given ID is for one J Doe and our expand_author
                        # is connected to a different J Doe, we need to
                        # exclude that.
                        if len(node.legal_bibcodes):
                            legal_bibcodes = set(
                                bibcodes) & node.legal_bibcodes
                        else:
                            legal_bibcodes = bibcodes
                        if len(legal_bibcodes):
                            links = node.links(expanding_from_src)[expand_node]
                            links.update(legal_bibcodes)
                            node.set_dist(expand_node_dist + 1,
                                          expanding_from_src)
                            node.neighbors(expanding_from_src).add(expand_node)
                            # lb.d(f"   Added viable step")
                            if self.node_connects(node, expanding_from_src):
                                self.connecting_nodes.add(node)
                                lb.d(f"   Connecting author found!")
            lb.d("All expansions complete")
            self.n_iterations += 1
            if len(self.connecting_nodes) > 0:
                break
            elif self.n_iterations > 8:
                raise PathFinderError(
                    "too_far",
                    "The distance is >8, which is quite far. Giving up.")
            else:
                continue
        self.produce_final_graph()
        lb.set_n_connections(len(self.connecting_nodes))
        lb.set_distance(self.src.dist_from_dest)
        lb.on_stop_path_finding()

    def node_connects(self, node: PathNode, expanding_from_src: bool):
        """Return whether `node` completes a chain between src and dest.

        A node connects if it has recorded neighbors on *both* the src
        side and the dest side, or if it is itself the far endpoint of
        the current expansion (the dest when expanding from the src,
        and vice versa).

        Args:
            node: The node to test.
            expanding_from_src: True if the current expansion cycle
                started from the src, False if it started from the dest.

        Returns:
            bool: True if `node` connects src and dest, else False.
        """
        if (len(node.neighbors_toward_src) > 0
                and len(node.neighbors_toward_dest) > 0):
            return True
        if expanding_from_src and node is self.dest:
            return True
        if not expanding_from_src and node is self.src:
            return True
        # Explicit False instead of an implicit None fall-through, so the
        # method always returns a bool. Callers only truth-test the
        # result, so this is backward compatible.
        return False

    def produce_final_graph(self):
        """Finalize the search graph after connecting nodes are found.

        Three passes:
          1. Symmetrize: during expansion each node only recorded the
             neighbors/links on the side it was reached from; mirror
             those records onto the neighbors so every edge is known
             from both ends.
          2. Prune non-optimal edges: starting from src, keep only the
             edges leading to a neighbor at the minimum remaining
             distance in each direction.
          3. Drop nodes (other than src/dest) left with no neighbor on
             one side — they are not on any src--dest path.
        """
        # Step one: Make all linkages bidirectional
        nodes_to_walk = deque(self.connecting_nodes)
        visited = set()
        while len(nodes_to_walk):
            node = nodes_to_walk.popleft()
            if node in visited:
                continue
            visited.add(node)
            for neighbor in node.neighbors_toward_src:
                if neighbor not in visited:
                    nodes_to_walk.append(neighbor)
                neighbor.neighbors_toward_dest.add(node)
                # A neighbor may already have a shorter route recorded;
                # never lengthen it.
                neighbor.dist_from_dest = min(node.dist_from_dest + 1,
                                              neighbor.dist_from_dest)
                neighbor.links_toward_dest[node] = \
                    node.links_toward_src[neighbor]
            for neighbor in node.neighbors_toward_dest:
                if neighbor not in visited:
                    nodes_to_walk.append(neighbor)
                neighbor.neighbors_toward_src.add(node)
                neighbor.dist_from_src = min(node.dist_from_src + 1,
                                             neighbor.dist_from_src)
                neighbor.links_toward_src[node] = \
                    node.links_toward_dest[neighbor]

        # Step two: Remove any links that aren't along the most direct route
        nodes_to_walk = [self.src]
        while len(nodes_to_walk):
            node = nodes_to_walk.pop()
            if len(node.neighbors_toward_dest):
                dist_of_best_neighbor = min(
                    (neighbor.dist_from_dest
                     for neighbor in node.neighbors_toward_dest))
                # Copy the set we're iterating over, since we mutate it
                # in the loop
                for neighbor in list(node.neighbors_toward_dest):
                    if neighbor.dist_from_dest != dist_of_best_neighbor:
                        # Sever the edge from both endpoints.
                        node.neighbors_toward_dest.remove(neighbor)
                        node.links_toward_dest.pop(neighbor)

                        neighbor.neighbors_toward_src.remove(node)
                        neighbor.links_toward_src.pop(node)
                    else:
                        # Only optimal neighbors are walked further; the
                        # walk therefore proceeds strictly toward dest.
                        nodes_to_walk.append(neighbor)

            if len(node.neighbors_toward_src):
                dist_of_best_neighbor = min(
                    (neighbor.dist_from_src
                     for neighbor in node.neighbors_toward_src))
                for neighbor in list(node.neighbors_toward_src):
                    if neighbor.dist_from_src != dist_of_best_neighbor:
                        node.neighbors_toward_src.remove(neighbor)
                        node.links_toward_src.pop(neighbor)

                        neighbor.neighbors_toward_dest.remove(node)
                        neighbor.links_toward_dest.pop(node)

        # Step three: Remove nodes that aren't on a path between src and dest.
        # Snapshot the items first: deleting entries while iterating the
        # live mapping raises "dictionary changed size during iteration".
        for name, node in list(self.nodes.items()):
            if node is self.src or node is self.dest:
                continue
            if (len(node.neighbors_toward_src) == 0
                    or len(node.neighbors_toward_dest) == 0):
                del self.nodes[name]