def get_name_as_in_ADS(target_name, names_in_result: list):
    """For presentation in the UI, figures out how to capitalize a name

    The user may have typed in the query names in all lowercase. For the
    large banner at the top of the page, it would be nice to format the
    names more properly. Rather than just defaulting to
    first-letter-uppercase, we can use our ADS data to present the name in
    a form (or one of the forms) ADS has for the name. This means we may
    also pick up diacritics.

    Looks through all the publications belonging to the name and how the
    author's name appears in those publications. Grabs (one of) the
    most-detailed forms. If it contains more given names than the target
    name, truncates the list. Shortens given names to initials if the
    target name has an initial at that position."""
    # Unique-ify names_in_result
    names_in_result = list(set(names_in_result))
    repo = Repository(can_skip_refresh=True)
    names_in_result = [ADSName.parse(name) for name in names_in_result]
    orcid = is_orcid_id(target_name)
    if orcid:
        record = repo.get_author_record_by_orcid_id(target_name)
    else:
        target_name = ADSName.parse(target_name)
        record = repo.get_author_record(target_name)
    aliases = record.appears_as.keys()
    aliases = [ADSName.parse(alias) for alias in aliases]

    # Remove all aliases that aren't consistent with any of the name forms
    # used in the set of possible chains. E.g. if the user searched for
    # "Last" and all chains terminate at "Last, B.", then we shouldn't view
    # "Last, I." as a viable alias.
    aliases = [alias for alias in aliases if alias in names_in_result]

    # Grab the most-detailed alias. As tie-breaker, choose the form with
    # the most publications.
    alias = sorted(
        [(a.level_of_detail,
          len(record.appears_as[a.original_name]),
          a.original_name)
         for a in aliases])[-1][-1]
    alias = ADSName.parse(alias, preserve=True)

    if orcid:
        gns = alias.given_names
    else:
        # Trim it down to size
        gns = alias.given_names
        if len(gns) > len(target_name.given_names):
            gns = gns[:len(target_name.given_names)]
        # Ensure we have initials where we need them
        gns = [gn if len(tgn) > 1 else gn[0]
               for gn, tgn in zip(gns, target_name.given_names)]

    final_name = ADSName.parse(alias.last_name, *gns, preserve=True)
    return final_name.full_name
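
# To make the trimming-and-initials step above concrete, here is a
# minimal, self-contained sketch of that logic using plain lists instead
# of ADSName objects. The name forms are hypothetical, and
# trim_given_names() is an illustrative helper, not part of this module.
def trim_given_names(alias_gns, target_gns):
    # Keep no more given names than the target name supplied
    gns = alias_gns[:len(target_gns)]
    # Where the target gave only an initial, shorten to an initial
    return [gn if len(tgn) > 1 else gn[0]
            for gn, tgn in zip(gns, target_gns)]

# User typed "mueller, j. robert"; ADS has "Müller, Johann Robert Karl":
# trim_given_names(['Johann', 'Robert', 'Karl'], ['j', 'robert'])
# -> ['J', 'Robert'] (the extra given name is dropped, and an initial is
# used where the target name had an initial)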
class TestRecordCompression(TestCase):
    """Uses mock_backing_cache records to test compression & decompression

    Loads the author and document records. Checks that the decompressed
    records appear consistent. Re-compresses those records and checks that
    they match the compressed source records."""
    def setUp(self):
        self.real_backing_cache = cache_buddy.backing_cache
        cache_buddy.backing_cache = mock_backing_cache
        self.repository = Repository()

    def tearDown(self):
        cache_buddy.backing_cache = self.real_backing_cache
        self.real_backing_cache = None
        cache_buddy._loaded_authors = {}
        cache_buddy._loaded_documents = {}

    def test_author_record_compression(self):
        for author in mock_backing_cache.authors:
            raw_data = mock_backing_cache.load_author(author)
            raw_data = {**raw_data}
            del raw_data['version']
            record = self.repository.get_author_record(author)

            # We have an uncompressed record. Check it for consistency
            for alias in record.appears_as:
                raw_datum = raw_data['appears_as'][alias].split(',')
                self.assertEqual(len(record.appears_as[alias]),
                                 len(raw_datum))
                for idx, bibcode in zip(raw_datum,
                                        record.appears_as[alias]):
                    self.assertEqual(bibcode, record.documents[int(idx)])

            for coauthor in record.coauthors:
                raw_datum = raw_data['coauthors'][coauthor].split(',')
                self.assertEqual(len(record.coauthors[coauthor]),
                                 len(raw_datum))
                for idx, bibcode in zip(raw_datum,
                                        record.coauthors[coauthor]):
                    self.assertEqual(bibcode, record.documents[int(idx)])

            uncompressed_appears_as = copy.deepcopy(record.appears_as)
            uncompressed_coauthors = copy.deepcopy(record.coauthors)

            # Make sure the copy is independent of the original
            native_copy = record.copy()
            record.compress()
            self.assertNotEqual(record.asdict(), native_copy.asdict())

            for alias in record.appears_as:
                self.assertEqual(
                    len(record.appears_as[alias].split(',')),
                    len(uncompressed_appears_as[alias]))
            for coauthor in record.coauthors:
                self.assertEqual(
                    len(record.coauthors[coauthor].split(',')),
                    len(uncompressed_coauthors[coauthor]))

            # The source record in mock_backing_cache is compressed. Check
            # that it matches the re-compressed record
            self.assertEqual(raw_data, record.asdict())

            # Make sure the copy is independent of the original
            native_copy = record.copy()
            record.decompress()
            self.assertNotEqual(record.asdict(), native_copy.asdict())

    def test_document_record_compression(self):
        for document, raw_data in mock_backing_cache.documents.items():
            raw_data = {**raw_data}
            del raw_data['version']
            record = self.repository.get_document(document)

            # We have an uncompressed record. Check it for consistency
            self.assertEqual(len(record.authors), len(record.affils))
            self.assertEqual(len(record.authors), len(record.orcid_ids))
            self.assertEqual(len(record.authors),
                             len(record.orcid_id_src))
            for orcid_id, src in zip(record.orcid_ids,
                                     record.orcid_id_src):
                if orcid_id == '':
                    self.assertEqual(src, 0)
                if src != 0:
                    self.assertNotEqual('', orcid_id)

            uncompressed_affils = copy.deepcopy(record.affils)
            uncompressed_orcid_ids = copy.deepcopy(record.orcid_ids)
            uncompressed_orcid_srcs = copy.deepcopy(record.orcid_id_src)

            # Make sure the copy is independent of the original
            native_copy = record.copy()
            record.compress()
            self.assertNotEqual(record.asdict(), native_copy.asdict())

            # Ensure only empty items are removed
            for affil in uncompressed_affils[len(record.affils):]:
                self.assertEqual(affil, '')
            for orcid_id in uncompressed_orcid_ids[len(record.orcid_ids):]:
                self.assertEqual(orcid_id, '')
            for orcid_src in uncompressed_orcid_srcs[
                    len(record.orcid_id_src.split(',')):]:
                self.assertEqual(orcid_src, 0)

            # The source record in mock_backing_cache is compressed. Check
            # that it matches the re-compressed record
            self.assertEqual(raw_data, record.asdict())

            # Make sure the copy is independent of the original
            native_copy = record.copy()
            record.decompress()
            self.assertNotEqual(record.asdict(), native_copy.asdict())
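
# For orientation, the compressed layout these tests exercise looks
# roughly like the sketch below. This is an inference from the assertions
# above, and the record contents are hypothetical: each bibcode is stored
# once in 'documents', while 'appears_as' and 'coauthors' map to
# comma-joined strings of indices into that list.
_example_compressed_author = {
    'name': 'author, a.',
    'documents': ['paperAB', 'paperAE'],
    'appears_as': {'Author, A.': '0,1'},
    'coauthors': {'Coauthor, B.': '0'},
}

# Decompression would expand each index string back into a bibcode list:
_decompressed_appears_as = {
    alias: [_example_compressed_author['documents'][int(i)]
            for i in idxs.split(',')]
    for alias, idxs in _example_compressed_author['appears_as'].items()
}
# -> {'Author, A.': ['paperAB', 'paperAE']}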
class PathFinder:
    repository: Repository
    nodes: NameAwareDict
    src: PathNode
    dest: PathNode
    excluded_names: NameAwareSet
    excluded_bibcodes: set
    connecting_nodes: Set[PathNode]
    n_iterations: int

    authors_to_expand_src: List[ADSName]
    authors_to_expand_src_next: List[ADSName]
    authors_to_expand_dest: List[ADSName]
    authors_to_expand_dest_next: List[ADSName]

    def __init__(self, src, dest, excluded_names=None):
        self.repository = Repository()
        if not key_is_valid(src) and not is_orcid_id(src):
            raise PathFinderError(
                "invalid_char_in_name",
                'The "source" name is invalid.')
        if not key_is_valid(dest) and not is_orcid_id(dest):
            raise PathFinderError(
                "invalid_char_in_name",
                'The "destination" name is invalid.')

        names_to_be_queried = []
        if is_orcid_id(src):
            src = normalize_orcid_id(src)
        else:
            try:
                src = ADSName.parse(src)
            except InvalidName:
                raise PathFinderError(
                    "invalid_char_in_name",
                    'The "source" name is invalid.')
            if src.excludes_self:
                raise PathFinderError(
                    "src_invalid_lt_gt",
                    "'<' and '>' are invalid modifiers for the source and "
                    "destination authors and can only be used in the "
                    "exclusions list. Try '<=' or '>=' instead.")
            names_to_be_queried.append(src)

        if is_orcid_id(dest):
            dest = normalize_orcid_id(dest)
        else:
            try:
                dest = ADSName.parse(dest)
            except InvalidName:
                raise PathFinderError(
                    "invalid_char_in_name",
                    'The "destination" name is invalid.')
            if dest.excludes_self:
                raise PathFinderError(
                    "dest_invalid_lt_gt",
                    "'<' and '>' are invalid modifiers for the source and "
                    "destination authors and can only be used in the "
                    "exclusions list. Try '<=' or '>=' instead.")
            names_to_be_queried.append(dest)

        if type(src) == type(dest) and src == dest:
            raise PathFinderError(
                "src_is_dest",
                'The "source" and "destination" names are equal (or at'
                ' least consistent). The distance is zero. APPA would like'
                ' something more challenging, please.')

        self.excluded_names = NameAwareSet()
        self.excluded_bibcodes = set()
        if excluded_names is not None:
            if type(excluded_names) is str:
                excluded_names = [excluded_names]
            for name in excluded_names:
                name = name.strip()
                if name == '':
                    continue
                elif is_bibcode(name):
                    self.excluded_bibcodes.add(name)
                else:
                    try:
                        self.excluded_names.add(ADSName.parse(name))
                    except InvalidName:
                        raise PathFinderError(
                            "invalid_excl",
                            f"'{name}' is an invalid name to exclude.")

        self.repository.notify_of_upcoming_author_request(
            *names_to_be_queried)
        self.authors_to_expand_src = []
        self.authors_to_expand_src_next = []
        self.authors_to_expand_dest = []
        self.authors_to_expand_dest_next = []

        self.nodes = NameAwareDict()
        self.connecting_nodes = set()

        self.orig_src = src
        self.orig_dest = dest

    def find_path(self):
        lb.on_start_path_finding()
        self.n_iterations = 0

        if is_orcid_id(self.orig_src):
            src_rec = self.repository.get_author_record_by_orcid_id(
                self.orig_src)
            self.src = PathNode(name=src_rec.name, dist_from_src=0,
                                legal_bibcodes=set(src_rec.documents))
        else:
            src_rec = self.repository.get_author_record(self.orig_src)
            self.src = PathNode(name=self.orig_src, dist_from_src=0)

        if is_orcid_id(self.orig_dest):
            dest_rec = self.repository.get_author_record_by_orcid_id(
                self.orig_dest)
            self.dest = PathNode(name=dest_rec.name, dist_from_dest=0,
                                 legal_bibcodes=set(dest_rec.documents))
        else:
            dest_rec = self.repository.get_author_record(self.orig_dest)
            self.dest = PathNode(name=self.orig_dest, dist_from_dest=0)

        # If we were given a name and an ORCID ID and they turn out to
        # refer to the same person, error out.
        mixed_name_formats = (
            (type(self.orig_src) == ADSName
             and type(self.orig_dest) == str)
            or (type(self.orig_src) == str
                and type(self.orig_dest) == ADSName))
        if mixed_name_formats and src_rec.name == dest_rec.name:
            raise PathFinderError(
                "src_is_dest_after_orcid",
                'After looking up the ORCID ID, the "source" and'
                ' "destination" identities are equal (or at least'
                ' overlap).')

        self.nodes[src_rec.name] = self.src
        self.nodes[dest_rec.name] = self.dest
        self.authors_to_expand_src_next.append(self.src.name)
        self.authors_to_expand_dest_next.append(self.dest.name)

        if (len(src_rec.documents) == 0
                or all([d in self.excluded_bibcodes
                        for d in src_rec.documents])):
            raise PathFinderError(
                "src_empty",
                "No documents found for " + self.src.name.original_name)
        if (len(dest_rec.documents) == 0
                or all([d in self.excluded_bibcodes
                        for d in dest_rec.documents])):
            raise PathFinderError(
                "dest_empty",
                "No documents found for " + self.dest.name.original_name)

        while True:
            lb.d("Beginning new iteration")
            lb.d(f"{len(self.authors_to_expand_src_next)} "
                 "authors on src side")
            lb.d(f"{len(self.authors_to_expand_dest_next)} "
                 "authors on dest side")
            if (len(self.authors_to_expand_src_next) == 0
                    or len(self.authors_to_expand_dest_next) == 0):
                raise PathFinderError(
                    "no_authors_to_expand",
                    "No connections possible after "
                    f"{self.n_iterations} iterations")

            # Of the two lists of authors we could expand, let's always
            # choose the shortest. This tends to get us to a solution
            # faster.
            expanding_from_src = (
                len(self.authors_to_expand_src_next)
                < len(self.authors_to_expand_dest_next))
            lb.d("Expanding from "
                 f"{'src' if expanding_from_src else 'dest'} side")

            authors = (self.authors_to_expand_src
                       if expanding_from_src
                       else self.authors_to_expand_dest)
            authors_next = (self.authors_to_expand_src_next
                            if expanding_from_src
                            else self.authors_to_expand_dest_next)
            authors.clear()
            authors.extend(authors_next)
            authors_next.clear()

            # There's no point pre-fetching for only one author, and this
            # ensures we don't re-fetch the src and dest authors if they
            # were provided by ORCID ID
            if len(authors) > 1:
                self.repository.notify_of_upcoming_author_request(*authors)
            for expand_author in authors:
                lb.d(f"Expanding author {expand_author}")
                expand_node = self.nodes[expand_author]
                expand_node_dist = expand_node.dist(expanding_from_src)
                # We already have src and dest records handy, and this
                # special handling is required if either was provided by
                # ORCID ID
                if expand_node is self.src:
                    record = src_rec
                elif expand_node is self.dest:
                    record = dest_rec
                else:
                    record = self.repository.get_author_record(
                        expand_author)

                # Here's a tricky one. If "<=Last, F" is in the exclude
                # list, and if we previously came across "Last, First" and
                # we're now expanding that node, we're ok using papers
                # written under "Last, First" but we're _not_ ok using
                # papers written under "Last, F.". So we need to ensure
                # we're allowed to use each paper by ensuring Last, First's
                # name appears on it in a way that's not excluded.
                ok_aliases = [
                    name for name in record.appears_as
                    if name not in self.excluded_names]
                if (len(self.excluded_bibcodes)
                        or len(ok_aliases) != len(record.appears_as)):
                    ok_bibcodes = {
                        bibcode
                        for alias in ok_aliases
                        for bibcode in record.appears_as[alias]
                        if bibcode not in self.excluded_bibcodes}
                else:
                    ok_bibcodes = None

                for coauthor, bibcodes in record.coauthors.items():
                    # lb.d(f"  Checking coauthor {coauthor}")
                    if ok_bibcodes is not None:
                        bibcodes = [bibcode for bibcode in bibcodes
                                    if bibcode in ok_bibcodes]
                        if len(bibcodes) == 0:
                            continue

                    coauthor = ADSName.parse(coauthor)
                    if coauthor in self.excluded_names:
                        # lb.d("  Author is excluded")
                        continue

                    try:
                        node = self.nodes[coauthor]
                        # lb.d("  Author exists in graph")
                    except KeyError:
                        # lb.d("  New author added to graph")
                        lb.on_coauthor_seen()
                        node = PathNode(name=coauthor)
                        self.nodes[coauthor] = node
                        node.set_dist(expand_node_dist + 1,
                                      expanding_from_src)
                        node.neighbors(expanding_from_src).add(expand_node)
                        links = node.links(expanding_from_src)[expand_node]
                        links.update(bibcodes)
                        authors_next.append(coauthor)
                        continue

                    # if (node.dist(expanding_from_src)
                    #         <= expand_node_dist):
                        # This node is closer to the src/dest than we are
                        # and must have been encountered in a previous
                        # expansion cycle. Ignore it.
                        # pass
                    if node.dist(expanding_from_src) > expand_node_dist:
                        # We provide an equal-or-better route from the
                        # src/dest than the route (if any) that this node
                        # is aware of, meaning this node is a viable next
                        # step along the chain from the src/dest through
                        # us. That it already exists suggests it has
                        # multiple chains of equal length connecting it to
                        # the src or dest.
                        # If the src or dest was given via ORCID ID, we
                        # need to make sure we have a valid connection.
                        # (E.g. if the given ID is for one J Doe and our
                        # expand_author is connected to a different J Doe,
                        # we need to exclude that.)
                        if len(node.legal_bibcodes):
                            legal_bibcodes = (set(bibcodes)
                                              & node.legal_bibcodes)
                        else:
                            legal_bibcodes = bibcodes
                        if len(legal_bibcodes):
                            links = node.links(
                                expanding_from_src)[expand_node]
                            links.update(legal_bibcodes)
                            node.set_dist(expand_node_dist + 1,
                                          expanding_from_src)
                            node.neighbors(expanding_from_src).add(
                                expand_node)
                            # lb.d("  Added viable step")
                            if self.node_connects(node,
                                                  expanding_from_src):
                                self.connecting_nodes.add(node)
                                lb.d("  Connecting author found!")

            lb.d("All expansions complete")
            self.n_iterations += 1
            if len(self.connecting_nodes) > 0:
                break
            elif self.n_iterations > 8:
                raise PathFinderError(
                    "too_far",
                    "The distance is >8, which is quite far. Giving up.")
            else:
                continue

        self.produce_final_graph()
        lb.set_n_connections(len(self.connecting_nodes))
        lb.set_distance(self.src.dist_from_dest)
        lb.on_stop_path_finding()

    def node_connects(self, node: PathNode, expanding_from_src: bool):
        if (len(node.neighbors_toward_src) > 0
                and len(node.neighbors_toward_dest) > 0):
            return True
        if expanding_from_src and node is self.dest:
            return True
        if not expanding_from_src and node is self.src:
            return True
        return False

    def produce_final_graph(self):
        # Step one: Make all linkages bidirectional
        nodes_to_walk = deque(self.connecting_nodes)
        visited = set()
        while len(nodes_to_walk):
            node = nodes_to_walk.popleft()
            if node in visited:
                continue
            visited.add(node)
            for neighbor in node.neighbors_toward_src:
                if neighbor not in visited:
                    nodes_to_walk.append(neighbor)
                neighbor.neighbors_toward_dest.add(node)
                neighbor.dist_from_dest = min(node.dist_from_dest + 1,
                                              neighbor.dist_from_dest)
                neighbor.links_toward_dest[node] = \
                    node.links_toward_src[neighbor]
            for neighbor in node.neighbors_toward_dest:
                if neighbor not in visited:
                    nodes_to_walk.append(neighbor)
                neighbor.neighbors_toward_src.add(node)
                neighbor.dist_from_src = min(node.dist_from_src + 1,
                                             neighbor.dist_from_src)
                neighbor.links_toward_src[node] = \
                    node.links_toward_dest[neighbor]

        # Step two: Remove any links that aren't along the most direct
        # route
        nodes_to_walk = [self.src]
        while len(nodes_to_walk):
            node = nodes_to_walk.pop()
            if len(node.neighbors_toward_dest):
                dist_of_best_neighbor = min(
                    neighbor.dist_from_dest
                    for neighbor in node.neighbors_toward_dest)
                # Copy the set we're iterating over, since we mutate it
                # in the loop
                for neighbor in list(node.neighbors_toward_dest):
                    if neighbor.dist_from_dest != dist_of_best_neighbor:
                        node.neighbors_toward_dest.remove(neighbor)
                        node.links_toward_dest.pop(neighbor)
                        neighbor.neighbors_toward_src.remove(node)
                        neighbor.links_toward_src.pop(node)
                    else:
                        nodes_to_walk.append(neighbor)
            if len(node.neighbors_toward_src):
                dist_of_best_neighbor = min(
                    neighbor.dist_from_src
                    for neighbor in node.neighbors_toward_src)
                for neighbor in list(node.neighbors_toward_src):
                    if neighbor.dist_from_src != dist_of_best_neighbor:
                        node.neighbors_toward_src.remove(neighbor)
                        node.links_toward_src.pop(neighbor)
                        neighbor.neighbors_toward_dest.remove(node)
                        neighbor.links_toward_dest.pop(node)

        # Step three: Remove nodes that aren't on a path between src and
        # dest. Iterate over a copy, since we delete entries as we go.
        for name, node in list(self.nodes.items()):
            if node is self.src or node is self.dest:
                continue
            if (len(node.neighbors_toward_src) == 0
                    or len(node.neighbors_toward_dest) == 0):
                del self.nodes[name]
class TestRepository(TestCase):
    def setUp(self):
        self.real_backing_cache = cache_buddy.backing_cache
        cache_buddy.backing_cache = mock_backing_cache
        self.repository = Repository()
        mock_backing_cache.store_author.reset_mock()

    def tearDown(self):
        cache_buddy.backing_cache = self.real_backing_cache
        cache_buddy._loaded_authors = {}
        cache_buddy._loaded_documents = {}
        mock_backing_cache.store_author.reset_mock()

    def test_get_author(self):
        record = self.repository.get_author_record('author, a.')
        record.compress()
        record = record.asdict()
        record['version'] = cache_buddy.AUTHOR_VERSION_NUMBER
        self.assertEqual(record,
                         mock_backing_cache.load_author('author, a.'))

    def test_get_document(self):
        record = self.repository.get_document('paperAB')
        record.compress()
        record = record.asdict()
        record['version'] = cache_buddy.DOCUMENT_VERSION_NUMBER
        self.assertEqual(record, mock_backing_cache.documents['paperAB'])

    def test_author_record_generation(self):
        record = self.repository.get_author_record('>author, a.')
        self.assertEqual(len(record.documents), 3)
        self.assertEqual(record.documents[0], 'paperAB2')
        self.assertEqual(record.documents[1], 'paperAE')
        self.assertEqual(record.documents[2], 'paperAK')
        mock_backing_cache.store_author.assert_called_once()
        cached_record = mock_backing_cache.store_author.call_args[0][0]
        self.assertEqual(cached_record['name'], '>author, a.')
        self.assertEqual(cached_record['documents'], record.documents)

        mock_backing_cache.store_author.reset_mock()
        record = self.repository.get_author_record('=author, a.')
        self.assertEqual(len(record.documents), 1)
        self.assertEqual(sorted(record.documents)[0], 'paperAB')
        mock_backing_cache.store_author.assert_called_once()
        cached_record = mock_backing_cache.store_author.call_args[0][0]
        self.assertEqual(cached_record['name'], '=author, a.')
        self.assertEqual(cached_record['documents'], record.documents)

        mock_backing_cache.store_author.reset_mock()
        record = self.repository.get_author_record('<author, aa')
        self.assertEqual(len(record.documents), 1)
        self.assertEqual(sorted(record.documents)[0], 'paperAB')
        mock_backing_cache.store_author.assert_called_once()
        cached_record = mock_backing_cache.store_author.call_args[0][0]
        self.assertEqual(cached_record['name'], '<author, aa')
        self.assertEqual(cached_record['documents'], record.documents)
        mock_backing_cache.store_author.reset_mock()
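
# A note on the name-modifier syntax exercised above, hedged as an
# inference from these expectations rather than a spec: '=name' appears
# to match only the exact form, '>name' strictly more specific forms
# (extra or spelled-out given names), and '<name' strictly less specific
# forms, with '>=' / '<=' as the inclusive variants mentioned in
# PathFinder's error messages. A toy sketch of the "more specific"
# relation for initial-style given names (illustrative only, not
# ADSName's actual logic):
def _name_is_more_specific(general, specific):
    """True if `specific` could be a more detailed form of `general`."""
    glast, _, ggiven = general.partition(', ')
    slast, _, sgiven = specific.partition(', ')
    return (glast == slast
            and sgiven != ggiven
            and sgiven.rstrip('.').startswith(ggiven.rstrip('.')))

# _name_is_more_specific('author, a.', 'author, aa')  # -> True
# _name_is_more_specific('author, a.', 'author, a.')  # -> False (equal)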