def test_calculate_h_index():
    """Check ``calculate_h_index`` against a citation map whose h-index is 5."""
    # Citation counts keyed by record id. Sorted in descending order the
    # counts are 34, 12, 8, 7, 5, 3, 2: five records have at least five
    # citations each, and there are not six records with at least six.
    citation_counts = {
        '123311': 34,
        '123113': 3,
        '3424': 5,
        '3423421': 7,
        '3242346': 8,
        '3426733': 12,
        '1231432': 2,
    }

    h_index = calculate_h_index(citation_counts)

    assert 5 == h_index
Example #2
0
    def serialize(self, pid, record, links_factory=None):
        """Return citation and publication metrics for an author as JSON.

        Scans every literature record whose ``authors.recid`` matches the
        value of the given persistent identifier and aggregates the total
        number of publications and citations, a count per document type,
        the h-index and i10-index, the subject fields and the 25 most
        common keywords.

        :param pid:
            Persistent identifier instance; its ``pid_value`` is used as
            the author recid to search for.

        :param record:
            Record instance (not used by this method).

        :param links_factory:
            Factory function for the link generation, which are added to
            the response (not used by this method).

        :returns:
            A JSON string with the keys ``citations``, ``publications``,
            ``types``, ``hindex`` and ``i10index``; ``fields`` and
            ``keywords`` are only included when at least one was found.
        """
        author_pid = pid.pid_value

        fields = set()
        keywords = []

        statistics = {}
        statistics['citations'] = 0
        statistics['publications'] = 0
        statistics['types'] = {}

        # Citation count per control number; input for the index helpers.
        statistics_citations = {}

        # Only fetch the fields that are aggregated below.
        search = LiteratureSearch().query({
            "match": {
                "authors.recid": author_pid
            }
        }).params(
            _source=[
                "citation_count",
                "control_number",
                "facet_inspire_doc_type",
                "facet_inspire_subjects",
                "thesaurus_terms",
            ]
        )

        for result in search.scan():
            result_source = result.to_dict()

            # Increment the count of the total number of publications.
            statistics['publications'] += 1

            # Increment the count of citations.
            citation_count = result_source.get('citation_count', 0)

            statistics['citations'] += citation_count
            statistics_citations[int(result_source['control_number'])] = \
                citation_count

            # Count how many times certain type of publication was published.
            # The type is determined afresh for every record: a record
            # without a document type must neither be counted under the
            # previous record's type nor leave the name unbound when it
            # happens to be the first record scanned.
            doc_types = result_source.get('facet_inspire_doc_type', [])
            publication_type = doc_types[0] if doc_types else None

            if publication_type:
                type_counts = statistics['types']
                type_counts[publication_type] = \
                    type_counts.get(publication_type, 0) + 1

            # Get fields.
            fields.update(result_source.get('facet_inspire_subjects', []))

            # Get keywords, skipping the '* Automatic Keywords *' marker.
            keywords.extend([
                k for k in force_force_list(
                    get_value(result_source, 'thesaurus_terms.keyword'))
                if k != '* Automatic Keywords *'])

        # Calculate h-index together with i10-index.
        statistics['hindex'] = calculate_h_index(statistics_citations)
        statistics['i10index'] = calculate_i10_index(statistics_citations)

        if fields:
            statistics['fields'] = list(fields)

        # Return the top 25 keywords.
        if keywords:
            statistics['keywords'] = [
                {'count': count, 'keyword': keyword}
                for keyword, count in Counter(keywords).most_common(25)
            ]

        return json.dumps(statistics)
Example #3
0
    def serialize(self, pid, record, links_factory=None):
        """Return citation and publication metrics for an author as JSON.

        Scans every literature record whose ``authors.recid`` matches the
        value of the given persistent identifier and aggregates the total
        number of publications and citations, a count per document type,
        the h-index and i10-index, the subject fields and the 25 most
        common keywords.

        :param pid:
            Persistent identifier instance; its ``pid_value`` is used as
            the author recid to search for.

        :param record:
            Record instance (not used by this method).

        :param links_factory:
            Factory function for the link generation, which are added to
            the response (not used by this method).

        :returns:
            A JSON string with the keys ``citations``, ``publications``,
            ``types``, ``hindex`` and ``i10index``; ``fields`` and
            ``keywords`` are only included when at least one was found.
        """
        author_pid = pid.pid_value

        fields = set()
        keywords = []

        statistics = {}
        statistics['citations'] = 0
        statistics['publications'] = 0
        statistics['types'] = {}

        # Citation count per control number; input for the index helpers.
        statistics_citations = {}

        # Only fetch the fields that are aggregated below.
        search = LiteratureSearch().query({
            "match": {
                "authors.recid": author_pid
            }
        }).params(_source=[
            "citation_count",
            "control_number",
            "facet_inspire_doc_type",
            "facet_inspire_subjects",
            "keywords",
        ])

        for result in search.scan():
            result_source = result.to_dict()

            # Increment the count of the total number of publications.
            statistics['publications'] += 1

            # Increment the count of citations.
            citation_count = result_source.get('citation_count', 0)

            statistics['citations'] += citation_count
            statistics_citations[result_source['control_number']] = \
                citation_count

            # Count how many times certain type of publication was published.
            # The type is determined afresh for every record: a record
            # without a document type must neither be counted under the
            # previous record's type nor leave the name unbound when it
            # happens to be the first record scanned.
            doc_types = result_source.get('facet_inspire_doc_type', [])
            publication_type = doc_types[0] if doc_types else None

            if publication_type:
                type_counts = statistics['types']
                type_counts[publication_type] = \
                    type_counts.get(publication_type, 0) + 1

            # Get fields.
            fields.update(result_source.get('facet_inspire_subjects', []))

            # Get keywords, skipping the '* Automatic Keywords *' marker.
            keywords.extend([
                k
                for k in force_list(get_value(result_source, 'keywords.value'))
                if k != '* Automatic Keywords *'
            ])

        # Calculate h-index together with i10-index.
        statistics['hindex'] = calculate_h_index(statistics_citations)
        statistics['i10index'] = calculate_i10_index(statistics_citations)

        if fields:
            statistics['fields'] = list(fields)

        # Return the top 25 keywords.
        if keywords:
            statistics['keywords'] = [
                {'count': count, 'keyword': keyword}
                for keyword, count in Counter(keywords).most_common(25)
            ]

        return json.dumps(statistics)
def test_calculate_h_index_ignores_none_values(citations_with_none_values):
    """Check that ``calculate_h_index`` yields 4 for the given citations.

    ``citations_with_none_values`` is supplied from outside this function
    (presumably a pytest fixture whose citation map contains ``None``
    counts -- confirm against its definition).
    """
    h_index = calculate_h_index(citations_with_none_values)

    assert 4 == h_index