Python Bib_matrixの例、invenio.bibauthorid_backinterface.Bib_matrix Pythonの例

コード例 #1

0

ファイルを表示

ファイル: bibauthorid_prob_matrix.py プロジェクト: ppiotr/Invenio

    def recalculate(self, cluster_set):
        '''
        Rebuild the probability matrix for the given cluster set.

        The current matrix is kept aside as a source of reusable values
        and replaced by a new Bib_matrix built from cluster_set. Every
        pair of distinct clusters is visited once (ordered by id()) and,
        unless the first cluster "hates" the second, each pair of their
        bibs receives a value: the previous one when both bibs are
        reported as up to date and the previous value is truthy,
        otherwise a freshly computed one from compare_bibrefrecs.

        @param cluster_set: A cluster set object, used to initialize
        the matrix.
        '''
        previous = self._bib_matrix
        fresh_bibs = self.__get_up_to_date_bibs()
        any_fresh = bool(fresh_bibs)
        self._bib_matrix = Bib_matrix(cluster_set)

        total_bibs = cluster_set.num_all_bibs
        # Number of unordered bib pairs; never let it be zero because it
        # is the divisor of the progress ratio reported below.
        expected = ((total_bibs * (total_bibs - 1)) / 2) or 1

        computed = 0      # pairs evaluated with compare_bibrefrecs
        reused = 0        # pairs taken over from the previous matrix
        reported = 0      # computed + reused at the last status update
        cleaned_at = 0    # value of computed at the last cache cleanup

        for cl1 in cluster_set.clusters:

            # Report progress once more than 100000 pairs were handled
            # since the previous report.
            handled = computed + reused
            if handled - reported > 100000:
                fraction = (float(reused) + computed) / expected
                message = "Prob matrix: calc %d, opti %d." % (computed, reused)
                update_status(fraction, message)
                reported = computed + reused

            # Drop the comparison caches after more than 2000000 fresh
            # computations since the previous cleanup.
            if computed - cleaned_at > 2000000:
                clear_comparison_caches()
                cleaned_at = computed

            for cl2 in cluster_set.clusters:
                # Visit every unordered cluster pair once and skip the
                # pairs rejected by hates().
                if id(cl1) >= id(cl2) or cl1.hates(cl2):
                    continue

                for bib1 in cl1.bibs:
                    for bib2 in cl2.bibs:
                        cached = None
                        if any_fresh and bib1 in fresh_bibs and bib2 in fresh_bibs:
                            cached = previous[bib1, bib2]

                        if cached:
                            reused += 1
                            val = cached
                            if bconfig.DEBUG_CHECKS:
                                # Cross-check the reused value against
                                # a fresh comparison.
                                assert _debug_is_eq_v(
                                    val,
                                    compare_bibrefrecs(bib1, bib2))
                        else:
                            computed += 1
                            val = compare_bibrefrecs(bib1, bib2)

                        self._bib_matrix[bib1, bib2] = val

        clear_comparison_caches()
        update_status_final("Matrix done. %d calc, %d opt." % (computed, reused))

コード例 #2

0

ファイルを表示

ファイル: bibauthorid_prob_matrix.py プロジェクト: AlbertoPeon/invenio

    def recalculate(self, cluster_set):
        '''
        Recompute the probability matrix from scratch for cluster_set.

        A new Bib_matrix initialised with cluster_set replaces the
        current one; the old matrix is only consulted for values that
        can be reused. A value is reused when both bibs of a pair are
        among the up-to-date bibs and the old matrix holds a truthy
        value for them; every other pair is evaluated with
        compare_bibrefrecs.

        @param cluster_set: A cluster set object, used to initialize
        the matrix.
        '''
        stale_matrix = self._bib_matrix
        valid_bibs = self.__get_up_to_date_bibs()
        has_valid_bibs = bool(valid_bibs)
        self._bib_matrix = Bib_matrix(cluster_set)

        bib_count = cluster_set.num_all_bibs
        # Total number of unordered bib pairs, used as the divisor of
        # the progress ratio, hence forced to be non-zero.
        expected = ((bib_count * (bib_count - 1)) / 2) or 1

        n_calc, n_opti = 0, 0
        last_print, last_clean = 0, 0

        for cl1 in cluster_set.clusters:

            # Progress is reported only after more than 100000 newly
            # processed pairs.
            if (n_calc + n_opti) - last_print > 100000:
                update_status(
                    (float(n_opti) + n_calc) / expected,
                    "Prob matrix: calc %d, opti %d." % (n_calc, n_opti))
                last_print = n_calc + n_opti

            # Comparison caches are cleared after more than 2000000
            # fresh computations.
            if n_calc - last_clean > 2000000:
                clear_comparison_caches()
                last_clean = n_calc

            for cl2 in cluster_set.clusters:
                # id() ordering makes each unordered pair of clusters
                # appear exactly once; hates() vetoes the pair.
                if id(cl1) >= id(cl2) or cl1.hates(cl2):
                    continue

                for bib1 in cl1.bibs:
                    for bib2 in cl2.bibs:
                        old_val = None
                        if has_valid_bibs and bib1 in valid_bibs and bib2 in valid_bibs:
                            old_val = stale_matrix[bib1, bib2]

                        if old_val:
                            n_opti += 1
                            val = old_val
                            if bconfig.DEBUG_CHECKS:
                                # Verify the reused value still matches
                                # a fresh comparison.
                                assert _debug_is_eq_v(val, compare_bibrefrecs(bib1, bib2))
                        else:
                            n_calc += 1
                            val = compare_bibrefrecs(bib1, bib2)

                        self._bib_matrix[bib1, bib2] = val

        clear_comparison_caches()
        update_status_final("Matrix done. %d calc, %d opt." % (n_calc, n_opti))

コード例 #3

0

ファイルを表示

ファイル: bibauthorid_prob_matrix.py プロジェクト: ppiotr/Invenio

 def __init__(self):
     '''Create the backing Bib_matrix (constructed without arguments).'''
     backing_matrix = Bib_matrix()
     self._bib_matrix = backing_matrix

コード例 #4

0

ファイルを表示

ファイル: bibauthorid_prob_matrix.py プロジェクト: ppiotr/Invenio

class ProbabilityMatrix(object):
    '''
    Holds the pairwise comparison values between bibs in a Bib_matrix
    and knows how to load, store, query and recompute them.
    '''

    def __init__(self):
        '''Create the backing Bib_matrix (constructed without arguments).'''
        backing_matrix = Bib_matrix()
        self._bib_matrix = backing_matrix

    def load(self, lname, load_map=True, load_matrix=True):
        '''
        Load the backing matrix, reporting status before and after.
        @param lname: forwarded to Bib_matrix.load.
        @param load_map: forwarded to Bib_matrix.load.
        @param load_matrix: forwarded to Bib_matrix.load.
        '''
        update_status(0.0, "Loading probability matrix...")
        matrix = self._bib_matrix
        matrix.load(lname, load_map, load_matrix)
        update_status_final("Probability matrix loaded.")

    def store(self, name):
        '''
        Store the backing matrix, reporting status before and after.
        @param name: forwarded to Bib_matrix.store.
        '''
        update_status(0.0, "Saving probability matrix...")
        matrix = self._bib_matrix
        matrix.store(name)
        update_status_final("Probability matrix saved.")

    def __getitem__(self, bibs):
        '''
        Return the stored value for a pair of bibs.
        @param bibs: an indexable whose first two items form the pair.
        '''
        first = bibs[0]
        second = bibs[1]
        return self._bib_matrix[first, second]

    def __get_up_to_date_bibs(self):
        '''
        Return, as a frozenset, the matrix keys kept by
        filter_modified_record_ids for the matrix creation time.
        '''
        matrix = self._bib_matrix
        keys = matrix.get_keys()
        created = matrix.creation_time
        return frozenset(filter_modified_record_ids(keys, created))

    def is_up_to_date(self, cluster_set):
        '''
        Tell whether every bib of cluster_set is among the up-to-date
        bibs of the matrix.
        @param cluster_set: object providing all_bibs().
        '''
        fresh = self.__get_up_to_date_bibs()
        wanted = frozenset(cluster_set.all_bibs())
        return fresh.issuperset(wanted)

    def recalculate(self, cluster_set):
        '''
        Rebuild the probability matrix for the given cluster set.

        The current matrix is kept aside as a source of reusable values
        and replaced by a new Bib_matrix built from cluster_set. Every
        pair of distinct clusters is visited once (ordered by id()) and,
        unless the first cluster "hates" the second, each pair of their
        bibs receives a value: the previous one when both bibs are up
        to date and the previous value is truthy, otherwise a freshly
        computed one from compare_bibrefrecs.

        @param cluster_set: A cluster set object, used to initialize
        the matrix.
        '''
        previous = self._bib_matrix
        fresh_bibs = self.__get_up_to_date_bibs()
        any_fresh = bool(fresh_bibs)
        self._bib_matrix = Bib_matrix(cluster_set)

        total_bibs = cluster_set.num_all_bibs
        # Number of unordered bib pairs; never let it be zero because it
        # is the divisor of the progress ratio reported below.
        expected = ((total_bibs * (total_bibs - 1)) / 2) or 1

        computed = 0      # pairs evaluated with compare_bibrefrecs
        reused = 0        # pairs taken over from the previous matrix
        reported = 0      # computed + reused at the last status update
        cleaned_at = 0    # value of computed at the last cache cleanup

        for cl1 in cluster_set.clusters:

            # Report progress once more than 100000 pairs were handled
            # since the previous report.
            handled = computed + reused
            if handled - reported > 100000:
                fraction = (float(reused) + computed) / expected
                message = "Prob matrix: calc %d, opti %d." % (computed, reused)
                update_status(fraction, message)
                reported = computed + reused

            # Drop the comparison caches after more than 2000000 fresh
            # computations since the previous cleanup.
            if computed - cleaned_at > 2000000:
                clear_comparison_caches()
                cleaned_at = computed

            for cl2 in cluster_set.clusters:
                # Visit every unordered cluster pair once and skip the
                # pairs rejected by hates().
                if id(cl1) >= id(cl2) or cl1.hates(cl2):
                    continue

                for bib1 in cl1.bibs:
                    for bib2 in cl2.bibs:
                        cached = None
                        if any_fresh and bib1 in fresh_bibs and bib2 in fresh_bibs:
                            cached = previous[bib1, bib2]

                        if cached:
                            reused += 1
                            val = cached
                            if bconfig.DEBUG_CHECKS:
                                # Cross-check the reused value against
                                # a fresh comparison.
                                assert _debug_is_eq_v(
                                    val,
                                    compare_bibrefrecs(bib1, bib2))
                        else:
                            computed += 1
                            val = compare_bibrefrecs(bib1, bib2)

                        self._bib_matrix[bib1, bib2] = val

        clear_comparison_caches()
        update_status_final("Matrix done. %d calc, %d opt." % (computed, reused))

コード例 #5

0

ファイルを表示

ファイル: bibauthorid_prob_matrix.py プロジェクト: AlbertoPeon/invenio

 def __init__(self):
     '''Set up the instance with a Bib_matrix built without arguments.'''
     new_matrix = Bib_matrix()
     self._bib_matrix = new_matrix

コード例 #6

0

ファイルを表示

ファイル: bibauthorid_prob_matrix.py プロジェクト: AlbertoPeon/invenio

class ProbabilityMatrix(object):
    '''
    Keeps the pairwise comparison values between bibs inside a
    Bib_matrix and offers loading, storing, lookup and recomputation.
    '''

    def __init__(self):
        '''Set up the instance with a Bib_matrix built without arguments.'''
        new_matrix = Bib_matrix()
        self._bib_matrix = new_matrix

    def load(self, lname, load_map=True, load_matrix=True):
        '''
        Delegate loading to the backing matrix, with status messages
        emitted before and after.
        @param lname: passed through to Bib_matrix.load.
        @param load_map: passed through to Bib_matrix.load.
        @param load_matrix: passed through to Bib_matrix.load.
        '''
        update_status(0.0, "Loading probability matrix...")
        backing = self._bib_matrix
        backing.load(lname, load_map, load_matrix)
        update_status_final("Probability matrix loaded.")

    def store(self, name):
        '''
        Delegate storing to the backing matrix, with status messages
        emitted before and after.
        @param name: passed through to Bib_matrix.store.
        '''
        update_status(0.0, "Saving probability matrix...")
        backing = self._bib_matrix
        backing.store(name)
        update_status_final("Probability matrix saved.")

    def __getitem__(self, bibs):
        '''
        Look up the value stored for a pair of bibs.
        @param bibs: an indexable whose items 0 and 1 form the pair.
        '''
        left, right = bibs[0], bibs[1]
        return self._bib_matrix[left, right]

    def __get_up_to_date_bibs(self):
        '''
        Return a frozenset of the matrix keys that
        filter_modified_record_ids keeps for the matrix creation time.
        '''
        backing = self._bib_matrix
        all_keys = backing.get_keys()
        created_at = backing.creation_time
        kept = filter_modified_record_ids(all_keys, created_at)
        return frozenset(kept)

    def is_up_to_date(self, cluster_set):
        '''
        Return True when all bibs of cluster_set are contained in the
        up-to-date bibs of the matrix.
        @param cluster_set: object providing all_bibs().
        '''
        valid = self.__get_up_to_date_bibs()
        required = frozenset(cluster_set.all_bibs())
        return valid.issuperset(required)

    def recalculate(self, cluster_set):
        '''
        Recompute the probability matrix from scratch for cluster_set.

        A new Bib_matrix initialised with cluster_set replaces the
        current one; the old matrix is only consulted for values that
        can be reused. A value is reused when both bibs of a pair are
        among the up-to-date bibs and the old matrix holds a truthy
        value for them; every other pair is evaluated with
        compare_bibrefrecs.

        @param cluster_set: A cluster set object, used to initialize
        the matrix.
        '''
        stale_matrix = self._bib_matrix
        valid_bibs = self.__get_up_to_date_bibs()
        has_valid_bibs = bool(valid_bibs)
        self._bib_matrix = Bib_matrix(cluster_set)

        bib_count = cluster_set.num_all_bibs
        # Total number of unordered bib pairs, used as the divisor of
        # the progress ratio, hence forced to be non-zero.
        expected = ((bib_count * (bib_count - 1)) / 2) or 1

        n_calc, n_opti = 0, 0
        last_print, last_clean = 0, 0

        for cl1 in cluster_set.clusters:

            # Progress is reported only after more than 100000 newly
            # processed pairs.
            if (n_calc + n_opti) - last_print > 100000:
                update_status(
                    (float(n_opti) + n_calc) / expected,
                    "Prob matrix: calc %d, opti %d." % (n_calc, n_opti))
                last_print = n_calc + n_opti

            # Comparison caches are cleared after more than 2000000
            # fresh computations.
            if n_calc - last_clean > 2000000:
                clear_comparison_caches()
                last_clean = n_calc

            for cl2 in cluster_set.clusters:
                # id() ordering makes each unordered pair of clusters
                # appear exactly once; hates() vetoes the pair.
                if id(cl1) >= id(cl2) or cl1.hates(cl2):
                    continue

                for bib1 in cl1.bibs:
                    for bib2 in cl2.bibs:
                        old_val = None
                        if has_valid_bibs and bib1 in valid_bibs and bib2 in valid_bibs:
                            old_val = stale_matrix[bib1, bib2]

                        if old_val:
                            n_opti += 1
                            val = old_val
                            if bconfig.DEBUG_CHECKS:
                                # Verify the reused value still matches
                                # a fresh comparison.
                                assert _debug_is_eq_v(val, compare_bibrefrecs(bib1, bib2))
                        else:
                            n_calc += 1
                            val = compare_bibrefrecs(bib1, bib2)

                        self._bib_matrix[bib1, bib2] = val

        clear_comparison_caches()
        update_status_final("Matrix done. %d calc, %d opt." % (n_calc, n_opti))