예제 #1
0
파일: counts.py 프로젝트: aashish24/lisc
    def check_data(self, data_type='counts', dim='A'):
        """Print the highest count or score found for each term.

        For every term label in the chosen dimension, the position of the
        largest value in that term's row is located with ``np.argmax`` and the
        matching label from the other term set is printed with the value.

        Parameters
        ----------
        data_type : {'counts', 'score'}
            Which data type to use.
        dim : {'A', 'B'}, optional
            Which set of terms to check.

        Raises
        ------
        ValueError
            If `data_type` is not 'counts' or 'score', or if 'score' is
            requested while `self.score` has size 0.
        """

        if data_type not in ['counts', 'score']:
            raise ValueError('Data type not understood - can not proceed.')
        if data_type == 'score' and self.score.size == 0:
            raise ValueError('Score is not computed - can not proceed.')

        # Set up which direction to act across: after the optional transpose,
        # rows are indexed by the terms of `dim`
        dat = getattr(self, data_type) if dim == 'A' else getattr(
            self, data_type).T
        alt = 'B' if dim == 'A' and not self.square else 'A'

        labels = self.terms[dim].labels

        # With no terms there is nothing to print. Returning here also keeps the
        # width helper from being called in a case where the loop never ran it.
        if len(labels) == 0:
            return

        alt_labels = self.terms[alt].labels

        # Field widths and the number format do not depend on the current term,
        # so compute them once instead of on every iteration
        twd1 = get_max_length(labels, 2)
        twd2 = get_max_length(alt_labels, 2)
        nwd = '>10.0f' if data_type == 'counts' else '06.3f'

        # Loop through each term, find maximally associated term and print out
        for term_ind, term in enumerate(labels):

            # Find the index of the most common association for current term
            assoc_ind = np.argmax(dat[term_ind, :])

            print(
                "For  {:{twd1}}  the highest association is  {:{twd2}}  with  {:{nwd}}"
                .format(wrap(term),
                        wrap(alt_labels[assoc_ind]),
                        dat[term_ind, assoc_ind],
                        twd1=twd1,
                        twd2=twd2,
                        nwd=nwd))
예제 #2
0
파일: counts.py 프로젝트: lisc-tools/lisc
    def check_counts(self, dim='A'):
        """Print the number of articles found for every term in one term set.

        Parameters
        ----------
        dim : {'A', 'B'}
            Which set of terms to check.

        Raises
        ------
        ValueError
            If `self.has_data` is falsy.

        Examples
        --------
        Print the number of articles found for each term (assuming `counts` already has data):

        >>> counts.check_counts() # doctest: +SKIP
        """

        # Guard clause: refuse to report when the object says it holds no data
        if not self.has_data:
            raise ValueError('No data is available - cannot proceed.')

        term_set = self.terms[dim]
        row_template = "  {:{twd}}   -   {:{nwd}.0f}"

        # Field widths passed to the format spec of every row
        label_width = get_max_length(term_set.labels, 2)
        count_width = get_max_length(term_set.counts)

        print("The number of documents found for each search term is:")
        for position, label in enumerate(term_set.labels):
            print(row_template.format(wrap(label),
                                      term_set.counts[position],
                                      twd=label_width,
                                      nwd=count_width))
예제 #3
0
파일: counts.py 프로젝트: lisc-tools/lisc
    def check_data(self, data_type='counts', dim='A'):
        """Report, for each term, the entry with the highest count or score.

        Parameters
        ----------
        data_type : {'counts', 'score'}
            Which data type to use.
        dim : {'A', 'B'}, optional
            Which set of terms to check.

        Raises
        ------
        ValueError
            If `self.has_data` is falsy, if `data_type` is not one of the
            accepted values, or if 'score' is requested while the score is
            empty or its recorded type is 'similarity'.

        Examples
        --------
        Print the highest count for each term (assuming `counts` already has data):

        >>> counts.check_data() # doctest: +SKIP

        Print the highest score value for each term (assuming `counts` already has data):

        >>> counts.check_data(data_type='score') # doctest: +SKIP
        """

        if not self.has_data:
            raise ValueError('No data is available - cannot proceed.')

        if data_type not in ('counts', 'score'):
            raise ValueError('Data type not understood - can not proceed.')

        # Extra preconditions that only apply when scores are requested
        use_score = data_type == 'score'
        if use_score and self.score.size == 0:
            raise ValueError('Score is not computed - can not proceed.')
        if use_score and self.score_info['type'] == 'similarity':
            raise ValueError('Cannot check value counts for similarity score.')

        # Orient the values so that rows are indexed by the terms of `dim`
        values = getattr(self, data_type)
        if dim == 'B':
            values = values.T

        # Pick the term set whose labels name the columns
        if dim == 'A' and not self.square:
            other = 'B'
        else:
            other = 'A'

        # Field widths and number format shared by every printed line
        row_labels = self.terms[dim].labels
        row_width = get_max_length(row_labels, 2)
        col_labels = self.terms[other].labels
        col_width = get_max_length(col_labels, 2)
        number_spec = '06.3f' if use_score else '>10.0f'

        line = "For  {:{twd1}}  the highest association is  {:{twd2}}  with  {:{nwd}}"

        for row, label in enumerate(row_labels):

            # Column holding the largest value in this term's row
            best = np.argmax(values[row, :])

            print(line.format(wrap(label),
                              wrap(col_labels[best]),
                              values[row, best],
                              twd1=row_width,
                              twd2=col_width,
                              nwd=number_spec))
예제 #4
0
파일: counts.py 프로젝트: aashish24/lisc
    def check_counts(self, dim='A'):
        """Check how many articles were found for each term.

        Prints a header line, then one line per term label showing the
        corresponding entry of that term set's `counts`.

        Parameters
        ----------
        dim : {'A', 'B'}, optional
            Which set of terms to check.
        """

        print("The number of documents found for each search term is:")

        labels = self.terms[dim].labels

        # With no terms only the header is printed. Returning here also keeps the
        # width helper from being called in a case where the loop never ran it.
        if len(labels) == 0:
            return

        counts = self.terms[dim].counts

        # Field widths are the same for every row, so compute them once
        # instead of once per printed term
        twd = get_max_length(labels, 2)
        nwd = get_max_length(counts)

        for ind, term in enumerate(labels):
            print("  {:{twd}}   -   {:{nwd}.0f}".format(
                wrap(term), counts[ind], twd=twd, nwd=nwd))
예제 #5
0
파일: words.py 프로젝트: lisc-tools/lisc
    def check_data(self):
        """Print the number of collected articles for each term label.

        Labels from `self.labels` are paired in order with the entries of
        `self.results`, and each entry's `n_articles` attribute is printed.
        """

        # Field width applied to the label field of every row
        label_width = get_max_length(self.labels)
        row_template = "\t{:{twd}} \t\t  {}"

        print("Number of collected articles per term:")
        for name, result in zip(self.labels, self.results):
            print(row_template.format(name, result.n_articles, twd=label_width))
예제 #6
0
파일: counts.py 프로젝트: ryanhammonds/lisc
    def check_counts(self, dim='A'):
        """Check how many articles were found for each term.

        Prints a header line, then one line per term label showing the
        corresponding entry of that term set's `counts`.

        Parameters
        ----------
        dim : {'A', 'B'}, optional
            Which set of terms to check.

        Examples
        --------
        Print the number of articles found for each term (assuming `counts` already has data):

        >>> counts.check_counts() # doctest: +SKIP
        """

        print("The number of documents found for each search term is:")

        term_labels = self.terms[dim].labels

        # With no terms only the header is printed. Returning here also keeps the
        # width helper from being called in a case where the loop never ran it.
        if len(term_labels) == 0:
            return

        term_counts = self.terms[dim].counts

        # Both field widths are loop-invariant; compute them a single time
        # rather than recomputing them for every row
        twd = get_max_length(term_labels, 2)
        nwd = get_max_length(term_counts)

        for ind, term in enumerate(term_labels):
            print("  {:{twd}}   -   {:{nwd}.0f}".format(
                wrap(term), term_counts[ind], twd=twd, nwd=nwd))