コード例 #1
0
    def write_to_file(self, path, format_):
        """
        Write this report out to disk in the requested format.

        :param path: destination path of the file to create
        :param format_: name of the output format; only 'tsv' is handled
        :return: None
        :raises PycounterException: if ``format_`` is anything but 'tsv'
        """
        # Reject unsupported formats up front so the happy path stays flat.
        if format_ != "tsv":
            raise PycounterException("unknown file type %s" % format_)
        self.write_tsv(path)
コード例 #2
0
def parse(filename,
          filetype=None,
          encoding="utf-8",
          fallback_encoding="latin-1"):
    """Load a COUNTER report from disk, working out its type if needed.

    Returns a :class:`CounterReport <CounterReport>` object.

    :param filename: path of the COUNTER report to read.
    :param filetype: one of "csv", "tsv" or "xlsx". When left as None
        (the default) the type is taken from the file extension, and if
        the extension is not recognised, guessed from the file's contents.
    :param encoding: encoding used to decode delimited files. Defaults to
        'utf-8'. Not passed to the XLSX parser.
    :param fallback_encoding: encoding passed along for delimited files as
        the alternative to try when the primary one fails. Defaults to
        'latin-1'. Not passed to the XLSX parser.
    :raises PycounterException: if the resulting file type is not one of
        the three supported ones.
    """
    if filetype is None:
        # Try the recognised extensions first; the ``else`` arm of the loop
        # only runs when none of them matched.
        for extension in ("tsv", "xlsx", "csv"):
            if filename.endswith("." + extension):
                filetype = extension
                break
        else:
            with open(filename, "rb") as file_obj:
                filetype = guess_type_from_content(file_obj)

    if filetype == "xlsx":
        return parse_xlsx(filename)

    # Both delimited formats share one parser and differ only by separator.
    for kind, separator in (("tsv", "\t"), ("csv", ",")):
        if filetype == kind:
            return parse_separated(
                filename, separator, encoding, fallback_encoding)

    raise PycounterException("Unknown file type %s" % filetype)
コード例 #3
0
def _parse_line(line, report, last_col):
    """Turn one row of a report into the matching resource object.

    :param line: sequence of cells making up the report row
    :param report: the CounterReport the row belongs to; its report type,
        version, metric and period steer the parsing
    :param last_col: slice bound marking where the data columns end
    :return: a CounterJournal, CounterBook, CounterDatabase or
        CounterPlatform instance, chosen by the report type
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    kind = report.report_type
    metric = report.metric
    doi = prop_id = ""
    issn = eissn = isbn = None
    html_total = pdf_total = 0

    if report.report_version >= 4:
        if kind.startswith("JR1") or kind == "TR_J1":
            raw = line
            # Keep cells 0-2, 5-6 and the cells from 10 up to last_col; the
            # identifiers and totals are read from the untouched row.
            row = raw[0:3] + raw[5:7] + raw[10:last_col]
            doi, prop_id = raw[3], raw[4]
            html_total = format_stat(raw[8])
            pdf_total = format_stat(raw[9])
            issn, eissn = row[3].strip(), row[4].strip()

        elif kind in ("BR1", "BR2"):
            row = line[0:3] + line[5:7] + line[8:last_col]
            isbn, issn = row[3].strip(), row[4].strip()

        elif kind in ("BR3", "JR2"):
            # These report types carry the metric in the row itself.
            metric, doi, prop_id = line[7], line[3], line[4]
            row = line[0:3] + line[5:7] + line[9:last_col]
            eissn = row[4].strip()
            if kind == "BR3":
                isbn = row[3].strip()
            else:
                issn = row[3].strip()

        else:
            # Any other report type (DB1 and DB2 included) is used as-is.
            row = line

    else:
        if kind.startswith("JR1"):
            # Totals are the last two cells of the full row.
            html_total = format_stat(line[-2])
            pdf_total = format_stat(line[-1])
            issn, eissn = line[3].strip(), line[4].strip()
        row = line[0:last_col]

    logging.debug(row)
    shared = dict(
        title=row[0],
        publisher=row[1],
        platform=row[2],
        period=report.period,
    )

    # PR1 rows have one leading column fewer than the other report types.
    first_month_idx = 4 if kind == "PR1" else 5
    month_data = []
    month = report.period[0]
    for cell in row[first_month_idx:]:
        month_data.append((month, format_stat(cell)))
        month = next_month(month)

    if kind.startswith("JR") or kind == "TR_J1":
        return CounterJournal(metric=metric,
                              month_data=month_data,
                              doi=doi,
                              issn=issn,
                              eissn=eissn,
                              proprietary_id=prop_id,
                              html_total=html_total,
                              pdf_total=pdf_total,
                              **shared)
    if kind.startswith("BR"):
        return CounterBook(metric=metric,
                           month_data=month_data,
                           doi=doi,
                           issn=issn,
                           isbn=isbn,
                           proprietary_id=prop_id,
                           **shared)
    if kind.startswith("DB"):
        return CounterDatabase(metric=row[3],
                               month_data=month_data,
                               **shared)
    if kind == "PR1":
        # there is no title in the PR1 report
        return CounterPlatform(
            metric=row[2],
            month_data=month_data,
            platform=row[0],
            publisher=row[1],
            period=report.period,
        )
    raise PycounterException("Should be unreachable")  # pragma: no cover
コード例 #4
0
def _parse_line(line, report, last_col):
    """Parse a single line from a report.

    :param line: sequence of cells in a report line
    :param report: the CounterReport the line came from; its report type,
        version, metric and period are consulted
    :param last_col: slice bound marking where the data columns end
    :return: a CounterJournal, CounterBook or CounterDatabase instance,
        depending on the report type
    """
    issn = None
    eissn = None
    isbn = None
    html_total = 0
    pdf_total = 0
    doi = ""
    prop_id = ""

    if report.report_version == 4:
        if report.report_type.startswith('JR1'):
            old_line = line
            # Keep cells 0-2, 5-6 and 10..last_col; DOI, proprietary id and
            # the HTML/PDF totals are read from the untouched row.
            line = line[0:3] + line[5:7] + line[10:last_col]
            doi = old_line[3]
            prop_id = old_line[4]
            # Convert the totals with format_stat, the same converter used
            # for the monthly cells below, instead of a bare int() so both
            # kinds of numeric cell in a row are parsed the same way.
            html_total = format_stat(old_line[8])
            pdf_total = format_stat(old_line[9])
            issn = line[3].strip()
            eissn = line[4].strip()

        elif report.report_type in ('BR1', 'BR2'):
            line = line[0:3] + line[5:7] + line[8:last_col]
            isbn = line[3].strip()
            issn = line[4].strip()

        elif report.report_type in ('DB1', 'DB2'):
            # format coincidentally works for these. This is a kludge
            # so leaving this explicit...
            pass
    else:
        if report.report_type.startswith('JR1'):
            # Totals are the last two cells of the full row.
            html_total = format_stat(line[-2])
            pdf_total = format_stat(line[-1])
            issn = line[3].strip()
            eissn = line[4].strip()
        line = line[0:last_col]

    logging.debug(line)
    common_args = {
        'title': line[0],
        'publisher': line[1],
        'platform': line[2],
        'period': report.period
    }
    # Monthly figures start at cell 5; pair each with its month, starting
    # from the first month of the report period.
    month_data = []
    curr_month = report.period[0]
    for data in line[5:]:
        month_data.append((curr_month, format_stat(data)))
        curr_month = next_month(curr_month)
    if report.report_type.startswith('JR'):
        return CounterJournal(metric=report.metric,
                              month_data=month_data,
                              doi=doi,
                              issn=issn,
                              eissn=eissn,
                              proprietary_id=prop_id,
                              html_total=html_total,
                              pdf_total=pdf_total,
                              **common_args
                              )
    elif report.report_type.startswith('BR'):
        return CounterBook(metric=report.metric,
                           month_data=month_data,
                           doi=doi,
                           issn=issn,
                           isbn=isbn,
                           proprietary_id=prop_id,
                           **common_args)
    elif report.report_type.startswith('DB'):
        # Database rows carry their own metric in cell 3.
        return CounterDatabase(metric=line[3],
                               month_data=month_data,
                               **common_args)
    raise PycounterException("Should be unreachable")  # pragma: no cover