Ejemplo n.º 1
0
def _parse_line(line, report, last_col):
    """Parse a single line from a report.

    The row is first reduced to a common layout (title, publisher,
    platform, two further cells, then one cell per month) and then wrapped
    in the resource class that matches the report type.

    :param line: sequence of cells in a report line
    :param report: report object the line came from; its
        ``report_version``, ``report_type``, ``metric`` and ``period``
        attributes are read
    :param last_col: end index (exclusive) used when slicing the row
    :return: a CounterJournal, CounterBook or CounterDatabase instance
    :raises PycounterException: if the report type starts with none of
        'JR', 'BR' or 'DB'
    """
    issn = None
    eissn = None
    isbn = None
    html_total = 0
    pdf_total = 0
    doi = ""
    prop_id = ""

    if report.report_version == 4:
        if report.report_type.startswith('JR1'):
            # Read the cells that the reshaping below discards while the
            # raw row is still available.
            doi = line[3]
            prop_id = line[4]
            # Totals go through format_stat, the same helper used for the
            # monthly cells, instead of a bare int(), which raises
            # ValueError on any cell that is not a plain integer literal.
            html_total = format_stat(line[8])
            pdf_total = format_stat(line[9])
            # Drop columns 3-4 and 7-9 so the monthly cells start at index 5
            line = line[0:3] + line[5:7] + line[10:last_col]
            issn = line[3].strip()
            eissn = line[4].strip()

        elif report.report_type in ('BR1', 'BR2'):
            # Drop columns 3-4 and 7 so the monthly cells start at index 5
            line = line[0:3] + line[5:7] + line[8:last_col]
            isbn = line[3].strip()
            issn = line[4].strip()

        # DB1 and DB2 rows are used as they are: the format coincidentally
        # works for these (a known kludge).
    else:
        if report.report_type.startswith('JR1'):
            # The last two cells of the row hold the HTML and PDF totals
            html_total = format_stat(line[-2])
            pdf_total = format_stat(line[-1])
            issn = line[3].strip()
            eissn = line[4].strip()
        line = line[0:last_col]

    logging.debug(line)
    common_args = {
        'title': line[0],
        'publisher': line[1],
        'platform': line[2],
        'period': report.period
    }
    # Pair each remaining cell with consecutive months, starting at the
    # first month of the report period.
    month_data = []
    curr_month = report.period[0]
    for data in line[5:]:
        month_data.append((curr_month, format_stat(data)))
        curr_month = next_month(curr_month)

    if report.report_type.startswith('JR'):
        return CounterJournal(metric=report.metric,
                              month_data=month_data,
                              doi=doi,
                              issn=issn,
                              eissn=eissn,
                              proprietary_id=prop_id,
                              html_total=html_total,
                              pdf_total=pdf_total,
                              **common_args
                              )
    elif report.report_type.startswith('BR'):
        return CounterBook(metric=report.metric,
                           month_data=month_data,
                           doi=doi,
                           issn=issn,
                           isbn=isbn,
                           proprietary_id=prop_id,
                           **common_args)
    elif report.report_type.startswith('DB'):
        # For database reports the metric is read from the row itself
        return CounterDatabase(metric=line[3],
                               month_data=month_data,
                               **common_args)
    raise PycounterException("Should be unreachable")  # pragma: no cover
Ejemplo n.º 2
0
def _parse_line(line, report, last_col):
    """Turn one report row into the matching CounterResource subclass.

    :param line: sequence of cells in a report line
    :param report: a CounterReport the line came from
    :param last_col: last column number containing data
    :return: an appropriate CounterResource subclass instance
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    doi = prop_id = ""
    issn = eissn = isbn = None
    html_total = pdf_total = 0

    metric = report.metric
    is_r4_or_later = report.report_version >= 4
    kind = report.report_type

    if is_r4_or_later:
        if kind.startswith("JR1") or kind == "TR_J1":
            raw = line
            line = raw[0:3] + raw[5:7] + raw[10:last_col]
            doi, prop_id = raw[3], raw[4]
            html_total = format_stat(raw[8])
            pdf_total = format_stat(raw[9])
            issn, eissn = line[3].strip(), line[4].strip()
        elif kind in ("BR1", "BR2"):
            line = line[0:3] + line[5:7] + line[8:last_col]
            isbn, issn = line[3].strip(), line[4].strip()
        elif kind in ("BR3", "JR2"):
            # These rows carry their own metric in column 7
            metric, doi, prop_id = line[7], line[3], line[4]
            line = line[0:3] + line[5:7] + line[9:last_col]
            eissn = line[4].strip()
            identifier = line[3].strip()
            if kind == "BR3":
                isbn = identifier
            else:
                issn = identifier
        # DB1 and DB2 rows need no reshaping
    else:
        if kind.startswith("JR1"):
            html_total = format_stat(line[-2])
            pdf_total = format_stat(line[-1])
            issn, eissn = line[3].strip(), line[4].strip()
        line = line[0:last_col]

    logging.debug(line)
    common_args = dict(
        title=line[0],
        publisher=line[1],
        platform=line[2],
        period=report.period,
    )

    # PR1 rows have one fewer leading column, so their months start earlier
    first_month_idx = 4 if kind == "PR1" else 5
    month_data = []
    month = report.period[0]
    for cell in line[first_month_idx:]:
        month_data.append((month, format_stat(cell)))
        month = next_month(month)

    if kind.startswith("JR") or kind == "TR_J1":
        return CounterJournal(
            metric=metric,
            month_data=month_data,
            doi=doi,
            issn=issn,
            eissn=eissn,
            proprietary_id=prop_id,
            html_total=html_total,
            pdf_total=pdf_total,
            **common_args
        )
    if kind.startswith("BR"):
        return CounterBook(
            metric=metric,
            month_data=month_data,
            doi=doi,
            issn=issn,
            isbn=isbn,
            proprietary_id=prop_id,
            **common_args
        )
    if kind.startswith("DB"):
        return CounterDatabase(
            metric=line[3], month_data=month_data, **common_args
        )
    if kind == "PR1":
        # there is no title in the PR1 report
        return CounterPlatform(
            metric=line[2],
            month_data=month_data,
            platform=line[0],
            publisher=line[1],
            period=report.period,
        )
    raise PycounterException("Should be unreachable")  # pragma: no cover
Ejemplo n.º 3
0
def _parse_line(line, report, last_col):
    """Parse a single line from a report.

    The row is first reduced to a common layout (title, publisher,
    platform, two further cells, then one cell per month) and then wrapped
    in the resource class that matches the report type.

    :param line: sequence of cells in a report line
    :param report: report object the line came from; its
        ``report_version``, ``report_type``, ``metric`` and ``period``
        attributes are read
    :param last_col: end index (exclusive) used when slicing the row
    :return: a CounterJournal, CounterBook or CounterDatabase instance
    :raises PycounterException: if the report type starts with none of
        'JR', 'BR' or 'DB'
    """
    issn = None
    eissn = None
    isbn = None
    html_total = 0
    pdf_total = 0
    doi = ""
    prop_id = ""

    if report.report_version == 4:
        if report.report_type.startswith('JR1'):
            # Read the cells that the reshaping below discards while the
            # raw row is still available.
            doi = line[3]
            prop_id = line[4]
            # Totals go through format_stat, the same helper used for the
            # monthly cells, instead of a bare int(), which raises
            # ValueError on any cell that is not a plain integer literal.
            html_total = format_stat(line[8])
            pdf_total = format_stat(line[9])
            # Drop columns 3-4 and 7-9 so the monthly cells start at index 5
            line = line[0:3] + line[5:7] + line[10:last_col]
            issn = line[3].strip()
            eissn = line[4].strip()

        elif report.report_type in ('BR1', 'BR2'):
            # Drop columns 3-4 and 7 so the monthly cells start at index 5
            line = line[0:3] + line[5:7] + line[8:last_col]
            isbn = line[3].strip()
            issn = line[4].strip()

        # DB1 and DB2 rows are used as they are: the format coincidentally
        # works for these (a known kludge).
    else:
        if report.report_type.startswith('JR1'):
            # The last two cells of the row hold the HTML and PDF totals
            html_total = format_stat(line[-2])
            pdf_total = format_stat(line[-1])
            issn = line[3].strip()
            eissn = line[4].strip()
        line = line[0:last_col]

    logging.debug(line)
    common_args = {
        'title': line[0],
        'publisher': line[1],
        'platform': line[2],
        'period': report.period
    }
    # Pair each remaining cell with consecutive months, starting at the
    # first month of the report period.
    month_data = []
    curr_month = report.period[0]
    for data in line[5:]:
        month_data.append((curr_month, format_stat(data)))
        curr_month = next_month(curr_month)

    if report.report_type.startswith('JR'):
        return CounterJournal(metric=report.metric,
                              month_data=month_data,
                              doi=doi,
                              issn=issn,
                              eissn=eissn,
                              proprietary_id=prop_id,
                              html_total=html_total,
                              pdf_total=pdf_total,
                              **common_args
                              )
    elif report.report_type.startswith('BR'):
        return CounterBook(metric=report.metric,
                           month_data=month_data,
                           doi=doi,
                           issn=issn,
                           isbn=isbn,
                           proprietary_id=prop_id,
                           **common_args)
    elif report.report_type.startswith('DB'):
        # For database reports the metric is read from the row itself
        return CounterDatabase(metric=line[3],
                               month_data=month_data,
                               **common_args)
    raise PycounterException("Should be unreachable")  # pragma: no cover
Ejemplo n.º 4
0
def parse_generic(report_reader):
    """Build a CounterReport from an iterator of COUNTER report rows.

    The leading rows are consumed one at a time as report metadata (title,
    customer, dates, column header); every remaining non-empty row becomes
    one entry in ``report.pubs``.

    :param report_reader: an iterator yielding the report's rows, each a
        list of cell values; it is advanced with ``six.next`` and then
        iterated to exhaustion

    :return: the populated CounterReport

    :raises UnknownReportTypeError: if a data row is reached and the
        report type is set but starts with none of "JR", "BR" or "DB"

    """
    report = CounterReport()

    # Row 1: the report title, e.g. "... Journal Report 1 (R4)"
    line1 = six.next(report_reader)

    # Captures: report family, report number (optionally followed by
    # " GOA"), and the release number inside "(R<n>)".
    rt_match = re.match(r".*(Journal|Book|Database) Report (\d(?: GOA)?) ?\(R(\d)\)", line1[0])
    if rt_match:
        report.report_type = CODES[rt_match.group(1)] + rt_match.group(2)
        report.report_version = int(rt_match.group(3))
    # NOTE(review): when the title does not match, report_type and
    # report_version keep whatever CounterReport() initialised them to;
    # the code below appears to assume both are set -- confirm.

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)

    # Row 2: customer name in the first cell
    report.customer = six.next(report_reader)[0]

    if report.report_version == 4:
        # R4 only: institutional identifier row (may be empty)
        inst_id_line = six.next(report_reader)
        if inst_id_line:
            report.institutional_identifier = inst_id_line[0]

        # Skip one row
        six.next(report_reader)

        # Row holding the covered period, parsed by convert_covered
        covered_line = six.next(report_reader)
        report.period = convert_covered(covered_line[0])

    # Skip one row, then read the row holding the run date
    six.next(report_reader)

    date_run_line = six.next(report_reader)
    report.date_run = convert_date_run(date_run_line[0])

    # Column header row. first_date_col is the index of the first monthly
    # column, which depends on the report type and release.
    header = six.next(report_reader)
    first_date_col = 10 if report.report_version == 4 else 5
    if report.report_type in ("BR1", "BR2") and report.report_version == 4:
        first_date_col = 8
    elif report.report_type == "DB1" and report.report_version == 4:
        first_date_col = 6
    elif report.report_type == "DB2" and report.report_version == 4:
        first_date_col = 5
    # The year is the second "-"-separated piece of that header cell;
    # values below 100 are treated as two-digit years in the 2000s.
    year = int(header[first_date_col].split("-")[1])
    if year < 100:
        year += 2000

    report.year = year

    if report.report_version == 4:
        # last_col = the first 8 header cells plus every non-empty cell
        # after them
        countable_header = header[0:8]
        for col in header[8:]:
            if col:
                countable_header.append(col)
        last_col = len(countable_header)
    else:
        # last_col = index of the first header cell containing "YTD"
        # (or the header length if there is none)
        last_col = 0
        for val in header:
            if "YTD" in val:
                break
            last_col += 1

        # Older releases have no covered-period row, so the period is
        # derived from the header: 1 January of the report year through
        # the end of the month named in the last column before "YTD".
        start_date = datetime.date(year, 1, 1)
        end_date = last_day(convert_date_column(header[last_col - 1]))
        report.period = (start_date, end_date)

    # Skip one more row before the data for every type except DB1, and a
    # second one for DB2 (presumably totals rows -- TODO confirm).
    if report.report_type != "DB1":
        six.next(report_reader)

    if report.report_type == "DB2":
        six.next(report_reader)

    for line in report_reader:
        # Per-row defaults; only JR1 rows override them below
        issn = None
        eissn = None
        html_total = 0
        pdf_total = 0
        doi = ""
        prop_id = ""
        if not line:
            continue
        if report.report_version == 4:
            if report.report_type.startswith("JR1"):
                # Keep the raw row so the cells dropped by the reshaping
                # (DOI, proprietary id, HTML/PDF totals) can still be read
                old_line = line
                line = line[0:3] + line[5:7] + line[10:last_col]
                doi = old_line[3]
                prop_id = old_line[4]
                html_total = int(old_line[8])
                pdf_total = int(old_line[9])
                issn = line[3].strip()
                eissn = line[4].strip()

            elif report.report_type in ("BR1", "BR2"):
                # Same reshaping with one fewer dropped column; no
                # identifiers are extracted for books here
                line = line[0:3] + line[5:7] + line[8:last_col]
            elif report.report_type in ("DB1", "DB2"):
                # format coincidentally works for these. This is a kludge
                # so leaving this explicit...
                pass
        else:
            if report.report_type.startswith("JR1"):
                # The last two cells of the row hold the HTML and PDF totals
                html_total = int(line[-2])
                pdf_total = int(line[-1])
                issn = line[3].strip()
                eissn = line[4].strip()
            line = line[0:last_col]

        logging.debug(line)
        title = line[0]
        publisher = line[1]
        platform = line[2]
        # Pair each cell from index 5 onward with consecutive months,
        # starting at the first month of the report period
        month_data = []
        curr_month = report.period[0]
        for data in line[5:]:
            month_data.append((curr_month, format_stat(data)))
            curr_month = next_month(curr_month)
        # Rows are silently dropped when report_type is falsy
        if report.report_type:
            if report.report_type.startswith("JR"):
                report.pubs.append(
                    CounterJournal(
                        report.period,
                        report.metric,
                        month_data=month_data,
                        title=title,
                        publisher=publisher,
                        platform=platform,
                        doi=doi,
                        issn=issn,
                        eissn=eissn,
                        proprietary_id=prop_id,
                        html_total=html_total,
                        pdf_total=pdf_total,
                    )
                )
            elif report.report_type.startswith("BR"):
                report.pubs.append(
                    CounterBook(
                        report.period,
                        report.metric,
                        month_data=month_data,
                        title=title,
                        publisher=publisher,
                        platform=platform,
                    )
                )
            elif report.report_type.startswith("DB"):
                # For database reports the metric is read from the row
                # itself rather than from report.metric
                report.pubs.append(
                    CounterDatabase(
                        report.period,
                        line[3],
                        month_data=month_data,
                        title=title,
                        publisher=publisher,
                        platform=platform,
                    )
                )
            else:
                raise UnknownReportTypeError(report.report_type)

    return report
Ejemplo n.º 5
0
def _parse_line(line, report, last_col):
    """Build the CounterResource subclass instance for one report row.

    :param line: sequence of cells in a report line
    :param report: a CounterReport the line came from
    :param last_col: last column number containing data
    :return: an appropriate CounterResource subclass instance
    """
    is_r4 = report.report_version == 4
    kind = report.report_type
    is_jr1 = kind.startswith("JR1")

    doi = prop_id = ""
    issn = eissn = isbn = None
    html_total = pdf_total = 0

    if is_r4 and is_jr1:
        raw = line
        line = raw[0:3] + raw[5:7] + raw[10:last_col]
        doi, prop_id = raw[3], raw[4]
        html_total = format_stat(raw[8])
        pdf_total = format_stat(raw[9])
        issn, eissn = line[3].strip(), line[4].strip()
    elif is_r4 and kind in ("BR1", "BR2"):
        line = line[0:3] + line[5:7] + line[8:last_col]
        isbn, issn = line[3].strip(), line[4].strip()
    elif not is_r4:
        if is_jr1:
            html_total = format_stat(line[-2])
            pdf_total = format_stat(line[-1])
            issn, eissn = line[3].strip(), line[4].strip()
        line = line[0:last_col]
    # Release 4 DB1 and DB2 rows need no reshaping

    logging.debug(line)
    shared = {
        "title": line[0],
        "publisher": line[1],
        "platform": line[2],
        "period": report.period,
    }

    # One (month, value) pair per remaining cell, counting forward from
    # the first month of the report period
    month_data = []
    month = report.period[0]
    for cell in line[5:]:
        month_data.append((month, format_stat(cell)))
        month = next_month(month)
    shared["month_data"] = month_data

    if kind.startswith("JR"):
        return CounterJournal(
            metric=report.metric,
            doi=doi,
            issn=issn,
            eissn=eissn,
            proprietary_id=prop_id,
            html_total=html_total,
            pdf_total=pdf_total,
            **shared
        )
    if kind.startswith("BR"):
        return CounterBook(
            metric=report.metric,
            doi=doi,
            issn=issn,
            isbn=isbn,
            proprietary_id=prop_id,
            **shared
        )
    if kind.startswith("DB"):
        return CounterDatabase(metric=line[3], **shared)
    raise PycounterException("Should be unreachable")  # pragma: no cover