Ejemplo n.º 1
0
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    Two layouts are handled: COUNTER 5 reports, recognised by a first cell
    equal to ``Report_Name``, and older reports whose first cell is passed
    to ``_get_type_and_version``.

    :param report_reader: an iterator yielding one list of cell values per
        row of the report
    :return: CounterReport object populated from the rows

    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements

    def skip_rows(count):
        """Consume and discard ``count`` rows from the reader."""
        for _ in range(count):
            six.next(report_reader)

    report = CounterReport()

    title_row = six.next(report_reader)
    if title_row[0] == "Report_Name":  # COUNTER 5 report
        row_two = six.next(report_reader)
        row_three = six.next(report_reader)
        type_and_version = _get_c5_type_and_version(row_two, row_three)
    else:
        type_and_version = _get_type_and_version(title_row[0])
    report.report_type, report.report_version = type_and_version

    is_c5 = report.report_version == 5
    # COUNTER 5 metadata values are read from the second cell of their row,
    # older versions from the first cell.
    value_col = 1 if is_c5 else 0

    if not is_c5:
        # noinspection PyTypeChecker
        report.metric = METRICS.get(report.report_type)

    customer_row = six.next(report_reader)
    report.customer = customer_row[value_col]

    if report.report_version >= 4:
        inst_id_row = six.next(report_reader)
        if inst_id_row:
            report.institutional_identifier = inst_id_row[value_col]
            if report.report_type == "BR2":
                report.section_type = inst_id_row[1]

        # One row is always discarded here; COUNTER 5 discards three more.
        skip_rows(4 if is_c5 else 1)

        covered_row = six.next(report_reader)
        report.period = convert_covered(covered_row[value_col])

    if report.report_version < 5:
        skip_rows(1)

    run_row = six.next(report_reader)
    report.date_run = convert_date_run(run_row[value_col])

    if is_c5:
        # Skip Created_By and blank line
        skip_rows(2)

    header = six.next(report_reader)

    if report.report_version < 5:
        try:
            report.year = _year_from_header(header, report)
        except AttributeError:
            warnings.warn("Could not determine year from malformed header")

    if report.report_version >= 4:
        # The first eight header cells always count; after that only the
        # non-empty ones do.
        last_col = len(header[:8]) + sum(1 for cell in header[8:] if cell)
    else:
        # Index of the first header cell mentioning "YTD", or the header
        # length when there is none.
        last_col = next(
            (pos for pos, cell in enumerate(header) if "YTD" in cell),
            len(header),
        )
        report.period = (
            datetime.date(report.year, 1, 1),
            last_day(convert_date_column(header[last_col - 1])),
        )

    # Pre-COUNTER-5 reports other than DB1 and PR1 carry a totals row.
    has_totals_row = (
        report.report_type not in ("DB1", "PR1")
        and report.report_version != 5
    )
    if has_totals_row:
        skip_rows(1)

    # DB2, BR3 and JR3 have one more totals row.
    if report.report_type in ("DB2", "BR3", "JR3"):
        skip_rows(1)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
Ejemplo n.º 2
0
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    :param report_reader: an iterator yielding one list of cell values per
        row of the report
    :return: the populated CounterReport object

    """
    report = CounterReport()

    title_row = six.next(report_reader)
    report.report_type, report.report_version = _get_type_and_version(
        title_row[0])

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)

    customer_row = six.next(report_reader)
    report.customer = customer_row[0]

    if report.report_version == 4:
        inst_id_row = six.next(report_reader)
        if inst_id_row:
            report.institutional_identifier = inst_id_row[0]

        six.next(report_reader)  # row not used by the parser

        covered_row = six.next(report_reader)
        report.period = convert_covered(covered_row[0])

    six.next(report_reader)  # row not used by the parser

    run_row = six.next(report_reader)
    report.date_run = convert_date_run(run_row[0])

    header = six.next(report_reader)
    report.year = _year_from_header(header, report)

    if report.report_version == 4:
        # The first eight header cells always count; after that only the
        # non-empty ones do.
        last_col = len(header[:8]) + sum(1 for cell in header[8:] if cell)
    else:
        # Index of the first header cell mentioning 'YTD', or the header
        # length when there is none.
        last_col = next(
            (pos for pos, cell in enumerate(header) if 'YTD' in cell),
            len(header))
        report.period = (
            datetime.date(report.year, 1, 1),
            last_day(convert_date_column(header[last_col - 1])))

    # Rows between the header and the data (presumably totals): none for
    # DB1, two for DB2, one for every other report type.
    if report.report_type == 'DB2':
        six.next(report_reader)
        six.next(report_reader)
    elif report.report_type != 'DB1':
        six.next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
Ejemplo n.º 3
0
def parse_generic(report_reader):
    """Parse the rows of a tabular COUNTER report into a CounterReport.

    The report type and version are taken from the title in the first cell
    of the first row (matched against titles of the form
    ``<Journal|Book|Database> Report <n> (R<version>)``).

    :param report_reader: an iterator yielding one list of cell values per
        row of the report
    :return: CounterReport object with one entry appended to ``pubs`` for
        every non-empty data row
    :raises UnknownReportTypeError: if the title in the first row is not a
        recognised Journal/Book/Database report title

    """
    report = CounterReport()

    line1 = six.next(report_reader)

    rt_match = re.match(
        r".*(Journal|Book|Database) Report (\d(?: GOA)?) ?\(R(\d)\)",
        line1[0],
    )
    if not rt_match:
        # Every column offset below depends on the type and version; without
        # them the parse would fail later with an unrelated error or
        # silently produce a report with no publications.
        raise UnknownReportTypeError(line1[0])
    report.report_type = CODES[rt_match.group(1)] + rt_match.group(2)
    report.report_version = int(rt_match.group(3))

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)

    report.customer = six.next(report_reader)[0]

    if report.report_version == 4:
        inst_id_line = six.next(report_reader)
        if inst_id_line:
            report.institutional_identifier = inst_id_line[0]

        six.next(report_reader)  # row not used by the parser

        covered_line = six.next(report_reader)
        report.period = convert_covered(covered_line[0])

    six.next(report_reader)  # row not used by the parser

    date_run_line = six.next(report_reader)
    report.date_run = convert_date_run(date_run_line[0])

    header = six.next(report_reader)

    # Index of the header cell the year is read from; it depends on the
    # report type for version 4 and is fixed for other versions.
    if report.report_version != 4:
        first_date_col = 5
    elif report.report_type in ("BR1", "BR2"):
        first_date_col = 8
    elif report.report_type == "DB1":
        first_date_col = 6
    elif report.report_type == "DB2":
        first_date_col = 5
    else:
        first_date_col = 10

    # The cell is split on "-" and the second part is the year; two-digit
    # years are taken to be in the 2000s.
    year = int(header[first_date_col].split("-")[1])
    if year < 100:
        year += 2000

    report.year = year

    if report.report_version == 4:
        # The first eight header cells always count; after that only the
        # non-empty ones do.
        countable_header = header[0:8]
        for col in header[8:]:
            if col:
                countable_header.append(col)
        last_col = len(countable_header)
    else:
        # Count the header cells that precede the first one mentioning "YTD".
        last_col = 0
        for val in header:
            if "YTD" in val:
                break
            last_col += 1

        start_date = datetime.date(year, 1, 1)
        end_date = last_day(convert_date_column(header[last_col - 1]))
        report.period = (start_date, end_date)

    # Rows between the header and the data (presumably totals): none for
    # DB1, two for DB2, one for every other report type.
    if report.report_type != "DB1":
        six.next(report_reader)

    if report.report_type == "DB2":
        six.next(report_reader)

    for line in report_reader:
        if not line:
            continue

        # Defaults for fields that only JR1-style rows fill in.
        issn = None
        eissn = None
        html_total = 0
        pdf_total = 0
        doi = ""
        prop_id = ""

        if report.report_version == 4:
            if report.report_type.startswith("JR1"):
                old_line = line
                # Reduce the row to five leading cells followed by the
                # monthly cells, which is the layout the code below expects.
                line = line[0:3] + line[5:7] + line[10:last_col]
                doi = old_line[3]
                prop_id = old_line[4]
                html_total = int(old_line[8])
                pdf_total = int(old_line[9])
                issn = line[3].strip()
                eissn = line[4].strip()
            elif report.report_type in ("BR1", "BR2"):
                line = line[0:3] + line[5:7] + line[8:last_col]
            # DB1/DB2 rows (and any other type) are used unchanged: the
            # format coincidentally works for these. This is a kludge.
        else:
            if report.report_type.startswith("JR1"):
                html_total = int(line[-2])
                pdf_total = int(line[-1])
                issn = line[3].strip()
                eissn = line[4].strip()
            line = line[0:last_col]

        logging.debug(line)
        title = line[0]
        publisher = line[1]
        platform = line[2]

        # Pair each cell from index 5 onwards with consecutive months,
        # starting at the first month of the report period.
        month_data = []
        curr_month = report.period[0]
        for data in line[5:]:
            month_data.append((curr_month, format_stat(data)))
            curr_month = next_month(curr_month)

        if report.report_type.startswith("JR"):
            report.pubs.append(
                CounterJournal(
                    report.period,
                    report.metric,
                    month_data=month_data,
                    title=title,
                    publisher=publisher,
                    platform=platform,
                    doi=doi,
                    issn=issn,
                    eissn=eissn,
                    proprietary_id=prop_id,
                    html_total=html_total,
                    pdf_total=pdf_total,
                )
            )
        elif report.report_type.startswith("BR"):
            report.pubs.append(
                CounterBook(
                    report.period,
                    report.metric,
                    month_data=month_data,
                    title=title,
                    publisher=publisher,
                    platform=platform,
                )
            )
        elif report.report_type.startswith("DB"):
            report.pubs.append(
                CounterDatabase(
                    report.period,
                    line[3],
                    month_data=month_data,
                    title=title,
                    publisher=publisher,
                    platform=platform,
                )
            )
        else:
            raise UnknownReportTypeError(report.report_type)

    return report
Ejemplo n.º 4
0
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    :param report_reader: an iterator yielding one list of cell values per
        row of the report
    :return: the populated CounterReport object

    """
    report = CounterReport()

    title_row = six.next(report_reader)
    type_and_version = _get_type_and_version(title_row[0])
    report.report_type, report.report_version = type_and_version

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)

    customer_row = six.next(report_reader)
    report.customer = customer_row[0]

    if report.report_version == 4:
        inst_id_row = six.next(report_reader)
        if inst_id_row:
            report.institutional_identifier = inst_id_row[0]
            # For BR2 the second cell of this row is stored as the
            # section type.
            if report.report_type == 'BR2':
                report.section_type = inst_id_row[1]

        six.next(report_reader)  # row not used by the parser

        covered_row = six.next(report_reader)
        report.period = convert_covered(covered_row[0])

    six.next(report_reader)  # row not used by the parser

    run_row = six.next(report_reader)
    report.date_run = convert_date_run(run_row[0])

    header = six.next(report_reader)
    report.year = _year_from_header(header, report)

    if report.report_version == 4:
        # The first eight header cells always count; after that only the
        # non-empty ones do.
        last_col = len(header[:8]) + sum(1 for cell in header[8:] if cell)
    else:
        # Index of the first header cell mentioning 'YTD', or the header
        # length when there is none.
        last_col = next(
            (pos for pos, cell in enumerate(header) if 'YTD' in cell),
            len(header))
        first_day = datetime.date(report.year, 1, 1)
        final_day = last_day(convert_date_column(header[last_col - 1]))
        report.period = (first_day, final_day)

    # Rows between the header and the data (presumably totals): none for
    # DB1, two for DB2, one for every other report type.
    if report.report_type == 'DB2':
        six.next(report_reader)
        six.next(report_reader)
    elif report.report_type != 'DB1':
        six.next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
Ejemplo n.º 5
0
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    A header from which the year cannot be determined produces a warning
    instead of an exception.

    :param report_reader: an iterator yielding one list of cell values per
        row of the report
    :return: CounterReport object populated from the rows

    """
    report = CounterReport()

    title_row = six.next(report_reader)
    type_and_version = _get_type_and_version(title_row[0])
    report.report_type, report.report_version = type_and_version

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)

    customer_row = six.next(report_reader)
    report.customer = customer_row[0]

    if report.report_version == 4:
        inst_id_row = six.next(report_reader)
        if inst_id_row:
            report.institutional_identifier = inst_id_row[0]
            # For BR2 the second cell of this row is stored as the
            # section type.
            if report.report_type == "BR2":
                report.section_type = inst_id_row[1]

        six.next(report_reader)  # row not used by the parser

        covered_row = six.next(report_reader)
        report.period = convert_covered(covered_row[0])

    six.next(report_reader)  # row not used by the parser

    run_row = six.next(report_reader)
    report.date_run = convert_date_run(run_row[0])

    header = six.next(report_reader)

    try:
        report.year = _year_from_header(header, report)
    except AttributeError:
        warnings.warn("Could not determine year from malformed header")

    if report.report_version == 4:
        # The first eight header cells always count; after that only the
        # non-empty ones do.
        last_col = len(header[:8]) + sum(1 for cell in header[8:] if cell)
    else:
        # Index of the first header cell mentioning "YTD", or the header
        # length when there is none.
        last_col = next(
            (pos for pos, cell in enumerate(header) if "YTD" in cell),
            len(header),
        )
        # NOTE(review): report.year is read here even when the lookup above
        # only warned -- confirm CounterReport provides a usable default.
        report.period = (
            datetime.date(report.year, 1, 1),
            last_day(convert_date_column(header[last_col - 1])),
        )

    # Rows between the header and the data (presumably totals): none for
    # DB1, two for DB2, one for every other report type.
    if report.report_type == "DB2":
        six.next(report_reader)
        six.next(report_reader)
    elif report.report_type != "DB1":
        six.next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
Ejemplo n.º 6
0
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    Two layouts are handled: COUNTER 5 reports, recognised by a first cell
    equal to ``Report_Name``, and older reports whose first cell is passed
    to ``_get_type_and_version``.

    :param report_reader: an iterator yielding one list of cell values per
        row of the report
    :return: CounterReport object populated from the rows

    """
    # pylint: disable=too-many-branches
    report = CounterReport()

    title_row = next(report_reader)
    if title_row[0] == "Report_Name":  # COUNTER 5 report
        row_two = next(report_reader)
        row_three = next(report_reader)
        type_and_version = _get_c5_type_and_version(row_two, row_three)
    else:
        type_and_version = _get_type_and_version(title_row[0])
    report.report_type, report.report_version = type_and_version

    is_c5 = report.report_version == 5
    # COUNTER 5 metadata values are read from the second cell of their row,
    # older versions from the first cell.
    value_col = 1 if is_c5 else 0

    if not is_c5:
        # noinspection PyTypeChecker
        report.metric = METRICS.get(report.report_type)

    customer_row = next(report_reader)
    report.customer = customer_row[value_col]

    if report.report_version >= 4:
        inst_id_row = next(report_reader)
        if inst_id_row:
            report.institutional_identifier = inst_id_row[value_col]
            if report.report_type == "BR2":
                report.section_type = inst_id_row[1]

        # One row is always discarded here; COUNTER 5 discards three more.
        for _ in range(4 if is_c5 else 1):
            next(report_reader)

        covered_row = next(report_reader)
        report.period = convert_covered(covered_row[value_col])

    if report.report_version < 5:
        next(report_reader)  # row not used by the parser

    run_row = next(report_reader)
    report.date_run = convert_date_run(run_row[value_col])

    if is_c5:
        # Skip Created_By and blank line
        next(report_reader)
        next(report_reader)

    header = next(report_reader)

    # The first eight header cells always count; after that only the
    # non-empty ones do.
    last_col = len(header[:8]) + sum(1 for cell in header[8:] if cell)

    # Pre-COUNTER-5 reports other than DB1 and PR1 carry a totals row.
    has_totals_row = (
        report.report_type not in ("DB1", "PR1") and report.report_version != 5
    )
    if has_totals_row:
        next(report_reader)

    # DB2, BR3 and JR3 have one more totals row.
    if report.report_type in ("DB2", "BR3", "JR3"):
        next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
Ejemplo n.º 7
0
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    A header from which the year cannot be determined produces a warning
    instead of an exception.

    :param report_reader: an iterator yielding one list of cell values per
        row of the report
    :return: CounterReport object populated from the rows

    """

    def discard_row():
        """Consume one row that the parser does not use."""
        six.next(report_reader)

    report = CounterReport()

    title_row = six.next(report_reader)
    report.report_type, report.report_version = _get_type_and_version(
        title_row[0])

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)

    customer_row = six.next(report_reader)
    report.customer = customer_row[0]

    if report.report_version == 4:
        inst_id_row = six.next(report_reader)
        if inst_id_row:
            report.institutional_identifier = inst_id_row[0]
            # For BR2 the second cell of this row is stored as the
            # section type.
            if report.report_type == "BR2":
                report.section_type = inst_id_row[1]

        discard_row()

        covered_row = six.next(report_reader)
        report.period = convert_covered(covered_row[0])

    discard_row()

    run_row = six.next(report_reader)
    report.date_run = convert_date_run(run_row[0])

    header = six.next(report_reader)

    try:
        report.year = _year_from_header(header, report)
    except AttributeError:
        warnings.warn("Could not determine year from malformed header")

    if report.report_version == 4:
        # The first eight header cells always count; after that only the
        # non-empty ones do.
        last_col = len(header[:8]) + sum(1 for cell in header[8:] if cell)
    else:
        # Index of the first header cell mentioning "YTD", or the header
        # length when there is none.
        last_col = next(
            (pos for pos, cell in enumerate(header) if "YTD" in cell),
            len(header))
        # NOTE(review): report.year is read here even when the lookup above
        # only warned -- confirm CounterReport provides a usable default.
        first_day = datetime.date(report.year, 1, 1)
        final_day = last_day(convert_date_column(header[last_col - 1]))
        report.period = (first_day, final_day)

    # Rows between the header and the data (presumably totals): none for
    # DB1, two for DB2, one for every other report type.
    if report.report_type == "DB2":
        discard_row()
        discard_row()
    elif report.report_type != "DB1":
        discard_row()

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report