def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    The leading rows are consumed one at a time as report metadata (type and
    version, customer, institutional identifier, covered period, run date
    and column header). Every remaining non-empty row is passed to
    ``_parse_line`` and the result is collected in ``report.pubs``.

    :param report_reader: an iterator that yields each report row as a list
        of cell values
    :return: CounterReport object
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    report = CounterReport()

    title_row = six.next(report_reader)
    if title_row[0] == "Report_Name":  # COUNTER 5 report
        row_two = six.next(report_reader)
        row_three = six.next(report_reader)
        report.report_type, report.report_version = _get_c5_type_and_version(
            row_two, row_three
        )
    else:
        report.report_type, report.report_version = _get_type_and_version(
            title_row[0]
        )

    version = report.report_version
    is_c5 = version == 5
    # COUNTER 5 metadata rows are read from their second cell, older
    # releases from their first.
    value_col = 1 if is_c5 else 0

    if not is_c5:
        # noinspection PyTypeChecker
        report.metric = METRICS.get(report.report_type)

    report.customer = six.next(report_reader)[value_col]

    if version >= 4:
        id_row = six.next(report_reader)
        if id_row:
            report.institutional_identifier = id_row[value_col]
            if report.report_type == "BR2":
                report.section_type = id_row[1]

        # One row is always skipped here; COUNTER 5 skips three more before
        # the row holding the covered period.
        for _ in range(4 if is_c5 else 1):
            six.next(report_reader)

        period_row = six.next(report_reader)
        report.period = convert_covered(period_row[value_col])

    if version < 5:
        six.next(report_reader)

    run_row = six.next(report_reader)
    report.date_run = convert_date_run(run_row[value_col])

    if is_c5:
        # Skip Created_By and blank line
        six.next(report_reader)
        six.next(report_reader)

    header = six.next(report_reader)

    if version < 5:
        try:
            report.year = _year_from_header(header, report)
        except AttributeError:
            warnings.warn("Could not determine year from malformed header")

    if version >= 4:
        # Keep the first eight columns as they are, then only the
        # non-empty header cells that follow.
        kept_cols = header[0:8]
        kept_cols.extend(cell for cell in header[8:] if cell)
        last_col = len(kept_cols)
    else:
        # Data columns stop at the first header cell mentioning "YTD".
        last_col = next(
            (idx for idx, cell in enumerate(header) if "YTD" in cell),
            len(header),
        )
        report.period = (
            datetime.date(report.year, 1, 1),
            last_day(convert_date_column(header[last_col - 1])),
        )

    if not is_c5 and report.report_type not in ("DB1", "PR1"):
        # every pre-COUNTER 5 report except DB1 and PR1 has a totals row
        six.next(report_reader)

    if report.report_type in ("DB2", "BR3", "JR3"):
        # this report has two lines of totals
        six.next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    Metadata rows at the top of the report are consumed in order; each
    remaining non-empty row is converted with ``_parse_line`` and stored in
    ``report.pubs``.

    :param report_reader: an iterator that yields each report row as a list
        of cell values
    :return: CounterReport object
    """
    report = CounterReport()

    title_row = six.next(report_reader)
    report.report_type, report.report_version = _get_type_and_version(
        title_row[0]
    )
    is_r4 = report.report_version == 4

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)
    report.customer = six.next(report_reader)[0]

    if is_r4:
        id_row = six.next(report_reader)
        if id_row:
            report.institutional_identifier = id_row[0]

        six.next(report_reader)

        period_row = six.next(report_reader)
        report.period = convert_covered(period_row[0])

    six.next(report_reader)

    run_row = six.next(report_reader)
    report.date_run = convert_date_run(run_row[0])

    header = six.next(report_reader)
    report.year = _year_from_header(header, report)

    if is_r4:
        # Keep the first eight columns as they are, then only the
        # non-empty header cells that follow.
        kept_cols = header[0:8]
        kept_cols.extend(cell for cell in header[8:] if cell)
        last_col = len(kept_cols)
    else:
        # Data columns stop at the first header cell mentioning "YTD".
        last_col = next(
            (idx for idx, cell in enumerate(header) if 'YTD' in cell),
            len(header),
        )
        report.period = (
            datetime.date(report.year, 1, 1),
            last_day(convert_date_column(header[last_col - 1])),
        )

    # One row is skipped for every type but DB1; DB2 skips a second one.
    if report.report_type != 'DB1':
        six.next(report_reader)
    if report.report_type == 'DB2':
        six.next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
def parse_generic(report_reader):
    """Parse COUNTER report rows into a CounterReport.

    The first row's title is matched against the "<Journal|Book|Database>
    Report N (Rv)" pattern to obtain the report type and version. The
    following metadata rows and the column header are then consumed, and
    every remaining non-empty row becomes a CounterJournal, CounterBook or
    CounterDatabase appended to ``report.pubs``.

    :param report_reader: an iterator that yields each report row as a list
        of cell values
    :return: CounterReport object
    :raises UnknownReportTypeError: if the title in the first row does not
        match the expected pattern, or the resulting type is not a journal,
        book or database report
    """
    report = CounterReport()

    title_row = six.next(report_reader)
    rt_match = re.match(
        r".*(Journal|Book|Database) Report (\d(?: GOA)?) ?\(R(\d)\)",
        title_row[0],
    )
    if rt_match is None:
        # Fail fast: everything below dispatches on the report type, so
        # carrying on without one would only fail later with an unrelated
        # error (or produce a meaningless report).
        raise UnknownReportTypeError(title_row[0])

    report.report_type = CODES[rt_match.group(1)] + rt_match.group(2)
    report.report_version = int(rt_match.group(3))
    is_r4 = report.report_version == 4

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)
    report.customer = six.next(report_reader)[0]

    if is_r4:
        inst_id_line = six.next(report_reader)
        if inst_id_line:
            report.institutional_identifier = inst_id_line[0]

        six.next(report_reader)

        covered_line = six.next(report_reader)
        report.period = convert_covered(covered_line[0])

    six.next(report_reader)

    date_run_line = six.next(report_reader)
    report.date_run = convert_date_run(date_run_line[0])

    header = six.next(report_reader)

    # Index of the first month column in the header; it depends on how many
    # descriptive columns precede the monthly figures for this report type.
    if is_r4:
        first_date_col = {"BR1": 8, "BR2": 8, "DB1": 6, "DB2": 5}.get(
            report.report_type, 10
        )
    else:
        first_date_col = 5

    # The year is the part after the "-" in the first month header cell;
    # two-digit years are expanded into the 2000s.
    year = int(header[first_date_col].split("-")[1])
    if year < 100:
        year += 2000
    report.year = year

    if is_r4:
        # First eight columns as they are, plus the non-empty cells after.
        last_col = len(header[0:8]) + sum(1 for col in header[8:] if col)
    else:
        # Data columns stop at the first header cell mentioning "YTD".
        last_col = next(
            (idx for idx, val in enumerate(header) if "YTD" in val),
            len(header),
        )
        start_date = datetime.date(year, 1, 1)
        end_date = last_day(convert_date_column(header[last_col - 1]))
        report.period = (start_date, end_date)

    # One row is skipped for every type but DB1; DB2 skips a second one.
    if report.report_type != "DB1":
        six.next(report_reader)
    if report.report_type == "DB2":
        six.next(report_reader)

    is_jr1 = report.report_type.startswith("JR1")

    for line in report_reader:
        if not line:
            continue

        issn = None
        eissn = None
        html_total = 0
        pdf_total = 0
        doi = ""
        prop_id = ""

        if is_r4:
            if is_jr1:
                old_line = line
                # Drop the DOI / proprietary id and the total columns so the
                # row has the same shape as the older layout.
                line = line[0:3] + line[5:7] + line[10:last_col]
                doi = old_line[3]
                prop_id = old_line[4]
                html_total = int(old_line[8])
                pdf_total = int(old_line[9])
                # Read from the reshaped row: its cells 3 and 4 are cells
                # 5 and 6 of the raw row.
                issn = line[3].strip()
                eissn = line[4].strip()
            elif report.report_type in ("BR1", "BR2"):
                line = line[0:3] + line[5:7] + line[8:last_col]
            # DB1/DB2 rows are used as they are: their layout already
            # matches what the code below expects.
        else:
            if is_jr1:
                html_total = int(line[-2])
                pdf_total = int(line[-1])
                issn = line[3].strip()
                eissn = line[4].strip()
            line = line[0:last_col]

        logging.debug(line)
        title = line[0]
        publisher = line[1]
        platform = line[2]

        # Pair each monthly figure with its month, starting from the first
        # month of the report period.
        month_data = []
        curr_month = report.period[0]
        for data in line[5:]:
            month_data.append((curr_month, format_stat(data)))
            curr_month = next_month(curr_month)

        if report.report_type.startswith("JR"):
            report.pubs.append(
                CounterJournal(
                    report.period,
                    report.metric,
                    month_data=month_data,
                    title=title,
                    publisher=publisher,
                    platform=platform,
                    doi=doi,
                    issn=issn,
                    eissn=eissn,
                    proprietary_id=prop_id,
                    html_total=html_total,
                    pdf_total=pdf_total,
                )
            )
        elif report.report_type.startswith("BR"):
            report.pubs.append(
                CounterBook(
                    report.period,
                    report.metric,
                    month_data=month_data,
                    title=title,
                    publisher=publisher,
                    platform=platform,
                )
            )
        elif report.report_type.startswith("DB"):
            report.pubs.append(
                CounterDatabase(
                    report.period,
                    line[3],
                    month_data=month_data,
                    title=title,
                    publisher=publisher,
                    platform=platform,
                )
            )
        else:
            raise UnknownReportTypeError(report.report_type)

    return report
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    Metadata rows at the top of the report are consumed in order; each
    remaining non-empty row is converted with ``_parse_line`` and stored in
    ``report.pubs``.

    :param report_reader: an iterator that yields each report row as a list
        of cell values
    :return: CounterReport object
    """
    report = CounterReport()

    title_row = six.next(report_reader)
    report.report_type, report.report_version = _get_type_and_version(
        title_row[0]
    )
    is_r4 = report.report_version == 4

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)
    report.customer = six.next(report_reader)[0]

    if is_r4:
        id_row = six.next(report_reader)
        if id_row:
            report.institutional_identifier = id_row[0]
            # BR2 reports take their section type from the second cell of
            # the same row.
            if report.report_type == 'BR2':
                report.section_type = id_row[1]

        six.next(report_reader)

        period_row = six.next(report_reader)
        report.period = convert_covered(period_row[0])

    six.next(report_reader)

    run_row = six.next(report_reader)
    report.date_run = convert_date_run(run_row[0])

    header = six.next(report_reader)
    report.year = _year_from_header(header, report)

    if is_r4:
        # Keep the first eight columns as they are, then only the
        # non-empty header cells that follow.
        kept_cols = header[0:8]
        kept_cols.extend(cell for cell in header[8:] if cell)
        last_col = len(kept_cols)
    else:
        # Data columns stop at the first header cell mentioning "YTD".
        last_col = next(
            (idx for idx, cell in enumerate(header) if 'YTD' in cell),
            len(header),
        )
        report.period = (
            datetime.date(report.year, 1, 1),
            last_day(convert_date_column(header[last_col - 1])),
        )

    # One row is skipped for every type but DB1; DB2 skips a second one.
    if report.report_type != 'DB1':
        six.next(report_reader)
    if report.report_type == 'DB2':
        six.next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    Metadata rows at the top of the report are consumed in order; each
    remaining non-empty row is converted with ``_parse_line`` and stored in
    ``report.pubs``. If ``_year_from_header`` raises AttributeError, a
    warning is issued and parsing carries on.

    :param report_reader: an iterator that yields each report row as a list
        of cell values
    :return: CounterReport object
    """
    report = CounterReport()

    title_row = six.next(report_reader)
    report.report_type, report.report_version = _get_type_and_version(
        title_row[0]
    )
    is_r4 = report.report_version == 4

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)
    report.customer = six.next(report_reader)[0]

    if is_r4:
        id_row = six.next(report_reader)
        if id_row:
            report.institutional_identifier = id_row[0]
            # BR2 reports take their section type from the second cell of
            # the same row.
            if report.report_type == "BR2":
                report.section_type = id_row[1]

        six.next(report_reader)

        period_row = six.next(report_reader)
        report.period = convert_covered(period_row[0])

    six.next(report_reader)

    run_row = six.next(report_reader)
    report.date_run = convert_date_run(run_row[0])

    header = six.next(report_reader)

    try:
        report.year = _year_from_header(header, report)
    except AttributeError:
        warnings.warn("Could not determine year from malformed header")

    if is_r4:
        # Keep the first eight columns as they are, then only the
        # non-empty header cells that follow.
        kept_cols = header[0:8]
        kept_cols.extend(cell for cell in header[8:] if cell)
        last_col = len(kept_cols)
    else:
        # Data columns stop at the first header cell mentioning "YTD".
        last_col = next(
            (idx for idx, cell in enumerate(header) if "YTD" in cell),
            len(header),
        )
        report.period = (
            datetime.date(report.year, 1, 1),
            last_day(convert_date_column(header[last_col - 1])),
        )

    # One row is skipped for every type but DB1; DB2 skips a second one.
    if report.report_type != "DB1":
        six.next(report_reader)
    if report.report_type == "DB2":
        six.next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    The leading rows are consumed one at a time as report metadata (type and
    version, customer, institutional identifier, covered period, run date
    and column header). Every remaining non-empty row is passed to
    ``_parse_line`` and the result is collected in ``report.pubs``.

    :param report_reader: an iterator that yields each report row as a list
        of cell values
    :return: CounterReport object
    """
    # pylint: disable=too-many-branches
    report = CounterReport()

    title_row = next(report_reader)
    if title_row[0] == "Report_Name":  # COUNTER 5 report
        row_two = next(report_reader)
        row_three = next(report_reader)
        report.report_type, report.report_version = _get_c5_type_and_version(
            row_two, row_three
        )
    else:
        report.report_type, report.report_version = _get_type_and_version(
            title_row[0]
        )

    version = report.report_version
    is_c5 = version == 5
    # COUNTER 5 metadata rows are read from their second cell, older
    # releases from their first.
    value_col = 1 if is_c5 else 0

    if not is_c5:
        # noinspection PyTypeChecker
        report.metric = METRICS.get(report.report_type)

    report.customer = next(report_reader)[value_col]

    if version >= 4:
        id_row = next(report_reader)
        if id_row:
            report.institutional_identifier = id_row[value_col]
            if report.report_type == "BR2":
                report.section_type = id_row[1]

        # One row is always skipped here; COUNTER 5 skips three more before
        # the row holding the covered period.
        for _ in range(4 if is_c5 else 1):
            next(report_reader)

        period_row = next(report_reader)
        report.period = convert_covered(period_row[value_col])

    if version < 5:
        next(report_reader)

    run_row = next(report_reader)
    report.date_run = convert_date_run(run_row[value_col])

    if is_c5:
        # Skip Created_By and blank line
        next(report_reader)
        next(report_reader)

    header = next(report_reader)

    # Keep the first eight columns as they are, then only the non-empty
    # header cells that follow.
    kept_cols = header[0:8]
    kept_cols.extend(cell for cell in header[8:] if cell)
    last_col = len(kept_cols)

    if not is_c5 and report.report_type not in ("DB1", "PR1"):
        # every pre-COUNTER 5 report except DB1 and PR1 has a totals row
        next(report_reader)

    if report.report_type in ("DB2", "BR3", "JR3"):
        # this report has two lines of totals
        next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
def parse_generic(report_reader):
    """Turn the rows of a tabular COUNTER report into a CounterReport.

    The rows before the data section are read sequentially as metadata; the
    non-empty rows after them are each converted by ``_parse_line`` and
    appended to ``report.pubs``. A failure of ``_year_from_header`` with
    AttributeError is reported as a warning and does not stop parsing.

    :param report_reader: an iterator that yields each report row as a list
        of cell values
    :return: CounterReport object
    """

    def take_row():
        # Fetch the next row from the underlying reader.
        return six.next(report_reader)

    report = CounterReport()
    report.report_type, report.report_version = _get_type_and_version(
        take_row()[0]
    )
    release_four = report.report_version == 4

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)
    report.customer = take_row()[0]

    if release_four:
        identifier_cells = take_row()
        if identifier_cells:
            report.institutional_identifier = identifier_cells[0]
            if report.report_type == "BR2":
                # BR2 keeps its section type next to the identifier.
                report.section_type = identifier_cells[1]

        take_row()  # skipped row before the covered period

        report.period = convert_covered(take_row()[0])

    take_row()  # skipped row before the run date

    report.date_run = convert_date_run(take_row()[0])

    header = take_row()

    try:
        report.year = _year_from_header(header, report)
    except AttributeError:
        warnings.warn("Could not determine year from malformed header")

    if release_four:
        # First eight columns as they are, then the non-empty ones after.
        counted = header[0:8]
        counted.extend(name for name in header[8:] if name)
        last_col = len(counted)
    else:
        # Stop counting at the first header cell that mentions "YTD".
        last_col = next(
            (pos for pos, name in enumerate(header) if "YTD" in name),
            len(header),
        )
        first_day = datetime.date(report.year, 1, 1)
        final_day = last_day(convert_date_column(header[last_col - 1]))
        report.period = (first_day, final_day)

    # One row is skipped for every type but DB1; DB2 skips a second one.
    if report.report_type != "DB1":
        take_row()
    if report.report_type == "DB2":
        take_row()

    for cells in report_reader:
        if cells:
            report.pubs.append(_parse_line(cells, report, last_col))

    return report