def _parse_line(line, report, last_col):
    """Parse a single line from a report.

    :param line: sequence of cells in a report line
    :param report: the CounterReport the line came from; its
        ``report_version``, ``report_type``, ``metric`` and ``period``
        attributes are read here
    :param last_col: slice end marking where the data columns stop
    :return: a CounterJournal, CounterBook or CounterDatabase instance,
        chosen by the report type prefix (JR / BR / DB)
    :raises PycounterException: if the report type matches none of those
        prefixes
    """
    issn = None
    eissn = None
    isbn = None
    html_total = 0
    pdf_total = 0
    doi = ""
    prop_id = ""

    if report.report_version == 4:
        if report.report_type.startswith('JR1'):
            old_line = line
            # Drop the DOI / proprietary-id cells and the three cells in
            # front of the monthly data so every report type ends up as
            # three leading cells, two identifier cells, then months.
            line = line[0:3] + line[5:7] + line[10:last_col]
            doi = old_line[3]
            prop_id = old_line[4]
            # Totals go through format_stat, the same parser applied to
            # the monthly cells below, instead of a bare int() so that
            # both kinds of count are interpreted consistently.
            html_total = format_stat(old_line[8])
            pdf_total = format_stat(old_line[9])
            issn = line[3].strip()
            eissn = line[4].strip()
        elif report.report_type in ('BR1', 'BR2'):
            line = line[0:3] + line[5:7] + line[8:last_col]
            isbn = line[3].strip()
            issn = line[4].strip()
        # DB1 / DB2: format coincidentally works for these, so the line
        # is deliberately left untouched (this is a kludge).
    else:
        if report.report_type.startswith('JR1'):
            # In this layout the two totals are the last two cells.
            html_total = format_stat(line[-2])
            pdf_total = format_stat(line[-1])
            issn = line[3].strip()
            eissn = line[4].strip()
        line = line[0:last_col]

    logging.debug(line)
    common_args = {
        'title': line[0],
        'publisher': line[1],
        'platform': line[2],
        'period': report.period,
    }

    # Pair each monthly cell with its month, starting at the first month
    # of the report period.
    month_data = []
    curr_month = report.period[0]
    for data in line[5:]:
        month_data.append((curr_month, format_stat(data)))
        curr_month = next_month(curr_month)

    if report.report_type.startswith('JR'):
        return CounterJournal(
            metric=report.metric,
            month_data=month_data,
            doi=doi,
            issn=issn,
            eissn=eissn,
            proprietary_id=prop_id,
            html_total=html_total,
            pdf_total=pdf_total,
            **common_args
        )
    elif report.report_type.startswith('BR'):
        return CounterBook(
            metric=report.metric,
            month_data=month_data,
            doi=doi,
            issn=issn,
            isbn=isbn,
            proprietary_id=prop_id,
            **common_args
        )
    elif report.report_type.startswith('DB'):
        # For database reports the metric is carried in the row itself.
        return CounterDatabase(
            metric=line[3],
            month_data=month_data,
            **common_args
        )
    raise PycounterException("Should be unreachable")  # pragma: no cover
def _parse_line(line, report, last_col):
    """Turn one row of a report into the matching resource object.

    :param line: sequence of cells making up the row
    :param report: the CounterReport the row belongs to
    :param last_col: slice end marking where the data columns stop
    :return: a CounterJournal, CounterBook, CounterDatabase or
        CounterPlatform instance, chosen by the report type
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    doi = prop_id = ""
    issn = eissn = isbn = None
    html_total = pdf_total = 0
    metric = report.metric

    # ``cells`` is the normalised row; ``line`` keeps the raw cells so the
    # columns dropped during normalisation can still be read.
    cells = line

    if report.report_version >= 4:
        kind = report.report_type
        if kind.startswith("JR1") or kind == "TR_J1":
            cells = line[0:3] + line[5:7] + line[10:last_col]
            doi, prop_id = line[3], line[4]
            html_total = format_stat(line[8])
            pdf_total = format_stat(line[9])
            issn, eissn = cells[3].strip(), cells[4].strip()
        elif kind in ("BR1", "BR2"):
            cells = line[0:3] + line[5:7] + line[8:last_col]
            isbn, issn = cells[3].strip(), cells[4].strip()
        elif kind in ("BR3", "JR2"):
            # These rows carry their own metric in the raw row.
            metric = line[7]
            doi, prop_id = line[3], line[4]
            cells = line[0:3] + line[5:7] + line[9:last_col]
            eissn = cells[4].strip()
            primary_id = cells[3].strip()
            if kind == "BR3":
                isbn = primary_id
            else:
                issn = primary_id
        # DB1 and DB2 rows need no reshaping.
    else:
        if report.report_type.startswith("JR1"):
            # The two totals are the last two cells of the raw row.
            html_total = format_stat(line[-2])
            pdf_total = format_stat(line[-1])
            issn, eissn = line[3].strip(), line[4].strip()
        cells = line[0:last_col]

    logging.debug(cells)

    shared = dict(
        title=cells[0],
        publisher=cells[1],
        platform=cells[2],
        period=report.period,
    )

    # PR1 rows start their monthly cells one column earlier.
    first_month_idx = 4 if report.report_type == "PR1" else 5
    month_data = []
    month = report.period[0]
    for stat in map(format_stat, cells[first_month_idx:]):
        month_data.append((month, stat))
        month = next_month(month)

    if report.report_type.startswith("JR") or report.report_type == "TR_J1":
        return CounterJournal(
            metric=metric,
            month_data=month_data,
            doi=doi,
            issn=issn,
            eissn=eissn,
            proprietary_id=prop_id,
            html_total=html_total,
            pdf_total=pdf_total,
            **shared
        )
    if report.report_type.startswith("BR"):
        return CounterBook(
            metric=metric,
            month_data=month_data,
            doi=doi,
            issn=issn,
            isbn=isbn,
            proprietary_id=prop_id,
            **shared
        )
    if report.report_type.startswith("DB"):
        return CounterDatabase(metric=cells[3], month_data=month_data, **shared)
    if report.report_type == "PR1":
        # there is no title in the PR1 report, so the leading cells are
        # mapped explicitly instead of through the shared arguments
        return CounterPlatform(
            metric=cells[2],
            month_data=month_data,
            platform=cells[0],
            publisher=cells[1],
            period=report.period,
        )
    raise PycounterException("Should be unreachable")  # pragma: no cover
def parse_generic(report_reader):
    """Build a CounterReport from an iterator of COUNTER report rows.

    The rows are consumed strictly in order: the header rows first, then
    the column header row, then the data rows.

    :param report_reader: an iterator that yields the rows of a COUNTER
        report, each row being a list of cell values
    :return: a CounterReport populated with the header metadata and one
        resource object per non-empty data row
    :raises UnknownReportTypeError: if a data row is reached while
        ``report.report_type`` is empty
    """
    report = CounterReport()

    # First row: the report title, from which type and version are read.
    line1 = six.next(report_reader)
    rt_match = re.match(
        r".*(Journal|Book|Database) Report (\d(?: GOA)?) ?\(R(\d)\)",
        line1[0])
    if rt_match:
        report.report_type = CODES[rt_match.group(1)] + rt_match.group(2)
        report.report_version = int(rt_match.group(3))

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)

    report.customer = six.next(report_reader)[0]

    if report.report_version == 4:
        # Version 4 has three extra rows here: the institutional
        # identifier, a skipped row, and the covered-period row.
        inst_id_line = six.next(report_reader)
        if inst_id_line:
            report.institutional_identifier = inst_id_line[0]

        six.next(report_reader)

        covered_line = six.next(report_reader)
        report.period = convert_covered(covered_line[0])

    # Skip the row preceding the run date (presumably its label).
    six.next(report_reader)

    date_run_line = six.next(report_reader)
    report.date_run = convert_date_run(date_run_line[0])

    # Column header row; the position of the first month column depends
    # on the report type and version.
    header = six.next(report_reader)
    first_date_col = 10 if report.report_version == 4 else 5
    if report.report_type in ("BR1", "BR2") and report.report_version == 4:
        first_date_col = 8
    elif report.report_type == "DB1" and report.report_version == 4:
        first_date_col = 6
    elif report.report_type == "DB2" and report.report_version == 4:
        first_date_col = 5
    year = int(header[first_date_col].split("-")[1])
    if year < 100:
        # Two-digit years are taken to be in the 2000s.
        year += 2000

    report.year = year

    if report.report_version == 4:
        # Data ends after the last non-empty header cell past column 8.
        countable_header = header[0:8]
        for col in header[8:]:
            if col:
                countable_header.append(col)
        last_col = len(countable_header)
    else:
        # Data ends at the first header cell mentioning "YTD".
        last_col = 0
        for val in header:
            if "YTD" in val:
                break
            last_col += 1

        # No covered-period row was read for this version, so the period
        # is derived from the year and the last month column.
        start_date = datetime.date(year, 1, 1)
        end_date = last_day(convert_date_column(header[last_col - 1]))
        report.period = (start_date, end_date)

    # Skip the row(s) between the header and the data (presumably totals
    # rows -- confirm against the report layouts).
    if report.report_type != "DB1":
        six.next(report_reader)

    if report.report_type == "DB2":
        six.next(report_reader)

    for line in report_reader:
        if not line:
            continue

        issn = None
        eissn = None
        html_total = 0
        pdf_total = 0
        doi = ""
        prop_id = ""

        if report.report_version == 4:
            if report.report_type.startswith("JR1"):
                old_line = line
                line = line[0:3] + line[5:7] + line[10:last_col]
                doi = old_line[3]
                prop_id = old_line[4]
                # Totals go through format_stat, the same parser applied
                # to the monthly cells below and used for totals by
                # _parse_line, instead of a bare int().
                html_total = format_stat(old_line[8])
                pdf_total = format_stat(old_line[9])
                issn = line[3].strip()
                eissn = line[4].strip()
            elif report.report_type in ("BR1", "BR2"):
                line = line[0:3] + line[5:7] + line[8:last_col]
            elif report.report_type in ("DB1", "DB2"):
                # format coincidentally works for these. This is a kludge
                # so leaving this explicit...
                pass
        else:
            if report.report_type.startswith("JR1"):
                # In this layout the two totals are the last two cells.
                html_total = format_stat(line[-2])
                pdf_total = format_stat(line[-1])
                issn = line[3].strip()
                eissn = line[4].strip()
            line = line[0:last_col]

        logging.debug(line)
        title = line[0]
        publisher = line[1]
        platform = line[2]

        # Pair each monthly cell with its month, starting at the first
        # month of the report period.
        month_data = []
        curr_month = report.period[0]
        for data in line[5:]:
            month_data.append((curr_month, format_stat(data)))
            curr_month = next_month(curr_month)

        if report.report_type:
            if report.report_type.startswith("JR"):
                report.pubs.append(
                    CounterJournal(
                        report.period,
                        report.metric,
                        month_data=month_data,
                        title=title,
                        publisher=publisher,
                        platform=platform,
                        doi=doi,
                        issn=issn,
                        eissn=eissn,
                        proprietary_id=prop_id,
                        html_total=html_total,
                        pdf_total=pdf_total,
                    )
                )
            elif report.report_type.startswith("BR"):
                report.pubs.append(
                    CounterBook(
                        report.period,
                        report.metric,
                        month_data=month_data,
                        title=title,
                        publisher=publisher,
                        platform=platform,
                    )
                )
            elif report.report_type.startswith("DB"):
                # For database reports the metric is carried in the row.
                report.pubs.append(
                    CounterDatabase(
                        report.period,
                        line[3],
                        month_data=month_data,
                        title=title,
                        publisher=publisher,
                        platform=platform,
                    )
                )
        else:
            raise UnknownReportTypeError(report.report_type)

    return report
def _parse_line(line, report, last_col):
    """Turn one row of a report into the matching resource object.

    :param line: sequence of cells making up the row
    :param report: the CounterReport the row belongs to
    :param last_col: slice end marking where the data columns stop
    :return: a CounterJournal, CounterBook or CounterDatabase instance,
        chosen by the report type prefix
    """
    doi = prop_id = ""
    issn = eissn = isbn = None
    html_total = pdf_total = 0

    # ``cells`` is the normalised row; ``line`` keeps the raw cells so the
    # columns dropped during normalisation can still be read.
    cells = line

    if report.report_version == 4:
        kind = report.report_type
        if kind.startswith("JR1"):
            cells = line[0:3] + line[5:7] + line[10:last_col]
            doi, prop_id = line[3], line[4]
            html_total = format_stat(line[8])
            pdf_total = format_stat(line[9])
            issn, eissn = cells[3].strip(), cells[4].strip()
        elif kind in ("BR1", "BR2"):
            cells = line[0:3] + line[5:7] + line[8:last_col]
            isbn, issn = cells[3].strip(), cells[4].strip()
        # DB1 and DB2 rows need no reshaping.
    else:
        if report.report_type.startswith("JR1"):
            # The two totals are the last two cells of the raw row.
            html_total = format_stat(line[-2])
            pdf_total = format_stat(line[-1])
            issn, eissn = line[3].strip(), line[4].strip()
        cells = line[0:last_col]

    logging.debug(cells)

    shared = dict(
        title=cells[0],
        publisher=cells[1],
        platform=cells[2],
        period=report.period,
    )

    # Walk the monthly cells, stamping each with its month.
    month_data = []
    month = report.period[0]
    for stat in map(format_stat, cells[5:]):
        month_data.append((month, stat))
        month = next_month(month)

    if report.report_type.startswith("JR"):
        return CounterJournal(
            metric=report.metric,
            month_data=month_data,
            doi=doi,
            issn=issn,
            eissn=eissn,
            proprietary_id=prop_id,
            html_total=html_total,
            pdf_total=pdf_total,
            **shared
        )
    if report.report_type.startswith("BR"):
        return CounterBook(
            metric=report.metric,
            month_data=month_data,
            doi=doi,
            issn=issn,
            isbn=isbn,
            proprietary_id=prop_id,
            **shared
        )
    if report.report_type.startswith("DB"):
        # For database reports the metric is carried in the row itself.
        return CounterDatabase(metric=cells[3], month_data=month_data, **shared)
    raise PycounterException("Should be unreachable")  # pragma: no cover