def main(url, report, release, start_date, end_date, requestor_id,
         requestor_email, requestor_name, customer_name, customer_reference,
         format_, output_file):
    """Fetch one report from a SUSHI service and write it to a file.

    :param url: WSDL URL of the SUSHI service
    :param report: report to request
    :param release: COUNTER release to request
    :param start_date: start of the requested period; when ``None``, the
        value of ``prev_month(datetime.datetime.now())`` is used
    :param end_date: end of the requested period; when ``None``, the value
        of ``last_day()`` applied to the start date is used. May not be
        given without ``start_date`` (the process exits with status 1).
    :param requestor_id: requestor ID passed to the service
    :param requestor_email: requestor e-mail passed to the service
    :param requestor_name: requestor name passed to the service
    :param customer_name: customer name passed to the service
    :param customer_reference: customer reference passed to the service
    :param format_: output format, passed to ``report.write_to_file``
    :param output_file: output path; a ``%s`` placeholder in it is replaced
        by ``format_``
    """
    click.echo("pycounter SUSHI client for URL %s (%s R%s)"
               % (url, report, release))
    if end_date is not None and start_date is None:
        click.echo('Cannot specify --end_date without --start_date', err=True)
        sys.exit(1)
    if start_date is None:
        converted_start_date = prev_month(datetime.datetime.now())
    else:
        converted_start_date = convert_date_run(start_date)
    if end_date is None:
        converted_end_date = last_day(converted_start_date)
    else:
        converted_end_date = convert_date_run(end_date)
    report = sushi.get_report(wsdl_url=url,
                              report=report,
                              release=release,
                              requestor_id=requestor_id,
                              requestor_name=requestor_name,
                              requestor_email=requestor_email,
                              customer_reference=customer_reference,
                              customer_name=customer_name,
                              start_date=converted_start_date,
                              end_date=converted_end_date)
    # Only interpolate when a placeholder is present: "report.tsv" % "tsv"
    # raises TypeError ("not all arguments converted"), which used to abort
    # the run after the report had already been downloaded.
    if "%s" in output_file:
        output_file = output_file % format_
    report.write_to_file(output_file, format_)
def main(
    url,
    report,
    release,
    start_date,
    end_date,
    requestor_id,
    requestor_email,
    requestor_name,
    customer_name,
    customer_reference,
    api_key,
    format_,
    output_file,
    dump,
    no_ssl_verify,
    no_delay,
):
    """Run the SUSHI client: request one report and save it to disk."""
    # pylint: disable=too-many-locals
    click.echo(f"pycounter SUSHI client for URL {url} ({report} R{release})")

    # An end date on its own is rejected; otherwise a missing start date
    # is derived from the current time.
    if start_date is None:
        if end_date is not None:
            click.echo("Cannot specify --end_date without --start_date", err=True)
            sys.exit(1)
        period_begin = prev_month(datetime.datetime.now())
    else:
        period_begin = convert_date_run(start_date)

    # A missing end date is derived from the start date.
    period_end = (
        last_day(period_begin) if end_date is None else convert_date_run(end_date)
    )

    request = {
        "wsdl_url": url,
        "report": report,
        "release": release,
        "requestor_id": requestor_id,
        "requestor_name": requestor_name,
        "requestor_email": requestor_email,
        "customer_reference": customer_reference,
        "customer_name": customer_name,
        "start_date": period_begin,
        "end_date": period_end,
        "sushi_dump": dump,
        "no_delay": no_delay,
        "verify": not no_ssl_verify,
        "api_key": api_key,
    }
    fetched = sushi.get_report(**request)

    # Substitute the format into the file name only if it has a placeholder.
    target = output_file % format_ if "%s" in output_file else output_file
    fetched.write_to_file(target, format_)
def main(
    url,
    report,
    release,
    start_date,
    end_date,
    requestor_id,
    requestor_email,
    requestor_name,
    customer_name,
    customer_reference,
    format_,
    output_file,
    dump,
    no_ssl_verify,
    no_delay,
):
    """Entry point of the SUSHI client: fetch a report and write it out."""
    banner = "pycounter SUSHI client for URL %s (%s R%s)" % (url, report, release)
    click.echo(banner)

    start_given = start_date is not None
    if not start_given and end_date is not None:
        click.echo("Cannot specify --end_date without --start_date", err=True)
        sys.exit(1)

    # Fall back to a period derived from the current time when no start
    # date was supplied.
    if start_given:
        first_day = convert_date_run(start_date)
    else:
        first_day = prev_month(datetime.datetime.now())

    # Without an explicit end date the period end is derived from its start.
    if end_date is not None:
        final_day = convert_date_run(end_date)
    else:
        final_day = last_day(first_day)

    downloaded = sushi.get_report(
        wsdl_url=url,
        report=report,
        release=release,
        requestor_id=requestor_id,
        requestor_name=requestor_name,
        requestor_email=requestor_email,
        customer_reference=customer_reference,
        customer_name=customer_name,
        start_date=first_day,
        end_date=final_day,
        sushi_dump=dump,
        no_delay=no_delay,
        verify=not no_ssl_verify,
    )

    destination = output_file
    if "%s" in destination:
        # The placeholder in the file name stands for the output format.
        destination = destination % format_
    downloaded.write_to_file(destination, format_)
def _raw_to_full(raw_report):
    """Convert a raw SUSHI XML response to a CounterReport.

    :param raw_report: raw XML of the SUSHI response
    :return: a :class:`pycounter.report.CounterReport`
    :raises pycounter.exceptions.SushiException: if the XML cannot be
        parsed, or no COUNTER report element is found in it
    """
    try:
        root = etree.fromstring(raw_report)
    except etree.XMLSyntaxError as e:
        logger.error("XML syntax error: %s", raw_report)
        raise pycounter.exceptions.SushiException(e)

    o_root = objectify.fromstring(raw_report)
    # rep stays None when the Body lookup itself fails; the fallback below
    # then raises AttributeError on None and is reported as "not found".
    rep = None
    try:
        rep = o_root.Body[_ns('sushicounter', "ReportResponse")]
        c_report = rep.Report[_ns('counter', 'Report')]
    except AttributeError:
        try:
            c_report = rep.Report[_ns('counter', 'Reports')].Report
        except AttributeError:
            logger.error("report not found in XML: %s", raw_report)
            raise pycounter.exceptions.SushiException
    logger.debug("COUNTER report: %s", etree.tostring(c_report))

    start_date = datetime.datetime.strptime(
        root.find('.//%s' % _ns('sushi', 'Begin')).text,
        "%Y-%m-%d").date()
    end_date = datetime.datetime.strptime(
        root.find('.//%s' % _ns('sushi', 'End')).text,
        "%Y-%m-%d").date()
    report_data = {'period': (start_date, end_date)}

    rep_def = root.find('.//%s' % _ns('sushi', 'ReportDefinition'))
    report_data['report_version'] = int(rep_def.get('Release'))
    report_data['report_type'] = rep_def.get('Name')

    customer = root.find('.//%s' % _ns('counter', 'Customer'))
    try:
        report_data['customer'] = (
            customer.find('.//%s' % _ns('counter', 'Name')).text)
    except AttributeError:
        report_data['customer'] = ""
    # The customer ID is treated like the customer name: a response that
    # omits it yields an empty identifier instead of an AttributeError.
    try:
        inst_id = customer.find('.//%s' % _ns('counter', 'ID')).text
    except AttributeError:
        inst_id = ""
    report_data['institutional_identifier'] = inst_id

    rep_root = root.find('.//%s' % _ns('counter', 'Report'))
    created_string = rep_root.get('Created')
    if created_string is not None:
        report_data['date_run'] = arrow.get(created_string)
    else:
        report_data['date_run'] = datetime.datetime.now()

    report = pycounter.report.CounterReport(**report_data)
    report.metric = pycounter.constants.METRICS.get(report_data['report_type'])

    for item in c_report.Customer.ReportItems:
        try:
            publisher_name = item.ItemPublisher.text
        except AttributeError:
            publisher_name = ""
        title = item.ItemName.text
        platform = item.ItemPlatform.text

        eissn = issn = isbn = ""
        try:
            for identifier in item.ItemIdentifier:
                if identifier.Type == "Print_ISSN":
                    issn = identifier.Value.text
                    if issn is None:
                        issn = ""
                elif identifier.Type == "Online_ISSN":
                    eissn = identifier.Value.text
                    if eissn is None:
                        eissn = ""
                elif identifier.Type == "Online_ISBN":
                    # Use the module logger like the rest of this function,
                    # not the root logger.
                    logger.debug("FOUND ISBN")
                    isbn = identifier.Value.text
                    if isbn is None:
                        isbn = ""
        except AttributeError:
            # Items without identifiers keep the empty defaults.
            pass

        month_data = []
        html_usage = 0
        pdf_usage = 0
        metrics_for_db = collections.defaultdict(list)

        for perform_item in item.ItemPerformance:
            item_date = convert_date_run(perform_item.Period.Begin.text)
            logger.debug("perform_item date: %r", item_date)
            usage = None
            # A period with no Instance children carries no counts; skip it
            # rather than failing on the missing attribute.
            if hasattr(perform_item, 'Instance'):
                for inst in perform_item.Instance:
                    if inst.MetricType == "ft_total":
                        usage = str(inst.Count)
                    elif inst.MetricType == "ft_pdf":
                        pdf_usage += int(inst.Count)
                    elif inst.MetricType == "ft_html":
                        html_usage += int(inst.Count)
                    elif report.report_type.startswith('DB'):
                        metrics_for_db[inst.MetricType].append(
                            (item_date, int(inst.Count)))
            if usage is not None:
                month_data.append((item_date, int(usage)))

        if report.report_type:
            if report.report_type.startswith('JR'):
                report.pubs.append(pycounter.report.CounterJournal(
                    title=title,
                    platform=platform,
                    publisher=publisher_name,
                    period=report.period,
                    metric=report.metric,
                    issn=issn,
                    eissn=eissn,
                    month_data=month_data,
                    html_total=html_usage,
                    pdf_total=pdf_usage
                ))
            elif report.report_type.startswith('BR'):
                report.pubs.append(
                    pycounter.report.CounterBook(
                        title=title,
                        platform=platform,
                        publisher=publisher_name,
                        period=report.period,
                        metric=report.metric,
                        issn=issn,
                        isbn=isbn,
                        month_data=month_data,
                    ))
            elif report.report_type.startswith('DB'):
                # One publication entry per metric seen for this item.
                for metric, metric_month_data in six.iteritems(metrics_for_db):
                    report.pubs.append(
                        pycounter.report.CounterDatabase(
                            title=title,
                            platform=platform,
                            publisher=publisher_name,
                            period=report.period,
                            metric=metric,
                            month_data=metric_month_data
                        ))
    return report
def _raw_to_full(raw_report):
    """Convert a raw report to CounterReport.

    :param raw_report: raw XML report
    :return: a :class:`pycounter.report.CounterReport`
    :raises pycounter.exceptions.SushiException: if the XML cannot be
        parsed, or no COUNTER report element is found in it
    """
    try:
        root = etree.fromstring(raw_report)
    except etree.XMLSyntaxError:
        logger.error("XML syntax error: %s", raw_report)
        raise pycounter.exceptions.SushiException(message="XML syntax error",
                                                  raw=raw_report)

    o_root = objectify.fromstring(raw_report)
    # rep stays None when the Body lookup itself fails; the fallback below
    # then raises AttributeError on None and is reported as "not found".
    rep = None
    try:
        rep = o_root.Body[_ns('sushicounter', "ReportResponse")]
        c_report = rep.Report[_ns('counter', 'Report')]
    except AttributeError:
        try:
            c_report = rep.Report[_ns('counter', 'Reports')].Report
        except AttributeError:
            logger.error("report not found in XML: %s", raw_report)
            raise pycounter.exceptions.SushiException(
                message="report not found in XML",
                raw=raw_report,
                xml=o_root)
    logger.debug("COUNTER report: %s", etree.tostring(c_report))

    start_date = datetime.datetime.strptime(
        root.find('.//%s' % _ns('sushi', 'Begin')).text,
        "%Y-%m-%d").date()
    end_date = datetime.datetime.strptime(
        root.find('.//%s' % _ns('sushi', 'End')).text,
        "%Y-%m-%d").date()
    report_data = {'period': (start_date, end_date)}

    rep_def = root.find('.//%s' % _ns('sushi', 'ReportDefinition'))
    report_data['report_version'] = int(rep_def.get('Release'))
    report_data['report_type'] = rep_def.get('Name')

    customer = root.find('.//%s' % _ns('counter', 'Customer'))
    try:
        report_data['customer'] = (
            customer.find('.//%s' % _ns('counter', 'Name')).text)
    except AttributeError:
        report_data['customer'] = ""
    # The customer ID is treated like the customer name: a response that
    # omits it yields an empty identifier instead of an AttributeError.
    try:
        inst_id = customer.find('.//%s' % _ns('counter', 'ID')).text
    except AttributeError:
        inst_id = ""
    report_data['institutional_identifier'] = inst_id

    rep_root = root.find('.//%s' % _ns('counter', 'Report'))
    created_string = rep_root.get('Created')
    if created_string is not None:
        report_data['date_run'] = arrow.get(created_string)
    else:
        report_data['date_run'] = datetime.datetime.now()

    report = pycounter.report.CounterReport(**report_data)
    report.metric = pycounter.constants.METRICS.get(report_data['report_type'])

    for item in c_report.Customer.ReportItems:
        try:
            publisher_name = item.ItemPublisher.text
        except AttributeError:
            publisher_name = ""
        title = item.ItemName.text
        platform = item.ItemPlatform.text

        eissn = issn = ""
        print_isbn = None
        online_isbn = None
        doi = ""
        prop_id = ""
        try:
            for identifier in item.ItemIdentifier:
                if identifier.Type == "Print_ISSN":
                    issn = identifier.Value.text
                    if issn is None:
                        issn = ""
                elif identifier.Type == "Online_ISSN":
                    eissn = identifier.Value.text
                    if eissn is None:
                        eissn = ""
                elif identifier.Type == "Online_ISBN":
                    online_isbn = identifier.Value.text
                elif identifier.Type == "Print_ISBN":
                    print_isbn = identifier.Value.text
                elif identifier.Type == "DOI":
                    doi = identifier.Value.text
                elif identifier.Type == "Proprietary":
                    prop_id = identifier.Value.text
        except AttributeError:
            # Items without identifiers keep the defaults set above.
            pass

        month_data = []
        html_usage = 0
        pdf_usage = 0
        metrics_for_db = collections.defaultdict(list)

        for perform_item in item.ItemPerformance:
            item_date = convert_date_run(perform_item.Period.Begin.text)
            logger.debug("perform_item date: %r", item_date)
            usage = None
            if hasattr(perform_item, 'Instance'):
                for inst in perform_item.Instance:
                    if inst.MetricType == "ft_total":
                        usage = str(inst.Count)
                    elif inst.MetricType == "ft_pdf":
                        pdf_usage += int(inst.Count)
                    elif inst.MetricType == "ft_html":
                        html_usage += int(inst.Count)
                    elif report.report_type.startswith('DB'):
                        metrics_for_db[inst.MetricType].append(
                            (item_date, int(inst.Count)))
            if usage is not None:
                month_data.append((item_date, int(usage)))

        if report.report_type:
            if report.report_type.startswith('JR'):
                report.pubs.append(
                    pycounter.report.CounterJournal(title=title,
                                                    platform=platform,
                                                    publisher=publisher_name,
                                                    period=report.period,
                                                    metric=report.metric,
                                                    issn=issn,
                                                    eissn=eissn,
                                                    doi=doi,
                                                    proprietary_id=prop_id,
                                                    month_data=month_data,
                                                    html_total=html_usage,
                                                    pdf_total=pdf_usage))
            elif report.report_type.startswith('BR'):
                report.pubs.append(
                    pycounter.report.CounterBook(
                        title=title,
                        platform=platform,
                        publisher=publisher_name,
                        period=report.period,
                        metric=report.metric,
                        issn=issn,
                        doi=doi,
                        proprietary_id=prop_id,
                        print_isbn=print_isbn,
                        online_isbn=online_isbn,
                        month_data=month_data,
                    ))
            elif report.report_type.startswith('DB'):
                # One publication entry per metric code seen for this item;
                # the code is translated through DB_METRIC_MAP.
                for metric_code, metric_month_data in six.iteritems(
                        metrics_for_db):
                    metric = pycounter.constants.DB_METRIC_MAP[metric_code]
                    report.pubs.append(
                        pycounter.report.CounterDatabase(
                            title=title,
                            platform=platform,
                            publisher=publisher_name,
                            period=report.period,
                            metric=metric,
                            month_data=metric_month_data))
    return report
def _raw_to_full(raw_report):
    """Convert a raw SUSHI XML response to a CounterReport.

    The response is parsed twice: once with ``etree`` (used for the header
    fields found via ``root.find``) and once with ``objectify`` (used to
    walk the report items by attribute access).

    :param raw_report: raw XML report
        (NOTE(review): the ``b"Report Queued" in raw_report`` check below
        assumes this is bytes - confirm against callers)
    :return: a :class:`pycounter.report.CounterReport`
    :raises pycounter.exceptions.SushiException: if the XML cannot be
        parsed, or no COUNTER report element is found in it
    :raises pycounter.exceptions.ServiceBusyError: if no report element is
        found and the response contains ``Report Queued``
    """
    try:
        root = etree.fromstring(raw_report)
    except etree.XMLSyntaxError:
        logger.error("XML syntax error: %s", raw_report)
        raise pycounter.exceptions.SushiException(
            message="XML syntax error", raw=raw_report
        )
    o_root = objectify.fromstring(raw_report)
    # rep stays None when the Body lookup itself fails; the fallback below
    # then raises AttributeError on None, which the inner handler catches.
    rep = None
    try:
        rep = o_root.Body[_ns("sushicounter", "ReportResponse")]
        c_report = rep.Report[_ns("counter", "Report")]
    except AttributeError:
        # Second attempt: the report nested inside a "Reports" element.
        try:
            c_report = rep.Report[_ns("counter", "Reports")].Report
        except AttributeError:
            if b"Report Queued" in raw_report:
                raise pycounter.exceptions.ServiceBusyError("Report Queued")
            else:
                logger.error("report not found in XML: %s", raw_report)
                raise pycounter.exceptions.SushiException(
                    message="report not found in XML", raw=raw_report, xml=o_root
                )
    logger.debug("COUNTER report: %s", etree.tostring(c_report))
    # Header fields: requested period, report definition, customer details.
    start_date = datetime.datetime.strptime(
        root.find(".//%s" % _ns("sushi", "Begin")).text, "%Y-%m-%d"
    ).date()
    end_date = datetime.datetime.strptime(
        root.find(".//%s" % _ns("sushi", "End")).text, "%Y-%m-%d"
    ).date()
    report_data = {"period": (start_date, end_date)}
    rep_def = root.find(".//%s" % _ns("sushi", "ReportDefinition"))
    report_data["report_version"] = int(rep_def.get("Release"))
    report_data["report_type"] = rep_def.get("Name")
    customer = root.find(".//%s" % _ns("counter", "Customer"))
    # find() returns None for a missing element, so ".text" raises
    # AttributeError; both customer fields fall back to an empty string.
    try:
        report_data["customer"] = customer.find(".//%s" % _ns("counter", "Name")).text
    except AttributeError:
        report_data["customer"] = ""
    try:
        inst_id = customer.find(".//%s" % _ns("counter", "ID")).text
    except AttributeError:
        inst_id = u""
    report_data["institutional_identifier"] = inst_id
    rep_root = root.find(".//%s" % _ns("counter", "Report"))
    created_string = rep_root.get("Created")
    # Without a "Created" attribute the current time is used as run date.
    if created_string is not None:
        report_data["date_run"] = arrow.get(created_string)
    else:
        report_data["date_run"] = datetime.datetime.now()
    report = pycounter.report.CounterReport(**report_data)
    report.metric = pycounter.constants.METRICS.get(report_data["report_type"])
    for item in c_report.Customer.ReportItems:
        try:
            publisher_name = item.ItemPublisher.text
        except AttributeError:
            publisher_name = ""
        title = item.ItemName.text
        platform = item.ItemPlatform.text
        eissn = issn = ""
        print_isbn = None
        online_isbn = None
        doi = ""
        prop_id = ""
        try:
            for identifier in item.ItemIdentifier:
                if identifier.Type == "Print_ISSN":
                    issn = identifier.Value.text
                    if issn is None:
                        issn = ""
                elif identifier.Type == "Online_ISSN":
                    eissn = identifier.Value.text
                    if eissn is None:
                        eissn = ""
                elif identifier.Type == "Online_ISBN":
                    online_isbn = identifier.Value.text
                elif identifier.Type == "Print_ISBN":
                    print_isbn = identifier.Value.text
                elif identifier.Type == "DOI":
                    doi = identifier.Value.text
                elif identifier.Type == "Proprietary":
                    prop_id = identifier.Value.text
        except AttributeError:
            # Items without identifiers keep the defaults set above.
            pass
        month_data = []
        html_usage = 0
        pdf_usage = 0
        # For DB reports: metric type -> list of (date, count) pairs.
        metrics_for_db = collections.defaultdict(list)
        for perform_item in item.ItemPerformance:
            item_date = convert_date_run(perform_item.Period.Begin.text)
            logger.debug("perform_item date: %r", item_date)
            usage = None
            # Performance entries without any Instance carry no counts.
            if hasattr(perform_item, "Instance"):
                for inst in perform_item.Instance:
                    if inst.MetricType == "ft_total":
                        usage = str(inst.Count)
                    elif inst.MetricType == "ft_pdf":
                        pdf_usage += int(inst.Count)
                    elif inst.MetricType == "ft_html":
                        html_usage += int(inst.Count)
                    elif report.report_type.startswith("DB"):
                        metrics_for_db[inst.MetricType].append(
                            (item_date, int(inst.Count))
                        )
            # Only periods that reported an ft_total contribute month data.
            if usage is not None:
                month_data.append((item_date, int(usage)))
        # Build the publication object matching the report family; report
        # types outside JR/BR/DB add nothing.
        if report.report_type:
            if report.report_type.startswith("JR"):
                report.pubs.append(
                    pycounter.report.CounterJournal(
                        title=title,
                        platform=platform,
                        publisher=publisher_name,
                        period=report.period,
                        metric=report.metric,
                        issn=issn,
                        eissn=eissn,
                        doi=doi,
                        proprietary_id=prop_id,
                        month_data=month_data,
                        html_total=html_usage,
                        pdf_total=pdf_usage,
                    )
                )
            elif report.report_type.startswith("BR"):
                report.pubs.append(
                    pycounter.report.CounterBook(
                        title=title,
                        platform=platform,
                        publisher=publisher_name,
                        period=report.period,
                        metric=report.metric,
                        issn=issn,
                        doi=doi,
                        proprietary_id=prop_id,
                        print_isbn=print_isbn,
                        online_isbn=online_isbn,
                        month_data=month_data,
                    )
                )
            elif report.report_type.startswith("DB"):
                # One entry per metric code; note the loop rebinds
                # month_data, which is reset at the top of the next item.
                for metric_code, month_data in six.iteritems(metrics_for_db):
                    metric = pycounter.constants.DB_METRIC_MAP[metric_code]
                    report.pubs.append(
                        pycounter.report.CounterDatabase(
                            title=title,
                            platform=platform,
                            publisher=publisher_name,
                            period=report.period,
                            metric=metric,
                            month_data=month_data,
                        )
                    )
    return report
def parse_generic(report_reader):
    """Parse the rows of a tabular COUNTER report into a CounterReport.

    The report type and release are read from the title in the first row;
    the rows that follow are consumed positionally, so the reader must be
    positioned at the very first row of the report.

    :param report_reader: an iterator that yields each row of the report
        as a list of cell values
    :return: the populated CounterReport
    :raises UnknownReportTypeError: if a data row is reached for a report
        type other than JR*, BR* or DB*
    """
    report = CounterReport()
    line1 = six.next(report_reader)
    # Groups: report family, report number (optionally " GOA"), release.
    rt_match = re.match(
        r".*(Journal|Book|Database) Report (\d(?: GOA)?) ?\(R(\d)\)", line1[0]
    )
    # NOTE(review): when the title does not match, type and version keep
    # whatever CounterReport() initialised them to - confirm the defaults.
    if rt_match:
        report.report_type = CODES[rt_match.group(1)] + rt_match.group(2)
        report.report_version = int(rt_match.group(3))
    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)
    report.customer = six.next(report_reader)[0]
    if report.report_version == 4:
        # Release 4 has three extra rows here: the institutional
        # identifier, a row that is skipped, and the covered period.
        inst_id_line = six.next(report_reader)
        if inst_id_line:
            report.institutional_identifier = inst_id_line[0]
        six.next(report_reader)
        covered_line = six.next(report_reader)
        report.period = convert_covered(covered_line[0])
    # One row is skipped before the row holding the run date.
    six.next(report_reader)
    date_run_line = six.next(report_reader)
    report.date_run = convert_date_run(date_run_line[0])
    header = six.next(report_reader)
    # Index of the first month column in the header; it depends on the
    # release and, for release 4, on the report type.
    first_date_col = 10 if report.report_version == 4 else 5
    if report.report_type in ("BR1", "BR2") and report.report_version == 4:
        first_date_col = 8
    elif report.report_type == "DB1" and report.report_version == 4:
        first_date_col = 6
    elif report.report_type == "DB2" and report.report_version == 4:
        first_date_col = 5
    # The year is the part after the "-" in that header cell; two-digit
    # years are taken to be in the 2000s.
    year = int(header[first_date_col].split("-")[1])
    if year < 100:
        year += 2000
    report.year = year
    if report.report_version == 4:
        # last_col = the first eight header cells plus every non-empty
        # cell after them.
        countable_header = header[0:8]
        for col in header[8:]:
            if col:
                countable_header.append(col)
        last_col = len(countable_header)
    else:
        # last_col = index of the first header cell containing "YTD"
        # (or the header length if there is none).
        last_col = 0
        for val in header:
            if "YTD" in val:
                break
            last_col += 1
        # Older releases have no covered-period row, so the period runs
        # from 1 January to the end of the last month column.
        start_date = datetime.date(year, 1, 1)
        end_date = last_day(convert_date_column(header[last_col - 1]))
        report.period = (start_date, end_date)
    # Skip one row after the header for every type except DB1, and a
    # second one for DB2.
    if report.report_type != "DB1":
        six.next(report_reader)
    if report.report_type == "DB2":
        six.next(report_reader)
    for line in report_reader:
        issn = None
        eissn = None
        html_total = 0
        pdf_total = 0
        doi = ""
        prop_id = ""
        if not line:
            continue
        # Normalise every row to: title, publisher, platform, two more
        # columns, then the monthly values.
        if report.report_version == 4:
            if report.report_type.startswith("JR1"):
                old_line = line
                line = line[0:3] + line[5:7] + line[10:last_col]
                doi = old_line[3]
                prop_id = old_line[4]
                html_total = int(old_line[8])
                pdf_total = int(old_line[9])
                # Read from the re-sliced row, i.e. original columns 5, 6.
                issn = line[3].strip()
                eissn = line[4].strip()
            elif report.report_type in ("BR1", "BR2"):
                line = line[0:3] + line[5:7] + line[8:last_col]
            elif report.report_type in ("DB1", "DB2"):
                # format coincidentally works for these. This is a kludge
                # so leaving this explicit...
                pass
        else:
            if report.report_type.startswith("JR1"):
                # The HTML and PDF totals are the last two cells of the row.
                html_total = int(line[-2])
                pdf_total = int(line[-1])
                issn = line[3].strip()
                eissn = line[4].strip()
            line = line[0:last_col]
        logging.debug(line)
        title = line[0]
        publisher = line[1]
        platform = line[2]
        # Pair each monthly cell with consecutive months, starting at the
        # first month of the report period.
        month_data = []
        curr_month = report.period[0]
        for data in line[5:]:
            month_data.append((curr_month, format_stat(data)))
            curr_month = next_month(curr_month)
        if report.report_type:
            if report.report_type.startswith("JR"):
                report.pubs.append(
                    CounterJournal(
                        report.period,
                        report.metric,
                        month_data=month_data,
                        title=title,
                        publisher=publisher,
                        platform=platform,
                        doi=doi,
                        issn=issn,
                        eissn=eissn,
                        proprietary_id=prop_id,
                        html_total=html_total,
                        pdf_total=pdf_total,
                    )
                )
            elif report.report_type.startswith("BR"):
                report.pubs.append(
                    CounterBook(
                        report.period,
                        report.metric,
                        month_data=month_data,
                        title=title,
                        publisher=publisher,
                        platform=platform,
                    )
                )
            elif report.report_type.startswith("DB"):
                # For database reports the row's fourth cell is passed as
                # the metric.
                report.pubs.append(
                    CounterDatabase(
                        report.period,
                        line[3],
                        month_data=month_data,
                        title=title,
                        publisher=publisher,
                        platform=platform,
                    )
                )
            else:
                raise UnknownReportTypeError(report.report_type)
    return report
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    :param report_reader: an iterator yielding each row of the report as a
        list of cell values
    :return: the populated CounterReport
    """
    report = CounterReport()

    title_row = six.next(report_reader)
    report.report_type, report.report_version = _get_type_and_version(
        title_row[0])

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)
    report.customer = six.next(report_reader)[0]

    if report.report_version == 4:
        # Release 4 only: identifier row, one skipped row, covered period.
        id_row = six.next(report_reader)
        if id_row:
            report.institutional_identifier = id_row[0]
            if report.report_type == 'BR2':
                report.section_type = id_row[1]
        six.next(report_reader)
        report.period = convert_covered(six.next(report_reader)[0])

    # One row is skipped before the run-date row; the header follows.
    six.next(report_reader)
    report.date_run = convert_date_run(six.next(report_reader)[0])
    header = six.next(report_reader)
    report.year = _year_from_header(header, report)

    if report.report_version == 4:
        # Count the first eight header cells plus every later non-empty one.
        kept = header[0:8]
        kept.extend(cell for cell in header[8:] if cell)
        last_col = len(kept)
    else:
        # Position of the first cell mentioning YTD, or the header length.
        last_col = len(header)
        for position, cell in enumerate(header):
            if 'YTD' in cell:
                last_col = position
                break
        period_end = last_day(convert_date_column(header[last_col - 1]))
        report.period = (datetime.date(report.year, 1, 1), period_end)

    # Rows skipped after the header: one unless DB1, one more for DB2.
    if report.report_type != 'DB1':
        six.next(report_reader)
    if report.report_type == 'DB2':
        six.next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
def test_convert_date_run(date_run, expected):
    """Check that ``convert_date_run(date_run)`` equals the expected date.

    ``expected`` is unpacked as the arguments of ``datetime.date``.
    """
    wanted = datetime.date(*expected)
    actual = convert_date_run(date_run)
    assert actual == wanted
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    :param report_reader: an iterator yielding each row of the report as a
        list of cell values
    :return: the populated CounterReport
    :raises UnknownReportTypeError: if a data row is reached for a report
        type that is neither JR* nor BR*
    """
    report = CounterReport()

    title_row = six.next(report_reader)
    title_match = re.match(
        r'.*(Journal|Book|Database) Report (\d(?: GOA)?) ?\(R(\d)\)',
        title_row[0])
    if title_match:
        family, number, release = title_match.groups()
        # e.g. first letter of the family + 'R' + the report number
        report.report_type = family[0].capitalize() + 'R' + number
        report.report_version = int(release)

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)
    report.customer = six.next(report_reader)[0]

    is_r4 = report.report_version == 4
    if is_r4:
        # Release 4 only: identifier row, one skipped row, covered period.
        id_row = six.next(report_reader)
        if id_row:
            report.institutional_identifier = id_row[0]
        six.next(report_reader)
        report.period = convert_covered(six.next(report_reader)[0])

    # One row is skipped before the run-date row; the header follows.
    six.next(report_reader)
    report.date_run = convert_date_run(six.next(report_reader)[0])
    header = six.next(report_reader)

    # Index of the first month column in the header.
    if not is_r4:
        first_date_col = 5
    elif report.report_type in ('BR1', 'BR2'):
        first_date_col = 8
    else:
        first_date_col = 10

    # The year follows the '-' in that cell; two-digit years mean 20xx.
    year = int(header[first_date_col].split('-')[1])
    if year < 100:
        year += 2000
    report.year = year

    if is_r4:
        # Count the first eight header cells plus every later non-empty one.
        kept = header[0:8]
        kept.extend(cell for cell in header[8:] if cell)
        last_col = len(kept)
    else:
        # Position of the first cell mentioning YTD, or the header length.
        last_col = len(header)
        for position, cell in enumerate(header):
            if 'YTD' in cell:
                last_col = position
                break
        period_end = last_day(convert_date_column(header[last_col - 1]))
        report.period = (datetime.date(year, 1, 1), period_end)

    # The row right after the header is skipped.
    six.next(report_reader)

    for row in report_reader:
        if not row:
            continue

        # Trim each row down to the columns the publication classes expect.
        if not is_r4:
            row = row[0:last_col]
        elif report.report_type.startswith('JR1'):
            row = row[0:3] + row[5:7] + row[10:last_col]
        elif report.report_type in ('BR1', 'BR2'):
            row = row[0:3] + row[5:7] + row[8:last_col]
        logging.debug(row)

        title, publisher, platform = row[0], row[1], row[2]

        if not report.report_type:
            continue
        if report.report_type.startswith('JR'):
            pub_class = CounterJournal
        elif report.report_type.startswith('BR'):
            pub_class = CounterBook
        else:
            raise UnknownReportTypeError(report.report_type)
        report.pubs.append(pub_class(row,
                                     report.period,
                                     report.metric,
                                     title=title,
                                     publisher=publisher,
                                     platform=platform))

    return report
def parse_generic(report_reader):
    """Turn the rows of a tabular COUNTER report into a CounterReport.

    :param report_reader: an iterator yielding each row of the report as a
        list of cell values
    :return: CounterReport object
    """
    # pylint: disable=too-many-branches
    report = CounterReport()

    title_row = next(report_reader)
    if title_row[0] == "Report_Name":
        # COUNTER 5 report: type and version come from the next two rows.
        id_row = next(report_reader)
        release_row = next(report_reader)
        detected = _get_c5_type_and_version(id_row, release_row)
    else:
        detected = _get_type_and_version(title_row[0])
    report.report_type, report.report_version = detected

    is_c5 = report.report_version == 5
    # COUNTER 5 rows are read from their second cell, older ones from
    # their first.
    value_col = 1 if is_c5 else 0

    if not is_c5:
        # noinspection PyTypeChecker
        report.metric = METRICS.get(report.report_type)

    report.customer = next(report_reader)[value_col]

    if report.report_version >= 4:
        inst_row = next(report_reader)
        if inst_row:
            report.institutional_identifier = inst_row[value_col]
            if report.report_type == "BR2":
                report.section_type = inst_row[1]

        # One row is skipped for release 4, four for COUNTER 5.
        for _ in range(4 if is_c5 else 1):
            next(report_reader)

        report.period = convert_covered(next(report_reader)[value_col])

    if report.report_version < 5:
        next(report_reader)

    report.date_run = convert_date_run(next(report_reader)[value_col])

    if is_c5:
        # Skip Created_By and blank line
        next(report_reader)
        next(report_reader)

    header = next(report_reader)

    # Count the first eight header cells plus every later non-empty one.
    kept = header[0:8]
    kept.extend(cell for cell in header[8:] if cell)
    last_col = len(kept)

    if not (report.report_type in ("DB1", "PR1") or is_c5):
        # every other pre-COUNTER 5 report has a line with totals
        next(report_reader)
    if report.report_type in ("DB2", "BR3", "JR3"):
        # these reports have two lines of totals
        next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
def raw_to_full(raw_report):
    """Build a CounterReport from a decoded COUNTER 5 JSON report.

    :param raw_report: raw report as dict decoded from JSON
    :return: a :class:`pycounter.report.CounterReport`
    :raises pycounter.exceptions.UnknownReportTypeError: if the report has
        items and its type is not TR_J1, TR_J2 or TR_B*
    """
    # pylint: disable=too-many-locals
    header = raw_report["Report_Header"]
    period = _dates_from_filters(header["Report_Filters"])
    created = header.get("Created")

    release = int(header.get("Release", raw_report.get("Release", 5)))
    report_id = header["Report_ID"]
    customer = header.get("Institution_Name", "")
    customer_id = header.get("Customer_ID", "")
    if created:
        run_at = pendulum.parse(created)
    else:
        run_at = datetime.datetime.now()

    report = pycounter.report.CounterReport(
        period=period,
        report_version=release,
        report_type=report_id,
        customer=customer,
        institutional_identifier=customer_id,
        metric=None,  # COUNTER 5 reports usually contain multiple metrics
        date_run=run_at,
    )

    for item in raw_report["Report_Items"]:
        publisher_name = item.get("Publisher", "")
        platform = item.get("Platform", "")
        title = item["Title"]
        identifiers = _get_identifiers(item)

        # metric type -> list of (date, count), in first-seen order
        by_metric = collections.OrderedDict()
        for performance in item["Performance"]:
            month = convert_date_run(performance["Period"]["Begin_Date"])
            for instance in performance["Instance"]:
                count = instance["Count"]
                series = by_metric.setdefault(instance["Metric_Type"], [])
                series.append((month, int(count)))

        kind = report.report_type
        if kind == "TR_J1":
            journal = pycounter.report.CounterJournal(
                title=title,
                platform=platform,
                publisher=publisher_name,
                period=report.period,
                metric="Total_Item_Requests",
                issn=identifiers["issn"],
                eissn=identifiers["eissn"],
                doi=identifiers["doi"],
                proprietary_id=identifiers["prop_id"],
                month_data=by_metric["Total_Item_Requests"],
            )
            report.pubs.append(journal)
        elif kind == "TR_J2":
            # One journal entry per metric reported for the item.
            for metric_name, series in by_metric.items():
                journal = pycounter.report.CounterJournal(
                    title=title,
                    platform=platform,
                    publisher=publisher_name,
                    period=report.period,
                    metric=metric_name,
                    issn=identifiers["issn"],
                    eissn=identifiers["eissn"],
                    doi=identifiers["doi"],
                    proprietary_id=identifiers["prop_id"],
                    month_data=series,
                )
                report.pubs.append(journal)
        elif kind.startswith("TR_B"):
            book = pycounter.report.CounterBook(
                title=title,
                platform=platform,
                publisher=publisher_name,
                period=report.period,
                metric="Total_Item_Requests",
                issn=identifiers["issn"],
                isbn=identifiers["isbn"],
                doi=identifiers["doi"],
                proprietary_id=identifiers["prop_id"],
                month_data=by_metric["Total_Item_Requests"],
            )
            report.pubs.append(book)
        else:
            raise pycounter.exceptions.UnknownReportTypeError

    return report
def raw_to_full(raw_report):
    """Build a CounterReport from a raw SUSHI XML response.

    :param raw_report: raw XML report
    :return: a :class:`pycounter.report.CounterReport`
    :raises pycounter.exceptions.SushiException: if the XML cannot be
        parsed, or no COUNTER report element is found in it
    :raises pycounter.exceptions.ServiceBusyError: if no report element is
        found and the response contains ``Report Queued``
    """
    # pylint: disable=too-many-statements,too-many-branches,too-many-locals
    try:
        tree = etree.fromstring(raw_report)
    except etree.XMLSyntaxError:
        logger.error("XML syntax error: %s", raw_report)
        raise pycounter.exceptions.SushiException(
            message="XML syntax error", raw=raw_report)

    obj_tree = objectify.fromstring(raw_report)
    # response stays None when the Body lookup fails; the fallback then
    # raises AttributeError on None and lands in the inner handler.
    response = None
    try:
        response = obj_tree.Body[ns("sushicounter", "ReportResponse")]
        counter_report = response.Report[ns("counter", "Report")]
    except AttributeError:
        try:
            counter_report = response.Report[ns("counter", "Reports")].Report
        except AttributeError:
            if b"Report Queued" in raw_report:
                raise pycounter.exceptions.ServiceBusyError("Report Queued")
            logger.error("report not found in XML: %s", raw_report)
            raise pycounter.exceptions.SushiException(
                message="report not found in XML",
                raw=raw_report,
                xml=obj_tree)
    logger.debug("COUNTER report: %s", etree.tostring(counter_report))

    # --- header fields -------------------------------------------------
    begin_text = tree.find(".//%s" % ns("sushi", "Begin")).text
    period_begin = datetime.datetime.strptime(begin_text, "%Y-%m-%d").date()
    end_text = tree.find(".//%s" % ns("sushi", "End")).text
    period_end = datetime.datetime.strptime(end_text, "%Y-%m-%d").date()

    definition = tree.find(".//%s" % ns("sushi", "ReportDefinition"))
    version = int(definition.get("Release"))
    report_type = definition.get("Name")

    customer = tree.find(".//%s" % ns("counter", "Customer"))
    try:
        customer_name = customer.find(".//%s" % ns("counter", "Name")).text
    except AttributeError:
        customer_name = ""
    try:
        inst_id = customer.find(".//%s" % ns("counter", "ID")).text
    except AttributeError:
        inst_id = ""

    created = tree.find(".//%s" % ns("counter", "Report")).get("Created")
    if created is None:
        run_at = datetime.datetime.now()
    else:
        run_at = pendulum.parse(created)

    report = pycounter.report.CounterReport(
        period=(period_begin, period_end),
        report_version=version,
        report_type=report_type,
        customer=customer_name,
        institutional_identifier=inst_id,
        date_run=run_at,
    )
    report.metric = pycounter.constants.METRICS.get(report_type)

    # --- report items --------------------------------------------------
    for item in counter_report.Customer.ReportItems:
        try:
            publisher_name = item.ItemPublisher.text
        except AttributeError:
            publisher_name = ""
        title = item.ItemName.text
        platform = item.ItemPlatform.text

        issn = ""
        eissn = ""
        doi = ""
        prop_id = ""
        print_isbn = None
        online_isbn = None
        try:
            for identifier in item.ItemIdentifier:
                id_type = identifier.Type
                if id_type == "Print_ISSN":
                    issn = identifier.Value.text or ""
                elif id_type == "Online_ISSN":
                    eissn = identifier.Value.text or ""
                elif id_type == "Online_ISBN":
                    online_isbn = identifier.Value.text
                elif id_type == "Print_ISBN":
                    print_isbn = identifier.Value.text
                elif id_type == "DOI":
                    doi = identifier.Value.text
                elif id_type == "Proprietary":
                    prop_id = identifier.Value.text
        except AttributeError:
            # Items without identifiers keep the defaults set above.
            pass

        totals_by_month = []
        html_usage = 0
        pdf_usage = 0
        # metric code -> list of (date, count), in first-seen order
        series_by_code = collections.OrderedDict()

        for performance in item.ItemPerformance:
            month = convert_date_run(performance.Period.Begin.text)
            logger.debug("perform_item date: %r", month)
            total = None
            for inst in getattr(performance, "Instance", ()):
                if inst.MetricType == "ft_total":
                    total = str(inst.Count)
                elif inst.MetricType == "ft_pdf":
                    pdf_usage += int(inst.Count)
                elif inst.MetricType == "ft_html":
                    html_usage += int(inst.Count)
                elif (report.report_type.startswith("DB")
                      or report.report_type in ("PR1", "JR2", "BR3")):
                    series_by_code.setdefault(inst.MetricType, []).append(
                        (month, int(inst.Count)))
            if total is not None:
                totals_by_month.append((month, int(total)))

        kind = report.report_type
        if not kind:
            continue

        common = {
            "platform": platform,
            "publisher": publisher_name,
            "period": report.period,
        }
        metric_map = pycounter.constants.DB_METRIC_MAP

        if kind == "JR1":
            report.pubs.append(
                pycounter.report.CounterJournal(
                    title=title,
                    metric=report.metric,
                    issn=issn,
                    eissn=eissn,
                    doi=doi,
                    proprietary_id=prop_id,
                    month_data=totals_by_month,
                    html_total=html_usage,
                    pdf_total=pdf_usage,
                    **common))
        elif kind == "BR3":
            for code, series in series_by_code.items():
                report.pubs.append(
                    pycounter.report.CounterBook(
                        title=title,
                        metric=metric_map[code],
                        issn=issn,
                        print_isbn=print_isbn,
                        online_isbn=online_isbn,
                        doi=doi,
                        proprietary_id=prop_id,
                        month_data=series,
                        **common))
        elif kind.startswith("BR"):
            # BR1, BR2
            report.pubs.append(
                pycounter.report.CounterBook(
                    title=title,
                    metric=report.metric,
                    issn=issn,
                    doi=doi,
                    proprietary_id=prop_id,
                    print_isbn=print_isbn,
                    online_isbn=online_isbn,
                    month_data=totals_by_month,
                    **common))
        elif kind.startswith("DB"):
            for code, series in series_by_code.items():
                report.pubs.append(
                    pycounter.report.CounterDatabase(
                        title=title,
                        metric=metric_map[code],
                        month_data=series,
                        **common))
        elif kind == "PR1":
            for code, series in series_by_code.items():
                report.pubs.append(
                    pycounter.report.CounterPlatform(
                        metric=metric_map[code],
                        month_data=series,
                        **common))
        elif kind == "JR2":
            for code, series in series_by_code.items():
                report.pubs.append(
                    pycounter.report.CounterJournal(
                        title=title,
                        metric=metric_map[code],
                        issn=issn,
                        eissn=eissn,
                        doi=doi,
                        proprietary_id=prop_id,
                        month_data=series,
                        **common))

    return report
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    Rows are consumed strictly in order: the metadata lines at the top,
    the column header, the line(s) skipped after the header, and then one
    row per publication.

    :param report_reader: an iterator yielding each row of COUNTER data as
        a list of cell values

    :return: CounterReport object
    """
    report = CounterReport()

    title_row = six.next(report_reader)
    report.report_type, report.report_version = _get_type_and_version(
        title_row[0]
    )

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)

    customer_row = six.next(report_reader)
    report.customer = customer_row[0]

    is_release_4 = report.report_version == 4

    if is_release_4:
        # Release 4 has three more lines here: the institutional
        # identifier, a line the parser ignores, and the covered period.
        institution_row = six.next(report_reader)
        if institution_row:
            report.institutional_identifier = institution_row[0]
            if report.report_type == "BR2":
                report.section_type = institution_row[1]

        six.next(report_reader)

        period_row = six.next(report_reader)
        report.period = convert_covered(period_row[0])

    # One ignored line precedes the date the report was run.
    six.next(report_reader)

    run_row = six.next(report_reader)
    report.date_run = convert_date_run(run_row[0])

    header = six.next(report_reader)

    try:
        report.year = _year_from_header(header, report)
    except AttributeError:
        warnings.warn("Could not determine year from malformed header")

    if is_release_4:
        # The first eight columns always count; beyond them only the
        # non-empty header cells do.
        last_col = len(header[0:8]) + len([cell for cell in header[8:] if cell])
    else:
        # Columns are counted up to the first header cell containing
        # "YTD"; when there is none, every column counts.
        last_col = len(header)
        for position, cell in enumerate(header):
            if "YTD" in cell:
                last_col = position
                break

        report.period = (
            datetime.date(report.year, 1, 1),
            last_day(convert_date_column(header[last_col - 1])),
        )

    # Lines skipped between the header and the data rows: none for DB1,
    # two for DB2 and one for every other report type.
    lines_to_skip = {"DB1": 0, "DB2": 2}.get(report.report_type, 1)
    for _ in range(lines_to_skip):
        six.next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    Handles both the older releases and COUNTER 5; a report is treated as
    COUNTER 5 when the first cell of its first row is ``Report_Name``.
    Rows are consumed strictly in order.

    :param report_reader: an iterator yielding each row of COUNTER data as
        a list of cell values

    :return: CounterReport object
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    report = CounterReport()

    first_line = six.next(report_reader)
    if first_line[0] != "Report_Name":
        report.report_type, report.report_version = _get_type_and_version(
            first_line[0]
        )
    else:
        # COUNTER 5 report: type and version come from the next two rows
        second_line = six.next(report_reader)
        third_line = six.next(report_reader)
        report.report_type, report.report_version = _get_c5_type_and_version(
            second_line, third_line
        )

    version = report.report_version
    is_c5 = version == 5
    # COUNTER 5 metadata rows are read from the second cell, older
    # releases from the first.
    value_col = 1 if is_c5 else 0

    if not is_c5:
        # noinspection PyTypeChecker
        report.metric = METRICS.get(report.report_type)

    customer_row = six.next(report_reader)
    report.customer = customer_row[value_col]

    if version >= 4:
        inst_id_line = six.next(report_reader)
        if inst_id_line:
            report.institutional_identifier = inst_id_line[value_col]
            if report.report_type == "BR2":
                report.section_type = inst_id_line[1]

        # One ignored row before the covered period, four for COUNTER 5.
        for _ in range(4 if is_c5 else 1):
            six.next(report_reader)

        covered_line = six.next(report_reader)
        report.period = convert_covered(covered_line[value_col])

    if version < 5:
        six.next(report_reader)

    date_run_line = six.next(report_reader)
    report.date_run = convert_date_run(date_run_line[value_col])

    if is_c5:
        # Skip Created_By and blank line
        six.next(report_reader)
        six.next(report_reader)

    header = six.next(report_reader)

    if version < 5:
        try:
            report.year = _year_from_header(header, report)
        except AttributeError:
            warnings.warn("Could not determine year from malformed header")

    if version >= 4:
        # The first eight columns always count; beyond them only the
        # non-empty header cells do.
        last_col = len(header[0:8]) + len([cell for cell in header[8:] if cell])
    else:
        # Columns are counted up to the first header cell containing
        # "YTD"; when there is none, every column counts.
        last_col = len(header)
        for position, cell in enumerate(header):
            if "YTD" in cell:
                last_col = position
                break

        report.period = (
            datetime.date(report.year, 1, 1),
            last_day(convert_date_column(header[last_col - 1])),
        )

    has_totals_line = report.report_type not in ("DB1", "PR1") and not is_c5
    if has_totals_line:
        # DB1, PR1 and COUNTER 5 reports do not have a line with totals
        six.next(report_reader)

    if report.report_type in ("DB2", "BR3", "JR3"):
        # these reports have a second line of totals
        six.next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
def _raw_to_full(raw_report):
    """Convert a raw report to a :class:`pycounter.report.CounterReport`.

    The response is parsed twice: once with ``etree`` for the ``find``
    lookups of report metadata, and once with ``objectify`` for attribute
    style access to the report items.

    :param raw_report: the SUSHI response XML, in whatever form
        ``etree.fromstring`` and ``objectify.fromstring`` accept

    :return: a :class:`pycounter.report.CounterReport` with one entry in
        ``pubs`` per report item of a JR* or BR* report

    :raises pycounter.exceptions.SushiException: if the response is not
        well-formed XML
    :raises AttributeError: if no COUNTER report can be found in the
        response
    """
    try:
        root = etree.fromstring(raw_report)
    except etree.XMLSyntaxError as e:
        logger.error("XML syntax error: %s", raw_report)
        raise pycounter.exceptions.SushiException(e)

    oroot = objectify.fromstring(raw_report)
    # rep stays None when the response body itself cannot be found; the
    # fallback lookup below then fails as well and the error is logged.
    rep = None
    try:
        rep = oroot.Body[_ns('sushicounter', "ReportResponse")]
        creport = rep.Report[_ns('counter', 'Report')]
    except AttributeError:
        try:
            creport = rep.Report[_ns('counter', 'Reports')].Report
        except AttributeError:
            logger.error("report not found in XML: %s", raw_report)
            raise
    logger.debug("COUNTER report: %s", etree.tostring(creport))

    startdate = datetime.datetime.strptime(
        root.find('.//%s' % _ns('sushi', 'Begin')).text,
        "%Y-%m-%d").date()
    enddate = datetime.datetime.strptime(
        root.find('.//%s' % _ns('sushi', 'End')).text,
        "%Y-%m-%d").date()
    report_data = {'period': (startdate, enddate)}

    rdef = root.find('.//%s' % _ns('sushi', 'ReportDefinition'))
    report_data['report_version'] = int(rdef.get('Release'))
    report_data['report_type'] = rdef.get('Name')

    customer = root.find('.//%s' % _ns('counter', 'Customer'))
    try:
        report_data['customer'] = (
            customer.find('.//%s' % _ns('counter', 'Name')).text)
    except AttributeError:
        # No customer name in the response: fall back to an empty string.
        report_data['customer'] = ""

    inst_id = customer.find('.//%s' % _ns('counter', 'ID')).text
    report_data['institutional_identifier'] = inst_id

    reproot = root.find('.//%s' % _ns('counter', 'Report'))
    created_string = reproot.get('Created')
    if created_string is not None:
        report_data['date_run'] = dateutil.parser.parse(created_string)
    else:
        # No creation timestamp in the response: use the current time.
        report_data['date_run'] = datetime.datetime.now()

    report = pycounter.report.CounterReport()
    for key, value in six.iteritems(report_data):
        setattr(report, key, value)

    report.metric = pycounter.report.METRICS.get(report_data['report_type'])

    for item in creport.Customer.ReportItems:
        try:
            publisher_name = item.ItemPublisher.text
        except AttributeError:
            publisher_name = ""
        itemline = [item.ItemName.text, publisher_name,
                    item.ItemPlatform.text]

        eissn = issn = ""
        # An item without any ItemIdentifier child is treated as having
        # no ISSNs instead of aborting the whole report with an
        # AttributeError.
        for identifier in getattr(item, 'ItemIdentifier', ()):
            if identifier.Type == "Print_ISSN":
                issn = identifier.Value.text
                if issn is None:
                    issn = ""
            elif identifier.Type == "Online_ISSN":
                eissn = identifier.Value.text
                if eissn is None:
                    eissn = ""
        itemline.append(issn)
        itemline.append(eissn)

        month_data = []

        for perfitem in item.ItemPerformance:
            item_date = convert_date_run(perfitem.Period.Begin.text)
            logger.debug("Perfitem date: %r", item_date)

            # Only the "ft_total" metric is recorded.  A performance
            # entry with no Instance child simply has no usage.
            usage = None
            for inst in getattr(perfitem, 'Instance', ()):
                if inst.MetricType == "ft_total":
                    usage = str(inst.Count)
                    break
            if usage is not None:
                month_data.append((item_date, usage))
            # The line gets one entry per period, None when no ft_total
            # count was found.
            itemline.append(usage)

        if report.report_type:
            if report.report_type.startswith('JR'):
                report.pubs.append(pycounter.report.CounterJournal(
                    line=itemline,
                    period=report.period,
                    metric=report.metric,
                    issn=issn,
                    eissn=eissn,
                    month_data=month_data
                ))
            elif report.report_type.startswith('BR'):
                report.pubs.append(
                    pycounter.report.CounterBook(itemline,
                                                 report.period,
                                                 report.metric))

    return report
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    Rows are consumed strictly in order: the metadata lines at the top,
    the column header, the line(s) skipped after the header, and then one
    row per publication.

    :param report_reader: an iterator yielding each row of COUNTER data as
        a list of cell values
    """
    report = CounterReport()

    title_row = six.next(report_reader)
    report.report_type, report.report_version = _get_type_and_version(
        title_row[0])

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)

    customer_row = six.next(report_reader)
    report.customer = customer_row[0]

    is_release_4 = report.report_version == 4

    if is_release_4:
        # Release 4 has three more lines here: the institutional
        # identifier, a line the parser ignores, and the covered period.
        institution_row = six.next(report_reader)
        if institution_row:
            report.institutional_identifier = institution_row[0]

        six.next(report_reader)

        period_row = six.next(report_reader)
        report.period = convert_covered(period_row[0])

    # One ignored line precedes the date the report was run.
    six.next(report_reader)

    run_row = six.next(report_reader)
    report.date_run = convert_date_run(run_row[0])

    header = six.next(report_reader)
    report.year = _year_from_header(header, report)

    if is_release_4:
        # The first eight columns always count; beyond them only the
        # non-empty header cells do.
        last_col = len(header[0:8]) + len([cell for cell in header[8:] if cell])
    else:
        # Columns are counted up to the first header cell containing
        # 'YTD'; when there is none, every column counts.
        last_col = len(header)
        for position, cell in enumerate(header):
            if 'YTD' in cell:
                last_col = position
                break

        report.period = (
            datetime.date(report.year, 1, 1),
            last_day(convert_date_column(header[last_col - 1])),
        )

    # Lines skipped between the header and the data rows: none for DB1,
    # two for DB2 and one for every other report type.
    lines_to_skip = {'DB1': 0, 'DB2': 2}.get(report.report_type, 1)
    for _ in range(lines_to_skip):
        six.next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report
def parse_generic(report_reader):
    """Build a CounterReport from the rows of a tabular COUNTER report.

    Rows are consumed strictly in order: the metadata lines at the top,
    the column header, the line(s) skipped after the header, and then one
    row per publication.

    :param report_reader: an iterator yielding each row of COUNTER data as
        a list of cell values

    :return: CounterReport object
    """
    report = CounterReport()

    title_row = six.next(report_reader)
    report.report_type, report.report_version = _get_type_and_version(
        title_row[0])

    # noinspection PyTypeChecker
    report.metric = METRICS.get(report.report_type)

    customer_row = six.next(report_reader)
    report.customer = customer_row[0]

    is_release_4 = report.report_version == 4

    if is_release_4:
        # Release 4 has three more lines here: the institutional
        # identifier, a line the parser ignores, and the covered period.
        institution_row = six.next(report_reader)
        if institution_row:
            report.institutional_identifier = institution_row[0]
            if report.report_type == "BR2":
                report.section_type = institution_row[1]

        six.next(report_reader)

        period_row = six.next(report_reader)
        report.period = convert_covered(period_row[0])

    # One ignored line precedes the date the report was run.
    six.next(report_reader)

    run_row = six.next(report_reader)
    report.date_run = convert_date_run(run_row[0])

    header = six.next(report_reader)

    try:
        report.year = _year_from_header(header, report)
    except AttributeError:
        warnings.warn("Could not determine year from malformed header")

    if is_release_4:
        # The first eight columns always count; beyond them only the
        # non-empty header cells do.
        last_col = len(header[0:8]) + len([cell for cell in header[8:] if cell])
    else:
        # Columns are counted up to the first header cell containing
        # "YTD"; when there is none, every column counts.
        last_col = len(header)
        for position, cell in enumerate(header):
            if "YTD" in cell:
                last_col = position
                break

        report.period = (
            datetime.date(report.year, 1, 1),
            last_day(convert_date_column(header[last_col - 1])),
        )

    # Lines skipped between the header and the data rows: none for DB1,
    # two for DB2 and one for every other report type.
    lines_to_skip = {"DB1": 0, "DB2": 2}.get(report.report_type, 1)
    for _ in range(lines_to_skip):
        six.next(report_reader)

    for row in report_reader:
        if row:
            report.pubs.append(_parse_line(row, report, last_col))

    return report