def make_objects(self, item, court, sha1_hash, content): """Takes the meta data from the scraper and associates it with objects. Returns the created objects. """ blocked = item['blocked_statuses'] if blocked is not None: date_blocked = date.today() else: date_blocked = None case_name_short = (item.get('case_name_shorts') or self.cnt.make_case_name_short(item['case_names'])) docket = Docket( docket_number=item.get('docket_numbers', ''), case_name=item['case_names'], case_name_short=case_name_short, court=court, blocked=blocked, date_blocked=date_blocked, source=Docket.SCRAPER, ) cluster = OpinionCluster( judges=item.get('judges', ''), date_filed=item['case_dates'], case_name=item['case_names'], case_name_short=case_name_short, source='C', precedential_status=item['precedential_statuses'], nature_of_suit=item.get('nature_of_suit', ''), blocked=blocked, date_blocked=date_blocked, federal_cite_one=item.get('west_citations', ''), state_cite_one=item.get('west_state_citations', ''), neutral_cite=item.get('neutral_citations', ''), ) opinion = Opinion( type='010combined', sha1=sha1_hash, download_url=item['download_urls'], ) error = False try: cf = ContentFile(content) extension = get_extension(content) file_name = trunc(item['case_names'].lower(), 75) + extension opinion.file_with_date = cluster.date_filed opinion.local_path.save(file_name, cf, save=False) except: msg = ('Unable to save binary to disk. Deleted ' 'item: %s.\n %s' % (item['case_names'], traceback.format_exc())) logger.critical(msg.encode('utf-8')) ErrorLog(log_level='CRITICAL', court=court, message=msg).save() error = True return docket, opinion, cluster, error
def make_objects(self, item, court, sha1_hash, content): """Takes the meta data from the scraper and associates it with objects. Returns the created objects. """ blocked = item['blocked_statuses'] if blocked is not None: date_blocked = date.today() else: date_blocked = None case_name_short = (item.get('case_name_shorts') or self.cnt.make_case_name_short(item['case_names'])) docket = Docket( docket_number=item.get('docket_numbers', ''), case_name=item['case_names'], case_name_short=case_name_short, court=court, blocked=blocked, date_blocked=date_blocked, source=Docket.SCRAPER, ) cluster = OpinionCluster( judges=item.get('judges', ''), date_filed=item['case_dates'], case_name=item['case_names'], case_name_short=case_name_short, source='C', precedential_status=item['precedential_statuses'], nature_of_suit=item.get('nature_of_suit', ''), blocked=blocked, date_blocked=date_blocked, federal_cite_one=item.get('west_citations', ''), state_cite_one=item.get('west_state_citations', ''), neutral_cite=item.get('neutral_citations', ''), ) opinion = Opinion( type='010combined', sha1=sha1_hash, download_url=item['download_urls'], ) error = False try: cf = ContentFile(content) extension = get_extension(content) file_name = trunc(item['case_names'].lower(), 75) + extension opinion.file_with_date = cluster.date_filed opinion.local_path.save(file_name, cf, save=False) except: msg = ('Unable to save binary to disk. Deleted ' 'item: %s.\n %s' % (item['case_names'], traceback.format_exc())) logger.critical(msg.encode('utf-8')) ErrorLog(log_level='CRITICAL', court=court, message=msg).save() error = True return docket, opinion, cluster, error
def test_save_old_opinion(self): """Can we save opinions older than 1900?""" docket = Docket(case_name=u"Blah", court_id='test', source=Docket.DEFAULT) docket.save() oc = OpinionCluster( case_name=u"Blah", docket=docket, date_filed=datetime.date(1899, 1, 1), ) oc.save() o = Opinion(cluster=oc, type='Lead Opinion') try: cf = ContentFile(StringIO.StringIO('blah').read()) o.file_with_date = datetime.date(1899, 1, 1) o.local_path.save('file_name.pdf', cf, save=False) o.save(index=False) except ValueError as e: raise ValueError("Unable to save a case older than 1900. Did you " "try to use `strftime`...again?")
def make_objects( item: Dict[str, Union[str, Any]], court: Court, sha1_hash: str, content: bytes, ) -> Tuple[Docket, Opinion, OpinionCluster, List[Citation]]: """Takes the meta data from the scraper and associates it with objects. Returns the created objects. """ blocked = item["blocked_statuses"] if blocked: date_blocked = date.today() else: date_blocked = None case_name_short = item.get("case_name_shorts") or cnt.make_case_name_short( item["case_names"] ) docket = Docket( docket_number=item.get("docket_numbers", ""), case_name=item["case_names"], case_name_short=case_name_short, court=court, blocked=blocked, date_blocked=date_blocked, source=item.get("source") or Docket.SCRAPER, ) west_cite_str = item.get("west_citations", "") state_cite_str = item.get("west_state_citations", "") neutral_cite_str = item.get("neutral_citations", "") cluster = OpinionCluster( judges=item.get("judges", ""), date_filed=item["case_dates"], date_filed_is_approximate=item["date_filed_is_approximate"], case_name=item["case_names"], case_name_short=case_name_short, source=item.get("cluster_source") or "C", precedential_status=item["precedential_statuses"], nature_of_suit=item.get("nature_of_suit", ""), blocked=blocked, date_blocked=date_blocked, syllabus=item.get("summaries", ""), ) citations = [] cite_types = [ (west_cite_str, Citation.WEST), (state_cite_str, Citation.STATE), (neutral_cite_str, Citation.NEUTRAL), ] for cite_str, cite_type in cite_types: if cite_str: citations.append(make_citation(cite_str, cluster, cite_type)) opinion = Opinion( type=Opinion.COMBINED, sha1=sha1_hash, download_url=item["download_urls"], ) cf = ContentFile(content) extension = get_extension(content) file_name = trunc(item["case_names"].lower(), 75) + extension opinion.file_with_date = cluster.date_filed opinion.local_path.save(file_name, cf, save=False) return docket, opinion, cluster, citations
def make_objects(self, item, court, sha1_hash, content): """Takes the meta data from the scraper and associates it with objects. Returns the created objects. """ blocked = item["blocked_statuses"] if blocked: date_blocked = date.today() else: date_blocked = None case_name_short = item.get( "case_name_shorts") or self.cnt.make_case_name_short( item["case_names"]) docket = Docket( docket_number=item.get("docket_numbers", ""), case_name=item["case_names"], case_name_short=case_name_short, court=court, blocked=blocked, date_blocked=date_blocked, source=Docket.SCRAPER, ) west_cite_str = item.get("west_citations", "") state_cite_str = item.get("west_state_citations", "") neutral_cite_str = item.get("neutral_citations", "") cluster = OpinionCluster( judges=item.get("judges", ""), date_filed=item["case_dates"], date_filed_is_approximate=item["date_filed_is_approximate"], case_name=item["case_names"], case_name_short=case_name_short, source="C", precedential_status=item["precedential_statuses"], nature_of_suit=item.get("nature_of_suit", ""), blocked=blocked, date_blocked=date_blocked, # These three fields are replaced below. federal_cite_one=west_cite_str, state_cite_one=state_cite_str, neutral_cite=neutral_cite_str, syllabus=item.get("summaries", ""), ) citations = [] cite_types = [ (west_cite_str, Citation.WEST), (state_cite_str, Citation.STATE), (neutral_cite_str, Citation.NEUTRAL), ] for cite_str, cite_type in cite_types: if cite_str: citations.append(make_citation(cite_str, cluster, cite_type)) opinion = Opinion( type=Opinion.COMBINED, sha1=sha1_hash, download_url=item["download_urls"], ) error = False try: cf = ContentFile(content) extension = get_extension(content) file_name = trunc(item["case_names"].lower(), 75) + extension opinion.file_with_date = cluster.date_filed opinion.local_path.save(file_name, cf, save=False) except: msg = ("Unable to save binary to disk. Deleted " "item: %s.\n %s" % (item["case_names"], traceback.format_exc())) logger.critical(msg.encode("utf-8")) ErrorLog(log_level="CRITICAL", court=court, message=msg).save() error = True return docket, opinion, cluster, citations, error
def make_objects(self, item, court, sha1_hash, content): """Takes the meta data from the scraper and associates it with objects. Returns the created objects. """ blocked = item['blocked_statuses'] if blocked: date_blocked = date.today() else: date_blocked = None case_name_short = (item.get('case_name_shorts') or self.cnt.make_case_name_short(item['case_names'])) docket = Docket( docket_number=item.get('docket_numbers', ''), case_name=item['case_names'], case_name_short=case_name_short, court=court, blocked=blocked, date_blocked=date_blocked, source=Docket.SCRAPER, ) west_cite_str = item.get('west_citations', '') state_cite_str = item.get('west_state_citations', '') neutral_cite_str = item.get('neutral_citations', '') cluster = OpinionCluster( judges=item.get('judges', ''), date_filed=item['case_dates'], date_filed_is_approximate=item['date_filed_is_approximate'], case_name=item['case_names'], case_name_short=case_name_short, source='C', precedential_status=item['precedential_statuses'], nature_of_suit=item.get('nature_of_suit', ''), blocked=blocked, date_blocked=date_blocked, # These three fields are replaced below. federal_cite_one=west_cite_str, state_cite_one=state_cite_str, neutral_cite=neutral_cite_str, syllabus=item.get('summaries', ''), ) citations = [] if west_cite_str: citation_obj = get_citations(west_cite_str)[0] citations.append( Citation( cluster=cluster, volume=citation_obj.volume, reporter=citation_obj.reporter, page=citation_obj.page, type=Citation.WEST, )) if state_cite_str: citation_obj = get_citations(state_cite_str)[0] citations.append( Citation( cluster=cluster, volume=citation_obj.volume, reporter=citation_obj.reporter, page=citation_obj.page, type=Citation.STATE, )) if neutral_cite_str: citation_obj = get_citations(neutral_cite_str)[0] citations.append( Citation( cluster=cluster, volume=citation_obj.volume, reporter=citation_obj.reporter, page=citation_obj.page, type=Citation.NEUTRAL, )) opinion = Opinion( type='010combined', sha1=sha1_hash, download_url=item['download_urls'], ) error = False try: cf = ContentFile(content) extension = get_extension(content) file_name = trunc(item['case_names'].lower(), 75) + extension opinion.file_with_date = cluster.date_filed opinion.local_path.save(file_name, cf, save=False) except: msg = ('Unable to save binary to disk. Deleted ' 'item: %s.\n %s' % (item['case_names'], traceback.format_exc())) logger.critical(msg.encode('utf-8')) ErrorLog(log_level='CRITICAL', court=court, message=msg).save() error = True return docket, opinion, cluster, citations, error