def opinion_ids_to_names(opinion_ids: Iterable[str]) -> List[str]:
    """Map opinion resource IDs to their cluster case names.

    Args:
        opinion_ids: Iterable of opinion resource IDs to look up.

    Returns:
        Case names in the same order as ``opinion_ids``, with "Unknown"
        substituted for any ID that has no matching Opinion record.
    """
    op_names = []
    for op_id in opinion_ids:
        if (op_model := Opinion.select().where(
                Opinion.resource_id == op_id).first()) is not None:
            op_names.append(op_model.cluster.case_name)
        else:
            op_names.append("Unknown")
    # BUG FIX: the original built op_names but never returned it,
    # so callers always received None despite the List[str] annotation.
    return op_names
def search_cases(query, max_cases=25):
    """Full-text search for opinions, ranked by citation count.

    Args:
        query: Raw user search string.
        max_cases: Maximum number of results to return (default 25).

    Returns:
        A peewee select of Opinion rows joined to Cluster, each annotated
        with a ts_headline snippet aliased as "headline".
    """
    prepared = CaseSearch.prepare_query(query)
    # Build the tsquery expression once and reuse it for both the
    # headline annotation and the match predicate.
    tsquery = fn.to_tsquery(prepared)
    headline = fn.ts_headline(Cluster.case_display_name(),
                              tsquery).alias("headline")
    return (
        Opinion.select(Opinion, headline)
        .join(Cluster)
        .where(ts_match(Cluster.searchable_case_name, tsquery))
        .order_by(Cluster.citation_count.desc())
        .limit(max_cases)
    )
def get_case_clusters():
    """Cluster the requested cases and return opinions grouped by cluster.

    Query params:
        cases: One or more opinion resource IDs (required).
        num_clusters: Optional cluster count; absent or 0 lets the
            clustering algorithm choose.

    Returns:
        Dict mapping cluster name -> list of opinion dicts, or a
        422 error tuple when no case IDs were supplied.
    """
    case_resource_ids = [int(c) for c in request.args.getlist("cases")]
    # 0 or a missing param both collapse to None (auto cluster count).
    num_clusters = int(request.args.get("num_clusters") or 0) or None
    if not case_resource_ids:
        return "You must provide at least one case ID.", HTTPStatus.UNPROCESSABLE_ENTITY
    clusters = clustering.spectral_cluster(set(case_resource_ids),
                                           num_clusters=num_clusters)
    return {
        str(cluster_name): model_list_to_dicts(
            Opinion.select().where(Opinion.resource_id << opinion_ids))
        for cluster_name, opinion_ids in clusters.items()
    }
def get_recommended_cases():
    """Return recommended opinions for a set of cases, as JSON.

    Query params:
        cases: One or more opinion resource IDs (required).
        courts: Optional court IDs restricting the recommendations.
        max_cases: Maximum number of recommendations (default 10).

    Returns:
        JSON list of recommended opinions sorted by descending score,
        or a 422 error tuple when no case IDs were supplied.
    """
    case_resource_ids = frozenset(map(int, request.args.getlist("cases")))
    court_ids = frozenset(map(str, request.args.getlist("courts")))
    max_cases = int(request.args.get("max_cases") or 10)
    if not case_resource_ids:
        return "You must provide at least one case ID.", HTTPStatus.UNPROCESSABLE_ENTITY
    # Maps opinion resource_id -> recommendation score.
    scores = recommendation.recommendations(case_resource_ids, max_cases,
                                            courts=court_ids)
    candidates = Opinion.select().join(Cluster).where(
        Opinion.resource_id << list(scores.keys()))
    ranked = sorted(candidates,
                    key=lambda op: scores[op.resource_id],
                    reverse=True)
    return model_list_to_json(ranked)
def ingest_citation_data(citations_file):
    """Bulk-load citation edges from a CSV file into the Citation table.

    Each CSV row is expected to be ``citing_id,cited_id,depth`` (all
    integers). Rows that fail to parse, are too short, or reference
    opinions not present in the database are skipped; parse failures are
    reported via print.

    Args:
        citations_file: Path to the citations CSV file.
    """
    # Since there's only ~65,000 opinions, it's feasible to just load all
    # the IDs into memory to avoid making millions of DB queries.
    opinion_set = {o.resource_id for o in Opinion.select()}
    citation_records = []
    # newline="" lets the csv module do its own line-ending handling,
    # as required by the csv docs.
    with open(citations_file, newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=",")
        for row in csv_reader:
            try:
                integer_row = [int(cell) for cell in row]
                if integer_row[0] in opinion_set and integer_row[1] in opinion_set:
                    new_record = Citation(
                        citing_opinion=integer_row[0],
                        cited_opinion=integer_row[1],
                        depth=integer_row[2],
                    )
                    citation_records.append(new_record)
            # Narrowed from bare Exception: ValueError covers non-integer
            # cells, IndexError covers short rows. Anything else is a real
            # bug and should surface rather than be silently skipped.
            except (ValueError, IndexError) as e:
                print(f"Failure on row {row}: {e}")
    with db.atomic():
        Citation.bulk_create(citation_records, batch_size=100)