def do_annotation(s, f, q):
    """Query Solr for ``q`` and return the highlighted hits as OA annotations.

    Args:
        s: Solr connection object; only its ``raw_query`` method is used and
            it is expected to return a JSON string.
        f: Comma-separated Solr field names. The first one is the field the
            query runs against; every remaining one becomes a filter query
            (``fq``) on the same term.
        q: The term to search for.

    Returns:
        The result of ``jsonify`` applied to a dict holding an ``@context``
        URL and an ``@graph`` list with all generated annotations.
    """
    # This will probably stay hardcoded
    TEI_data = "http://shelleygodwinarchive.org/tei/"
    libraries = ["ox/", "bl/"]
    output_format = "jsonld"
    hl_simple_pre = '_#_'
    hl_simple_post = '_#_'

    annotations = []

    # Create a UUID for this iteration
    uid = str(uuid.uuid4())

    # get solr fields from request; str.split always yields at least one
    # element, so fields[0] is safe and fields[1:] may simply be empty.
    fields = f.split(",")
    fqs = [name + ":" + q for name in fields[1:]]

    # send query, filter by fields (AND only at the moment), return
    # highlights on text field.
    # text field is the only one that keeps all the text with all the
    # whitespace so all the positions are extracted from there.
    #
    # hl_fragsize=0 is important to calculate correct positions that SC
    # will understand.
    response = s.raw_query(
        q=fields[0] + ":" + q,
        fl='shelfmark,id',
        fq=fqs,
        wt='json',
        start=0,
        rows=9999,
        hl='true',
        hl_fl="text",
        hl_fragsize='0',
        hl_simple_pre=hl_simple_pre,
        hl_simple_post=hl_simple_post)

    r = json.loads(response)

    # Find all the highlights and make them into OA annotations
    highlighting = r["highlighting"]
    for i, TEI_id in enumerate(highlighting):
        # A document can match the queried field without producing any
        # snippet in "text"; Solr then reports an empty entry for it.
        # Skip those instead of failing the whole request.
        snippets = highlighting[TEI_id].get("text")
        if not snippets:
            continue
        hl = snippets[0]
        for library in libraries:
            annotations += annotator.oa_annotations(
                hl,
                TEI_id,
                TEI_data + library,
                uid + ":-" + str(i),
                hl_simple_pre,
                hl_simple_post,
                output_format)

    # prepare a headless JSON
    final = {}
    if output_format == "jsonld":
        final["@context"] = "http://mith.um.edu/sc/context.json"
        final["@graph"] = list(annotations)
    else:
        # Non-JSON-LD output: merge every annotation's keys into one object.
        for anno in annotations:
            for key in anno:
                final[key] = anno[key]

    return jsonify(final)
def do_annotation(s, f, q):
    """Query Solr for ``q`` and return the highlighted hits as OA annotations.

    Args:
        s: Solr connection object; only its ``raw_query`` method is used and
            it is expected to return a JSON string.
        f: Comma-separated Solr field names. The first one is the field the
            query runs against; every remaining one becomes a filter query
            (``fq``) on the same term.
        q: The term to search for.

    Returns:
        The result of ``jsonify`` applied to a dict holding an ``@context``
        URL and an ``@graph`` list with all generated annotations.
    """
    # This will probably stay hardcoded
    TEI_data = "http://shelleygodwinarchive.org/tei/"
    libraries = ["ox/", "bl/"]
    output_format = "jsonld"
    hl_simple_pre = '_#_'
    hl_simple_post = '_#_'

    annotations = []

    # Create a UUID for this iteration
    uid = str(uuid.uuid4())

    # get solr fields from request; str.split always yields at least one
    # element, so fields[0] is safe and fields[1:] may simply be empty.
    fields = f.split(",")
    fqs = [name + ":" + q for name in fields[1:]]

    # send query, filter by fields (AND only at the moment), return
    # highlights on text field.
    # text field is the only one that keeps all the text with all the
    # whitespace so all the positions are extracted from there.
    #
    # hl_fragsize=0 is important to calculate correct positions that SC
    # will understand.
    response = s.raw_query(
        q=fields[0] + ":" + q,
        fl='shelfmark,id',
        fq=fqs,
        wt='json',
        start=0,
        rows=9999,
        hl='true',
        hl_fl="text",
        hl_fragsize='0',
        hl_simple_pre=hl_simple_pre,
        hl_simple_post=hl_simple_post)

    r = json.loads(response)

    # Find all the highlights and make them into OA annotations
    highlighting = r["highlighting"]
    for i, TEI_id in enumerate(highlighting):
        # A document can match the queried field without producing any
        # snippet in "text"; Solr then reports an empty entry for it.
        # Skip those instead of failing the whole request.
        snippets = highlighting[TEI_id].get("text")
        if not snippets:
            continue
        hl = snippets[0]
        for library in libraries:
            annotations += annotator.oa_annotations(
                hl,
                TEI_id,
                TEI_data + library,
                uid + ":-" + str(i),
                hl_simple_pre,
                hl_simple_post,
                output_format)

    # prepare a headless JSON
    final = {}
    if output_format == "jsonld":
        final["@context"] = "http://mith.um.edu/sc/context.json"
        final["@graph"] = list(annotations)
    else:
        # Non-JSON-LD output: merge every annotation's keys into one object.
        for anno in annotations:
            for key in anno:
                final[key] = anno[key]

    return jsonify(final)