def import_from_net_by_isbn(isbn):
    """Import one bibliographic record from the net (NDL) by ISBN.

    Looks the ISBN up via NdlHelper; if metadata is found, checks the Riak
    search index for an existing record with the same source_identifier and
    either updates that record or creates a new one.

    :param isbn: ISBN string to look up.
    :returns: the stored Riak object, or None when NDL returned no metadata.
    """
    stored = None
    meta = NdlHelper._import_from_net_by_isbn(isbn)
    if meta:
        client = riak.RiakClient()
        bucket = client.bucket_type(settings.RIAK["STORE_BUCKET_TYPE"]).bucket(settings.RIAK["STORE_BUCKET"])
        # Dedupe: search the full-text index for a record from the same source.
        query = "source_identifier:{0}".format(SolrHelper.escape(meta["source_identifier"]))
        found = client.fulltext_search(settings.RIAK["STORE_BUCKET_TYPE"], query)
        if found["num_found"] > 0:
            # Existing record: re-store under its current Riak key.
            print("already exist: find by resouce_identifier ({1}) isbn=({0})".format(isbn, meta["source_identifier"]))
            hit = found["docs"][0]
            existing = bucket.get(hit["_yz_rk"])
            stored = Manifestation(meta, hit["_yz_rk"], bucket, existing).store()
        else:
            # New record: Manifestation allocates its own key/bucket.
            stored = Manifestation(meta, None, None, None).store()
    return stored
def _import_from_xml(doc):
    """Import NDL OpenSearch records from a parsed XML document into Riak.

    Iterates every ``//item`` element, maps its child elements onto a ``meta``
    dict (title, publishers, identifiers, creators, ...), then either updates
    an existing Riak record (matched by source_identifier via full-text
    search) or creates a new one.

    :param doc: parsed XML tree supporting ``.xpath()`` (presumably an lxml
        ElementTree — TODO confirm).
    :returns: dict with record_count / success_count / updated_count /
        created_count totals.
    """
    # Recursive pattern that strips a leading "{namespace}" from tags and
    # attribute names; hoisted out of the loops so it is written once.
    # (Requires the third-party `regex` module for (?&rec) recursion.)
    ns_pattern = r'(?<rec>\{(?:[^{}]+|(?&rec))*\})'

    client = riak.RiakClient()
    bucket = client.bucket_type(settings.RIAK["STORE_BUCKET_TYPE"]).bucket(settings.RIAK["STORE_BUCKET"])
    record_count = 0
    success_count = 0
    updated_count = 0
    created_count = 0
    items = doc.xpath("//item")
    for item in items:
        meta = {"record_source": "NDL", "record_source_sub": "OPENSEARCH_XMLFILE"}
        creators = []
        publishers = []
        identifiers = {}
        subjects = []
        desc_creators = []
        languages = {"body": "jpn"}  # built but not stored (see note below)
        descriptions = []
        for attr in item:
            tag = regex.sub(ns_pattern, "", attr.tag)
            # NOTE(review): round-tripping attrib through str/literal_eval
            # looks fragile — attrib is already dict-like; kept as-is to
            # preserve behavior. Only the LAST attribute survives this loop.
            xmlattr_t = literal_eval(str(attr.attrib))
            xmlattr_key = ""
            xmlattr_body = ""
            for keyname in xmlattr_t.keys():
                xmlattr_key = regex.sub(ns_pattern, "", keyname)
                xmlattr_body = xmlattr_t[keyname]
            if tag == "title":
                meta["title"] = attr.text
            elif tag == "titleTranscription":
                meta["title_transcription"] = attr.text
            elif tag == "category":
                meta["category"] = attr.text
            elif tag == "publisher":
                # Only the first publisher element is kept.
                if len(publishers) == 0:
                    publishers.append({"full_name": attr.text, "full_name_transcription": "", "role": "", "location": ""})
            elif tag == "publicationPlace":
                if len(publishers) == 0:
                    publishers.append({"full_name": "", "full_name_transcription": "", "role": "", "location": attr.text})
                else:
                    publishers[0].update({"location": attr.text})
            elif tag == "pubDate":
                meta["pub_date"] = attr.text
            elif tag == "dcndl:volume":
                meta["volume"] = attr.text
            elif tag == "dcndl:edition":
                meta["edition"] = attr.text
            elif tag == "seriesTitle":
                meta["series_title"] = attr.text
            elif tag == "seriesTitleTranscription":
                meta["series_title_transcription"] = attr.text
            elif tag == "subject":
                if xmlattr_key == "":
                    subjects.append({"value": attr.text})
                elif xmlattr_key == "type" and xmlattr_body in ["dcndl:NDC9", "dcndl:NDC8"]:
                    # NDC classification codes are stored as identifiers
                    # keyed by the scheme name without its prefix.
                    akey = regex.sub(r'^.*?:', "", xmlattr_body)
                    identifiers.update({akey: attr.text})
            elif tag == "identifier":
                if xmlattr_key == "type" and xmlattr_body in ["dcndl:JPNO", "dcndl:ISBN", "dcndl:TRCMARCNO"]:
                    akey = regex.sub(r'^.*?:', "", xmlattr_body)
                    identifiers.update({akey: attr.text})
            elif tag == "link":
                # The record's permalink doubles as its dedupe key.
                meta["source_link"] = attr.text
                meta["source_identifier"] = attr.text
            elif tag == "description":
                descriptions.append({"content": attr.text})
            elif tag == "author":
                # Comma-separated "Name Role" entries; the last whitespace-
                # separated token (if any) is treated as the role.
                authors = str(attr.text).split(",")
                print("desc_c={0}".format(authors))
                for a in authors:
                    values = str(a).rsplit(None, 1)
                    full_name = ""
                    role = ""
                    if len(values) >= 1:
                        full_name = values[0]
                    if len(values) == 2:
                        role = values[1]
                    desc_creators.append({"full_name": full_name, "role": role})
            elif tag == "creator":
                # "name/role" — the character class [//] matches a single '/'.
                m = regex.match(r"(.*)[//](.*)", attr.text)
                if m is not None:  # fixed: was `!= None`
                    creators.append({"full_name": m.group(1), "role": m.group(2)})
            elif tag == "extent":
                meta["extent"] = attr.text
            elif tag == "price":
                meta["price"] = attr.text
        # end for attr
        #meta["languages"] = languages
        meta["descriptions"] = descriptions
        meta["identifiers"] = identifiers
        meta["subjects"] = subjects
        meta["publishers"] = publishers
        meta["creators"] = creators
        meta["desc_creators"] = desc_creators
        # Update-or-create: search the index for this source_identifier.
        q = "source_identifier:{0}".format(SolrHelper.escape(meta["source_identifier"]))
        results = client.fulltext_search(settings.RIAK["STORE_BUCKET_TYPE"], q)
        if results["num_found"] > 0:
            print("already exist: find by resouce_identifier ({0})".format(meta["source_identifier"]))
            doc_hit = results["docs"][0]
            existing = bucket.get(doc_hit["_yz_rk"])
            m = Manifestation(meta, doc_hit["_yz_rk"], bucket, existing)
            riak_obj = m.store()
            updated_count += 1
        else:
            m = Manifestation(meta, None, None, None)
            riak_obj = m.store()
            created_count += 1
        print("manifestation stored success. key=%s" % (riak_obj.key))
        record_count += 1
        success_count += 1
    # end for item
    results = {"record_count": record_count, "success_count": success_count, "updated_count": updated_count, "created_count": created_count}
    print(results)
    return results