def store(self):
    """Persist this record to the Riak store.

    If the record already exists (we hold a key and a live backing
    object), its payload is updated in place; otherwise a new key is
    allocated via KassisNumbering and a fresh object is inserted.
    Returns the stored riak object.
    """
    print("store")
    now_str = SolrHelper.datetime2solrtime(datetime.utcnow())

    # An in-place update is only possible when a key and a backing
    # object that actually exists in Riak are both present.
    exists_already = bool(self.key and self.riak_obj and self.riak_obj.exists)

    if exists_already:
        # update: merge current metadata over the stored payload
        print("update")
        self.riak_obj.data.update(self.meta)
        self.riak_obj.data["updated_at"] = now_str
    else:
        # insert: allocate a new record key and build a fresh object
        self.key = KassisNumbering.numbering("M")
        print("new record: generate key=%s" % (str(self.key)))
        self.meta["record_identifier"] = self.key
        bucket_type = settings.RIAK["STORE_BUCKET_TYPE"]
        self.bucket = self.client.bucket_type(bucket_type).bucket(settings.RIAK["STORE_BUCKET"])
        self.riak_obj = self.bucket.new(self.key, self.meta)
        self.riak_obj.data["created_at"] = now_str
        self.riak_obj.data["updated_at"] = now_str

    # meta = self.prepare_stored()
    stored_obj = self.riak_obj.store()
    # replicator
    # msgpack
    return stored_obj
def store(self):
    """Persist this record to Riak using a counter-based key.

    NOTE(review): `update_flag` is unconditionally reset to False below,
    so the update branch is unreachable and every call takes the insert
    path — looks like a leftover debug override; confirm before removing.
    """
    print("store")
    update_flag = False
    now_str = SolrHelper.datetime2solrtime(datetime.utcnow())
    if self.key and self.riak_obj:
        if self.riak_obj.exists:
            update_flag = True
    update_flag = False  # NOTE(review): forces the insert path; makes the exists check above dead code
    if update_flag:
        print("update")
    else:
        # insert
        # TODO: real unique-number generation (the current scheme conflicts under concurrent access)
        # TODO: introduce a dedicated numbering class.
        counter_bucket = self.client.bucket_type(settings.RIAK["COUNTER_BUCKET_TYPE"]).bucket(Location.CounterBucketName)
        #counter_bucket.update_counter()
        # Increment-then-store a Riak counter and use its value as the key
        counter = Counter(counter_bucket, Location.CounterBucketName)
        counter.increment()
        counter.store()
        self.key = str(counter.value)
        print("new record: generate key=%s" % (str(self.key)))
        self.meta["record_identifier"] = self.key
        self.bucket = self.client.bucket_type(settings.RIAK["STORE_BUCKET_TYPE"]).bucket(Location.BucketName)
        self.riak_obj = self.bucket.new(self.key, self.meta)
        self.riak_obj.data["created_at"] = now_str
        self.riak_obj.data["updated_at"] = now_str
    #
    #meta = self.prepare_stored()
    riak_obj = self.riak_obj.store()
    # replicator
    # msgpack
    return riak_obj
def import_from_net_by_isbn(isbn):
    """Fetch NDL metadata for *isbn* and store it as a Manifestation.

    Looks the record up in Solr by source_identifier first: an existing
    record is re-stored (update), otherwise a new one is created.
    Returns the stored riak object, or None when NDL returned no metadata.
    """
    riak_obj = None
    meta = NdlHelper._import_from_net_by_isbn(isbn)
    if meta:
        client = riak.RiakClient()
        bucket = client.bucket_type(settings.RIAK["STORE_BUCKET_TYPE"]).bucket(settings.RIAK["STORE_BUCKET"])
        # Dedup check: search the full-text index for this source_identifier
        q = "source_identifier:{0}".format(SolrHelper.escape(meta["source_identifier"]))
        results = client.fulltext_search(settings.RIAK["STORE_BUCKET_TYPE"], q)
        if results["num_found"] > 0:
            # fixed typo in log message: "resouce" -> "resource"
            print("already exist: find by resource_identifier ({1}) isbn=({0})".format(isbn, meta["source_identifier"]))
            doc = results["docs"][0]
            # _yz_rk is the Riak key Yokozuna stored for this Solr doc
            m = bucket.get(doc["_yz_rk"])
            m = Manifestation(meta, doc["_yz_rk"], bucket, m)
            riak_obj = m.store()
        else:
            m = Manifestation(meta, None, None, None)
            riak_obj = m.store()
    return riak_obj
def _import_from_xml(doc):
    """Import NDL OpenSearch <item> records from a parsed XML *doc*.

    For each //item, maps its child elements into a `meta` dict, then
    either updates an existing Riak record (matched via Solr on
    source_identifier) or creates a new Manifestation.
    Returns a dict of record/success/updated/created counts.
    """
    client = riak.RiakClient()
    bucket = client.bucket_type(settings.RIAK["STORE_BUCKET_TYPE"]).bucket(settings.RIAK["STORE_BUCKET"])
    record_count = 0
    success_count = 0
    updated_count = 0
    created_count = 0
    items = doc.xpath("//item")
    for item in items:
        meta = {"record_source": "NDL", "record_source_sub": "OPENSEARCH_XMLFILE"}
        creators = []
        publishers = []
        identifiers = {}
        subjects = []
        desc_creators = []
        languages = {"body": "jpn"}
        descriptions = []
        for attr in item:
            #print('(1) element={0} attr={1} body={2}'.format(attr.tag, attr.attrib, attr.text))
            # Strip the {namespace} prefix from the element tag
            # (recursive `regex` pattern matches balanced braces).
            tag = regex.sub(r'(?<rec>\{(?:[^{}]+|(?&rec))*\})', "", attr.tag)
            xmlattr_t = literal_eval(str(attr.attrib))
            xmlattr_key = ""
            xmlattr_body = ""
            # NOTE(review): if an element carries several XML attributes,
            # only the last one survives this loop (last-wins) — confirm
            # that NDL items never carry more than one relevant attribute.
            for keyname in xmlattr_t.keys():
                xmlattr_key = regex.sub(r'(?<rec>\{(?:[^{}]+|(?&rec))*\})', "", keyname)
                xmlattr_body = xmlattr_t[keyname]
            if tag == "title":
                meta["title"] = attr.text
            elif tag == "titleTranscription":
                meta["title_transcription"] = attr.text
            elif tag == "category":
                meta["category"] = attr.text
            elif tag == "publisher":
                # Only the first publisher element is kept
                if len(publishers) == 0:
                    publishers.append({"full_name": attr.text, "full_name_transcription": "", "role": "", "location": ""})
            elif tag == "publicationPlace":
                # Attach the place to the existing publisher, or create a
                # name-less one if publisher hasn't been seen yet.
                if len(publishers) == 0:
                    publishers.append({"full_name": "", "full_name_transcription": "", "role": "", "location": attr.text})
                else:
                    p = publishers[0]
                    p.update({"location": attr.text})
            elif tag == "pubDate":
                meta["pub_date"] = attr.text
            elif tag == "dcndl:volume":
                meta["volume"] = attr.text
            elif tag == "dcndl:edition":
                meta["edition"] = attr.text
            elif tag == "seriesTitle":
                meta["series_title"] = attr.text
            elif tag == "seriesTitleTranscription":
                meta["series_title_transcription"] = attr.text
            elif tag == "subject":
                if xmlattr_key == "":
                    subjects.append({"value": attr.text})
                elif xmlattr_key == "type" and xmlattr_body in ["dcndl:NDC9","dcndl:NDC8"]:
                    # NDC classification codes go into identifiers keyed
                    # by the scheme name with the "dcndl:" prefix removed.
                    akey = regex.sub(r'^.*?:', "", xmlattr_body)
                    identifiers.update({akey: attr.text})
            elif tag == "identifier":
                if xmlattr_key == "type" and xmlattr_body in ["dcndl:JPNO","dcndl:ISBN","dcndl:TRCMARCNO"]:
                    akey = regex.sub(r'^.*?:', "", xmlattr_body)
                    identifiers.update({akey: attr.text})
            elif tag == "link":
                # The item link doubles as the dedup key (source_identifier)
                meta["source_link"] = attr.text
                meta["source_identifier"] = attr.text
            elif tag == "description":
                descriptions.append({"content": attr.text})
            elif tag == "author":
                # Comma-separated authors; each entry may end with a role
                # word, split off by rsplit on the last whitespace run.
                authors = str(attr.text).split(",")
                print("desc_c={0}".format(authors))
                for a in authors:
                    values = str(a).rsplit(None, 1)
                    #print(values)
                    #print(len(values))
                    full_name = ""
                    role = ""
                    if len(values) >= 1:
                        full_name = values[0]
                    if len(values) == 2:
                        role = values[1]
                    #print("name={} role={}".format(full_name, role))
                    desc_creators.append({"full_name": full_name, "role": role})
            elif tag == "creator":
                # Expected form "name/role"; anything without the slash is dropped
                m = regex.match(r"(.*)[//](.*)", attr.text)
                if m != None:
                    name = m.group(1)
                    role = m.group(2)
                    creators.append({"full_name": name, "role": role})
            elif tag == "extent":
                meta["extent"] = attr.text
            elif tag == "price":
                meta["price"] = attr.text
        # end for item
        #meta["languages"] = languages
        meta["descriptions"] = descriptions
        meta["identifiers"] = identifiers
        meta["subjects"] = subjects
        meta["publishers"] = publishers
        meta["creators"] = creators
        meta["desc_creators"] = desc_creators
        # check: does a record with this source_identifier already exist?
        q = "source_identifier:{0}".format(SolrHelper.escape(meta["source_identifier"]))
        results = client.fulltext_search(settings.RIAK["STORE_BUCKET_TYPE"], q)
        if results["num_found"] > 0:
            # NOTE(review): "resouce_identifier" is a typo in this log
            # message (left as-is here; fix separately).
            print("already exist: find by resouce_identifier ({0})".format(meta["source_identifier"]))
            # NOTE(review): `doc` here shadows the function parameter.
            doc = results["docs"][0]
            m = bucket.get(doc["_yz_rk"])
            m = Manifestation(meta, doc["_yz_rk"], bucket, m)
            riak_obj = m.store()
            updated_count += 1
        else:
            m = Manifestation(meta, None, None, None)
            riak_obj = m.store()
            created_count += 1
        print("manifestation stored success. key=%s" % (riak_obj.key))
        record_count += 1
        success_count += 1
    # end for
    results = {"record_count": record_count, "success_count": success_count, "updated_count": updated_count, "created_count": created_count}
    print(results)
    return results
def prepare_stored(self):
    """Derive flattened search/index fields from self.meta before storing.

    Builds the *_ja full-text fields, normalized ISBN/ISSN, a
    lower-cased language map and the Solr pub-date range fields, writes
    them back into self.meta and returns it.

    Assumes self.meta carries 'identifiers' (dict), 'creators' and
    'publishers' (lists of dicts) and 'title' as produced by the
    importers -- TODO confirm against all callers.
    """
    meta = self.meta
    isbn = ""
    issn = ""
    identifiers_ts = []
    creators_ts = []
    publishers_ts = []

    # isbn / issn / identifiers_ts
    for k, v in meta['identifiers'].items():
        if k.lower() == "isbn":
            v = self.isbn_normalizer(v)
            isbn = v
        if k.lower() == "issn":
            # TODO: issn normalize
            issn = v
        identifiers_ts.append("{0}:{1}".format(k, v))

    # creators_ts
    for c in meta['creators']:
        creators_ts.append(c['full_name'])

    # publishers_ts
    # Bug fix: these appends previously targeted creators_ts, which left
    # publishers_ja permanently empty and polluted creators_ja with
    # publisher names.
    for c in meta['publishers']:
        publishers_ts.append(c['full_name'])
        publishers_ts.append(c['full_name_transcription'])

    # languages (keys and values lower-cased)
    languages = {}
    if 'languages' in meta:
        for k, v in meta['languages'].items():
            languages[k.lower()] = v.lower()

    # titles
    titles_ts = []
    titles_ts.append(meta['title'])
    if 'title_transcription' in meta:
        titles_ts.append(meta['title_transcription'])
    if 'series_title' in meta:
        titles_ts.append(meta['series_title'])
    if 'series_title_transcription' in meta:
        titles_ts.append(meta['series_title_transcription'])

    meta["languages"] = languages
    meta["isbn"] = isbn
    meta["issn"] = issn
    meta["creators_ja"] = " ".join(creators_ts)
    meta["identifiers_ja"] = " ".join(identifiers_ts)
    meta["publishers_ja"] = " ".join(publishers_ts)
    meta["titles_ja"] = " ".join(titles_ts)

    # pub_date -> Solr range fields (use .get: pub_date may be absent,
    # the old meta["pub_date"] access raised KeyError in that case)
    if meta.get("pub_date"):
        df = DateHelper.expand_date(meta["pub_date"])
        dt = DateHelper.expand_date(meta["pub_date"], { "mode": 'to' })
        if df and dt:
            meta["pub_date_from_tdt"] = SolrHelper.date2solrtime(df)
            meta["pub_date_to_tdt"] = SolrHelper.date2solrtime(dt)

    self.meta = meta
    return self.meta