def store(self):
    """Persist this record to the Riak store.

    If the record already exists (we hold a key and a live backing
    object), its payload is updated in place; otherwise a new key is
    allocated via KassisNumbering and a fresh object is inserted.
    Returns the stored riak object.
    """
    print("store")
    now_str = SolrHelper.datetime2solrtime(datetime.utcnow())

    # An in-place update is only possible when a key and a backing
    # object that actually exists in Riak are both present.
    exists_already = bool(self.key and self.riak_obj and self.riak_obj.exists)

    if exists_already:
        # update: merge current metadata over the stored payload
        print("update")
        self.riak_obj.data.update(self.meta)
        self.riak_obj.data["updated_at"] = now_str
    else:
        # insert: allocate a new record key and build a fresh object
        self.key = KassisNumbering.numbering("M")
        print("new record: generate key=%s" % (str(self.key)))
        self.meta["record_identifier"] = self.key
        bucket_type = settings.RIAK["STORE_BUCKET_TYPE"]
        self.bucket = self.client.bucket_type(bucket_type).bucket(settings.RIAK["STORE_BUCKET"])
        self.riak_obj = self.bucket.new(self.key, self.meta)
        self.riak_obj.data["created_at"] = now_str
        self.riak_obj.data["updated_at"] = now_str

    # meta = self.prepare_stored()
    stored_obj = self.riak_obj.store()
    # replicator
    # msgpack
    return stored_obj
def store(self):
    """Persist this record to Riak using a counter-based key.

    NOTE(review): `update_flag` is unconditionally reset to False below,
    so the update branch is unreachable and every call takes the insert
    path — looks like a leftover debug override; confirm before removing.
    """
    print("store")
    update_flag = False
    now_str = SolrHelper.datetime2solrtime(datetime.utcnow())
    if self.key and self.riak_obj:
        if self.riak_obj.exists:
            update_flag = True
    update_flag = False  # NOTE(review): forces the insert path; makes the exists check above dead code
    if update_flag:
        print("update")
    else:
        # insert
        # TODO: real unique-number generation (the current scheme conflicts under concurrent access)
        # TODO: introduce a dedicated numbering class.
        counter_bucket = self.client.bucket_type(settings.RIAK["COUNTER_BUCKET_TYPE"]).bucket(Location.CounterBucketName)
        #counter_bucket.update_counter()
        # Increment-then-store a Riak counter and use its value as the key
        counter = Counter(counter_bucket, Location.CounterBucketName)
        counter.increment()
        counter.store()
        self.key = str(counter.value)
        print("new record: generate key=%s" % (str(self.key)))
        self.meta["record_identifier"] = self.key
        self.bucket = self.client.bucket_type(settings.RIAK["STORE_BUCKET_TYPE"]).bucket(Location.BucketName)
        self.riak_obj = self.bucket.new(self.key, self.meta)
        self.riak_obj.data["created_at"] = now_str
        self.riak_obj.data["updated_at"] = now_str
    #
    #meta = self.prepare_stored()
    riak_obj = self.riak_obj.store()
    # replicator
    # msgpack
    return riak_obj
def import_from_net_by_isbn(isbn):
    """Fetch NDL metadata for *isbn* and store it as a Manifestation.

    Looks the record up in Solr by source_identifier first: an existing
    record is re-stored (update), otherwise a new one is created.
    Returns the stored riak object, or None when NDL returned no metadata.
    """
    riak_obj = None
    meta = NdlHelper._import_from_net_by_isbn(isbn)
    if meta:
        client = riak.RiakClient()
        bucket = client.bucket_type(settings.RIAK["STORE_BUCKET_TYPE"]).bucket(settings.RIAK["STORE_BUCKET"])
        # Dedup check: search the full-text index for this source_identifier
        q = "source_identifier:{0}".format(SolrHelper.escape(meta["source_identifier"]))
        results = client.fulltext_search(settings.RIAK["STORE_BUCKET_TYPE"], q)
        if results["num_found"] > 0:
            # fixed typo in log message: "resouce" -> "resource"
            print("already exist: find by resource_identifier ({1}) isbn=({0})".format(isbn, meta["source_identifier"]))
            doc = results["docs"][0]
            # _yz_rk is the Riak key Yokozuna stored for this Solr doc
            m = bucket.get(doc["_yz_rk"])
            m = Manifestation(meta, doc["_yz_rk"], bucket, m)
            riak_obj = m.store()
        else:
            m = Manifestation(meta, None, None, None)
            riak_obj = m.store()
    return riak_obj
def _import_from_xml(doc):
    """Import NDL OpenSearch <item> records from a parsed XML *doc*.

    For each //item, maps its child elements into a `meta` dict, then
    either updates an existing Riak record (matched via Solr on
    source_identifier) or creates a new Manifestation.
    Returns a dict of record/success/updated/created counts.
    """
    client = riak.RiakClient()
    bucket = client.bucket_type(settings.RIAK["STORE_BUCKET_TYPE"]).bucket(settings.RIAK["STORE_BUCKET"])
    record_count = 0
    success_count = 0
    updated_count = 0
    created_count = 0
    items = doc.xpath("//item")
    for item in items:
        meta = {"record_source": "NDL", "record_source_sub": "OPENSEARCH_XMLFILE"}
        creators = []
        publishers = []
        identifiers = {}
        subjects = []
        desc_creators = []
        languages = {"body": "jpn"}
        descriptions = []
        for attr in item:
            #print('(1) element={0} attr={1} body={2}'.format(attr.tag, attr.attrib, attr.text))
            # Strip the {namespace} prefix from the element tag
            # (recursive `regex` pattern matches balanced braces).
            tag = regex.sub(r'(?<rec>\{(?:[^{}]+|(?&rec))*\})', "", attr.tag)
            xmlattr_t = literal_eval(str(attr.attrib))
            xmlattr_key = ""
            xmlattr_body = ""
            # NOTE(review): if an element carries several XML attributes,
            # only the last one survives this loop (last-wins) — confirm
            # that NDL items never carry more than one relevant attribute.
            for keyname in xmlattr_t.keys():
                xmlattr_key = regex.sub(r'(?<rec>\{(?:[^{}]+|(?&rec))*\})', "", keyname)
                xmlattr_body = xmlattr_t[keyname]
            if tag == "title":
                meta["title"] = attr.text
            elif tag == "titleTranscription":
                meta["title_transcription"] = attr.text
            elif tag == "category":
                meta["category"] = attr.text
            elif tag == "publisher":
                # Only the first publisher element is kept
                if len(publishers) == 0:
                    publishers.append({"full_name": attr.text, "full_name_transcription": "", "role": "", "location": ""})
            elif tag == "publicationPlace":
                # Attach the place to the existing publisher, or create a
                # name-less one if publisher hasn't been seen yet.
                if len(publishers) == 0:
                    publishers.append({"full_name": "", "full_name_transcription": "", "role": "", "location": attr.text})
                else:
                    p = publishers[0]
                    p.update({"location": attr.text})
            elif tag == "pubDate":
                meta["pub_date"] = attr.text
            elif tag == "dcndl:volume":
                meta["volume"] = attr.text
            elif tag == "dcndl:edition":
                meta["edition"] = attr.text
            elif tag == "seriesTitle":
                meta["series_title"] = attr.text
            elif tag == "seriesTitleTranscription":
                meta["series_title_transcription"] = attr.text
            elif tag == "subject":
                if xmlattr_key == "":
                    subjects.append({"value": attr.text})
                elif xmlattr_key == "type" and xmlattr_body in ["dcndl:NDC9","dcndl:NDC8"]:
                    # NDC classification codes go into identifiers keyed
                    # by the scheme name with the "dcndl:" prefix removed.
                    akey = regex.sub(r'^.*?:', "", xmlattr_body)
                    identifiers.update({akey: attr.text})
            elif tag == "identifier":
                if xmlattr_key == "type" and xmlattr_body in ["dcndl:JPNO","dcndl:ISBN","dcndl:TRCMARCNO"]:
                    akey = regex.sub(r'^.*?:', "", xmlattr_body)
                    identifiers.update({akey: attr.text})
            elif tag == "link":
                # The item link doubles as the dedup key (source_identifier)
                meta["source_link"] = attr.text
                meta["source_identifier"] = attr.text
            elif tag == "description":
                descriptions.append({"content": attr.text})
            elif tag == "author":
                # Comma-separated authors; each entry may end with a role
                # word, split off by rsplit on the last whitespace run.
                authors = str(attr.text).split(",")
                print("desc_c={0}".format(authors))
                for a in authors:
                    values = str(a).rsplit(None, 1)
                    #print(values)
                    #print(len(values))
                    full_name = ""
                    role = ""
                    if len(values) >= 1:
                        full_name = values[0]
                    if len(values) == 2:
                        role = values[1]
                    #print("name={} role={}".format(full_name, role))
                    desc_creators.append({"full_name": full_name, "role": role})
            elif tag == "creator":
                # Expected form "name/role"; anything without the slash is dropped
                m = regex.match(r"(.*)[//](.*)", attr.text)
                if m != None:
                    name = m.group(1)
                    role = m.group(2)
                    creators.append({"full_name": name, "role": role})
            elif tag == "extent":
                meta["extent"] = attr.text
            elif tag == "price":
                meta["price"] = attr.text
        # end for item
        #meta["languages"] = languages
        meta["descriptions"] = descriptions
        meta["identifiers"] = identifiers
        meta["subjects"] = subjects
        meta["publishers"] = publishers
        meta["creators"] = creators
        meta["desc_creators"] = desc_creators
        # check: does a record with this source_identifier already exist?
        q = "source_identifier:{0}".format(SolrHelper.escape(meta["source_identifier"]))
        results = client.fulltext_search(settings.RIAK["STORE_BUCKET_TYPE"], q)
        if results["num_found"] > 0:
            # NOTE(review): "resouce_identifier" is a typo in this log
            # message (left as-is here; fix separately).
            print("already exist: find by resouce_identifier ({0})".format(meta["source_identifier"]))
            # NOTE(review): `doc` here shadows the function parameter.
            doc = results["docs"][0]
            m = bucket.get(doc["_yz_rk"])
            m = Manifestation(meta, doc["_yz_rk"], bucket, m)
            riak_obj = m.store()
            updated_count += 1
        else:
            m = Manifestation(meta, None, None, None)
            riak_obj = m.store()
            created_count += 1
        print("manifestation stored success. key=%s" % (riak_obj.key))
        record_count += 1
        success_count += 1
    # end for
    results = {"record_count": record_count, "success_count": success_count, "updated_count": updated_count, "created_count": created_count}
    print(results)
    return results
def prepare_stored(self):
    """Derive flattened search/index fields from self.meta before storing.

    Builds the *_ja full-text fields, normalized ISBN/ISSN, a
    lower-cased language map and the Solr pub-date range fields, writes
    them back into self.meta and returns it.

    Assumes self.meta carries 'identifiers' (dict), 'creators' and
    'publishers' (lists of dicts) and 'title' as produced by the
    importers -- TODO confirm against all callers.
    """
    meta = self.meta
    isbn = ""
    issn = ""
    identifiers_ts = []
    creators_ts = []
    publishers_ts = []

    # isbn / issn / identifiers_ts
    for k, v in meta['identifiers'].items():
        if k.lower() == "isbn":
            v = self.isbn_normalizer(v)
            isbn = v
        if k.lower() == "issn":
            # TODO: issn normalize
            issn = v
        identifiers_ts.append("{0}:{1}".format(k, v))

    # creators_ts
    for c in meta['creators']:
        creators_ts.append(c['full_name'])

    # publishers_ts
    # Bug fix: these appends previously targeted creators_ts, which left
    # publishers_ja permanently empty and polluted creators_ja with
    # publisher names.
    for c in meta['publishers']:
        publishers_ts.append(c['full_name'])
        publishers_ts.append(c['full_name_transcription'])

    # languages (keys and values lower-cased)
    languages = {}
    if 'languages' in meta:
        for k, v in meta['languages'].items():
            languages[k.lower()] = v.lower()

    # titles
    titles_ts = []
    titles_ts.append(meta['title'])
    if 'title_transcription' in meta:
        titles_ts.append(meta['title_transcription'])
    if 'series_title' in meta:
        titles_ts.append(meta['series_title'])
    if 'series_title_transcription' in meta:
        titles_ts.append(meta['series_title_transcription'])

    meta["languages"] = languages
    meta["isbn"] = isbn
    meta["issn"] = issn
    meta["creators_ja"] = " ".join(creators_ts)
    meta["identifiers_ja"] = " ".join(identifiers_ts)
    meta["publishers_ja"] = " ".join(publishers_ts)
    meta["titles_ja"] = " ".join(titles_ts)

    # pub_date -> Solr range fields (use .get: pub_date may be absent,
    # the old meta["pub_date"] access raised KeyError in that case)
    if meta.get("pub_date"):
        df = DateHelper.expand_date(meta["pub_date"])
        dt = DateHelper.expand_date(meta["pub_date"], { "mode": 'to' })
        if df and dt:
            meta["pub_date_from_tdt"] = SolrHelper.date2solrtime(df)
            meta["pub_date_to_tdt"] = SolrHelper.date2solrtime(dt)

    self.meta = meta
    return self.meta