Beispiel #1
0
    def _get_metadata(self, query, category, crossref=False):
        """
        Retrieve metadata from all search results and store in dictionary of elements.

        Positional arguments:
        query (str) -- the initial query string for the given search results.
        category (str) -- the container in which to store the gathered metadata.
            See self.metadata_collection template in __init__.
        """
        # It seems results are returned in a totally different format depending on search client used.
        # The 'premium' client ("Search") returns XML of matching records.
        self.crossref = crossref
        if self.client == "Search":
            self.tree = etree.fromstring(self.search_results.records)
            objectify.deannotate(self.tree, cleanup_namespaces=True)
            for record in self.tree:
                self.meta_record = MetaWos(record, query)
                article_metadata = self.meta_record.compile_metadata()
                if self.citing_metadata:
                    article_metadata["source_id"] = self.uid

                self.metadata_collection[category].append(article_metadata)

        # The 'lite' client returns a list of dictionary-like objects
        elif self.client == "Lite":
            for record in self.search_results.records:
                article_metadata = dict(record)
                if self.crossref:
                    abstract = CrossRef.get_abstract(article_metadata)
                self.metadata_collection[category].append(article_metadata)

        else:
            print("Inappropriate method for metadata retrieval: {0}".format(
                self.client))
Beispiel #2
0
    def get_full_record(self):
        """
        Run title search on 'hot' records, that is, ones with WOS ids.

        Start with most stringent search, then progressively loosen to improve likelihood of a match:
        Search 1: journal_title AND pub_year AND title
        Search 2: journal_title AND title
        Search 3: title

        The title comes from self.hot_item["citedTitle"]. Each word is
        lower-cased and stripped of surrounding punctuation, and has "=",
        parentheses and square brackets removed. Words that end up empty, or
        that are boolean search operators ("and", "near", "or", "not"), are
        left out of the query. journal_title and pub_year fall back to the
        string "NONE" when self.hot_item has no "citedWork" / "year" entry.

        Side effects: sets self.rp_title_search and self.tree. When
        self.search_count is 1, the compiled metadata of each record in the
        result tree is tagged with self.uid as "source_id" and appended to
        self.metadata_collection["hot_records"].

        Nothing is returned. Any exception raised while searching, parsing or
        compiling metadata is caught and printed together with the title.
        """
        booleans = {"and", "near", "or", "not"}
        edge_punctuation = "?;:.,-_()[]<>{}!`'"
        embedded_chars = "=()[]"

        title_words = []
        for raw_word in self.hot_item["citedTitle"].split():
            word = raw_word.lower().strip(edge_punctuation)
            for char in embedded_chars:
                word = word.replace(char, "")
            # Filter AFTER cleaning: "(AND)" or "or," must not reach the query
            # as a bare operator, and punctuation-only tokens must not leave
            # empty words (double spaces) in the title.
            if word and word not in booleans:
                title_words.append(word)
        record_title = " ".join(title_words)

        # hot_item is only known here to support `in` and indexing, so the
        # explicit membership test is kept instead of assuming a .get() method.
        if "citedWork" in self.hot_item:
            journal_title = self.hot_item["citedWork"]
        else:
            journal_title = "NONE"

        if "year" in self.hot_item:
            pub_year = self.hot_item["year"]
        else:
            pub_year = "NONE"

        self.rp_title_search = self.retrieve_parameters(count="1")
        try:
            # Search 1
            matched = self._run_full_record_search(
                record_title, pub_year=pub_year,
                journal_title=journal_title) >= 1

            # Search 2
            if not matched:
                matched = self._run_full_record_search(
                    record_title, journal_title=journal_title) >= 1

            # Search 3
            if not matched:
                self._run_full_record_search(record_title)

            # Parse the results of whichever search ran last, exactly once.
            self.tree = etree.fromstring(self.title_search_results.records)

            if self.search_count == 1:
                for record in self.tree:
                    self.title_meta_record = MetaWos(record, record_title)
                    self.title_metadata = (
                        self.title_meta_record.compile_metadata())
                    self.title_metadata["source_id"] = self.uid
                    self.metadata_collection["hot_records"].append(
                        self.title_metadata)

        except Exception as e:
            # Best-effort lookup: report the failing title and carry on.
            print("*******************ERROR*************************")
            print(record_title)
            print(e)
            print("*************************************************")