def parse_record(self, line) -> Optional[DirectoryInfo]:
    """Convert one JSON line of the SciELO journal dump into a DirectoryInfo."""
    record = json.loads(line)

    # keep only the extra metadata fields that are actually present
    extra = {
        key: value
        for key, value in {
            "status": clean_str(record.get("current_status")),
            "first_year": record.get("first_year"),
            "collection": record.get("collection_acronym"),
        }.items()
        if value is not None
    }

    # publisher_country is a list; only trust two-letter codes
    country: Optional[str] = None
    publisher_country = record["publisher_country"]
    if publisher_country and len(publisher_country[0]) == 2:
        country = publisher_country[0].lower()

    langs = []
    for raw_lang in record["languages"]:
        lang = parse_lang(raw_lang)
        if lang:
            langs.append(lang)

    info = DirectoryInfo(
        directory_slug=self.source_slug,
        issne=clean_issn(record.get("electronic_issn") or ""),
        issnp=clean_issn(record.get("print_issn") or ""),
        custom_id=clean_str(record.get("scielo_issn")),
        name=clean_str(record.get("fulltitle")),
        publisher=clean_str((record.get("publisher_name") or [""])[0]),
        abbrev=clean_str(record["abbreviated_iso_title"]),
        platform="scielo",
        langs=langs,
        country=country,
        extra=extra,
    )

    if record["url"]:
        homepage = HomepageUrl.from_url(record["url"])
        if homepage:
            info.homepage_urls.append(homepage)
    return info
def parse_record(self, row) -> Optional[DirectoryInfo]:
    """Parse one JSON line from the Szczepanski OA journal list."""
    if not row:
        return None
    record = json.loads(row)

    info = DirectoryInfo(
        directory_slug=self.source_slug,
        issne=record.get("issne"),
        issnp=record.get("issnp"),
        raw_issn=record.get("issn"),
        name=clean_str(record["title"]),
        publisher=clean_str(record.get("ed")),
    )

    info.extra["as_of"] = self.config.szczepanski.date
    notes = record.get("extra")
    if notes:
        info.extra["notes"] = notes
    # carry over optional list-style fields verbatim
    for key in ("other_titles", "year_spans", "ed"):
        value = record.get(key)
        if value:
            info.extra[key] = value

    homepage = HomepageUrl.from_url(record.get("url"))
    if homepage:
        info.homepage_urls.append(homepage)
    return info
def parse_record(self, row) -> Optional[DirectoryInfo]:
    """Parse one JSON line from an EZB journal dump."""
    if not row:
        return None
    record = json.loads(row)

    info = DirectoryInfo(
        directory_slug=self.source_slug,
        issne=record.get("issne"),
        issnp=record.get("issnp"),
        custom_id=record["ezb_id"],
        name=clean_str(record["title"]),
        publisher=clean_str(record.get("publisher")),
    )

    # replace extra with a fresh dict of the optional EZB-specific fields
    optional_keys = (
        "ezb_color",
        "subjects",
        "keywords",
        "zdb_id",
        "first_volume",
        "first_issue",
        "first_year",
        "appearance",
        "costs",
    )
    info.extra = {key: record[key] for key in optional_keys if record.get(key)}

    homepage = HomepageUrl.from_url(record.get("url"))
    if homepage:
        info.homepage_urls.append(homepage)
    return info
def parse_record(self, record) -> Optional[DirectoryInfo]:
    """Convert one Entrez/NLM catalog row into a DirectoryInfo.

    Returns None when the row carries neither a print nor an online ISSN.
    """
    if not (record.get("ISSN (Online)") or record.get("ISSN (Print)")):
        return None
    # BUGFIX: the original called record.get("NlmId").strip(), which raises
    # AttributeError when "NlmId" is missing or None; default to "" first.
    nlm_id = (record.get("NlmId") or "").strip() or None
    return DirectoryInfo(
        directory_slug=self.source_slug,
        issne=record.get("ISSN (Online)"),
        issnp=record.get("ISSN (Print)"),
        custom_id=nlm_id,
        name=clean_str(record.get("JournalTitle")),
        abbrev=clean_str(record["IsoAbbr"]),
    )
def parse_record(self, record) -> Optional[DirectoryInfo]:
    """Parse one Microsoft Academic Graph journal record."""
    info = DirectoryInfo(
        directory_slug=self.source_slug,
        raw_issn=clean_issn(record["Issn"]),
        custom_id=record["JournalId"],
        name=clean_str(record["DisplayName"]),
        publisher=clean_str(record["Publisher"]),
    )
    url = HomepageUrl.from_url(record["Webpage"] or "")
    if url:
        info.homepage_urls.append(url)
    return info
def parse_record(self, record) -> Optional[DirectoryInfo]:
    """Parse one journal record carrying eISSN/pISSN plus optional extras."""
    # BUGFIX: the original called record.get("doi").strip(), which raises
    # AttributeError when "doi" is missing or None; default to "" first.
    doi = (record.get("doi") or "").strip() or None
    info = DirectoryInfo(
        directory_slug=self.source_slug,
        issne=record["eissn"],
        issnp=record["pissn"],
        custom_id=doi,
        name=clean_str(record.get("JournalTitle")),
        publisher=clean_str(record.get("Publisher")),
    )
    # stash the first additional ISSN (if any) as the raw/generic ISSN
    if record["additionalIssns"]:
        info.raw_issn = record["additionalIssns"][0]
    return info
def parse_record(self, row) -> Optional[DirectoryInfo]:
    # Parse one row of the DOAJ journal-metadata CSV export into a
    # DirectoryInfo; keys are the raw DOAJ CSV column headers.
    # TODO: Subjects, Permanent article identifiers, work_level stuff
    info = DirectoryInfo(
        directory_slug=self.source_slug,
        issnp=row["Journal ISSN (print version)"],
        issne=row["Journal EISSN (online version)"],
        name=clean_str(row["Journal title"]),
        publisher=clean_str(row["Publisher"]),
        country=parse_country(row["Country of publisher"]),
    )
    # single submission-language column
    lang = parse_lang(
        row["Languages in which the journal accepts manuscripts"])
    if lang:
        info.langs.append(lang)
    info.extra["as_of"] = self.config.snapshot.date
    # map the Yes/No "DOAJ Seal" column to a boolean
    # NOTE(review): any other non-empty value raises KeyError here — confirm
    # the export only ever contains yes/no
    if row["DOAJ Seal"]:
        info.extra["seal"] = {
            "no": False,
            "yes": True
        }[row["DOAJ Seal"].lower()]
    # preservation services: comma-separated list, else national-library flag
    if row["Preservation Services"]:
        info.extra["archive"] = [
            a.strip() for a in row["Preservation Services"].split(",")
            if a.strip()
        ]
    elif row["Preservation Service: national library"]:
        info.extra["archive"] = ["national-library"]
    # normalize Creative Commons spelling ("CC BY" -> "CC-BY")
    default_license = row["Journal license"]
    if default_license and default_license.startswith("CC"):
        info.extra["default_license"] = default_license.replace(
            "CC ", "CC-").strip()
    url = row["Journal URL"]
    if url:
        homepage = HomepageUrl.from_url(row["Journal URL"])
        if homepage:
            info.homepage_urls.append(homepage)
    return info
def parse_record(self, row) -> Optional[DirectoryInfo]:
    """Parse one SHERPA/RoMEO journal row, merging in its policy record."""
    # super mangled :( — journal rows and policy records share the
    # "RoMEO Record ID" key; merge policy fields into the row first
    row.update(self.sherpa_policies[row["RoMEO Record ID"]])
    info = DirectoryInfo(
        directory_slug=self.source_slug,
        issnp=row["ISSN"],
        issne=row["ESSN"],
        name=clean_str(row["Journal Title"]),
        publisher=clean_str(row["Publisher"]),
        country=parse_country(row["Country"]),
        custom_id=row["RoMEO Record ID"],
    )
    color = row["RoMEO colour"]
    if color:
        info.extra["color"] = color
    return info
def parse_record(self, row) -> Optional[DirectoryInfo]:
    """Parse one ISSN-portal JSON-LD line (an array of metadata elements).

    Walks every element, picking out country of publication, ISSN-L, title,
    medium-specific ISSNs, and homepage URLs.
    """
    elements = json.loads(row)
    info = DirectoryInfo(
        directory_slug=self.source_slug,
    )
    # format is an array of metadata elements
    for el in elements:
        # country elements reference the LoC countries vocabulary
        if "label" in el and el["@id"].startswith(
            "http://id.loc.gov/vocabulary/countries"
        ):
            value = el["label"]
            # US states etc. are not countries; blank them out
            if "(State)" in value:
                value = ""
            if value == "Russia (Federation)":
                value = "Russia"
            # BUGFIX: originally passed the raw el["label"] here, leaving
            # the cleanup above as dead code; use the cleaned value instead
            info.country = parse_country(value)
        if "@type" not in el:
            continue
        if el["@type"] == "http://id.loc.gov/ontologies/bibframe/IssnL":
            info.issnl = clean_issn(el["value"])
        # mainTitle may be a plain string or a list of strings
        if "mainTitle" in el:
            if isinstance(el["mainTitle"], list):
                info.name = clean_str(el["mainTitle"][0])
            else:
                info.name = clean_str(el["mainTitle"])
        # medium-specific ISSN assignments
        if el.get("format") == "vocabularies/medium#Print":
            info.issnp = clean_issn(el["issn"])
        elif el.get("format") == "vocabularies/medium#Electronic":
            info.issne = clean_issn(el["issn"])
        # "url" may likewise be a single string or a list
        urls = el.get("url", [])
        if isinstance(urls, str):
            urls = [urls]
        for url in urls:
            homepage = HomepageUrl.from_url(url)
            if homepage:
                info.homepage_urls.append(homepage)
    return info
def parse_record(self, record) -> Optional[DirectoryInfo]:
    """Parse one spreadsheet row; rows without a journal name are skipped."""
    if not record["Journal Name"]:
        return None
    lang = parse_lang(record["Language(s)"])
    info = DirectoryInfo(
        directory_slug=self.source_slug,
        raw_issn=clean_issn(record["ISSN"]),
        issne=clean_issn(record["E-ISSN"]),
        name=clean_str(record["Journal Name"]),
        publisher=clean_str(record["Publisher"]),
        langs=[lang] if lang else [],
        country=parse_country(record["Country"]),
    )
    url = HomepageUrl.from_url(record["Internet Archive Link"])
    if url:
        info.homepage_urls.append(url)
    return info
def parse_record(self, row) -> Optional[DirectoryInfo]:
    """Parse one OpenAPC-style row; rows without any ISSN are skipped."""
    if not row.get("issn"):
        return None
    info = DirectoryInfo(
        directory_slug=self.source_slug,
        issne=row["issn_electronic"],
        issnp=row["issn_print"],
        # prefer the linking ISSN, fall back to the generic one
        raw_issn=row["issn_l"] or row["issn"],
        name=clean_str(row["journal_full_title"]),
        publisher=clean_str(row["publisher"]),
    )
    info.extra["is_hybrid"] = bool(row["is_hybrid"])
    url = HomepageUrl.from_url(row["url"])
    if url:
        info.homepage_urls.append(url)
    return info
def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
    """Build a single-year KbartRecord from one row of this holdings report."""
    raw_issn = clean_issn(row["ISSN"])
    issnl = issn_db.issn2issnl(raw_issn or "")
    # "Published" starts with a four-digit year; each row covers one year
    year = int(row["Published"][:4])
    volume = clean_str(row["Vol"])
    return KbartRecord(
        issnl=issnl,
        issne=None,
        issnp=None,
        embargo=None,
        title=clean_str(row["Title"]),
        publisher=clean_str(row["Publisher"]),
        url=HomepageUrl.from_url(row["Url"]),
        start_year=year,
        end_year=year,
        start_volume=volume,
        end_volume=volume,
        year_spans=[],
    )
def parse_record(self, row) -> Optional[DirectoryInfo]:
    """Parse one row carrying up to two languages and two homepage URLs."""
    langs = []
    for raw_lang in (row["Lang1"], row["Lang2"]):
        lang = parse_lang(raw_lang)
        if lang:
            langs.append(lang)
    info = DirectoryInfo(
        directory_slug=self.source_slug,
        raw_issn=row["ISSN-L"],
        name=clean_str(row["Short Title"]),
        publisher=clean_str(row["Publisher"]),
        langs=langs,
    )
    # TODO: region mapping: "Europe and North America"
    # TODO: lang mapping: already alpha-3
    # homepages
    for url in (row["URL1"], row["URL2"]):
        if not url:
            continue
        homepage = HomepageUrl.from_url(url)
        if homepage:
            info.homepage_urls.append(homepage)
    return info
def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
    # Parse one generic KBART TSV row into a KbartRecord, resolving the
    # ISSN-L via the electronic ISSN first, then the print ISSN.
    issne: Optional[str] = clean_issn(row["online_identifier"] or "")
    issnp: Optional[str] = clean_issn(row["print_identifier"] or "")
    issnl: Optional[str] = None
    if issne:
        issnl = issn_db.issn2issnl(issne)
    if issnp and not issnl:
        issnl = issn_db.issn2issnl(issnp)
    # coverage years come from the leading four chars of the issue dates
    start_year: Optional[int] = None
    end_year: Optional[int] = None
    if row["date_first_issue_online"]:
        start_year = int(row["date_first_issue_online"][:4])
    if row["date_last_issue_online"]:
        end_year = int(row["date_last_issue_online"][:4])
    end_volume = row["num_last_vol_online"]
    # hack to handle open-ended preservation
    if end_year is None and end_volume and "(present)" in end_volume:
        end_year = THIS_YEAR
    record = KbartRecord(
        issnl=issnl,
        issnp=issnp,
        issne=issne,
        title=clean_str(row["publication_title"]),
        publisher=clean_str(row["publisher_name"]),
        url=HomepageUrl.from_url(row["title_url"]),
        embargo=clean_str(row["embargo_info"]),
        start_year=start_year,
        end_year=end_year,
        start_volume=clean_str(row["num_first_vol_online"]),
        end_volume=clean_str(row["num_last_vol_online"]),
        year_spans=[],
    )
    # some feeds emit the literal string "null" for missing volume numbers
    if record.start_volume == "null":
        record.start_volume = None
    if record.end_volume == "null":
        record.end_volume = None
    return record
def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
    """Parse one preservation row, converting its year list to year spans."""
    raw_issn = clean_issn(row["ISSN"])
    issne = clean_issn(row["ISSN"])
    issnl = issn_db.issn2issnl(raw_issn or issne or "")
    # convert list of years to a set of year spans
    tokens = row["Preserved Years"].split(";")
    years = [int(token.strip()) for token in tokens if token]
    year_spans = merge_spans([], [[year, year] for year in years])
    return KbartRecord(
        issnl=issnl,
        issne=issne,
        issnp=None,
        embargo=None,
        title=clean_str(row["Title"]),
        publisher=clean_str(row["Publisher"]),
        url=None,
        start_year=None,
        end_year=None,
        start_volume=None,
        end_volume=None,
        year_spans=year_spans,
    )
def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
    """Parse one HathiTrust-style row into a single-year KbartRecord."""
    # unpack fields
    # access = dict(allow="bright", deny="dark")[row['access']]
    raw_issn = clean_issn(row["issn"].split(",")[0])
    imprint = clean_str(row["imprint"])
    raw_date = row["rights_date_used"].strip()
    issnl = issn_db.issn2issnl(raw_issn or "")

    rights_date: Optional[int] = None
    if raw_date.isdigit():
        rights_date = int(raw_date)
    start_year: Optional[int] = rights_date
    # 9999 is a sentinel for "unknown date"
    if start_year == 9999:
        start_year = None

    # publisher is the leading chunk of the imprint field
    publisher: Optional[str] = None
    if imprint:
        publisher = imprint.split(".")[0].split(",")[0].split("[")[0].strip()

    return KbartRecord(
        issnl=issnl,
        issne=None,
        issnp=None,
        embargo=None,
        title=clean_str(row["title"]),
        publisher=publisher,
        url=None,
        start_year=start_year,
        end_year=start_year,
        start_volume=None,
        end_volume=None,
        year_spans=[],
    )
def parse_record(self, row) -> Optional[DirectoryInfo]:
    """Parse one row of the Norwegian registry CSV."""
    lang = parse_lang(row["Language"])
    info = DirectoryInfo(
        directory_slug=self.source_slug,
        issnp=row["Print ISSN"],
        issne=row["Online ISSN"],
        custom_id=clean_str(row["NSD tidsskrift_id"]),
        publisher=clean_str(row["Publisher"]),
        country=parse_country(row["Country of publication"]),
        name=clean_str(row.get("International title")),
        langs=[lang] if lang else [],
    )
    info.extra["as_of"] = self.config.norwegian.date
    if row["Level 2019"]:
        info.extra["level"] = int(row["Level 2019"])
    # keep the original-language title only when it differs
    if row["Original title"] != row["International title"]:
        info.original_name = clean_str(row["Original title"])
    homepage = HomepageUrl.from_url(row["URL"])
    if homepage:
        info.homepage_urls.append(homepage)
    return info
def parse_record(self, row) -> Optional[DirectoryInfo]:
    """Parse one Wikidata SPARQL result row.

    Rows missing an ISSN or title are skipped. Publisher strings that look
    like bare QIDs or lexeme artifacts from the SPARQL dump are dropped.
    """
    if not (row.get("issn") and row.get("title")):
        return None
    # entity URL like https://www.wikidata.org/entity/Q123 -> "Q123"
    wikidata_qid = row["item"].strip().split("/")[-1]
    publisher = row["publisher_name"]
    # BUGFIX: check emptiness first (the original evaluated
    # publisher.startswith(...) before "not publisher", crashing on a None
    # publisher), and slice instead of indexing so publisher == "Q" does not
    # raise IndexError on publisher[1]
    if (not publisher
            or (publisher.startswith("Q") and publisher[1:2].isdigit())
            or publisher.startswith("t1")):
        publisher = None
    info = DirectoryInfo(
        directory_slug=self.source_slug,
        raw_issn=row["issn"],
        custom_id=wikidata_qid,
        name=clean_str(row["title"]),
        publisher=clean_str(publisher),
    )
    if row.get("start_year"):
        info.extra["start_year"] = row["start_year"]
    url = HomepageUrl.from_url(row.get("websiteurl"))
    if url:
        info.homepage_urls.append(url)
    return info
def parse_record(self, record) -> Optional[DirectoryInfo]:
    """Parse one CSV row; tolerates a UTF-8 BOM glued onto the Title header."""
    # HACK: some exports leave the byte-order mark attached to the first
    # column name
    if "\ufeffTitle" in record:
        record["Title"] = record["\ufeffTitle"]
    if not record["Title"]:
        return None
    info = DirectoryInfo(
        directory_slug=self.source_slug,
        raw_issn=clean_issn(record["ISSN"]),
        issne=clean_issn(record["EISSN"]),
        name=clean_str(record["Title"]),
    )
    url = HomepageUrl.from_url(record["URL"])
    if url:
        info.homepage_urls.append(url)
    return info
def parse_record(self, row) -> Optional[DirectoryInfo]:
    """Parse one Australian ERA journal-list row."""
    era_details = dict(
        era_id=clean_str(row["ERA Journal Id"]),
        field=clean_str(row["FoR 1 Name"]),
        field_code=clean_str(row["FoR 1"]),
    )
    return DirectoryInfo(
        directory_slug=self.source_slug,
        raw_issn=row["ISSN 1"],
        custom_id=clean_str(row["ERA Journal Id"]),
        name=clean_str(row.get("Title")),
        original_name=clean_str(row.get("Foreign Title")),
        extra=dict(australian_era=era_details),
    )
def parse_record(self, row) -> Optional[DirectoryInfo]:
    """Parse one ISSN-L crosscheck row, recording index-membership flags."""
    if not (row.get("ISSN_L") and row.get("TITLE")):
        return None
    # TODO: also add for other non-direct indices
    # for ind in ('WOS', 'SCOPUS'):
    #     issnl, status = self.add_issn(
    #         ind.lower(),
    #         raw_issn=row['ISSN_L'],
    #         name=row['TITLE'],
    #     )
    indices = ("DOAJ", "ROAD", "PMC", "OAPC", "WOS", "SCOPUS")
    # the JOURNAL_IN_* columns are "0"/"1" strings; map to booleans
    extra = {
        "in_" + index.lower(): bool(int(row["JOURNAL_IN_" + index]))
        for index in indices
    }
    return DirectoryInfo(
        directory_slug=self.source_slug,
        raw_issn=row["ISSN_L"],
        name=clean_str(row["TITLE"]),
        extra=extra,
    )
def parse_record(self, line) -> Optional[DirectoryInfo]:
    """Parse one JSON line with nested identifiers.issn metadata."""
    record = json.loads(line)
    issn_info = record.get("identifiers", {}).get("issn", {})
    # sometimes the ISSN values come through as lists; take the first entry
    for key in ("generic", "electronic", "print"):
        if isinstance(issn_info.get(key), list):
            issn_info[key] = issn_info[key][0]
    langs = []
    for raw_lang in record["languages"]:
        lang = parse_lang(raw_lang)
        if lang:
            langs.append(lang)
    info = DirectoryInfo(
        directory_slug=self.source_slug,
        raw_issn=clean_issn(issn_info.get("generic", "")),
        issne=clean_issn(issn_info.get("electronic", "")),
        issnp=clean_issn(issn_info.get("print", "")),
        name=clean_str(record.get("title")),
        langs=langs,
    )
    if record["url"]:
        homepage = HomepageUrl.from_url(record["url"])
        if homepage:
            info.homepage_urls.append(homepage)
    return info