def _get_source_by_issn(self, issnModel: SourceRawData) -> SourceRecord:
    """Return the SourceRecord for an ISSN, creating one if needed.

    If the ISSN is not attached to any existing Source, a new serial
    Source is created from the issn.org JSON-LD data stored in the raw
    model.

    :param issnModel: raw issn.org record; ``identifier`` holds the ISSN
        string and ``issn_data`` the JSON-LD payload with an ``@graph``
        list — TODO confirm payload schema against the harvester.
    :return: the existing or newly created SourceRecord, or ``None``
        when the graph has no KeyTitle node or creation failed.
    """
    issn = issnModel.identifier
    graph_data = issnModel.issn_data

    # Reuse an existing Source when the ISSN is already registered.
    pid, source = SourceRecord.get_source_by_pid(issn)
    if source:
        return source

    # Locate the journal's key title in the issn.org JSON-LD graph.
    title = None
    for item in graph_data["@graph"]:
        if item['@id'] == 'resource/ISSN/' + issn + '#KeyTitle':
            title = item["value"]
            break
    if title is None:
        # Fix: the original left `title` unbound (NameError) when the
        # graph contained no KeyTitle node; bail out explicitly instead.
        return None

    data = dict()
    data['source_type'] = SourceType.SERIAL.value
    data['name'] = title
    data['source_status'] = SourceStatus.UNOFFICIAL.value
    data['title'] = title
    data['identifiers'] = [{'idtype': 'pissn', 'value': issn}]

    user = get_default_user()
    msg, source = SourceRecord.new_source_revision(data, user.id)
    if source:
        return source
    return None
def sync_records(cls):
    """Import every raw ISSN record that issn.org data parses for.

    Each parsed record is stored as a new unofficial journal source
    revision attributed to the default user.
    """
    for raw in SourceRawData.query.all():
        parsed = IssnDataParser.parse(raw.identifier)
        if not parsed:
            continue
        default_user = get_default_user()
        parsed['source_status'] = SourceStatus.UNOFFICIAL.value
        parsed['source_type'] = SourceType.JOURNAL.value
        print(parsed)
        SourceRecord.new_source_revision(
            parsed,
            user_id=default_user.id,
            comment='issn.org import')
def init_repos():
    """Seed repository Sources from ``repos.json`` in the data directory.

    Every entry in the JSON file becomes an UNOFFICIAL repository
    SourceRecord; ``url`` and ``oaiurl`` fields, when present, are also
    registered as identifiers.
    """
    datadir = current_app.config['IROKO_DATA_DIRECTORY']
    path = os.path.join(datadir, 'repos.json')
    # Fix: the default user was re-fetched on every loop iteration and a
    # dead `source` wrapper dict was built but never used — both removed.
    user = get_default_user()
    with open(path) as repos:
        raw = json.load(repos, object_hook=remove_nulls)
    for record in raw.values():
        data = dict()
        data['source_type'] = SourceType.REPOSITORY.value
        data['name'] = record['name']
        data['title'] = record['name']
        ids = []
        if 'url' in record:
            data['url'] = record['url']
            ids.append(dict(idtype='url', value=record['url']))
        if 'oaiurl' in record:
            data['oaiurl'] = record['oaiurl']
            ids.append(dict(idtype='oaiurl', value=record['oaiurl']))
        data[pids.IDENTIFIERS_FIELD] = ids
        data['source_status'] = SourceStatus.UNOFFICIAL.value
        data['_save_info'] = {
            'user_id': str(user.id),
            'comment': 'seed data',
            'updated': str(datetime.date.today()),
        }
        SourceRecord.new_source_revision(data, user.id, 'seed data')
def init_journals():
    """Seed journal Sources from ``journals.json`` in the data directory.

    For each journal record it merges: issn.org data (name, title,
    aliases, identifiers), OAI endpoint URLs from ``oaisources.json``,
    and licence / institution classifications resolved through the
    vocabularies in ``vocabularies.json`` and the CUOR organization
    service.  Each merged record is saved as an UNOFFICIAL source
    revision.  Journals are de-duplicated by title.
    """
    # sources_path = '../../data/journals.json'
    # delete_all_sources()
    # print('delete all source and relations')
    datadir = current_app.config['IROKO_DATA_DIRECTORY']
    path = os.path.join(datadir, 'journals.json')
    path_tax = os.path.join(datadir, 'vocabularies.json')
    path_oai = os.path.join(datadir, 'oaisources.json')
    user = User.query.filter_by(email='*****@*****.**').first()
    # Cache CUOR lookups (by orgaid or by label) across journals.
    org_cache = dict()
    with open(path) as fsource, open(path_oai) as foai, open(path_tax) as ftax:
        data = json.load(fsource, object_hook=remove_nulls)
        urls = json.load(foai)
        tax = json.load(ftax)
        inserted = {}
        if isinstance(data, dict):
            for k, record in data.items():
                # Skip journals whose title was already processed.
                if not inserted.__contains__(record['title']):
                    inserted[record['title']] = record['title']
                    # print(record['title'])
                    source = dict()
                    # NOTE(review): this rebinds `data` (the dict being
                    # iterated); safe because .items() was evaluated once,
                    # but confusing — consider renaming.
                    data = dict()
                    data['source_type'] = SourceType.JOURNAL.value
                    data['name'] = record['title']
                    ids = []
                    _assing_if_exist(data, record, 'title')
                    _assing_if_exist(data, record, 'description')
                    _assing_if_exist(data, record, 'email')
                    _assing_if_exist(data, record, 'logo')
                    _assing_if_exist(data, record, 'seriadas_cubanas')
                    if 'url' in record:
                        data['url'] = record['url']
                        ids.append(dict(idtype='url', value=record['url']))
                    if 'rnps' in record:
                        data['rnps'] = {'p': record['rnps'], 'e': ''}
                        ids.append(dict(idtype='prnps', value=record['rnps']))
                    # Query issn.org for each ISSN variant; the last
                    # successful parse (p -> e -> l order) wins.
                    issn = {}
                    issn_org = None
                    if 'p' in record['issn']:
                        issn['p'] = record['issn']['p']
                        issn_org = IssnDataParser.parse(record['issn']['p'])
                        # ids.append(dict(idtype='issn_p', value=record['issn']['p']))
                    if 'e' in record['issn']:
                        issn_org = IssnDataParser.parse(record['issn']['e'])
                        issn['e'] = record['issn']['e']
                    if 'l' in record['issn']:
                        issn_org = IssnDataParser.parse(record['issn']['l'])
                        issn['l'] = record['issn']['l']
                    data['issn'] = issn
                    # issn.org data overrides the local name/title.
                    if issn_org:
                        data['name'] = issn_org['name']
                        data['title'] = issn_org['title']
                        data['aliases'] = issn_org['aliases']
                        ids.extend(issn_org['identifiers'])
                    # Attach the OAI endpoint whose id matches this journal.
                    for url in urls:
                        if url['id'] == k:
                            data['oaiurl'] = url['url']
                            ids.append(dict(idtype='oaiurl', value=url['url']))
                    data[pids.IDENTIFIERS_FIELD] = ids
                    # SourceRecord.delete_all_pids_without_object(data[pids.IDENTIFIERS_FIELD])
                    source['data'] = data
                    # Resolve the licence name to a vocabulary Term.
                    if 'licence' in record:
                        data['classifications'] = []
                        name = string_as_identifier(
                            tax['licences'][record['licence']]["name"])
                        term = Term.query.filter_by(identifier=name).first()
                        data['classifications'].append({
                            'id': str(term.uuid),
                            'description': term.description,
                            'vocabulary': term.vocabulary_id
                        })
                    # Resolve the institution (and its parent, if any)
                    # through CUOR, preferring a persistent orgaid lookup
                    # and falling back to a label search within Cuba.
                    if 'institution' in record:
                        # print(tax['institutions'][record['institution']]["name"])
                        data['organizations'] = []
                        if "orgaid" in tax['institutions'][
                                record['institution']]:
                            orgaid = tax['institutions'][
                                record['institution']]["orgaid"]
                            if orgaid in org_cache:
                                org = org_cache[orgaid]
                            else:
                                org = CuorHelper.query_cuor_by_pid(orgaid)
                                org_cache[orgaid] = org
                        else:
                            name = tax['institutions'][
                                record['institution']]["name"]
                            if name in org_cache:
                                org = org_cache[name]
                            else:
                                org = CuorHelper.query_cuor_by_label(
                                    name, country='Cuba')
                                org_cache[name] = org
                        if org:
                            data['organizations'].append({
                                'id': org['id'],
                                'name': org['metadata']['name'],
                                'role': 'MAIN'
                            })
                            # Parent id '0' means "no parent" in the
                            # vocabularies file — TODO confirm.
                            parent_id = tax['institutions'][
                                record['institution']]['parents'][0]
                            if parent_id != '0':
                                if "orgaid" in tax['institutions'][parent_id]:
                                    orgaid = tax['institutions'][parent_id][
                                        "orgaid"]
                                    if orgaid in org_cache:
                                        parent_org = org_cache[orgaid]
                                    else:
                                        parent_org = CuorHelper.query_cuor_by_pid(
                                            orgaid)
                                        org_cache[orgaid] = parent_org
                                else:
                                    name = tax['institutions'][parent_id]["name"]
                                    if name in org_cache:
                                        parent_org = org_cache[name]
                                    else:
                                        parent_org = CuorHelper.query_cuor_by_label(
                                            name, country='Cuba')
                                        org_cache[name] = parent_org
                                if parent_org:
                                    data['organizations'].append({
                                        'id': parent_org['id'],
                                        'name': parent_org['metadata']['name'],
                                        'role': 'COLABORATOR'
                                    })
                    data['source_type'] = SourceType.JOURNAL.value
                    data['source_status'] = SourceStatus.UNOFFICIAL.value
                    user = get_default_user()
                    data['_save_info'] = {
                        'user_id': str(user.id),
                        'comment': 'seed data',
                        'updated': str(datetime.date.today())
                    }
                    # TODO: this case needs a patch instead of an update,
                    # because issn.org already contributed data...
                    new_source = SourceRecord.new_source_revision(
                        data, user.id, 'seed data')
                    # new_source, msg = SourceRecord.create_or_update(data, None, True, True)
                    # # msg, new_source = Sources.insert_new_source(source,
                    # SourceStatus.UNOFFICIAL, user=user)
                    # # if 'oaiurl' in data:
                    # repo = Repository.query.filter_by(source_uuid=new_source.id).first()
                    # if not repo:
                    #     repo = Repository()
                    # repo.source_uuid = new_source.id
                    # repo.harvest_endpoint = data['oaiurl']
                    # repo.harvest_type = HarvestType.OAI
                    # db.session.add(repo)
                    # # IrokoSourceVersions.new_version(new_source.id, data, user=user,
                    # comment='fixing is_current field', is_current=True)
                    # print('-----------------------')
                    # print(new_source)
                    # print('----------------------- sleep 5 seconds')
                    # Throttle so issn.org / CUOR requests are not hammered
                    # — presumably rate limiting; verify.
                    sleep(5)
def syncronize_miar_journals(self):
    """Synchronize MIAR database info into source classifications.

    For each raw ISSN record, parses its stored MIAR payload, resolves
    the MIAR databases it is indexed/evaluated in to vocabulary Terms,
    and adds each Term (plus its parent, i.e. the MIAR group) as a
    classification on the matching Source, saving a new revision.
    If the Source does not exist the original intent was to create one
    from the ISSN model data (see module Spanish docstring) — not done
    here; TODO confirm.

    :return: ``'success'`` on completion, ``None`` on a fatal error.
    """
    issncount = 0
    sourcecount = 0
    issn_list = SourceRawData.query.all()
    if issn_list:
        try:
            for issn in issn_list:
                issncount = issncount + 1
                data = issn.get_data_field('miar')
                # The harvester stores a sentinel string when the ISSN is
                # unknown to MIAR; skip those records.
                if type(
                        data
                ) == str and data == issn.identifier + ' IS NOT LISTED IN MIAR DATABASE':
                    print(issn.identifier + ' IS NOT LISTED IN MIAR DATABASE')
                    continue
                else:
                    archive_issn_miar = self._parse_journal_information(
                        data)
                    try:
                        # atribute = archive_issn_miar['Indexed\xa0in:']
                        dbs_split = []
                        print(
                            '****************************************************************'
                        )
                        print(archive_issn_miar)
                        print(
                            '****************************************************************'
                        )
                        # TODO: at some point the parser should be improved..
                        # NOTE: the keys contain a non-breaking space
                        # (\u00a0) exactly as scraped from MIAR.
                        keys = [
                            # 'Indexed\xa0in:',
                            'Indexed\u00a0in:',
                            # 'Evaluated\xa0in:',
                            'Evaluated\u00a0in:'
                        ]
                        # index = miar['Indexed\xa0in:']
                        for key in keys:
                            if key in archive_issn_miar:
                                dbs_split.extend(archive_issn_miar[key])
                        print(dbs_split)
                        pid, source = SourceRecord.get_source_by_pid(
                            issn.identifier)
                        print(type(source), source)
                        sourcecount = sourcecount + 1
                        # Map each MIAR database name to its vocabulary
                        # Term via the MIAR database-ID URL convention.
                        to_add = []
                        for dbs in dbs_split:
                            miar = Term.query.filter_by(
                                identifier=
                                'http://miar.ub.edu/databases/ID/' +
                                dbs.lower().strip()).first()
                            if miar:
                                print("add {0}".format(dbs))
                                to_add.append(miar)
                        # miar_db_type_terms = Term.query.filter_by(
                        #     vocabulary_id=IrokoVocabularyIdentifiers.INDEXES.value).all()
                        # # for miar in miar_db_type_terms:
                        #     if miar.identifier == 'http://miar.ub.edu/databases/ID/' +
                        #     dbs.lower().strip():
                        #         print("add {0}".format(dbs))
                        #         to_add.append(miar)
                        for t in to_add:
                            print(
                                "----------- !! ADD a Clasfication {0}-{1}-{2}-{3}"
                                .format(t.uuid, t.description,
                                        t.vocabulary_id, t.parent_id))
                            source.add_classification(
                                str(t.uuid), t.description, t.vocabulary_id,
                                dict(url='', initial_cover='', end_cover=''))
                            # add also the parent, meaning the miar_groups
                            if t.parent_id and t.parent_id != 0:
                                parent = Term.query.filter_by(
                                    id=t.parent_id).first()
                                print(
                                    "----------- !! ADD a parent {0}- {1}".
                                    format(parent.uuid, parent.description))
                                source.add_classification(
                                    str(parent.uuid), parent.description,
                                    parent.vocabulary_id, dict())
                        print('***********', dict(source), source)
                        source.new_revision(
                            user_id=get_default_user().id,
                            comment='MIAR Classifications Update')
                        # SourceRecord.new_source_revision(data=source.model.json,
                        #     user_id=get_default_user().id,
                        #     comment='MIAR Classifications
                        #     Update')
                        # # source.update(data=source.model.json, dbcommit=True, reindex=True)
                        # IrokoSourceVersions.new_version(
                        #     source.id,
                        #     source.model.json,
                        #     user_id=get_default_user().id,
                        #     comment='MIAR Classifications Update',
                        #     is_current=True
                        # )
                        # source.commit()
                    except Exception:
                        # Per-record failures are logged and skipped so one
                        # bad MIAR payload does not abort the whole sync.
                        # print("issncount={0}".format(issncount))
                        # print("sourcecount={0}".format(sourcecount))
                        print(traceback.format_exc())
                        continue
        except Exception as e:
            # print("issncount={0}".format(issncount))
            # print("sourcecount={0}".format(sourcecount))
            print(traceback.format_exc())
            return None
    # print("issncount={0}".format(issncount))
    # print("sourcecount={0}".format(sourcecount))
    return 'success'