Exemple #1
0
    def _get_source_by_issn(self, issnModel: SourceRawData) -> SourceRecord:
        """
        Return the source registered under an ISSN, creating it when none exists.

        The ISSN is first looked up as a persistent identifier. If no source is
        found, the ``@graph`` list of ``issnModel.issn_data`` is searched for the
        node whose ``@id`` is ``resource/ISSN/<issn>#KeyTitle``; its ``value``
        becomes the name and title of a new unofficial serial source that has the
        ISSN as its ``pissn`` identifier.

        :param issnModel: raw ISSN row; ``identifier`` holds the ISSN and
            ``issn_data`` the data the key title is read from.
        :return: the existing or newly created source, or ``None`` when the data
            has no key title node or the new source could not be created.
        """
        issn = issnModel.identifier
        _pid, source = SourceRecord.get_source_by_pid(issn)
        if source:
            return source

        key_title_id = 'resource/ISSN/' + issn + '#KeyTitle'
        # Missing data or a missing graph means "nothing to create from", which
        # is reported as None like any other miss instead of raising.
        graph = (issnModel.issn_data or {}).get("@graph") or []
        for item in graph:
            # Nodes without an '@id' cannot be the key title; skip them.
            if item.get('@id') != key_title_id:
                continue
            title = item["value"]
            # Built under its own name so the ISSN data being iterated is never
            # rebound mid-loop.
            new_source_data = {
                'source_type': SourceType.SERIAL.value,
                'name': title,
                'source_status': SourceStatus.UNOFFICIAL.value,
                'title': title,
                'identifiers': [{'idtype': 'pissn', 'value': issn}],
            }
            user = get_default_user()
            _msg, source = SourceRecord.new_source_revision(
                new_source_data, user.id)
            if source:
                return source
        return None
Exemple #2
0
    def sync_records(cls):
        """
        Save a source revision for every stored ISSN the ISSN parser can resolve.

        Each parsed record is marked as an unofficial journal, printed, and handed
        to ``SourceRecord.new_source_revision`` on behalf of the default user with
        the comment 'issn.org import'. ISSNs for which the parser returns nothing
        are skipped.
        """
        for raw_issn in SourceRawData.query.all():
            parsed = IssnDataParser.parse(raw_issn.identifier)
            if not parsed:
                # nothing could be built for this ISSN
                continue

            default_user = get_default_user()
            parsed['source_status'] = SourceStatus.UNOFFICIAL.value
            parsed['source_type'] = SourceType.JOURNAL.value
            print(parsed)
            SourceRecord.new_source_revision(
                parsed,
                user_id=default_user.id,
                comment='issn.org import',
            )
Exemple #3
0
def init_repos():
    """
    Seed the repository sources listed in ``repos.json``.

    The file is read from the directory configured as ``IROKO_DATA_DIRECTORY``.
    Every entry becomes an unofficial source of type REPOSITORY whose name and
    title are the entry's ``name``. Its ``url`` and ``oaiurl`` values, when
    present, are stored both as fields and as identifiers. Each source is saved
    as a new revision by the default user with the comment 'seed data'.
    """
    datadir = current_app.config['IROKO_DATA_DIRECTORY']
    path = os.path.join(datadir, 'repos.json')

    # The default user is the same for every record, so resolve it once.
    user_id = get_default_user().id

    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(path, encoding='utf-8') as repos:
        raw = json.load(repos, object_hook=remove_nulls)

    for record in raw.values():
        data = {
            'source_type': SourceType.REPOSITORY.value,
            'name': record['name'],
            'title': record['name'],
        }

        # Each address present is kept as a field and as an identifier whose
        # idtype is the field name.
        ids = []
        for field in ('url', 'oaiurl'):
            if field in record:
                data[field] = record[field]
                ids.append(dict(idtype=field, value=record[field]))
        data[pids.IDENTIFIERS_FIELD] = ids

        data['source_status'] = SourceStatus.UNOFFICIAL.value
        data['_save_info'] = {
            'user_id': str(user_id),
            'comment': 'seed data',
            'updated': str(datetime.date.today())
        }

        SourceRecord.new_source_revision(data, user_id, 'seed data')
Exemple #4
0
def _cached_cuor_organization(institution, org_cache):
    """
    Return the CUOR organization matching a vocabulary institution entry.

    The lookup is by pid when the entry has an ``orgaid``; otherwise it is by
    ``name`` with country 'Cuba'. Whatever the lookup returns is stored in
    ``org_cache`` under the orgaid or name, so each one is queried only once.
    """
    if "orgaid" in institution:
        cache_key = institution["orgaid"]
        if cache_key not in org_cache:
            org_cache[cache_key] = CuorHelper.query_cuor_by_pid(cache_key)
    else:
        cache_key = institution["name"]
        if cache_key not in org_cache:
            org_cache[cache_key] = CuorHelper.query_cuor_by_label(
                cache_key, country='Cuba')
    return org_cache[cache_key]


def init_journals():
    """
    Seed the journal sources listed in ``journals.json``.

    Three files are read from the directory configured as
    ``IROKO_DATA_DIRECTORY``: ``journals.json`` (the journals, keyed by id),
    ``oaisources.json`` (OAI urls matched to journals by ``id``) and
    ``vocabularies.json`` (licences and institutions referenced by the
    journals). Nothing is done unless ``journals.json`` holds a JSON object.

    For every journal title seen for the first time, an unofficial JOURNAL
    source is built from the record. It is completed with the name, title,
    aliases and identifiers the ISSN parser returns for its ISSNs, its licence
    term and its institution and parent institution as resolved through CUOR.
    The source is then saved as a new revision by the default user with the
    comment 'seed data'.
    """
    datadir = current_app.config['IROKO_DATA_DIRECTORY']

    path = os.path.join(datadir, 'journals.json')
    path_tax = os.path.join(datadir, 'vocabularies.json')
    path_oai = os.path.join(datadir, 'oaisources.json')

    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(path, encoding='utf-8') as fsource, \
            open(path_oai, encoding='utf-8') as foai, \
            open(path_tax, encoding='utf-8') as ftax:
        journals = json.load(fsource, object_hook=remove_nulls)
        urls = json.load(foai)
        tax = json.load(ftax)

    if not isinstance(journals, dict):
        return

    # The default user is the same for every record, so resolve it once.
    user_id = get_default_user().id
    org_cache = dict()
    seen_titles = set()

    for key, record in journals.items():
        # Only the first record of each title is imported.
        if record['title'] in seen_titles:
            continue
        seen_titles.add(record['title'])

        data = dict()
        data['source_type'] = SourceType.JOURNAL.value
        data['name'] = record['title']
        ids = []

        for field in ('title', 'description', 'email', 'logo',
                      'seriadas_cubanas'):
            _assing_if_exist(data, record, field)

        if 'url' in record:
            data['url'] = record['url']
            ids.append(dict(idtype='url', value=record['url']))

        if 'rnps' in record:
            data['rnps'] = {'p': record['rnps'], 'e': ''}
            ids.append(dict(idtype='prnps', value=record['rnps']))

        # A record without ISSN information is imported with an empty 'issn'.
        record_issn = record.get('issn') or {}
        issn = {}
        issn_org = None
        # Later variants take precedence (p, then e, then l), but a variant the
        # parser cannot resolve no longer discards an earlier successful one.
        for variant in ('p', 'e', 'l'):
            if variant not in record_issn:
                continue
            issn[variant] = record_issn[variant]
            parsed = IssnDataParser.parse(record_issn[variant])
            if parsed:
                issn_org = parsed
        data['issn'] = issn
        if issn_org:
            data['name'] = issn_org['name']
            data['title'] = issn_org['title']
            data['aliases'] = issn_org['aliases']
            ids.extend(issn_org['identifiers'])

        for oai in urls:
            if oai['id'] == key:
                data['oaiurl'] = oai['url']
                ids.append(dict(idtype='oaiurl', value=oai['url']))

        data[pids.IDENTIFIERS_FIELD] = ids

        if 'licence' in record:
            data['classifications'] = []
            name = string_as_identifier(
                tax['licences'][record['licence']]["name"])
            term = Term.query.filter_by(identifier=name).first()
            # first() yields None when the licence term is not loaded; the
            # source is then saved without that classification.
            if term is not None:
                data['classifications'].append({
                    'id': str(term.uuid),
                    'description': term.description,
                    'vocabulary': term.vocabulary_id
                })

        if 'institution' in record:
            data['organizations'] = []
            institution = tax['institutions'][record['institution']]
            org = _cached_cuor_organization(institution, org_cache)
            if org:
                data['organizations'].append({
                    'id': org['id'],
                    'name': org['metadata']['name'],
                    'role': 'MAIN'
                })
            # A first parent of '0' means the institution has no parent entry.
            parent_id = institution['parents'][0]
            if parent_id != '0':
                parent_org = _cached_cuor_organization(
                    tax['institutions'][parent_id], org_cache)
                if parent_org:
                    data['organizations'].append({
                        'id': parent_org['id'],
                        'name': parent_org['metadata']['name'],
                        'role': 'COLABORATOR'
                    })

        data['source_type'] = SourceType.JOURNAL.value
        data['source_status'] = SourceStatus.UNOFFICIAL.value
        data['_save_info'] = {
            'user_id': str(user_id),
            'comment': 'seed data',
            'updated': str(datetime.date.today())
        }

        # TODO: a patch is needed here instead of an update, because the ISSN
        #  import already brought data for the source.
        SourceRecord.new_source_revision(data, user_id, 'seed data')

        # Pause between records; presumably throttles the external lookups
        # made above -- TODO confirm.
        sleep(5)
Exemple #5
0
    def syncronize_miar_journals(self):
        """
        Add the MIAR database classifications to the source of each stored ISSN.

        For every ``SourceRawData`` row the stored ``miar`` data is parsed, and
        the databases listed under its "Indexed in:" and "Evaluated in:" keys are
        looked up as ``Term`` rows with identifier
        ``http://miar.ub.edu/databases/ID/<database name>``. Each term found, and
        its parent term when it has one, is added to the source as a
        classification. A new source revision is then saved with the comment
        'MIAR Classifications Update'.

        ISSNs that MIAR does not list, and ISSNs that have no source, are
        skipped. NOTE(review): the original description said a missing source
        should be created from the ISSN data; that is not done here.

        :return: 'success' once every ISSN was visited (a failure while updating
            one source is printed and that ISSN is skipped), or ``None`` if
            reading or parsing the MIAR data of an ISSN raised.
        """
        not_listed = ' IS NOT LISTED IN MIAR DATABASE'
        banner = '****************************************************************'
        # These keys contain a non-breaking space (\u00a0), not a plain space.
        # TODO: at some point it should be possible to improve the parser.
        miar_keys = ('Indexed\u00a0in:', 'Evaluated\u00a0in:')

        issn_list = SourceRawData.query.all()
        try:
            for issn in issn_list:
                data = issn.get_data_field('miar')
                if isinstance(data, str) and data == issn.identifier + not_listed:
                    print(issn.identifier + not_listed)
                    continue

                archive_issn_miar = self._parse_journal_information(data)
                try:
                    print(banner)
                    print(archive_issn_miar)
                    print(banner)

                    dbs_split = []
                    for key in miar_keys:
                        if key in archive_issn_miar:
                            dbs_split.extend(archive_issn_miar[key])
                    print(dbs_split)

                    pid, source = SourceRecord.get_source_by_pid(
                        issn.identifier)
                    print(type(source), source)
                    if source is None:
                        # Nothing to classify; say so instead of failing on
                        # the first attribute access below.
                        print("no source found for issn {0}".format(
                            issn.identifier))
                        continue

                    to_add = []
                    for dbs in dbs_split:
                        miar = Term.query.filter_by(
                            identifier='http://miar.ub.edu/databases/ID/' +
                            dbs.lower().strip()).first()
                        if miar:
                            print("add {0}".format(dbs))
                            to_add.append(miar)

                    for t in to_add:
                        print(
                            "----------- !! ADD a Clasfication {0}-{1}-{2}-{3}"
                            .format(t.uuid, t.description,
                                    t.vocabulary_id, t.parent_id))
                        source.add_classification(
                            str(t.uuid), t.description,
                            t.vocabulary_id,
                            dict(url='',
                                 initial_cover='',
                                 end_cover=''))
                        # add also the parent, meaning the miar_groups
                        if not t.parent_id:
                            continue
                        parent = Term.query.filter_by(
                            id=t.parent_id).first()
                        # A parent id with no matching term no longer aborts
                        # the update of this source.
                        if parent is None:
                            continue
                        print(
                            "----------- !! ADD a parent {0}- {1}".
                            format(parent.uuid,
                                   parent.description))
                        source.add_classification(
                            str(parent.uuid), parent.description,
                            parent.vocabulary_id, dict())

                    print('***********', dict(source), source)
                    source.new_revision(
                        user_id=get_default_user().id,
                        comment='MIAR Classifications Update')
                except Exception:
                    # Best effort: a failure on one source must not stop the
                    # synchronization of the others.
                    print(traceback.format_exc())
                    continue
        except Exception:
            # Reading or parsing the MIAR data failed; abort the whole run.
            print(traceback.format_exc())
            return None

        return 'success'