Beispiel #1
0
 def get_or_create_institution_object(self, institution_name, parent=None):
     """Return the cached institution for a name, creating it on a miss.

     The cache key is produced by ``ImportHelper.create_dict_key`` from
     ``institution_name``; when ``parent`` is given, the parent's
     ``institution_shortname`` is appended to the name first.

     On a cache miss a new ``Institution`` row is created, stored in
     ``self.institutions_dict`` and reported through ``logger`` and
     ``import_report``.

     :param institution_name: full name of the institution (string).
     :param parent: optional parent ``Institution``; stored as
         ``institution_parent`` on a newly created row.
     :return: the cached or freshly created ``Institution``.
     """
     raw_key = institution_name
     if parent is not None:
         raw_key = raw_key + parent.institution_shortname
     cache_key = ImportHelper.create_dict_key(raw_key)

     # Cache hit: nothing to create or report.
     if cache_key in self.institutions_dict:
         return self.institutions_dict[cache_key]

     # The short name is passed through Truncator(...).chars(32); the slug
     # is derived from that (possibly truncated) short name.
     short_name = Truncator(
         ImportHelper.create_shortname(institution_name)).chars(32)
     create_kwargs = {
         'institution_shortname': short_name,
         'institution_fullname': institution_name,
         'institution_slug': slugify_text_title(short_name),
     }
     if parent:
         create_kwargs['institution_parent'] = parent

     created = Institution.objects.create(**create_kwargs)
     self.institutions_dict[cache_key] = created

     created_id = str(created.institution_id)
     logger.info(self.LOG_MSG_SUCC_DATABASE_CREATE_INST +
                 'get_or_create_institution_object institution_name=' +
                 institution_name + ' institution_id=' + created_id)
     report_fields = (self.REPORT_INSTITUTION_CREATED, created_id,
                      '', '', '', institution_name)
     import_report.info(';'.join(report_fields))
     return created
Beispiel #2
0
 def __init__(self):
     """Load the lookup caches used by the dissertation import.

     Scientific titles, institutions, people and dissertations are read
     from the database once and indexed into dictionaries and sets on
     ``self``.

     :raises IndexError: when no ``ScientificTitle`` abbreviation contains
         one of the marker substrings (the ``[0]`` lookups below).
     """
     # scientific titles
     self.MARK_PROF = 'prof'
     self.MARK_DR = 'dr'
     # NOTE(review): same value as MARK_PROF, so _mgr_t resolves to the same
     # title as _prof_t. Looks like a copy/paste slip ('mgr'?) - confirm
     # against the ScientificTitle table before changing.
     self.MARK_MGR = 'prof'
     scientific_titles = list(ScientificTitle.objects.all())
     # Each lookup takes the first title whose abbreviation contains the marker.
     self._prof_t = [t for t in scientific_titles
                     if self.MARK_PROF in t.scientific_title_abbreviation][0]
     self._dr_t = [t for t in scientific_titles
                   if self.MARK_DR in t.scientific_title_abbreviation][0]
     self._mgr_t = [t for t in scientific_titles
                    if self.MARK_MGR in t.scientific_title_abbreviation][0]

     # institutions keyed by their normalised dict key
     institutions = Institution.objects.all()
     self.institutions_dict = {
         ImportHelper.create_dict_key(i.get_as_dict_key): i
         for i in institutions
     }

     # people dictionary and set of ids
     people = list(Person.objects.all())
     self.people_ids = {p.person_id for p in people}
     self.people_dict = {
         ImportHelper.create_dict_key(p.person_first_name,
                                      p.person_last_name): p.person_id
         for p in people
     }
     self.dict_opi_id_person_id = {
         p.person_opi_id: p.person_id for p in people
     }
     # Guarded like max_dissertation_id below: an empty Person table must
     # not crash the constructor with "max() arg is an empty sequence".
     self.max_person_id = max(self.people_ids) if self.people_ids else 0

     # role
     self.MARK_AUTHOR = 'autor'
     self.MARK_REVIEWER = 'recenz'
     self.MARK_SUPERVISOR = 'promot'

     # dissertations dictionary
     dissertations = list(Dissertation.objects.all().prefetch_related(
         "dissertation_institution", "dissertation_supervisors",
         "dissertation_reviewers", "dissertation_author"))
     self.dissertations_ids_dict = {
         d.dissertation_id: d for d in dissertations
     }
     self.dissertations_ids = {d.dissertation_id for d in dissertations}
     self.dissertations_opi_ids = {
         d.dissertation_opi_id for d in dissertations
     }
     self.dict_opi_id_dissertation_id = {
         d.dissertation_opi_id: d.dissertation_id for d in dissertations
     }
     self.max_dissertation_id = (
         max(self.dissertations_ids) if self.dissertations_ids else 0)
     self.dissertations_dict = {
         ImportHelper.create_dict_key(d.dissertation_title_text):
             d.dissertation_id
         for d in dissertations
     }
Beispiel #3
0
 def check_project_title(self, ptitle):
     """Look up a project title in ``self.projects_dict``.

     :param ptitle: project title; normalised with
         ``ImportHelper.create_dict_key`` before the lookup.
     :return: two-element list ``[found, project_id]``; ``project_id`` is
         ``None`` when the title is not known.
     """
     lookup_key = ImportHelper.create_dict_key(ptitle)
     if lookup_key in self.projects_dict:
         return [True, self.projects_dict[lookup_key]]
     return [False, None]
 def check_person_name(self, fname, lname):
     """Look up a person by first and last name in ``self.people_dict``.

     :param fname: first name.
     :param lname: last name.
     :return: two-element list ``[found, person_id]``; ``person_id`` is
         ``None`` when no entry exists for the key built by
         ``ImportHelper.create_dict_key(fname, lname)``.
     """
     lookup_key = ImportHelper.create_dict_key(fname, lname)
     if lookup_key in self.people_dict:
         return [True, self.people_dict[lookup_key]]
     return [False, None]
Beispiel #5
0
 def check_dissertation_title(self, dtitle):
     """Look up a dissertation title in ``self.dissertations_dict``.

     :param dtitle: dissertation title; normalised with
         ``ImportHelper.create_dict_key`` before the lookup.
     :return: two-element list ``[found, dissertation_id]``;
         ``dissertation_id`` is ``None`` when the title is not known.
     """
     lookup_key = ImportHelper.create_dict_key(dtitle)
     if lookup_key in self.dissertations_dict:
         return [True, self.dissertations_dict[lookup_key]]
     return [False, None]
Beispiel #6
0
 def get_dissertation_data(self, row):
     """Normalise one imported dissertation row in place and report duplicates.

     Steps, in order:

     * institution - unless the value is already an ``int``, it is passed
       to ``dissertation_import_helper.get_institutions`` and replaced by
       the ``institution_id`` of the first result (``None`` if there is
       none);
     * title - the tag-stripped title and its slug are stored next to the
       original title;
     * dates - both date columns go through
       ``ImportHelper.create_date_isoformat``;
     * type - replaced by the result of
       ``dissertation_import_helper.get_dissertation_type``;
     * duplicates - the plain-text title is checked with
       ``check_dissertation_title`` and the outcome is written to
       ``logger`` / ``import_report``.

     :param row: dict-like import row; it is mutated. The date, type and id
         columns are read with ``row[...]`` and therefore must be present.
     :return: the same ``row`` object.
     """
     # get dissertation_institutions
     raw_institutions = row.get(self.COLUMN_DISSERTATION_INSTITUTION, '')
     if not isinstance(raw_institutions, int):
         institutions = self.dissertation_import_helper.get_institutions(
             raw_institutions)
         row[self.COLUMN_DISSERTATION_INSTITUTION] = (
             institutions[0].institution_id if institutions else None)

     # get dissertation_title and title_slug
     # The OPI id may arrive as a non-string value; ';'.join() below accepts
     # strings only, so it is converted once here.
     opi_id_text = str(row.get(self.COLUMN_DISSERTATION_OPI_ID, ''))
     row_dissertation_title = row.get(self.COLUMN_DISSERTATION_TITLE, '')
     text_title = strip_tags(row_dissertation_title)
     row[self.COLUMN_DISSERTATION_TITLE_TEXT] = text_title
     row[self.COLUMN_DISSERTATION_TITLE_SLUG] = slugify_text_title(text_title)
     row[self.COLUMN_DISSERTATION_TITLE] = row_dissertation_title

     # get date
     date_start = row[self.COLUMN_DISSERTATION_DATE_START]
     row[self.COLUMN_DISSERTATION_DATE_START] = (
         ImportHelper.create_date_isoformat(date_start))
     date_end = row[self.COLUMN_DISSERTATION_DATE_END]
     row[self.COLUMN_DISSERTATION_DATE_END] = (
         ImportHelper.create_date_isoformat(date_end))

     # get type
     row_dissertation_type = row[self.COLUMN_DISSERTATION_TYPE]
     row[self.COLUMN_DISSERTATION_TYPE] = (
         self.dissertation_import_helper.get_dissertation_type(
             row_dissertation_type))

     # check for duplicates
     dissertation_key_exist, dissertation_duplicate_id = (
         self.dissertation_import_helper.check_dissertation_title(text_title))
     dissertation_id = row[self.COLUMN_DISSERTATION_ID]
     if dissertation_key_exist:
         if dissertation_id == dissertation_duplicate_id:
             # Same id and same title: the row updates an existing record.
             logger.warning(
                 'Dissertation update. Found dissertation with the same id: dissertation_title='
                 + text_title + ' dissertation_id=' + str(dissertation_id))
             import_report.warning(';'.join(
                 (self.REPORT_DISSERTATION_UPDATE, str(dissertation_id),
                  text_title, opi_id_text)))
         elif dissertation_id is not None:
             # Same title under a different id: report both records.
             import_report.warning(';'.join(
                 (self.REPORT_DISSERTATION_NEW_EXIST_DUPLICATES,
                  str(dissertation_id), text_title, opi_id_text)))
             import_report.warning(';'.join(
                 (self.REPORT_DISSERTATION_DUPLICATE,
                  str(dissertation_duplicate_id))))
             logger.warning(self.LOG_MSG_DUPLICATE_OBJECT)
             logger.warning('Existing object: dissertation_title=' +
                            text_title + ' dissertation_id=' +
                            str(dissertation_id))
             logger.warning('Duplicate object: ' + ' dissertation_id=' +
                            str(dissertation_duplicate_id))
     else:
         import_report.info(';'.join(
             (self.REPORT_DISSERTATION_NEW, str(dissertation_id),
              text_title, opi_id_text)))
     return row
Beispiel #7
0
 def get_institutions(self, institutions_names_str):
     """Resolve a raw institutions string into ``Institution`` objects.

     The string is split by ``ImportHelper.untangle_institutions_names``
     into three-element entries, of which the first two are used as the
     university and faculty name. Entries without a university name are
     skipped. For every other entry the university is fetched or created;
     when a faculty name is present the faculty is fetched or created with
     the university as its parent and is the object that gets returned.

     :param institutions_names_str: raw institutions value from the import.
     :return: list of institutions, one per entry that has a university
         name.
     """
     resolved = []
     for triple in ImportHelper.untangle_institutions_names(
             institutions_names_str):
         university_name, faculty_name, _ = triple
         if not university_name:
             continue
         institution = self.get_or_create_institution_object(university_name)
         if faculty_name:
             # The faculty hangs under the university just resolved.
             institution = self.get_or_create_institution_object(
                 faculty_name, institution)
         resolved.append(institution)
     return resolved
 def get_or_create_discipline_object(self, discipline_name):
     """Return the cached research discipline for a name, creating it on a miss.

     The cache key comes from ``ImportHelper.create_dict_key``. A missing
     discipline is created with the capitalised name as
     ``discipline_fullname``, stored in ``self.disciplines_dict`` and
     reported through ``logger`` and ``import_report``.

     :param discipline_name: discipline name (string).
     :return: the cached or freshly created ``ResearchDiscipline``.
     """
     cache_key = ImportHelper.create_dict_key(discipline_name)

     # Cache hit: nothing to create or report.
     if cache_key in self.disciplines_dict:
         return self.disciplines_dict[cache_key]

     created = ResearchDiscipline.objects.create(
         discipline_fullname=discipline_name.capitalize())
     self.disciplines_dict[cache_key] = created

     created_id = str(created.discipline_id)
     logger.info(self.LOG_MSG_SUCC_DATABASE_CREATE_DISC +
                 'get_or_create_discipline_object discipline_name=' +
                 discipline_name + ' discipline_id=' + created_id)
     report_fields = (self.REPORT_DISCIPLINE_CREATED, created_id,
                      '', '', '', discipline_name)
     import_report.info(';'.join(report_fields))
     return created
Beispiel #9
0
    def get_project_data(self, row):
        """Normalise one imported project row in place and report duplicates.

        The institutions value is resolved (or taken as-is when it is not a
        string) and kept on ``self.institutions``; the row's institutions
        column is reset to ``None``. The plain-text title and slug are
        derived from the title, both date columns are converted with
        ``ImportHelper.create_date_isoformat``, and the title is checked
        against known projects with the outcome written to ``logger`` and
        ``import_report``.

        :param row: dict-like import row; it is mutated. The date and id
            columns are read with ``row[...]`` and therefore must be
            present.
        :return: the same ``row`` object.
        """
        # Institutions: a string still has to be resolved into objects.
        raw_institutions = row.get(self.COLUMN_PROJECT_INSTITUTIONS, '')
        if not isinstance(raw_institutions, str):
            self.institutions = raw_institutions
        else:
            self.institutions = self.project_import_helper.get_institutions(
                raw_institutions)
        # The column is filled in again after the instance has been saved.
        row[self.COLUMN_PROJECT_INSTITUTIONS] = None

        # Title, plain-text title and slug.
        opi_id = row.get(self.COLUMN_PROJECT_OPI_ID, '')
        original_title = row.get(self.COLUMN_PROJECT_TITLE, '')
        text_title = strip_tags(original_title)
        row[self.COLUMN_PROJECT_TITLE_TEXT] = text_title
        row[self.COLUMN_PROJECT_TITLE_SLUG] = slugify_text_title(text_title)
        row[self.COLUMN_PROJECT_TITLE] = original_title

        # Start and end dates get the same conversion.
        for date_column in (self.COLUMN_PROJECT_DATE_START,
                            self.COLUMN_PROJECT_DATE_END):
            row[date_column] = ImportHelper.create_date_isoformat(
                row[date_column])

        # Duplicate detection by plain-text title.
        title_known, known_id = (
            self.project_import_helper.check_project_title(text_title))
        project_id = row[self.COLUMN_PROJECT_ID]

        if not title_known:
            new_fields = (self.REPORT_PROJECT_NEW, str(project_id),
                          text_title)
            import_report.info(';'.join(new_fields))
        elif project_id == known_id:
            # Same id and same title: the row updates an existing project.
            logger.warning(
                'Project update. Found project with the same id: project_title='
                + text_title + ' project_id=' + str(project_id))
            update_fields = (self.REPORT_PROJECT_UPDATE, str(project_id),
                             text_title, str(opi_id))
            import_report.warning(';'.join(update_fields))
        elif project_id is not None:
            # Same title under a different id: report both records.
            logger.warning(self.LOG_MSG_DUPLICATE_OBJECT)
            logger.warning('Existing object: project_title=' + text_title +
                           ' project_id=' + str(project_id))
            logger.warning('Duplicate object: ' + ' project_id=' +
                           str(known_id))
            existing_fields = (self.REPORT_PROJECT_NEW_EXIST_DUPLICATES,
                               str(project_id), text_title, str(opi_id))
            import_report.warning(';'.join(existing_fields))
            duplicate_fields = (self.REPORT_PROJECT_DUPLICATE, str(known_id))
            import_report.warning(';'.join(duplicate_fields))

        # TODO: project type handling is disabled; check if it will be used.
        return row
Beispiel #10
0
    def __init__(self):
        """Load the lookup caches used by the project import.

        Scientific titles, institutions, people, institution roles,
        projects and project participants are read from the database once
        and indexed into dictionaries, sets and lists on ``self``.

        :raises IndexError: when no ``ScientificTitle`` abbreviation or
            ``InstitutionRole`` role contains one of the marker substrings
            (the ``[0]`` lookups below).
        """
        # scientific titles
        self.MARK_PROF = 'prof'
        self.MARK_DR = 'dr'
        # NOTE(review): same value as MARK_PROF, so _mgr_t resolves to the
        # same title as _prof_t. Looks like a copy/paste slip ('mgr'?) -
        # confirm against the ScientificTitle table before changing.
        self.MARK_MGR = 'prof'
        scientific_titles = ScientificTitle.objects.all()
        # Each lookup takes the first title whose abbreviation contains the
        # marker.
        self._prof_t = [
            t for t in scientific_titles
            if self.MARK_PROF in t.scientific_title_abbreviation
        ][0]
        self._dr_t = [
            t for t in scientific_titles
            if self.MARK_DR in t.scientific_title_abbreviation
        ][0]
        self._mgr_t = [
            t for t in scientific_titles
            if self.MARK_MGR in t.scientific_title_abbreviation
        ][0]

        # institutions keyed by their normalised dict key
        institutions = Institution.objects.all()
        self.institutions_dict = {
            ImportHelper.create_dict_key(i.get_as_dict_key): i
            for i in institutions
        }

        # people dictionary and set of ids
        people = Person.objects.all()
        self.people_ids = {p.person_id for p in people}
        self.people_dict = {
            ImportHelper.create_dict_key(p.person_first_name,
                                         p.person_last_name): p.person_id
            for p in people
        }
        self.people_ids_dict = {p.person_id: p for p in people}
        self.dict_opi_id_person_id = {
            p.person_opi_id: p.person_id for p in people
        }
        # Guarded like max_project_id below: an empty Person table must not
        # crash the constructor with "max() arg is an empty sequence".
        self.max_person_id = max(self.people_ids) if self.people_ids else 0

        # role
        self.MARK_DIRECTOR = 'kierow'
        self.MARK_MAIN_CONTRACTOR = 'główny'
        self.MARK_DOCTORAL = 'doktoran'

        # institution role
        self.MARK_INST_DIRECTOR = 'kier'
        self.MARK_INST_COWORK = 'wsp'
        self.MARK_INST_RELATED = 'powi'
        institution_roles = InstitutionRole.objects.all()
        self._roleinst_director = [
            r for r in institution_roles
            if self.MARK_INST_DIRECTOR in r.institution_role_role
        ][0]
        self._roleinst_cowork = [
            r for r in institution_roles
            if self.MARK_INST_COWORK in r.institution_role_role
        ][0]
        self._role_inst_related = [
            r for r in institution_roles
            if self.MARK_INST_RELATED in r.institution_role_role
        ][0]

        # projects dictionary
        projects = list(Project.objects.all().prefetch_related(
            "project_disciplines", "project_targets", "project_institutions",
            "project_participants", "project_person_participations"))
        self.projects_ids_dict = {p.project_id: p for p in projects}
        self.projects_ids = {p.project_id for p in projects}
        self.projects_opi_ids = {p.project_opi_id for p in projects}
        self.dict_opi_id_project_id = {
            p.project_opi_id: p.project_id for p in projects
        }
        self.max_project_id = (
            max(self.projects_ids) if self.projects_ids else 0)
        self.projects_dict = {
            ImportHelper.create_dict_key(p.project_title_text): p.project_id
            for p in projects
        }

        # cache
        # select_related loads pp.project and pp.person together with each
        # participant, instead of one extra query per row in the loops
        # below.
        self.all_participants = list(
            ProjectParticipant.objects.filter(
                is_principal=False).select_related('project', 'person'))
        self.all_directors = list(
            ProjectParticipant.objects.filter(
                is_principal=True).select_related('project', 'person'))
        self.projects_participants = {}
        self.projects_directors = {}

        # project_id -> list of person ids of non-principal participants
        for pp in self.all_participants:
            self.projects_participants.setdefault(
                pp.project.project_id, []).append(pp.person.person_id)

        # project_id -> list of person ids of principal participants
        for pp in self.all_directors:
            self.projects_directors.setdefault(
                pp.project.project_id, []).append(pp.person.person_id)
    def __init__(self):
        """Load the lookup caches used by the person import.

        Scientific titles, institutions, research disciplines, people and
        person affiliations are read from the database once and indexed
        into dictionaries and lists on ``self``.

        :raises IndexError: when no ``ScientificTitle`` abbreviation
            contains one of the marker substrings (the ``[0]`` lookups
            below).
        """
        # scientific titles
        self.MARK_PROF = 'prof'
        self.MARK_DR = 'dr'
        # NOTE(review): same value as MARK_PROF, so _mgr_t resolves to the
        # same title as _prof_t. Looks like a copy/paste slip ('mgr'?) -
        # confirm against the ScientificTitle table before changing.
        self.MARK_MGR = 'prof'

        scientific_titles = list(ScientificTitle.objects.all())
        # Each lookup takes the first title whose abbreviation contains the
        # marker.
        self._prof_t = [
            t for t in scientific_titles
            if self.MARK_PROF in t.scientific_title_abbreviation
        ][0]
        self._dr_t = [
            t for t in scientific_titles
            if self.MARK_DR in t.scientific_title_abbreviation
        ][0]
        self._mgr_t = [
            t for t in scientific_titles
            if self.MARK_MGR in t.scientific_title_abbreviation
        ][0]

        # institutions keyed by their normalised dict key
        institutions = list(Institution.objects.all())
        self.institutions_dict = {
            ImportHelper.create_dict_key(i.get_as_dict_key): i
            for i in institutions
        }

        # disciplines keyed by their normalised full name
        disciplines = list(ResearchDiscipline.objects.all())
        self.disciplines_dict = {
            ImportHelper.create_dict_key(d.discipline_fullname): d
            for d in disciplines
        }

        # people dictionary and lists of ids
        people = list(
            Person.objects.all().prefetch_related('person_disciplines'))
        self.people_dict = {
            ImportHelper.create_dict_key(p.person_first_name,
                                         p.person_last_name): p.person_id
            for p in people
        }
        self.people_ids = [p.person_id for p in people]
        self.people_opi_ids = [p.person_opi_id for p in people]
        self.dict_opi_id_person_id = {
            p.person_opi_id: p.person_id for p in people
        }
        # An empty Person table must not crash the constructor with
        # "max() arg is an empty sequence"; fall back to 0.
        self.max_person_id = max(self.people_ids) if self.people_ids else 0

        # cache
        affiliations = list(
            PersonAffiliation.objects.filter(
                is_principal=False).select_related('institution', 'person'))
        principal_affiliations = list(
            PersonAffiliation.objects.filter(is_principal=True).select_related(
                'institution', 'person'))
        self.person_affiliations = {}
        self.person_principal_affiliations = {}

        # person_id -> list of institution ids (non-principal affiliations)
        for pa in affiliations:
            self.person_affiliations.setdefault(
                pa.person.person_id, []).append(pa.institution.institution_id)

        # person_id -> list of institution ids (principal affiliations)
        for pa in principal_affiliations:
            self.person_principal_affiliations.setdefault(
                pa.person.person_id, []).append(pa.institution.institution_id)

        # person_id -> list of that person's disciplines
        self.person_disciplines = {
            p.person_id: list(p.person_disciplines.all()) for p in people
        }