def get_or_create_institution_object(self, institution_name, parent=None):
    """Return the cached institution for a name, creating it on a cache miss.

    The cache key is derived from ``institution_name`` and, when ``parent``
    is given, the parent's ``institution_shortname`` appended to it. On a
    miss a new ``Institution`` is created, stored in
    ``self.institutions_dict``, logged and written to the import report.

    Args:
        institution_name: full name of the institution.
        parent: optional parent ``Institution`` (e.g. the university a
            faculty belongs to).

    Returns:
        The cached or newly created ``Institution``.
    """
    raw_key = institution_name
    if parent is not None:
        raw_key = raw_key + parent.institution_shortname
    cache_key = ImportHelper.create_dict_key(raw_key)

    if cache_key in self.institutions_dict:
        return self.institutions_dict[cache_key]

    # The short name is limited to 32 characters; per the original note the
    # truncator also appends an ellipsis when it has to cut the text.
    shortname = Truncator(
        ImportHelper.create_shortname(institution_name)).chars(32)
    create_kwargs = {
        'institution_shortname': shortname,
        'institution_fullname': institution_name,
        'institution_slug': slugify_text_title(shortname),
    }
    if parent:
        create_kwargs['institution_parent'] = parent
    created = Institution.objects.create(**create_kwargs)
    self.institutions_dict[cache_key] = created

    created_id = str(created.institution_id)
    logger.info(
        self.LOG_MSG_SUCC_DATABASE_CREATE_INST
        + 'get_or_create_institution_object institution_name='
        + institution_name
        + ' institution_id='
        + created_id)
    report_fields = [
        self.REPORT_INSTITUTION_CREATED, created_id, '', '', '',
        institution_name,
    ]
    import_report.info(';'.join(report_fields))
    return created
def __init__(self):
    """Load the lookup tables used while importing dissertations.

    Reads scientific titles, institutions, people and dissertations from
    the database and indexes them by id, OPI id and normalised dictionary
    key so later checks do not need further queries.

    Raises:
        IndexError: if no scientific title abbreviation contains one of the
            ``MARK_*`` title markers.
    """
    # scientific titles
    self.MARK_PROF = 'prof'
    self.MARK_DR = 'dr'
    # NOTE(review): same marker as MARK_PROF, so _mgr_t resolves to the same
    # title as _prof_t. Looks like a copy-paste slip; confirm the intended
    # marker before changing it (a marker with no match raises IndexError).
    self.MARK_MGR = 'prof'
    scientific_titles = list(ScientificTitle.objects.all())
    self._prof_t = [
        t for t in scientific_titles
        if self.MARK_PROF in t.scientific_title_abbreviation
    ][0]
    self._dr_t = [
        t for t in scientific_titles
        if self.MARK_DR in t.scientific_title_abbreviation
    ][0]
    self._mgr_t = [
        t for t in scientific_titles
        if self.MARK_MGR in t.scientific_title_abbreviation
    ][0]

    institutions = Institution.objects.all()
    self.institutions_dict = {
        ImportHelper.create_dict_key(i.get_as_dict_key): i
        for i in institutions
    }

    # people dictionary and set of ids
    people = list(Person.objects.all())
    self.people_ids = {p.person_id for p in people}
    self.people_dict = {
        ImportHelper.create_dict_key(p.person_first_name, p.person_last_name):
            p.person_id
        for p in people
    }
    self.dict_opi_id_person_id = {
        p.person_opi_id: p.person_id for p in people
    }
    # Guard against an empty Person table, mirroring max_dissertation_id
    # below; a bare max() on an empty set raises ValueError.
    self.max_person_id = max(self.people_ids) if self.people_ids else 0

    # role
    self.MARK_AUTHOR = 'autor'
    self.MARK_REVIEWER = 'recenz'
    self.MARK_SUPERVISOR = 'promot'

    # dissertations dictionary
    dissertations = list(Dissertation.objects.all().prefetch_related(
        "dissertation_institution", "dissertation_supervisors",
        "dissertation_reviewers", "dissertation_author"))
    self.dissertations_ids_dict = {
        d.dissertation_id: d for d in dissertations
    }
    self.dissertations_ids = {d.dissertation_id for d in dissertations}
    self.dissertations_opi_ids = {
        d.dissertation_opi_id for d in dissertations
    }
    self.dict_opi_id_dissertation_id = {
        d.dissertation_opi_id: d.dissertation_id for d in dissertations
    }
    self.max_dissertation_id = (
        max(self.dissertations_ids) if self.dissertations_ids else 0)
    self.dissertations_dict = {
        ImportHelper.create_dict_key(d.dissertation_title_text):
            d.dissertation_id
        for d in dissertations
    }
def check_project_title(self, ptitle):
    """Look up a project title among the already known projects.

    Args:
        ptitle: project title text to check.

    Returns:
        A two-item list ``[exists, project_id]``; ``project_id`` is ``None``
        when the normalised title is not in ``self.projects_dict``.
    """
    lookup_key = ImportHelper.create_dict_key(ptitle)
    known_projects = self.projects_dict
    if lookup_key not in known_projects:
        return [False, None]
    return [True, known_projects[lookup_key]]
def check_person_name(self, fname, lname):
    """Look up a person by first and last name among the known people.

    Args:
        fname: first name.
        lname: last name.

    Returns:
        A two-item list ``[exists, person_id]``; ``person_id`` is ``None``
        when the normalised name pair is not in ``self.people_dict``.
    """
    lookup_key = ImportHelper.create_dict_key(fname, lname)
    known_people = self.people_dict
    if lookup_key not in known_people:
        return [False, None]
    return [True, known_people[lookup_key]]
def check_dissertation_title(self, dtitle):
    """Look up a dissertation title among the already known dissertations.

    Args:
        dtitle: dissertation title text to check.

    Returns:
        A two-item list ``[exists, dissertation_id]``; ``dissertation_id``
        is ``None`` when the normalised title is not in
        ``self.dissertations_dict``.
    """
    lookup_key = ImportHelper.create_dict_key(dtitle)
    known_dissertations = self.dissertations_dict
    if lookup_key not in known_dissertations:
        return [False, None]
    return [True, known_dissertations[lookup_key]]
def get_dissertation_data(self, row):
    """Normalise a dissertation import row in place and report duplicates.

    The institution column is replaced by the id of the first institution
    resolved from its text (or ``None``), the title gains tag-stripped and
    slug variants, both dates go through
    ``ImportHelper.create_date_isoformat`` and the type is mapped by the
    import helper. The tag-stripped title is then checked against the known
    dissertations and the outcome is written to the logger and the import
    report.

    Args:
        row: dict-like import row keyed by the ``COLUMN_DISSERTATION_*``
            names; it is modified in place.

    Returns:
        The same ``row`` object.

    Raises:
        KeyError: if ``row`` is a dict lacking the date, type or id column.
    """
    helper = self.dissertation_import_helper

    # get dissertation_institutions
    # presumably an int here is an already resolved institution id
    row_dissertation_institutions = row.get(
        self.COLUMN_DISSERTATION_INSTITUTION, '')
    if not isinstance(row_dissertation_institutions, int):
        institutions = helper.get_institutions(row_dissertation_institutions)
        row[self.COLUMN_DISSERTATION_INSTITUTION] = (
            institutions[0].institution_id if institutions else None)

    # get dissertation_title and title_slug
    # The OPI id may not be a string (e.g. an int or None); str.join() only
    # accepts strings, so convert once here as get_project_data does.
    row_dissertation_opi_id = str(
        row.get(self.COLUMN_DISSERTATION_OPI_ID, ''))
    row_dissertation_title = row.get(self.COLUMN_DISSERTATION_TITLE, '')
    text_title = strip_tags(row_dissertation_title)
    row[self.COLUMN_DISSERTATION_TITLE_TEXT] = text_title
    row[self.COLUMN_DISSERTATION_TITLE_SLUG] = slugify_text_title(text_title)
    row[self.COLUMN_DISSERTATION_TITLE] = row_dissertation_title

    # get date
    date_start = row[self.COLUMN_DISSERTATION_DATE_START]
    row[self.COLUMN_DISSERTATION_DATE_START] = (
        ImportHelper.create_date_isoformat(date_start))
    date_end = row[self.COLUMN_DISSERTATION_DATE_END]
    row[self.COLUMN_DISSERTATION_DATE_END] = (
        ImportHelper.create_date_isoformat(date_end))

    # get type
    row_dissertation_type = row[self.COLUMN_DISSERTATION_TYPE]
    row[self.COLUMN_DISSERTATION_TYPE] = helper.get_dissertation_type(
        row_dissertation_type)

    # check for duplicates
    dissertation_key_exist, dissertation_duplicate_id = (
        helper.check_dissertation_title(text_title))
    dissertation_id = row[self.COLUMN_DISSERTATION_ID]
    if dissertation_key_exist:
        if dissertation_id == dissertation_duplicate_id:
            # Same title and same id: the row updates an existing record.
            logger.warning(
                'Dissertation update. Found dissertation with the same id: '
                'dissertation_title=' + text_title
                + ' dissertation_id=' + str(dissertation_id))
            import_report.warning(';'.join((
                self.REPORT_DISSERTATION_UPDATE, str(dissertation_id),
                text_title, row_dissertation_opi_id)))
        elif dissertation_id is not None:
            # Same title under a different id: report both records.
            import_report.warning(';'.join((
                self.REPORT_DISSERTATION_NEW_EXIST_DUPLICATES,
                str(dissertation_id), text_title, row_dissertation_opi_id)))
            import_report.warning(';'.join((
                self.REPORT_DISSERTATION_DUPLICATE,
                str(dissertation_duplicate_id))))
            logger.warning(self.LOG_MSG_DUPLICATE_OBJECT)
            logger.warning(
                'Existing object: dissertation_title=' + text_title
                + ' dissertation_id=' + str(dissertation_id))
            logger.warning(
                'Duplicate object: ' + ' dissertation_id='
                + str(dissertation_duplicate_id))
    else:
        import_report.info(';'.join((
            self.REPORT_DISSERTATION_NEW, str(dissertation_id), text_title,
            row_dissertation_opi_id)))
    return row
def get_institutions(self, institutions_names_str):
    """Resolve a text listing of institutions into institution objects.

    The text is split by ``ImportHelper.untangle_institutions_names`` into
    triples whose first two items are a university name and a faculty name.
    Entries without a university name are skipped. When a faculty name is
    present the faculty (created under its university) is returned for that
    entry, otherwise the university itself.

    Args:
        institutions_names_str: raw institutions text from the import row.

    Returns:
        A list of institution objects, one per entry with a university name.
    """
    resolved = []
    triples = ImportHelper.untangle_institutions_names(institutions_names_str)
    for university_name, faculty_name, _ in triples:
        if not university_name:
            continue
        university = self.get_or_create_institution_object(university_name)
        if not faculty_name:
            resolved.append(university)
            continue
        resolved.append(
            self.get_or_create_institution_object(faculty_name, university))
    return resolved
def get_or_create_discipline_object(self, discipline_name):
    """Return the cached research discipline for a name, creating it if new.

    On a cache miss a ``ResearchDiscipline`` is created with the capitalised
    name, stored in ``self.disciplines_dict``, logged and written to the
    import report.

    Args:
        discipline_name: discipline name as found in the import data.

    Returns:
        The cached or newly created ``ResearchDiscipline``.
    """
    cache_key = ImportHelper.create_dict_key(discipline_name)
    if cache_key in self.disciplines_dict:
        return self.disciplines_dict[cache_key]

    created = ResearchDiscipline.objects.create(
        discipline_fullname=discipline_name.capitalize())
    self.disciplines_dict[cache_key] = created

    created_id = str(created.discipline_id)
    logger.info(
        self.LOG_MSG_SUCC_DATABASE_CREATE_DISC
        + 'get_or_create_discipline_object discipline_name='
        + discipline_name
        + ' discipline_id='
        + created_id)
    report_fields = [
        self.REPORT_DISCIPLINE_CREATED, created_id, '', '', '',
        discipline_name,
    ]
    import_report.info(';'.join(report_fields))
    return created
def get_project_data(self, row):
    """Normalise a project import row in place and report duplicates.

    The institutions value is resolved (when given as text) and kept on
    ``self.institutions``, while the row's institutions column is cleared.
    The title gains tag-stripped and slug variants, both dates go through
    ``ImportHelper.create_date_isoformat`` and the tag-stripped title is
    checked against the known projects; the outcome is written to the
    logger and the import report.

    Args:
        row: dict-like import row keyed by the ``COLUMN_PROJECT_*`` names;
            it is modified in place.

    Returns:
        The same ``row`` object.
    """
    # Institutions: text is resolved to objects, anything else is kept as is.
    raw_institutions = row.get(self.COLUMN_PROJECT_INSTITUTIONS, '')
    self.institutions = (
        self.project_import_helper.get_institutions(raw_institutions)
        if isinstance(raw_institutions, str) else raw_institutions)
    # The column is rewritten after the instance has been saved.
    row[self.COLUMN_PROJECT_INSTITUTIONS] = None

    # Title, its tag-stripped text and slug.
    opi_id = row.get(self.COLUMN_PROJECT_OPI_ID, '')
    raw_title = row.get(self.COLUMN_PROJECT_TITLE, '')
    text_title = strip_tags(raw_title)
    row[self.COLUMN_PROJECT_TITLE_TEXT] = text_title
    row[self.COLUMN_PROJECT_TITLE_SLUG] = slugify_text_title(text_title)
    row[self.COLUMN_PROJECT_TITLE] = raw_title

    # Dates.
    for date_column in (self.COLUMN_PROJECT_DATE_START,
                        self.COLUMN_PROJECT_DATE_END):
        row[date_column] = ImportHelper.create_date_isoformat(
            row[date_column])

    # Duplicate check on the tag-stripped title.
    title_known, duplicate_id = (
        self.project_import_helper.check_project_title(text_title))
    project_id = row[self.COLUMN_PROJECT_ID]

    if not title_known:
        import_report.info(';'.join(
            [self.REPORT_PROJECT_NEW, str(project_id), text_title]))
    elif project_id == duplicate_id:
        # Same title and same id: the row updates an existing project.
        logger.warning(
            'Project update. Found project with the same id: project_title='
            + text_title + ' project_id=' + str(project_id))
        import_report.warning(';'.join(
            [self.REPORT_PROJECT_UPDATE, str(project_id), text_title,
             str(opi_id)]))
    elif project_id is not None:
        # Same title under a different id: report both records.
        logger.warning(self.LOG_MSG_DUPLICATE_OBJECT)
        logger.warning('Existing object: project_title=' + text_title
                       + ' project_id=' + str(project_id))
        logger.warning('Duplicate object: ' + ' project_id='
                       + str(duplicate_id))
        import_report.warning(';'.join(
            [self.REPORT_PROJECT_NEW_EXIST_DUPLICATES, str(project_id),
             text_title, str(opi_id)]))
        import_report.warning(';'.join(
            [self.REPORT_PROJECT_DUPLICATE, str(duplicate_id)]))

    # TODO: project type mapping is currently disabled; check whether it
    # will be used:
    # row_project_type = row[self.COLUMN_PROJECT_TYPE]
    # project_type = self.project_import_helper.get_project_type(row_project_type)
    # row[self.COLUMN_PROJECT_TYPE] = project_type
    return row
def __init__(self):
    """Load the lookup tables used while importing projects.

    Reads scientific titles, institutions, people, institution roles,
    projects and project participants from the database and indexes them by
    id, OPI id and normalised dictionary key so later checks do not need
    further queries.

    Raises:
        IndexError: if no scientific title abbreviation or institution role
            contains one of the ``MARK_*`` markers.
    """
    # scientific titles
    self.MARK_PROF = 'prof'
    self.MARK_DR = 'dr'
    # NOTE(review): same marker as MARK_PROF, so _mgr_t resolves to the same
    # title as _prof_t. Looks like a copy-paste slip; confirm the intended
    # marker before changing it (a marker with no match raises IndexError).
    self.MARK_MGR = 'prof'
    scientific_titles = list(ScientificTitle.objects.all())
    self._prof_t = [
        t for t in scientific_titles
        if self.MARK_PROF in t.scientific_title_abbreviation
    ][0]
    self._dr_t = [
        t for t in scientific_titles
        if self.MARK_DR in t.scientific_title_abbreviation
    ][0]
    self._mgr_t = [
        t for t in scientific_titles
        if self.MARK_MGR in t.scientific_title_abbreviation
    ][0]

    institutions = list(Institution.objects.all())
    self.institutions_dict = {
        ImportHelper.create_dict_key(i.get_as_dict_key): i
        for i in institutions
    }

    # people dictionary and set of ids
    people = list(Person.objects.all())
    self.people_ids = {p.person_id for p in people}
    self.people_dict = {
        ImportHelper.create_dict_key(p.person_first_name, p.person_last_name):
            p.person_id
        for p in people
    }
    self.people_ids_dict = {p.person_id: p for p in people}
    self.dict_opi_id_person_id = {
        p.person_opi_id: p.person_id for p in people
    }
    # Guard against an empty Person table, mirroring max_project_id below;
    # a bare max() on an empty set raises ValueError.
    self.max_person_id = max(self.people_ids) if self.people_ids else 0

    # role
    self.MARK_DIRECTOR = 'kierow'
    self.MARK_MAIN_CONTRACTOR = 'główny'
    self.MARK_DOCTORAL = 'doktoran'

    # institution role
    self.MARK_INST_DIRECTOR = 'kier'
    self.MARK_INST_COWORK = 'wsp'
    self.MARK_INST_RELATED = 'powi'
    institution_roles = list(InstitutionRole.objects.all())
    self._roleinst_director = [
        r for r in institution_roles
        if self.MARK_INST_DIRECTOR in r.institution_role_role
    ][0]
    self._roleinst_cowork = [
        r for r in institution_roles
        if self.MARK_INST_COWORK in r.institution_role_role
    ][0]
    self._role_inst_related = [
        r for r in institution_roles
        if self.MARK_INST_RELATED in r.institution_role_role
    ][0]

    # projects dictionary
    projects = list(Project.objects.all().prefetch_related(
        "project_disciplines", "project_targets", "project_institutions",
        "project_participants", "project_person_participations"))
    self.projects_ids_dict = {p.project_id: p for p in projects}
    self.projects_ids = {p.project_id for p in projects}
    self.projects_opi_ids = {p.project_opi_id for p in projects}
    self.dict_opi_id_project_id = {
        p.project_opi_id: p.project_id for p in projects
    }
    self.max_project_id = max(self.projects_ids) if self.projects_ids else 0
    self.projects_dict = {
        ImportHelper.create_dict_key(p.project_title_text): p.project_id
        for p in projects
    }

    # cache
    # select_related() fetches the project and person together with each
    # participant, so the loops below do not issue a query per row.
    self.all_participants = list(
        ProjectParticipant.objects.filter(
            is_principal=False).select_related('project', 'person'))
    self.all_directors = list(
        ProjectParticipant.objects.filter(
            is_principal=True).select_related('project', 'person'))
    self.projects_participants = {}
    self.projects_directors = {}
    for pp in self.all_participants:
        self.projects_participants.setdefault(
            pp.project.project_id, []).append(pp.person.person_id)
    for pp in self.all_directors:
        self.projects_directors.setdefault(
            pp.project.project_id, []).append(pp.person.person_id)
def __init__(self):
    """Load the lookup tables used while importing people.

    Reads scientific titles, institutions, research disciplines, people and
    their affiliations from the database and indexes them by id, OPI id and
    normalised dictionary key so later checks do not need further queries.

    Raises:
        IndexError: if no scientific title abbreviation contains one of the
            ``MARK_*`` title markers.
    """
    # scientific titles
    self.MARK_PROF = 'prof'
    self.MARK_DR = 'dr'
    # NOTE(review): same marker as MARK_PROF, so _mgr_t resolves to the same
    # title as _prof_t. Looks like a copy-paste slip; confirm the intended
    # marker before changing it (a marker with no match raises IndexError).
    self.MARK_MGR = 'prof'
    scientific_titles = list(ScientificTitle.objects.all())
    self._prof_t = [
        t for t in scientific_titles
        if self.MARK_PROF in t.scientific_title_abbreviation
    ][0]
    self._dr_t = [
        t for t in scientific_titles
        if self.MARK_DR in t.scientific_title_abbreviation
    ][0]
    self._mgr_t = [
        t for t in scientific_titles
        if self.MARK_MGR in t.scientific_title_abbreviation
    ][0]

    institutions = list(Institution.objects.all())
    self.institutions_dict = {
        ImportHelper.create_dict_key(i.get_as_dict_key): i
        for i in institutions
    }

    disciplines = list(ResearchDiscipline.objects.all())
    self.disciplines_dict = {
        ImportHelper.create_dict_key(d.discipline_fullname): d
        for d in disciplines
    }

    # people dictionary and list of ids
    people = list(
        Person.objects.all().prefetch_related('person_disciplines'))
    # NOTE(review): leftover debug output; kept so stdout stays unchanged.
    print(len(people))
    self.people_dict = {
        ImportHelper.create_dict_key(p.person_first_name, p.person_last_name):
            p.person_id
        for p in people
    }
    self.people_ids = [p.person_id for p in people]
    self.people_opi_ids = [p.person_opi_id for p in people]
    self.dict_opi_id_person_id = {
        p.person_opi_id: p.person_id for p in people
    }
    # Guard against an empty Person table; a bare max() on an empty list
    # raises ValueError.
    self.max_person_id = max(self.people_ids) if self.people_ids else 0

    # cache
    affiliations = list(
        PersonAffiliation.objects.filter(
            is_principal=False).select_related('institution', 'person'))
    principal_affiliations = list(
        PersonAffiliation.objects.filter(
            is_principal=True).select_related('institution', 'person'))
    self.person_affiliations = {}
    self.person_principal_affiliations = {}
    self.person_disciplines = {}
    # Group institution ids by person id.
    for pa in affiliations:
        self.person_affiliations.setdefault(
            pa.person.person_id, []).append(pa.institution.institution_id)
    for pa in principal_affiliations:
        self.person_principal_affiliations.setdefault(
            pa.person.person_id, []).append(pa.institution.institution_id)
    for p in people:
        self.person_disciplines[p.person_id] = list(
            p.person_disciplines.all())