class Base(object): """ This class enables us to add new entities and relations in CW. Attributes ---------- relations: list of 3-uplet (mandatory) all the relations involved in schema we want to document. Notes ----- Here is an example of the definition of the 'relations' parameter: :: relations = [ ("CWUser", "in_group", "CWGroup") ] """ relations = [] assessment_relations = [ ("Assessment", "study", "Study"), ("Study", "assessments", "Assessment"), ("Subject", "assessments", "Assessment"), ("Assessment", "subjects", "Subject"), ("Center", "assessments", "Assessment"), ("Assessment", "center", "Center"), ("CWGroup", "can_read", "Assessment"), ("CWGroup", "can_update", "Assessment"), ("Assessment", "device", "Device"), ("Device", "device_assessments", "Assessment") ] fileset_relations = [ ["ParentEntitiyName", "filesets", "FileSet"], ("FileSet", "in_assessment", "Assessment"), ("FileSet", "external_files", "ExternalFile"), ("ExternalFile", "fileset", "FileSet"), ("ExternalFile", "in_assessment", "Assessment") ] device_relations = fileset_relations + [ ("Device", "center", "Center") ] device_relations[0][0] = "Device" def __init__(self, session, can_read=True, can_update=False, use_store=True, piws_security_model=True): """ Initialize the SeniorData class. Parameters ---------- session: Session (mandatory) a cubicweb session. can_read: bool (optional, default True) set the read permission to the imported data. can_update: bool (optional, default False) set the update permission to the imported data. use_store: bool (optional, default True) if True use an SQLGenObjectStore, otherwise the session. piws_security_model: bool (optional, default True) if True apply the PIWS security model. """ # CW parameters self.can_read = can_read self.can_update = can_update self.use_store = use_store self.session = session if self.use_store: self.store = SQLGenObjectStore(self.session) self.relate_method = self.store.relate self.create_entity_method = self.store.create_entity else: self.relate_method = self.session.add_relation self.create_entity_method = self.session.create_entity self.piws_security_model = piws_security_model # Speed up parameters self.inserted_assessments = {} self.inserted_devices = {} ########################################################################### # Public Methods ########################################################################### def cleanup(self): """ Method to cleanup temporary items and to commit changes. """ # Send the new entities to the db if self.use_store: self.store.flush() else: self.session.commit() def import_data(self): """ Method that import the data in cw. """ raise NotImplementedError("This method has to be defined in child " "class.") ########################################################################### # Private Methods ########################################################################### @classmethod def schema(self, outfname, text_font="sans-serif", node_text_size=12): """ Create a view of the schema described in a python structure. Parameters ---------- outfname: str (mandatory) the path to the output file where the graph will be saved. The directory containing this file must be created. text_font: str (optional, default 'sans-serif') the font used to display the text in the final image. node_text_size: int (optional, default 12) the text size. """ import pygraphviz # Create a graph graph = pygraphviz.AGraph(strict=False, directed=True, rankdir="LR", overlap=False) # Get all the entity names involved entities = set() for link in self.relations: entities.add(link[0]) entities.add(link[2]) # Go through all the entities and create a graphic table for entity_name in entities: attributes = ("CW authorized attributes") graph.add_node(entity_name, style="filled", fillcolor="blue", fontcolor="white", fontsize=node_text_size, fontname=text_font, label=entity_name + "|" + attributes, shape="Mrecord") # Relate the entities for link in self.relations: graph.add_edge(link[0], link[2], label=link[1]) # Save the graph graph.draw(outfname, prog="dot") def _md5_sum(self, path): """ Create a md5 sum of a path. Parameters ---------- path: str (madatory) a string to hash. Returns ------- out: str the input hashed string. """ m = hashlib.md5() m.update(path.encode("utf-8")) return m.hexdigest() def _progress_bar(self, ratio, title="", bar_length=40, maxsize=20): """ Method to generate a progress bar. Parameters ---------- ratio: float (mandatory 0<ratio<1) float describing the current processing status. title: str (optional) a title to identify the progress bar. bar_length: int (optional) the length of the bar that will be ploted. maxsize: int (optional) use to justify title. """ progress = int(ratio * 100.) block = int(round(bar_length * ratio)) title = title.ljust(maxsize, " ") text = "\r[{0}] {1}% {2}".format( "=" * block + " " * (bar_length - block), progress, title) sys.stdout.write(text) sys.stdout.flush() ########################################################################### # Private Insertion Methods ########################################################################### def _set_unique_relation(self, source_eid, relation_name, detination_eid, check_unicity=True, subjtype=None): """ Method to create a new unique relation. First check that the relation do not exists if 'check_unicity' is True. Parameters ---------- source_eid: int (madatory) the CW identifier of the object entity in the relation. relation_name: str (madatory) the relation name. detination_eid: int (madatory) the CW identifier of the subject entity in the relation. check_unicity: bool (optional) if True check if the relation already exists in the data base. subjtype: str (optional) give the subject etype for inlined relation when using a store. """ # With unicity contrain if check_unicity: # First build the rql request rql = "Any X Where X eid '{0}', X {1} Y, Y eid '{2}'".format( source_eid, relation_name, detination_eid) # Execute the rql request rset = self.session.execute(rql) # The request returns some data -> do nothing if rset.rowcount == 0: if self.use_store: self.relate_method(source_eid, relation_name, detination_eid, subjtype=subjtype) else: self.relate_method(source_eid, relation_name, detination_eid) # Without unicity constrain else: if self.use_store: self.relate_method(source_eid, relation_name, detination_eid, subjtype=subjtype) else: self.relate_method(source_eid, relation_name, detination_eid) def _get_or_create_unique_entity(self, rql, entity_name, check_unicity=True, *args, **kwargs): """ Method to create a new unique entity. First check that the entity do not exists by executing the rql request if 'check_unicity' is True. Parameters ---------- rql: str (madatory) the rql request to check unicity. entity_name: str (madatory) the name of the entity we want to create. check_unicity: bool (optional) if True check if the entity already exists in the data base. Returns ------- entity: CW entity the requested entity. is_created: bool return True if the entity has been created, False otherwise. """ # Initilize output prameter is_created = False # With unicity contrain if check_unicity: # First execute the rql request rset = self.session.execute(rql) # The request returns some data, get the unique entity if rset.rowcount > 0: if rset.rowcount > 1: raise Exception("The database is corrupted, please " "investigate.") entity = rset.get_entity(0, 0) # Create a new unique entity else: entity = self.create_entity_method(entity_name, **kwargs) is_created = True # Without unicity constrain else: entity = self.create_entity_method(entity_name, **kwargs) is_created = True return entity, is_created def _create_device(self, device_struct, center_eid, assessment_eid, center_name): """ Create a device and its associated relations. """ # Create the device device_id = device_struct["identifier"] extfiles = device_struct.pop("ExternalResources") device_entity, is_created = self._get_or_create_unique_entity( rql=("Any X Where X is Device, X identifier " "'{0}'".format(device_id)), check_unicity=True, entity_name="Device", **device_struct) device_eid = device_entity.eid self.inserted_devices[device_id] = device_eid # If we just create the device, relate the entity if is_created: # > add relation with the center self._set_unique_relation( device_eid, "center", center_eid, check_unicity=False) # > add relation with the exam cards if len(extfiles) > 0: fset_struct = { "identifier": device_id, "name": u"{0} exam {1} card".format( center_name, device_struct["manufacturer"])} self._import_file_set(fset_struct, extfiles, device_eid, assessment_eid) return device_eid def _create_assessment(self, assessment_struct, subject_eids, study_eid, center_eid, groups): """ Create an assessment and its associated relations. The groups that can access the 'in_assessment' linked entities are generated dynamically from the assessment identifiers: * we '_' split the string and create a group with the first returned item and the concatenation of the two first items. * the permissions 'can_read', 'can_update' relate the assessments with the corresponding groups. """ # Format inputs if not isinstance(subject_eids, list): subject_eids = [subject_eids] # Create the assessment assessment_id = assessment_struct["identifier"] assessment_entity, is_created = self._get_or_create_unique_entity( rql=("Any X Where X is Assessment, X identifier " "'{0}'".format(assessment_id)), check_unicity=True, entity_name="Assessment", **assessment_struct) assessment_eid = assessment_entity.eid self.inserted_assessments[assessment_id] = assessment_eid # If we just create the assessment, relate the entity if is_created: # > add relation with the study self._set_unique_relation( assessment_eid, "study", study_eid, check_unicity=False, subjtype="Assessment") self._set_unique_relation( study_eid, "assessments", assessment_eid, check_unicity=False, subjtype="Assessment") # > add relation with the subject for subject_eid in subject_eids: self._set_unique_relation( subject_eid, "assessments", assessment_eid, check_unicity=False) self._set_unique_relation( assessment_eid, "subjects", subject_eid, check_unicity=False) # > add relation with the center self._set_unique_relation( center_eid, "assessments", assessment_eid, check_unicity=False) self._set_unique_relation( assessment_eid, "center", center_eid, check_unicity=False) # Set the permissions # Create/get the related assessment groups if self.piws_security_model: assessment_id = assessment_id.split("_") related_groups = [ assessment_id[0], "_".join(assessment_id[:2]) ] for group_name in related_groups: # Check the group is created if group_name in groups: group_eid = groups[group_name] else: raise ValueError( "Please create first the group '{0}'.".format(group_name)) # > add relation with group if self.can_read: self._set_unique_relation( group_eid, "can_read", assessment_eid) if self.can_update: self._set_unique_relation( group_eid, "can_update", assessment_eid) else: for group_name in ("users", "guests"): group_eid = groups[group_name] # > add relation with group if self.can_read: self._set_unique_relation( group_eid, "can_read", assessment_eid) if self.can_update: self._set_unique_relation( group_eid, "can_update", assessment_eid) return assessment_eid def _import_file_set(self, fset_struct, extfiles, parent_eid, assessment_eid): """ Add the file set attached to a parent entity. """ # Create the file set fset_entity, _ = self._get_or_create_unique_entity( rql="", check_unicity=False, entity_name="FileSet", **fset_struct) # > add relation with the parent self._set_unique_relation(parent_eid, "filesets", fset_entity.eid, check_unicity=False) self._set_unique_relation(fset_entity.eid, "containers", parent_eid, check_unicity=False) # > add relation with the assessment self._set_unique_relation(fset_entity.eid, "in_assessment", assessment_eid, check_unicity=False, subjtype="FileSet") # Create the external files for extfile_struct in extfiles: file_entity, _ = self._get_or_create_unique_entity( rql="", check_unicity=False, entity_name="ExternalFile", **extfile_struct) # > add relation with the file set self._set_unique_relation(fset_entity.eid, "external_files", file_entity.eid, check_unicity=False) self._set_unique_relation(file_entity.eid, "fileset", fset_entity.eid, check_unicity=False) # > add relation with the assessment self._set_unique_relation(file_entity.eid, "in_assessment", assessment_eid, check_unicity=False, subjtype="ExternalFile")
chrs = import_chromosomes(os.path.join(genetics_dir, 'chromosomes.json')) chr_map = {} for _chr in chrs: print 'chr', _chr['name'] _chr = store.create_entity('Chromosome', **_chr) chr_map.setdefault(_chr['name'], _chr.eid) # Genes genes = import_genes(os.path.join(genetics_dir, 'chromosomes.json'), os.path.join(genetics_dir, 'hg18.refGene.meta')) for gene in genes: print 'gene', gene['name'], gene['chromosome'] gene['chromosome'] = chr_map[gene['chromosome']] gene = store.create_entity('Gene', **gene) # Flush/Commit if sqlgen_store: store.flush() # Snps snps = import_snps(os.path.join(genetics_dir, 'chromosomes.json'), os.path.join(genetics_dir, 'Localizer94.bim')) snp_eids = [] for ind, snp in enumerate(snps): print 'snp', snp['rs_id'] snp['chromosome'] = chr_map[snp['chromosome']] snp = store.create_entity('Snp', **snp) snp_eids.append(snp.eid) if sqlgen_store and ind and ind % 100000 == 0: store.flush() # Flush/Commit if sqlgen_store: store.flush() # Platform