def add_column(self):
    """Insert a ``<table>.<column>`` reference into the expression text.

    The table part is ``self.parent.csv_name`` without its trailing
    ``.csv`` extension and the column part is the current value of
    ``self.columns_cbox``. The reference is inserted at the ``tk.INSERT``
    position of ``self.expressions_text``.

    When no entry is selected in the combobox a warning is logged and
    nothing is inserted.
    """
    if self.columns_cbox.current() < 0:
        LOGGER.warning("Table or header not selected.")
        return

    table_name = self.parent.csv_name
    # Remove the extension only. ``str.replace(".csv", "")`` would also
    # drop a ".csv" that appears in the middle of the file name.
    if table_name.endswith(".csv"):
        table_name = table_name[:-len(".csv")]

    column_ref = table_name + '.' + self.columns_cbox.get()
    self.expressions_text.insert(tk.INSERT, column_ref)
def __check_db_container(self, mode='running'):
    """Check whether the db container is already running or exists.

    Lists the docker containers, keeps the listing lines that mention the
    configured container name and reads the image, name and port columns
    from them. When the name and image both match the configured ones,
    the corresponding flag (``__is_db_running`` or ``__is_db_exist``) is
    set and, if the reported port differs from the configured one, the
    reported port is adopted.

    Arguments:
    :param mode: 'running' to inspect only containers that are up
                 (``docker ps``) or 'exist' to include stopped ones as
                 well (``docker ps -a``).

    :raises DockerExecError: when ``mode`` is not one of the two values
        above, or when the configured name belongs to a container that
        was created from a different image.
    """
    if mode == 'running':
        cmd_docker = ['docker', 'ps']
    elif mode == 'exist':
        cmd_docker = ['docker', 'ps', '-a']
    else:
        raise DockerExecError(
            'Invalid container check mode: {}.'.format(mode))

    # Filter the listing in Python instead of piping it through an
    # external ``grep``: no second process, no dangling pipe.
    listing = subprocess.run(cmd_docker, stdout=subprocess.PIPE).stdout
    # Decode the bytes properly. Going through ``str(bytes)`` leaves a
    # trailing "\\n'" on the last token, and stripping it with
    # ``rstrip("\\n'")`` also eats every trailing "n" of the name.
    text = listing.decode(errors='replace')
    matching_lines = [
        line for line in text.splitlines() if self.__db_cont_name in line
    ]
    output = ' '.join(matching_lines).split()
    LOGGER.debug(output)

    try:
        # Image is the second column; name and ports are read from the
        # end of the listing.
        container_image = output[1]
        container_name = output[-1]
        container_port = find_xtport(output[-2])
    except IndexError:
        # Nothing (usable) was listed for the configured name.
        container_name = None
        container_image = None
        container_port = None

    LOGGER.debug(
        'Found that there is an existing container with the name: {}'.
        format(container_name))

    if container_name != self.__db_cont_name:
        return

    if container_image != self.__db_image:
        msg = ('The name "{}" is used by another container. '
               'Could not create postgres database container. '
               'Please use other db container name.').format(
                   self.__db_cont_name)
        raise DockerExecError(msg)

    if mode == 'running':
        self.__is_db_running = True
    else:
        # Only 'exist' can reach this point; other modes raised above.
        self.__is_db_exist = True

    if container_port != self.__dbport:
        LOGGER.warning(
            'Using as external container port: {}'.format(container_port))
        self.__dbport = container_port
def suggest_corr(self, cdedict, threshold):
    """Suggest CDE correspondences for the source columns and add them.

    Every column report of the source table is offered to ``cdedict``.
    The first column suggested for a given CDE code is kept, and value
    replacements are requested for that pair. A correspondence is then
    added to the mapping for each kept pair, using an ``ifstr``
    expression when replacements were suggested and the plain
    ``<table>.<column>`` reference otherwise.

    Arguments:
    :param cdedict: CdeDict object
    :param threshold: 0-1 similarity threshold, below that not a cde
                      is suggested
    """
    source_table = self.__srctbl.filename
    target_table = self.__target_filename
    cde_sugg_dict = {}  # {cdecode: sourcecolumn}
    sugg_replacemnts = {}  # {cdecode: [Replacements]}

    # for each source column
    for columnreport in self.__tblreport.columnreports.values():
        cde = cdedict.suggest_cde(columnreport, threshold=threshold)
        # skip when nothing is suggested or the cde is already taken
        if not cde or cde.code in cde_sugg_dict:
            continue
        cde_sugg_dict[cde.code] = \
            self.__mapping.sourcedb.raw_2_mipmap_header(
                self.__src_filename, columnreport.name)
        # suggest category replacements for cases where source col and
        # cde are nominal
        sugg_reps = cdedict.suggest_replecements(cde.code,
                                                 columnreport,
                                                 threshold=threshold)
        if sugg_reps:
            sugg_replacemnts[cde.code] = sugg_reps

    for cdecode, source_var in cde_sugg_dict.items():
        source_paths = [(source_table, source_var, None)]
        target_path = (target_table, cdecode, None)
        filename_column = '.'.join(
            [os.path.splitext(source_table)[0], source_var])
        # if this cde has value replacement suggestions, wrap the column
        # reference in the if statement built from them
        if cdecode in sugg_replacemnts:
            expression = ifstr(filename_column, sugg_replacemnts[cdecode])
        else:
            expression = filename_column
        # let's try to create the correspondence now
        try:
            self.__mapping.add_corr(
                source_paths=source_paths,
                target_path=target_path,
                expression=expression,
                replacements=sugg_replacemnts.get(cdecode))
        except MappingError:
            # The mapping rejected this correspondence; report and go on.
            # Adjacent literals are used instead of a backslash
            # continuation, which would put the source indentation into
            # the logged message.
            LOGGER.warning(
                ('found cde macth for source column "{}" but cde "{}" '
                 'is not included in the selected cde pathology.').format(
                     source_var, cdecode))

    self.__update_cde_mapped()
def __create_db_container(self):
    """Make sure the db container is up, then (re)create the database.

    The container state is probed first in 'running' and then in 'exist'
    mode. Depending on the outcome the container is reused as is,
    restarted, or created with ``docker run`` from the configured image,
    name, port, user and password. In every case
    ``__remove_create_db`` is called afterwards.

    :raises DockerExecError: when the ``docker run`` command (or the
        database step that follows it) fails with a
        ``subprocess.CalledProcessError``.
    """
    self.__check_db_container(mode='running')
    self.__check_db_container(mode='exist')

    if self.__is_db_running:
        LOGGER.info('db container ({}) is already up and'
                    ' running. Skipping creation step...'.format(
                        self.__db_cont_name))
        self.__remove_create_db()
        return

    if self.__is_db_exist:
        LOGGER.info('db container({}) already exists. '
                    'Restarting db container'.format(self.__db_cont_name))
        subprocess.run(['docker', 'restart', self.__db_cont_name])
        # presumably gives the database time to come up -- TODO confirm
        time.sleep(10)
        self.__remove_create_db()
        return

    # create the db container
    LOGGER.debug('Creating db container with name {}'.format(
        self.__db_cont_name))
    command = [
        'docker', 'run',
        '-p', '{}:5432'.format(self.__dbport),
        '--name', self.__db_cont_name,
        '-e', 'POSTGRES_PASSWORD={}'.format(self.__dbpassword),
        '-e', 'POSTGRES_USER={}'.format(self.__dbuser),
        '-d', self.__db_image,
    ]
    try:
        # check=True is required: without it a failing ``docker run``
        # never raises and the handler below is unreachable.
        subprocess.run(command, check=True)
        # presumably gives the fresh database time to initialise
        # -- TODO confirm
        time.sleep(50)
        self.__remove_create_db()
    except subprocess.CalledProcessError as err:
        LOGGER.warning(
            'There was an error while attempting creating the db container.'
        )
        raise DockerExecError(
            'There was an error while attempting creating the db container.'
        ) from err
def createreport(self):
    """Validate the form, build the table report and write it to disk.

    The execute button is disabled while the method runs and is always
    re-enabled when it returns, whatever the outcome.

    Steps:
      1. Check that a dataset, a metadata source and a report file have
         been chosen; show a warning dialog and stop at the first
         missing one.
      2. Read the outlier threshold, falling back to 3 when the entered
         value is not a number.
      3. Load the metadata schema, either from the JSON file on disk or
         from the Data Catalogue JSON held by the metadata frame.
      4. Build the ``TableReport`` and export it as Excel when
         ``report_type`` is 1, as PDF otherwise.

    A ``QCToolException`` raised while building or exporting the report
    is shown in an error dialog.
    """
    self.button_exec.config(state='disabled')
    try:
        LOGGER.info('Checking if the necessary fields are filled in...')
        warningtitle = 'Cannot create report'

        if not self.dname:
            tkmessagebox.showwarning(warningtitle,
                                     'Please, select dataset file')
            return
        if self.md_frame.from_disk.get() and not self.md_frame.metafilepath:
            tkmessagebox.showwarning(warningtitle,
                                     'Please, select metadata file')
            return
        if self.md_frame.from_dc.get() and not self.md_frame.dc_json:
            tkmessagebox.showwarning(
                warningtitle, 'Could not get metadata from Data Cataloge')
            return
        if not self.__reportfilepath:
            tkmessagebox.showwarning(warningtitle,
                                     'Please, select report file first')
            return

        try:
            raw_threshold = self.outlier_threshold.get()
            threshold = float(raw_threshold)
            LOGGER.info('Outlier threshold: %s', raw_threshold)
        except ValueError:
            # Adjacent literals rather than a backslash continuation, so
            # no source indentation ends up inside the message.
            LOGGER.warning('Could not retrieve outlier threshold. '
                           'Setting it to default value: 3')
            threshold = 3
        LOGGER.info('Everything looks ok...')

        schema_type = 'qc'
        if self.md_frame.from_disk.get():
            LOGGER.info('Retrieving Metadata from localdisk...')
            LOGGER.info('Using metadata file: %s',
                        self.md_frame.metafilepath)
            # JSON is UTF-8 by specification; do not rely on the
            # platform's locale encoding.
            with open(self.md_frame.metafilepath,
                      encoding='utf-8') as json_file:
                dict_schema = json.load(json_file)
            if self.md_frame.json_type.get() == 2:
                schema_type = 'dc'
        elif self.md_frame.from_dc.get():
            LOGGER.info('Retrieving Metadata from Data Catalogue...')
            LOGGER.info('Selected pathology is {}, CDE version: {}'.format(
                self.md_frame.selected_pathology.get(),
                self.md_frame.selected_version.get()))
            dict_schema = self.md_frame.dc_json
            schema_type = 'dc'
        else:
            # Neither source is selected: without this guard the schema
            # variable would be unbound further down.
            tkmessagebox.showwarning(warningtitle,
                                     'Please, select metadata source')
            return

        try:
            self.reportcsv = TableReport.from_disc(
                self.datasetpath,
                dict_schema=dict_schema,
                schema_type=schema_type,
                threshold=threshold)
            if self.reportcsv.isvalid:
                LOGGER.info('The dataset is valid.')
            else:
                LOGGER.info('CAUTION! The dataset is invalid!')

            # Create the report
            if self.report_type.get() == 1:
                self.reportcsv.printexcel(self.__reportfilepath)
            else:
                self.reportcsv.printpdf(self.__reportfilepath)

            tkmessagebox.showinfo(
                title='Status info',
                message='Reports have been created successully')
            self.show_sugg_button.config(state='normal')
            self.clean_button.config(state='normal')
        except QCToolException as e:
            errortitle = 'Something went wrong!'
            tkmessagebox.showerror(errortitle, e)
    finally:
        # Re-enable the button on every exit path, including unexpected
        # exceptions (e.g. an unreadable or malformed metadata file).
        self.button_exec.config(state='normal')