def from_disc(cls, csvpath, dict_schema, schema_type='qc', id_column=1, threshold=3):
    """Constructs a TableReport from a csv file and a given schema.

    Arguments:
    :param csvpath: string, the csv filepath
    :param dict_schema: dictionary describing the csv schema
    :param schema_type: 'qc' for frictionless type, 'dc' for Data Catalogue type json schema
    :param id_column: column number of the dataset's primary key (id)
    :param threshold: outlier threshold - a numerical value outside
                      (mean - threshold * std, mean + threshold * std) is considered an outlier
    """
    if schema_type == 'qc':
        dataset_schema = QcSchema(dict_schema)
    elif schema_type == 'dc':
        LOGGER.info(
            'Translating from Data Catalogue to Frictionless json format...'
        )
        qcdict_schema = FrictionlessFromDC(dict_schema).qcdescriptor
        dataset_schema = QcSchema(qcdict_schema)
    dataset = QcTable(csvpath, schema=dataset_schema)
    return cls(dataset, id_column=id_column, threshold=threshold)

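# Usage sketch for the classmethod above (hypothetical file paths; it mirrors
# how createreport() and the csv CLI command further below build a report):
#
#   import json
#
#   with open('dataset_schema.json') as json_file:
#       dict_schema = json.load(json_file)
#   report = TableReport.from_disc('dataset.csv',
#                                  dict_schema=dict_schema,
#                                  schema_type='qc',
#                                  threshold=3)
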
def suggest_replecements(self, cdecode, columnreport, threshold=0.6) -> list:
    """Suggests value replacements for a column for a given cde.

    The cde and the column must both have a nominal miptype.

    Arguments:
    :param cdecode: cde code (str)
    :param columnreport: ColumnReport object with info of a dataset column
    :param threshold: 0-1 similarity threshold
    :returns: list of Replacement named tuples
    """
    # get the CdeVariable object for the given cde code
    cdevar = self.__cdes.get(cdecode)
    replacements = []
    if cdevar:
        # make suggestions only when both the cde and the source column are nominal
        # and the cde dictionary has values for mipvalues and the enumeration lookup columns
        if (cdevar.miptype == 'nominal' and columnreport.miptype == 'nominal'
                and cdevar.mipvalues and cdevar.enum_lookup):
            src_cat = columnreport.value_range
            for enum in src_cat:
                sug_val = cdevar.suggest_value(enum, threshold)
                if sug_val:
                    replacements.append(Replacement(enum, sug_val))
            return replacements
        else:
            return None
    else:
        msg = 'Cde "{}" not found in the CdeDictionary'.format(cdecode)
        LOGGER.error(msg)
        raise CdeDictError(msg)

def suggest_cde(self, columnreport, threshold=0.6):
    """Suggests the most similar CDE for the column.

    Arguments:
    :param columnreport: ColumnReport object with info of a dataset column
    :param threshold: 0-1 similarity threshold; below that no cde is suggested
    :returns: a CdeVariable object
    """
    name = columnreport.name
    val_range = columnreport.value_range
    mip_type = columnreport.miptype
    LOGGER.debug('The incoming column name is: {}'.format(name))
    # select cdes with the same miptype and calculate their similarity
    candidates = [
        cde for cde in self.__cdes.values() if cde.miptype == mip_type
    ]
    LOGGER.debug('Number of cdes with miptype {} is: {}'.format(
        mip_type, len(candidates)))
    if candidates:
        candidates.sort(key=lambda x: x.similarity(name, val_range),
                        reverse=True)
        candidate = candidates[0]
        similarity = candidate.similarity(name, val_range)
        LOGGER.debug(
            'The similarity between "{}" and cde "{}" is: {}'.format(
                name, candidate.code, similarity))
        if similarity >= threshold:
            return candidate
    LOGGER.info('No cde match found for incoming column "{}"'.format(name))
    return None

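# Usage sketch for the two suggestion methods above (cdedict is a CdeDict and
# columnreport a ColumnReport, following the calls made in suggest_corr below):
#
#   cde = cdedict.suggest_cde(columnreport, threshold=0.6)
#   if cde:
#       replacements = cdedict.suggest_replecements(cde.code, columnreport,
#                                                   threshold=0.6)
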
def reorganizefiles(self, output):
    """Reorganizes the dcm files in a folder structure for the LORIS import pipeline.

    Arguments:
    :param output: output folder
    """
    LOGGER.info('Reorganizing files for LORIS pipeline into folder: %s' % output)
    for patient in self.patients:
        patientid = patient.patientid
        patdir = os.path.join(output, patientid)
        if not os.path.exists(patdir):
            os.mkdir(patdir)
        study_count = 0
        for study in patient.studies:
            study_count += 1
            d = [patientid, str(study_count)]
            studydir = os.path.join(patdir, '_'.join(d))
            if not os.path.exists(studydir):
                os.mkdir(studydir)
            for seq in study.sequences:
                for dicom in seq.dicoms:
                    sourcepath = dicom.filepath
                    destpath = os.path.join(studydir, dicom.filename)
                    shutil.copy(sourcepath, destpath)

def add_column(self):
    temp = ''
    if self.columns_cbox.current() > -1:
        temp = self.parent.csv_name.replace(
            ".csv", "") + '.' + self.columns_cbox.get()
    else:
        LOGGER.warning("Table or header not selected.")
    self.expressions_text.insert(tk.INSERT, temp)

def suggest_corr(self, cdedict, threshold):
    """Suggests correspondences between source columns and CDEs.

    Arguments:
    :param cdedict: CdeDict object
    :param threshold: 0-1 similarity threshold; below that no cde is suggested
    """
    cde_sugg_dict = {}  # {cdecode: sourcecolumn}
    source_table = self.__srctbl.filename
    target_table = self.__target_filename
    # the suggested replacements are stored here as {cdecode: [Replacement]}
    sugg_replacemnts = {}
    #source_raw_headers = self.__mapping.sourcedb.get_raw_table_headers(source_table)
    # for each source column
    for name, columnreport in self.__tblreport.columnreports.items():
        cde = cdedict.suggest_cde(columnreport, threshold=threshold)
        # check if a cde mapping already exists
        if cde and (cde.code not in cde_sugg_dict.keys()):
            cde_sugg_dict[cde.code] = self.__mapping.sourcedb.raw_2_mipmap_header(
                self.__src_filename, columnreport.name)
            # suggest category replacements for cases where the source column and the cde are nominal
            sugg_reps = cdedict.suggest_replecements(cde.code,
                                                     columnreport,
                                                     threshold=threshold)
            if sugg_reps:
                sugg_replacemnts[cde.code] = sugg_reps
    for cdecode, source_var in cde_sugg_dict.items():
        source_paths = [(source_table, source_var, None)]
        target_path = (target_table, cdecode, None)
        filename_column = '.'.join(
            [os.path.splitext(source_table)[0], source_var])
        # if this cde has value replacement suggestions, create the if statement
        if cdecode in sugg_replacemnts.keys():
            expression = ifstr(filename_column, sugg_replacemnts[cdecode])
        else:
            expression = filename_column
        # try to create the correspondence
        try:
            self.__mapping.add_corr(
                source_paths=source_paths,
                target_path=target_path,
                expression=expression,
                replacements=sugg_replacemnts.get(cdecode))
        # if a cde correspondence already exists then pass
        except MappingError:
            LOGGER.warning(
                'Found cde match for source column "{}" but cde "{}" '
                'is not included in the selected cde pathology.'.format(
                    source_var, cdecode))
    self.__update_cde_mapped()

def __check_usergroup(self):
    LOGGER.debug('Checking if user is in docker usergroup')
    process = subprocess.Popen(['groups'],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    output, _ = process.communicate()
    groups = output.decode('utf-8').split()
    return 'docker' in groups

def __check_docker_client(self):
    LOGGER.debug('Checking if the docker client exists.')
    process = subprocess.Popen(['which', 'docker'],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    output, _ = process.communicate()
    decoded = output.decode('utf-8')
    return decoded != ''

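# A shorter, cross-platform variant of the same check is possible with the
# standard library (sketch only; the tool itself uses the subprocess call above):
def docker_client_on_path():
    import shutil
    return shutil.which('docker') is not None
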
def __update_replacements(self, cdecode, is_raw_header=False):
    self.func_replace_trg_listbox.delete(0, tk.END)
    if is_raw_header:
        cdename = self.parent.cdemapper.get_cde_mipmap_header(cdecode)
    else:
        cdename = cdecode
    current_replacements = self.parent.cdemapper.get_corr_replacements(cdename)
    if current_replacements:
        LOGGER.debug('Replacements found for cde "{}": {}'.format(
            cdecode, len(current_replacements)))
        for rep in current_replacements:
            self.__add_replacement(rep.source, rep.target)

def add_replacement_expr(self):
    reps = self.func_replace_trg_listbox.get(0, tk.END)
    replacemnts = []
    source_col = self.parent.csv_name.replace(
        ".csv", "") + '.' + self.selected_column
    LOGGER.debug('The replacements are: {}'.format(reps))
    if len(reps) > 0:
        self.expressions_text.delete("1.0", "end-1c")
        for rep in reps:
            s = rep.split('->')
            replacemnts.append(Replacement(s[0], s[1]))
        expr = ifstr(source_col, replacemnts)
        LOGGER.info('The expression is: {}'.format(expr))
        self.expressions_text.insert('1.0', expr)

def save_schema(self):
    self.save_button.config(state='disabled')
    if self.schema_output.get() == 1:
        output_file = tkfiledialog.asksaveasfilename(
            title='enter file name',
            filetypes=(('excel files', '*.xlsx'), ('all files', '*.*')))
    else:
        output_file = tkfiledialog.asksaveasfilename(
            title='enter file name',
            filetypes=(('json files', '*.json'), ('all files', '*.*')))
    if output_file:
        warningtitle = 'Can not save the schema'
        if not self.dname:
            tkmessagebox.showwarning(warningtitle,
                                     'Please, select dataset file')
        max_categories = int(self.inf_opt_frame.max_categories.get())
        sample_rows = int(self.inf_opt_frame.sample_rows.get())
        na_empty_strings_only = self.inf_opt_frame.na_empty_strings_only.get()
        if self.inf_opt_frame.cde_dict:
            infer = InferSchema.from_disc(
                self.datasetpath,
                sample_rows=sample_rows,
                maxlevels=max_categories,
                cdedict=self.inf_opt_frame.cde_dict,
                na_empty_strings_only=na_empty_strings_only)
            if self.inf_opt_frame.thresholdstring.get() == '':
                threshold = 0.6
            else:
                threshold = float(self.inf_opt_frame.thresholdstring.get())
            LOGGER.info('CDE similarity threshold: %f' % threshold)
            infer.suggest_cdes(threshold=threshold)
            infer.export2excel(output_file)
            LOGGER.info('Schema file has been created successfully')
            tkmessagebox.showinfo(
                title='Status info',
                message='Schema file has been created successfully')
        else:
            infer = InferSchema.from_disc(
                self.datasetpath,
                sample_rows=sample_rows,
                maxlevels=max_categories,
                cdedict=None,
                na_empty_strings_only=na_empty_strings_only)
            if self.schema_output.get() == 1:
                infer.export2excel(output_file)
            else:
                infer.expoct2qcjson(output_file)
            LOGGER.info('Schema file has been created successfully')
            tkmessagebox.showinfo(
                title='Status info',
                message='Schema file has been created successfully')
    self.save_button.config(state='normal')

def suggest_corrs(self):
    self.suggest_btn.config(state='disabled')
    warningtitle = 'Could not make suggestions'
    if self.infer_opt_frame.cde_dict:
        if self.infer_opt_frame.thresholdstring.get() == '':
            threshold = 0.6
        else:
            threshold = float(self.infer_opt_frame.thresholdstring.get())
        self.cdemapper.suggest_corr(self.infer_opt_frame.cde_dict,
                                    threshold=threshold)
        LOGGER.info('Done with the correspondence suggestions. Updating listbox...')
        self.update_listbox_corr()
    else:
        tkmessagebox.showwarning(warningtitle,
                                 'Could not find the CDE dictionary file')
    self.suggest_btn.config(state='normal')

def __readicoms_parallel(self, processes):
    """Reads all the dicoms using multiprocessing."""
    output = []
    if len(self.subfolders) > processes:
        LOGGER.info('dicom parallel processing with {} Processes'.format(processes))
        slices = list(splitdict(self.subfolders, processes))
        with Pool(processes) as p:
            output = p.map(self.readicoms_chunks, slices)
        for chunk in output:
            self.__patients += chunk['patients']
            self.__invalidseq += chunk['invalidseq']
            self.__notprocessed += chunk['notprocessed']
    else:
        LOGGER.info('Single core processing...')
        output = self.readicoms_chunks(self.subfolders)
        self.__patients += output['patients']
        self.__invalidseq += output['invalidseq']
        self.__notprocessed += output['notprocessed']

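# Minimal sketch of a splitdict-style helper as used above, assuming it only
# needs to yield roughly equal chunks of the subfolders dict (the project's
# actual helper may differ):
def splitdict_sketch(d, n):
    from itertools import islice
    items = iter(d.items())
    size, remainder = divmod(len(d), n)
    for i in range(n):
        # the first `remainder` chunks take one extra item so all of d is covered
        chunk = dict(islice(items, size + (1 if i < remainder else 0)))
        if chunk:
            yield chunk
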
def __is_mipmap_container_exist(self):
    proc_docker = subprocess.Popen(['docker', 'ps', '-a'],
                                   stdout=subprocess.PIPE)
    proc_grep = subprocess.Popen(['grep', self.__name],
                                 stdin=proc_docker.stdout,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
    stdout, _ = proc_grep.communicate()
    output = str(stdout).split()
    LOGGER.debug(output)
    try:
        container_name = output[-1]
        # remove the trailing newline special character
        container_name = container_name.rstrip("\\n'")
    except IndexError:
        container_name = None
    return container_name == self.__name

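# Alternative sketch: docker's own name filter can replace the grep pipe used
# above (assumes the docker CLI is on PATH, the same assumption the code makes):
def container_exists_sketch(name):
    result = subprocess.run(
        ['docker', 'ps', '-a', '--filter', 'name={}'.format(name),
         '--format', '{{.Names}}'],
        stdout=subprocess.PIPE)
    return name in result.stdout.decode('utf-8').split()
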
def __init__(self, frame):
    self.frame = frame
    # Create a ScrolledText widget
    self.scrolled_text = ScrolledText(frame, state='disabled', height=12)
    self.scrolled_text.pack(side='bottom', fill='both', expand='yes')
    #self.scrolled_text.grid(row=0, column=0, sticky=(N, S, W, E))
    self.scrolled_text.configure(font='TkFixedFont')
    self.scrolled_text.tag_config('INFO', foreground='black')
    self.scrolled_text.tag_config('DEBUG', foreground='gray')
    self.scrolled_text.tag_config('WARNING', foreground='orange')
    self.scrolled_text.tag_config('ERROR', foreground='red')
    self.scrolled_text.tag_config('CRITICAL', foreground='red', underline=1)
    # Create a logging handler using a queue
    self.log_queue = queue.Queue()
    self.queue_handler = QueueHandler(self.log_queue)
    formatter = logging.Formatter('%(asctime)s: %(message)s')
    self.queue_handler.setFormatter(formatter)
    LOGGER.addHandler(self.queue_handler)
    # Start polling messages from the queue
    self.frame.after(100, self.poll_log_queue)

def __check_db_container(self, mode='running'):
    """Checks if the db container is already running or exists.

    Arguments:
    :param mode: 'running' when the container is up and running,
                 'exist' when the container exists but is down.
    """
    if mode == 'running':
        cmd_docker = ['docker', 'ps']
    elif mode == 'exist':
        cmd_docker = ['docker', 'ps', '-a']
    else:
        raise DockerExecError('Invalid container check mode: {}.'.format(mode))
    proc_docker = subprocess.Popen(cmd_docker, stdout=subprocess.PIPE)
    proc_grep = subprocess.Popen(['grep', self.__db_cont_name],
                                 stdin=proc_docker.stdout,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
    stdout, stderr = proc_grep.communicate()
    output = str(stdout).split()
    LOGGER.debug(output)
    try:
        container_image = output[1]
        container_name = output[-1]
        container_port = output[-2]
        # remove the trailing newline special character
        container_name = container_name.rstrip("\\n'")
        container_port = find_xtport(container_port)
    except IndexError:
        container_name = None
        container_image = None
        container_port = None
    LOGGER.debug(
        'Found that there is an existing container with the name: {}'.format(
            container_name))
    if container_name == self.__db_cont_name:
        if container_image == self.__db_image:
            if mode == 'running':
                self.__is_db_running = True
            elif mode == 'exist':
                self.__is_db_exist = True
            if container_port != self.__dbport:
                LOGGER.warning(
                    'Using as external container port: {}'.format(container_port))
                self.__dbport = container_port
        else:
            msg = ('The name "{}" is used by another container. '
                   'Could not create postgres database container. '
                   'Please use another db container name.').format(
                       self.__db_cont_name)
            raise DockerExecError(msg)

def __create_db_container(self):
    """Creates a postgres 9.6 container."""
    self.__check_db_container(mode='running')
    self.__check_db_container(mode='exist')
    if self.__is_db_running:
        LOGGER.info('db container ({}) is already up and'
                    ' running. Skipping creation step...'.format(
                        self.__db_cont_name))
        self.__remove_create_db()
    elif self.__is_db_exist and not self.__is_db_running:
        LOGGER.info('db container ({}) already exists. '
                    'Restarting db container'.format(self.__db_cont_name))
        subprocess.run(['docker', 'restart', self.__db_cont_name])
        time.sleep(10)
        self.__remove_create_db()
    else:
        # create the db container
        LOGGER.debug('Creating db container with name {}'.format(
            self.__db_cont_name))
        arg_port = ['-p', '{}:5432'.format(self.__dbport)]
        arg_name = ['--name', self.__db_cont_name]
        arg_env1 = ['-e', 'POSTGRES_PASSWORD={}'.format(self.__dbpassword)]
        arg_env2 = ['-e', 'POSTGRES_USER={}'.format(self.__dbuser)]
        arg_img = ['-d', self.__db_image]
        command2 = ['docker', 'run'] + arg_port + arg_name + arg_env1 + arg_env2 + arg_img
        try:
            subprocess.run(command2)
            time.sleep(50)
            self.__remove_create_db()
        except subprocess.CalledProcessError:
            LOGGER.warning(
                'There was an error while attempting to create the db container.')
            raise DockerExecError(
                'There was an error while attempting to create the db container.')

def get_all_cdes(self):
    LOGGER.info(
        'Trying to retrieve cde metadata from the Data Catalogue. Using DC url: {}'
        .format(DC_DOMAIN))
    all_pathologies_url = ''.join([DC_DOMAIN, DC_SUBDOMAIN_ALLPATHOLOGIES])
    r = requests.get(all_pathologies_url)
    self.dc = DcConnector(r)
    if self.dc.status_code == 200:
        self.dc_combox1.config(values=self.dc.pathology_names)
    elif 500 <= self.dc.status_code <= 599:
        LOGGER.info('Data Catalogue server internal error.')
    elif 400 <= self.dc.status_code <= 499:
        LOGGER.info(
            'Data Catalogue could not be reached! Please check the DC_DOMAIN url in the config'
        )

def csv(input_csv, schema_json, clean, metadata, report, outlier):
    """This command produces a validation report for <csv file>.

    The report file is stored in the same folder where <csv file> is located.

    <schema json> file MUST be compliant with the frictionless data
    table-schema specs (https://specs.frictionlessdata.io/table-schema/)
    or with the Data Catalogue json format.
    """
    filename = os.path.basename(input_csv)
    # get the path of the csv file
    path = os.path.dirname(os.path.abspath(input_csv))
    dataset_name = os.path.splitext(filename)[0]
    pdfreportfile = os.path.join(path, dataset_name + '_report.pdf')
    xlsxreportfile = os.path.join(path, dataset_name + '_report.xlsx')
    correctedcsvfile = os.path.join(path, dataset_name + '_corrected.csv')
    # read the json file with the csv schema
    with open(schema_json) as json_file:
        dict_schema = json.load(json_file)
    # check if the metadata json follows the Data Catalogue specs
    if metadata == 'dc':
        LOGGER.info(
            'Translating from Data Catalogue to Frictionless json format...')
        dict_schema = FrictionlessFromDC(dict_schema).qcdescriptor
    schema = QcSchema(dict_schema)
    dataset = QcTable(input_csv, schema=schema)
    datasetreport = TableReport(dataset, threshold=outlier)
    # apply data cleaning corrections?
    if clean:
        datasetreport.apply_corrections()
        datasetreport.save_corrected(correctedcsvfile)
    if datasetreport.isvalid:
        LOGGER.info('The dataset is valid.')
    else:
        LOGGER.info('CAUTION! The dataset is invalid!')
    # export the report
    if report == 'pdf':
        datasetreport.printpdf(pdfreportfile)
    elif report == 'xls':
        datasetreport.printexcel(xlsxreportfile)

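# Hypothetical CLI invocation of the command above (the installed entry-point
# name and the exact option spellings depend on the click decorators, which are
# not shown here):
#
#   qctool csv dataset.csv dataset_schema.json --clean \
#       --metadata dc --report pdf --outlier 3
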
def extractSColumnsFunctions(expr, ddlFunctions, ddlColumns):
    try:
        # if it fails it will raise an exception
        CorrespondenceParser.firstCheckParentheses(expr)
    except SyntaxError as se:
        LOGGER.info(str(se))
        raise ExpressionError(str(se))
    try:
        columns = CorrespondenceParser.extractColumnsList(expr, ddlColumns)
    except ColumnNameError as cne:
        LOGGER.info(str(cne))
        raise ExpressionError(str(cne))
    try:
        CorrespondenceParser.extractSColumnsFunctionsR(expr, ddlFunctions, expr)
    except FunctionNameError as fne:
        LOGGER.info(str(fne))
        raise ExpressionError(str(fne))
    except ArgsFunctionError as afe:
        LOGGER.info(str(afe))
        raise ExpressionError(str(afe))
    #return columns, functions
    return columns

def __run_mapping(self):
    env = {}
    env['mipmap_map'] = self.__mapping
    env['mipmap_source'] = self.__source
    env['mipmap_output'] = self.__output
    env['mipmap_pgproperties'] = self.__dbprop
    env['mipmap_script'] = self.__scriptpath
    env['mipmap_target'] = self.__target
    env['mipmap_db'] = MIPMAP_DB_CONTAINER
    #LOGGER.debug(os.getenv('mipmap_pgproperties'))
    self.__template.stream(env).dump(self.__dcompose)
    if self.__is_mipmap_container_exist:
        LOGGER.info('Removing previous mipmap container...')
        subprocess.run(['docker', 'rm', self.__name])
    arguments = ['docker-compose', '-f', self.__dcompose, 'up', 'mipmap_etl']
    process = subprocess.Popen(arguments,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    LOGGER.info('Running docker-compose...')
    output, _ = process.communicate()
    LOGGER.info(output.decode('utf-8'))

def __init__(self, rootfolder, username):
    """
    Arguments:
    :param rootfolder: folder path with DICOM subfolders
    :param username: str with the username
    """
    start_time = time.time()
    self.reportdata = None
    self.rootfolder = rootfolder
    self.subfolders = getsubfolders(rootfolder)
    self.username = username
    self.dataset = {
        'version': __version__,
        'date_qc_ran': datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S'),
        'username': username,
        'dicomfolder': str(os.path.abspath(rootfolder))
    }
    # files that are not DICOMs (.dcm), as a list of (folder, filename)
    self.__notprocessed = []
    # MRISequence objects
    self.__invalidseq = []
    # MRIPatient objects
    self.__patients = []
    # statistic info
    self.__totalvalidseq = 0
    self.__totalstudies = 0
    self.__totalinvaliddicoms = 0
    self.__seriesdescriptions_valid = set()
    self.__seriesdescriptions_invalid = set()
    self.__seqperpatient_distr = {
        's1': 0,
        's2': 0,
        's3-s5': 0,
        's6more': 0,
    }
    self.__patientid_with_invalids = set()
    # read all the dcm files and calculate the QC stats
    self.__readicoms_parallel(mp.cpu_count() + 1)
    self.__collect_stats()
    LOGGER.debug('Dicom analysis running time: %s seconds' %
                 (time.time() - start_time))
    LOGGER.debug('Folders read: %i' % len(self.subfolders))
    LOGGER.debug('Total MRI sequences found: %i' %
                 (self.totalvalidsequences + self.totalinvalidsequences))
    LOGGER.info('Patients with good seq: %i' % self.totalpatients)
    LOGGER.info('Total visits: %i' % self.totalvisits)
    LOGGER.info('Good seq: %i' % self.totalvalidsequences)
    LOGGER.info('Bad seq: %i' % self.totalinvalidsequences)
    LOGGER.info('Files not processed: %i' % self.totalbadfiles)

def infer(self, rows, headers=1, confidence=0.75, maxlevels=10,
          na_empty_strings_only=False):
    # Get headers
    if isinstance(headers, int):
        headers_row = headers
        while True:
            headers_row -= 1
            headers = rows.pop(0)
            if not headers_row:
                break
    elif not isinstance(headers, list):
        headers = []

    # Get descriptor
    guesser = _QcTypeGuesser()
    resolver = _QcTypeResolver()
    descriptor = {'fields': []}
    type_matches = {}
    unique_values = {}
    missingvalues = set()
    for header in headers:
        descriptor['fields'].append({'name': header})
    LOGGER.info('{} sample rows are used for table schema inference'.format(len(rows)))
    for row_number, row in enumerate(rows):
        # normalize rows with invalid dimensions for sanity
        row_length = len(row)
        headers_length = len(headers)
        if row_length > headers_length:
            row = row[:len(headers)]
        if row_length < headers_length:
            diff = headers_length - row_length
            fill = [''] * diff
            row = row + fill
        # build a column-wise lookup of type matches
        for index, value in enumerate(row):
            # remove leading and trailing whitespace
            value = value.strip()
            rv = guesser.infer(value, na_empty_strings_only=na_empty_strings_only)
            name = rv[0]
            pattern = rv[1]
            # collect unique values for a possible nominal variable
            if pattern == 'text' or name == 'integer':
                if unique_values.get(index):
                    unique_values[index].add(value)
                else:
                    unique_values[index] = set()
                    unique_values[index].add(value)
            # collect the nans
            elif pattern == 'nan':
                missingvalues.add(value)
            if type_matches.get(index):
                type_matches[index].append(rv)
            else:
                type_matches[index] = [rv]
    # choose a type/format for each column based on the matches
    for index, results in type_matches.items():
        uniques = unique_values.get(index)
        rv = resolver.get(results, uniques, maxlevels, confidence)
        descriptor['fields'][index].update(**rv)
    # case: missing values have been found
    if len(missingvalues) > 0:
        # add the default missing values in any case
        missingvalues.update(set(config.DEFAULT_MISSING_VALUES))
        # sort the missing values and update the descriptor
        missing_sorted = list(missingvalues)
        missing_sorted.sort()
        descriptor['missingValues'] = list(missing_sorted)
    # case: no missing values were found, use the defaults
    elif len(missingvalues) == 0:
        descriptor['missingValues'] = config.DEFAULT_MISSING_VALUES
    # Save descriptor
    self._Schema__current_descriptor = descriptor
    self.__build()
    self.__infered = True
    return descriptor

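# Usage sketch for the inference above (hypothetical sample data; schema is an
# instance of the class carrying this infer method, and the exact per-field
# keys filled in depend on _QcTypeGuesser/_QcTypeResolver):
#
#   rows = [['id', 'diagnosis', 'age'],
#           ['1', 'AD', '74'],
#           ['2', 'MCI', '81']]
#   descriptor = schema.infer(rows, headers=1, maxlevels=10)
#   # descriptor['fields'] -> one dict per column, starting from {'name': <header>}
#   # descriptor['missingValues'] -> the defaults unless NaN-like cells were seen
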
def __add_replacement(self, sourcevalue, targetvalue):
    LOGGER.debug('Target value is: {} and the source value is: {}'.format(
        targetvalue, sourcevalue))
    stringforbox = '->'.join([sourcevalue, targetvalue])
    self.func_replace_trg_listbox.insert(tk.END, stringforbox)

def __init__(self, dcjson):
    # generates the tree structure of the loaded DC json
    LOGGER.info('Finding variable tree...')
    self.rootnode = Node(dcjson)
    self.__dc2qc()

def createreport(self):
    self.button_exec.config(state='disabled')
    LOGGER.info('Checking if the necessary fields are filled in...')
    warningtitle = 'Cannot create report'
    if not self.dname:
        tkmessagebox.showwarning(warningtitle, 'Please, select dataset file')
    #elif not self.d_headers_cbox.get():
    #    tkmessagebox.showwarning(warningtitle, 'Please, select ColumnID')
    elif self.md_frame.from_disk.get() and not self.md_frame.metafilepath:
        tkmessagebox.showwarning(warningtitle, 'Please, select metadata file')
    elif self.md_frame.from_dc.get() and not self.md_frame.dc_json:
        tkmessagebox.showwarning(warningtitle,
                                 'Could not get metadata from the Data Catalogue')
    elif not self.__reportfilepath:
        tkmessagebox.showwarning(warningtitle,
                                 'Please, select report file first')
    else:
        try:
            threshold = float(self.outlier_threshold.get())
            LOGGER.info('Outlier threshold: %s' % self.outlier_threshold.get())
        except ValueError:
            LOGGER.warning('Could not retrieve outlier threshold. '
                           'Setting it to default value: 3')
            threshold = 3
        LOGGER.info('Everything looks ok...')
        #filedir = self.__exportfiledir
        #basename = os.path.splitext(self.dname)[0]
        #pdfreportfile = os.path.join(filedir, basename + '_report.pdf')
        #xlsxreportfile = os.path.join(filedir, basename + '_report.xlsx')
        schema_type = 'qc'
        if self.md_frame.from_disk.get():
            LOGGER.info('Retrieving Metadata from local disk...')
            LOGGER.info('Using metadata file: %s' % self.md_frame.metafilepath)
            with open(self.md_frame.metafilepath) as json_file:
                dict_schema = json.load(json_file)
            if self.md_frame.json_type.get() == 2:
                schema_type = 'dc'
        elif self.md_frame.from_dc.get():
            LOGGER.info('Retrieving Metadata from the Data Catalogue...')
            LOGGER.info('Selected pathology is {}, CDE version: {}'.format(
                self.md_frame.selected_pathology.get(),
                self.md_frame.selected_version.get()))
            dict_schema = self.md_frame.dc_json
            schema_type = 'dc'
        try:
            self.reportcsv = TableReport.from_disc(
                self.datasetpath,
                dict_schema=dict_schema,
                schema_type=schema_type,
                threshold=threshold)  #id_column=self.d_headers_cbox.current()
            if self.reportcsv.isvalid:
                LOGGER.info('The dataset is valid.')
            else:
                LOGGER.info('CAUTION! The dataset is invalid!')
            # Perform Data Cleaning?
            #if self.cleaning.get():
            #    self.reportcsv.apply_corrections()
            #    self.reportcsv.save_corrected(correctedcsvfile)
            # Create the report
            if self.report_type.get() == 1:
                self.reportcsv.printexcel(self.__reportfilepath)
            else:
                self.reportcsv.printpdf(self.__reportfilepath)
            #self.label_export2.config(text=filedir)
            tkmessagebox.showinfo(
                title='Status info',
                message='Reports have been created successfully')
            self.show_sugg_button.config(state='normal')
            self.clean_button.config(state='normal')
        except QCToolException as e:
            errortitle = 'Something went wrong!'
            tkmessagebox.showerror(errortitle, e)
    self.button_exec.config(state='normal')

def on_select_version(self, event):
    LOGGER.info('Retrieving metadata json')
    if self.dc and self.dc.status_code == 200:
        self.dc_json = self.dc.getjson(self.selected_pathology.get(),
                                       self.selected_version.get())