def copy_datasets_to_analysis_data(self, datasets):
    """Concatenate the given datasets into one new analysis dataset.

    The current analysis data is cleared first. Every visit, sample and
    variable node found in ``datasets`` is then re-created under a single new
    ``plankton_core.DatasetNode``, copying only the fields listed below
    (fields missing on the source node are stored as ``''``).

    Args:
        datasets: Iterable of dataset nodes. Each node must offer
            ``get_children()`` down to variable level and ``get_data()``.

    Raises:
        UserWarning: If the concatenated dataset contains no visits.
    """
    # Clear analysis data.
    self.clear_data()
    analysis_dataset = plankton_core.DatasetNode()
    columnsinfo = self.create_export_table_info()
    analysis_dataset.set_export_table_columns(columnsinfo)

    visit_items = (
        'visit_year', 'sample_date', 'visit_month', 'station_name',
        'sample_latitude_dd', 'sample_longitude_dd', 'water_depth_m',
    )
    sample_items = (
        'sample', 'sample_id', 'sample_min_depth_m', 'sample_max_depth_m',
    )
    # 'variable' was listed twice in the original list; copying it once gives
    # the same result.
    variable_items = (
        'variable', 'scientific_name', 'species_flag_code', 'size_class',
        'trophic_type', 'parameter', 'value', 'unit', 'plankton_group',
        'taxon_kingdom', 'taxon_phylum', 'taxon_class', 'taxon_order',
        'taxon_family', 'taxon_genus', 'taxon_hierarchy',
    )

    def copy_items(source_node, target_node, items):
        # Copy the selected fields, using '' for fields missing on the source.
        for item in items:
            target_node.add_data(item, source_node.get_data(item, ''))

    for datasetnode in datasets:
        for visitnode in datasetnode.get_children():
            analysis_visit = plankton_core.VisitNode()
            analysis_dataset.add_child(analysis_visit)
            copy_items(visitnode, analysis_visit, visit_items)
            #
            for samplenode in visitnode.get_children():
                analysis_sample = plankton_core.SampleNode()
                analysis_visit.add_child(analysis_sample)
                copy_items(samplenode, analysis_sample, sample_items)
                #
                for variablenode in samplenode.get_children():
                    analysis_variable = plankton_core.VariableNode()
                    analysis_sample.add_child(analysis_variable)
                    copy_items(variablenode, analysis_variable, variable_items)

    # Check. The dataset node was created above, so it can never be None;
    # only the "no visits copied" case needs to be detected.
    if not analysis_dataset.get_children():
        toolbox_utils.Logging().log('Selected datasets are empty.')
        raise UserWarning('Selected datasets are empty.')
    # Use the concatenated dataset for analysis.
    self.set_data(analysis_dataset)
def _create_variable(self, current_node, **kwargs):
    """Attach a new variable node below ``current_node``.

    To be called from Excel-based parser.

    When ``current_node`` is a visit node, a new sample node is inserted
    between the visit and the variable. When it is a sample node, the
    variable is added directly to it. Any other node type is ignored.

    Keyword Args:
        p: Stored on the variable as 'parameter'.
        v: Stored on the variable as 'value'.
        u: Stored on the variable as 'unit'.
    """

    def attach_variable(parent_node):
        # Register the variable on its parent first, then fill in its fields.
        # (A 'value_float' field is intentionally not written here.)
        new_variable = plankton_core.VariableNode()
        parent_node.add_child(new_variable)
        for field, key in (('parameter', 'p'), ('value', 'v'), ('unit', 'u')):
            new_variable.add_data(field, kwargs[key])

    # The two checks are deliberately independent (no elif), as in the
    # original implementation.
    if isinstance(current_node, plankton_core.VisitNode):
        intermediate_sample = plankton_core.SampleNode()
        current_node.add_child(intermediate_sample)
        attach_variable(intermediate_sample)
    if isinstance(current_node, plankton_core.SampleNode):
        attach_variable(current_node)
def create_tree_dataset(self, dataset_top_node, update_trophic_type):
    """Build the visit/sample/variable tree for one sample below a dataset node.

    Each row of ``self._parsing_info`` is read as
    ``(level, target_field, <unused here>, source_key)``:

    * level 'dataset', 'visit' and 'sample': ``self._sample_info[source_key]``
      is copied to ``target_field`` on the matching node, if the key exists.
    * level 'variable': the value is taken first from ``self._sample_info``
      and then, when non-empty, from the data row (merged with the method
      info selected by the row's 'method_step').
    * level 'copy_parameter': ``target_field`` is '<parameter>:<unit>' and a
      non-blank row value is passed to ``self.copy_variable``.

    Args:
        dataset_top_node: Node receiving the dataset fields and the new
            visit node.
        update_trophic_type: If true, 'trophic_type' is replaced by the value
            returned by ``plankton_core.Species().get_bvol_value`` whenever
            that lookup returns something.
    """

    def add_sample_info(node, level):
        # Copy the sample_info fields that the parsing info maps to `level`.
        for parsinginforow in self._parsing_info:
            if parsinginforow[0] == level and parsinginforow[3] in self._sample_info:
                node.add_data(parsinginforow[1],
                              self._sample_info[parsinginforow[3]])

    # Add data to dataset node.
    add_sample_info(dataset_top_node, 'dataset')

    # Create visit node and add data. Note: Only one visit in each file.
    visitnode = plankton_core.VisitNode()
    dataset_top_node.add_child(visitnode)
    add_sample_info(visitnode, 'visit')

    # Add visit_year and visit_month, taken from fixed positions in sample_date.
    sample_date = visitnode.get_data('sample_date', '')
    for field, date_part in (('visit_year', slice(0, 4)),
                             ('visit_month', slice(5, 7))):
        try:
            visitnode.add_data(field, sample_date[date_part])
        except Exception:
            # Best effort: a sample_date that cannot be sliced leaves the field
            # unset. Unlike the former bare except, this no longer swallows
            # KeyboardInterrupt/SystemExit.
            pass

    # Create sample node and add data. Note: Only one sample in each file.
    samplenode = plankton_core.SampleNode()
    visitnode.add_child(samplenode)
    add_sample_info(samplenode, 'sample')

    # The parsing info does not depend on the row, so filter it once.
    variable_rules = [parsinginforow for parsinginforow in self._parsing_info
                      if parsinginforow[0] == 'variable']
    copy_parameter_rules = [parsinginforow for parsinginforow in self._parsing_info
                            if parsinginforow[0] == 'copy_parameter']

    # Create variable nodes.
    for row in self._sample_rows:
        variablenode = plankton_core.VariableNode()
        samplenode.add_child(variablenode)
        # Get info from sample_info.
        for parsinginforow in variable_rules:
            variablenode.add_data(parsinginforow[1],
                                  self._sample_info.get(parsinginforow[3], ''))
        # Merge data header and row.
        row_dict = dict(zip(self._sample_header, row))
        # Get info from sample_method and add to row_dict.
        if 'method_step' in row_dict:
            method_step = row_dict['method_step']
            if method_step in self._sample_method_dict:
                row_dict.update(self._sample_method_dict[method_step])
            else:
                print('DEBUG: Key: "' + method_step +
                      '" not in sample_method_dict.')
        # Get info from row.
        for parsinginforow in variable_rules:
            value = row_dict.get(parsinginforow[3], '')
            # Update trophic_type.
            if parsinginforow[1] == 'trophic_type':
                if update_trophic_type:
                    scientific_name = row_dict.get('scientific_name', '')
                    size_class = row_dict.get('size_class', '')
                    trophic_type = plankton_core.Species().get_bvol_value(
                        scientific_name, size_class, 'trophic_type')
                    if trophic_type:
                        value = trophic_type  # Use existing if not in local list.
                # Replace empty with NS=Not specified.
                if not value:
                    value = 'NS'
            if len(value) > 0:  # Don't overwrite from previous step.
                variablenode.add_data(parsinginforow[1], value)
        # Copy to new variable nodes for parameters.
        for parsinginforow in copy_parameter_rules:
            paramunit = parsinginforow[1].split(':')
            parameter = paramunit[0]
            unit = paramunit[1]
            value = row_dict.get(parsinginforow[3], '')
            if len(value.strip()) > 0:
                self.copy_variable(variablenode, p=parameter, v=value, u=unit)
def add_missing_taxa(self, analysisdata, include_trophictypes, include_stagesex):
    """Add zero-valued variables so every sample has every taxon/parameter pair.

    All (parameter, unit) pairs and all taxon keys
    (scientific_name, trophic_type, stage, sex) found anywhere in
    ``analysisdata`` are collected. Each sample then gets a new variable node
    with value 0.0 for every parameter/taxon combination it does not already
    contain.

    Args:
        analysisdata: Dataset node (visits -> samples -> variables). Nothing
            is done when it is falsy.
        include_trophictypes: If true, 'trophic_type' is part of the taxon
            key; otherwise it is treated as ''.
        include_stagesex: If true, 'stage' and 'sex' are part of the taxon
            key; otherwise they are treated as ''.
    """
    if not analysisdata:
        return

    def parameter_key(variablenode):
        # (parameter, unit) pair identifying what was measured.
        return (variablenode.get_data('parameter'),
                variablenode.get_data('unit'))

    def taxon_key(variablenode):
        # (name, trophic_type, stage, sex); parts not included are blanked.
        taxonname = variablenode.get_data('scientific_name')
        trophic_type = ''
        stage = ''
        sex = ''
        if include_trophictypes:
            trophic_type = variablenode.get_data('trophic_type')
        if include_stagesex:
            stage = variablenode.get_data('stage')
            sex = variablenode.get_data('sex')
        return (taxonname, trophic_type, stage, sex)

    # Step 1: Create sets of taxa (name, trophic_type, stage and sex)
    # and parameters (parameter and unit).
    parameter_set = set()
    taxon_set = set()
    for visitnode in analysisdata.get_children():
        for samplenode in visitnode.get_children():
            for variablenode in samplenode.get_children():
                parameterpair = parameter_key(variablenode)
                if parameterpair[0]:
                    parameter_set.add(parameterpair)
                taxonpair = taxon_key(variablenode)
                if taxonpair[0]:
                    taxon_set.add(taxonpair)

    # Step 2: Create list with parameter-taxon pairs.
    parameter_taxon_list = [(parameterpair, taxonpair)
                            for parameterpair in parameter_set
                            for taxonpair in taxon_set]

    # Step 3: Iterate over samples.
    for visitnode in analysisdata.get_children():
        for samplenode in visitnode.get_children():
            # A set gives constant-time membership tests; the former list was
            # scanned once for every parameter-taxon pair.
            sample_parameter_taxon_set = {
                (parameter_key(variablenode), taxon_key(variablenode))
                for variablenode in samplenode.get_children()}
            # Add missing variables.
            for itempairs in parameter_taxon_list:
                if itempairs in sample_parameter_taxon_set:
                    continue
                (parameter, unit), (taxonname, trophic_type, stage, sex) = itempairs
                variable = plankton_core.VariableNode()
                samplenode.add_child(variable)
                # Parts not included in the key are already '' (see taxon_key).
                variable.add_data('scientific_name', taxonname)
                variable.add_data('trophic_type', trophic_type)
                variable.add_data('stage', stage)
                variable.add_data('sex', sex)
                variable.add_data('parameter', parameter)
                variable.add_data('value', float(0.0))
                variable.add_data('unit', unit)
def create_filtered_dataset(self):
    """Return a new dataset tree holding only the nodes that pass the filter.

    Filter items read from ``self._filter``:
    - 'start_date' and 'end_date': Compared with the visit's 'sample_date'.
    - 'stations': Allowed 'station_name' values.
    - 'visit_months': Allowed 'visit_month' values.
    - 'visits': Contains <station_name> : <date>
    - 'min_max_depth_m': Contains <sample_min_depth_m>-<sample_max_depth_m>
    - 'scientific_name'
    - 'trophic_type'
    - 'life_stage': Contains <stage> or <stage>/<sex>

    A sample node is kept when its depth interval matches, even if none of
    its variables pass the variable-level filters. An empty dataset node is
    returned when there is no analysis data.
    """
    # Create a tree dataset for filtered data.
    result = plankton_core.DatasetNode()
    source = self.get_data()
    if not source:
        return result
    # Export info needed to convert from tree to table.
    result.set_export_table_columns(source.get_export_table_columns())

    # Get selected data info. All keys are read up front.
    first_date = self._filter['start_date']
    last_date = self._filter['end_date']
    allowed_stations = self._filter['stations']
    allowed_months = self._filter['visit_months']
    allowed_visits = self._filter['visits']
    allowed_depths = self._filter['min_max_depth_m']
    allowed_taxa = self._filter['scientific_name']
    allowed_trophic_types = self._filter['trophic_type']
    allowed_lifestages = self._filter['life_stage']

    def clone_into(parent, original, node_class):
        # Create node and copy node data.
        clone = node_class()
        clone.set_data_dict(original.get_data_dict())
        parent.add_child(clone)
        return clone

    for visit in source.get_children():
        if (first_date > visit.get_data('sample_date')
                or last_date < visit.get_data('sample_date')
                or visit.get_data('station_name') not in allowed_stations
                or visit.get_data('visit_month') not in allowed_months):
            continue
        visit_label = ' : '.join((str(visit.get_data('station_name')),
                                  str(visit.get_data('sample_date'))))
        if visit_label not in allowed_visits:
            continue
        kept_visit = clone_into(result, visit, plankton_core.VisitNode)

        for sample in visit.get_children():
            depth_label = '-'.join((str(sample.get_data('sample_min_depth_m')),
                                    str(sample.get_data('sample_max_depth_m'))))
            if depth_label not in allowed_depths:
                continue
            kept_sample = clone_into(kept_visit, sample, plankton_core.SampleNode)

            for variable in sample.get_children():
                if variable.get_data('scientific_name') not in allowed_taxa:
                    continue
                if variable.get_data('trophic_type') not in allowed_trophic_types:
                    continue
                # Life stage label is <stage>, or <stage>/<sex> when sex is set.
                lifestage_label = variable.get_data('stage')
                sex = variable.get_data('sex')
                if sex:
                    lifestage_label = lifestage_label + '/' + sex
                if lifestage_label in allowed_lifestages:
                    clone_into(kept_sample, variable, plankton_core.VariableNode)

    return result
def _aggregate_data(self):
    """Aggregate the loaded analysis data, in place, to the selected taxon rank.

    Settings are read from the widgets ``self._aggregate_rank_list``
    (selected rank), ``self._trophic_type_listview`` and
    ``self._lifestage_listview`` (selected trophic types and life stages).

    For every sample in ``self._analysisdata.get_data()``:

    * each variable's 'value' is converted to float (decimal comma replaced
      by a dot, spaces removed); variables that fail are skipped,
    * variables whose 'trophic_type' is not among the selected trophic types
      are skipped,
    * values are summed per
      (taxon, trophic_type, stage, sex, parameter, unit),
    * all existing variables of the sample are removed and replaced by one
      new variable per aggregated key.

    Finally ``self._main_activity.update_viewed_data_and_tabs()`` is called.
    Nothing is returned and no exception derived from ``Exception`` escapes
    this method; failures are logged.
    """
    try:
        try:
            # if self._aggregate_rank_list.currentIndex() == 0:
            #     toolbox_utils.Logging().log('Taxon level is not selected. Please try again.')
            #     raise UserWarning('Taxon level is not selected. Please try again.')
            if not self._analysisdata.get_data():
                toolbox_utils.Logging().log(
                    'No data is loaded for analysis. Please try again.')
                # NOTE(review): this is raised outside the inner try below, so
                # it is handled by the outermost ``except Exception`` (log
                # only) and not by the ``except UserWarning`` message box.
                raise UserWarning(
                    'No data is loaded for analysis. Please try again.')
            #
            toolbox_utils.Logging().log('Aggregating data...')
            toolbox_utils.Logging().start_accumulated_logging()
            try:
                #
                selected_taxon_rank = str(
                    self._aggregate_rank_list.currentText())
                selected_trophic_type_list = self._trophic_type_listview.getSelectedDataList(
                )
                selected_trophic_type_text = '-'.join(
                    selected_trophic_type_list)
                selected_lifestage_list = self._lifestage_listview.getSelectedDataList(
                )
                selected_lifestage_text = '-'.join(selected_lifestage_list)
                # Child lists are iterated as copies ([:]) because the
                # variables of each sample are removed and re-added below.
                for visitnode in self._analysisdata.get_data(
                ).get_children()[:]:
                    for samplenode in visitnode.get_children()[:]:
                        # Sum of values per aggregation key tuple.
                        aggregatedvariables = {}
                        # Original trophic types seen per aggregated taxon.
                        trophic_type_set_dict = {}  ### TEST
                        for variablenode in samplenode.get_children()[:]:
                            newtaxon = None
                            value = variablenode.get_data('value')
                            try:
                                value = value.replace(',', '.').replace(
                                    ' ', '')  # Try/except if already float.
                            except:
                                pass
                            # Use values containing valid float data.
                            # NOTE(review): the bare except at the end of this
                            # try block also catches any other error raised in
                            # its body, not only a failing float() conversion.
                            try:
                                value = float(value)
                                #
                                if selected_taxon_rank == 'Biota (all levels)':
                                    newtaxon = 'Biota'  # Biota is above kingdom in the taxonomic hierarchy.
                                elif selected_taxon_rank == 'Plankton group':
                                    newtaxon = plankton_core.Species(
                                    ).get_plankton_group_from_taxon_name(
                                        variablenode.get_data(
                                            'scientific_name'))
                                elif selected_taxon_rank == 'Kingdom':
                                    newtaxon = plankton_core.Species(
                                    ).get_taxon_value(
                                        variablenode.get_data(
                                            'scientific_name'),
                                        'taxon_kingdom')
                                elif selected_taxon_rank == 'Phylum':
                                    newtaxon = plankton_core.Species(
                                    ).get_taxon_value(
                                        variablenode.get_data(
                                            'scientific_name'),
                                        'taxon_phylum')
                                elif selected_taxon_rank == 'Class':
                                    newtaxon = plankton_core.Species(
                                    ).get_taxon_value(
                                        variablenode.get_data(
                                            'scientific_name'),
                                        'taxon_class')
                                elif selected_taxon_rank == 'Order':
                                    newtaxon = plankton_core.Species(
                                    ).get_taxon_value(
                                        variablenode.get_data(
                                            'scientific_name'),
                                        'taxon_order')
                                elif selected_taxon_rank == 'Family':
                                    newtaxon = plankton_core.Species(
                                    ).get_taxon_value(
                                        variablenode.get_data(
                                            'scientific_name'),
                                        'taxon_family')
                                elif selected_taxon_rank == 'Genus':
                                    newtaxon = plankton_core.Species(
                                    ).get_taxon_value(
                                        variablenode.get_data(
                                            'scientific_name'),
                                        'taxon_genus')
                                elif selected_taxon_rank == 'Species':
                                    newtaxon = plankton_core.Species(
                                    ).get_taxon_value(
                                        variablenode.get_data(
                                            'scientific_name'),
                                        'taxon_species')
                                elif selected_taxon_rank == 'Scientific name':
                                    newtaxon = variablenode.get_data(
                                        'scientific_name')
                                elif selected_taxon_rank == 'Kingdom (from dataset)':
                                    newtaxon = variablenode.get_data(
                                        'taxon_kingdom')
                                elif selected_taxon_rank == 'Phylum (from dataset)':
                                    newtaxon = variablenode.get_data(
                                        'taxon_phylum')
                                elif selected_taxon_rank == 'Class (from dataset)':
                                    newtaxon = variablenode.get_data(
                                        'taxon_class')
                                elif selected_taxon_rank == 'Order (from dataset)':
                                    newtaxon = variablenode.get_data(
                                        'taxon_order')
                                elif selected_taxon_rank == 'Family (from dataset)':
                                    newtaxon = variablenode.get_data(
                                        'taxon_family')
                                elif selected_taxon_rank == 'Genus (from dataset)':
                                    newtaxon = variablenode.get_data(
                                        'taxon_genus')
                                elif selected_taxon_rank == 'Species (from dataset)':
                                    newtaxon = variablenode.get_data(
                                        'taxon_species')
                                # If not found in classification, then use scientific_name.
                                # This is valid for taxon with rank above the selected rank.
                                if not newtaxon:
                                    newtaxon = variablenode.get_data(
                                        'scientific_name')
                                #
                                if not newtaxon:
                                    toolbox_utils.Logging().warning(
                                        'Not match for selected rank. "not-designated" assigned for: '
                                        + variablenode.get_data(
                                            'scientific_name'))
                                    newtaxon = 'not-designated'  # Use this if empty.
                                #
                                taxontrophic_type = variablenode.get_data(
                                    'trophic_type')
                                if taxontrophic_type in selected_trophic_type_list:
                                    taxontrophic_type = selected_trophic_type_text  # Concatenated string of ranks.
                                    # Remember which original trophic types
                                    # were merged into this taxon.
                                    ### TEST
                                    if newtaxon not in trophic_type_set_dict:  ### TEST
                                        trophic_type_set_dict[
                                            newtaxon] = set()  ### TEST
                                    trophic_type_set_dict[newtaxon].add(
                                        variablenode.get_data(
                                            'trophic_type'))  ### TEST
                                else:
                                    continue  # Phytoplankton only: Use selected trophic_type only, don't use others.
                                #
                                stage = variablenode.get_data('stage')
                                sex = variablenode.get_data('sex')
                                # Life stages are matched as <stage> or <stage>/<sex>.
                                checkstage = stage
                                if sex:
                                    checkstage += '/' + sex
                                if checkstage in selected_lifestage_list:
                                    stage = selected_lifestage_text
                                    sex = ''
                                # else:
                                #     continue # Note: Don't skip for zooplankton.
                                #
                                parameter = variablenode.get_data(
                                    'parameter')
                                unit = variablenode.get_data('unit')
                                agg_tuple = (newtaxon, taxontrophic_type,
                                             stage, sex, parameter, unit)
                                if agg_tuple in aggregatedvariables:
                                    aggregatedvariables[
                                        agg_tuple] = value + aggregatedvariables[
                                            agg_tuple]
                                else:
                                    aggregatedvariables[agg_tuple] = value
                            except:
                                # Empty values are skipped silently; non-empty
                                # ones are reported.
                                if variablenode.get_data('value'):
                                    toolbox_utils.Logging().warning(
                                        'Value is not a valid float: ' +
                                        str(variablenode.get_data('value'))
                                    )
                        # Remove all variables for this sample.
                        samplenode.remove_all_children()
                        # Add the new aggregated variables instead.
                        for variablekeytuple in aggregatedvariables:
                            newtaxon, taxontrophic_type, stage, sex, parameter, unit = variablekeytuple
                            #
                            newvariable = plankton_core.VariableNode()
                            samplenode.add_child(newvariable)
                            #
                            newvariable.add_data('scientific_name', newtaxon)
                            # The stored trophic_type is the sorted, '-'-joined
                            # set of original trophic types collected for this
                            # taxon, not the taxontrophic_type of the key.
                            ### TEST. newvariable.add_data('trophic_type', taxontrophic_type)
                            newvariable.add_data('trophic_type', '-'.join(
                                sorted(
                                    trophic_type_set_dict.get(
                                        newtaxon, []))))  ### TEST
                            newvariable.add_data('stage', stage)
                            newvariable.add_data('sex', sex)
                            newvariable.add_data('parameter', parameter)
                            newvariable.add_data('unit', unit)
                            newvariable.add_data(
                                'value', aggregatedvariables[variablekeytuple])
                            # Add taxon class, etc. based on taxon name.
                            newvariable.add_data(
                                'taxon_kingdom',
                                plankton_core.Species().get_taxon_value(
                                    newtaxon, 'taxon_kingdom'))
                            newvariable.add_data(
                                'taxon_phylum',
                                plankton_core.Species().get_taxon_value(
                                    newtaxon, 'taxon_phylum'))
                            newvariable.add_data(
                                'taxon_class',
                                plankton_core.Species().get_taxon_value(
                                    newtaxon, 'taxon_class'))
                            newvariable.add_data(
                                'taxon_order',
                                plankton_core.Species().get_taxon_value(
                                    newtaxon, 'taxon_order'))
                            newvariable.add_data(
                                'taxon_family',
                                plankton_core.Species().get_taxon_value(
                                    newtaxon, 'taxon_family'))
                            newvariable.add_data(
                                'taxon_genus',
                                plankton_core.Species().get_taxon_value(
                                    newtaxon, 'taxon_genus'))
                            newvariable.add_data(
                                'taxon_species',
                                plankton_core.Species().get_taxon_value(
                                    newtaxon, 'taxon_species'))
                #
                self._main_activity.update_viewed_data_and_tabs()
            except UserWarning as e:
                toolbox_utils.Logging().error(
                    'Failed to aggregate data. ' + str(e))
                QtWidgets.QMessageBox.warning(
                    self._main_activity, 'Warning',
                    'Failed to aggregate data. ' + str(e))
        finally:
            # Always flush the accumulated log rows, also after a failure.
            toolbox_utils.Logging().log_all_accumulated_rows()
            toolbox_utils.Logging().log('Aggregation of data is done.')
    #
    except Exception as e:
        debug_info = self.__class__.__name__ + ', row ' + str(
            sys._getframe().f_lineno)
        toolbox_utils.Logging().error('Exception: (' + debug_info + '): ' +
                                      str(e))
def create_tree_dataset(self, dataset, update_trophic_type):
    """Convert the imported table rows into visit/sample/variable nodes.

    For each row in ``self._rows`` the visit and sample are looked up in
    ``dataset`` by a key string built from ``self._visit_key_fields`` /
    ``self._sample_key_fields`` (values joined with '<+>') and created when
    missing. One new variable node is always added per row. Each row of
    ``self._parsing_info`` is read as
    ``(level, target_field, field_type, source_key)`` and the resulting value
    is stored on the visit, sample or variable node according to ``level``.

    Derived fields:
    - 'visit_month': Characters 5-6 of the row's 'sample_date'.
    - 'plankton_group': Looked up from the row's 'scientific_name'.
    - 'analysed_by': Falls back to the row's 'taxonomist' when empty.
    - 'trophic_type': Optionally replaced from the species list; empty
      values become 'NS' (not specified).

    Args:
        dataset: Dataset node offering ``get_visit_lookup``,
            ``get_sample_lookup`` and ``add_child``.
        update_trophic_type: If true, 'trophic_type' is replaced by the value
            returned by ``plankton_core.Species().get_bvol_value`` whenever
            that lookup returns something.

    Any exception derived from ``Exception`` stops the parsing and is logged
    as a warning instead of being propagated.
    """
    try:
        # Iterate over rows in imported_table.
        for row in self._rows:
            row_dict = dict(zip(self._header, row))

            # Check if visit exists. Create or reuse.
            keystring = '<+>'.join(
                row_dict.get(key_field, '')
                for key_field in self._visit_key_fields)
            currentvisit = dataset.get_visit_lookup(keystring)
            if not currentvisit:
                currentvisit = plankton_core.VisitNode()
                dataset.add_child(currentvisit)
                currentvisit.set_id_string(keystring)

            # Check if sample exists. Create or reuse.
            keystring = '<+>'.join(
                row_dict.get(key_field, '')
                for key_field in self._sample_key_fields)
            currentsample = dataset.get_sample_lookup(keystring)
            if not currentsample:
                currentsample = plankton_core.SampleNode()
                currentvisit.add_child(currentsample)
                currentsample.set_id_string(keystring)

            # Add all variables in row.
            currentvariable = plankton_core.VariableNode()
            currentsample.add_child(currentvariable)

            # === Parse row and add fields on nodes. ===
            for parsinginforow in self._parsing_info:
                value = row_dict.get(parsinginforow[3], '')
                level = parsinginforow[0]
                field = parsinginforow[1]
                # Fix float.
                if parsinginforow[2] == 'float':
                    value = value.replace(',', '.')
                # Calculate some values. These lookups are best effort: on
                # failure the value computed so far is kept. Unlike the former
                # bare excepts, KeyboardInterrupt/SystemExit are not swallowed.
                if field == 'visit_month':
                    try:
                        value = row_dict.get('sample_date', '')
                        value = value[5:7]
                    except Exception:
                        pass
                if field == 'plankton_group':
                    try:
                        value = row_dict.get('scientific_name', '')
                        value = plankton_core.Species(
                        ).get_plankton_group_from_taxon_name(value)
                    except Exception:
                        pass
                if field == 'analysed_by':
                    if not value:
                        value = row_dict.get('taxonomist', '')
                if field == 'trophic_type':
                    # Update trophic_type.
                    if update_trophic_type:
                        scientific_name = row_dict.get('scientific_name', '')
                        size_class = row_dict.get('size_class', '')
                        trophic_type = plankton_core.Species().get_bvol_value(
                            scientific_name, size_class, 'trophic_type')
                        if trophic_type:
                            value = trophic_type  # Use existing if not in local list.
                    # Replace empty with NS=Not specified.
                    if not value:
                        value = 'NS'
                # Add at right level.
                if level == 'visit':
                    currentvisit.add_data(field, value)
                if level == 'sample':
                    currentsample.add_data(field, value)
                if level == 'variable':
                    currentvariable.add_data(field, value)
    #
    except Exception as e:
        # e.args may be empty (e.g. ``raise ValueError()``); indexing it
        # unconditionally would raise IndexError from inside this handler.
        detail = e.args[0] if e.args else e
        toolbox_utils.Logging().warning(
            'Failed to parse dataset: %s' % (detail,))