def maintenance(filename=None):
    """Handle the Maintenance page: save on POST, render the form on GET.

    Args:
        filename: Name of the EML document, passed to ``load_eml`` and to
            every redirect/render call.

    Returns:
        A redirect response after a handled POST, or the rendered
        maintenance page (via ``render_get_maintenance_page``) otherwise.
    """
    form = MaintenanceForm(filename=filename)
    eml_node = load_eml(filename=filename)

    # Bind dataset_node unconditionally so the truthiness check in the GET
    # path cannot raise UnboundLocalError when no document was loaded.
    dataset_node = None
    if eml_node:
        dataset_node = eml_node.find_child(names.DATASET)
        if not dataset_node:
            dataset_node = Node(names.DATASET, parent=eml_node)
            add_child(eml_node, dataset_node)

    # Process POST
    if request.method == 'POST' and BTN_CANCEL in request.form:
        return redirect(url_for(PAGE_MAINTENANCE, filename=filename))

    if request.method == 'POST' and form.validate_on_submit():
        # Only touch the document when the form content actually changed.
        if is_dirty_form(form):
            maintenance_description = form.description.data
            valid, msg = is_valid_xml_fragment(maintenance_description,
                                               names.MAINTENANCE)
            if not valid:
                flash(invalid_xml_error_message(msg, False, names.DESCRIPTION),
                      'error')
                return render_get_maintenance_page(eml_node, form, filename)

            update_frequency = form.update_frequency.data
            create_maintenance(dataset_node, maintenance_description,
                               update_frequency)
            save_both_formats(filename=filename, eml_node=eml_node)

        # Decide where to go next from the submitted values. Every submitted
        # field is visited in order, so the last assignment wins.
        form_dict = request.form.to_dict(flat=False)
        new_page = PAGE_MAINTENANCE
        for values in form_dict.values():
            val = values[0]  # value is the first list element
            if val == BTN_SAVE_AND_CONTINUE:
                new_page = PAGE_PUBLISHER
            else:
                new_page = handle_hidden_buttons(new_page, PAGE_MAINTENANCE)

        return redirect(url_for(new_page, filename=filename))

    # Process GET
    if dataset_node:
        maintenance_node = dataset_node.find_child(names.MAINTENANCE)
        if maintenance_node:
            populate_maintenance_form(form, maintenance_node)

    return render_get_maintenance_page(eml_node, form, filename)
def add_geo_coverage_node(eml_node, description, north, south, east, west):
    """Add a new geographic coverage node under the dataset's coverage node.

    The dataset and coverage nodes are created when they do not exist yet.

    Args:
        eml_node: Root node that is searched for the dataset child.
        description: Geographic description passed to
            ``create_geographic_coverage``.
        north: North bounding coordinate.
        south: South bounding coordinate.
        east: East bounding coordinate.
        west: West bounding coordinate.
    """
    dataset_node = eml_node.find_child(names.DATASET)
    if not dataset_node:
        # Attach the new dataset node to the root. Without this the coverage
        # built below hangs off a node that is not part of the document.
        dataset_node = Node(names.DATASET, parent=eml_node)
        add_child(eml_node, dataset_node)

    coverage_node = dataset_node.find_child(names.COVERAGE)
    if not coverage_node:
        coverage_node = Node(names.COVERAGE, parent=dataset_node)
        add_child(dataset_node, coverage_node)

    gc_node = Node(names.GEOGRAPHICCOVERAGE, parent=coverage_node)
    add_child(coverage_node, gc_node)

    # The coordinates are passed as west, east, north, south, which differs
    # from this function's own parameter order.
    create_geographic_coverage(gc_node, description, west, east, north, south)
def maintenance(filename=None):
    """Handle the Maintenance page: save on POST, render the form on GET.

    Args:
        filename: Name of the EML document, passed to ``load_eml`` and to
            every redirect/render call.

    Returns:
        A redirect response after a validated POST, otherwise the rendered
        ``maintenance.html`` template.
    """
    form = MaintenanceForm(filename=filename)
    eml_node = load_eml(filename=filename)

    # Bind dataset_node unconditionally so the truthiness check in the GET
    # path cannot raise UnboundLocalError when no document was loaded.
    dataset_node = None
    if eml_node:
        dataset_node = eml_node.find_child(names.DATASET)
        if not dataset_node:
            dataset_node = Node(names.DATASET, parent=eml_node)
            add_child(eml_node, dataset_node)

    # Process POST
    if request.method == 'POST' and form.validate_on_submit():
        # Only touch the document when the form content actually changed.
        if is_dirty_form(form):
            description = add_paragraph_tags(form.description.data)
            update_frequency = form.update_frequency.data
            create_maintenance(dataset_node, description, update_frequency)
            save_both_formats(filename=filename, eml_node=eml_node)

        # Submitted button value -> page to redirect to.
        button_targets = (
            (BTN_SAVE_AND_CONTINUE, PAGE_PUBLISHER),
            (BTN_HIDDEN_CHECK, PAGE_CHECK),
            (BTN_HIDDEN_SAVE, PAGE_PROJECT),
            (BTN_HIDDEN_DOWNLOAD, PAGE_DOWNLOAD),
            (BTN_HIDDEN_NEW, PAGE_CREATE),
            (BTN_HIDDEN_OPEN, PAGE_OPEN),
            (BTN_HIDDEN_CLOSE, PAGE_CLOSE),
        )

        # Fall back to this page so the redirect below always has a target,
        # even when none of the submitted values is a known button.
        new_page = PAGE_MAINTENANCE
        form_dict = request.form.to_dict(flat=False)
        for values in form_dict.values():
            val = values[0]  # value is the first list element
            for button, page in button_targets:
                if val == button:
                    new_page = page
                    break

        return redirect(url_for(new_page, filename=filename))

    # Process GET
    if dataset_node:
        maintenance_node = dataset_node.find_child(names.MAINTENANCE)
        if maintenance_node:
            populate_maintenance_form(form, maintenance_node)

    set_current_page('maintenance')
    help_content = [
        get_help('maintenance'),
        get_help('maintenance_description'),
        get_help('maintenance_freq'),
    ]
    return render_template('maintenance.html',
                           title='Maintenance',
                           filename=filename,
                           form=form,
                           help=help_content)
def load_data_table(dataset_node: Node = None, uploads_path: str = None, data_file: str = ''):
    """Build a dataTable node for an uploaded CSV file and attach it to the dataset.

    The file is read with pandas; one attribute node is created per column,
    with a measurement scale chosen from the column's pandas dtype.

    Args:
        dataset_node: Parent node that receives the new dataTable node.
        uploads_path: Directory containing the uploaded file.
        data_file: File name of the uploaded CSV.

    Returns:
        The newly created dataTable node.
    """
    def attach(tag, parent):
        # Create a node under ``parent`` and register it through add_child.
        node = Node(tag, parent=parent)
        add_child(parent, node)
        return node

    full_path = f'{uploads_path}/{data_file}'

    datatable_node = attach(names.DATATABLE, dataset_node)

    physical_node = attach(names.PHYSICAL, datatable_node)
    physical_node.add_attribute('system', 'EDI')

    entity_name_node = attach(names.ENTITYNAME, datatable_node)
    entity_name_node.content = entity_name_from_data_file(data_file)

    attach(names.OBJECTNAME, physical_node).content = data_file

    # The size node is only written when the size could be determined.
    file_size = get_file_size(full_path)
    if file_size is not None:
        size_node = attach(names.SIZE, physical_node)
        size_node.add_attribute('unit', 'byte')
        size_node.content = str(file_size)

    data_format_node = attach(names.DATAFORMAT, physical_node)
    text_format_node = attach(names.TEXTFORMAT, data_format_node)
    attach(names.NUMHEADERLINES, text_format_node).content = '1'
    attach(names.NUMFOOTERLINES, text_format_node).content = '0'

    data_frame = pd.read_csv(full_path, comment='#')

    if data_frame is not None:
        number_of_records = attach(names.NUMBEROFRECORDS, datatable_node)
        number_of_records.content = f'{data_frame.shape[0]}'

        attribute_list_node = attach(names.ATTRIBUTELIST, datatable_node)

        for col in data_frame.columns:
            dtype = str(data_frame[col].dtype)
            print(f'{col}: {dtype}')

            attribute_node = attach(names.ATTRIBUTE, attribute_list_node)
            attach(names.ATTRIBUTENAME, attribute_node).content = col
            attach(names.ATTRIBUTELABEL, attribute_node).content = col
            attach(names.ATTRIBUTEDEFINITION, attribute_node).content = \
                f'Attribute definition for {col}'

            ms_node = attach(names.MEASUREMENTSCALE, attribute_node)

            if dtype == 'object' and is_datetime_column(col):
                # dateTime / formatString (format left empty)
                datetime_node = attach(names.DATETIME, ms_node)
                attach(names.FORMATSTRING, datetime_node).content = ''
            elif dtype in ('bool', 'object'):
                # nominal / nonNumericDomain
                nominal_node = attach(names.NOMINAL, ms_node)
                attach(names.NONNUMERICDOMAIN, nominal_node)
            elif dtype.startswith('float') or dtype.startswith('int'):
                # ratio / numericDomain / numberType
                ratio_node = attach(names.RATIO, ms_node)
                numeric_domain_node = attach(names.NUMERICDOMAIN, ratio_node)
                number_type_node = attach(names.NUMBERTYPE, numeric_domain_node)
                number_type_node.content = (
                    'integer' if dtype.startswith('int') else 'real')

    delete_data_files(uploads_path)

    return datatable_node
def load_other_entity(dataset_node: Node = None, uploads_path: str = None, data_file: str = ''):
    """Build an otherEntity node for an uploaded file and attach it to the dataset.

    Args:
        dataset_node: Parent node that receives the new otherEntity node.
        uploads_path: Directory containing the uploaded file.
        data_file: File name of the uploaded file.

    Returns:
        The newly created otherEntity node.
    """
    def attach(tag, parent):
        # Create a node under ``parent`` and register it through add_child.
        node = Node(tag, parent=parent)
        add_child(parent, node)
        return node

    full_path = f'{uploads_path}/{data_file}'

    other_entity_node = attach(names.OTHERENTITY, dataset_node)

    physical_node = attach(names.PHYSICAL, other_entity_node)
    physical_node.add_attribute('system', 'EDI')

    entity_name_node = attach(names.ENTITYNAME, other_entity_node)
    entity_name_node.content = entity_name_from_data_file(data_file)

    attach(names.OBJECTNAME, physical_node).content = data_file

    # Size and checksum nodes are only written when the values are available.
    file_size = get_file_size(full_path)
    if file_size is not None:
        size_node = attach(names.SIZE, physical_node)
        size_node.add_attribute('unit', 'byte')
        size_node.content = str(file_size)

    md5_hash = get_md5_hash(full_path)
    if md5_hash is not None:
        hash_node = attach(names.AUTHENTICATION, physical_node)
        hash_node.add_attribute('method', 'MD5')
        hash_node.content = str(md5_hash)

    # dataFormat / externallyDefinedFormat / formatName
    data_format_node = attach(names.DATAFORMAT, physical_node)
    externally_defined_format_node = attach(names.EXTERNALLYDEFINEDFORMAT,
                                            data_format_node)
    format_name_node = attach(names.FORMATNAME, externally_defined_format_node)
    format_name_node.content = format_name_from_data_file(data_file)

    # The entity type carries the same value as the format name.
    entity_type_node = new_child_node(names.ENTITYTYPE, parent=other_entity_node)
    entity_type_node.content = format_name_from_data_file(data_file)

    delete_data_files(uploads_path)

    return other_entity_node
def project(filename=None, node_id=None):
    """Handle the Project / Related Project page.

    When ``node_id`` is falsy the dataset's own project is edited; otherwise
    a related project is edited (``'1'`` is treated on GET as a fresh form).

    Args:
        filename: Name of the EML document.
        node_id: Id of the related project node, or None for the main project.

    Returns:
        A redirect response after a validated POST, otherwise the rendered
        ``project.html`` template.
    """
    form = ProjectForm(filename=filename)
    eml_node = load_eml(filename=filename)

    # Bind dataset_node unconditionally so the truthiness check in the GET
    # path cannot raise UnboundLocalError when no document was loaded.
    dataset_node = None
    if eml_node:
        dataset_node = eml_node.find_child(names.DATASET)
        if not dataset_node:
            dataset_node = Node(names.DATASET, parent=eml_node)
            add_child(eml_node, dataset_node)

    # Process POST
    if request.method == 'POST' and form.validate_on_submit():
        save = bool(is_dirty_form(form))

        # Fall back to the project page so the redirect below always has a
        # target, even when no recognised button name is in the form.
        new_page = PAGE_PROJECT
        if 'Next' in request.form:
            new_page = (PAGE_RELATED_PROJECT_SELECT if node_id
                        else PAGE_OTHER_ENTITY_SELECT)
        else:
            # Button name -> target page; the first button present wins.
            button_targets = (
                (BTN_PROJECT_PERSONNEL, PAGE_PROJECT_PERSONNEL_SELECT),
                (BTN_FUNDING_AWARDS, PAGE_FUNDING_AWARD_SELECT),
                (BTN_RELATED_PROJECTS, PAGE_RELATED_PROJECT_SELECT),
                (BTN_HIDDEN_CHECK, PAGE_CHECK),
                (BTN_HIDDEN_SAVE, PAGE_PROJECT),
                (BTN_HIDDEN_DOWNLOAD, PAGE_DOWNLOAD),
                (BTN_HIDDEN_NEW, PAGE_CREATE),
                (BTN_HIDDEN_OPEN, PAGE_OPEN),
                (BTN_HIDDEN_CLOSE, PAGE_CLOSE),
            )
            for button, page in button_targets:
                if button in request.form:
                    new_page = page
                    break

        if save:
            title = form.title.data
            abstract = add_paragraph_tags(form.abstract.data)
            if not node_id:
                create_project(dataset_node, title, abstract)
            else:
                related_project_node = create_related_project(
                    dataset_node, title, abstract, node_id)
                # Use the id of the node that was actually written for the
                # redirect below.
                node_id = related_project_node.id
            save_both_formats(filename=filename, eml_node=eml_node)

        if not node_id:
            return redirect(url_for(new_page, filename=filename))
        return redirect(url_for(new_page, filename=filename, node_id=node_id))

    # Process GET
    if node_id == '1':
        form.init_md5()
    elif node_id:
        related_project_node = Node.get_node_instance(node_id)
        populate_project_form(form, related_project_node)
    elif dataset_node:
        project_node = dataset_node.find_child(names.PROJECT)
        populate_project_form(form, project_node)

    set_current_page('project')
    if not node_id:
        page_title = 'Project'
        help_content = [get_help('project'), get_help('project_title')]
    else:
        page_title = 'Related Project'
        help_content = [get_help('related_project'), get_help('project_title')]

    return render_template('project.html',
                           title=page_title,
                           filename=filename,
                           form=form,
                           help=help_content)
def load_other_entity(dataset_node: Node = None, uploads_path: str = None, data_file: str = '', node_id: str = None):
    """Create or refresh an otherEntity node for an uploaded file.

    With no ``node_id`` (or ``'1'``) a new otherEntity subtree is built under
    ``dataset_node``. With any other ``node_id`` the existing node is looked
    up and its object name, size, checksum, format name and entity type are
    overwritten in place (a re-upload).

    Args:
        dataset_node: Parent node for a newly created otherEntity node.
        uploads_path: Directory containing the uploaded file.
        data_file: File name of the uploaded file.
        node_id: Id of an existing otherEntity node to refresh, if any.

    Returns:
        The created or refreshed otherEntity node.
    """
    def attach(tag, parent):
        # Create a node under ``parent`` and register it with metapype_client.
        node = Node(tag, parent=parent)
        metapype_client.add_child(parent, node)
        return node

    full_path = f'{uploads_path}/{data_file}'
    doing_reupload = node_id is not None and node_id != '1'

    if doing_reupload:
        other_entity_node = Node.get_node_instance(node_id)
        object_name_node = other_entity_node.find_descendant(names.OBJECTNAME)
    else:
        other_entity_node = attach(names.OTHERENTITY, dataset_node)

        physical_node = attach(names.PHYSICAL, other_entity_node)
        physical_node.add_attribute('system', 'EDI')

        entity_name_node = attach(names.ENTITYNAME, other_entity_node)
        entity_name_node.content = entity_name_from_data_file(data_file)

        object_name_node = attach(names.OBJECTNAME, physical_node)

    object_name_node.content = data_file

    # Size: reuse the existing node on re-upload, otherwise create it.
    file_size = get_file_size(full_path)
    if file_size is not None:
        if doing_reupload:
            size_node = other_entity_node.find_descendant(names.SIZE)
        else:
            size_node = attach(names.SIZE, physical_node)
            size_node.add_attribute('unit', 'byte')
        size_node.content = str(file_size)

    # MD5 checksum: same reuse-or-create pattern.
    md5_hash = get_md5_hash(full_path)
    if md5_hash is not None:
        if doing_reupload:
            hash_node = other_entity_node.find_descendant(names.AUTHENTICATION)
        else:
            hash_node = attach(names.AUTHENTICATION, physical_node)
            hash_node.add_attribute('method', 'MD5')
        hash_node.content = str(md5_hash)

    # dataFormat / externallyDefinedFormat / formatName
    if doing_reupload:
        format_name_node = other_entity_node.find_descendant(names.FORMATNAME)
    else:
        data_format_node = attach(names.DATAFORMAT, physical_node)
        externally_defined_format_node = attach(names.EXTERNALLYDEFINEDFORMAT,
                                                data_format_node)
        format_name_node = attach(names.FORMATNAME,
                                  externally_defined_format_node)
    format_name_node.content = format_name_from_data_file(data_file)

    # The entity type carries the same value as the format name.
    if doing_reupload:
        entity_type_node = other_entity_node.find_descendant(names.ENTITYTYPE)
    else:
        entity_type_node = metapype_client.new_child_node(
            names.ENTITYTYPE, parent=other_entity_node)
    entity_type_node.content = format_name_from_data_file(data_file)

    user_data.add_data_table_upload_filename(data_file)

    delete_data_files(uploads_path)

    return other_entity_node
def taxonomic_coverage(filename=None, node_id=None, taxon=None):
    """Handle the Taxonomic Coverage page.

    POST handles three actions: Cancel (redirect to the select page),
    "Fill" (look up a taxonomic hierarchy and re-render the form), and save
    (write a taxonomicCoverage node and redirect). GET populates the form
    from the existing node, or initialises an empty form for ``node_id == '1'``.

    Args:
        filename: Name of the EML document.
        node_id: Id of the taxonomicCoverage node being edited; ``'1'`` is
            treated as a new entry.
        taxon: Optional taxon value used to pre-fill a new form.

    Returns:
        A redirect response or the rendered ``taxonomic_coverage.html``.

    Raises:
        Exception: When ``node_id`` refers to a node that is not in the
            node store.
    """
    form = TaxonomicCoverageForm(filename=filename)

    # Process POST
    if request.method == 'POST' and BTN_CANCEL in request.form:
        return redirect(url_for(PAGE_TAXONOMIC_COVERAGE_SELECT,
                                filename=filename))

    if request.method == 'POST' and form.validate_on_submit():
        save = bool(is_dirty_form(form))

        form_value = request.form
        have_links = False

        if 'Fill' in form_value:
            source = form.taxonomic_authority.data
            source_types = {
                'ITIS': TaxonomySourceEnum.ITIS,
                'NCBI': TaxonomySourceEnum.NCBI,
                'WORMS': TaxonomySourceEnum.WORMS,
            }
            try:
                # An unrecognised authority used to leave source_type
                # unbound; report it through the same flash/fallback path
                # as a failed lookup instead.
                if source not in source_types:
                    raise ValueError(
                        f'Unsupported taxonomic authority: {source}')
                source_type = source_types[source]

                # Display name of the selected authority, '' if not found.
                source_name = next(
                    (choice[1] for choice in form.taxonomic_authority.choices
                     if choice[0] == source),
                    '')

                hierarchy = fill_taxonomic_coverage(form.taxon_value.data,
                                                    source_type, source_name)
                if hierarchy:
                    # set the taxon rank dropdown appropriately
                    rank = hierarchy[0][0].capitalize()
                    if (rank, rank) in form.taxon_rank.choices:
                        form.taxon_rank.data = rank
                    # see if we should display a Links column
                    have_links = any(entry[4] for entry in hierarchy)
            except ValueError as e:
                flash(str(e))
                hierarchy = [(form.taxon_rank.data, form.taxon_value.data,
                              '', '')]

            form.hierarchy.data = hierarchy
            # Carry the entered values through hidden fields so a later save
            # can restore them.
            form.hidden_taxon_rank.data = form.taxon_rank.data
            form.hidden_taxon_value.data = form.taxon_value.data
            form.hidden_taxonomic_authority.data = form.taxonomic_authority.data
            help_content = get_helps(['taxonomic_coverage_fill_hierarchy'])
            return render_template(
                'taxonomic_coverage.html',
                title='Taxonomic Coverage',
                form=form,
                hierarchy=hierarchy,
                taxon_rank=form.taxon_rank.data,
                taxon_value=form.taxon_value.data,
                taxonomic_authority=form.taxonomic_authority.data,
                help=help_content,
                have_links=have_links)

        # Pick the redirect target; the first hidden button found wins.
        hidden_targets = (
            (BTN_HIDDEN_NEW, PAGE_CREATE),
            (BTN_HIDDEN_OPEN, PAGE_OPEN),
            (BTN_HIDDEN_CLOSE, PAGE_CLOSE),
        )
        new_page = PAGE_TAXONOMIC_COVERAGE_SELECT
        form_dict = form_value.to_dict(flat=False)
        for values in form_dict.values():
            val = values[0]  # value is the first list element
            target = next(
                (page for button, page in hidden_targets if val == button),
                None)
            if target is not None:
                new_page = target
                break

        if save:
            if not form.taxon_value.data and not form.taxon_rank.data:
                return redirect(url_for(new_page, filename=filename))

            submitted_hierarchy = form_value.get('hierarchy')
            if isinstance(submitted_hierarchy, str) and submitted_hierarchy:
                # convert hierarchy string to list
                # NOTE(review): literal_eval raises ValueError/SyntaxError on
                # malformed input; that is not handled here.
                submitted_hierarchy = ast.literal_eval(submitted_hierarchy)
                form.hierarchy.data = submitted_hierarchy

            # if we're saving after doing 'Fill Hierarchy', fill in the
            # values we've been passed
            if form_value.get('hidden_taxon_rank'):
                form.taxon_rank.data = form_value.get('hidden_taxon_rank')
                form.taxon_value.data = form_value.get('hidden_taxon_value')
                form.taxonomic_authority.data = form_value.get(
                    'hidden_taxonomic_authority')
            elif not submitted_hierarchy:
                # we don't have a hierarchy, so construct a fake hierarchy
                # to be used by create_taxonomic_coverage()
                form.hierarchy.data = [(form_value.get('taxon_rank'),
                                        form_value.get('taxon_value'),
                                        '', '', '', '')]

            if not form_value.get('taxon_rank'):
                flash('Taxon Rank is required.')
                return redirect(
                    url_for(PAGE_TAXONOMIC_COVERAGE,
                            filename=filename,
                            node_id=node_id,
                            taxon=form.taxon_value.data))

            eml_node = load_eml(filename=filename)

            dataset_node = eml_node.find_child(names.DATASET)
            if not dataset_node:
                # Attach the new dataset node to the root; otherwise the
                # coverage written below is not part of the saved document.
                dataset_node = Node(names.DATASET, parent=eml_node)
                add_child(eml_node, dataset_node)

            coverage_node = dataset_node.find_child(names.COVERAGE)
            if not coverage_node:
                coverage_node = Node(names.COVERAGE, parent=dataset_node)
                add_child(dataset_node, coverage_node)

            txc_node = Node(names.TAXONOMICCOVERAGE, parent=coverage_node)

            create_taxonomic_coverage(txc_node,
                                      form.general_taxonomic_coverage.data,
                                      form.hierarchy.data,
                                      form.taxonomic_authority.data)

            # A multi-character node_id identifies an existing node to
            # replace; otherwise the new node is appended.
            if node_id and len(node_id) != 1:
                old_txc_node = Node.get_node_instance(node_id)
                if old_txc_node:
                    coverage_parent_node = old_txc_node.parent
                    coverage_parent_node.replace_child(old_txc_node, txc_node)
                else:
                    msg = f"No node found in the node store with node id {node_id}"
                    raise Exception(msg)
            else:
                add_child(coverage_node, txc_node)

            save_both_formats(filename=filename, eml_node=eml_node)

        return redirect(url_for(new_page, filename=filename))

    # Process GET
    have_links = False
    if node_id == '1':
        form.init_md5()
        if taxon:
            form.taxon_value.data = taxon
    else:
        eml_node = load_eml(filename=filename)
        dataset_node = eml_node.find_child(names.DATASET)
        if dataset_node:
            coverage_node = dataset_node.find_child(names.COVERAGE)
            if coverage_node:
                txc_nodes = coverage_node.find_all_children(
                    names.TAXONOMICCOVERAGE)
                if txc_nodes:
                    for txc_node in txc_nodes:
                        if node_id == txc_node.id:
                            have_links = populate_taxonomic_coverage_form(
                                form, txc_node)

    help_content = get_helps(['taxonomic_coverage_fill_hierarchy'])

    set_current_page('taxonomic_coverage')
    return render_template('taxonomic_coverage.html',
                           title='Taxonomic Coverage',
                           form=form,
                           hierarchy=form.hierarchy.data,
                           have_links=have_links,
                           help=help_content)
def keyword(filename=None, node_id=None):
    """Handle the Keyword page: add or replace a keyword on POST, show it on GET.

    Args:
        filename: Name of the EML document.
        node_id: Id of the keyword node being edited; ``'1'`` is treated on
            GET as a new, empty form.

    Returns:
        A redirect response after a POST, otherwise the rendered
        ``keyword.html`` template.

    Raises:
        Exception: When ``node_id`` refers to a keyword node that is not in
            the node store.
    """
    eml_node = load_eml(filename=filename)
    dataset_node = eml_node.find_child(names.DATASET)
    if not dataset_node:
        dataset_node = Node(names.DATASET, parent=eml_node)
        add_child(eml_node, dataset_node)

    form = KeywordForm(filename=filename, node_id=node_id)
    form.init_keywords()

    is_post = request.method == 'POST'

    # Process POST
    if is_post and BTN_CANCEL in request.form:
        return redirect(url_for(PAGE_KEYWORD_SELECT, filename=filename))

    # Form validation is intentionally not required here.
    if is_post:
        new_page = PAGE_KEYWORD_SELECT
        for values in request.form.to_dict(flat=False).values():
            # value is the first list element
            new_page = check_val_for_hidden_buttons(values[0], new_page,
                                                    new_page)

        if is_dirty_form(form):
            keyword_text = form.keyword.data
            keyword_type = form.keyword_type.data
            thesaurus = form.keyword_thesaurus.data

            # If no thesaurus was specified, see if the LTER Controlled
            # Vocabulary applies
            if not thesaurus and keyword_text in get_keywords('LTER'):
                thesaurus = 'LTER Controlled Vocabulary'

            keyword_set_nodes = []
            eml_node.find_all_descendants(names.KEYWORDSET, keyword_set_nodes)

            # Find a keyword set whose thesaurus matches: same thesaurus
            # content, or no thesaurus node when none was requested.
            keyword_set_node = None
            for candidate in keyword_set_nodes:
                thesaurus_node = candidate.find_child(names.KEYWORDTHESAURUS)
                if thesaurus_node:
                    matches = thesaurus_node.content == thesaurus
                else:
                    matches = not thesaurus
                if matches:
                    keyword_set_node = candidate
                    break

            if not keyword_set_node:
                keyword_set_node = Node(names.KEYWORDSET, parent=dataset_node)
                add_child(dataset_node, keyword_set_node)
                if thesaurus:
                    new_thesaurus_node = Node(names.KEYWORDTHESAURUS,
                                              parent=keyword_set_node)
                    new_thesaurus_node.content = thesaurus
                    keyword_set_node.children.append(new_thesaurus_node)

            keyword_node = Node(names.KEYWORD, parent=keyword_set_node)
            create_keyword(keyword_node, keyword_text, keyword_type)

            # A multi-character node_id identifies an existing keyword to
            # replace; otherwise the new keyword is appended to the set.
            if node_id and len(node_id) != 1:
                old_keyword_node = Node.get_node_instance(node_id)
                if not old_keyword_node:
                    msg = f"No keyword node found in the node store with node id {node_id}"
                    raise Exception(msg)
                old_keyword_node.parent.replace_child(old_keyword_node,
                                                      keyword_node)
            else:
                add_child(keyword_set_node, keyword_node)

            save_both_formats(filename=filename, eml_node=eml_node)

        return redirect(url_for(new_page, filename=filename))

    # Process GET
    if node_id == '1':
        form.init_md5()
    else:
        keyword_set_nodes = []
        eml_node.find_all_descendants(names.KEYWORDSET, keyword_set_nodes)
        for set_node in keyword_set_nodes:
            candidates = set_node.find_all_children(names.KEYWORD)
            thesaurus_node = set_node.find_child(names.KEYWORDTHESAURUS)
            for kw_node in candidates or ():
                if node_id == kw_node.id:
                    populate_keyword_form(form, kw_node, thesaurus_node)
                    break
            else:
                # No match in this set; look at the next one.
                continue
            # The inner loop found the keyword; stop searching.
            break

    set_current_page('keyword')
    help_content = [get_help('keywords')]
    return render_template('keyword.html',
                           title='Keyword',
                           form=form,
                           filename=filename,
                           help=help_content)
def load_data_table(uploads_path: str = None, data_file: str = '', num_header_rows: str = '1', delimiter: str = ',', quote_char: str = '"'):
    """Build an unattached dataTable node describing an uploaded CSV file.

    The physical description (name, size, MD5, text format) is written first,
    then the file is read with pandas and one attribute node is created per
    column, with a measurement scale chosen from the inferred variable type.

    Args:
        uploads_path: Directory containing the uploaded file.
        data_file: File name of the uploaded CSV.
        num_header_rows: Value written to the numHeaderLines node.
        delimiter: Field delimiter, also passed to pandas as ``sep``.
        quote_char: Quote character, also passed to pandas as ``quotechar``.

    Returns:
        A tuple ``(datatable_node, column_vartypes, column_names,
        column_categorical_codes, data_frame, missing_value_code)``. The last
        item is the missing value code guessed for the final column, or None
        when the table has no columns.

    Raises:
        DataTableError: When the file is empty or pandas cannot parse it.
    """
    def attach(tag, parent):
        # Create a node under ``parent`` and register it with metapype_client.
        node = Node(tag, parent=parent)
        metapype_client.add_child(parent, node)
        return node

    log_info(f'Entering load_data_table: {data_file}')

    full_path = f'{uploads_path}/{data_file}'

    datatable_node = metapype_client.new_child_node(names.DATATABLE,
                                                    parent=None)

    physical_node = metapype_client.new_child_node(names.PHYSICAL,
                                                   parent=datatable_node)
    physical_node.add_attribute('system', 'EDI')

    entity_name_node = metapype_client.new_child_node(names.ENTITYNAME,
                                                      parent=datatable_node)
    entity_name_node.content = entity_name_from_data_file(data_file)

    object_name_node = metapype_client.new_child_node(names.OBJECTNAME,
                                                      parent=physical_node)
    object_name_node.content = data_file

    # Size and checksum nodes are only written when the values are available.
    file_size = get_file_size(full_path)
    if file_size is not None:
        size_node = metapype_client.new_child_node(names.SIZE, physical_node)
        size_node.add_attribute('unit', 'byte')
        size_node.content = str(file_size)

    md5_hash = get_md5_hash(full_path)
    if md5_hash is not None:
        hash_node = attach(names.AUTHENTICATION, physical_node)
        hash_node.add_attribute('method', 'MD5')
        hash_node.content = str(md5_hash)

    # dataFormat / textFormat / header, footer and delimiter settings
    data_format_node = attach(names.DATAFORMAT, physical_node)
    text_format_node = attach(names.TEXTFORMAT, data_format_node)
    attach(names.NUMHEADERLINES, text_format_node).content = num_header_rows
    attach(names.NUMFOOTERLINES, text_format_node).content = '0'

    simple_delimited_node = attach(names.SIMPLEDELIMITED, text_format_node)
    attach(names.FIELDDELIMITER, simple_delimited_node).content = delimiter
    attach(names.QUOTECHARACTER, simple_delimited_node).content = quote_char

    if file_size == 0:
        raise DataTableError("The CSV file is empty.")

    check_column_name_uniqueness(full_path, delimiter)

    # Read one line so the file object records which line ending it saw,
    # then store its repr (without quotes) as the record delimiter.
    with open(full_path) as file:
        next(file)
        line_terminator = repr(file.newlines).replace("'", "")
    attach(names.RECORDDELIMITER, text_format_node).content = line_terminator

    try:
        data_frame = pd.read_csv(full_path, encoding='utf8', sep=delimiter,
                                 quotechar=quote_char)
    except (pd.errors.ParserError, pd.errors.EmptyDataError) as e:
        # EmptyDataError (no columns to parse) is not a ParserError, so it
        # is caught explicitly and reported like any other parse failure.
        raise DataTableError(e.args[0]) from e

    column_vartypes = []
    column_names = []
    column_categorical_codes = []
    # Bound up front so the return below is safe for a table with no columns.
    missing_value_code = None

    if data_frame is not None:
        number_of_records = attach(names.NUMBEROFRECORDS, datatable_node)
        number_of_records.content = f'{data_frame.shape[0]}'

        attribute_list_node = attach(names.ATTRIBUTELIST, datatable_node)

        for col in data_frame.columns:
            # dtype is inferred from every row except the first one.
            dtype = data_frame[col][1:].infer_objects().dtype

            var_type, codes = infer_col_type(data_frame, col)
            log_info(f'col: {col}  var_type: {var_type}')

            column_vartypes.append(var_type)
            column_names.append(col)
            column_categorical_codes.append(codes)

            attribute_node = metapype_client.new_child_node(
                names.ATTRIBUTE, attribute_list_node)
            attribute_name_node = metapype_client.new_child_node(
                names.ATTRIBUTENAME, attribute_node)
            attribute_name_node.content = col

            attach(names.ATTRIBUTELABEL, attribute_node).content = col
            attach(names.ATTRIBUTEDEFINITION, attribute_node)
            ms_node = attach(names.MEASUREMENTSCALE, attribute_node)

            missing_value_code = guess_missing_value_code(
                full_path, delimiter, quote_char, col)
            if missing_value_code:
                mv_node = attach(names.MISSINGVALUECODE, attribute_node)
                attach(names.CODE, mv_node).content = missing_value_code

            if var_type == metapype_client.VariableType.CATEGORICAL:
                codes = force_categorical_codes(attribute_node, dtype, codes)
                codes = force_missing_value_code(missing_value_code, dtype,
                                                 codes)

                # nominal / nonNumericDomain / enumeratedDomain / ...codes...
                nominal_node = metapype_client.new_child_node(
                    names.NOMINAL, ms_node)
                non_numeric_domain_node = metapype_client.new_child_node(
                    names.NONNUMERICDOMAIN, nominal_node)
                enumerated_domain_node = metapype_client.new_child_node(
                    names.ENUMERATEDDOMAIN, non_numeric_domain_node)

                for code in codes:
                    code_definition_node = metapype_client.new_child_node(
                        names.CODEDEFINITION, enumerated_domain_node)
                    code_node = metapype_client.new_child_node(
                        names.CODE, code_definition_node)
                    code_node.content = str(code)
                    # The definition node is created without content.
                    metapype_client.new_child_node(
                        names.DEFINITION, code_definition_node)

            elif var_type == metapype_client.VariableType.NUMERICAL:
                # ratio / numericDomain / numberType, plus a unit node
                ratio_node = metapype_client.new_child_node(
                    names.RATIO, ms_node)
                numeric_domain_node = metapype_client.new_child_node(
                    names.NUMERICDOMAIN, ratio_node)
                # FIXME - we can do better than this
                number_type = ('integer' if str(dtype).startswith('int')
                               else 'real')
                number_type_node = metapype_client.new_child_node(
                    names.NUMBERTYPE, numeric_domain_node)
                number_type_node.content = number_type
                # The unit node is created without content.
                metapype_client.new_child_node(names.UNIT, ratio_node)

            elif var_type == metapype_client.VariableType.TEXT:
                # nominal / nonNumericDomain / textDomain / definition
                nominal_node = metapype_client.new_child_node(
                    names.NOMINAL, ms_node)
                non_numeric_domain_node = metapype_client.new_child_node(
                    names.NONNUMERICDOMAIN, nominal_node)
                text_domain_node = metapype_client.new_child_node(
                    names.TEXTDOMAIN, non_numeric_domain_node)
                metapype_client.new_child_node(
                    names.DEFINITION, text_domain_node)

            elif var_type == metapype_client.VariableType.DATETIME:
                # dateTime / formatString; for this variable type the value
                # returned as ``codes`` is stored as the format string.
                datetime_node = attach(names.DATETIME, ms_node)
                attach(names.FORMATSTRING, datetime_node).content = codes

    return (datatable_node, column_vartypes, column_names,
            column_categorical_codes, data_frame, missing_value_code)
def project(filename=None, project_node_id=None):
    """Display and process the Project (or Related Project) page.

    On a valid POST the destination page is chosen from the button that was
    pressed. If the form is dirty, the abstract and funding text are checked
    with is_valid_xml_fragment(), the project is written into the EML model
    and the document is saved. Otherwise the form is populated from the
    model and the page is rendered.

    Args:
        filename: Name of the EML document being edited.
        project_node_id: Id of a related-project node. When falsy, the
            project directly under the dataset node is edited. The value
            '1' makes the GET path call form.init_md5() without populating
            the form.

    Returns:
        A redirect response after a valid POST; otherwise the result of
        render_get_project_page().

    Raises:
        Exception: if changes must be saved but load_eml() returned no
            document, so there is no dataset node to attach the project to.
    """
    form = ProjectForm(filename=filename)
    eml_node = load_eml(filename=filename)

    # Bind dataset_node unconditionally: it is referenced further down even
    # when load_eml() returns nothing, which previously raised
    # UnboundLocalError.
    dataset_node = None
    if eml_node:
        dataset_node = eml_node.find_child(names.DATASET)
        if not dataset_node:
            dataset_node = Node(names.DATASET, parent=eml_node)
            add_child(eml_node, dataset_node)

    doing_related_project = project_node_id

    # Process POST
    if request.method == 'POST' and form.validate_on_submit():
        save = bool(is_dirty_form(form))

        if doing_related_project:
            this_page = PAGE_RELATED_PROJECT_SELECT  # FIXME?
        else:
            this_page = PAGE_PROJECT

        # Pick the destination from the button that submitted the form.
        if 'Next' in request.form:
            if doing_related_project:
                new_page = PAGE_RELATED_PROJECT_SELECT
            else:
                new_page = PAGE_OTHER_ENTITY_SELECT
        elif BTN_PROJECT_PERSONNEL in request.form:
            new_page = PAGE_PROJECT_PERSONNEL_SELECT
        elif BTN_FUNDING_AWARDS in request.form:
            new_page = PAGE_FUNDING_AWARD_SELECT
        elif BTN_RELATED_PROJECTS in request.form:
            new_page = PAGE_RELATED_PROJECT_SELECT
        else:
            new_page = handle_hidden_buttons(None, this_page)

        if save:
            abstract = form.abstract.data
            funding = form.funding.data

            # Both free-text fields must be acceptable XML fragments; on the
            # first failure, report it and redisplay the page unsaved.
            for text, element_name in ((abstract, names.ABSTRACT),
                                       (funding, names.FUNDING)):
                valid, msg = is_valid_xml_fragment(text, element_name)
                if not valid:
                    flash(invalid_xml_error_message(msg, False, element_name),
                          'error')
                    return render_get_project_page(eml_node, form, filename,
                                                   doing_related_project,
                                                   project_node_id)

            if dataset_node is None:
                raise Exception(
                    f"No EML document could be loaded for '{filename}'")

            title = form.title.data
            if not doing_related_project:
                create_project(dataset_node, title, abstract, funding)
            else:
                related_project_node = create_related_project(
                    dataset_node, title, abstract, funding, project_node_id)
                # Later navigation must refer to the node just written.
                project_node_id = related_project_node.id

            save_both_formats(filename=filename, eml_node=eml_node)

        if not doing_related_project:
            return redirect(url_for(new_page, filename=filename))
        # node_id is deliberately the string 'None' here, as in the original.
        return redirect(url_for(new_page,
                                filename=filename,
                                node_id='None',
                                project_node_id=project_node_id))

    # Process GET
    if project_node_id == '1':
        form.init_md5()
    elif doing_related_project:
        related_project_node = Node.get_node_instance(project_node_id)
        populate_project_form(form, related_project_node)
    elif dataset_node:
        project_node = dataset_node.find_child(names.PROJECT)
        populate_project_form(form, project_node)

    return render_get_project_page(eml_node, form, filename,
                                   doing_related_project, project_node_id)
def responsible_party(filename=None, node_id=None, method=None, node_name=None, back_page=None, title=None, next_page=None, save_and_continue=False, help=None, project_node_id=None):
    """Display and process the form for one responsible-party element.

    The element kind is given by ``node_name``. Personnel elements are
    stored under a project node; every other kind is stored under the
    dataset node.

    Args:
        filename: Name of the EML document being edited.
        node_id: Id of the element being edited. '1' makes the GET path
            call form.init_md5() without populating the form. On save, an
            id whose length is 1 (or a falsy id) causes the new node to be
            appended instead of replacing an existing one.
        method: Not referenced anywhere in this function.
        node_name: Element name of the responsible party being edited.
        back_page: Page to redirect to on Cancel; also passed to
            select_new_page().
        title: Page title; prefixed with 'Related ' when project_node_id
            is set.
        next_page: Passed to select_new_page() and to the template.
        save_and_continue: Passed through to the template.
        help: Ignored; rebound to get_helps([node_name]) before rendering.
        project_node_id: Id of a related-project node, when the party
            belongs to a related project.

    Returns:
        A redirect response on Cancel or after a valid submit; otherwise
        the rendered 'responsible_party.html' template.

    Raises:
        Exception: when saving a replacement and node_id is not found in
            the node store.
    """
    # Cancel is checked before anything is loaded.
    if BTN_CANCEL in request.form:
        if not project_node_id:
            url = url_for(back_page, filename=filename)
        else:
            url = url_for(back_page, filename=filename, node_id=project_node_id)
        return redirect(url)

    form = ResponsiblePartyForm(filename=filename)
    eml_node = load_eml(filename=filename)
    dataset_node = eml_node.find_child(names.DATASET)
    if not dataset_node:
        dataset_node = Node(names.DATASET, parent=eml_node)
        add_child(eml_node, dataset_node)
    parent_node = dataset_node
    role = False
    new_page = select_new_page(back_page, next_page)
    # new_page = back_page

    form_value = request.form
    form_dict = form_value.to_dict(flat=False)
    # Computed on every request; the result is only used for the publisher
    # redirect after a valid submit.
    url = select_post(filename, form, form_dict,
                      'POST', PAGE_PUBLISHER,
                      PAGE_MAINTENANCE, PAGE_PUBLICATION_INFO, PAGE_PUBLISHER,
                      project_node_id=project_node_id)

    # If this is an associatedParty or a project personnel element,
    # set role to True so it will appear as a form field.
    if node_name == names.ASSOCIATEDPARTY or node_name == names.PERSONNEL:
        role = True

    # If this is a project personnel party, place it under the
    # project node, not under the dataset node
    if node_name == names.PERSONNEL:
        if not project_node_id:
            project_node = dataset_node.find_child(names.PROJECT)
            if not project_node:
                project_node = Node(names.PROJECT, parent=dataset_node)
                add_child(dataset_node, project_node)
            parent_node = project_node
        else:
            parent_node = Node.get_node_instance(project_node_id)

    # Process POST
    save = False
    if is_dirty_form(form):
        save = True

    if form.validate_on_submit():
        if save:
            salutation = form.salutation.data
            gn = form.gn.data
            mn = form.mn.data
            sn = form.sn.data
            user_id = form.user_id.data
            organization = form.organization.data
            position_name = form.position_name.data
            address_1 = form.address_1.data
            address_2 = form.address_2.data
            city = form.city.data
            state = form.state.data
            postal_code = form.postal_code.data
            country = form.country.data
            phone = form.phone.data
            fax = form.fax.data
            email = form.email.data
            online_url = form.online_url.data
            # From here on `role` holds the submitted role value rather than
            # the boolean computed above; this path always redirects, so the
            # template never sees it.
            role = form.role.data

            # A fresh node is always built; it either replaces the node
            # being edited or is appended to the parent.
            rp_node = Node(node_name, parent=parent_node)

            create_responsible_party(
                rp_node,
                filename,
                salutation,
                gn,
                mn,
                sn,
                user_id,
                organization,
                position_name,
                address_1,
                address_2,
                city,
                state,
                postal_code,
                country,
                phone,
                fax,
                email,
                online_url,
                role)

            if node_id and len(node_id) != 1:
                old_rp_node = Node.get_node_instance(node_id)
                if old_rp_node:
                    old_rp_parent_node = old_rp_node.parent
                    old_rp_parent_node.replace_child(old_rp_node, rp_node)
                else:
                    msg = f"No node found in the node store with node id {node_id}"
                    raise Exception(msg)
            else:
                add_child(parent_node, rp_node)

            save_both_formats(filename=filename, eml_node=eml_node)
            # flash(f"Changes to the '{node_name}' element have been saved.")

        # There is at most only one publisher element, so we don't have a
        # list of publishers to navigate back to. Stay on this page after
        # saving changes.
        # FIXME
        # NOTE(review): this assignment does not affect the redirect below,
        # because the publisher case redirects to `url`, not to `new_page`.
        if node_name == names.PUBLISHER:
            new_page = PAGE_PUBLICATION_INFO

        if node_name != names.PUBLISHER:
            return redirect(url_for(new_page, filename=filename, node_id=project_node_id))
        else:
            return redirect(url)

    # Process GET
    if node_id == '1':
        form.init_md5()
    else:
        if parent_node:
            rp_nodes = parent_node.find_all_children(child_name=node_name)
            if rp_nodes:
                # Every child is compared; there is no early exit on a match.
                for rp_node in rp_nodes:
                    if node_id == rp_node.id:
                        populate_responsible_party_form(form, rp_node)

    if project_node_id:
        # NOTE(review): would fail if title were left at its None default.
        title = 'Related ' + title
    help = get_helps([node_name])
    return render_template('responsible_party.html', title=title, node_name=node_name,
                           form=form, role=role, next_page=next_page, save_and_continue=save_and_continue, help=help)
def method_step(filename=None, node_id=None):
    """Display and process the form for a single method step.

    Missing dataset and methods nodes are created in the loaded document
    before the form is handled. On a valid POST with a dirty form, a new
    methodStep node is built from the form; it replaces the node identified
    by ``node_id`` or, for a new step, is appended to the methods node. The
    document is then saved.

    Args:
        filename: Name of the EML document being edited.
        node_id: Id of the method step being edited. '1' makes the GET path
            call form.init_md5() without populating the form. On save, an
            id whose length is 1 (or a falsy id) appends a new step.

    Returns:
        A redirect response on Cancel or after a valid POST; otherwise the
        result of render_get_method_step_page().

    Raises:
        Exception: when saving a replacement and node_id is not found in
            the node store.
    """
    eml_node = load_eml(filename=filename)
    dataset_node = eml_node.find_child(names.DATASET)

    # Bind methods_node on every path. It used to be assigned only when the
    # dataset node already existed, so the check below raised
    # UnboundLocalError for a document without one.
    methods_node = None
    if dataset_node:
        methods_node = dataset_node.find_child(names.METHODS)
    else:
        dataset_node = Node(names.DATASET, parent=eml_node)
        add_child(eml_node, dataset_node)

    if not methods_node:
        methods_node = Node(names.METHODS, parent=dataset_node)
        add_child(dataset_node, methods_node)

    form = MethodStepForm(filename=filename, node_id=node_id)

    # Process POST
    if request.method == 'POST' and BTN_CANCEL in request.form:
        return redirect(url_for(PAGE_METHOD_STEP_SELECT, filename=filename))

    if request.method == 'POST' and form.validate_on_submit():
        # Save or Back sends us back to the list of method steps
        new_page = PAGE_METHOD_STEP_SELECT

        form_dict = request.form.to_dict(flat=False)
        for values in form_dict.values():
            # The submitted value is the first list element.
            new_page = check_val_for_hidden_buttons(
                values[0], new_page, PAGE_METHOD_STEP)

        if is_dirty_form(form):
            description = form.description.data
            # NOTE(review): the fragment is validated against
            # names.MAINTENANCE although this is a method step description;
            # confirm that this element name is intended.
            valid, msg = is_valid_xml_fragment(description, names.MAINTENANCE)
            if not valid:
                flash(invalid_xml_error_message(msg, False, names.DESCRIPTION),
                      'error')
                return render_get_method_step_page(eml_node, form, filename)

            instrumentation = form.instrumentation.data
            data_sources = form.data_sources.data

            method_step_node = Node(names.METHODSTEP, parent=methods_node)
            # data_sources_marker_begin / data_sources_marker_end are not
            # defined in this function; they must come from module scope.
            create_method_step(method_step_node, description, instrumentation,
                               data_sources, data_sources_marker_begin,
                               data_sources_marker_end)

            if node_id and len(node_id) != 1:
                old_method_step_node = Node.get_node_instance(node_id)
                if not old_method_step_node:
                    msg = f"No methodStep node found in the node store with node id {node_id}"
                    raise Exception(msg)
                method_step_parent_node = old_method_step_node.parent
                method_step_parent_node.replace_child(old_method_step_node,
                                                      method_step_node)
            else:
                add_child(methods_node, method_step_node)

            save_both_formats(filename=filename, eml_node=eml_node)

        return redirect(url_for(new_page, filename=filename))

    # Process GET
    if node_id == '1':
        form.init_md5()
    else:
        method_step_nodes = methods_node.find_all_children(names.METHODSTEP)
        if method_step_nodes:
            for ms_node in method_step_nodes:
                if node_id == ms_node.id:
                    populate_method_step_form(form, ms_node)
                    break

    return render_get_method_step_page(eml_node, form, filename)
def geographic_coverage(filename=None, node_id=None):
    """Display and process the form for one geographic coverage element.

    On a valid POST with a dirty form, a new geographicCoverage node is
    built from the form; it replaces the node identified by ``node_id`` or,
    for a new entry, is appended to the dataset's coverage node. The
    document is then saved. If the bounding coordinates are inconsistent, a
    message is flashed and the redirect targets this page for the node just
    written, but the document is still saved.

    Args:
        filename: Name of the EML document being edited.
        node_id: Id of the coverage being edited. '1' makes the GET path
            call form.init_md5() without populating the form. On save, an
            id whose length is 1 (or a falsy id) appends a new entry.

    Returns:
        A redirect response on Cancel or after a valid POST; otherwise the
        rendered 'geographic_coverage.html' template.

    Raises:
        Exception: when saving a replacement and node_id is not found in
            the node store.
    """
    form = GeographicCoverageForm(filename=filename)

    # Process POST
    if request.method == 'POST' and BTN_CANCEL in request.form:
        return redirect(
            url_for(PAGE_GEOGRAPHIC_COVERAGE_SELECT, filename=filename))

    if request.method == 'POST' and form.validate_on_submit():
        save = bool(is_dirty_form(form))

        # A hidden button, if one was submitted, overrides the default
        # destination (the list of geographic coverages).
        new_page = PAGE_GEOGRAPHIC_COVERAGE_SELECT
        form_dict = request.form.to_dict(flat=False)
        for values in form_dict.values():
            val = values[0]  # value is the first list element
            if val == BTN_HIDDEN_NEW:
                new_page = PAGE_CREATE
                break
            elif val == BTN_HIDDEN_OPEN:
                new_page = PAGE_OPEN
                break
            elif val == BTN_HIDDEN_CLOSE:
                new_page = PAGE_CLOSE
                break

        url = url_for(new_page, filename=filename)

        if save:
            eml_node = load_eml(filename=filename)

            dataset_node = eml_node.find_child(names.DATASET)
            if not dataset_node:
                # Attach the new dataset node to the document. It used to be
                # created detached, so the coverage saved below was not part
                # of the document.
                dataset_node = Node(names.DATASET, parent=eml_node)
                add_child(eml_node, dataset_node)

            coverage_node = dataset_node.find_child(names.COVERAGE)
            if not coverage_node:
                coverage_node = Node(names.COVERAGE, parent=dataset_node)
                add_child(dataset_node, coverage_node)

            geographic_description = form.geographic_description.data
            # Unset coordinates are stored as empty strings.
            wbc = form.wbc.data if form.wbc.data is not None else ''
            ebc = form.ebc.data if form.ebc.data is not None else ''
            nbc = form.nbc.data if form.nbc.data is not None else ''
            sbc = form.sbc.data if form.sbc.data is not None else ''

            gc_node = Node(names.GEOGRAPHICCOVERAGE, parent=coverage_node)
            create_geographic_coverage(gc_node, geographic_description,
                                       wbc, ebc, nbc, sbc)

            if node_id and len(node_id) != 1:
                old_gc_node = Node.get_node_instance(node_id)
                if not old_gc_node:
                    msg = f"No node found in the node store with node id {node_id}"
                    raise Exception(msg)
                coverage_parent_node = old_gc_node.parent
                coverage_parent_node.replace_child(old_gc_node, gc_node)
            else:
                add_child(coverage_node, gc_node)

            # Compare against '' rather than relying on truthiness: 0 is a
            # legitimate coordinate and must not skip the check.
            if nbc != '' and sbc != '' and nbc < sbc:
                flash('North should be greater than or equal to South')
                url = url_for(PAGE_GEOGRAPHIC_COVERAGE,
                              filename=filename, node_id=gc_node.id)

            if ebc != '' and wbc != '' and ebc < wbc:
                flash('East should be greater than or equal to West')
                url = url_for(PAGE_GEOGRAPHIC_COVERAGE,
                              filename=filename, node_id=gc_node.id)

            save_both_formats(filename=filename, eml_node=eml_node)

        return redirect(url)

    # Process GET
    if node_id == '1':
        form.init_md5()
    else:
        eml_node = load_eml(filename=filename)
        dataset_node = eml_node.find_child(names.DATASET)
        if dataset_node:
            coverage_node = dataset_node.find_child(names.COVERAGE)
            if coverage_node:
                gc_nodes = coverage_node.find_all_children(
                    names.GEOGRAPHICCOVERAGE)
                if gc_nodes:
                    for gc_node in gc_nodes:
                        if node_id == gc_node.id:
                            populate_geographic_coverage_form(form, gc_node)

    set_current_page('geographic_coverage')
    help_entries = [
        get_help('geographic_coverages'),
        get_help('geographic_description'),
        get_help('bounding_coordinates'),
    ]
    return render_template('geographic_coverage.html',
                           title='Geographic Coverage',
                           form=form, help=help_entries)
def method_step(filename=None, node_id=None):
    """Display and process the form for a single method step.

    Missing dataset and methods nodes are created in the loaded document
    before the form is handled. On a valid POST with a dirty form, a new
    methodStep node is built from the form; it replaces the node identified
    by ``node_id`` or, for a new step, is appended to the methods node. The
    document is then saved.

    Args:
        filename: Name of the EML document being edited.
        node_id: Id of the method step being edited. '1' makes the GET path
            call form.init_md5() without populating the form. On save, an
            id whose length is 1 (or a falsy id) appends a new step.

    Returns:
        A redirect response on Cancel or after a valid POST; otherwise the
        rendered 'method_step.html' template.

    Raises:
        Exception: when saving a replacement and node_id is not found in
            the node store.
    """
    eml_node = load_eml(filename=filename)
    dataset_node = eml_node.find_child(names.DATASET)

    # Bind methods_node on every path. It used to be assigned only when the
    # dataset node already existed, so the check below raised
    # UnboundLocalError for a document without one.
    methods_node = None
    if dataset_node:
        methods_node = dataset_node.find_child(names.METHODS)
    else:
        dataset_node = Node(names.DATASET, parent=eml_node)
        add_child(eml_node, dataset_node)

    if not methods_node:
        methods_node = Node(names.METHODS, parent=dataset_node)
        add_child(dataset_node, methods_node)

    form = MethodStepForm(filename=filename, node_id=node_id)

    # Process POST
    if request.method == 'POST' and BTN_CANCEL in request.form:
        return redirect(url_for(PAGE_METHOD_STEP_SELECT, filename=filename))

    if request.method == 'POST' and form.validate_on_submit():
        # Save or Back sends us back to the list of method steps
        new_page = PAGE_METHOD_STEP_SELECT

        # A hidden button, if one was submitted, overrides that default.
        form_dict = request.form.to_dict(flat=False)
        for values in form_dict.values():
            val = values[0]  # value is the first list element
            if val == BTN_HIDDEN_NEW:
                new_page = PAGE_CREATE
                break
            elif val == BTN_HIDDEN_OPEN:
                new_page = PAGE_OPEN
                break
            elif val == BTN_HIDDEN_CLOSE:
                new_page = PAGE_CLOSE
                break

        if is_dirty_form(form):
            description = add_paragraph_tags(form.description.data)
            instrumentation = form.instrumentation.data

            method_step_node = Node(names.METHODSTEP, parent=methods_node)
            create_method_step(method_step_node, description, instrumentation)

            if node_id and len(node_id) != 1:
                old_method_step_node = Node.get_node_instance(node_id)
                if not old_method_step_node:
                    msg = f"No methodStep node found in the node store with node id {node_id}"
                    raise Exception(msg)
                method_step_parent_node = old_method_step_node.parent
                method_step_parent_node.replace_child(old_method_step_node,
                                                      method_step_node)
            else:
                add_child(methods_node, method_step_node)

            save_both_formats(filename=filename, eml_node=eml_node)

        return redirect(url_for(new_page, filename=filename))

    # Process GET
    if node_id == '1':
        form.init_md5()
    else:
        method_step_nodes = methods_node.find_all_children(names.METHODSTEP)
        if method_step_nodes:
            for ms_node in method_step_nodes:
                if node_id == ms_node.id:
                    populate_method_step_form(form, ms_node)
                    break

    set_current_page('method_step')
    help_entries = [
        get_help('method_step_description'),
        get_help('method_step_instrumentation'),
    ]
    return render_template('method_step.html', title='Method Step',
                           form=form, filename=filename, help=help_entries)
def temporal_coverage(filename=None, node_id=None):
    """Display and process the form for one temporal coverage element.

    On a valid POST with a dirty form, a new temporalCoverage node is built
    from the begin and end dates; it replaces the node identified by
    ``node_id`` or, for a new entry, is appended to the dataset's coverage
    node. The document is then saved. If compare_begin_end_dates() returns
    a message, it is flashed and the redirect targets this page for the
    node just written, but the document is still saved.

    Args:
        filename: Name of the EML document being edited.
        node_id: Id of the coverage being edited. '1' makes the GET path
            call form.init_md5() without populating the form. On save, an
            id whose length is 1 (or a falsy id) appends a new entry.

    Returns:
        A redirect response on Cancel or after a valid POST; otherwise the
        rendered 'temporal_coverage.html' template.

    Raises:
        Exception: when saving a replacement and node_id is not found in
            the node store.
    """
    form = TemporalCoverageForm(filename=filename)
    tc_node_id = node_id

    # Process POST
    if request.method == 'POST' and BTN_CANCEL in request.form:
        return redirect(
            url_for(PAGE_TEMPORAL_COVERAGE_SELECT, filename=filename))

    if request.method == 'POST' and form.validate_on_submit():
        save = bool(is_dirty_form(form))

        # A hidden button, if one was submitted, overrides the default
        # destination (the list of temporal coverages).
        new_page = PAGE_TEMPORAL_COVERAGE_SELECT
        form_dict = request.form.to_dict(flat=False)
        for values in form_dict.values():
            val = values[0]  # value is the first list element
            if val == BTN_HIDDEN_NEW:
                new_page = PAGE_CREATE
                break
            elif val == BTN_HIDDEN_OPEN:
                new_page = PAGE_OPEN
                break
            elif val == BTN_HIDDEN_CLOSE:
                new_page = PAGE_CLOSE
                break

        url = url_for(new_page, filename=filename)

        if save:
            eml_node = load_eml(filename=filename)

            dataset_node = eml_node.find_child(names.DATASET)
            if not dataset_node:
                # Attach the new dataset node to the document. It used to be
                # created detached, so the coverage saved below was not part
                # of the document.
                dataset_node = Node(names.DATASET, parent=eml_node)
                add_child(eml_node, dataset_node)

            coverage_node = dataset_node.find_child(names.COVERAGE)
            if not coverage_node:
                coverage_node = Node(names.COVERAGE, parent=dataset_node)
                add_child(dataset_node, coverage_node)

            tc_node = Node(names.TEMPORALCOVERAGE, parent=coverage_node)

            begin_date_str = form.begin_date.data
            end_date_str = form.end_date.data
            create_temporal_coverage(tc_node, begin_date_str, end_date_str)

            if node_id and len(node_id) != 1:
                old_tc_node = Node.get_node_instance(node_id)
                if not old_tc_node:
                    msg = f"No node found in the node store with node id {node_id}"
                    raise Exception(msg)
                coverage_parent_node = old_tc_node.parent
                coverage_parent_node.replace_child(old_tc_node, tc_node)
            else:
                add_child(coverage_node, tc_node)

            # The node written above is the one a follow-up page must show.
            tc_node_id = tc_node.id

            flash_msg = compare_begin_end_dates(begin_date_str, end_date_str)
            if flash_msg:
                flash(flash_msg)
                url = url_for(PAGE_TEMPORAL_COVERAGE,
                              filename=filename, node_id=tc_node_id)

            save_both_formats(filename=filename, eml_node=eml_node)

        return redirect(url)

    # Process GET
    if node_id == '1':
        form.init_md5()
    else:
        eml_node = load_eml(filename=filename)
        dataset_node = eml_node.find_child(names.DATASET)
        if dataset_node:
            coverage_node = dataset_node.find_child(names.COVERAGE)
            if coverage_node:
                tc_nodes = coverage_node.find_all_children(
                    names.TEMPORALCOVERAGE)
                if tc_nodes:
                    for tc_node in tc_nodes:
                        if node_id == tc_node.id:
                            populate_temporal_coverage_form(form, tc_node)

    set_current_page('temporal_coverage')
    return render_template('temporal_coverage.html',
                           title='Temporal Coverage', form=form)
def funding_award(filename=None, node_id=None, project_node_id=None):
    """Display and process the form for one funding award of a project.

    The award belongs to the dataset's project, or to the related project
    identified by ``project_node_id`` when that is given. POST requests are
    handled without calling form.validate_on_submit(). When the form is
    dirty, a new award node is built from it; it replaces the node
    identified by ``node_id`` or, for a new award, is appended to the
    project node. The document is then saved.

    Args:
        filename: Name of the EML document being edited.
        node_id: Id of the award being edited. '1' makes the GET path call
            form.init_md5() without populating the form. On save, an id
            whose length is 1 (or a falsy id) appends a new award.
        project_node_id: Id of a related-project node, or falsy for the
            dataset's own project.

    Returns:
        A redirect response for any POST; otherwise the rendered
        'award.html' template.

    Raises:
        Exception: when saving a replacement and node_id is not found in
            the node store.
    """
    form = AwardForm(filename=filename)
    eml_node = load_eml(filename=filename)

    if not project_node_id:
        project_node = eml_node.find_single_node_by_path(
            [names.DATASET, names.PROJECT])
    else:
        project_node = Node.get_node_instance(project_node_id)

    if request.method == 'POST':
        if BTN_CANCEL in request.form:
            return redirect(url_for(PAGE_FUNDING_AWARD_SELECT,
                                    filename=filename,
                                    node_id=project_node_id))

        form_dict = request.form.to_dict(flat=False)

        if is_dirty_form(form):
            funder_name = form.funder_name.data
            award_title = form.award_title.data
            funder_identifier = form.funder_identifier.data
            award_number = form.award_number.data
            award_url = form.award_url.data

            # NOTE(review): project_node may be missing here as well; what
            # add_child() does with a missing parent is not visible from
            # this function - confirm.
            award_node = Node(names.AWARD, parent=project_node)
            create_funding_award(award_node, funder_name, award_title,
                                 funder_identifier, award_number, award_url)

            if node_id and len(node_id) != 1:
                old_award_node = Node.get_node_instance(node_id)
                if not old_award_node:
                    msg = f"No funding award node found in the node store with node id {node_id}"
                    raise Exception(msg)
                award_parent_node = old_award_node.parent
                award_parent_node.replace_child(old_award_node, award_node)
            else:
                add_child(project_node, award_node)

            save_both_formats(filename=filename, eml_node=eml_node)

        url = select_post(filename, form, form_dict,
                          'POST', PAGE_FUNDING_AWARD_SELECT, PAGE_PROJECT,
                          PAGE_FUNDING_AWARD_SELECT, PAGE_FUNDING_AWARD,
                          project_node_id=project_node_id)
        return redirect(url)

    # Process GET
    if not project_node_id:
        title = 'Project Funding Award'
        related_project = False
    else:
        title = 'Related Project Funding Award'
        related_project = True

    if node_id == '1':
        form.init_md5()
    elif project_node:
        # Only search when a project node was found; dereferencing a missing
        # one used to raise AttributeError.
        award_nodes = project_node.find_all_children(names.AWARD)
        if award_nodes:
            for award_node in award_nodes:
                if node_id == award_node.id:
                    populate_award_form(form, award_node)
                    break

    set_current_page('project')
    help_entries = [
        get_help('award'),
        get_help('funder_name'),
        get_help('award_title'),
        get_help('funder_identifiers'),
        get_help('award_number'),
        get_help('award_url'),
    ]
    return render_template('award.html', title=title, form=form,
                           help=help_entries,
                           related_project=related_project)
def access(filename=None, node_id=None):
    """Display and process the form for a single access rule.

    Returns None without rendering when load_eml() yields no document. An
    access node is created in the document if it is missing. On a valid
    POST with a dirty form, a new 'allow' node is built from the user id
    and permission; it replaces the node identified by ``node_id`` or, for
    a new rule, is appended to the access node. The document is then saved
    and the browser is sent back to the list of access rules. A POST that
    fails validation falls through to the rendering path.

    Args:
        filename: Name of the EML document being edited.
        node_id: Id of the rule being edited. '1' makes the GET path call
            form.init_md5() without populating the form. On save, an id
            whose length is 1 (or a falsy id) appends a new rule.

    Raises:
        Exception: when saving a replacement and node_id is not found in
            the node store.
    """
    document = load_eml(filename=filename)
    if not document:
        return

    rules_node = document.find_child(names.ACCESS)
    if not rules_node:
        rules_node = Node(names.ACCESS, parent=document)
        add_child(document, rules_node)

    form = AccessForm(filename=filename, node_id=node_id)

    # Process POST
    if request.method == 'POST':
        if BTN_CANCEL in request.form:
            return redirect(url_for(PAGE_ACCESS_SELECT, filename=filename))

        if form.validate_on_submit():
            if is_dirty_form(form):
                userid = form.userid.data
                permission = form.permission.data
                new_rule = Node(names.ALLOW, parent=rules_node)
                create_access_rule(new_rule, userid, permission)

                editing_existing = node_id and len(node_id) != 1
                if editing_existing:
                    previous_rule = Node.get_node_instance(node_id)
                    if not previous_rule:
                        msg = f"No 'allow' node found in the node store with node id {node_id}"
                        raise Exception(msg)
                    previous_rule.parent.replace_child(previous_rule, new_rule)
                else:
                    add_child(rules_node, new_rule)

                save_both_formats(filename=filename, eml_node=document)

            # Save or Back sends us back to the list of access rules
            return redirect(url_for(PAGE_ACCESS_SELECT, filename=filename))

    # Process GET
    if node_id == '1':
        form.init_md5()
    else:
        existing_rules = rules_node.find_all_children(names.ALLOW)
        for candidate in existing_rules or []:
            if node_id == candidate.id:
                populate_access_rule_form(form, candidate)
                break

    return render_template('access.html', title='Access Rule',
                           form=form, filename=filename)
def load_data_table(uploads_path: str = None, data_file: str = '',
                    num_header_rows: int = 1, delimiter: str = ',',
                    quote_char: str = '"'):
    """Build a dataTable node tree describing a delimited text file.

    The file at ``{uploads_path}/{data_file}`` is inspected for its size,
    MD5 hash and line terminator, then read with pandas. One attribute node
    is created per column, with a measurement scale chosen from the
    variable type returned by infer_col_type().

    Args:
        uploads_path: Directory containing the data file.
        data_file: File name; also stored as the objectName content.
        num_header_rows: Stored (as text) in the numHeaderLines node. It is
            not passed to pandas.
        delimiter: Field delimiter; stored in the node tree and used to
            parse the file.
        quote_char: Quote character; stored in the node tree and used to
            parse the file.

    Returns:
        A 4-tuple ``(datatable_node, column_vartypes, column_names,
        column_categorical_codes)``. The three lists are parallel, with one
        entry per column.
    """
    if Config.LOG_DEBUG:
        app = Flask(__name__)
        with app.app_context():
            current_app.logger.info('Entering load_data_table')

    full_path = f'{uploads_path}/{data_file}'

    # The table node is created without a parent.
    datatable_node = new_child_node(names.DATATABLE, parent=None)

    physical_node = new_child_node(names.PHYSICAL, parent=datatable_node)
    physical_node.add_attribute('system', 'EDI')

    entity_name_node = new_child_node(names.ENTITYNAME, parent=datatable_node)
    entity_name_node.content = entity_name_from_data_file(data_file)

    object_name_node = new_child_node(names.OBJECTNAME, parent=physical_node)
    object_name_node.content = data_file

    file_size = get_file_size(full_path)
    if file_size is not None:
        size_node = new_child_node(names.SIZE, physical_node)
        size_node.add_attribute('unit', 'byte')
        size_node.content = str(file_size)

    md5_hash = get_md5_hash(full_path)
    if md5_hash is not None:
        hash_node = Node(names.AUTHENTICATION, parent=physical_node)
        add_child(physical_node, hash_node)
        hash_node.add_attribute('method', 'MD5')
        hash_node.content = str(md5_hash)

    data_format_node = Node(names.DATAFORMAT, parent=physical_node)
    add_child(physical_node, data_format_node)

    text_format_node = Node(names.TEXTFORMAT, parent=data_format_node)
    add_child(data_format_node, text_format_node)

    num_header_lines_node = Node(names.NUMHEADERLINES, parent=text_format_node)
    add_child(text_format_node, num_header_lines_node)
    # Store text, like every other node content set in this function.
    num_header_lines_node.content = str(num_header_rows)

    num_footer_lines_node = Node(names.NUMFOOTERLINES, parent=text_format_node)
    add_child(text_format_node, num_footer_lines_node)
    num_footer_lines_node.content = '0'

    simple_delimited_node = Node(names.SIMPLEDELIMITED, parent=text_format_node)
    add_child(text_format_node, simple_delimited_node)

    field_delimiter_node = Node(names.FIELDDELIMITER, parent=simple_delimited_node)
    add_child(simple_delimited_node, field_delimiter_node)
    field_delimiter_node.content = delimiter

    quote_character_node = Node(names.QUOTECHARACTER, parent=simple_delimited_node)
    add_child(simple_delimited_node, quote_character_node)
    quote_character_node.content = quote_char

    # Read one line so the file object records which line terminator it
    # saw, then keep the repr of that terminator with the quotes stripped.
    # The encoding matches the one pandas is given below.
    with open(full_path, encoding='utf8') as file:
        next(file)
        line_terminator = repr(file.newlines).replace("'", "")

    record_delimiter_node = Node(names.RECORDDELIMITER, parent=text_format_node)
    add_child(text_format_node, record_delimiter_node)
    record_delimiter_node.content = line_terminator

    data_frame = pd.read_csv(full_path, comment='#', encoding='utf8',
                             sep=delimiter, quotechar=quote_char)

    column_vartypes = []
    column_names = []
    column_categorical_codes = []

    if data_frame is not None:
        number_of_records = Node(names.NUMBEROFRECORDS, parent=datatable_node)
        add_child(datatable_node, number_of_records)
        number_of_records.content = f'{data_frame.shape[0]}'

        attribute_list_node = Node(names.ATTRIBUTELIST, parent=datatable_node)
        add_child(datatable_node, attribute_list_node)

        for col in data_frame.columns:
            # dtype is only consulted for numerical columns, to choose
            # between 'integer' and 'real'.
            dtype = data_frame[col][1:].infer_objects().dtype

            var_type, codes = infer_col_type(data_frame, col)

            column_vartypes.append(var_type)
            column_names.append(col)
            column_categorical_codes.append(codes)

            attribute_node = new_child_node(names.ATTRIBUTE, attribute_list_node)
            attribute_name_node = new_child_node(names.ATTRIBUTENAME, attribute_node)
            attribute_name_node.content = col

            att_label_node = Node(names.ATTRIBUTELABEL, parent=attribute_node)
            add_child(attribute_node, att_label_node)
            att_label_node.content = col

            # Construct the definition node once; a duplicate construction
            # used to create a second node that was never attached.
            att_def_node = Node(names.ATTRIBUTEDEFINITION, parent=attribute_node)
            add_child(attribute_node, att_def_node)

            ms_node = Node(names.MEASUREMENTSCALE, parent=attribute_node)
            add_child(attribute_node, ms_node)

            if var_type == VariableType.CATEGORICAL:
                # nominal / nonNumericDomain / enumeratedDomain / ...codes...
                nominal_node = new_child_node(names.NOMINAL, ms_node)
                non_numeric_domain_node = new_child_node(
                    names.NONNUMERICDOMAIN, nominal_node)
                enumerated_domain_node = new_child_node(
                    names.ENUMERATEDDOMAIN, non_numeric_domain_node)

                for code in codes:
                    code_definition_node = new_child_node(
                        names.CODEDEFINITION, enumerated_domain_node)
                    code_node = new_child_node(names.CODE, code_definition_node)
                    # Codes may be non-string values; store them as text.
                    code_node.content = str(code)
                    # Empty definition element, left for later editing.
                    new_child_node(names.DEFINITION, code_definition_node)

            elif var_type == VariableType.NUMERICAL:
                # ratio / numericDomain
                ratio_node = new_child_node(names.RATIO, ms_node)
                numeric_domain_node = new_child_node(names.NUMERICDOMAIN, ratio_node)
                number_type = 'real'
                if str(dtype).startswith('int'):  # FIXME - we can do better than this
                    number_type = 'integer'
                number_type_node = new_child_node(names.NUMBERTYPE, numeric_domain_node)
                number_type_node.content = number_type
                # Empty unit element, added after the numeric domain.
                new_child_node(names.UNIT, ratio_node)

            elif var_type == VariableType.TEXT:
                # nominal / nonNumericDomain / textDomain
                nominal_node = new_child_node(names.NOMINAL, ms_node)
                non_numeric_domain_node = new_child_node(
                    names.NONNUMERICDOMAIN, nominal_node)
                text_domain_node = new_child_node(names.TEXTDOMAIN,
                                                  non_numeric_domain_node)
                new_child_node(names.DEFINITION, text_domain_node)

            elif var_type == VariableType.DATETIME:
                # dateTime / formatString
                datetime_node = Node(names.DATETIME, parent=ms_node)
                add_child(ms_node, datetime_node)

                format_string_node = Node(names.FORMATSTRING, parent=datetime_node)
                add_child(datetime_node, format_string_node)
                # For datetime columns, the second value returned by
                # infer_col_type() is stored as the format string.
                format_string_node.content = codes

    if Config.LOG_DEBUG:
        app = Flask(__name__)
        with app.app_context():
            current_app.logger.info('Leaving load_data_table')

    return datatable_node, column_vartypes, column_names, column_categorical_codes