def property_reference_setter(self, multiplicity):
    """
    Choose the `_definitions.yaml` reference that goes into the properties
    block for a link, based on the link's multiplicity.

    'many_to_one' and 'one_to_one' select the `to_one` definition; any other
    multiplicity selects `to_many`. The pointer is wrapped with dbl_quote and
    returned as the value of a single-key `$ref` dict.
    """
    if multiplicity in ('many_to_one', 'one_to_one'):
        pointer = '_definitions.yaml#/to_one'
    else:
        pointer = '_definitions.yaml#/to_many'

    return {'$ref': dbl_quote(pointer)}
def validate_enum(self, temp_enum):
    """
    Strips spaces & converts values that could be interpreted in yaml as
    non-string into a double-quoted string.

    The value is first cleaned with stripper(). It is then returned wrapped in
    dbl_quote() when any of the following holds, and returned unchanged
    otherwise:

    * it is one of the reserved words yes/no/true/false/root/na
      (case-insensitive);
    * it parses as a Python literal equal to 0 (e.g. '0', '0.0', '-0');
    * it parses as an int/float literal and consists of the digits 0-9 only;
    * it is not a Python literal but int() or float() accepts it
      (e.g. '007', 'nan', 'inf').

    The value 'open' and any value containing '/' are never quoted.

    NOTE(review): the previous implementation called eval() on the cell text.
    That executed arbitrary expressions coming from the input spreadsheet and
    resolved bare words against the interpreter's namespace. It now uses
    ast.literal_eval(), which only accepts literals. The one visible
    difference is that arithmetic expressions that happened to evaluate to 0
    (such as '1-1') are no longer quoted.
    """
    # Local import: the file-level import block is not visible from this chunk.
    import ast

    enum = stripper(temp_enum)

    if enum == 'open' or '/' in enum:
        return enum

    if isinstance(enum, str) and enum.lower() in (
            'yes', 'no', 'true', 'false', 'root', 'na'):
        return dbl_quote(enum)

    try:
        parsed = ast.literal_eval(enum)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a Python literal. Strings such as '007' or 'nan' are still
        # accepted by the number constructors, so try those before giving up.
        for caster in (int, float):
            try:
                caster(enum)
            except (ValueError, TypeError, OverflowError):
                continue
            return dbl_quote(enum)
        return enum

    if parsed == 0:
        return dbl_quote(enum)

    # Only plain digit strings are quoted here; '1.5', '-3' or '1e5' parse as
    # numbers but contain a non-digit character and are left untouched.
    if isinstance(parsed, (int, float)) and not re.search(r'[^0-9]', enum):
        return dbl_quote(enum)

    return enum
def get_terms(self, terms):
    """
    Turn a comma-separated terms string into a list of `$ref` entries.

    Each comma-separated piece is cleaned with stripper(), wrapped with
    dbl_quote() and placed in its own {'$ref': ...} dict. Anything that is not
    a str (for example None for an empty cell) yields None.
    """
    if not isinstance(terms, str):
        return None

    return [{'$ref': dbl_quote(stripper(piece))} for piece in terms.split(',')]
def build_terms(self, terms_in_file, in_dir, out_dir, extension):
    """
    Constructs the _terms yaml file from a terms sheet.

    Args:
        terms_in_file: input handed to pd.ExcelFile (when extension is 'xlsx')
            or to pd.read_csv with a tab separator (any other extension).
        in_dir: source directory; only echoed in the summary printed at the end.
        out_dir: destination prefix. '_terms.yaml' is appended to it verbatim,
            so it must already end with a path separator.
        extension: 'xlsx' selects the Excel reader, anything else the TSV reader.

    Raises:
        SystemExit: for 'xlsx' input when no sheet name contains 'terms_'.

    Side effects: writes '<out_dir>_terms.yaml' and prints a summary banner.
    """
    if extension == 'xlsx':
        xlsx_file = pd.ExcelFile(terms_in_file)

        # No early exit: if several sheet names contain 'terms_', the last wins.
        term_sheet = None
        for sheet_name in xlsx_file.sheet_names:
            if 'terms_' in sheet_name:
                term_sheet = sheet_name

        if not term_sheet:
            sys.exit('ERROR: Terms sheet not found, exiting the program')

        terms_df = xlsx_file.parse(sheet_name=term_sheet,
                                   index_col=None,
                                   header=0,
                                   keep_default_na=False,
                                   na_values=[''])
    else:
        terms_df = pd.read_csv(terms_in_file,
                               index_col=None,
                               header=0,
                               sep='\t',
                               keep_default_na=False,
                               na_values=[''])

    # Replace missing cells with None so the truthiness checks below work.
    terms_df = terms_df.where(terms_df.notnull(), None)

    # 'id' is pre-populated with a string, which is why a term literally named
    # 'id' is stored under '_id' further down.
    dict_of_terms = {'id': '_terms'}

    for term in terms_df.to_dict('records'):
        out_dict = {}
        property_nm = ''
        termdef = {}

        # `node` and `enum` are bound only by their columns; a sheet lacking
        # either column raises NameError when the row is filed below.
        for key, val in term.items():
            # Drops the first and last character of every column header
            # (presumably a wrapping delimiter pair - confirm with the template).
            key = key[1:-1]

            if key == 'property_or_enum':
                if val == 'id':
                    property_nm = '_id'
                else:
                    val_ = re.sub(r'[\W]+', '',
                                  val.lower().strip().replace(' ', '_'))
                    # NOTE(review): called with one argument although
                    # validate_enum is declared with a leading `self` -
                    # confirm how these functions are bound.
                    property_nm = validate_enum(val_)

            elif key == 'node':
                node = val

            elif key == 'enum_property':
                enum = val

            elif key == 'description':
                if val:
                    val = fss(validate_desc(val))
                out_dict[key] = val

            elif 'termDef:' in key:
                key_ = key.replace('termDef:', '')

                if key_ == 'term':
                    if val:
                        val = fss(validate_desc(val))
                    termdef[key_] = val

                elif key_ == 'term_url':
                    if val:
                        val = dbl_quote(val)
                    termdef[key_] = val

                elif key_ == 'cde_id':
                    # Store as int when possible, otherwise keep the raw value
                    # (None for an empty cell, or a non-numeric id).
                    try:
                        termdef[key_] = int(val)
                    except (TypeError, ValueError, OverflowError):
                        termdef[key_] = val

                elif key_ in ['term_id', 'term_version']:
                    # These two are omitted entirely when empty.
                    if val:
                        termdef[key_] = val

                else:
                    termdef[key_] = val

        out_dict['termDef'] = termdef

        property_terms = dict_of_terms.setdefault(property_nm, {})
        if node == 'common':
            property_terms[node] = out_dict
        else:
            # Node-specific terms are nested one level deeper, keyed by the
            # value of the 'enum_property' column.
            property_terms.setdefault(node, {})[enum] = out_dict

    yaml = YAML()
    yaml.default_flow_style = False
    yaml.indent(offset=2, sequence=4, mapping=2)
    yaml.representer.add_representer(type(None), my_represent_none_blank)

    num_terms = len(dict_of_terms)
    term_props = cmap(dict_of_terms)

    # insert blank lines in properties
    for k in dict_of_terms:
        term_props.yaml_set_comment_before_after_key(k, before='\n')

    with open('{0}{1}.yaml'.format(out_dir, '_terms'), 'w') as file:
        yaml.dump(term_props, file)

    print('*' * 100, '\n')
    print(' ' * 42, 'TSV ----> YAML', ' ' * 42, '\n')
    print('*' * 100, '\n')
    print('Source Directory      : {0}'.format(in_dir), '\n')
    print('Number of Terms       : {0}'.format(num_terms), '\n')
    print('Destination Directory : {0}'.format(out_dir))
    print('*' * 100, '\n')
def build_nodes(self, nodes_df, var_dict):  #, terms_flag):
    """
    Turn the nodes sheet into the python dictionary used as a template for
    writing each node yaml file.

    Every row of nodes_df becomes a list of single-purpose dicts (schema, core
    fields, system properties, links, required, unique keys, deprecated,
    properties), with empty dicts left out. The result maps the node id to
    that list. var_dict supplies the per-node properties (node id -> property
    name -> definition).
    """
    dict_of_nodes = {}

    # Transform nodes tsv into a dictionary and process fields
    for record in nodes_df.to_dict('records'):
        schema_part = {}
        core_part = {}
        system_part = {}
        required_part = {}
        unique_part = {}
        deprecated_part = {}
        raw_links = {}
        property_ref = ''

        for header, val in record.items():
            # Column headers lose their first and last character.
            key = header[1:-1]

            if key == '$schema':
                schema_part[key] = dbl_quote(val)
            elif key == 'id':
                core_part[key] = dbl_quote(validate_name(val, 'node'))
            elif key == 'description':
                core_part[key] = fss(validate_desc(val)) if val else val
            elif key == 'systemProperties':
                system_part[key] = reqs2list(val)
            elif key == 'required':
                required = reqs2list(val)
                if required:
                    required_part[key] = required
            elif key == 'uniqueKeys':
                unique_part[key] = string2list(key, val)
            elif key == 'deprecated':
                if val:
                    deprecated_part[key] = reqs2list(val)
            elif key == 'root':
                if val:
                    core_part[key] = val
            elif key == 'property_ref':
                property_ref = val
            elif key == 'nodeTerms':  # and terms_flag == 'et': Check this flag value if its correct
                # NOTE(review): called with one argument although get_terms is
                # declared with a leading `self` - confirm how these are bound.
                node_terms = get_terms(val)
                if node_terms:
                    core_part[key] = node_terms
            elif 'link_' in key:
                link_key = key.replace('link_', '')
                raw_links[link_key] = string2list(link_key, val)
            else:
                core_part[key] = val

        node_id = core_part['id']

        # Add formatted links to each dictionary
        links, link_refs = add_links(raw_links, node_id)

        # Add formatted properties to each dictionary; insertion order is
        # the shared $ref first, then the node's own properties, then links.
        properties = {}
        if property_ref:
            properties['$ref'] = [dbl_quote(property_ref)]
        if node_id in var_dict:
            properties.update(var_dict[node_id])
        properties.update(link_refs)

        sections = (
            schema_part,
            core_part,
            system_part,
            {'links': links},
            required_part,
            unique_part,
            deprecated_part,
            {'properties': properties},
        )
        dict_of_nodes[node_id] = [section for section in sections if section]

    return dict_of_nodes
def build_properties(self, variables_df, enum_df):
    """
    Converts the variables dataframe into a dict of dicts containing each
    variable's description, type & enums.

    Args:
        variables_df: one row per property; column headers lose their first
            and last character before being matched.
        enum_df: enum sheet passed to build_enums, or None when there are no
            enums.

    Returns:
        {node: {property: definition}}. Rows with an empty node or property
        name are dropped.

    Fix over the previous version: for a multi-type property ('oneOf') the
    maximum/minimum/pattern value was popped from the definition once per
    non-null sub-schema, so a second non-null type raised KeyError. The value
    is now popped once and copied onto every non-null sub-schema.
    """
    var_dict = {}

    # NOTE(review): build_enums is called with one argument although it is
    # declared with a leading `self` - confirm how these functions are bound.
    if enum_df is not None:
        enum_dict = build_enums(enum_df)
    else:
        enum_dict = {}

    for var in variables_df.to_dict('records'):
        temp_var = {}
        node = ''
        field = ''

        for key, val in var.items():
            if val is None:
                continue

            key = key[1:-1]

            if key == 'node':
                node = validate_name(val, 'node')

            elif key == 'property':
                field = validate_name(val, 'property')

            elif key == 'terms':
                # Uses `field` and `node`, so those columns have to come
                # before the terms column in the sheet.
                for v in reqs2list(val.lower()):
                    refs = temp_var.setdefault('$ref', [])
                    if v == 'common':
                        refs.append(
                            dbl_quote('_terms.yaml#/' +
                                      field.lower().strip().replace(' ', '_') +
                                      '/' + v))
                    elif v == 'specific':
                        refs.append(
                            dbl_quote('_terms.yaml#/' +
                                      field.lower().strip().replace(' ', '_') +
                                      '/' + node + '/' + v))
                    elif v:
                        refs.append(dbl_quote(v))

                # Do not delete - for old format
                # if '_terms.yaml' in v:
                #     if 'term' not in temp_var:
                #         temp_var['term'] = {}
                #         temp_var['term']['$ref'] = []
                #     temp_var['term']['$ref'].append(dbl_quote(v))
                # else:
                #     if '$ref' not in temp_var:
                #         temp_var['$ref'] = []
                #     temp_var['$ref'].append(dbl_quote(v))

            elif key == 'description':
                if val:
                    val = fss(validate_desc(val))
                temp_var[key] = val

            elif key == 'pattern':
                temp_var[key] = dbl_quote(val)

            elif key == 'default':
                # 'true'/'false' in any casing become real booleans.
                if isinstance(val, str) and val.title() in ['True', 'False']:
                    val = val.title() == 'True'
                temp_var[key] = val

            elif key == 'type' and val != 'enum':
                types = reqs2list(val)
                if len(types) == 1:
                    temp_var[key] = types[0]
                else:
                    temp_type = []
                    for v in types:
                        if v == 'null':
                            v = dbl_quote(v)
                        temp_type.append({'type': v})
                    temp_var['oneOf'] = temp_type

            elif key != 'type':
                temp_var[key] = val

        if 'oneOf' in temp_var:
            # Constraints belong on the non-null alternatives, not on the
            # property itself. With only null alternatives they stay in place.
            targets = [t for t in temp_var['oneOf'] if t['type'] != 'null']
            for k in ('maximum', 'minimum', 'pattern'):
                if k in temp_var and targets:
                    constraint = temp_var.pop(k)
                    if k != 'pattern':
                        constraint = int(constraint)
                    for t in targets:
                        t[k] = constraint

        # pop + re-insert (rather than assigning in place) keeps the key
        # order the original produced in the emitted mapping.
        if 'maximum' in temp_var:
            temp_var['maximum'] = int(temp_var.pop('maximum'))

        if 'minimum' in temp_var:
            temp_var['minimum'] = int(temp_var.pop('minimum'))

        # When type is enum it is not populated in the temp_var as temp_var is
        # constructed to populate the actual values supposed to be populated
        # in yaml
        if ('type' not in temp_var and node in enum_dict
                and field in enum_dict[node]):
            temp_var.update(enum_dict[node][field])

        if node != '' and field != '':
            var_dict.setdefault(node, {})[field] = temp_var

    return var_dict
def build_enums(self, enum_df):
    """
    Converts the enum dataframe into a dict of dicts containing enums &
    enumDefs.

    Args:
        enum_df: one row per enum value. Column headers lose their first and
            last character before being matched against node / property /
            enum_value / enum_def / deprecated. Falsy cells are ignored.

    Returns:
        {node: {property: {'enum': [...], 'deprecated_enum': [...],
        'enumDef': {...}}}}, where 'deprecated_enum' and 'enumDef' are
        removed when empty.

    Raises:
        SystemExit: when a value flagged deprecated ('yes') has no matching
            non-deprecated row, i.e. it is missing from the 'enum' list.
    """
    enum_dict = {}

    for row in enum_df.to_dict('records'):
        node = ''
        field = ''
        enum_val = ''
        enum_def = ''
        dep_enum = ''

        for key, val in row.items():
            if not val:
                continue
            key = key[1:-1]
            if key == 'node':
                node = validate_name(val, 'node')
            elif key == 'property':
                field = validate_name(val, 'property')
            elif key == 'enum_value':
                # NOTE(review): called with one argument although
                # validate_enum is declared with a leading `self` -
                # confirm how these functions are bound.
                enum_val = validate_enum(val)
            elif key == 'enum_def':
                enum_def = val
            elif key == 'deprecated':
                dep_enum = val

        if node == '':
            continue

        node_entry = enum_dict.setdefault(node, {})
        if field == '':
            continue

        field_entry = node_entry.setdefault(field, {})
        if 'enum' not in field_entry:
            field_entry['enum'] = []
            field_entry['deprecated_enum'] = []
            field_entry['enumDef'] = {}

        # A row with any other non-empty `deprecated` value lands in neither list.
        if not dep_enum:
            field_entry['enum'].append(enum_val)
        if dep_enum == 'yes':
            field_entry['deprecated_enum'].append(enum_val)

        if enum_def in ('common', 'specific'):
            # Term key: lower-cased, spaces to underscores, then every
            # remaining non-word character removed.
            term_key = re.sub(r'[\W]+', '',
                              enum_val.lower().strip().replace(' ', '_'))
            if enum_def == 'common':
                pointer = '_terms.yaml#/' + term_key + '/' + enum_def
            else:
                pointer = '_terms.yaml#/' + term_key + '/' + node + '/' + field
            field_entry['enumDef'][enum_val] = {'$ref': [dbl_quote(pointer)]}
        elif enum_def:
            # Anything else is taken as a comma-separated list of references.
            field_entry['enumDef'][enum_val] = {
                '$ref': [dbl_quote(stripper(x)) for x in enum_def.split(',')]
            }

    # Validate deprecated enums present in enum section
    missing_deprecated_enums = []
    for key, val in enum_dict.items():
        for k, v in val.items():
            if 'deprecated_enum' in v:
                for d in v['deprecated_enum']:
                    if d not in v['enum']:
                        missing_deprecated_enums.append(d + ' - ' + key +
                                                        ' : ' + k)

    if missing_deprecated_enums != []:
        sys.exit(
            'ERROR : Missing enum defs in main section for below deprecated enums: \n{0}'
            .format(missing_deprecated_enums))

    # Remove empty deprecated_enum, enumDefs
    for val in enum_dict.values():
        for v in val.values():
            if 'deprecated_enum' in v and v['deprecated_enum'] == []:
                v.pop('deprecated_enum')
            if 'enumDef' in v and v['enumDef'] == {}:
                v.pop('enumDef')

    return enum_dict