コード例 #1
0
ファイル: datamodel.py プロジェクト: cgmeyer/gen3sdk-python
    def property_reference_setter(self, multiplicity):
        """
        Build the ``$ref`` entry that goes into the properties block for a link.

        'many_to_one' and 'one_to_one' links reference the ``to_one``
        definition; any other multiplicity references ``to_many``.
        """

        points_to_one = multiplicity in ('many_to_one', 'one_to_one')
        target = ('_definitions.yaml#/to_one'
                  if points_to_one else '_definitions.yaml#/to_many')

        return {'$ref': dbl_quote(target)}
コード例 #2
0
ファイル: datamodel.py プロジェクト: cgmeyer/gen3sdk-python
    def validate_enum(self, temp_enum):
        """
        Strip an enum value and double-quote it when YAML could misread it.

        The value is first passed through ``stripper``. Unless the result is
        'open' or contains '/', it is wrapped with ``dbl_quote`` when it is:

        * one of yes/no/true/false/root/na (case-insensitive),
        * a Python literal equal to zero,
        * a numeric Python literal written only with the digits 0-9, or
        * not a Python literal but still accepted by ``int()`` or ``float()``
          (for example '007' or 'nan').

        Every other value is returned exactly as ``stripper`` produced it.

        Args:
            temp_enum: Raw enum value.

        Returns:
            The stripped value, double-quoted when one of the rules applies.
        """
        # Function-scope import so the method does not depend on the
        # module's import block.
        import ast

        enum = stripper(temp_enum)

        if enum == 'open' or '/' in enum:
            return enum

        if isinstance(enum, str) and enum.lower() in [
                'yes', 'no', 'true', 'false', 'root', 'na'
        ]:
            return dbl_quote(enum)

        try:
            # SECURITY: this used to be eval(enum). Enum values come from
            # user-maintained spreadsheets, so eval() could execute arbitrary
            # code. literal_eval only parses literals; arithmetic expressions
            # such as '1-1' are therefore no longer evaluated.
            parsed = ast.literal_eval(enum)

        except (ValueError, TypeError, SyntaxError, MemoryError,
                RecursionError):
            # Not a Python literal (e.g. '007' is a syntax error in Python 3
            # but a valid int() argument): fall back to the plain converters.
            for convert in (int, float):
                try:
                    convert(enum)

                except (TypeError, ValueError):
                    continue

                return dbl_quote(enum)

            return enum

        if parsed == 0:
            return dbl_quote(enum)

        # Only digit-only text is quoted here; values such as '1.5' or '-5'
        # parse as numbers but are deliberately left untouched.
        if isinstance(parsed, (int, float)) and not re.search('[^0-9]', enum):
            return dbl_quote(enum)

        return enum
コード例 #3
0
ファイル: datamodel.py プロジェクト: cgmeyer/gen3sdk-python
    def get_terms(self, terms):
        """
        Turn a comma-separated terms string into a list of ``$ref`` entries.

        Every comma-separated piece is passed through ``stripper`` and then
        ``dbl_quote``. Anything that is not a string yields ``None``.
        """

        if not isinstance(terms, str):
            return None

        refs = []

        for piece in terms.split(','):
            refs.append({'$ref': dbl_quote(stripper(piece))})

        return refs
コード例 #4
0
ファイル: datamodel.py プロジェクト: cgmeyer/gen3sdk-python
    def build_terms(self, terms_in_file, in_dir, out_dir, extension):
        """
        Read the terms table and write it out as the ``_terms`` YAML file.

        Every row is turned into a dict holding the row's description (when
        that column exists) and a ``termDef`` dict. Rows whose node is
        'common' are stored at ``terms[property][node]``; all other rows are
        stored at ``terms[property][node][enum_property]``.

        Args:
            terms_in_file: Path handed to ``pd.ExcelFile`` or ``pd.read_csv``.
            in_dir: Source directory; only echoed in the printed summary.
            out_dir: Output prefix. The file name is built by plain string
                concatenation (``out_dir + '_terms.yaml'``), so the caller
                has to include the trailing path separator.
            extension: 'xlsx' reads the workbook sheet whose name contains
                'terms_'; any other value reads a tab-separated file.

        Raises:
            SystemExit: If an xlsx workbook has no sheet containing 'terms_'.
        """

        if extension == 'xlsx':
            xlsx_file = pd.ExcelFile(terms_in_file)
            term_sheet = None

            # No break on purpose: if several sheets match, the last one wins.
            for sheet_name in xlsx_file.sheet_names:
                if 'terms_' in sheet_name:
                    term_sheet = sheet_name

            if not term_sheet:
                sys.exit('ERROR: Terms sheet not found, exiting the program')

            terms_df = xlsx_file.parse(sheet_name=term_sheet,
                                       index_col=None,
                                       header=0,
                                       keep_default_na=False,
                                       na_values=[''])

        else:
            terms_df = pd.read_csv(terms_in_file,
                                   index_col=None,
                                   header=0,
                                   sep='\t',
                                   keep_default_na=False,
                                   na_values=[''])

        # Replace missing cells with None so they are emitted as blanks.
        terms_df = terms_df.where(terms_df.notnull(), None)

        term_dicts = terms_df.to_dict('records')

        dict_of_terms = {'id': '_terms'}

        for term in term_dicts:
            out_dict = {}
            property_nm = ''
            termdef = {}

            for key, val in term.items():
                # Drop the first and last character of the column header
                # (presumably a '<...>' style wrapper - confirm against the
                # input template).
                key = key[1:-1]

                if key == 'property_or_enum':
                    if val == 'id':
                        property_nm = '_id'

                    else:
                        # Raw string: '\W' in a normal literal is an invalid
                        # escape sequence and warns on recent Python versions.
                        val_ = re.sub(r'[\W]+', '',
                                      val.lower().strip().replace(' ', '_'))
                        property_nm = validate_enum(val_)  # val

                elif key == 'node':
                    node = val

                elif key == 'enum_property':
                    enum = val

                elif key == 'description':
                    if val:
                        val = fss(validate_desc(val))

                    out_dict[key] = val

                elif 'termDef:' in key:
                    key_ = key.replace('termDef:', '')

                    if key_ == 'term':
                        if val:
                            val = fss(validate_desc(val))

                        termdef[key_] = val

                    elif key_ == 'term_url':
                        if val:
                            val = dbl_quote(val)

                        termdef[key_] = val

                    elif key_ == 'cde_id':
                        # Store the id as an int when possible, otherwise keep
                        # the raw cell value (None, free text, ...).
                        try:
                            termdef[key_] = int(val)

                        except (TypeError, ValueError, OverflowError):
                            termdef[key_] = val

                    elif key_ in ['term_id', 'term_version']:
                        # These two are omitted entirely when empty.
                        if val:
                            termdef[key_] = val

                    else:
                        termdef[key_] = val

            out_dict['termDef'] = termdef

            if property_nm not in dict_of_terms:
                dict_of_terms[property_nm] = {}

            if node == 'common':
                dict_of_terms[property_nm][node] = out_dict

            else:
                if node not in dict_of_terms[property_nm]:
                    dict_of_terms[property_nm][node] = {}

                dict_of_terms[property_nm][node][enum] = out_dict

        yaml = YAML()
        yaml.default_flow_style = False
        yaml.indent(offset=2, sequence=4, mapping=2)
        yaml.representer.add_representer(type(None), my_represent_none_blank)

        # NOTE: this count includes the 'id' entry the dictionary starts with.
        num_terms = len(dict_of_terms)
        term_props = cmap(dict_of_terms)

        # insert blank lines in properties
        for k in dict_of_terms:
            term_props.yaml_set_comment_before_after_key(k, before='\n')

        with open('{0}{1}.yaml'.format(out_dir, '_terms'), 'w') as file:
            yaml.dump(term_props, file)

        print('*' * 100, '\n')
        print(' ' * 42, 'TSV  ---->  YAML', ' ' * 42, '\n')
        print('*' * 100, '\n')
        print('Source Directory      : {0}'.format(in_dir), '\n')
        print('Number of Terms       : {0}'.format(num_terms), '\n')
        print('Destination Directory : {0}'.format(out_dir))
        print('*' * 100, '\n')
コード例 #5
0
ファイル: datamodel.py プロジェクト: cgmeyer/gen3sdk-python
    def build_nodes(self, nodes_df, var_dict):
        """
        Assemble, for every node row, the ordered sections of its yaml file.

        Returns a dict keyed by node id. Each value is a list of single-purpose
        dicts in output order ($schema, header fields, systemProperties, links,
        required, uniqueKeys, deprecated, properties); empty sections are
        dropped. Properties are merged from the node's ``property_ref``,
        from ``var_dict[node id]`` and from the link references returned by
        ``add_links``.
        """

        dict_of_nodes = {}

        for record in nodes_df.to_dict('records'):
            schema_part = {}
            header_part = {}
            system_part = {}
            required_part = {}
            unique_part = {}
            deprecated_part = {}
            raw_links = {}
            property_ref = ''

            for column, value in record.items():
                # Column headers carry one extra character on each side.
                name = column[1:-1]

                if name == '$schema':
                    schema_part[name] = dbl_quote(value)
                    continue

                if name == 'id':
                    header_part[name] = dbl_quote(validate_name(value, 'node'))
                    continue

                if name == 'description':
                    header_part[name] = (fss(validate_desc(value))
                                         if value else value)
                    continue

                if name == 'systemProperties':
                    system_part[name] = reqs2list(value)
                    continue

                if name == 'required':
                    required = reqs2list(value)

                    if required:
                        required_part[name] = required
                    continue

                if name == 'uniqueKeys':
                    unique_part[name] = string2list(name, value)
                    continue

                if name == 'deprecated':
                    if value:
                        deprecated_part[name] = reqs2list(value)
                    continue

                if name == 'root':
                    if value:
                        header_part[name] = value
                    continue

                if name == 'property_ref':
                    property_ref = value
                    continue

                if name == 'nodeTerms':
                    term_refs = get_terms(value)

                    if term_refs:
                        header_part[name] = term_refs
                    continue

                if 'link_' in name:
                    link_key = name.replace('link_', '')
                    raw_links[link_key] = string2list(link_key, value)
                    continue

                # Any unrecognised column is copied into the header as-is.
                header_part[name] = value

            node_id = header_part['id']

            # Formatted links plus the property references they imply
            links, link_refs = add_links(raw_links, node_id)

            # Properties: shared reference first, then variables, then links
            properties = {}

            if property_ref:
                properties['$ref'] = [dbl_quote(property_ref)]

            if node_id in var_dict:
                properties.update(var_dict[node_id].items())

            properties.update(link_refs.items())

            sections = [
                schema_part,
                header_part,
                system_part,
                {'links': links},
                required_part,
                unique_part,
                deprecated_part,
                {'properties': properties},
            ]
            dict_of_nodes[node_id] = [part for part in sections if part]

        return dict_of_nodes
コード例 #6
0
ファイル: datamodel.py プロジェクト: cgmeyer/gen3sdk-python
    def build_properties(self, variables_df, enum_df):
        """
        Convert the variables table into ``{node: {property: definition}}``.

        Each row contributes one property definition (description, type or
        ``oneOf`` list, term references, limits, ...). When a row has no plain
        type, the enum information built by ``build_enums`` for that
        node/property is merged in. Rows lacking a node or a property name
        are dropped.

        Args:
            variables_df: DataFrame with one row per property. The first and
                last character of every column header are discarded.
            enum_df: DataFrame of enum values, or ``None`` when there are none.

        Returns:
            Dict of dicts keyed by node name, then by property name.
        """

        var_list = variables_df.to_dict('records')
        var_dict = {}

        if enum_df is not None:
            enum_dict = build_enums(enum_df)

        else:
            enum_dict = {}

        for var in var_list:
            temp_var = {}
            node = ''
            field = ''

            for key, val in var.items():
                if val is None:
                    continue

                key = key[1:-1]

                if key == 'node':
                    node = validate_name(val, 'node')

                elif key == 'property':
                    field = validate_name(val, 'property')

                elif key == 'terms':
                    # NOTE(review): uses `field` and `node`, so this relies on
                    # those columns appearing before the terms column.
                    for v in reqs2list(val.lower()):
                        refs = temp_var.setdefault('$ref', [])

                        if v == 'common':
                            refs.append(
                                dbl_quote('_terms.yaml#/' +
                                          field.lower().strip().replace(
                                              ' ', '_') + '/' + v))

                        elif v == 'specific':
                            refs.append(
                                dbl_quote('_terms.yaml#/' +
                                          field.lower().strip().replace(
                                              ' ', '_') + '/' + node + '/' +
                                          v))

                        elif v:
                            refs.append(dbl_quote(v))

                        # Do not delete - for old format
                        # if '_terms.yaml' in v:
                        #     if 'term' not in temp_var:
                        #         temp_var['term'] = {}
                        #         temp_var['term']['$ref'] = []
                        #
                        #     temp_var['term']['$ref'].append(dbl_quote(v))
                        #
                        # else:
                        #     if '$ref' not in temp_var:
                        #         temp_var['$ref'] = []
                        #
                        #     temp_var['$ref'].append(dbl_quote(v))

                elif key == 'description':
                    if val:
                        val = fss(validate_desc(val))

                    temp_var[key] = val

                elif key == 'pattern':
                    temp_var[key] = dbl_quote(val)

                elif key == 'default':
                    # 'true'/'false' in any casing become real booleans.
                    if isinstance(val,
                                  str) and val.title() in ['True', 'False']:
                        val = val.title() == 'True'

                    temp_var[key] = val

                elif key == 'type' and val != 'enum':
                    val_ = reqs2list(val)

                    if len(val_) == 1:
                        temp_var[key] = val_[0]

                    else:
                        temp_type = []

                        for v in val_:
                            if v == 'null':
                                v = dbl_quote(v)

                            temp_type.append({'type': v})

                        temp_var['oneOf'] = temp_type

                elif key != 'type':
                    temp_var[key] = val

            if 'oneOf' in temp_var:
                # Move limits/pattern from the property onto every non-null
                # member of oneOf. The value is popped exactly once; popping
                # it per member raised KeyError as soon as a second non-null
                # type was present.
                non_null = [t for t in temp_var['oneOf'] if t['type'] != 'null']

                for k in ['maximum', 'minimum', 'pattern']:
                    if k in temp_var and non_null:
                        moved = temp_var.pop(k)

                        if k != 'pattern':
                            moved = int(moved)

                        for t in non_null:
                            t[k] = moved

            # Limits still at property level are normalised to int.
            if 'maximum' in temp_var:
                temp_var['maximum'] = int(temp_var.pop('maximum'))

            if 'minimum' in temp_var:
                temp_var['minimum'] = int(temp_var.pop('minimum'))

            # When type is enum it is not populated in the temp_var as temp_var is constructed
            # to populate the actual values supposed to be populated in yaml
            if ('type' not in temp_var and node in enum_dict
                    and field in enum_dict[node]):
                for k, v in enum_dict[node][field].items():
                    temp_var[k] = v

            if node != '' and field != '':
                var_dict.setdefault(node, {})[field] = temp_var

        return var_dict
コード例 #7
0
ファイル: datamodel.py プロジェクト: cgmeyer/gen3sdk-python
    def build_enums(self, enum_df):
        """
        Convert the enum table into ``{node: {property: {...}}}``.

        For every node/property pair the result holds an ``enum`` list and,
        when non-empty, a ``deprecated_enum`` list and an ``enumDef`` mapping
        from enum value to its ``$ref`` list. A row goes into ``enum`` when
        its deprecated cell is empty and into ``deprecated_enum`` when that
        cell is 'yes'.

        Args:
            enum_df: DataFrame with one row per enum value. The first and
                last character of every column header are discarded.

        Returns:
            Dict of dicts keyed by node name, then by property name.

        Raises:
            SystemExit: If a deprecated enum value is not also listed in the
                ``enum`` list of the same property.
        """

        enum_list = enum_df.to_dict('records')
        enum_dict = {}

        for enum in enum_list:
            node = ''
            field = ''
            enum_val = ''
            enum_def = ''
            dep_enum = ''

            for key, val in enum.items():
                if not val:
                    continue

                key = key[1:-1]

                if key == 'node':
                    node = validate_name(val, 'node')

                elif key == 'property':
                    field = validate_name(val, 'property')

                elif key == 'enum_value':
                    enum_val = validate_enum(val)

                elif key == 'enum_def':
                    enum_def = val

                elif key == 'deprecated':
                    dep_enum = val

            if node == '':
                continue

            if node not in enum_dict:
                enum_dict[node] = {}

            if field == '':
                continue

            if field not in enum_dict[node]:
                enum_dict[node][field] = {}

            entry = enum_dict[node][field]

            if 'enum' not in entry:
                entry['enum'] = []
                entry['deprecated_enum'] = []
                entry['enumDef'] = {}

            if not dep_enum:
                entry['enum'].append(enum_val)

            if dep_enum == 'yes':
                entry['deprecated_enum'].append(enum_val)

            if enum_def in ('common', 'specific'):
                # Raw string: '\W' in a normal literal is an invalid escape
                # sequence and warns on recent Python versions.
                slug = re.sub(r'[\W]+', '',
                              enum_val.lower().strip().replace(' ', '_'))

                # 'common' terms live under <term>/common, 'specific' ones
                # under <term>/<node>/<property>.
                if enum_def == 'common':
                    location = enum_def

                else:
                    location = node + '/' + field

                entry['enumDef'][enum_val] = {
                    '$ref':
                    [dbl_quote('_terms.yaml#/' + slug + '/' + location)]
                }

            elif enum_def:
                # Anything else is taken as a comma-separated list of refs.
                entry['enumDef'][enum_val] = {
                    '$ref':
                    [dbl_quote(stripper(x)) for x in enum_def.split(',')]
                }

        # Validate deprecated enums present in enum section
        missing_deprecated_enums = []

        for key, val in enum_dict.items():
            for k, v in val.items():
                if 'deprecated_enum' in v:
                    for d in v['deprecated_enum']:
                        if d not in v['enum']:
                            missing_deprecated_enums.append(d + ' - ' + key +
                                                            ' : ' + k)

        if missing_deprecated_enums:
            sys.exit(
                'ERROR : Missing enum defs in main section for below deprecated enums: \n{0}'
                .format(missing_deprecated_enums))

        # Remove empty deprecated_enum, enumDefs
        for val in enum_dict.values():
            for v in val.values():
                if 'deprecated_enum' in v and v['deprecated_enum'] == []:
                    v.pop('deprecated_enum')

                if 'enumDef' in v and v['enumDef'] == {}:
                    v.pop('enumDef')

        return enum_dict