def micro_rna(value, ns):
    """Build a BEL ``transcribedTo`` statement for a microRNA.

    The subject and object terms are both produced by ``bel_term`` from the
    same ``value`` and namespace keyword ``ns``; the subject uses function
    code ``'g'`` and the object uses function code ``'m'``.

    Returns the statement as a string: ``<g term> transcribedTo <m term>``.
    """
    # dict literals evaluate in order, so the 'g' term is still built first
    terms = {
        'source': bel_term(value, ns, 'g'),
        'target': bel_term(value, ns, 'm'),
    }
    statement = Template('${source} transcribedTo ${target}')
    return statement.substitute(terms)
def translated_to(value, ns):
    """Build a BEL ``translatedTo`` statement.

    The subject and object terms are both produced by ``bel_term`` from the
    same ``value`` and namespace keyword ``ns``; the subject uses function
    code ``'r'`` and the object uses function code ``'p'``.

    Returns the statement as a string: ``<r term> translatedTo <p term>``.
    """
    # dict literals evaluate in order, so the 'r' term is still built first
    terms = {
        'source': bel_term(value, ns, 'r'),
        'target': bel_term(value, ns, 'p'),
    }
    statement = Template('${source} translatedTo ${target}')
    return statement.substitute(terms)
def micro_rna(value, ns):
    """Return the BEL ``transcribedTo`` statement for a microRNA.

    ``value`` and the namespace keyword ``ns`` are passed to ``bel_term``
    twice: once with function code ``'g'`` for the left-hand term and once
    with function code ``'m'`` for the right-hand term.
    """
    pattern = Template('${source} transcribedTo ${target}')
    # keyword arguments are evaluated left to right: 'g' term, then 'm' term
    return pattern.substitute(
        source=bel_term(value, ns, 'g'),
        target=bel_term(value, ns, 'm'),
    )
def translated_to(value, ns):
    """Return the BEL ``translatedTo`` statement for ``value``.

    ``value`` and the namespace keyword ``ns`` are passed to ``bel_term``
    twice: once with function code ``'r'`` for the left-hand term and once
    with function code ``'p'`` for the right-hand term.
    """
    pattern = Template('${source} translatedTo ${target}')
    # keyword arguments are evaluated left to right: 'r' term, then 'p' term
    return pattern.substitute(
        source=bel_term(value, ns, 'r'),
        target=bel_term(value, ns, 'p'),
    )
# Names of the parsed data sets this script depends on.
data_list = ['rgd', 'hgnc', 'rgd_ortho', 'mgi']
data_dict = {}

# Load every '*parsed_data.pickle' file in the working directory and keep the
# objects whose string form is one of the required data set names.
for files in os.listdir("."):
    if files.endswith("parsed_data.pickle"):
        with open(files, 'rb') as f:
            # NOTE(review): pickle.load can execute arbitrary code; these
            # files are assumed to be produced locally by a trusted step.
            d = pickle.load(f)
        if str(d) in data_list:
            data_dict[str(d)] = d

# Report (but do not abort on) data sets that were not found.
for data in data_list:
    if data not in data_dict:
        print('missing required dependency {0}!'.format(data))

# Get ortho statements from the HGNC data object. The MGI and RGD data
# objects are needed to translate MGI and RGD ids to labels.
hgnc_ortho_statements = set()
for term_id in data_dict.get('hgnc').get_values():
    term_label = data_dict.get('hgnc').get_label(term_id)
    hgnc_term = bel_functions.bel_term(term_label, 'HGNC', 'g')
    orthos = data_dict.get('hgnc').get_orthologs(term_id)
    if orthos is None:
        continue
    for o in orthos:
        # only 'PREFIX:value' style ids can be translated
        if len(o.split(':')) != 2:
            continue
        prefix, value = o.split(':')
        if prefix == 'RGD':
            o_label = data_dict.get('rgd').get_label(value)
        elif prefix == 'MGI':
            o_label = data_dict.get('mgi').get_label(value)
        else:
            # No data set can translate this prefix. Skip it explicitly so
            # the label of a previously processed ortholog is never reused
            # (and so the first such id does not hit an unbound name).
            print('WARNING - unsupported ortholog prefix {0}, {1}'.format(
                prefix, value))
            continue
        if o_label is None:
            print('WARNING - missing label for {0}, {1}'.format(prefix, value))
            continue
        ortho_term = bel_functions.bel_term(o_label, prefix, 'g')
        hgnc_ortho_statements.add(
            '{0} orthologous {1}'.format(hgnc_term, ortho_term))
if row[0].startswith('!'): if 'Submission Date:' in row: date = row[0].split(':')[1].strip() (month, day, year) = date.split('/') date = Template('${year}-${month}-${day}').substitute( year=year, day=day, month=month) else: continue # find records with CC GO terms, that do not have a qualifier else: goid = row[4].replace('GO:', '') qualifier = row[3] symbol = row[2] if qualifier == '' and goid in complexes: # convert complex into BEL term term = bel_term(row[4].replace('GO:', ''), complex_ns, 'complex') # convert symbol into BEL term, exclude non-protein symbols if encoding_dict.get(symbol) == 'GRP': gene = bel_term(symbol, ns, 'p') gomap[term].add(gene) # sort genes annotated to each complex in gomap gomap = {k: sorted(list(v)) for k, v in gomap.items()} print('\tGenerating {0} BEL hasComponents statements.\n'.format( len(gomap))) # create statements and associated annotations; append to output file with open(output_file, 'a') as bel: bel.write('SET Species = {0}\n'.format(species)) bel.write( 'SET Citation = {3}"Online Resource", "GO Annotation File - {0}", "{1}", "{2}", "", ""{4}\n\n' .format(species, url, date, '{', '}'))
if files.endswith("parsed_data.pickle"): with open(files, 'rb') as f: d = pickle.load(f) if str(d) in data_list: data_dict[str(d)] = d for data in data_list: if data not in data_dict.keys(): print('missing required dependency {0}!'.format(data)) # Get ortho statements from HGNC data object # MGI and RGD namespaceDataSet objects are required for translation of MGI # and RGD Ids to labels hgnc_ortho_statements = set() for term_id in data_dict.get('hgnc').get_values(): term_label = data_dict.get('hgnc').get_label(term_id) hgnc_term = bel_functions.bel_term(term_label, 'HGNC', 'g') orthos = data_dict.get('hgnc').get_orthologs(term_id) if orthos is not None: for o in orthos: if len(o.split(':')) == 2: prefix, value = o.split(':') if prefix == 'RGD': o_label = data_dict.get('rgd').get_label(value) if prefix == 'MGI': o_label = data_dict.get('mgi').get_label(value) if o_label is None: print( 'WARNING - missing label for {0}, {1}'.format(prefix, value)) continue ortho_term = bel_functions.bel_term(o_label, prefix, 'g') hgnc_ortho_statements.add(
# Walk the annotation rows: lines whose first cell starts with '!' are header
# lines, everything else is treated as an annotation record.
for row in reader:
    # a blank input line produces an empty row; there is nothing to index
    if not row:
        continue
    if row[0].startswith('!'):
        # The date is parsed out of the first cell, so the marker has to be
        # searched for in that cell; a membership test on the row itself
        # would only match a cell equal to the bare marker text.
        if 'Submission Date:' in row[0]:
            date = row[0].split(':')[1].strip()
            (month, day, year) = date.split('/')
            # rewrite month/day/year as year-month-day
            date = '{0}-{1}-{2}'.format(year, month, day)
        continue
    # find records with CC GO terms, that do not have a qualifier
    goid = row[4].replace('GO:', '')
    qualifier = row[3]
    symbol = row[2]
    if qualifier == '' and goid in complexes:
        # convert complex into BEL term
        term = bel_term(goid, complex_ns, 'complex')
        # convert symbol into BEL term, exclude non-protein symbols
        if encoding_dict.get(symbol) == 'GRP':
            gene = bel_term(symbol, ns, 'p')
            gomap[term].add(gene)

# sort genes annotated to each complex in gomap
gomap = {k: sorted(v) for k, v in gomap.items()}
print('\tGenerating {0} BEL hasComponents statements.\n'.format(len(gomap)))

# create statements and associated annotations; append to output file
# NOTE(review): `date` is only assigned above when a 'Submission Date:' header
# was seen - confirm it is initialised before this point for files without one.
with open(output_file, 'a') as bel:
    bel.write('SET Species = {0}\n'.format(species))
    bel.write('SET Citation = {3}"Online Resource", "GO Annotation File - {0}", "{1}", "{2}", "", ""{4}\n\n'.format(species, url, date, '{', '}'))
    for k, v in gomap.items():
        bel.write('{0} hasComponents list({1})\n'.format(k, ','.join(v)))
if files.endswith("parsed_data.pickle"): with open(files, 'rb') as f: d = pickle.load(f) if str(d) in data_list: data_dict[str(d)] = d for data in data_list: if data not in data_dict.keys(): print('missing required dependency {0}!'.format(data)) # Get ortho statements from HGNC data object # MGI and RGD namespaceDataSet objects are required for translation of MGI # and RGD Ids to labels hgnc_ortho_statements = set() for term_id in data_dict.get('hgnc').get_values(): term_label = data_dict.get('hgnc').get_label(term_id) hgnc_term = bel_functions.bel_term(term_label, 'HGNC', 'g') orthos = data_dict.get('hgnc').get_orthologs(term_id) if orthos is not None: for o in orthos: if len(o.split(':')) == 2: prefix, value = o.split(':') if prefix == 'RGD': o_label = data_dict.get('rgd').get_label(value) if prefix == 'MGI': o_label = data_dict.get('mgi').get_label(value) if o_label is None: print('WARNING - missing label for {0}, {1}'.format( prefix, value)) continue ortho_term = bel_functions.bel_term(o_label, prefix, 'g') hgnc_ortho_statements.add('{0} orthologous {1}'.format(