def micro_rna(value,ns):
	""" Build the BEL transcribedTo statement for a microRNA.

	Both terms are produced by bel_term from the same value and
	namespace keyword string: the source term uses the 'g' code and
	the target term uses the 'm' code. Returns the statement string. """
	pattern = Template('${source} transcribedTo ${target}')
	# Dict entries are evaluated in order, so the source term is built first.
	terms = {
		'source': bel_term(value, ns, 'g'),
		'target': bel_term(value, ns, 'm'),
	}
	return pattern.substitute(**terms)
def translated_to(value,ns):
	""" Build a BEL translatedTo statement.

	Both terms are produced by bel_term from the same value and
	namespace keyword string: the source term uses the 'r' code and
	the target term uses the 'p' code. Returns the statement string. """
	# Build the two terms in source-then-target order.
	source_term, target_term = [bel_term(value, ns, code) for code in ('r', 'p')]
	statement = Template('${source} translatedTo ${target}')
	return statement.substitute(source=source_term, target=target_term)
def micro_rna(value, ns):
    """ Return the BEL transcribedTo statement for a microRNA.

    The source term is bel_term(value, ns, 'g') and the target term is
    bel_term(value, ns, 'm'); the two are joined by the transcribedTo
    relationship and returned as a single string. """
    gene_side = bel_term(value, ns, 'g')
    mirna_side = bel_term(value, ns, 'm')
    return Template('${source} transcribedTo ${target}').substitute(
        {'source': gene_side, 'target': mirna_side})
def translated_to(value, ns):
    """ Return a BEL translatedTo statement.

    The source term is bel_term(value, ns, 'r') and the target term is
    bel_term(value, ns, 'p'); the two are joined by the translatedTo
    relationship and returned as a single string. """
    statement = Template('${source} translatedTo ${target}')
    # Keyword arguments are evaluated left to right: source, then target.
    return statement.substitute(
        source=bel_term(value, ns, 'r'),
        target=bel_term(value, ns, 'p'),
    )
	data_list = ['rgd', 'hgnc', 'rgd_ortho', 'mgi']
	data_dict = {}
	for files in os.listdir("."):
		if files.endswith("parsed_data.pickle"):
			with open(files,'rb') as f:
				d = pickle.load(f)
				if str(d) in data_list:
					data_dict[str(d)] = d
	for data in data_list:
		if data not in data_dict.keys():
			print('missing required dependency {0}!'.format(data))

	hgnc_ortho_statements = set()
	for term_id in data_dict.get('hgnc').get_values():
		term_label = data_dict.get('hgnc').get_label(term_id)
		hgnc_term = bel_functions.bel_term(term_label, 'HGNC', 'g')
		orthos = data_dict.get('hgnc').get_orthologs(term_id)
		if orthos is not None:
			for o in orthos:
				if len(o.split(':')) == 2:
					prefix, value = o.split(':')
					if prefix == 'RGD':
						o_label = data_dict.get('rgd').get_label(value)
					if prefix == 'MGI':
						o_label = data_dict.get('mgi').get_label(value)
					if o_label is None:
						print('WARNING - missing label for {0}, {1}'.format(prefix, value))
						continue
					ortho_term = bel_functions.bel_term(o_label, prefix, 'g')
					hgnc_ortho_statements.add('{0} orthologous {1}'.format(hgnc_term, ortho_term)) 
			
            if row[0].startswith('!'):
                if 'Submission Date:' in row:
                    date = row[0].split(':')[1].strip()
                    (month, day, year) = date.split('/')
                    date = Template('${year}-${month}-${day}').substitute(
                        year=year, day=day, month=month)
                else:
                    continue
            # find records with CC GO terms, that do not have a qualifier
            else:
                goid = row[4].replace('GO:', '')
                qualifier = row[3]
                symbol = row[2]
                if qualifier == '' and goid in complexes:
                    # convert complex into BEL term
                    term = bel_term(row[4].replace('GO:', ''), complex_ns,
                                    'complex')
                    # convert symbol into BEL term, exclude non-protein symbols
                    if encoding_dict.get(symbol) == 'GRP':
                        gene = bel_term(symbol, ns, 'p')
                        gomap[term].add(gene)

    # sort genes annotated to each complex in gomap
    gomap = {k: sorted(list(v)) for k, v in gomap.items()}
    print('\tGenerating {0} BEL hasComponents statements.\n'.format(
        len(gomap)))
    # create statements and associated annotations; append to output file
    with open(output_file, 'a') as bel:
        bel.write('SET Species = {0}\n'.format(species))
        bel.write(
            'SET Citation = {3}"Online Resource", "GO Annotation File - {0}", "{1}", "{2}", "", ""{4}\n\n'
            .format(species, url, date, '{', '}'))
        if files.endswith("parsed_data.pickle"):
            with open(files, 'rb') as f:
                d = pickle.load(f)
                if str(d) in data_list:
                    data_dict[str(d)] = d
    for data in data_list:
        if data not in data_dict.keys():
            print('missing required dependency {0}!'.format(data))

    # Get ortho statements from HGNC data object
    # MGI and RGD namespaceDataSet objects are required for translation of MGI
    # and RGD Ids to labels
    hgnc_ortho_statements = set()
    for term_id in data_dict.get('hgnc').get_values():
        term_label = data_dict.get('hgnc').get_label(term_id)
        hgnc_term = bel_functions.bel_term(term_label, 'HGNC', 'g')
        orthos = data_dict.get('hgnc').get_orthologs(term_id)
        if orthos is not None:
            for o in orthos:
                if len(o.split(':')) == 2:
                    prefix, value = o.split(':')
                    if prefix == 'RGD':
                        o_label = data_dict.get('rgd').get_label(value)
                    if prefix == 'MGI':
                        o_label = data_dict.get('mgi').get_label(value)
                    if o_label is None:
                        print(
                            'WARNING - missing label for {0}, {1}'.format(prefix, value))
                        continue
                    ortho_term = bel_functions.bel_term(o_label, prefix, 'g')
                    hgnc_ortho_statements.add(
		for row in reader:
			if row[0].startswith('!'):
				if 'Submission Date:' in row:
					date = row[0].split(':')[1].strip()
					(month, day, year) = date.split('/')
					date = Template('${year}-${month}-${day}').substitute(year=year, day=day, month=month)
				else:
					continue
			# find records with CC GO terms, that do not have a qualifier
			else:
				goid = row[4].replace('GO:','')
				qualifier = row[3]
				symbol = row[2]
				if qualifier == '' and goid in complexes:
		# convert complex into BEL term
					term = bel_term(row[4].replace('GO:',''),complex_ns,'complex')
		# convert symbol into BEL term, exclude non-protein symbols
					if encoding_dict.get(symbol) == 'GRP':
						gene = bel_term(symbol,ns,'p')
						gomap[term].add(gene)

	# sort genes annotated to each complex in gomap
	gomap = {k:sorted(list(v)) for k, v in gomap.items()} 
	print('\tGenerating {0} BEL hasComponents statements.\n'.format(len(gomap))) 
	# create statements and associated annotations; append to output file
	with open(output_file, 'a') as bel:
		bel.write('SET Species = {0}\n'.format(species))
		bel.write('SET Citation = {3}"Online Resource", "GO Annotation File - {0}", "{1}", "{2}", "", ""{4}\n\n'.format(species, url, date, '{', '}'))
		for k, v in gomap.items():
			bel.write('{0} hasComponents list({1})\n'.format(k, ','.join(v)))
			#bel.write(k + ' hasComponents list(' + ",".join(v) + ')')
Exemple #9
0
        if files.endswith("parsed_data.pickle"):
            with open(files, 'rb') as f:
                d = pickle.load(f)
                if str(d) in data_list:
                    data_dict[str(d)] = d
    for data in data_list:
        if data not in data_dict.keys():
            print('missing required dependency {0}!'.format(data))

    # Get ortho statements from HGNC data object
    # MGI and RGD namespaceDataSet objects are required for translation of MGI
    # and RGD Ids to labels
    hgnc_ortho_statements = set()
    for term_id in data_dict.get('hgnc').get_values():
        term_label = data_dict.get('hgnc').get_label(term_id)
        hgnc_term = bel_functions.bel_term(term_label, 'HGNC', 'g')
        orthos = data_dict.get('hgnc').get_orthologs(term_id)
        if orthos is not None:
            for o in orthos:
                if len(o.split(':')) == 2:
                    prefix, value = o.split(':')
                    if prefix == 'RGD':
                        o_label = data_dict.get('rgd').get_label(value)
                    if prefix == 'MGI':
                        o_label = data_dict.get('mgi').get_label(value)
                    if o_label is None:
                        print('WARNING - missing label for {0}, {1}'.format(
                            prefix, value))
                        continue
                    ortho_term = bel_functions.bel_term(o_label, prefix, 'g')
                    hgnc_ortho_statements.add('{0} orthologous {1}'.format(