Beispiel #1
0
def main(argv):
    """Build the c2map of a jamu formula and save it as a graph image.

    Reads the jamu formula (YAML) from ``g_jamu_filepath`` and prints it,
    loads the protein and compound JSON records from ``g_protein_dir`` and
    ``g_compound_dir``, constructs the c2map from those two collections and
    writes it to ``<g_c2mapjamu_dir>/c2mapjamu.graph.png``.

    Args:
        argv: Command-line arguments. Not used; all inputs come from the
            module-level ``g_*`` settings.
    """
    jamu_filepath = g_jamu_filepath
    protein_dir = g_protein_dir
    compound_dir = g_compound_dir
    c2mapjamu_dir = g_c2mapjamu_dir

    # Load a jamu formula. The formula is only echoed for inspection; it is
    # not an input of c2map.construct() below.
    print('Loading jamu data from %s' % jamu_filepath)
    with open(jamu_filepath) as f:
        # NOTE(review): yaml.load() without an explicit Loader can build
        # arbitrary Python objects and is deprecated since PyYAML 5.1.
        # Switch to yaml.safe_load() if the jamu file uses plain YAML only.
        jamu = yaml.load(f)
    print(jamu)

    # Load significant proteins
    proteins = util.load_json_from_dir(protein_dir)
    print('len(proteins)= %d' % len(proteins))

    # Load natural compounds of jamu plants
    compounds = util.load_json_from_dir(compound_dir)
    print('len(compounds)= %d' % len(compounds))

    # Construct the c2mapjamu
    c2mapjamu = c2map.construct(proteins, compounds)
    print(c2mapjamu)

    # Visualize the c2mapjamu
    c2map.save_graph(c2mapjamu, c2mapjamu_dir + '/c2mapjamu.graph.png')
def main(argv):
    """Construct the c2map for a jamu formula and render it as a graph.

    Steps, in order:
      1. Load the jamu formula (YAML) from ``g_jamu_filepath`` and print it.
      2. Load protein records from ``g_protein_dir`` and compound records
         from ``g_compound_dir`` via ``util.load_json_from_dir``.
      3. Build the c2map with ``c2map.construct(proteins, compounds)``.
      4. Save it to ``<g_c2mapjamu_dir>/c2mapjamu.graph.png``.

    Args:
        argv: Command-line arguments. Currently ignored; configuration is
            taken from the module-level ``g_*`` variables.
    """
    jamu_filepath = g_jamu_filepath
    protein_dir = g_protein_dir
    compound_dir = g_compound_dir
    c2mapjamu_dir = g_c2mapjamu_dir

    # Load a jamu formula (printed only; not passed to c2map.construct()).
    print('Loading jamu data from %s' % jamu_filepath)
    with open(jamu_filepath) as f:
        # NOTE(review): yaml.load() with no Loader argument is unsafe on
        # untrusted files and deprecated since PyYAML 5.1; prefer
        # yaml.safe_load() if the formula file contains plain YAML only.
        jamu = yaml.load(f)
    print(jamu)

    # Load significant proteins
    proteins = util.load_json_from_dir(protein_dir)
    print('len(proteins)= %d' % len(proteins))

    # Load natural compounds of jamu plants
    compounds = util.load_json_from_dir(compound_dir)
    print('len(compounds)= %d' % len(compounds))

    # Construct the c2mapjamu
    c2mapjamu = c2map.construct(proteins, compounds)
    print(c2mapjamu)

    # Visualize the c2mapjamu
    c2map.save_graph(c2mapjamu, c2mapjamu_dir + '/c2mapjamu.graph.png')
Beispiel #3
0
def _pmids_of_abstracts_containing(article_sets, terms):
    """Return the PMIDs of every abstract that contains any of *terms*.

    Args:
        article_sets: Iterable of dicts whose ``'data'`` entry is a list of
            article dicts. Articles without an ``'abstract'`` key are
            skipped.
        terms: Strings to look for as substrings of the lower-cased
            abstract.

    Returns:
        A list with one PMID per matching abstract, in iteration order.
        The list is empty when nothing matches.
    """
    # NOTE(review): the abstract is lower-cased but the terms are used as
    # stored; presumably the stored names are already lower-case -- confirm.
    pmids = []
    for article_set in article_sets:
        for article_datum in article_set['data']:
            if 'abstract' not in article_datum:
                continue
            abstract = article_datum['abstract'].lower()
            if any(term in abstract for term in terms):
                pmids.append(article_datum['pmid'])
    return pmids


def main(argv):
    """Search PubMed abstracts for compound and protein mentions.

    Reads every ``*.xml`` article set below the article directory, then for
    each compound and each protein record records

    * ``n_search_abstracts``: a one-element list holding the total number of
      articles read, and
    * ``pmid_of_containing_abstracts``: the PMIDs of the abstracts that
      mention the record,

    and writes the updated record to ``<searched dir>/<id>.json``.

    A compound is looked up by the first element of its ``name``; a protein
    by the first element of its ``description`` or of its ``symbol``.

    Args:
        argv: Command-line arguments. Not used; all paths are hard-coded.
    """
    article_set_dir = '/home/tor/jamu/xprmnt/pubmed-article/06'
    ori_compound_data_dir = '/home/tor/jamu/xprmnt/compound-data/ori'
    ori_protein_data_dir = '/home/tor/jamu/xprmnt/protein-data/ori'
    compound_data_dir = '/home/tor/jamu/xprmnt/compound-data/searched'
    protein_data_dir = '/home/tor/jamu/xprmnt/protein-data/searched'

    # get articles
    article_filepaths = glob.glob(os.path.join(article_set_dir, '*.xml'))

    article_sets = read_article_sets(article_filepaths)
    print('#article-sets= %d' % len(article_sets))

    n_articles = sum(len(i['data']) for i in article_sets)
    print('#articles= %d' % n_articles)

    # search for compounds
    compounds = util.load_json_from_dir(ori_compound_data_dir)
    print('#compounds= %d' % len(compounds))

    for c in compounds:
        c['n_search_abstracts'] = [n_articles]
        # Collected once per compound so that matches from all abstracts
        # accumulate instead of being overwritten by the last abstract.
        c['pmid_of_containing_abstracts'] = _pmids_of_abstracts_containing(
            article_sets, [c['name'][0]])

        compound_filepath = os.path.join(compound_data_dir, c['id'] + '.json')
        with open(compound_filepath, 'w') as f:
            json.dump(c, f)

    # search for proteins
    proteins = util.load_json_from_dir(ori_protein_data_dir)
    print('#proteins= %d' % len(proteins))

    for p in proteins:
        p['n_search_abstracts'] = [n_articles]
        matching_pmids = _pmids_of_abstracts_containing(
            article_sets, [p['description'][0], p['symbol'][0]])
        # Drop duplicate PMIDs (order is not preserved).
        p['pmid_of_containing_abstracts'] = list(set(matching_pmids))

        protein_filepath = os.path.join(protein_data_dir, p['id'] + '.json')
        with open(protein_filepath, 'w') as f:
            json.dump(p, f)
Beispiel #4
0
def _pmids_of_abstracts_containing(article_sets, terms):
    """Collect the PMIDs of the abstracts that mention at least one term.

    Args:
        article_sets: Iterable of dicts; each has a ``'data'`` list of
            article dicts. Articles lacking an ``'abstract'`` key are
            ignored.
        terms: Substrings to search for in the lower-cased abstract text.

    Returns:
        List of PMIDs, one per matching abstract, in iteration order
        (empty if there is no match).
    """
    # NOTE(review): only the abstract is lower-cased; the terms are compared
    # as stored. Presumably they are stored lower-case -- verify.
    found = []
    for article_set in article_sets:
        for article_datum in article_set['data']:
            if 'abstract' not in article_datum:
                continue
            abstract = article_datum['abstract'].lower()
            if any(term in abstract for term in terms):
                found.append(article_datum['pmid'])
    return found


def main(argv):
    """Annotate compound and protein records with the abstracts citing them.

    Loads all ``*.xml`` article sets from the article directory. Every
    compound and protein record then receives

    * ``n_search_abstracts``: a one-element list with the total number of
      articles loaded, and
    * ``pmid_of_containing_abstracts``: PMIDs of the abstracts that contain
      the record's search term(s),

    and is dumped as JSON to ``<searched dir>/<id>.json``.

    Compounds are searched by the first element of ``name``; proteins by
    the first element of ``description`` or of ``symbol``.

    Args:
        argv: Command-line arguments. Unused; the paths are hard-coded.
    """
    article_set_dir = '/home/tor/jamu/xprmnt/pubmed-article/06'
    ori_compound_data_dir = '/home/tor/jamu/xprmnt/compound-data/ori'
    ori_protein_data_dir = '/home/tor/jamu/xprmnt/protein-data/ori'
    compound_data_dir = '/home/tor/jamu/xprmnt/compound-data/searched'
    protein_data_dir = '/home/tor/jamu/xprmnt/protein-data/searched'

    # get articles
    article_filepaths = glob.glob(os.path.join(article_set_dir, '*.xml'))

    article_sets = read_article_sets(article_filepaths)
    print('#article-sets= %d' % len(article_sets))

    n_articles = sum(len(i['data']) for i in article_sets)
    print('#articles= %d' % n_articles)

    # search for compounds
    compounds = util.load_json_from_dir(ori_compound_data_dir)
    print('#compounds= %d' % len(compounds))

    for c in compounds:
        c['n_search_abstracts'] = [n_articles]
        # Build the list once per compound; resetting it per abstract would
        # discard every match except one from the last abstract.
        c['pmid_of_containing_abstracts'] = _pmids_of_abstracts_containing(
            article_sets, [c['name'][0]])

        compound_filepath = os.path.join(compound_data_dir, c['id'] + '.json')
        with open(compound_filepath, 'w') as f:
            json.dump(c, f)

    # search for proteins
    proteins = util.load_json_from_dir(ori_protein_data_dir)
    print('#proteins= %d' % len(proteins))

    for p in proteins:
        p['n_search_abstracts'] = [n_articles]
        hits = _pmids_of_abstracts_containing(
            article_sets, [p['description'][0], p['symbol'][0]])
        # De-duplicate PMIDs; the resulting order is unspecified.
        p['pmid_of_containing_abstracts'] = list(set(hits))

        protein_filepath = os.path.join(protein_data_dir, p['id'] + '.json')
        with open(protein_filepath, 'w') as f:
            json.dump(p, f)