Esempio n. 1
0
__author__ = 'Artur Cieslewicz'
import json
import os
import xml.etree.ElementTree as ET

import initials

# Module-level defaults object obtained from the project-local `initials` module.
# NOTE(review): its attributes are not visible in this chunk -- confirm against
# `initials.defaults` before relying on any of them.
defs = initials.defaults()


def getJsonKeys(jsonArg, tabDelimiter):
    """Return an indented, newline-separated outline of the keys in a JSON object.

    Each key of ``jsonArg`` is emitted on its own line, prefixed with
    ``tabDelimiter`` and terminated by a newline. Keys of a nested dict, and of
    dicts that are direct elements of a list value, are emitted one tab deeper
    right after their parent key.

    Args:
        jsonArg: A decoded JSON value. Anything that is not a dict produces an
            empty string (this includes a list passed at the top level and a
            list nested directly inside another list).
        tabDelimiter: String prefix placed in front of every key at this level.

    Returns:
        The concatenated outline text, or ``""`` when ``jsonArg`` is not a dict.
    """
    if not isinstance(jsonArg, dict):
        return ""

    childDelimiter = tabDelimiter + '\t'
    # Collect fragments and join once instead of growing a string with `+=`.
    parts = []
    # `items()` works on both Python 2 and Python 3; `iteritems()` is Python 2 only.
    for key, value in jsonArg.items():
        parts.append(tabDelimiter + key + '\n')
        if isinstance(value, dict):
            parts.append(getJsonKeys(value, childDelimiter))
        elif isinstance(value, list):
            # Non-dict list elements contribute nothing (the recursive call returns "").
            for item in value:
                parts.append(getJsonKeys(item, childDelimiter))
    return "".join(parts)


def getShortJsonKeys(jsonArg, tabDelimiter):
    result = ""
    if isinstance(jsonArg, dict):
        tabD = tabDelimiter + '\t'
        for key, value in jsonArg.iteritems():
            result += tabDelimiter + key + '\n'
            if isinstance(value, dict):
Esempio n. 2
0
__author__ = 'Jakub Dutkiewicz'

from os import listdir
from initials import defaults
import xml.etree.ElementTree as ET
import json
import urllib2

# For every XML document in the working directory, read the JSON stored in its
# METADATA element, build the NCBI GEO accession URL from
# jsonTree['dataItem']['geo_accession'], download that page and save it as
# '<document name>_geo.html' under defs.root + defs.geohtmls.
# NOTE(review): variable names (including `file`, which shadows the builtin)
# are kept unchanged because code beyond this chunk may still reference them.
defs = defaults()
working_dir = defs.root + defs.xmldocs
for i, file in enumerate(listdir(working_dir)):
    # Progress indicator: print the index of every tenth file.
    # `print(i)` with a single argument gives the same output on Python 2 and 3.
    if i % 10 == 0:
        print(i)
    xmlTree = ET.parse(working_dir + '/' + file).getroot()
    for c in xmlTree:
        if c.tag == 'METADATA':
            jsonTree = json.loads(c.text)
            try:
                geo_acc_link = 'https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=' + jsonTree[
                    'dataItem']['geo_accession']
                geoaccess = 1
                response = urllib2.urlopen(geo_acc_link)
                try:
                    html = response.read()
                finally:
                    # Release the connection even if reading fails.
                    response.close()
                # `with` guarantees the output file is closed if the write fails.
                with open(
                        defs.root + defs.geohtmls + '/' + file.split('.')[0] +
                        '_geo.html', 'w+') as oFile:
                    oFile.write(html)
            except Exception:
                # Best-effort download: a missing key, network error or I/O
                # error marks this document as not accessible and moves on.
                # Unlike a bare `except:`, this lets KeyboardInterrupt and
                # SystemExit propagate so the run can be stopped.
                geoaccess = 0