Beispiel #1
0
def importGene(filename):
    """Import gene symbols from a text file into the ``entity`` collection.

    The file is read one symbol per line. Each non-blank line is stripped
    and lower-cased to form the symbol; a document with ``_id``
    ``enti_sym_<symbol>`` and group ``'protein'`` is saved, keeping the
    stripped original line as both ``name`` and ``label``.

    Symbols that repeat within the file, or for which the collection
    already holds a document with the same ``symbol``, are skipped.
    Blank lines are ignored so that no document with an empty symbol is
    ever created (e.g. from the trailing newline at the end of the file).

    Args:
        filename: Path of the symbol file. A falsy value falls back to the
            hard-coded default path below.
    """
    filename = filename or r'C:\work\caida\Dropbox\precon\engineering\Contents\GBM_BN-symbols.txt'
    cnt = 0
    seen = set()  # symbols already handled during this run
    col = mongo.db()['entity']
    with open(filename) as f:
        for raw in f:
            line = raw.strip()
            if not line:
                # Blank line: there is no symbol to import.
                continue
            sym = line.lower()
            if sym in seen:
                continue
            seen.add(sym)

            # Skip symbols that are already stored in the collection.
            if col.find({'symbol': sym}).count() > 0:
                continue

            doc = ObjDict()
            doc._id = 'enti_sym_%s' % sym
            doc.symbol = sym
            doc.group = 'protein'
            doc.name = line
            doc.label = line

            col.save(doc, safe=True)
            cnt += 1
            print("Added # %d:  %s" % (cnt, sym))
    print("Inserted %d gene" % cnt)
Beispiel #2
0
def medline2ris(content, style=ENDNOTE):
    """Convert MEDLINE-formatted text into RIS-style records.

    Each tagged line must look like ``XXXX- value`` where the tag is four
    characters drawn from upper-case letters and spaces. A ``PMID`` tag
    starts a new article. A tag that occurs several times within one
    article collects all of its values in a list. A non-blank line without
    a tag is treated as a continuation of the most recent value and is
    appended to it after a ``\\r\\n``.

    For the output, tags present in ``MEDLINE_ENDNOTE_MAPPING`` are renamed
    to the mapped tag; all other tags are emitted unchanged. Every article
    is written as ``TY  - JOUR``, followed by its fields in sorted tag
    order, followed by ``ER  - `` and two empty lines.

    Args:
        content: The MEDLINE text to convert.
        style: Target style; not consulted by this function.

    Returns:
        The converted records as a single string (empty when ``content``
        contains no articles).

    Raises:
        Exception: If a tagged or continuation line appears before the
            first ``PMID`` line.
    """
    import re
    marker = re.compile("([A-Z ]{4})- (.*)$")

    articles = []
    article = None
    key = None
    for indx, line in enumerate(content.splitlines()):
        if not line.strip():
            continue
        m = marker.match(line)
        if m:
            key, value = m.group(1), m.group(2)
            if key == 'PMID':
                article = ObjDict()
                articles.append(article)
            elif article is None:
                # A field showed up before any PMID opened an article.
                raise Exception("Invalid data at line %d: %s" % (indx, line))

            if key not in article:
                article[key] = value
            elif isinstance(article[key], list):
                article[key].append(value)
            else:
                # Second occurrence of the tag: keep BOTH the earlier value
                # and the new one (the new one used to be dropped).
                article[key] = [article[key], value]
        elif key:
            # Continuation of the last value seen for the current tag.
            current = article[key]
            if isinstance(current, list):
                current[-1] = "%s\r\n%s" % (current[-1], line)
            else:
                article[key] = "%s\r\n%s" % (current, line)
        else:
            raise Exception("Invalid data at line %d: %s" % (indx, line))

    # Render the collected articles; parts are joined once at the end to
    # avoid rebuilding the whole output string for every field.
    parts = []
    for article in articles:
        parts.append("TY  - JOUR\r\n")
        tmp = {}
        for key in article.keys():
            if key not in MEDLINE_ENDNOTE_MAPPING:
                tmp[key] = article[key]
            else:
                tmp[MEDLINE_ENDNOTE_MAPPING[key]] = article[key]

        for key in sorted(tmp.keys()):
            vals = tmp[key] if isinstance(tmp[key], list) else [tmp[key]]
            for val in vals:
                parts.append("%s- %s\r\n" % (key, val))
        # end article
        parts.append("ER  - \r\n\r\n\r\n")
    return ''.join(parts)
Beispiel #3
0
    def _parse_node(self, node):
        """Recursively convert an XML element into a nested ObjDict.

        The element's text (when non-empty) is stored under ``value``.
        Each attribute is wrapped as ``ObjDict({'value': ...})`` and stored
        under the key returned by ``self._namespace_split``. Each child
        element is parsed recursively and stored under its (split) tag;
        when a tag occurs more than once, the entry becomes a list holding
        every occurrence in document order.

        NOTE: text, attributes and children share one key space, so a
        child or attribute named like an existing key (e.g. ``value``)
        will collide with it.

        Args:
            node: The element to convert.

        Returns:
            An ObjDict describing ``node`` and its descendants.
        """
        node_tree = ObjDict()
        # Save attrs and text, hope there will not be a child with same name
        if node.text:
            node_tree.value = node.text
        for (k, v) in node.attrib.items():
            k, v = self._namespace_split(k, ObjDict({'value': v}))
            node_tree[k] = v
        # Save children. Iterate the element itself rather than calling
        # getchildren(), which was removed from ElementTree in Python 3.9.
        for child in node:
            tag, tree = self._namespace_split(child.tag,
                                              self._parse_node(child))
            if tag not in node_tree:  # the first time, so store it in dict
                node_tree[tag] = tree
                continue
            old = node_tree[tag]
            if not isinstance(old, list):
                # seen more than once, so turn the single entry into a list
                node_tree.pop(tag)
                node_tree[tag] = [old]
            node_tree[tag].append(tree)  # add the new one

        return node_tree
Beispiel #4
0
 def fromstring(self, s):
     """Parse the XML text ``s`` and return it as a nested ObjDict.

     The result has a single key, the root tag as returned by
     ``self._namespace_split``, whose value is the tree built for the
     root element by ``self._parse_node``.
     """
     root = ET.fromstring(s)
     parsed_root = self._parse_node(root)
     tag, tree = self._namespace_split(root.tag, parsed_root)
     return ObjDict({tag: tree})