コード例 #1
0
def parse_otc_taxonomy_parser_lost_taxa(fn):
    """Return the set of integer ids listed in a "lost taxa" file.

    Each line yielded by ``stripped_nonempty_lines(fn)`` must have the form::

        depth=<digits> id=<digits> uniqname='<anything>'

    Args:
        fn: passed unchanged to ``stripped_nonempty_lines``
            (presumably a file path -- confirm against that helper).

    Returns:
        A ``set`` holding the ``id`` field of every line, converted to ``int``.

    Raises:
        ValueError: if a line does not match the expected pattern.
        AssertionError: if the same id occurs on more than one line.
    """
    ltrow_pat = re.compile(r"^depth=\d+\s+id=(\d+)\s+uniqname='.*'$")
    ois = set()
    for line in stripped_nonempty_lines(fn):
        m = ltrow_pat.match(line)
        if not m:
            raise ValueError('lost_taxa file did not match pattern')
        i = int(m.group(1))
        # Raised explicitly instead of via `assert` so the duplicate check
        # is not stripped when Python runs with -O. The exception type is
        # kept as AssertionError so callers see the same error as before.
        if i in ois:
            raise AssertionError('duplicate id {} in lost_taxa file'.format(i))
        ois.add(i)
    return ois
コード例 #2
0
def parse_otc_taxonomy_parser_lost_taxa(fn):
    """Return the set of integer ids listed in a "lost taxa" file.

    Each line yielded by ``stripped_nonempty_lines(fn)`` must have the form::

        depth=<digits> id=<digits> uniqname='<anything>'

    Args:
        fn: passed unchanged to ``stripped_nonempty_lines``
            (presumably a file path -- confirm against that helper).

    Returns:
        A ``set`` holding the ``id`` field of every line, converted to ``int``.

    Raises:
        ValueError: if a line does not match the expected pattern.
        AssertionError: if the same id occurs on more than one line.
    """
    ltrow_pat = re.compile(r"^depth=\d+\s+id=(\d+)\s+uniqname='.*'$")
    ois = set()
    for line in stripped_nonempty_lines(fn):
        m = ltrow_pat.match(line)
        if not m:
            raise ValueError('lost_taxa file did not match pattern')
        i = int(m.group(1))
        # Raised explicitly instead of via `assert` so the duplicate check
        # is not stripped when Python runs with -O. The exception type is
        # kept as AssertionError so callers see the same error as before.
        if i in ois:
            raise AssertionError('duplicate id {} in lost_taxa file'.format(i))
        ois.add(i)
    return ois
コード例 #3
0
#!/usr/bin/env python
"""Flag every listed subproblem as constrained and uncontested.

Usage:
    <script> subproblem_ids_file in_annotations_file out_annotations_file

Reads subproblem file names (each ending in ``.tre``) from
``subproblem_ids_file``, loads the JSON in ``in_annotations_file``, sets
``was_constrained`` and ``was_uncontested`` to ``True`` on the matching entry
of its ``nodes`` mapping (creating the entry if absent), and writes the
result to ``out_annotations_file``.
"""
import codecs
import json
import os
import sys

from peyotl import read_as_json

try:
    subproblem_ids_file, in_annotations_file, out_annotations_file = sys.argv[1:]
except ValueError:
    # Tuple unpacking raises ValueError when there are not exactly 3 arguments.
    sys.exit('Expecting 3 arguments:\n   subproblem_ids_file, in_annotations_file, out_annotations_file')

# The script's own directory is put on sys.path before importing
# document_outputs (presumably that module lives next to this script).
bin_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
sys.path.append(bin_dir)
from document_outputs import stripped_nonempty_lines

subproblems = []
for s in stripped_nonempty_lines(subproblem_ids_file):
    # Explicit check instead of `assert`: under `python -O` an assert is
    # removed and the slice below would silently chop 4 arbitrary characters.
    if not s.endswith('.tre'):
        sys.exit('Expecting every subproblem id to end with ".tre"; found: {}'.format(s))
    subproblems.append(s[:-4])

jsonblob = read_as_json(in_annotations_file)
nodes_dict = jsonblob['nodes']
for ott_id in subproblems:
    d = nodes_dict.setdefault(ott_id, {})
    d['was_constrained'] = True
    d['was_uncontested'] = True

with codecs.open(out_annotations_file, 'w', encoding='utf-8') as out_stream:
    json.dump(jsonblob, out_stream, indent=2, sort_keys=True, separators=(',', ': '))
コード例 #4
0
"""Flag every listed subproblem as constrained and uncontested.

Usage:
    <script> subproblem_ids_file in_annotations_file out_annotations_file

Reads subproblem file names (each ending in ``.tre``) from
``subproblem_ids_file``, loads the JSON in ``in_annotations_file``, sets
``was_constrained`` and ``was_uncontested`` to ``True`` on the matching entry
of its ``nodes`` mapping (creating the entry if absent), and writes the
result to ``out_annotations_file``.
"""
import codecs
import json
import os
import sys

# read_as_json is called below but was never imported in this script,
# which made the script fail with NameError once it reached that call.
from peyotl import read_as_json

try:
    subproblem_ids_file, in_annotations_file, out_annotations_file = sys.argv[1:]
except ValueError:
    # Tuple unpacking raises ValueError when there are not exactly 3 arguments.
    sys.exit(
        'Expecting 3 arguments:\n   subproblem_ids_file, in_annotations_file, out_annotations_file'
    )

# The script's own directory is put on sys.path before importing
# document_outputs (presumably that module lives next to this script).
bin_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
sys.path.append(bin_dir)
from document_outputs import stripped_nonempty_lines

subproblems = []
for s in stripped_nonempty_lines(subproblem_ids_file):
    # Explicit check instead of `assert`: under `python -O` an assert is
    # removed and the slice below would silently chop 4 arbitrary characters.
    if not s.endswith('.tre'):
        sys.exit('Expecting every subproblem id to end with ".tre"; found: {}'.format(s))
    subproblems.append(s[:-4])

jsonblob = read_as_json(in_annotations_file)
nodes_dict = jsonblob['nodes']
for ott_id in subproblems:
    d = nodes_dict.setdefault(ott_id, {})
    d['was_constrained'] = True
    d['was_uncontested'] = True

with codecs.open(out_annotations_file, 'w', encoding='utf-8') as out_stream:
    json.dump(jsonblob,
              out_stream,
              indent=2,
              sort_keys=True,
              separators=(',', ': '))