def ti(x):
    """Format a duration given in seconds with millisecond precision.

    The value is rounded to three decimals, converted to a
    ``datetime.timedelta`` and rendered through ``str()``, e.g.
    ``ti(61.25) == '0:01:01.250'``.

    Args:
        x: Duration in seconds (int or float).

    Returns:
        The ``str(timedelta)`` text with exactly three fractional digits.
    """
    delta = datetime.timedelta(seconds=round(x, 3))
    text = str(delta)
    if delta.microseconds:
        # str() rendered six fractional digits; keep only the milliseconds.
        return text[:-3]
    # Whole-second durations are rendered without any fractional part, so
    # slicing three characters off would remove the ":SS" seconds field
    # instead.  Pad with explicit milliseconds to keep the column uniform.
    return text + '.000'

add_times = {}
retrieve_times = {}
write_times = {}
parse_times = {}

print 'size\tadd\tretrieve\twrite\tparse'
for n in sizes:
    s = str(n).zfill(4)

    print s,
    sys.stdout.flush()
    
    start_time = time.time()
    t.add_trees('tests/bird%s.new' % s, 'newick', 'test%s' % s)
    add_times[n] = time.time() - start_time
    print '\t', ti(add_times[n]),
    sys.stdout.flush()

    start_time = time.time()
    tree = t.serialize_trees('test%s' % s)
    retrieve_times[n] = time.time() - start_time
    print '\t', ti(retrieve_times[n]),
    sys.stdout.flush()

    start_time = time.time()
    bp.convert('tests/bird%s.new' % s, 'newick', 'tests/bird%s.cdao' % s, 'cdao')
    write_times[n] = time.time() - start_time
    print '\t', ti(write_times[n]),
    sys.stdout.flush()
Exemple #2
0
t = Treestore()

tree_files = [x for x in os.listdir('trees') if x.endswith('.nex')]
base_uri = 'http://www.phylocommons.org/trees/%s'
tree_list = set(t.list_trees())
if taxonomy:
    sys.stdout.write('Loading taxonomy...')
    sys.stdout.flush()
    taxonomy = t.get_trees('%s_taxonomy' % taxonomy)[0]
    taxonomy.index_labels()
    print 'done.'

errors = set()
for tree_file in tree_files:
    tree_id = 'TB2_' + tree_file[:-len('.nex')]
    if Treestore.uri_from_id(tree_id) in tree_list: continue
    print '**', tree_id
    tree_path = os.path.join('trees', tree_file)
    with open(tree_path) as input_file:
        r = input_file.read()
    if '<!DOCTYPE html' in r: continue
    try:
        t.add_trees(tree_path, 'nexus', tree_uri=tree_id, rooted=False,
                    taxonomy=taxonomy, tax_root=None)
    except Exception as e:
        print 'ERROR: ', e
        errors.add(tree_id)

if errors:
    print "Couldn't load the following trees:", ','.join(errors)