Beispiel #1
0
def convert_orthologs_for_upload(infile, outfile):
    '''
    Rewrite the orthologs stored in infile into the upload format of the
    Ortholog Benchmarking website: one ortholog per line, given as two
    sequence ids separated by a tab.
    http://linneus54.inf.ethz.ch:8080/cgi-bin/gateway.pl

    infile: path handed to orthutil.orthDatasFromFileGen, which yields
        (params, orthologs) pairs; each ortholog unpacks into three fields.
    outfile: path of the file to write; it is opened in 'w' mode, so any
        existing content is replaced.
    '''
    line_template = '{qdb}\t{sdb}\n'
    with open(outfile, 'w') as out:
        for _params, pairs in orthutil.orthDatasFromFileGen(infile):
            # Only the two ids are written; the third field (distance) and
            # the params of each record are not part of the upload format.
            out.writelines(
                line_template.format(qdb=first_id, sdb=second_id)
                for first_id, second_id, _distance in pairs)
Beispiel #2
0
def load_orth_datas(ds):
    '''
    load orthDatas serially.  takes a long time.  use dones to resume job if it dies.
    '''
    release = roundup.dataset.getDatasetId(ds)

    print 'getting ids'
    genomeToId = roundup_db.getGenomeToId(release)
    divToId = roundup_db.getDivergenceToId(release)
    evalueToId = roundup_db.getEvalueToId(release)
    geneToId = roundup_db.getSequenceToId(release)

    print 'loading orthDatas'
    for path in roundup.dataset.getOrthologsFiles(ds):
        if get_dones(ds).done(path):
            print 'already loaded:', path
        else:
            print 'loading', path
            orthDatasGen = orthutil.orthDatasFromFileGen(path)
            roundup_db.loadReleaseResults(release, genomeToId, divToId, evalueToId, geneToId, orthDatasGen)
            get_dones(ds).mark(path)
    print 'done loading all orthDatas'