Beispiel #1
0
def _force_symlink(target, link_path):
    """Create (or replace) a symbolic link at *link_path* pointing to *target*.

    Same intent as ``ln -sf target link_path``, but done through the ``os``
    module: paths containing spaces or shell metacharacters are handled
    correctly, and a failure raises ``OSError`` instead of being ignored.
    Unlike ``ln``, an existing *directory* at *link_path* is an error.
    """
    if os.path.lexists(link_path):
        os.remove(link_path)
    os.symlink(target, link_path)


def copy_files_for_replica(ds_path, out_ds_path, rep, names, alg, ds_name):
    """Link and copy the input files of one replicate into *out_ds_path*.

    Steps, in order:

    1. Symlink the sequence input as ``INPUT``.  The source is
       ``ds_path/R<rep>/names[CODE_SEQ]`` when ``use_initial`` is off and
       *alg* is not an already-available alignment; otherwise it is the
       ``ALIGNED`` file of the initial-alignment directory.
    2. Symlink the reference tree (``reference.tre``) and the reference
       alignment (``reference.fasta``) when ``is_data_available`` reports
       them.
    3. When *alg* is an already-available alignment, copy ``SPFN_STAT``
       (only if a reference alignment is available) and ``ALG_STAT``.
    4. When ``use_initial`` is set, copy the initial ``ALG_STAT`` as
       ``initial_<ALG_STAT>``, or warn on stderr if it does not exist.

    Raises:
        RuntimeError: a required source file does not exist.
        OSError: a symbolic link could not be created.
    """
    # NOTE(review): is_data_available() takes no `names` argument, so it
    # reads a module-level `names` rather than this function's parameter --
    # confirm both refer to the same list.
    rep_dir = "R%d" % rep

    def initial_file(file_name):
        # Built lazily so the "initial" settings are only read on the code
        # paths that actually need them.
        return os.path.join(
            twophase_conf.get("initial", "alignment"), ds_name, rep_dir,
            "%s_%s" % (use_initial_alg_ml, use_initial_alg), file_name)

    if not use_initial and not is_alignment_available(alg):
        reference_seq_name = os.path.join(ds_path, rep_dir, names[CODE_SEQ])
    else:
        reference_seq_name = initial_file(ALIGNED)

    # With IGNORE_MISSING_OPTIONAL_FILES set, a dangling link is accepted.
    if IGNORE_MISSING_OPTIONAL_FILES or os.path.exists(reference_seq_name):
        _force_symlink(reference_seq_name, os.path.join(out_ds_path, INPUT))
    else:
        raise RuntimeError("path %s does not exists." % reference_seq_name)

    for code, link_name in ((CODE_REFTREE, "reference.tre"),
                            (CODE_REFALG, "reference.fasta")):
        if not is_data_available(code):
            continue
        reference_name = os.path.join(ds_path, rep_dir, names[code])
        if not os.path.exists(reference_name):
            raise RuntimeError("path %s does not exists." % reference_name)
        _force_symlink(reference_name, os.path.join(out_ds_path, link_name))

    if is_alignment_available(alg):
        # Copy the spfn file over.
        if is_data_available(CODE_REFALG):
            copyfile(initial_file(SPFN_STAT),
                     os.path.join(out_ds_path, SPFN_STAT))
        # Copy the alignment stat over; when missing files are not ignored
        # the copy is attempted anyway so that the failure surfaces.
        src_path = initial_file(ALG_STAT)
        if os.path.exists(src_path) or not IGNORE_MISSING_OPTIONAL_FILES:
            copyfile(src_path, os.path.join(out_ds_path, ALG_STAT))

    if use_initial:
        # Alignment time needs to be added with initial alignment time.
        initial_stat_file = initial_file(ALG_STAT)
        if os.path.exists(initial_stat_file):
            copyfile(initial_stat_file,
                     os.path.join(out_ds_path, "initial_%s" % ALG_STAT))
        else:
            sys.stderr.write(
                "Could not find initial alignment statistics file: %s\n"
                % initial_stat_file)
def _force_symlink(target, link_path):
    """Create (or replace) a symbolic link at *link_path* pointing to *target*.

    Same intent as ``ln -sf target link_path``, but done through the ``os``
    module: paths containing spaces or shell metacharacters are handled
    correctly, and a failure raises ``OSError`` instead of being ignored.
    Unlike ``ln``, an existing *directory* at *link_path* is an error.
    """
    if os.path.lexists(link_path):
        os.remove(link_path)
    os.symlink(target, link_path)


def copy_files_for_replica(ds_path, out_ds_path, rep, names, alg, ds_name):
    """Link and copy the input files of one replicate into *out_ds_path*.

    Steps, in order:

    1. Symlink the sequence input as ``INPUT``.  The source is
       ``ds_path/R<rep>/names[CODE_SEQ]`` when ``use_initial`` is off and
       *alg* is not an already-available alignment; otherwise it is the
       ``ALIGNED`` file of the initial-alignment directory.
    2. Symlink the reference tree (``reference.tre``) and the reference
       alignment (``reference.fasta``) when ``is_data_available`` reports
       them.
    3. When *alg* is an already-available alignment, copy ``SPFN_STAT``
       (only if a reference alignment is available) and ``ALG_STAT``.
    4. When ``use_initial`` is set, copy the initial ``ALG_STAT`` as
       ``initial_<ALG_STAT>``, or warn on stderr if it does not exist.

    Raises:
        RuntimeError: a required source file does not exist.
        OSError: a symbolic link could not be created.
    """
    # NOTE(review): is_data_available() takes no `names` argument, so it
    # reads a module-level `names` rather than this function's parameter --
    # confirm both refer to the same list.
    rep_dir = "R%d" % rep

    def initial_file(file_name):
        # Built lazily so the "initial" settings are only read on the code
        # paths that actually need them.
        return os.path.join(
            twophase_conf.get("initial", "alignment"), ds_name, rep_dir,
            "%s_%s" % (use_initial_alg_ml, use_initial_alg), file_name)

    if not use_initial and not is_alignment_available(alg):
        reference_seq_name = os.path.join(ds_path, rep_dir, names[CODE_SEQ])
    else:
        reference_seq_name = initial_file(ALIGNED)

    # With IGNORE_MISSING_OPTIONAL_FILES set, a dangling link is accepted.
    if IGNORE_MISSING_OPTIONAL_FILES or os.path.exists(reference_seq_name):
        _force_symlink(reference_seq_name, os.path.join(out_ds_path, INPUT))
    else:
        raise RuntimeError("path %s does not exists." % reference_seq_name)

    for code, link_name in ((CODE_REFTREE, "reference.tre"),
                            (CODE_REFALG, "reference.fasta")):
        if not is_data_available(code):
            continue
        reference_name = os.path.join(ds_path, rep_dir, names[code])
        if not os.path.exists(reference_name):
            raise RuntimeError("path %s does not exists." % reference_name)
        _force_symlink(reference_name, os.path.join(out_ds_path, link_name))

    if is_alignment_available(alg):
        # Copy the spfn file over.
        if is_data_available(CODE_REFALG):
            copyfile(initial_file(SPFN_STAT),
                     os.path.join(out_ds_path, SPFN_STAT))
        # Copy the alignment stat over; when missing files are not ignored
        # the copy is attempted anyway so that the failure surfaces.
        src_path = initial_file(ALG_STAT)
        if os.path.exists(src_path) or not IGNORE_MISSING_OPTIONAL_FILES:
            copyfile(src_path, os.path.join(out_ds_path, ALG_STAT))

    if use_initial:
        # Alignment time needs to be added with initial alignment time.
        initial_stat_file = initial_file(ALG_STAT)
        if os.path.exists(initial_stat_file):
            copyfile(initial_stat_file,
                     os.path.join(out_ds_path, "initial_%s" % ALG_STAT))
        else:
            sys.stderr.write(
                "Could not find initial alignment statistics file: %s\n"
                % initial_stat_file)
Beispiel #3
0
def _force_symlink(target, link_path):
    """Create (or replace) a symbolic link at *link_path* pointing to *target*.

    Same intent as ``ln -sf target link_path``, but done through the ``os``
    module: paths containing spaces or shell metacharacters are handled
    correctly, and a failure raises ``OSError`` instead of being ignored.
    Unlike ``ln``, an existing *directory* at *link_path* is an error.
    """
    if os.path.lexists(link_path):
        os.remove(link_path)
    os.symlink(target, link_path)


def copy_files_for_replica(ds_path, out_ds_path, rep, names, alg, ds_name):
    """Link and copy the input files of one replicate into *out_ds_path*.

    Steps, in order:

    1. Symlink the sequence input as ``INPUT``.  The source is
       ``ds_path/R<rep>/names[CODE_SEQ]`` when ``use_initial`` is off and
       *alg* is not an already-available alignment; otherwise it is the
       ``ALIGNED`` file of the ``<DUMMY>_<use_initial_alg>`` directory
       under the configured initial-alignment root.
    2. Symlink the reference tree (``reference.tre``) and the reference
       alignment (``reference.fasta``) when ``is_data_available`` reports
       them.
    3. When *alg* is an already-available alignment, copy ``SPFN_STAT``
       and ``ALG_STAT`` from the initial-alignment directory.
    4. When ``use_initial`` is set, copy the initial ``ALG_STAT`` as
       ``initial_<ALG_STAT>``.

    Raises:
        RuntimeError: a source file to be linked does not exist.
        OSError: a symbolic link could not be created.
    """
    # NOTE(review): is_data_available() takes no `names` argument, so it
    # reads a module-level `names` rather than this function's parameter --
    # confirm both refer to the same list.
    rep_dir = "R%d" % rep

    def initial_file(file_name):
        # Built lazily so the "initial" settings are only read on the code
        # paths that actually need them.
        return os.path.join(
            twophase_conf.get("initial", "alignment"), ds_name, rep_dir,
            "%s_%s" % (DUMMY, use_initial_alg), file_name)

    if not use_initial and not is_alignment_available(alg):
        reference_seq_name = os.path.join(ds_path, rep_dir, names[CODE_SEQ])
    else:
        reference_seq_name = initial_file(ALIGNED)

    if not os.path.exists(reference_seq_name):
        raise RuntimeError("path %s does not exists." % reference_seq_name)
    _force_symlink(reference_seq_name, os.path.join(out_ds_path, INPUT))

    for code, link_name in ((CODE_REFTREE, "reference.tre"),
                            (CODE_REFALG, "reference.fasta")):
        if not is_data_available(code):
            continue
        reference_name = os.path.join(ds_path, rep_dir, names[code])
        if not os.path.exists(reference_name):
            raise RuntimeError("path %s does not exists." % reference_name)
        _force_symlink(reference_name, os.path.join(out_ds_path, link_name))

    if is_alignment_available(alg):
        # Copy the spfn file over.
        copyfile(initial_file(SPFN_STAT),
                 os.path.join(out_ds_path, SPFN_STAT))
        # Copy the alignment stat over.
        copyfile(initial_file(ALG_STAT),
                 os.path.join(out_ds_path, ALG_STAT))

    if use_initial:
        # Alignment time needs to be added with initial alignment time.
        copyfile(initial_file(ALG_STAT),
                 os.path.join(out_ds_path, "initial_%s" % ALG_STAT))
Beispiel #4
0
def _force_symlink(target, link_path):
    """Create (or replace) a symbolic link at *link_path* pointing to *target*.

    Same intent as ``ln -sf target link_path``, but done through the ``os``
    module: paths containing spaces or shell metacharacters are handled
    correctly, and a failure raises ``OSError`` instead of being ignored.
    Unlike ``ln``, an existing *directory* at *link_path* is an error.
    """
    if os.path.lexists(link_path):
        os.remove(link_path)
    os.symlink(target, link_path)


def copy_files_for_replica(ds_path, out_ds_path, rep, names, alg, ds_name):
    """Link and copy the input files of one replicate into *out_ds_path*.

    Steps, in order:

    1. Symlink the sequence input as ``INPUT``.  The source is
       ``ds_path/R<rep>/names[CODE_SEQ]`` when ``use_initial`` is off and
       *alg* is not an already-available alignment; otherwise it is the
       ``ALIGNED`` file of the ``<DUMMY>_<use_initial_alg>`` directory
       under the configured initial-alignment root.
    2. Symlink the reference tree (``reference.tre``) and the reference
       alignment (``reference.fasta``) when ``is_data_available`` reports
       them.
    3. When *alg* is an already-available alignment, copy ``SPFN_STAT``
       and ``ALG_STAT`` from the initial-alignment directory.
    4. When ``use_initial`` is set, copy the initial ``ALG_STAT`` as
       ``initial_<ALG_STAT>``.

    Raises:
        RuntimeError: a source file to be linked does not exist.
        OSError: a symbolic link could not be created.
    """
    # NOTE(review): is_data_available() takes no `names` argument, so it
    # reads a module-level `names` rather than this function's parameter --
    # confirm both refer to the same list.
    rep_dir = "R%d" % rep

    def initial_file(file_name):
        # Built lazily so the "initial" settings are only read on the code
        # paths that actually need them.
        return os.path.join(
            twophase_conf.get("initial", "alignment"), ds_name, rep_dir,
            "%s_%s" % (DUMMY, use_initial_alg), file_name)

    if not use_initial and not is_alignment_available(alg):
        reference_seq_name = os.path.join(ds_path, rep_dir, names[CODE_SEQ])
    else:
        reference_seq_name = initial_file(ALIGNED)

    if not os.path.exists(reference_seq_name):
        raise RuntimeError("path %s does not exists." % reference_seq_name)
    _force_symlink(reference_seq_name, os.path.join(out_ds_path, INPUT))

    for code, link_name in ((CODE_REFTREE, "reference.tre"),
                            (CODE_REFALG, "reference.fasta")):
        if not is_data_available(code):
            continue
        reference_name = os.path.join(ds_path, rep_dir, names[code])
        if not os.path.exists(reference_name):
            raise RuntimeError("path %s does not exists." % reference_name)
        _force_symlink(reference_name, os.path.join(out_ds_path, link_name))

    if is_alignment_available(alg):
        # Copy the spfn file over.
        copyfile(initial_file(SPFN_STAT),
                 os.path.join(out_ds_path, SPFN_STAT))
        # Copy the alignment stat over.
        copyfile(initial_file(ALG_STAT),
                 os.path.join(out_ds_path, ALG_STAT))

    if use_initial:
        # Alignment time needs to be added with initial alignment time.
        copyfile(initial_file(ALG_STAT),
                 os.path.join(out_ds_path, "initial_%s" % ALG_STAT))
Beispiel #5
0
def make_alignment_condor_file(outpath, alg_tool, ds_name, alg):
    """Write ``condor.align`` (and the tool's config file, if any) to *outpath*.

    The ``[alignment]`` entry for *alg* is either ``@<version>``, which
    selects a tool version and means "no config file", or the name of a
    config template under ``config_path``.  A config template is filled
    with the fasttree and raxml model strings for *ds_name* and written
    to *outpath* under the same file name.

    The Condor submit text comes from ``condor.<alg_tool>`` in
    ``condor_templates_path`` when that file exists, otherwise from
    ``condor_temp_def``.
    """
    conf = twophase_conf.get("alignment", alg)
    ver = "*"
    if conf.startswith("@"):
        ver = conf[1:]
        conf = ""
    alg_tool_path = get_tool_path(alg_tool, ver)

    if conf:
        # Context managers make sure both handles are closed even when
        # reading, formatting or writing fails.
        with open(os.path.join(config_path, conf)) as config_file:
            config_temp = config_file.read().format(
                ftmodel=get_model_string("fasttree", ds_name),
                rxmodel=get_model_string("raxml", ds_name, g="CAT"))
        with open(os.path.join(outpath, conf), "w") as out:
            out.write(config_temp)

    template_path = os.path.join(condor_templates_path,
                                 "condor.%s" % alg_tool)
    if os.path.exists(template_path):
        with open(template_path) as template_file:
            condor_temp = template_file.read()
    else:
        condor_temp = condor_temp_def

    condor_alg = condor_temp.format(
        exe=alg_tool_path,
        input=INPUT,
        output=ALIGNED,
        home=HOME,
        options=conf,
        name="alg")
    with open(os.path.join(outpath, "condor.align"), "w") as out:
        out.write(condor_alg)
def make_ML_condor_file(outpath, ml_tool, ds_name, ml, outgroup):
    """Write the ``condor.ml`` submit file for an ML tool into *outpath*.

    The ``[ML]`` entry for *ml* may be ``@<version>`` to select a tool
    version; otherwise any version (``"*"``) is requested.  The submit
    text comes from ``condor.<ml_tool>`` in ``condor_templates_path``
    when that file exists, otherwise from ``condor_temp_def``.  A
    non-empty *outgroup* is passed to the template as ``-o <outgroup>``.
    """
    # NOTE(review): the config entry is only used to pick the version;
    # the template's `options` field receives `outpath` -- confirm that
    # this is intended.
    conf = twophase_conf.get("ML", ml)
    ver = conf[1:] if conf.startswith("@") else "*"
    ml_tool_path = get_tool_path(ml_tool, ver)

    template_path = os.path.join(condor_templates_path,
                                 "condor.%s" % ml_tool)
    if os.path.exists(template_path):
        # Context manager so the template handle is always closed.
        with open(template_path) as template_file:
            condor_temp = template_file.read()
    else:
        condor_temp = condor_temp_def

    if outgroup is not None and outgroup != "":
        outg = "-o %s" % outgroup
    else:
        outg = ""

    condor_ml = condor_temp.format(exe=ml_tool_path,
                                   input="aligned.phylip",
                                   output="ml",
                                   home=HOME,
                                   options=outpath,
                                   name="ml",
                                   model=get_model_string(ml_tool, ds_name),
                                   outgroup=outg)
    with open(os.path.join(outpath, "condor.ml"), "w") as out:
        out.write(condor_ml)
Beispiel #7
0
def make_alignment_condor_file(outpath, alg_tool, ds_name, alg):
    """Write ``condor.align`` (and the tool's config file, if any) to *outpath*.

    The ``[alignment]`` entry for *alg* is either ``@<version>``, which
    selects a tool version and means "no config file", or the name of a
    config template under ``config_path``.  A config template is filled
    with the fasttree and raxml model strings for *ds_name* and written
    to *outpath* under the same file name.

    The Condor submit text comes from ``condor.<alg_tool>`` in
    ``condor_templates_path`` when that file exists, otherwise from
    ``condor_temp_def``.
    """
    conf = twophase_conf.get("alignment", alg)
    ver = "*"
    if conf.startswith("@"):
        ver = conf[1:]
        conf = ""
    alg_tool_path = get_tool_path(alg_tool, ver)

    if conf:
        # Context managers make sure both handles are closed even when
        # reading, formatting or writing fails.
        with open(os.path.join(config_path, conf)) as config_file:
            config_temp = config_file.read().format(
                ftmodel=get_model_string("fasttree", ds_name),
                rxmodel=get_model_string("raxml", ds_name, g="CAT"))
        with open(os.path.join(outpath, conf), "w") as out:
            out.write(config_temp)

    template_path = os.path.join(condor_templates_path,
                                 "condor.%s" % alg_tool)
    if os.path.exists(template_path):
        with open(template_path) as template_file:
            condor_temp = template_file.read()
    else:
        condor_temp = condor_temp_def

    condor_alg = condor_temp.format(exe=alg_tool_path,
                                    input=INPUT,
                                    output=ALIGNED,
                                    home=HOME,
                                    options=conf,
                                    name="alg")
    with open(os.path.join(outpath, "condor.align"), "w") as out:
        out.write(condor_alg)
def make_ST_condor(outpath, st_tool, ds_name, st, num):
    """Write ``condor.st`` with one job section per bootstrap replicate.

    The template ``condor.<st_tool>`` is split by position: everything
    but its last 95 characters is the header, formatted once with the job
    name ``MLBS`` and the tool path; the last 95 characters minus the
    final one form the per-job section, formatted for ``BS.1`` ..
    ``BS.<num>`` and joined with newlines after the header.

    *ds_name* is accepted for signature parity but not used here.
    """
    # NOTE(review): the split offset is tied to the exact byte layout of
    # the template file; editing the template's tail requires updating it.
    job_section_len = 95

    conf = twophase_conf.get("ST", st)
    # "@<version>" selects a tool version; anything else means "any".
    ver = conf[1:] if conf.startswith("@") else "*"
    tool_path = get_tool_path(st_tool, ver)

    # Context manager so the template handle is always closed.
    with open(os.path.join(condor_templates_path,
                           "condor.%s" % st_tool)) as template_file:
        condor_temp = template_file.read()

    main = condor_temp[0:-job_section_len].format(name="MLBS",
                                                  exe=tool_path)
    args = condor_temp[-job_section_len:-1]
    jobs = [args.format(options="BS/BS.%d" % bs,
                        input="allele.table",
                        name="BS.%d" % bs)
            for bs in range(1, num + 1)]

    with open(os.path.join(outpath, "condor.st"), "w") as out:
        out.write(main + "\n".join(jobs))
Beispiel #9
0
def get_model_string(ml, ds_name, **kwargs):
    """Build the model argument string for tool *ml* on dataset *ds_name*.

    All values come from the ``MODELS`` section of ``twophase_conf``:

    * the model is ``override.default.model.<ds_name>`` when that option
      exists and is non-empty, else ``default.model``;
    * ``DT``, ``F`` and ``I`` come from ``default.DT`` / ``default.F`` /
      ``default.I``; ``G`` comes from the ``g`` keyword argument when
      given, else from ``default.G``;
    * each value is replaced by ``<ml>.name.<kind>.<value>`` when the
      config defines that option.

    Returns ``<ml>.pattern`` formatted with ``DT``, ``G``, ``F``, ``I``
    and ``model``, or ``""`` when the tool has no pattern option.
    """
    def tool_specific(kind, value):
        # Translate a generic value into the tool's own spelling when the
        # config provides one; otherwise keep the value unchanged.
        option = "%s.name.%s.%s" % (ml, kind, value)
        if twophase_conf.has_option("MODELS", option):
            return twophase_conf.get("MODELS", option)
        return value

    override = "override.default.model.%s" % ds_name
    if (twophase_conf.has_option("MODELS", override)
            and twophase_conf.get("MODELS", override) != ''):
        model = twophase_conf.get("MODELS", override)
    else:
        model = twophase_conf.get("MODELS", "default.model")
    model = tool_specific("model", model)

    dt = tool_specific("DT", twophase_conf.get("MODELS", "default.DT"))
    # `in` instead of dict.has_key(), which no longer exists in Python 3.
    # The default is only read when no `g` keyword was passed.
    if "g" in kwargs:
        g = kwargs["g"]
    else:
        g = twophase_conf.get("MODELS", "default.G")
    g = tool_specific("G", g)
    f = tool_specific("F", twophase_conf.get("MODELS", "default.F"))
    i = tool_specific("I", twophase_conf.get("MODELS", "default.I"))

    pattern_option = "%s.pattern" % ml
    if not twophase_conf.has_option("MODELS", pattern_option):
        return ""
    return twophase_conf.get("MODELS", pattern_option).format(
        DT=dt, G=g, F=f, I=i, model=model)
Beispiel #10
0
# Directory holding the per-tool config templates.
config_path = os.path.join(CONFPATH, "config_files")
# Indexes into a dataset's `names` list.
CODE_SEQ = 0
CODE_REFTREE = 1
CODE_REFALG = 2
DUMMY = "dummy"
# Fixed file names used inside every replicate's output directory.
ALIGNED = "aligned.fasta"
INPUT = "input.fasta"
# Alignment names starting with this prefix are treated as already available.
INITIAL_PREF = "initial."
SPFN_STAT = "spfn.stat"
ALG_STAT = "alg.stat"
condor_templates_path = os.path.join(CONFPATH, "condor_templates")
# Fallback Condor template, read once at import time; the context manager
# closes the handle instead of leaving that to the garbage collector.
with open(os.path.join(condor_templates_path,
                       "condor.default")) as _default_template:
    condor_temp_def = _default_template.read()

use_initial = twophase_conf.getboolean("initial", "use_initial")
use_initial_alg = twophase_conf.get("initial", "use_initial_alignment")

# The check used to read `use_initial is None`, which can never hold once
# `use_initial` is truthy; the value that may be missing is the alignment.
if use_initial and (use_initial_alg is None or use_initial_alg == ''):
    raise RuntimeError("Using which initial alignment? \n \
                        set use_initial_alignment in the config file.")


def is_alignment_available(alg):
    """Return True when the name *alg* begins with ``INITIAL_PREF``."""
    prefix = INITIAL_PREF
    return alg.startswith(prefix)


def is_data_available(code):
    """Return True when the module-level ``names`` has a non-empty entry at *code*."""
    if code >= len(names):
        # Index is past the end of the list: nothing recorded for it.
        return False
    return names[code] != ""


def get_model_string(ml, ds_name, **kwargs):
def get_model_string(ml, ds_name, **kwargs):
    """Build the model argument string for tool *ml* on dataset *ds_name*.

    All values come from the ``MODELS`` section of ``twophase_conf``:

    * the model is ``override.default.model.<ds_name>`` when that option
      exists and is non-empty, else ``default.model``;
    * ``DT``, ``F`` and ``I`` come from ``default.DT`` / ``default.F`` /
      ``default.I``; ``G`` comes from the ``g`` keyword argument when
      given, else from ``default.G``;
    * each value is replaced by ``<ml>.name.<kind>.<value>`` when the
      config defines that option.

    Returns ``<ml>.pattern`` formatted with ``DT``, ``G``, ``F``, ``I``
    and ``model``, or ``""`` when the tool has no pattern option.
    """
    def tool_specific(kind, value):
        # Translate a generic value into the tool's own spelling when the
        # config provides one; otherwise keep the value unchanged.
        option = "%s.name.%s.%s" % (ml, kind, value)
        if twophase_conf.has_option("MODELS", option):
            return twophase_conf.get("MODELS", option)
        return value

    override = "override.default.model.%s" % ds_name
    if (twophase_conf.has_option("MODELS", override)
            and twophase_conf.get("MODELS", override) != ''):
        model = twophase_conf.get("MODELS", override)
    else:
        model = twophase_conf.get("MODELS", "default.model")
    model = tool_specific("model", model)

    dt = tool_specific("DT", twophase_conf.get("MODELS", "default.DT"))
    # `in` instead of dict.has_key(), which no longer exists in Python 3.
    # The default is only read when no `g` keyword was passed.
    if "g" in kwargs:
        g = kwargs["g"]
    else:
        g = twophase_conf.get("MODELS", "default.G")
    g = tool_specific("G", g)
    f = tool_specific("F", twophase_conf.get("MODELS", "default.F"))
    i = tool_specific("I", twophase_conf.get("MODELS", "default.I"))

    pattern_option = "%s.pattern" % ml
    if not twophase_conf.has_option("MODELS", pattern_option):
        return ""
    return twophase_conf.get("MODELS", pattern_option).format(
        DT=dt, G=g, F=f, I=i, model=model)
'''
Created on Jul 18, 2011

@author: smirarab

'''

from mirphyl.twophase import datasets, mltools, algtools, CONFPATH,\
    twophase_conf
from mirphyl.setup import get_datasets, get_tool_path, HOME
import sys
import os
from shutil import copyfile
from mirphyl.setup.utilities import require_dir

# The option is stored as text, so a plain .get() made "False", "0", "no"
# and "off" truthy.  Only an empty value or one of the explicit "false"
# spellings turns the flag off; any other text keeps it on, as before.
IGNORE_MISSING_OPTIONAL_FILES = (
    (twophase_conf.get("default", "IGNORE_OPTIONAL_FILES") or "")
    .strip().lower() not in ("", "0", "no", "false", "off"))

# Directory holding the per-tool config templates.
config_path = os.path.join(CONFPATH, "config_files")
# Indexes into a dataset's `names` list.
CODE_SEQ = 0
CODE_REFTREE = 1
CODE_REFALG = 2
DUMMY = "dummy"
# Fixed file names used inside every replicate's output directory.
ALIGNED = "aligned.fasta"
INPUT = "input.fasta"
# Alignment names starting with this prefix are treated as already available.
INITIAL_PREF = "initial."
SPFN_STAT = "spfn.stat"
ALG_STAT = "alg.stat"
condor_templates_path = os.path.join(CONFPATH, "condor_templates")
# Fallback Condor template, read once at import time; the context manager
# closes the handle instead of leaving that to the garbage collector.
with open(os.path.join(condor_templates_path,
                       "condor.default")) as _default_template:
    condor_temp_def = _default_template.read()

use_initial = twophase_conf.getboolean("initial", "use_initial")
Beispiel #13
0
# Directory holding the per-tool config templates.
config_path = os.path.join(CONFPATH, "config_files")
# Indexes into a dataset's `names` list.
CODE_SEQ = 0
CODE_REFTREE = 1
CODE_REFALG = 2
DUMMY = "dummy"
# Fixed file names used inside every replicate's output directory.
ALIGNED = "aligned.fasta"
INPUT = "input.fasta"
# Alignment names starting with this prefix are treated as already available.
INITIAL_PREF = "initial."
SPFN_STAT = "spfn.stat"
ALG_STAT = "alg.stat"
condor_templates_path = os.path.join(CONFPATH, "condor_templates")
# Fallback Condor template, read once at import time; the context manager
# closes the handle instead of leaving that to the garbage collector.
with open(os.path.join(condor_templates_path,
                       "condor.default")) as _default_template:
    condor_temp_def = _default_template.read()

use_initial = twophase_conf.getboolean("initial", "use_initial")
use_initial_alg = twophase_conf.get("initial", "use_initial_alignment")

# The check used to read `use_initial is None`, which can never hold once
# `use_initial` is truthy; the value that may be missing is the alignment.
if use_initial and (use_initial_alg is None or use_initial_alg == ''):
    raise RuntimeError("Using which initial alignment? \n \
                        set use_initial_alignment in the config file.")


def is_alignment_available(alg):
    """Return True when the name *alg* begins with ``INITIAL_PREF``."""
    prefix = INITIAL_PREF
    return alg.startswith(prefix)


def is_data_available(code):
    """Return True when the module-level ``names`` has a non-empty entry at *code*."""
    if code >= len(names):
        # Index is past the end of the list: nothing recorded for it.
        return False
    return names[code] != ""


def get_model_string(ml, ds_name, **kwargs):
Beispiel #14
0
# Directory holding the per-tool config templates.
config_path = os.path.join(CONFPATH, "config_files")
# Indexes into a dataset's `names` list.
CODE_SEQ = 0
CODE_REFTREE = 1
CODE_REFALG = 2
DUMMY = "dummy"
# Fixed file names used inside every replicate's output directory.
ALIGNED = "aligned.fasta"
INPUT = "input.fasta"
# Alignment names starting with this prefix are treated as already available.
INITIAL_PREF = "initial."
SPFN_STAT = "spfn.stat"
ALG_STAT = "alg.stat"
condor_templates_path = os.path.join(CONFPATH, "condor_templates")
# Fallback Condor template, read once at import time; the context manager
# closes the handle instead of leaving that to the garbage collector.
with open(os.path.join(condor_templates_path,
                       "condor.default")) as _default_template:
    condor_temp_def = _default_template.read()

use_initial = twophase_conf.getboolean("initial", "use_initial")
use_initial_alg = twophase_conf.get("initial", "use_initial_alignment")

# The check used to read `use_initial is None`, which can never hold once
# `use_initial` is truthy; the value that may be missing is the alignment.
if use_initial and (use_initial_alg is None or use_initial_alg == ''):
    raise RuntimeError("Using which initial alignment? \n \
                        set use_initial_alignment in the config file.")

def is_alignment_available(alg):
    """Return True when the name *alg* begins with ``INITIAL_PREF``."""
    prefix = INITIAL_PREF
    return alg.startswith(prefix)

def is_data_available(code):
    """Return True when the module-level ``names`` has a non-empty entry at *code*."""
    if code >= len(names):
        # Index is past the end of the list: nothing recorded for it.
        return False
    return names[code] != ""

def get_model_string(ml,ds_name,**kwargs):
    model = (twophase_conf.get("MODELS", "default.model") 
             if (not twophase_conf.has_option("MODELS","override.default.model.%s" %ds_name) 
                 or twophase_conf.get("MODELS","override.default.model.%s" %ds_name) == '')
Beispiel #15
0
'''
Created on Jul 18, 2011

@author: smirarab

'''

from mirphyl.twophase import datasets, mltools, algtools, CONFPATH,\
    twophase_conf
from mirphyl.setup import get_datasets, get_tool_path, HOME
import sys
import os
from shutil import copyfile
from mirphyl.setup.utilities import require_dir

# The option is stored as text, so a plain .get() made "False", "0", "no"
# and "off" truthy.  Only an empty value or one of the explicit "false"
# spellings turns the flag off; any other text keeps it on, as before.
IGNORE_MISSING_OPTIONAL_FILES = (
    (twophase_conf.get("default", "IGNORE_OPTIONAL_FILES") or "")
    .strip().lower() not in ("", "0", "no", "false", "off"))

# Directory holding the per-tool config templates.
config_path = os.path.join(CONFPATH, "config_files")
# Indexes into a dataset's `names` list.
CODE_SEQ = 0
CODE_REFTREE = 1
CODE_REFALG = 2
DUMMY = "dummy"
# Fixed file names used inside every replicate's output directory.
ALIGNED = "aligned.fasta"
INPUT = "input.fasta"
# Alignment names starting with this prefix are treated as already available.
INITIAL_PREF = "initial."
SPFN_STAT = "spfn.stat"
ALG_STAT = "alg.stat"
condor_templates_path = os.path.join(CONFPATH, "condor_templates")
# Fallback Condor template, read once at import time; the context manager
# closes the handle instead of leaving that to the garbage collector.
with open(os.path.join(condor_templates_path,
                       "condor.default")) as _default_template:
    condor_temp_def = _default_template.read()

use_initial = twophase_conf.getboolean("initial", "use_initial")