Example #1
def run_example_cluster():
    """
    run a set of jobs on the cluster
    """
    
    print ""
    print ""
    print "====================================="
    print "========   Submit and Wait   ========"
    print "====================================="
    print ""
    print ""

    functionJobs = make_jobs()

    print "output ret field in each job before sending it onto the cluster"
    for (i, job) in enumerate(functionJobs):
        print "Job #", i, "- ret: ", job.ret

    print ""
    print "sending function jobs to cluster"
    print ""

    processedFunctionJobs = process_jobs(functionJobs)

    print "ret fields AFTER execution on cluster"
    for (i, job) in enumerate(processedFunctionJobs):
        print "Job #", i, "- ret: ", str(job.ret)[0:10]
Example #2
def run_example_local_multithreading():
    """
    run a set of jobs on the local machine using several cores
    """

    print "====================================="
    print "======  Local Multithreading  ======="
    print "====================================="
    print ""
    print ""

    print "generating function jobs"

    functionJobs = make_jobs()

    # KybJob objects start out with an empty ret field, which is only filled after execution
    print "output ret field in each job before multithreaded computation"
    for (i, job) in enumerate(functionJobs):
        print "Job #", i, "- ret: ", job.ret

    print ""
    print "executing jobs on local machine using 1 thread"

    processedFunctionJobs = process_jobs(functionJobs,
                                         local=True,
                                         maxNumThreads=1)

    print "ret fields AFTER execution on local machine"
    for (i, job) in enumerate(processedFunctionJobs):
        print "Job #", i, "- ret: ", str(job.ret)[0:10]
Example #3
def run_example_local_multithreading():
    """
    run a set of jobs on the local machine using several cores
    """

    print "====================================="
    print "======  Local Multithreading  ======="
    print "====================================="
    print ""
    print ""

    print "generating function jobs"

    functionJobs = make_jobs()

    # KybJob objects start out with an empty ret field, which is only filled after execution
    print "output ret field in each job before multithreaded computation"
    for (i, job) in enumerate(functionJobs):
        print "Job #", i, "- ret: ", job.ret

    print ""
    print "executing jobs on local machine using 1 thread"

    processedFunctionJobs = process_jobs(functionJobs, local=True, maxNumThreads=1)

    print "ret fields AFTER execution on local machine"
    for (i, job) in enumerate(processedFunctionJobs):
        print "Job #", i, "- ret: ", str(job.ret)[0:10]
Example #4
def align_rnaseq_reads(yaml_config):
    """
    wrapper for aligning RNA-seq reads using STAR
    """
    operation_selected = "3"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        ## arguments to pygrid
        lib_type = 'SE' if len(det['fastq']) == 1 else 'PE'

        ## library insert size
        lib_insert_size = 100000
        num_cpu = 3

        arg = [[det, lib_type, lib_insert_size, num_cpu]]

        job = pg.cBioJob(call_align_reads, arg)

        job.mem = "90gb"
        job.vmem = "90gb"
        job.pmem = "30gb"
        job.pvmem = "30gb"
        job.nodes = 1
        job.ppn = num_cpu
        job.walltime = "48:00:00"

        Jobs.append(job)
    print
    print "sending read alignment with STAR jobs to worker"
    print
    processedJobs = pg.process_jobs(Jobs, local=False)
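call_align_reads itself is not shown in this listing. Judging from call_filter_genes near the end of the page, the pg.cBioJob targets are thin wrappers that unpack their argument list, run the tool, and return a status string; a hypothetical sketch along those lines (run_star_alignment is an assumed helper, not part of the source):

def call_align_reads(args_list):
    """
    wrapper for submitting STAR read alignment jobs to pygrid (sketch)
    """
    det, lib_type, lib_insert_size, num_cpu = args_list
    run_star_alignment(det, lib_type, lib_insert_size, num_cpu)  ## assumed helper
    return "done"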
Example #5
def transcript_prediction_trsk(yaml_config):
    """
    transcript prediction using TranscriptSkimmer
    """
    operation_selected = "4"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        ## arguments to pygrid 
        arg = [det]

        job = pg.cBioJob(call_transcript_prediction_trsk, arg) 

        ## native specifications 
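        ## (PBS/Torque-style resource requests, semantics assumed: mem/vmem =
        ## total physical/virtual memory for the job, pmem/pvmem = the same
        ## limits per process, ppn = processors per node, walltime = maximum
        ## wall-clock runtime)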
        job.mem="32gb"
        job.vmem="32gb"
        job.pmem="32gb"
        job.pvmem="32gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "9:00:00"

        Jobs.append(job)
    print 
    print "sending transcript assembly trsk jobs to worker"
    print 

    local = True  ## cluster compute switch 
    processedJobs = pg.process_jobs(Jobs, local=local)
Example #6
def run_example_cluster():
    """
    run a set of jobs on the cluster
    """

    print ""
    print ""
    print "====================================="
    print "========   Submit and Wait   ========"
    print "====================================="
    print ""
    print ""

    functionJobs = make_jobs()

    print "output ret field in each job before sending it onto the cluster"
    for (i, job) in enumerate(functionJobs):
        print "Job #", i, "- ret: ", job.ret

    print ""
    print "sending function jobs to cluster"
    print ""

    processedFunctionJobs = process_jobs(functionJobs)

    print "ret fields AFTER execution on cluster"
    for (i, job) in enumerate(processedFunctionJobs):
        print "Job #", i, "- ret: ", str(job.ret)[0:10]
Example #7
def alignment_filter(yaml_config):
    """
    run the multimapper resolution program
    """
    operation_selected = "m"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():

        num_cpus = 5
        ## arguments to pygrid 
        arg = [[det['short_name'], det['read_map_dir'], num_cpus]]

        job = pg.cBioJob(call_alignment_filter, arg) 

        ## native specifications 
        job.pmem="90gb"
        job.pvmem="90gb"
        job.mem="90gb"
        job.vmem="90gb"
        job.nodes = 1
        job.ppn = num_cpus
        job.walltime = "48:00:00"

        Jobs.append(job)
    print 
    print "sending multi map resolution jobs to worker"
    print 
    processedJobs = pg.process_jobs(Jobs)
Example #8
def transcript_prediction_cuff(yaml_config):
    """
    transcript prediction using Cufflinks
    """
    operation_selected = "c"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        ## arguments to pygrid 
        arg = [[det, 4]]

        job = pg.cBioJob(call_transcript_prediction_cuff, arg) 

        ## native specifications 
        job.mem="96gb"
        job.vmem="96gb"
        job.pmem="24gb"
        job.pvmem="24gb"
        job.nodes = 1
        job.ppn = 4
        job.walltime = "32:00:00"

        Jobs.append(job)
    print 
    print "sending transcript assembly cufflinks jobs to worker"
    print 
    processedJobs = pg.process_jobs(Jobs)
Example #9
def create_genome_index(yaml_config):
    """
    wrapper for calling the genome index function
    """
    operation_selected = "2"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        ## arguments to pygrid 
        num_cpus = 4 
        arg = [[det['fasta'], det['genome_index_dir'], det['gtf'], num_cpus, det['read_length']-1]]

        job = pg.cBioJob(call_genome_index, arg) 
    
        job.mem="46gb"
        job.vmem="46gb"
        job.pmem="46gb"
        job.pvmem="46gb"
        job.nodes = 1
        job.ppn = num_cpus
        job.walltime = "24:00:00"
        
        Jobs.append(job)
    print 
    print "sending star genome index jobs to worker"
    print 
    processedJobs = pg.process_jobs(Jobs)
Example #10
def align_rnaseq_reads(yaml_config):
    """
    wrapper for aligning RNA-seq reads using STAR
    """
    operation_selected = "3"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        ## arguments to pygrid 
        lib_type = 'SE' if len(det['fastq']) == 1 else 'PE'

        ## library insert size 
        lib_insert_size = 100000
        num_cpu = 3

        arg = [[det, lib_type, lib_insert_size, num_cpu]]

        job = pg.cBioJob(call_align_reads, arg) 
    
        job.mem="90gb"
        job.vmem="90gb"
        job.pmem="30gb"
        job.pvmem="30gb"
        job.nodes = 1
        job.ppn = num_cpu
        job.walltime = "48:00:00"
        
        Jobs.append(job)
    print 
    print "sending read alignment with STAR jobs to worker"
    print 
    processedJobs = pg.process_jobs(Jobs, local=False)
Example #11
def download_gtf(yaml_config):
    """
    download the GTF/GFF file from remote data publishing services
    """
    operation_selected = "a"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = [] 
    for org_name, det in orgdb.items():
        ## arguments to pygrid 
        arg = [[det['release_nb'], det['long_name'], det['genome_dir']]]

        if det['release_db'] == 'ensembl_metazoa_genome':
            job = pg.cBioJob(call_metazoa_gtf, arg) 
        elif det['release_db'] == 'phytozome_genome':
            job = pg.cBioJob(call_phytozome_gtf, arg) 
        elif det['release_db'] == 'ensembl_genome':
            job = pg.cBioJob(call_ensembl_gtf, arg) 
        else:
            print "error: download gtf plugin for %s not available, module works with ensembl_genome, ensembl_metazoa_genome and phytozome_genome servers." % det['release_db']
            sys.exit(0)

        job.mem="2gb"
        job.vmem="2gb"
        job.pmem="2gb"
        job.pvmem="2gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "2:00:00"
        
        Jobs.append(job)
    print 
    print "sending gtf download job to worker"
    print 
    processedJobs = pg.process_jobs(Jobs)
Example #12
def download_sra_data(yaml_config):
    """
    download the SRA file for the working organism
    """
    operation_selected = "1"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        ## arguments to pygrid
        arg = [[det['sra_run_id'], det['fastq_path']]]

        job = pg.cBioJob(call_download_sra_file, arg)

        job.mem = "2gb"
        job.vmem = "2gb"
        job.pmem = "2gb"
        job.pvmem = "2gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "1:00:00"

        Jobs.append(job)
    print
    print "sending download SRA file jobs to worker"
    print
    processedJobs = pg.process_jobs(Jobs)
Example #13
def download_sra_data(yaml_config):
    """
    download the SRA file for the working organism
    """
    operation_selected = "1"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = [] 
    for org_name, det in orgdb.items():
        ## arguments to pygrid 
        arg = [[det['sra_run_id'], det['fastq_path']]]

        job = pg.cBioJob(call_download_sra_file, arg) 
    
        job.mem="2gb"
        job.vmem="2gb"
        job.pmem="2gb"
        job.pvmem="2gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "1:00:00"
        
        Jobs.append(job)
    print 
    print "sending download SRA file jobs to worker"
    print 
    processedJobs = pg.process_jobs(Jobs)
Example #14
def transcript_prediction_stringtie(yaml_config):
    """
    transcript prediction using StringTie
    """

    operation_selected = "5"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        ## arguments to pygrid 

        arg = [[det["read_map_dir"], det["short_name"], det["read_assembly_dir"]]]

        job = pg.cBioJob(call_transcript_prediction_stringtie, arg) 
        
        cpus = 1 
        ## native specifications 
        job.mem="12gb"
        job.vmem="12gb"
        job.pmem="12gb"
        job.pvmem="12gb"
        job.nodes = 1
        job.ppn = cpus
        job.walltime = "24:00:00"

        Jobs.append(job)
    print("\nsending transcript assembly stringtie jobs to worker\n")

    local_compute = False ## switching between local multithreading and cluster computing
    
    processedJobs = pg.process_jobs(Jobs, local=local_compute)
Example #15
def create_genome_index(yaml_config):
    """
    wrapper for calling the genome index function
    """
    operation_selected = "2"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        ## arguments to pygrid
        num_cpus = 4
        arg = [[
            det['fasta'], det['genome_index_dir'], det['gtf'], num_cpus,
            det['read_length'] - 1
        ]]

        job = pg.cBioJob(call_genome_index, arg)

        job.mem = "46gb"
        job.vmem = "46gb"
        job.pmem = "46gb"
        job.pvmem = "46gb"
        job.nodes = 1
        job.ppn = num_cpus
        job.walltime = "24:00:00"

        Jobs.append(job)
    print
    print "sending star genome index jobs to worker"
    print
    processedJobs = pg.process_jobs(Jobs)
Example #16
def alignment_filter(yaml_config):
    """
    run the multimapper resolution program
    """
    operation_selected = "m"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():

        num_cpus = 5
        ## arguments to pygrid
        arg = [[det['short_name'], det['read_map_dir'], num_cpus]]

        job = pg.cBioJob(call_alignment_filter, arg)

        ## native specifications
        job.pmem = "90gb"
        job.pvmem = "90gb"
        job.mem = "90gb"
        job.vmem = "90gb"
        job.nodes = 1
        job.ppn = num_cpus
        job.walltime = "48:00:00"

        Jobs.append(job)
    print
    print "sending multi map resolution jobs to worker"
    print
    processedJobs = pg.process_jobs(Jobs)
Example #17
def transcript_prediction_stringtie(yaml_config):
    """
    transcript prediction using StringTie
    """

    operation_selected = "5"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        ## arguments to pygrid

        arg = [[
            det["read_map_dir"], det["short_name"], det["read_assembly_dir"]
        ]]

        job = pg.cBioJob(call_transcript_prediction_stringtie, arg)

        cpus = 1
        ## native specifications
        job.mem = "12gb"
        job.vmem = "12gb"
        job.pmem = "12gb"
        job.pvmem = "12gb"
        job.nodes = 1
        job.ppn = cpus
        job.walltime = "24:00:00"

        Jobs.append(job)
    print("\nsending transcript assembly stringtie jobs to worker\n")

    local_compute = False  ## switching between local multithreading and cluster computing

    processedJobs = pg.process_jobs(Jobs, local=local_compute)
Example #18
def transcript_prediction_trsk(yaml_config):
    """
    transcript prediction using TranscriptSkimmer
    """
    operation_selected = "4"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        ## arguments to pygrid
        arg = [det]

        job = pg.cBioJob(call_transcript_prediction_trsk, arg)

        ## native specifications
        job.mem = "32gb"
        job.vmem = "32gb"
        job.pmem = "32gb"
        job.pvmem = "32gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "9:00:00"

        Jobs.append(job)
    print
    print "sending transcript assembly trsk jobs to worker"
    print

    local = True  ## cluster compute switch
    processedJobs = pg.process_jobs(Jobs, local=local)
Example #19
def transcript_prediction_cuff(yaml_config):
    """
    transcript prediction using Cufflinks
    """
    operation_selected = "c"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        ## arguments to pygrid
        arg = [[det, 4]]

        job = pg.cBioJob(call_transcript_prediction_cuff, arg)

        ## native specifications
        job.mem = "96gb"
        job.vmem = "96gb"
        job.pmem = "24gb"
        job.pvmem = "24gb"
        job.nodes = 1
        job.ppn = 4
        job.walltime = "32:00:00"

        Jobs.append(job)
    print
    print "sending transcript assembly cufflinks jobs to worker"
    print
    processedJobs = pg.process_jobs(Jobs)
Example #20
def filter_genes(yaml_config, data_method):
    """
    filter out invalid gene models from the provided genome annotation
    """
    operation_selected = "f"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        if data_method == "cufflinks":
            gff_file = "%s/transcripts.gtf" % det["read_assembly_dir"]  ## cufflinks run output file
            outFile = "%s/%s_cufflinks_genes.gff" % (
                det["read_assembly_dir"],
                org_name,
            )  ## example: A_thaliana_cufflinks_genes.gff
        elif data_method == "trsk":
            gff_file = "%s/tmp_trsk_genes.gff" % det["read_assembly_dir"]  ## trsk run output file
            outFile = "%s/%s_trsk_genes.gff" % (
                det["read_assembly_dir"],
                org_name,
            )  ## example: A_thaliana_trsk_genes.gff
        else:
            gff_file = det["gtf"]  ## public database genome annotation file
            outFile = "%s/%s_%s.gff" % (
                det["read_assembly_dir"],
                org_name,
                det["genome_release_db"],
            )  ## example: A_thaliana_arabidopsis-tair10.gff

        ## arguments to pygrid
        arg = [[gff_file, det["fasta"], outFile]]
        job = pg.cBioJob(call_filter_genes, arg)

        ## native specifications
        job.mem = "6gb"
        job.vmem = "6gb"
        job.pmem = "6gb"
        job.pvmem = "6gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "2:00:00"

        Jobs.append(job)
    print
    print "sending filter gene models jobs to worker"
    print
    processedJobs = pg.process_jobs(Jobs)
Example #21
def filter_genes(yaml_config, data_method):
    """
    filter out invalid gene models from the provided genome annotation
    """
    operation_selected = "f"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        if data_method == "cufflinks":
            gff_file = "%s/transcripts.gtf" % det[
                'read_assembly_dir']  ## cufflinks run output file
            outFile = "%s/%s_cufflinks_genes.gff" % (
                det['read_assembly_dir'], org_name
            )  ## example: A_thaliana_cufflinks_genes.gff
        elif data_method == "trsk":
            gff_file = "%s/tmp_trsk_genes.gff" % det[
                'read_assembly_dir']  ## trsk run output file
            outFile = "%s/%s_trsk_genes.gff" % (
                det['read_assembly_dir'], org_name
            )  ## example: A_thaliana_trsk_genes.gff
        else:
            gff_file = det['gtf']  ## public database genome annotation file
            outFile = "%s/%s_%s.gff" % (
                det['read_assembly_dir'], org_name, det['genome_release_db']
            )  ## example: A_thaliana_arabidopsis-tair10.gff

        ## arguments to pygrid
        arg = [[gff_file, det['fasta'], outFile]]
        job = pg.cBioJob(call_filter_genes, arg)

        ## native specifications
        job.mem = "6gb"
        job.vmem = "6gb"
        job.pmem = "6gb"
        job.pvmem = "6gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "2:00:00"

        Jobs.append(job)
    print
    print "sending filter gene models jobs to worker"
    print
    processedJobs = pg.process_jobs(Jobs)
Example #22
def download_gtf(yaml_config):
    """
    download the GTF/GFF file from remote data publishing services
    """
    operation_selected = "a"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        ## arguments to pygrid
        arg = [[det['release_nb'], det['long_name'], det['genome_dir']]]

        if det['release_db'] == 'ensembl_metazoa_genome':
            job = pg.cBioJob(call_metazoa_gtf, arg)
        elif det['release_db'] == 'phytozome_genome':
            job = pg.cBioJob(call_phytozome_gtf, arg)
        elif det['release_db'] == 'ensembl_genome':
            job = pg.cBioJob(call_ensembl_gtf, arg)
        elif det['release_db'] == 'ensembl_fungi_genome':
            job = pg.cBioJob(call_fungi_gtf, arg)
        elif det['release_db'] == 'ensembl_protists_genome':
            job = pg.cBioJob(call_protists_gtf, arg)
        else:
            exit(
                "error: download gtf plugin for %s not available, module works with ensembl_genome, ensembl_metazoa_genome, ensembl_fungi_genome, ensembl_protists_genome and phytozome_genome servers."
                % det['release_db'])

        job.mem = "2gb"
        job.vmem = "2gb"
        job.pmem = "2gb"
        job.pvmem = "2gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "2:00:00"

        Jobs.append(job)
    print
    print "sending gtf download job to worker"
    print
    local_compute = True
    processedJobs = pg.process_jobs(Jobs, local=local_compute)
Example #23
def download_fasta(yaml_config):
    """
    download the FASTA file from remote data publishing services
    """
    operation_selected = "g"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        ## arguments to pygrid
        arg = [[det["release_nb"], det["long_name"], det["genome_dir"]]]

        if det["release_db"] == "ensembl_metazoa_genome":
            job = pg.cBioJob(call_metazoa_fasta, arg)
        elif det["release_db"] == "phytozome_genome":
            job = pg.cBioJob(call_phytozome_fasta, arg)
        elif det["release_db"] == "ensembl_genome":
            job = pg.cBioJob(call_ensembl_fasta, arg)
        elif det["release_db"] == "ensembl_fungi_genome":
            job = pg.cBioJob(call_fungi_fasta, arg)
        else:
            exit(
                "error: download fasta plugin for %s not available, module works with ensembl_genome, ensembl_metazoa_genome, ensembl_fungi_genome and phytozome_genome servers."
                % det["release_db"]
            )

        job.mem = "2gb"
        job.vmem = "2gb"
        job.pmem = "2gb"
        job.pvmem = "2gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "2:00:00"

        Jobs.append(job)
    print
    print "sending fasta download job to worker"
    print
    local_compute = True
    processedJobs = pg.process_jobs(Jobs, local=local_compute)
Example #24
def main(yaml_config):
    """
    """

    config_map = yaml.safe_load(open(yaml_config, "rU"))
    exp_path_pfx = config_map['experiment_data_path']['dir']

    org_db = defaultdict()
    for ent in config_map['experiment']:
        species_name = ent['organism_name']

        genus, species = species_name.strip().split("_")
        short_name = "%s_%s" % (genus[0].upper(), species)

        org_db[short_name] = dict(short_name=short_name)
        org_db[short_name]['work_dir'] = "%s/%s/set_union_refix" % (
            exp_path_pfx, short_name)
        org_db[short_name]['data_dir'] = "%s/%s/set_2" % (exp_path_pfx,
                                                          short_name)

    ## prepare jobs
    Jobs = []
    for org_name, det in org_db.items():

        arg = [[org_name, det['work_dir'], det['data_dir']]]

        job = pg.cBioJob(distribute_model_train, arg)
        job.mem = "4gb"
        job.vmem = "4gb"
        job.pmem = "4gb"
        job.pvmem = "4gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "2:00:00"

        Jobs.append(job)

    compute_local = True
    print "sending jobs to worker"
    processedJobs = pg.process_jobs(Jobs, local=compute_local)
Example #25
def main(yaml_config):
    """
    """

    config_map = yaml.safe_load(open(yaml_config, "rU"))
    exp_path_pfx = config_map["experiment_data_path"]["dir"]

    org_db = defaultdict()
    for ent in config_map["experiment"]:
        species_name = ent["organism_name"]

        genus, species = species_name.strip().split("_")
        short_name = "%s_%s" % (genus[0].upper(), species)

        org_db[short_name] = dict(short_name=short_name)
        org_db[short_name]["work_dir"] = "%s/%s/set_union_refix" % (exp_path_pfx, short_name)
        org_db[short_name]["data_dir"] = "%s/%s/set_2" % (exp_path_pfx, short_name)

    ## prepare jobs
    Jobs = []
    for org_name, det in org_db.items():

        arg = [[org_name, det["work_dir"], det["data_dir"]]]

        job = pg.cBioJob(distribute_model_train, arg)
        job.mem = "4gb"
        job.vmem = "4gb"
        job.pmem = "4gb"
        job.pvmem = "4gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "2:00:00"

        Jobs.append(job)

    compute_local = True
    print "sending jobs to worker"
    processedJobs = pg.process_jobs(Jobs, local=compute_local)
Example #26
def decompose_sra_file(yaml_config):
    """
    decompress the .sra file from NCBI SRA
    """
    operation_selected = "d"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = [] 
    for org_name, det in orgdb.items():
        sra_file = "%s/%s.sra"  % (det['fastq_path'], det['sra_run_id'])

        if not os.path.isfile(sra_file):## check the file present or not  
            print "error: missing sequencing read file %s" % sra_file
            sys.exit(0)
        
        ## TODO can be consider to the yaml file options 
        #library_type = "pe"
        library_type = "pe"
        compress_format = "gzip"

        ## arguments to pygrid 
        arg = [[sra_file, det['fastq_path']]]

        job = pg.cBioJob(call_decompose_sra_file, arg) 
    
        job.mem="6gb"
        job.vmem="6gb"
        job.pmem="6gb"
        job.pvmem="6gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "24:00:00"
        
        Jobs.append(job)
    print 
    print "sending decompress SRA file jobs to worker"
    print 
    processedJobs = pg.process_jobs(Jobs)
Example #27
def decompose_sra_file(yaml_config):
    """
    decompress the .sra file from NCBI SRA
    """
    operation_selected = "d"
    orgdb = expdb.experiment_db(yaml_config, operation_selected)

    Jobs = []
    for org_name, det in orgdb.items():
        sra_file = "%s/%s.sra" % (det['fastq_path'], det['sra_run_id'])

        if not os.path.isfile(sra_file):  ## check that the read file is present
            print "error: missing sequencing read file %s" % sra_file
            sys.exit(1)

        ## TODO: these settings could be read from the yaml config options
        library_type = "pe"
        compress_format = "gzip"

        ## arguments to pygrid
        arg = [[sra_file, det['fastq_path']]]

        job = pg.cBioJob(call_decompose_sra_file, arg)

        job.mem = "6gb"
        job.vmem = "6gb"
        job.pmem = "6gb"
        job.pvmem = "6gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "24:00:00"

        Jobs.append(job)
    print
    print "sending decompress SRA file jobs to worker"
    print
    processedJobs = pg.process_jobs(Jobs)
Example #28
        job = pg.cBioJob(call_fetch_db_signals, arg)

        ## native specifications
        job.mem = "5gb"
        job.vmem = "5gb"
        job.pmem = "5gb"
        job.pvmem = "5gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "1:00:00"

        Jobs.append(job)
    print
    print "sending genomic signal fetch jobs to worker"
    print
    processedJobs = pg.process_jobs(Jobs)


def call_filter_genes(args_list):
    """
    wrapper for submitting jobs to pygrid
    """
    from rnaseq_align_assembly import refine_transcript_models as filter_tool
    gtf_file, fasta_file, result_file = args_list
    filter_tool.filter_gene_models(gtf_file, fasta_file, result_file)
    return "done"


def filter_genes(yaml_config, data_method):
    """
    filter out invalid gene models from the provided genome annotation
Example #29
        job = pg.cBioJob(call_fetch_db_signals, arg) 

        ## native specifications 
        job.mem="5gb"
        job.vmem="5gb"
        job.pmem="5gb"
        job.pvmem="5gb"
        job.nodes = 1
        job.ppn = 1
        job.walltime = "1:00:00"

        Jobs.append(job)
    print 
    print "sending genomic signal fetch jobs to worker"
    print 
    processedJobs = pg.process_jobs(Jobs)


def call_filter_genes(args_list):
    """
    wrapper for submitting jobs to pygrid
    """
    from rnaseq_align_assembly import refine_transcript_models as filter_tool 
    gtf_file, fasta_file, result_file = args_list
    filter_tool.filter_gene_models(gtf_file, fasta_file, result_file)
    return "done"


def filter_genes(yaml_config, data_method):
    """
    filter out invalid gene models from the provided genome annotation